@panda-agent/panda-cli 0.1.29 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/dist/panda-cli-ink.bundle.mjs +258 -247
  2. package/package.json +6 -4
  3. package/skills/.gitkeep +0 -0
  4. package/skills/README.md +13 -0
  5. package/skills/docx/.skill-metadata.yaml +173 -0
  6. package/skills/docx/LICENSE.txt +30 -0
  7. package/skills/docx/SKILL.md +589 -0
  8. package/skills/docx/scripts/__init__.py +1 -0
  9. package/skills/docx/scripts/accept_changes.py +206 -0
  10. package/skills/docx/scripts/comment.py +442 -0
  11. package/skills/docx/scripts/office/helpers/__init__.py +1 -0
  12. package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
  13. package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
  14. package/skills/docx/scripts/office/pack.py +167 -0
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  21. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  22. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  23. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  24. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  25. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  26. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  27. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  28. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  29. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  30. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  31. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  32. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  33. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  34. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  35. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  36. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  37. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  38. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  39. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  40. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  41. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  42. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  43. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  44. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  45. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  46. package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
  47. package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  48. package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  49. package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  50. package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  51. package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  52. package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  53. package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  54. package/skills/docx/scripts/office/soffice.py +194 -0
  55. package/skills/docx/scripts/office/unpack.py +145 -0
  56. package/skills/docx/scripts/office/validate.py +114 -0
  57. package/skills/docx/scripts/office/validators/__init__.py +16 -0
  58. package/skills/docx/scripts/office/validators/base.py +733 -0
  59. package/skills/docx/scripts/office/validators/docx.py +354 -0
  60. package/skills/docx/scripts/office/validators/pptx.py +230 -0
  61. package/skills/docx/scripts/office/validators/redlining.py +212 -0
  62. package/skills/docx/scripts/templates/comments.xml +3 -0
  63. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  64. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  65. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  66. package/skills/docx/scripts/templates/people.xml +3 -0
  67. package/skills/frontend-design/LICENSE.txt +177 -0
  68. package/skills/frontend-design/SKILL.md +42 -0
  69. package/skills/pdf/.skill-metadata.yaml +273 -0
  70. package/skills/pdf/LICENSE.txt +30 -0
  71. package/skills/pdf/SKILL.md +324 -0
  72. package/skills/pdf/advanced-reference.md +609 -0
  73. package/skills/pdf/form-filling-guide.md +318 -0
  74. package/skills/pdf/forms.md +294 -0
  75. package/skills/pdf/reference.md +612 -0
  76. package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
  77. package/skills/pdf/scripts/check_fillable_fields.py +64 -0
  78. package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
  79. package/skills/pdf/scripts/create_validation_image.py +125 -0
  80. package/skills/pdf/scripts/extract_form_field_info.py +220 -0
  81. package/skills/pdf/scripts/extract_form_structure.py +202 -0
  82. package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
  83. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
  84. package/skills/pptx-generator/SKILL.md +204 -0
  85. package/skills/pptx-generator/assets/styles/business.json +8 -0
  86. package/skills/pptx-generator/assets/styles/minimal.json +8 -0
  87. package/skills/pptx-generator/assets/styles/modern.json +8 -0
  88. package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
  89. package/skills/pptx-generator/references/collaboration_guide.md +381 -0
  90. package/skills/pptx-generator/references/json_format_spec.md +215 -0
  91. package/skills/pptx-generator/references/layout_guide.md +290 -0
  92. package/skills/pptx-generator/scripts/json_validator.py +194 -0
  93. package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
  94. package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
  95. package/skills/skill-creator/LICENSE.txt +202 -0
  96. package/skills/skill-creator/SKILL.md +479 -0
  97. package/skills/skill-creator/agents/analyzer.md +274 -0
  98. package/skills/skill-creator/agents/comparator.md +202 -0
  99. package/skills/skill-creator/agents/grader.md +223 -0
  100. package/skills/skill-creator/assets/eval_review.html +146 -0
  101. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  102. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  103. package/skills/skill-creator/references/schemas.md +430 -0
  104. package/skills/skill-creator/scripts/__init__.py +0 -0
  105. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  106. package/skills/skill-creator/scripts/generate_report.py +326 -0
  107. package/skills/skill-creator/scripts/improve_description.py +248 -0
  108. package/skills/skill-creator/scripts/package_skill.py +136 -0
  109. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  110. package/skills/skill-creator/scripts/run_eval.py +310 -0
  111. package/skills/skill-creator/scripts/run_loop.py +332 -0
  112. package/skills/skill-creator/scripts/utils.py +47 -0
  113. package/skills/xlsx/.skill-metadata.yaml +185 -0
  114. package/skills/xlsx/LICENSE.txt +30 -0
  115. package/skills/xlsx/SKILL.md +233 -0
  116. package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
  117. package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
  118. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
  119. package/skills/xlsx/scripts/office/pack.py +162 -0
  120. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  121. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  122. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  123. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  124. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  125. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  126. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  127. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  128. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  129. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  130. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  131. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  132. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  133. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  134. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  135. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  136. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  137. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  138. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  139. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  140. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  141. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  142. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  143. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  144. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  145. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  146. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  147. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  148. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  149. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  150. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  151. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  152. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  153. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  154. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  155. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  156. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  157. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  158. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  159. package/skills/xlsx/scripts/office/soffice.py +185 -0
  160. package/skills/xlsx/scripts/office/unpack.py +146 -0
  161. package/skills/xlsx/scripts/office/validate.py +108 -0
  162. package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
  163. package/skills/xlsx/scripts/office/validators/base.py +800 -0
  164. package/skills/xlsx/scripts/office/validators/docx.py +383 -0
  165. package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
  166. package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
  167. package/skills/xlsx/scripts/recalc.py +296 -0
@@ -0,0 +1,733 @@
1
+ """
2
+ Foundation class providing shared validation primitives for Office XML packages.
3
+ """
4
+
5
+ import re
6
+ import pathlib
7
+ import tempfile
8
+ import zipfile
9
+
10
+ import defusedxml.minidom
11
+ import lxml.etree
12
+
13
+
14
+ class BaseSchemaValidator:
15
+ """Abstract base that concrete validators (DOCX, PPTX …) inherit from."""
16
+
17
+ # Errors matching any of these substrings are silently suppressed.
18
+ IGNORED_VALIDATION_ERRORS = [
19
+ "hyphenationZone",
20
+ "purl.org/dc/terms",
21
+ ]
22
+
23
+ # Mapping: element local-name → (id-attribute, scope)
24
+ # scope = "file" → unique within the same XML file
25
+ # scope = "global" → unique across the entire package
26
+ UNIQUE_ID_REQUIREMENTS = {
27
+ "comment": ("id", "file"),
28
+ "commentrangestart": ("id", "file"),
29
+ "commentrangeend": ("id", "file"),
30
+ "bookmarkstart": ("id", "file"),
31
+ "bookmarkend": ("id", "file"),
32
+ "sldid": ("id", "file"),
33
+ "sldmasterid": ("id", "global"),
34
+ "sldlayoutid": ("id", "global"),
35
+ "cm": ("authorid", "file"),
36
+ "sheet": ("sheetid", "file"),
37
+ "definedname": ("id", "file"),
38
+ "cxnsp": ("id", "file"),
39
+ "sp": ("id", "file"),
40
+ "pic": ("id", "file"),
41
+ "grpsp": ("id", "file"),
42
+ }
43
+
44
+ EXCLUDED_ID_CONTAINERS = {"sectionlst"}
45
+
46
+ ELEMENT_RELATIONSHIP_TYPES = {}
47
+
48
+ SCHEMA_MAPPINGS = {
49
+ "word": "ISO-IEC29500-4_2016/wml.xsd",
50
+ "ppt": "ISO-IEC29500-4_2016/pml.xsd",
51
+ "xl": "ISO-IEC29500-4_2016/sml.xsd",
52
+ "[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd",
53
+ "app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd",
54
+ "core.xml": "ecma/fouth-edition/opc-coreProperties.xsd",
55
+ "custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd",
56
+ ".rels": "ecma/fouth-edition/opc-relationships.xsd",
57
+ "people.xml": "microsoft/wml-2012.xsd",
58
+ "commentsIds.xml": "microsoft/wml-cid-2016.xsd",
59
+ "commentsExtensible.xml": "microsoft/wml-cex-2018.xsd",
60
+ "commentsExtended.xml": "microsoft/wml-2012.xsd",
61
+ "chart": "ISO-IEC29500-4_2016/dml-chart.xsd",
62
+ "theme": "ISO-IEC29500-4_2016/dml-main.xsd",
63
+ "drawing": "ISO-IEC29500-4_2016/dml-main.xsd",
64
+ }
65
+
66
+ MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006"
67
+ XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
68
+
69
+ PACKAGE_RELATIONSHIPS_NAMESPACE = (
70
+ "http://schemas.openxmlformats.org/package/2006/relationships"
71
+ )
72
+ OFFICE_RELATIONSHIPS_NAMESPACE = (
73
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
74
+ )
75
+ CONTENT_TYPES_NAMESPACE = (
76
+ "http://schemas.openxmlformats.org/package/2006/content-types"
77
+ )
78
+
79
+ MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"}
80
+
81
+ OOXML_NAMESPACES = {
82
+ "http://schemas.openxmlformats.org/officeDocument/2006/math",
83
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
84
+ "http://schemas.openxmlformats.org/schemaLibrary/2006/main",
85
+ "http://schemas.openxmlformats.org/drawingml/2006/main",
86
+ "http://schemas.openxmlformats.org/drawingml/2006/chart",
87
+ "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
88
+ "http://schemas.openxmlformats.org/drawingml/2006/diagram",
89
+ "http://schemas.openxmlformats.org/drawingml/2006/picture",
90
+ "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
91
+ "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
92
+ "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
93
+ "http://schemas.openxmlformats.org/presentationml/2006/main",
94
+ "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
95
+ "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes",
96
+ "http://www.w3.org/XML/1998/namespace",
97
+ }
98
+
99
+ # ── Initialisation ───────────────────────────────────────────────────
100
+
101
def __init__(self, unpacked_dir, original_file=None, verbose=False):
    """Record where the unpacked package lives and index its XML parts.

    Args:
        unpacked_dir: Directory containing the unpacked Office package.
            Stored as an absolute, resolved path.
        original_file: Optional path to the original document.  Any falsy
            value is stored as ``None``.
        verbose: Stored as ``self.verbose``; the validators in this class
            print their PASSED lines only when it is true.
    """
    self.verbose = verbose
    self.unpacked_dir = pathlib.Path(unpacked_dir).resolve()

    if original_file:
        self.original_file = pathlib.Path(original_file)
    else:
        self.original_file = None

    # The schemas directory sits next to the directory holding this module.
    module_dir = pathlib.Path(__file__).parent
    self.schemas_dir = module_dir.parent / "schemas"

    # All "*.xml" matches come first, followed by all "*.rels" matches.
    discovered = []
    for pattern in ("*.xml", "*.rels"):
        discovered.extend(self.unpacked_dir.rglob(pattern))
    self.xml_files = discovered

    if len(self.xml_files) == 0:
        print("Warning: No XML files found in %s" % self.unpacked_dir)
116
+
117
+ # ── Abstract interface ───────────────────────────────────────────────
118
+
119
+ def validate(self):
120
+ raise NotImplementedError("Subclasses must implement the validate method")
121
+
122
def repair(self) -> int:
    """Run the repairs this base class knows about.

    Returns:
        The count returned by ``_fix_whitespace_preservation``.
    """
    repaired_count = self._fix_whitespace_preservation()
    return repaired_count
124
+
125
+ # ── Repair: xml:space="preserve" ─────────────────────────────────────
126
+
127
def repair_whitespace_preservation(self) -> int:
    """Public entry point for the ``xml:space="preserve"`` repair.

    Returns:
        The count returned by ``_fix_whitespace_preservation``.
    """
    fixed_elements = self._fix_whitespace_preservation()
    return fixed_elements
129
+
130
def _fix_whitespace_preservation(self) -> int:
    """Add ``xml:space="preserve"`` to text elements with edge whitespace.

    Every file in ``self.xml_files`` is parsed.  Each element whose tag
    name ends in ``:t`` and whose first child node's value starts or ends
    with a space or tab receives the attribute, unless it already carries
    it.  A file that was changed is rewritten in place as UTF-8.

    The repair is best-effort: a file that cannot be read, parsed or
    written is skipped so that one bad part does not abort the run.  The
    skip is reported when ``self.verbose`` is set rather than being
    swallowed silently.

    Returns:
        Number of elements that received the attribute.
    """
    n_fixed = 0
    for fp in self.xml_files:
        try:
            raw = fp.read_text(encoding="utf-8")
            dom = defusedxml.minidom.parseString(raw)
            touched = False

            for el in dom.getElementsByTagName("*"):
                if not el.tagName.endswith(":t"):
                    continue
                if el.firstChild is None:
                    continue
                txt = el.firstChild.nodeValue
                if not txt:
                    continue
                has_edge_whitespace = (
                    txt.startswith((" ", "\t")) or txt.endswith((" ", "\t"))
                )
                if not has_edge_whitespace:
                    continue
                if el.getAttribute("xml:space") == "preserve":
                    continue

                el.setAttribute("xml:space", "preserve")
                # Only the first 30 characters are shown for long text.
                preview = (repr(txt[:30]) + "...") if len(txt) > 30 else repr(txt)
                print(" Repaired: %s: Added xml:space='preserve' to %s: %s"
                      % (fp.name, el.tagName, preview))
                n_fixed += 1
                touched = True

            if touched:
                fp.write_bytes(dom.toxml(encoding="UTF-8"))
        except Exception as exc:
            # Deliberately broad: the repair must never abort on one file.
            if self.verbose:
                print(" Skipped: %s: %s" % (fp.name, exc))
    return n_fixed
158
+
159
+ # ── Well-formedness ──────────────────────────────────────────────────
160
+
161
def validate_xml(self):
    """Check that every file in ``self.xml_files`` is well-formed XML.

    Returns:
        ``True`` when every file parses, ``False`` otherwise.  Violations
        are always printed; the PASSED line is printed only when
        ``self.verbose`` is set.
    """
    violations: list[str] = []
    for xml_path in self.xml_files:
        try:
            lxml.etree.parse(str(xml_path))
        except lxml.etree.XMLSyntaxError as err:
            shown = xml_path.relative_to(self.unpacked_dir)
            violations.append(" %s: Line %d: %s" % (shown, err.lineno, err.msg))
        except Exception as err:
            shown = xml_path.relative_to(self.unpacked_dir)
            violations.append(" %s: Unexpected error: %s" % (shown, err))

    if not violations:
        if self.verbose:
            print("PASSED - All XML files are well-formed")
        return True

    print("FAILED - Found %d XML violations:" % len(violations))
    for line in violations:
        print(line)
    return False
181
+
182
+ # ── Namespace coherence ──────────────────────────────────────────────
183
+
184
def validate_namespaces(self):
    """Check that prefixes named in ``Ignorable`` attributes are declared.

    For each parseable file, every root attribute whose name ends in
    ``Ignorable`` is split on whitespace and compared with the prefixes in
    the root element's namespace map.  Files that are not well-formed are
    skipped here.

    Returns:
        ``True`` when no undeclared prefix was found, ``False`` otherwise.
        Issues are always printed; the PASSED line is printed only when
        ``self.verbose`` is set.
    """
    issues: list[str] = []
    for xml_path in self.xml_files:
        try:
            root = lxml.etree.parse(str(xml_path)).getroot()
        except lxml.etree.XMLSyntaxError:
            continue

        # The default namespace is keyed by None and is not a prefix.
        known_prefixes = set(root.nsmap.keys()) - {None}
        for attr_name, attr_value in root.attrib.items():
            if not attr_name.endswith("Ignorable"):
                continue
            for prefix in set(attr_value.split()) - known_prefixes:
                issues.append(
                    " %s: Namespace '%s' in Ignorable but not declared"
                    % (xml_path.relative_to(self.unpacked_dir), prefix)
                )

    if not issues:
        if self.verbose:
            print("PASSED - All namespace prefixes properly declared")
        return True

    print("FAILED - %d namespace issues:" % len(issues))
    for line in issues:
        print(line)
    return False
208
+
209
+ # ── ID uniqueness ────────────────────────────────────────────────────
210
+
211
def validate_unique_ids(self):
    """Check the ID-uniqueness rules listed in ``UNIQUE_ID_REQUIREMENTS``.

    Each file in ``self.xml_files`` is parsed and walked.  For every
    element whose lower-cased local name is a key of
    ``UNIQUE_ID_REQUIREMENTS`` the configured attribute (matched by
    lower-cased local name) is checked for duplicates, either within the
    file (scope ``"file"``) or across all files (scope ``"global"``).
    ``mc:AlternateContent`` subtrees are removed before the walk, and
    elements nested inside a container named in
    ``EXCLUDED_ID_CONTAINERS`` are skipped.

    Returns:
        ``True`` when no duplicate was found, ``False`` otherwise.
        Problems are always printed; the PASSED line is printed only when
        ``self.verbose`` is set.
    """

    def local_name(qualified):
        # "{namespace}Name" -> "name"; un-namespaced names pass through.
        return qualified.split("}")[-1].lower()

    problems: list[str] = []
    global_ids: dict = {}

    for fp in self.xml_files:
        try:
            root = lxml.etree.parse(str(fp)).getroot()
            file_ids: dict = {}

            for mc_elem in root.xpath(".//mc:AlternateContent",
                                      namespaces={"mc": self.MC_NAMESPACE}):
                mc_elem.getparent().remove(mc_elem)

            for el in root.iter():
                # iter() also yields comments and processing instructions,
                # whose .tag is not a string.  Skip them: the string
                # handling below would raise and the whole file would be
                # reported as an error.
                if not isinstance(el.tag, str):
                    continue

                raw_tag = local_name(el.tag)
                if raw_tag not in self.UNIQUE_ID_REQUIREMENTS:
                    continue

                excluded = any(
                    local_name(anc.tag) in self.EXCLUDED_ID_CONTAINERS
                    for anc in el.iterancestors()
                )
                if excluded:
                    continue

                attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[raw_tag]

                # First attribute whose local name matches, if any.
                id_val = next(
                    (v for a, v in el.attrib.items() if local_name(a) == attr_name),
                    None,
                )
                if id_val is None:
                    continue

                if scope == "global":
                    if id_val in global_ids:
                        pf, pl, pt = global_ids[id_val]
                        problems.append(
                            " %s: Line %s: Global ID '%s' in <%s> "
                            "already used in %s at line %s in <%s>"
                            % (fp.relative_to(self.unpacked_dir), el.sourceline,
                               id_val, raw_tag, pf, pl, pt))
                    else:
                        global_ids[id_val] = (fp.relative_to(self.unpacked_dir),
                                              el.sourceline, raw_tag)
                else:
                    seen = file_ids.setdefault((raw_tag, attr_name), {})
                    if id_val in seen:
                        problems.append(
                            " %s: Line %s: Duplicate %s='%s' in <%s> "
                            "(first occurrence at line %s)"
                            % (fp.relative_to(self.unpacked_dir), el.sourceline,
                               attr_name, id_val, raw_tag, seen[id_val]))
                    else:
                        seen[id_val] = el.sourceline

        except Exception as exc:
            # Parse errors and anything else are reported per file.
            problems.append(" %s: Error: %s" % (fp.relative_to(self.unpacked_dir), exc))

    if problems:
        print("FAILED - Found %d ID uniqueness violations:" % len(problems))
        for p in problems:
            print(p)
        return False
    if self.verbose:
        print("PASSED - All required IDs are unique")
    return True
283
+
284
+ # ── Relationship file references ─────────────────────────────────────
285
+
286
def validate_file_references(self):
    """Check relationship targets against the files present on disk.

    Every ``*.rels`` file under ``self.unpacked_dir`` is parsed.  Each
    internal ``Relationship`` target is resolved to a path and must be an
    existing file; otherwise it is reported as a broken reference.  After
    all relationships are read, any file in the package (other than
    ``[Content_Types].xml`` and the ``.rels`` files themselves) that no
    relationship points to is reported as unreferenced.

    Relationships marked ``TargetMode="External"`` point outside the
    package and are not resolved against the file system.  Targets
    starting with ``http`` or ``mailto:`` are skipped as well.

    Returns:
        ``True`` when there are no ``.rels`` files or no problems were
        found, ``False`` otherwise.  Problems are always printed; PASSED
        and summary lines are printed only when ``self.verbose`` is set.
    """
    problems: list[str] = []
    rels_list = list(self.unpacked_dir.rglob("*.rels"))

    if not rels_list:
        if self.verbose:
            print("PASSED - No .rels files found")
        return True

    physical_files = [
        fp.resolve()
        for fp in self.unpacked_dir.rglob("*")
        if fp.is_file()
        and fp.name != "[Content_Types].xml"
        and not fp.name.endswith(".rels")
    ]

    # Resolved paths of every file some relationship points to.
    referenced: set = set()

    if self.verbose:
        print("Found %d .rels files and %d target files" % (len(rels_list), len(physical_files)))

    for rf in rels_list:
        try:
            rroot = lxml.etree.parse(str(rf)).getroot()
            rdir = rf.parent
            broken: list = []

            for rel in rroot.findall(".//ns:Relationship",
                                     namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE}):
                tgt = rel.get("Target")
                if not tgt:
                    continue
                # External targets live outside the package, whatever
                # their URI scheme, so there is no file to look for.
                if rel.get("TargetMode") == "External":
                    continue
                if tgt.startswith(("http", "mailto:")):
                    continue

                if tgt.startswith("/"):
                    # Absolute part name: relative to the package root.
                    resolved = self.unpacked_dir / tgt.lstrip("/")
                elif rf.name == ".rels":
                    resolved = self.unpacked_dir / tgt
                else:
                    # "<dir>/_rels/<part>.rels" targets are relative to <dir>.
                    resolved = rdir.parent / tgt

                try:
                    resolved = resolved.resolve()
                    if resolved.exists() and resolved.is_file():
                        referenced.add(resolved)
                    else:
                        broken.append((tgt, rel.sourceline))
                except (OSError, ValueError):
                    broken.append((tgt, rel.sourceline))

            if broken:
                rp = rf.relative_to(self.unpacked_dir)
                for b_tgt, b_line in broken:
                    problems.append(" %s: Line %s: Broken reference to %s" % (rp, b_line, b_tgt))

        except Exception as exc:
            problems.append(" Error parsing %s: %s" % (rf.relative_to(self.unpacked_dir), exc))

    orphans = set(physical_files) - referenced
    for o in sorted(orphans):
        problems.append(" Unreferenced file: %s" % o.relative_to(self.unpacked_dir))

    if problems:
        print("FAILED - Found %d relationship validation errors:" % len(problems))
        for p in problems:
            print(p)
        print(
            "CRITICAL: These errors will cause the document to appear corrupt. "
            + "Broken references MUST be fixed, "
            + "and unreferenced files MUST be referenced or removed."
        )
        return False
    if self.verbose:
        print("PASSED - All references are valid and all files are properly referenced")
    return True
362
+ # ── Relationship ID cross-check ─────────────────────────────────────
363
+
364
def validate_all_relationship_ids(self):
    """Cross-check ``r:id`` / ``r:embed`` / ``r:link`` against ``.rels`` files.

    For every non-``.rels`` file in ``self.xml_files`` that has a companion
    ``_rels/<name>.rels`` file, the relationship IDs declared in the
    companion are collected (duplicates are reported) and every
    relationship-namespace ``id``, ``embed`` or ``link`` attribute in the
    file must name one of them.  When ``ELEMENT_RELATIONSHIP_TYPES`` is
    non-empty, ``r:id`` references are additionally checked against the
    relationship type expected for the referencing element.

    Returns:
        ``True`` when no problem was found, ``False`` otherwise.  Problems
        are always printed; the PASSED line is printed only when
        ``self.verbose`` is set.
    """
    problems: list[str] = []

    # These qualified names do not depend on the file; build them once.
    r_ns = self.OFFICE_RELATIONSHIPS_NAMESPACE
    ref_attrs = [(short, "{%s}%s" % (r_ns, short)) for short in ("id", "embed", "link")]
    rel_tag = "{%s}Relationship" % self.PACKAGE_RELATIONSHIPS_NAMESPACE

    for fp in self.xml_files:
        if fp.suffix == ".rels":
            continue

        companion = fp.parent / "_rels" / ("%s.rels" % fp.name)
        if not companion.exists():
            continue

        try:
            rroot = lxml.etree.parse(str(companion)).getroot()
            rid_map: dict[str, str] = {}

            for rel in rroot.findall(rel_tag):
                rid = rel.get("Id")
                if not rid:
                    continue
                if rid in rid_map:
                    problems.append(
                        " %s: Line %s: Duplicate relationship ID '%s' (IDs must be unique)"
                        % (companion.relative_to(self.unpacked_dir), rel.sourceline, rid))
                # Keep only the last path segment of the Type URI.
                rid_map[rid] = rel.get("Type", "").rsplit("/", 1)[-1]

            xroot = lxml.etree.parse(str(fp)).getroot()
            for el in xroot.iter():
                for aname, qualified in ref_attrs:
                    ref = el.get(qualified)
                    if not ref:
                        continue
                    xrp = fp.relative_to(self.unpacked_dir)
                    ename = el.tag.split("}")[-1]

                    if ref not in rid_map:
                        top5 = ", ".join(sorted(rid_map.keys())[:5])
                        suffix = "..." if len(rid_map) > 5 else ""
                        problems.append(
                            " %s: Line %s: <%s> r:%s references non-existent relationship '%s' "
                            "(valid IDs: %s%s)"
                            % (xrp, el.sourceline, ename, aname, ref, top5, suffix))
                    elif aname == "id" and self.ELEMENT_RELATIONSHIP_TYPES:
                        expected = self._get_expected_relationship_type(ename)
                        if expected and expected not in rid_map[ref].lower():
                            problems.append(
                                " %s: Line %s: <%s> references '%s' which points to '%s' "
                                "but should point to a '%s' relationship"
                                % (xrp, el.sourceline, ename, ref, rid_map[ref], expected))

        except Exception as exc:
            problems.append(" Error processing %s: %s" % (fp.relative_to(self.unpacked_dir), exc))

    if problems:
        print("FAILED - Found %d relationship ID reference errors:" % len(problems))
        for p in problems:
            print(p)
        print("\nThese ID mismatches will cause the document to appear corrupt!")
        return False
    if self.verbose:
        print("PASSED - All relationship ID references are valid")
    return True
431
+
432
def _get_expected_relationship_type(self, element_name):
    """Guess the relationship type an element's ``r:id`` should point to.

    The element name is lower-cased, then resolved in this order:

    1. an explicit entry in ``ELEMENT_RELATIONSHIP_TYPES``;
    2. a name longer than two characters ending in ``id``: the name
       without that suffix, with ``sld`` spelled out as ``slide``;
    3. a name longer than nine characters ending in ``reference``: the
       name without that suffix.

    Returns:
        The expected type as a lower-case string, or ``None`` when none of
        the rules applies.
    """
    name = element_name.lower()

    explicit = self.ELEMENT_RELATIONSHIP_TYPES
    if name in explicit:
        return explicit[name]

    if len(name) > 2 and name.endswith("id"):
        stem = name[:-2]
        if stem == "sld":
            return "slide"
        return stem

    if len(name) > 9 and name.endswith("reference"):
        return name[: -len("reference")]

    return None
448
+
449
+ # ── Content-type declarations ────────────────────────────────────────
450
+
451
def validate_content_types(self):
    """Check that content parts and media are declared in ``[Content_Types].xml``.

    Two checks are made against the manifest in ``self.unpacked_dir``:

    * every XML part whose root element is one of a fixed set of
      document-level names needs a matching ``Override`` ``PartName``;
    * every non-XML file with a known image extension needs a ``Default``
      entry for that extension.

    Relationship files, the manifest itself and anything under
    ``docProps`` or ``_rels`` are left out of both checks.

    Returns:
        ``False`` when the manifest is missing or any problem was found,
        ``True`` otherwise.  Problems are always printed; the PASSED line
        is printed only when ``self.verbose`` is set.
    """
    manifest = self.unpacked_dir / "[Content_Types].xml"
    if not manifest.exists():
        print("FAILED - [Content_Types].xml file not found")
        return False

    findings: list[str] = []
    try:
        manifest_root = lxml.etree.parse(str(manifest)).getroot()
        ns = self.CONTENT_TYPES_NAMESPACE

        part_names = (
            node.get("PartName")
            for node in manifest_root.findall("{%s}Override" % ns)
        )
        overridden: set[str] = {
            name.lstrip("/") for name in part_names if name is not None
        }

        extensions = (
            node.get("Extension")
            for node in manifest_root.findall("{%s}Default" % ns)
        )
        defaulted: set[str] = {
            ext.lower() for ext in extensions if ext is not None
        }

        # Root element names that require an explicit Override entry.
        roots_needing_override = frozenset((
            "sld", "sldLayout", "sldMaster", "presentation",
            "document", "workbook", "worksheet", "theme",
        ))

        # Image extension -> content type suggested in the error message.
        media_types = dict(
            png="image/png", jpg="image/jpeg", jpeg="image/jpeg",
            gif="image/gif", bmp="image/bmp", tiff="image/tiff",
            wmf="image/x-wmf", emf="image/x-emf",
        )

        skip_markers = (".rels", "[Content_Types]", "docProps/", "_rels/")
        for xml_path in self.xml_files:
            part = str(xml_path.relative_to(self.unpacked_dir)).replace("\\", "/")
            if any(marker in part for marker in skip_markers):
                continue
            try:
                root_tag = lxml.etree.parse(str(xml_path)).getroot().tag
                root_name = root_tag.split("}")[-1]
                if root_name in roots_needing_override and part not in overridden:
                    findings.append(
                        " %s: File with <%s> root not declared in [Content_Types].xml"
                        % (part, root_name))
            except Exception:
                # Unparseable parts are ignored by this check.
                continue

        for path in self.unpacked_dir.rglob("*"):
            if (
                not path.is_file()
                or path.suffix.lower() in (".xml", ".rels")
                or path.name == "[Content_Types].xml"
                or "_rels" in path.parts
                or "docProps" in path.parts
            ):
                continue
            ext = path.suffix.lstrip(".").lower()
            if ext in media_types and ext not in defaulted:
                findings.append(
                    ' %s: File with extension \'%s\' not declared in [Content_Types].xml '
                    '- should add: <Default Extension="%s" ContentType="%s"/>'
                    % (path.relative_to(self.unpacked_dir), ext, ext, media_types[ext]))

    except Exception as exc:
        findings.append(" Error parsing [Content_Types].xml: %s" % exc)

    if not findings:
        if self.verbose:
            print("PASSED - All content files are properly declared in [Content_Types].xml")
        return True

    print("FAILED - Found %d content type declaration errors:" % len(findings))
    for line in findings:
        print(line)
    return False
525
+
526
+ # ── Single-file XSD validation ───────────────────────────────────────
527
+
528
def validate_file_against_xsd(self, xml_file, verbose=False):
    """Validate one file against its XSD and report only new errors.

    The file is checked through ``self._check_single_xsd``.  If that
    reports failure, the errors returned by ``self._original_errors`` and
    any error containing a substring from ``IGNORED_VALIDATION_ERRORS``
    are subtracted; whatever remains counts as new.

    Args:
        xml_file: Path of the file to validate.
        verbose: When true, print a FAILED summary (with up to three
            errors, each cut to 250 characters) or a PASSED line.

    Returns:
        ``(None, set())`` when ``_check_single_xsd`` reports ``None`` as
        its status, ``(True, set())`` when the file is valid or has no new
        errors, and ``(False, new_errors)`` otherwise.
    """
    target = pathlib.Path(xml_file).resolve()
    package_root = self.unpacked_dir.resolve()

    status, current_errors = self._check_single_xsd(target, package_root)
    if status is None:
        return None, set()
    if status:
        return True, set()

    baseline_errors = self._original_errors(target)

    assert current_errors is not None
    candidates = current_errors - baseline_errors
    introduced = {
        message
        for message in candidates
        if not any(pattern in message for pattern in self.IGNORED_VALIDATION_ERRORS)
    }

    if not introduced:
        if verbose:
            print("PASSED - No new errors (original had %d errors)" % len(current_errors))
        return True, set()

    if verbose:
        shown = target.relative_to(package_root)
        print("FAILED - %s: %d new error(s)" % (shown, len(introduced)))
        for message in list(introduced)[:3]:
            if len(message) > 250:
                message = message[:250] + "..."
            print(" - %s" % message)
    return False, introduced
557
+
558
+ # ── Batch XSD validation ─────────────────────────────────────────────
559
+
560
def validate_against_xsd(self):
    """Validate every file in ``self.xml_files`` against its XSD schema.

    Each file is checked with ``validate_file_against_xsd``.  Files without
    a schema are counted as skipped; files that introduce new errors are
    listed together with up to three (truncated) error messages.

    Returns:
        ``True`` when no file introduced new validation errors, ``False``
        otherwise.  A report is printed on failure, and a summary is
        printed whenever ``self.verbose`` is set.
    """
    fresh_errors: list[str] = []
    failed_files = 0
    orig_err_count = 0
    ok_count = 0
    skip_count = 0

    for fp in self.xml_files:
        rp = str(fp.relative_to(self.unpacked_dir))
        ok, file_errs = self.validate_file_against_xsd(fp, verbose=False)

        if ok is None:
            skip_count += 1
        elif ok:
            ok_count += 1
            # Only reached with a non-empty set if the per-file check
            # reports success together with tolerated errors.
            if file_errs:
                orig_err_count += 1
        else:
            # Count failing files directly rather than inferring the number
            # from how the report lines happen to be prefixed.
            failed_files += 1
            fresh_errors.append(" %s: %d new error(s)" % (rp, len(file_errs)))
            # Sorted so the printed excerpt is the same on every run.
            for err in sorted(file_errs)[:3]:
                fresh_errors.append(
                    " - %s..." % err[:250] if len(err) > 250 else " - %s" % err)

    if self.verbose:
        print("Validated %d files:" % len(self.xml_files))
        print(" - Valid: %d" % ok_count)
        print(" - Skipped (no schema): %d" % skip_count)
        if orig_err_count:
            print(" - With original errors (ignored): %d" % orig_err_count)
        print(" - With NEW errors: %d" % failed_files)

    if fresh_errors:
        print("\nFAILED - Found NEW validation errors:")
        for line in fresh_errors:
            print(line)
        return False
    if self.verbose:
        print("\nPASSED - No new XSD validation errors introduced")
    return True
600
+
601
+ # ── Internal: schema resolution ──────────────────────────────────────
602
+
603
def _get_schema_path(self, fp):
    """Return the XSD schema path that applies to *fp*, or ``None``.

    Lookup order: exact file name in ``SCHEMA_MAPPINGS``, the ``.rels``
    suffix, chart parts under a ``charts/`` folder, theme parts under a
    ``theme/`` folder, and finally the name of the parent folder when it
    is one of ``MAIN_CONTENT_FOLDERS``.

    Args:
        fp: Path object of the file to look up.

    Returns:
        ``self.schemas_dir`` joined with the mapped schema file, or
        ``None`` when no rule matches.
    """
    mappings = self.SCHEMA_MAPPINGS
    name = fp.name

    if name in mappings:
        return self.schemas_dir / mappings[name]
    if fp.suffix == ".rels":
        return self.schemas_dir / mappings[".rels"]

    # Compare against the forward-slash form: str(path) uses backslashes on
    # Windows, so a "charts/" substring test would never match there.
    posix = fp.as_posix()
    if "charts/" in posix and name.startswith("chart"):
        return self.schemas_dir / mappings["chart"]
    if "theme/" in posix and name.startswith("theme"):
        return self.schemas_dir / mappings["theme"]

    folder = fp.parent.name
    if folder in self.MAIN_CONTENT_FOLDERS:
        return self.schemas_dir / mappings[folder]
    return None
615
+
616
+ # ── Internal: MC namespace stripping ─────────────────────────────────
617
+
618
def _clean_ignorable_namespaces(self, tree):
    """Return a copy of *tree* without non-OOXML attributes and elements.

    The input tree is left untouched: it is serialised and re-parsed, and
    all pruning happens on that copy.  Namespaced attributes whose
    namespace is not in ``self.OOXML_NAMESPACES`` are deleted, then
    ``_drop_non_ooxml_elements`` removes foreign elements.

    Args:
        tree: The parsed XML tree to clean.

    Returns:
        A new ``lxml.etree.ElementTree`` wrapping the cleaned copy.
    """
    serialized = lxml.etree.tostring(tree, encoding="unicode")
    root = lxml.etree.fromstring(serialized)

    allowed = self.OOXML_NAMESPACES
    for node in root.iter():
        attrs = node.attrib
        # Snapshot the keys first; entries are deleted while we walk them.
        for attr_name in list(attrs):
            if "{" not in attr_name:
                continue
            namespace = attr_name.split("}")[0][1:]
            if namespace not in allowed:
                del attrs[attr_name]

    self._drop_non_ooxml_elements(root)
    return lxml.etree.ElementTree(root)
630
+
631
def _drop_non_ooxml_elements(self, root):
    """Remove, in place, every descendant element outside the OOXML namespaces.

    An element is removed (with its whole subtree) when its tag is
    namespaced and that namespace is not in ``self.OOXML_NAMESPACES``.
    Nodes without a usable string tag (``tag`` missing or callable) are
    left alone and not descended into.

    Args:
        root: Element whose descendants are pruned; ``root`` itself is
            never removed.
    """
    allowed = self.OOXML_NAMESPACES
    worklist = [root]
    while worklist:
        parent = worklist.pop()
        rejected = []
        for node in list(parent):
            if not hasattr(node, "tag") or callable(node.tag):
                continue
            qname = str(node.tag)
            if qname.startswith("{") and qname.split("}")[0][1:] not in allowed:
                rejected.append(node)
            else:
                # Kept elements still need their own children inspected.
                worklist.append(node)
        for node in rejected:
            parent.remove(node)
645
+
646
def _strip_mc_ignorable(self, tree):
    """Delete the ``Ignorable`` attribute in ``self.MC_NAMESPACE`` from the root.

    The tree is modified in place; only the root element is inspected.

    Args:
        tree: Parsed XML tree.

    Returns:
        The same *tree* object that was passed in.
    """
    root_attrs = tree.getroot().attrib
    ignorable = "{%s}Ignorable" % self.MC_NAMESPACE
    if ignorable not in root_attrs:
        return tree
    del root_attrs[ignorable]
    return tree
652
+
653
+ # ── Internal: XSD check for one file ─────────────────────────────────
654
+
655
def _check_single_xsd(self, fp, base):
    """Validate one XML file against the schema selected for it.

    Before validation the document has template placeholders scrubbed and
    the root ``Ignorable`` attribute removed.  When the file's top-level
    folder (relative to *base*) is one of ``MAIN_CONTENT_FOLDERS``,
    attributes and elements outside the OOXML namespaces are also
    stripped.

    Args:
        fp: Path of the XML file to validate.
        base: Directory that *fp* is located under.

    Returns:
        ``(None, None)`` when no schema applies, ``(True, set())`` when the
        file validates, and ``(False, messages)`` otherwise.  Any exception
        raised while loading the schema or the file is reported as a
        single-message failure rather than propagated.
    """
    schema_path = self._get_schema_path(fp)
    if schema_path is None:
        return None, None

    try:
        with open(schema_path, "rb") as fh:
            xsd_doc = lxml.etree.parse(fh, parser=lxml.etree.XMLParser(),
                                       base_url=str(schema_path))
        schema = lxml.etree.XMLSchema(xsd_doc)

        # Read bytes, as for the schema above.  Text mode would decode the
        # file with the platform default encoding before the parser sees
        # it, which breaks non-ASCII parts wherever that is not UTF-8.
        with open(fp, "rb") as fh:
            xml_tree = lxml.etree.parse(fh)

        xml_tree, _ = self._scrub_template_placeholders(xml_tree)
        xml_tree = self._strip_mc_ignorable(xml_tree)

        rp = fp.relative_to(base)
        if rp.parts and rp.parts[0] in self.MAIN_CONTENT_FOLDERS:
            xml_tree = self._clean_ignorable_namespaces(xml_tree)

        if schema.validate(xml_tree):
            return True, set()
        return False, {err.message for err in schema.error_log}
    except Exception as exc:
        # Deliberate best effort: an unreadable schema or part counts as a
        # validation failure for this file instead of aborting the run.
        return False, {str(exc)}
681
+
682
+ # ── Internal: original-file error baseline ───────────────────────────
683
+
684
def _original_errors(self, fp):
    """Return the XSD errors the original document has for the same part.

    The part matching *fp* (by its path relative to ``self.unpacked_dir``)
    is extracted from ``self.original_file`` into a temporary directory and
    checked with ``_check_single_xsd``.

    Args:
        fp: Path of the part inside the unpacked directory.

    Returns:
        The set of error messages for the original part.  An empty set is
        returned when there is no original file, when the part does not
        exist in it, or when the original part has no errors.

    Raises:
        ValueError: If *fp* is not located under ``self.unpacked_dir``.
    """
    if self.original_file is None:
        return set()

    fp = pathlib.Path(fp).resolve()
    base = self.unpacked_dir.resolve()
    rp = fp.relative_to(base)

    with tempfile.TemporaryDirectory() as td:
        tp = pathlib.Path(td)
        with zipfile.ZipFile(self.original_file, "r") as zf:
            try:
                # Only this one part is needed; unpacking the whole archive
                # for every checked file is wasted work.  Zip member names
                # always use forward slashes.
                zf.extract(rp.as_posix(), tp)
            except KeyError:
                # The part is new: the original has no baseline for it.
                return set()
        orig_fp = tp / rp
        if not orig_fp.exists():
            return set()
        _, errs = self._check_single_xsd(orig_fp, tp)
        return errs if errs else set()
701
+
702
+ # ── Internal: template-tag removal ───────────────────────────────────
703
+
704
def _scrub_template_placeholders(self, tree):
    """Return a copy of *tree* with ``{{...}}`` placeholders removed.

    The tree is copied by serialising and re-parsing it.  On the copy,
    placeholders are stripped from the ``text`` and ``tail`` of every
    element except those whose tag is ``t`` (in any namespace), which are
    skipped entirely.

    Args:
        tree: Parsed XML tree; not modified.

    Returns:
        ``(cleaned_tree, warnings)`` where ``warnings`` holds one message
        per placeholder that was removed.
    """
    found: list[str] = []
    placeholder = re.compile(r"\{\{[^}]*\}\}")

    root = lxml.etree.fromstring(lxml.etree.tostring(tree, encoding="unicode"))

    def _strip(value, where):
        # Empty or missing text is handed back unchanged.
        if not value:
            return value
        tags = placeholder.findall(value)
        if not tags:
            return value
        for tag in tags:
            found.append("Found template tag in %s: %s" % (where, tag))
        return placeholder.sub("", value)

    for node in root.iter():
        if not hasattr(node, "tag") or callable(node.tag):
            continue
        qname = str(node.tag)
        if qname == "t" or qname.endswith("}t"):
            continue
        node.text = _strip(node.text, "text content")
        node.tail = _strip(node.tail, "tail content")

    return lxml.etree.ElementTree(root), found
730
+
731
+
732
# This file is a library module: importing it is fine, executing it as a
# script is refused with an explicit error.
if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")