@panda-agent/panda-cli 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. package/dist/panda-cli-ink.bundle.mjs +267 -258
  2. package/package.json +6 -4
  3. package/skills/.gitkeep +0 -0
  4. package/skills/README.md +13 -0
  5. package/skills/docx/.skill-metadata.yaml +173 -0
  6. package/skills/docx/LICENSE.txt +30 -0
  7. package/skills/docx/SKILL.md +589 -0
  8. package/skills/docx/scripts/__init__.py +1 -0
  9. package/skills/docx/scripts/accept_changes.py +206 -0
  10. package/skills/docx/scripts/comment.py +442 -0
  11. package/skills/docx/scripts/office/helpers/__init__.py +1 -0
  12. package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
  13. package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
  14. package/skills/docx/scripts/office/pack.py +167 -0
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  21. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  22. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  23. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  24. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  25. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  26. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  27. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  28. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  29. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  30. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  31. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  32. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  33. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  34. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  35. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  36. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  37. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  38. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  39. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  40. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  41. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  42. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  43. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  44. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  45. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  46. package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
  47. package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  48. package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  49. package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  50. package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  51. package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  52. package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  53. package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  54. package/skills/docx/scripts/office/soffice.py +194 -0
  55. package/skills/docx/scripts/office/unpack.py +145 -0
  56. package/skills/docx/scripts/office/validate.py +114 -0
  57. package/skills/docx/scripts/office/validators/__init__.py +16 -0
  58. package/skills/docx/scripts/office/validators/base.py +733 -0
  59. package/skills/docx/scripts/office/validators/docx.py +354 -0
  60. package/skills/docx/scripts/office/validators/pptx.py +230 -0
  61. package/skills/docx/scripts/office/validators/redlining.py +212 -0
  62. package/skills/docx/scripts/templates/comments.xml +3 -0
  63. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  64. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  65. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  66. package/skills/docx/scripts/templates/people.xml +3 -0
  67. package/skills/frontend-design/LICENSE.txt +177 -0
  68. package/skills/frontend-design/SKILL.md +42 -0
  69. package/skills/pdf/.skill-metadata.yaml +273 -0
  70. package/skills/pdf/LICENSE.txt +30 -0
  71. package/skills/pdf/SKILL.md +324 -0
  72. package/skills/pdf/advanced-reference.md +609 -0
  73. package/skills/pdf/form-filling-guide.md +318 -0
  74. package/skills/pdf/forms.md +294 -0
  75. package/skills/pdf/reference.md +612 -0
  76. package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
  77. package/skills/pdf/scripts/check_fillable_fields.py +64 -0
  78. package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
  79. package/skills/pdf/scripts/create_validation_image.py +125 -0
  80. package/skills/pdf/scripts/extract_form_field_info.py +220 -0
  81. package/skills/pdf/scripts/extract_form_structure.py +202 -0
  82. package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
  83. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
  84. package/skills/pptx-generator/SKILL.md +204 -0
  85. package/skills/pptx-generator/assets/styles/business.json +8 -0
  86. package/skills/pptx-generator/assets/styles/minimal.json +8 -0
  87. package/skills/pptx-generator/assets/styles/modern.json +8 -0
  88. package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
  89. package/skills/pptx-generator/references/collaboration_guide.md +381 -0
  90. package/skills/pptx-generator/references/json_format_spec.md +215 -0
  91. package/skills/pptx-generator/references/layout_guide.md +290 -0
  92. package/skills/pptx-generator/scripts/json_validator.py +194 -0
  93. package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
  94. package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
  95. package/skills/skill-creator/LICENSE.txt +202 -0
  96. package/skills/skill-creator/SKILL.md +479 -0
  97. package/skills/skill-creator/agents/analyzer.md +274 -0
  98. package/skills/skill-creator/agents/comparator.md +202 -0
  99. package/skills/skill-creator/agents/grader.md +223 -0
  100. package/skills/skill-creator/assets/eval_review.html +146 -0
  101. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  102. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  103. package/skills/skill-creator/references/schemas.md +430 -0
  104. package/skills/skill-creator/scripts/__init__.py +0 -0
  105. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  106. package/skills/skill-creator/scripts/generate_report.py +326 -0
  107. package/skills/skill-creator/scripts/improve_description.py +248 -0
  108. package/skills/skill-creator/scripts/package_skill.py +136 -0
  109. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  110. package/skills/skill-creator/scripts/run_eval.py +310 -0
  111. package/skills/skill-creator/scripts/run_loop.py +332 -0
  112. package/skills/skill-creator/scripts/utils.py +47 -0
  113. package/skills/xlsx/.skill-metadata.yaml +185 -0
  114. package/skills/xlsx/LICENSE.txt +30 -0
  115. package/skills/xlsx/SKILL.md +233 -0
  116. package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
  117. package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
  118. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
  119. package/skills/xlsx/scripts/office/pack.py +162 -0
  120. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  121. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  122. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  123. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  124. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  125. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  126. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  127. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  128. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  129. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  130. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  131. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  132. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  133. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  134. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  135. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  136. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  137. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  138. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  139. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  140. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  141. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  142. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  143. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  144. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  145. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  146. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  147. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  148. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  149. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  150. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  151. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  152. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  153. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  154. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  155. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  156. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  157. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  158. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  159. package/skills/xlsx/scripts/office/soffice.py +185 -0
  160. package/skills/xlsx/scripts/office/unpack.py +146 -0
  161. package/skills/xlsx/scripts/office/validate.py +108 -0
  162. package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
  163. package/skills/xlsx/scripts/office/validators/base.py +800 -0
  164. package/skills/xlsx/scripts/office/validators/docx.py +383 -0
  165. package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
  166. package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
  167. package/skills/xlsx/scripts/recalc.py +296 -0
@@ -0,0 +1,354 @@
1
+ """
2
+ Word-specific schema and structural validation on unpacked DOCX packages.
3
+ """
4
+
5
+ import random
6
+ import re
7
+ import tempfile
8
+ import zipfile
9
+
10
+ import defusedxml.minidom
11
+ import lxml.etree
12
+
13
+ from .base import BaseSchemaValidator
14
+
15
+
16
+ class DOCXSchemaValidator(BaseSchemaValidator):
17
+ """Extends the base validator with DOCX-specific checks."""
18
+
19
+ _NS_WML = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
20
+ _NS_W14 = "http://schemas.microsoft.com/office/word/2010/wordml"
21
+ _NS_CID = "http://schemas.microsoft.com/office/word/2016/wordml/cid"
22
+
23
+ WORD_2006_NAMESPACE = _NS_WML
24
+ W14_NAMESPACE = _NS_W14
25
+ W16CID_NAMESPACE = _NS_CID
26
+
27
+ ELEMENT_RELATIONSHIP_TYPES = {}
28
+
29
+ # ── Orchestrator ─────────────────────────────────────────────────────
30
+
31
def validate(self):
    """Run every DOCX check and report whether all of them passed.

    XML well-formedness is a hard gate: when ``validate_xml`` fails,
    nothing else runs and ``False`` is returned immediately. The remaining
    checks are all executed even after one fails, so a single run reports
    every problem. The paragraph-count summary is printed last and has no
    influence on the result.
    """
    if not self.validate_xml():
        return False

    pipeline = (
        self.validate_namespaces,
        self.validate_unique_ids,
        self.validate_file_references,
        self.validate_content_types,
        self.validate_against_xsd,
        self._check_whitespace,
        self._check_deletions,
        self._check_insertions,
        self.validate_all_relationship_ids,
        self._check_id_bounds,
        self._check_comment_markers,
    )
    # Built as a list (not a generator) so every check runs regardless of
    # earlier failures.
    outcomes = [bool(check()) for check in pipeline]

    self._report_paragraph_delta()
    return all(outcomes)
55
+
56
+ # ── Whitespace preservation ──────────────────────────────────────────
57
+
58
def validate_whitespace_preservation(self):
    """Public entry point; delegates to ``_check_whitespace``."""
    return self._check_whitespace()

def _check_whitespace(self):
    """Flag ``w:t`` elements whose edge whitespace is not preserved.

    Only files named ``document.xml`` are inspected. A ``w:t`` element is
    reported when its text starts or ends with a space, tab, newline or
    carriage return and its ``xml:space`` attribute is not ``"preserve"``.

    Returns:
        ``True`` when nothing was reported, ``False`` otherwise (after
        printing each finding).
    """
    edge_chars = " \t\n\r"
    text_tag = "{%s}t" % self._NS_WML
    problems: list[str] = []

    for path in self.xml_files:
        if path.name != "document.xml":
            continue
        try:
            tree_root = lxml.etree.parse(str(path)).getroot()
            space_attr = "{%s}space" % self.XML_NAMESPACE
            for node in tree_root.iter(text_tag):
                text = node.text
                if not text:
                    continue
                # Interior whitespace is irrelevant; only the first and
                # last characters matter.
                if text[0] not in edge_chars and text[-1] not in edge_chars:
                    continue
                if node.attrib.get(space_attr) == "preserve":
                    continue
                shown = repr(text)
                if len(shown) > 50:
                    shown = shown[:50] + "..."
                problems.append(
                    " %s: Line %s: w:t element with whitespace missing "
                    "xml:space='preserve': %s"
                    % (path.relative_to(self.unpacked_dir), node.sourceline, shown))
        except Exception as exc:
            problems.append(" %s: Error: %s" % (path.relative_to(self.unpacked_dir), exc))

    if not problems:
        if self.verbose:
            print("PASSED - All whitespace is properly preserved")
        return True

    print("FAILED - Found %d whitespace preservation violations:" % len(problems))
    for line in problems:
        print(line)
    return False
91
+
92
+ # ── Deletion correctness ─────────────────────────────────────────────
93
+
94
def validate_deletions(self):
    """Public entry point; delegates to ``_check_deletions``."""
    return self._check_deletions()

def _check_deletions(self):
    """Report live-text elements that appear inside tracked deletions.

    Only files named ``document.xml`` are inspected. Two situations are
    reported for descendants of ``w:del``: a ``w:t`` element with non-empty
    text, and any ``w:instrText`` element (the message points at
    ``w:delInstrText`` as the replacement).

    Returns:
        ``True`` when nothing was reported, ``False`` otherwise (after
        printing each finding).
    """
    def clipped(value):
        # Show at most 50 characters of the repr, marking truncation.
        shown = repr(value)
        return shown[:50] + "..." if len(shown) > 50 else shown

    nsmap = {"w": self._NS_WML}
    problems: list[str] = []

    for path in self.xml_files:
        if path.name != "document.xml":
            continue
        try:
            tree_root = lxml.etree.parse(str(path)).getroot()

            for text_node in tree_root.xpath(".//w:del//w:t", namespaces=nsmap):
                if not text_node.text:
                    continue
                problems.append(
                    " %s: Line %s: <w:t> found within <w:del>: %s"
                    % (path.relative_to(self.unpacked_dir), text_node.sourceline,
                       clipped(text_node.text)))

            for instr_node in tree_root.xpath(".//w:del//w:instrText", namespaces=nsmap):
                problems.append(
                    " %s: Line %s: <w:instrText> found within <w:del> (use <w:delInstrText>): %s"
                    % (path.relative_to(self.unpacked_dir), instr_node.sourceline,
                       clipped(instr_node.text or "")))
        except Exception as exc:
            problems.append(" %s: Error: %s" % (path.relative_to(self.unpacked_dir), exc))

    if not problems:
        if self.verbose:
            print("PASSED - No w:t elements found within w:del elements")
        return True

    print("FAILED - Found %d deletion validation violations:" % len(problems))
    for line in problems:
        print(line)
    return False
128
+
129
+ # ── Insertion correctness ────────────────────────────────────────────
130
+
131
def validate_insertions(self):
    """Public entry point; delegates to ``_check_insertions``."""
    return self._check_insertions()

def _check_insertions(self):
    """Report ``w:delText`` elements that sit inside a tracked insertion.

    Only files named ``document.xml`` are inspected. A ``w:delText`` under
    ``w:ins`` is reported unless it also has a ``w:del`` ancestor.

    Returns:
        ``True`` when nothing was reported, ``False`` otherwise (after
        printing each finding).
    """
    nsmap = {"w": self._NS_WML}
    problems: list[str] = []

    for path in self.xml_files:
        if path.name != "document.xml":
            continue
        try:
            tree_root = lxml.etree.parse(str(path)).getroot()
            offenders = tree_root.xpath(
                ".//w:ins//w:delText[not(ancestor::w:del)]", namespaces=nsmap)
            for node in offenders:
                shown = repr(node.text or "")
                if len(shown) > 50:
                    shown = shown[:50] + "..."
                problems.append(
                    " %s: Line %s: <w:delText> within <w:ins>: %s"
                    % (path.relative_to(self.unpacked_dir), node.sourceline, shown))
        except Exception as exc:
            problems.append(" %s: Error: %s" % (path.relative_to(self.unpacked_dir), exc))

    if not problems:
        if self.verbose:
            print("PASSED - No w:delText elements within w:ins elements")
        return True

    print("FAILED - Found %d insertion validation violations:" % len(problems))
    for line in problems:
        print(line)
    return False
159
+
160
+ # ── Paragraph count comparison ───────────────────────────────────────
161
+
162
def count_paragraphs_in_unpacked(self):
    """Return the number of ``w:p`` elements in the unpacked document.

    The first file named ``document.xml`` that parses successfully
    determines the result. A file that fails to parse is reported on
    stdout and the search continues; ``0`` is returned when no candidate
    yields a count.
    """
    paragraph_path = ".//{%s}p" % self._NS_WML
    candidates = (path for path in self.xml_files if path.name == "document.xml")

    for candidate in candidates:
        try:
            tree_root = lxml.etree.parse(str(candidate)).getroot()
            return len(tree_root.findall(paragraph_path))
        except Exception as exc:
            print("Error counting paragraphs in unpacked document: %s" % exc)
    return 0
172
+
173
def count_paragraphs_in_original(self):
    """Return the number of ``w:p`` elements in the original DOCX file.

    ``word/document.xml`` is read directly out of the archive instead of
    extracting every member to a temporary directory: only that one part
    is needed, and reading it in memory avoids writing archive-controlled
    paths to disk.

    Returns:
        The paragraph count, or ``0`` when no original file is configured
        or when the archive/part cannot be read or parsed (the error is
        printed to stdout).
    """
    if self.original_file is None:
        return 0
    try:
        with zipfile.ZipFile(self.original_file, "r") as archive:
            payload = archive.read("word/document.xml")
        # fromstring() on bytes honours the XML declaration's encoding and
        # returns the root element directly.
        root = lxml.etree.fromstring(payload)
        return len(root.findall(".//{%s}p" % self._NS_WML))
    except Exception as exc:
        print("Error counting paragraphs in original document: %s" % exc)
        return 0
185
+
186
def compare_paragraph_counts(self):
    """Public entry point; delegates to ``_report_paragraph_delta``."""
    self._report_paragraph_delta()

def _report_paragraph_delta(self):
    """Print the paragraph count before and after, with the signed change.

    Positive changes are shown with an explicit ``+``; zero and negative
    changes use their plain integer form.
    """
    original_count = self.count_paragraphs_in_original()
    current_count = self.count_paragraphs_in_unpacked()
    change = current_count - original_count

    if change > 0:
        change_text = "+%d" % change
    else:
        change_text = str(change)

    print("\nParagraphs: %d \u2192 %d (%s)" % (original_count, current_count, change_text))
195
+
196
+ # ── ID bound constraints ─────────────────────────────────────────────
197
+
198
+ def _parse_id_value(self, raw: str, base: int = 16) -> int:
199
+ return int(raw, base)
200
+
201
def validate_id_constraints(self):
    """Public entry point; delegates to ``_check_id_bounds``."""
    return self._check_id_bounds()

def _check_id_bounds(self):
    """Check ``w14:paraId`` and ``w16cid:durableId`` values in every XML file.

    Rules enforced:

    * ``paraId`` is parsed as hexadecimal and must be below ``0x80000000``.
    * ``durableId`` must be below ``0x7FFFFFFF``; it is parsed as decimal
      in ``numbering.xml`` and as hexadecimal everywhere else.
    * A value that cannot be parsed in the expected base is reported as a
      violation of its own, and checking continues with the next
      attribute. Previously such a value silently aborted the scan of the
      remaining elements in that file.

    Returns:
        ``True`` when no violation was found, ``False`` otherwise (after
        printing each finding).
    """
    issues: list[str] = []
    pid_attr = "{%s}paraId" % self._NS_W14
    did_attr = "{%s}durableId" % self._NS_CID

    for fp in self.xml_files:
        try:
            tree = lxml.etree.parse(str(fp))
        except Exception:
            # NOTE(review): validate() gates on validate_xml() before this
            # check runs, so unparseable files are presumably reported
            # there; they are skipped here as before.
            continue

        in_numbering = fp.name == "numbering.xml"
        durable_base = 10 if in_numbering else 16

        for el in tree.iter():
            pval = el.get(pid_attr)
            if pval:
                try:
                    if self._parse_id_value(pval, 16) >= 0x80000000:
                        issues.append(
                            " %s:%s: paraId=%s >= 0x80000000" % (fp.name, el.sourceline, pval))
                except ValueError:
                    issues.append(
                        " %s:%s: paraId=%s is not a valid hexadecimal value"
                        % (fp.name, el.sourceline, pval))

            dval = el.get(did_attr)
            if not dval:
                continue
            try:
                if self._parse_id_value(dval, durable_base) >= 0x7FFFFFFF:
                    issues.append(
                        " %s:%s: durableId=%s >= 0x7FFFFFFF"
                        % (fp.name, el.sourceline, dval))
            except ValueError:
                if in_numbering:
                    issues.append(
                        " %s:%s: durableId=%s must be decimal in numbering.xml"
                        % (fp.name, el.sourceline, dval))
                else:
                    issues.append(
                        " %s:%s: durableId=%s is not a valid hexadecimal value"
                        % (fp.name, el.sourceline, dval))

    if issues:
        print("FAILED - %d ID constraint violations:" % len(issues))
        for i in issues:
            print(i)
    elif self.verbose:
        print("PASSED - All paraId/durableId values within constraints")
    return not bool(issues)
244
+
245
+ # ── Comment marker integrity ─────────────────────────────────────────
246
+
247
def validate_comment_markers(self):
    """Public entry point; delegates to ``_check_comment_markers``."""
    return self._check_comment_markers()

def _check_comment_markers(self):
    """Verify that comment range markers pair up and refer to real comments.

    Works on the last ``document.xml`` whose path contains ``"word"`` and,
    when present, the last ``comments.xml`` found in ``self.xml_files``.
    Reported problems:

    * a ``commentRangeEnd`` id with no ``commentRangeStart`` and vice versa;
    * any start/end/reference id that has no ``w:comment`` definition
      (only checked when ``comments.xml`` exists).

    Returns:
        ``True`` when nothing was reported or no document was found,
        ``False`` otherwise (after printing each finding).
    """
    document_path = None
    comments_path = None
    for candidate in self.xml_files:
        if candidate.name == "document.xml" and "word" in str(candidate):
            document_path = candidate
        elif candidate.name == "comments.xml":
            comments_path = candidate

    if document_path is None:
        if self.verbose:
            print("PASSED - No document.xml found (skipping comment validation)")
        return True

    nsmap = {"w": self._NS_WML}
    id_attr = "{%s}id" % self._NS_WML

    def ids_at(root, xpath_expr):
        # Collect the w:id of every element matched by *xpath_expr*.
        return {node.get(id_attr) for node in root.xpath(xpath_expr, namespaces=nsmap)}

    def numeric_order(marker_id):
        # Missing or non-numeric ids sort as 0.
        if marker_id and marker_id.isdigit():
            return int(marker_id)
        return 0

    problems: list[str] = []
    try:
        doc_root = lxml.etree.parse(str(document_path)).getroot()

        range_starts = ids_at(doc_root, ".//w:commentRangeStart")
        range_ends = ids_at(doc_root, ".//w:commentRangeEnd")
        references = ids_at(doc_root, ".//w:commentReference")

        for orphan in sorted(range_ends - range_starts, key=numeric_order):
            problems.append(
                ' document.xml: commentRangeEnd id="%s" has no matching commentRangeStart' % orphan)
        for orphan in sorted(range_starts - range_ends, key=numeric_order):
            problems.append(
                ' document.xml: commentRangeStart id="%s" has no matching commentRangeEnd' % orphan)

        if comments_path and comments_path.exists():
            comments_root = lxml.etree.parse(str(comments_path)).getroot()
            known = ids_at(comments_root, ".//w:comment")
            used = range_starts | range_ends | references
            for dangling in sorted(used - known, key=numeric_order):
                if not dangling:
                    continue
                problems.append(
                    ' document.xml: marker id="%s" references non-existent comment' % dangling)
    except Exception as exc:
        problems.append(" Error parsing XML: %s" % exc)

    if not problems:
        if self.verbose:
            print("PASSED - All comment markers properly paired")
        return True

    print("FAILED - %d comment marker violations:" % len(problems))
    for line in problems:
        print(line)
    return False
303
+
304
+ # ── Repair: durableId overflow ───────────────────────────────────────
305
+
306
def repair(self) -> int:
    """Run the base-class repairs, then the durableId repair.

    Returns:
        The combined number of repairs reported by both steps.
    """
    return super().repair() + self._fix_durable_ids()

def repair_durableId(self) -> int:
    """Public entry point; delegates to ``_fix_durable_ids``."""
    return self._fix_durable_ids()

def _fix_durable_ids(self) -> int:
    """Replace out-of-range or unparseable ``w16cid:durableId`` values.

    A value is replaced when it is ``>= 0x7FFFFFFF`` or cannot be parsed
    (decimal in ``numbering.xml``, hexadecimal elsewhere). The replacement
    is a random integer in ``[1, 0x7FFFFFFE]``, written as decimal in
    ``numbering.xml`` and as eight upper-case hex digits elsewhere. A file
    is rewritten only when at least one attribute changed.

    Returns:
        The number of attributes replaced.
    """
    attr_name = "w16cid:durableId"
    repaired = 0

    for path in self.xml_files:
        # Best-effort per file: any failure while reading, parsing or
        # writing is ignored and the next file is processed.
        try:
            document = defusedxml.minidom.parseString(path.read_text(encoding="utf-8"))
            is_numbering = path.name == "numbering.xml"
            radix = 10 if is_numbering else 16
            dirty = False

            for node in document.getElementsByTagName("*"):
                if not node.hasAttribute(attr_name):
                    continue
                current = node.getAttribute(attr_name)
                try:
                    out_of_range = self._parse_id_value(current, radix) >= 0x7FFFFFFF
                except ValueError:
                    out_of_range = True
                if not out_of_range:
                    continue

                fresh = random.randint(1, 0x7FFFFFFE)
                if is_numbering:
                    new_text = str(fresh)
                else:
                    new_text = "{:08X}".format(fresh)
                node.setAttribute(attr_name, new_text)
                print(" Repaired: %s: durableId %s \u2192 %s" % (path.name, current, new_text))
                repaired += 1
                dirty = True

            if dirty:
                path.write_bytes(document.toxml(encoding="UTF-8"))
        except Exception:
            pass
    return repaired
351
+
352
+
353
+ if __name__ == "__main__":
354
+ raise RuntimeError("This module should not be run directly.")
@@ -0,0 +1,230 @@
1
+ """
2
+ PowerPoint-specific XML validation against OOXML schemas.
3
+ """
4
+
5
+ import re
6
+
7
+ from .base import BaseSchemaValidator
8
+
9
+
10
+ class PPTXSchemaValidator(BaseSchemaValidator):
11
+ """Extends the base validator with PPTX-specific structural checks."""
12
+
13
+ _PML_NS = "http://schemas.openxmlformats.org/presentationml/2006/main"
14
+
15
+ PRESENTATIONML_NAMESPACE = _PML_NS
16
+
17
+ ELEMENT_RELATIONSHIP_TYPES = {
18
+ "sldid": "slide",
19
+ "sldmasterid": "slidemaster",
20
+ "notesmasterid": "notesmaster",
21
+ "sldlayoutid": "slidelayout",
22
+ "themeid": "theme",
23
+ "tablestyleid": "tablestyles",
24
+ }
25
+
26
+ # ── Orchestrator ─────────────────────────────────────────────────────
27
+
28
def validate(self):
    """Run every PPTX check and report whether all of them passed.

    XML well-formedness is a hard gate: when ``validate_xml`` fails,
    nothing else runs and ``False`` is returned immediately. The remaining
    checks are all executed even after one fails, so a single run reports
    every problem.
    """
    if not self.validate_xml():
        return False

    pipeline = [
        self.validate_namespaces,
        self.validate_unique_ids,
        self._check_uuid_format,
        self.validate_file_references,
        self._check_layout_ids,
        self.validate_content_types,
        self.validate_against_xsd,
        self._check_notes_refs,
        self.validate_all_relationship_ids,
        self._check_duplicate_layouts,
    ]
    # Built as a list (not a generator) so every check runs regardless of
    # earlier failures.
    outcomes = [bool(check()) for check in pipeline]
    return all(outcomes)
48
+
49
+ # ── UUID format ──────────────────────────────────────────────────────
50
+
51
def validate_uuid_ids(self):
    """Public entry point; delegates to ``_check_uuid_format``."""
    return self._check_uuid_format()

def _check_uuid_format(self):
    """Report ID attributes that look like UUIDs but contain non-hex digits.

    Every attribute whose local name (case-insensitively) ends in ``id``
    is considered. A value is reported when ``_resembles_uuid`` accepts it
    but it does not match the strict hexadecimal UUID pattern.

    Returns:
        ``True`` when nothing was reported, ``False`` otherwise (after
        printing each finding).
    """
    import lxml.etree

    strict_uuid = re.compile(
        r"^[\{\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\}\)]?$"
    )
    problems: list[str] = []

    for path in self.xml_files:
        try:
            tree_root = lxml.etree.parse(str(path)).getroot()
            for node in tree_root.iter():
                for qualified_name, value in node.attrib.items():
                    # Drop any "{namespace}" prefix before testing the name.
                    local_name = qualified_name.split("}")[-1].lower()
                    if not local_name.endswith("id"):
                        continue
                    if not self._resembles_uuid(value):
                        continue
                    if strict_uuid.match(value):
                        continue
                    problems.append(
                        " %s: Line %s: ID '%s' appears to be a UUID but contains invalid hex characters"
                        % (path.relative_to(self.unpacked_dir), node.sourceline, value))
        except Exception as exc:
            problems.append(" %s: Error: %s" % (path.relative_to(self.unpacked_dir), exc))

    if not problems:
        if self.verbose:
            print("PASSED - All UUID-like IDs contain valid hex values")
        return True

    print("FAILED - Found %d UUID ID validation errors:" % len(problems))
    for line in problems:
        print(line)
    return False
85
+
86
+ @staticmethod
87
+ def _resembles_uuid(val: str) -> bool:
88
+ stripped = val.strip("{}()").replace("-", "")
89
+ return len(stripped) == 32 and stripped.isalnum()
90
+
91
+ # ── Slide-layout IDs in slide-masters ────────────────────────────────
92
+
93
def validate_slide_layout_ids(self):
    """Public entry point; delegates to ``_check_layout_ids``."""
    return self._check_layout_ids()

def _check_layout_ids(self):
    """Check that slide masters only reference layouts they are related to.

    For each ``ppt/slideMasters/*.xml`` file, the relationship IDs whose
    type contains ``slideLayout`` are read from the matching ``_rels``
    file. Every ``sldLayoutId`` element whose ``r:id`` is set but absent
    from that set is reported, as is a missing ``_rels`` file.

    Returns:
        ``True`` when nothing was reported or no masters exist, ``False``
        otherwise (after printing each finding and a remediation hint).
    """
    import lxml.etree

    master_files = list(self.unpacked_dir.glob("ppt/slideMasters/*.xml"))
    if not master_files:
        if self.verbose:
            print("PASSED - No slide masters found")
        return True

    problems: list[str] = []
    for master in master_files:
        try:
            master_root = lxml.etree.parse(str(master)).getroot()
            rels_path = master.parent / "_rels" / ("%s.rels" % master.name)
            if not rels_path.exists():
                problems.append(
                    " %s: Missing relationships file: %s"
                    % (master.relative_to(self.unpacked_dir),
                       rels_path.relative_to(self.unpacked_dir)))
                continue

            rels_root = lxml.etree.parse(str(rels_path)).getroot()
            known_rids = set()
            for rel in rels_root.findall("{%s}Relationship" % self.PACKAGE_RELATIONSHIPS_NAMESPACE):
                if "slideLayout" in rel.get("Type", ""):
                    known_rids.add(rel.get("Id"))

            for layout_ref in master_root.findall(".//{%s}sldLayoutId" % self._PML_NS):
                rel_id = layout_ref.get("{%s}id" % self.OFFICE_RELATIONSHIPS_NAMESPACE)
                layout_id = layout_ref.get("id")
                if not rel_id or rel_id in known_rids:
                    continue
                problems.append(
                    " %s: Line %s: sldLayoutId with id='%s' "
                    "references r:id='%s' which is not found in slide layout relationships"
                    % (master.relative_to(self.unpacked_dir), layout_ref.sourceline,
                       layout_id, rel_id))
        except Exception as exc:
            problems.append(" %s: Error: %s" % (master.relative_to(self.unpacked_dir), exc))

    if not problems:
        if self.verbose:
            print("PASSED - All slide layout IDs reference valid slide layouts")
        return True

    print("FAILED - Found %d slide layout ID validation errors:" % len(problems))
    for line in problems:
        print(line)
    print("Remove invalid references or add missing slide layouts to the relationships file.")
    return False
145
+
146
+ # ── Duplicate slide layouts per slide ────────────────────────────────
147
+
148
def validate_no_duplicate_slide_layouts(self):
    """Public entry point; delegates to ``_check_duplicate_layouts``."""
    return self._check_duplicate_layouts()

def _check_duplicate_layouts(self):
    """Report slides whose relationships name more than one slide layout.

    Each ``ppt/slides/_rels/*.xml.rels`` file is scanned and the number of
    relationships whose type contains ``slideLayout`` is counted; a count
    above one is reported.

    Returns:
        ``True`` when nothing was reported, ``False`` otherwise (after
        printing each finding).
    """
    import lxml.etree

    problems: list[str] = []
    for rels_path in self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"):
        try:
            rels_root = lxml.etree.parse(str(rels_path)).getroot()
            layout_rels = [
                rel
                for rel in rels_root.findall(
                    "{%s}Relationship" % self.PACKAGE_RELATIONSHIPS_NAMESPACE)
                if "slideLayout" in rel.get("Type", "")
            ]
            if len(layout_rels) > 1:
                problems.append(
                    " %s: has %d slideLayout references"
                    % (rels_path.relative_to(self.unpacked_dir), len(layout_rels)))
        except Exception as exc:
            problems.append(" %s: Error: %s" % (rels_path.relative_to(self.unpacked_dir), exc))

    if not problems:
        if self.verbose:
            print("PASSED - All slides have exactly one slideLayout reference")
        return True

    print("FAILED - Found slides with duplicate slideLayout references:")
    for line in problems:
        print(line)
    return False
177
+
178
+ # ── Notes-slide uniqueness ───────────────────────────────────────────
179
+
180
def validate_notes_slide_references(self):
    """Public entry point; delegates to ``_check_notes_refs``."""
    return self._check_notes_refs()

def _check_notes_refs(self):
    """Report notes slides that are referenced by more than one slide.

    Each ``ppt/slides/_rels/*.xml.rels`` file is scanned for relationships
    whose type contains ``notesSlide``. Targets are grouped after removing
    ``../`` segments; any target shared by several slides is reported,
    followed by one detail line per referencing ``.rels`` file.

    The number in the FAILED headline counts headline problems only
    (parse errors and shared notes slides), not the detail lines. It is
    tracked explicitly rather than inferred from message prefixes, which
    under-counted whenever a headline message began with the same prefix
    as a detail line.

    Returns:
        ``True`` when nothing was reported or no slide relationship files
        exist, ``False`` otherwise (after printing each finding).
    """
    import lxml.etree

    issues: list[str] = []
    main_count = 0
    notes_map: dict[str, list] = {}

    rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))
    if not rels_files:
        if self.verbose:
            print("PASSED - No slide relationship files found")
        return True

    for rf in rels_files:
        try:
            root = lxml.etree.parse(str(rf)).getroot()
            for rel in root.findall("{%s}Relationship" % self.PACKAGE_RELATIONSHIPS_NAMESPACE):
                if "notesSlide" not in rel.get("Type", ""):
                    continue
                tgt = rel.get("Target", "")
                if not tgt:
                    continue
                normalised = tgt.replace("../", "")
                # "slide1.xml.rels" -> stem "slide1.xml" -> "slide1"
                slide = rf.stem.replace(".xml", "")
                notes_map.setdefault(normalised, []).append((slide, rf))
        except Exception as exc:
            issues.append(" %s: Error: %s" % (rf.relative_to(self.unpacked_dir), exc))
            main_count += 1

    for tgt, refs in notes_map.items():
        if len(refs) <= 1:
            continue
        main_count += 1
        names = [slide for slide, _ in refs]
        issues.append(" Notes slide '%s' is referenced by multiple slides: %s" % (tgt, ", ".join(names)))
        for _, rf in refs:
            issues.append(" - %s" % rf.relative_to(self.unpacked_dir))

    if issues:
        print("FAILED - Found %d notes slide reference validation errors:" % main_count)
        for i in issues:
            print(i)
        print("Each slide may optionally have its own notes slide file.")
        return False
    if self.verbose:
        print("PASSED - All notes slide references are unique")
    return True
227
+
228
+
229
+ if __name__ == "__main__":
230
+ raise RuntimeError("This module should not be run directly.")