@panda-agent/panda-cli 0.1.29 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/dist/panda-cli-ink.bundle.mjs +258 -247
  2. package/package.json +6 -4
  3. package/skills/.gitkeep +0 -0
  4. package/skills/README.md +13 -0
  5. package/skills/docx/.skill-metadata.yaml +173 -0
  6. package/skills/docx/LICENSE.txt +30 -0
  7. package/skills/docx/SKILL.md +589 -0
  8. package/skills/docx/scripts/__init__.py +1 -0
  9. package/skills/docx/scripts/accept_changes.py +206 -0
  10. package/skills/docx/scripts/comment.py +442 -0
  11. package/skills/docx/scripts/office/helpers/__init__.py +1 -0
  12. package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
  13. package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
  14. package/skills/docx/scripts/office/pack.py +167 -0
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  21. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  22. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  23. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  24. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  25. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  26. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  27. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  28. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  29. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  30. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  31. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  32. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  33. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  34. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  35. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  36. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  37. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  38. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  39. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  40. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  41. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  42. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  43. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  44. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  45. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  46. package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
  47. package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  48. package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  49. package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  50. package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  51. package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  52. package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  53. package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  54. package/skills/docx/scripts/office/soffice.py +194 -0
  55. package/skills/docx/scripts/office/unpack.py +145 -0
  56. package/skills/docx/scripts/office/validate.py +114 -0
  57. package/skills/docx/scripts/office/validators/__init__.py +16 -0
  58. package/skills/docx/scripts/office/validators/base.py +733 -0
  59. package/skills/docx/scripts/office/validators/docx.py +354 -0
  60. package/skills/docx/scripts/office/validators/pptx.py +230 -0
  61. package/skills/docx/scripts/office/validators/redlining.py +212 -0
  62. package/skills/docx/scripts/templates/comments.xml +3 -0
  63. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  64. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  65. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  66. package/skills/docx/scripts/templates/people.xml +3 -0
  67. package/skills/frontend-design/LICENSE.txt +177 -0
  68. package/skills/frontend-design/SKILL.md +42 -0
  69. package/skills/pdf/.skill-metadata.yaml +273 -0
  70. package/skills/pdf/LICENSE.txt +30 -0
  71. package/skills/pdf/SKILL.md +324 -0
  72. package/skills/pdf/advanced-reference.md +609 -0
  73. package/skills/pdf/form-filling-guide.md +318 -0
  74. package/skills/pdf/forms.md +294 -0
  75. package/skills/pdf/reference.md +612 -0
  76. package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
  77. package/skills/pdf/scripts/check_fillable_fields.py +64 -0
  78. package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
  79. package/skills/pdf/scripts/create_validation_image.py +125 -0
  80. package/skills/pdf/scripts/extract_form_field_info.py +220 -0
  81. package/skills/pdf/scripts/extract_form_structure.py +202 -0
  82. package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
  83. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
  84. package/skills/pptx-generator/SKILL.md +204 -0
  85. package/skills/pptx-generator/assets/styles/business.json +8 -0
  86. package/skills/pptx-generator/assets/styles/minimal.json +8 -0
  87. package/skills/pptx-generator/assets/styles/modern.json +8 -0
  88. package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
  89. package/skills/pptx-generator/references/collaboration_guide.md +381 -0
  90. package/skills/pptx-generator/references/json_format_spec.md +215 -0
  91. package/skills/pptx-generator/references/layout_guide.md +290 -0
  92. package/skills/pptx-generator/scripts/json_validator.py +194 -0
  93. package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
  94. package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
  95. package/skills/skill-creator/LICENSE.txt +202 -0
  96. package/skills/skill-creator/SKILL.md +479 -0
  97. package/skills/skill-creator/agents/analyzer.md +274 -0
  98. package/skills/skill-creator/agents/comparator.md +202 -0
  99. package/skills/skill-creator/agents/grader.md +223 -0
  100. package/skills/skill-creator/assets/eval_review.html +146 -0
  101. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  102. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  103. package/skills/skill-creator/references/schemas.md +430 -0
  104. package/skills/skill-creator/scripts/__init__.py +0 -0
  105. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  106. package/skills/skill-creator/scripts/generate_report.py +326 -0
  107. package/skills/skill-creator/scripts/improve_description.py +248 -0
  108. package/skills/skill-creator/scripts/package_skill.py +136 -0
  109. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  110. package/skills/skill-creator/scripts/run_eval.py +310 -0
  111. package/skills/skill-creator/scripts/run_loop.py +332 -0
  112. package/skills/skill-creator/scripts/utils.py +47 -0
  113. package/skills/xlsx/.skill-metadata.yaml +185 -0
  114. package/skills/xlsx/LICENSE.txt +30 -0
  115. package/skills/xlsx/SKILL.md +233 -0
  116. package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
  117. package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
  118. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
  119. package/skills/xlsx/scripts/office/pack.py +162 -0
  120. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  121. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  122. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  123. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  124. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  125. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  126. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  127. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  128. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  129. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  130. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  131. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  132. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  133. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  134. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  135. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  136. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  137. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  138. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  139. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  140. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  141. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  142. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  143. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  144. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  145. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  146. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  147. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  148. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  149. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  150. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  151. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  152. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  153. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  154. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  155. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  156. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  157. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  158. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  159. package/skills/xlsx/scripts/office/soffice.py +185 -0
  160. package/skills/xlsx/scripts/office/unpack.py +146 -0
  161. package/skills/xlsx/scripts/office/validate.py +108 -0
  162. package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
  163. package/skills/xlsx/scripts/office/validators/base.py +800 -0
  164. package/skills/xlsx/scripts/office/validators/docx.py +383 -0
  165. package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
  166. package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
  167. package/skills/xlsx/scripts/recalc.py +296 -0
@@ -0,0 +1,383 @@
1
+ #!/usr/bin/env python3
2
+ # ──────────────────────────────────────────────────────────────────
3
+ # DOCX-specific schema and structural validator.
4
+ #
5
+ # Extends BaseSchemaValidator with checks for:
6
+ # • whitespace preservation on <w:t>
7
+ # • invalid <w:t> inside <w:del>
8
+ # • invalid <w:delText> inside <w:ins>
9
+ # • paraId / durableId numeric constraints
10
+ # • comment marker pairing
11
+ # • paragraph count comparison
12
+ # ──────────────────────────────────────────────────────────────────
13
+
14
+ import random
15
+ import re
16
+ import tempfile
17
+ import zipfile
18
+
19
+ import defusedxml.minidom
20
+ import lxml.etree
21
+
22
+ from .base import BaseSchemaValidator
23
+
24
+ _PREVIEW_LEN = 50
25
+
26
+
27
class DOCXSchemaValidator(BaseSchemaValidator):
    """Validator tailored to Word (.docx) documents.

    On top of the generic checks invoked from ``validate`` (which are not
    defined in this class and are presumably supplied by
    ``BaseSchemaValidator``), this class adds Word-specific checks:
    whitespace preservation on ``<w:t>``, tracked-change text elements,
    paraId/durableId numeric limits and comment marker pairing, plus a
    repair step for out-of-range durableId values.
    """

    WORD_2006_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    W14_NAMESPACE = "http://schemas.microsoft.com/office/word/2010/wordml"
    W16CID_NAMESPACE = "http://schemas.microsoft.com/office/word/2016/wordml/cid"

    ELEMENT_RELATIONSHIP_TYPES = {}

    # Compiled once; text that starts / ends with space, tab, LF or CR.
    _LEADING_WS_RE = re.compile(r"^[ \t\n\r]")
    _TRAILING_WS_RE = re.compile(r"[ \t\n\r]$")

    # ── shared helpers ──

    @staticmethod
    def _truncated_repr(text):
        """Return ``repr(text)``, cut to ``_PREVIEW_LEN`` chars plus ``...`` if longer."""
        shown = repr(text)
        if len(shown) > _PREVIEW_LEN:
            return shown[:_PREVIEW_LEN] + "..."
        return shown

    def _report_outcome(self, errs, failure_header, success_message):
        """Print the result of one check and return True when ``errs`` is empty.

        Failures are always printed (header, then one line per error); the
        success message is printed only when ``self.verbose`` is truthy.
        """
        if errs:
            print(failure_header)
            for line in errs:
                print(line)
            return False
        if self.verbose:
            print(success_message)
        return True

    # ── main orchestrator ──

    def validate(self):
        """Run every DOCX check and return True only if all of them pass.

        Returns False immediately when ``validate_xml()`` fails. Otherwise
        every check is executed (no short-circuit, so all problems are
        printed in one run) and the paragraph-count comparison is printed
        afterwards; that comparison never affects the result.
        """
        if not self.validate_xml():
            return False

        checks = [
            self.validate_namespaces,
            self.validate_unique_ids,
            self.validate_file_references,
            self.validate_content_types,
            self.validate_against_xsd,
            self.validate_whitespace_preservation,
            self.validate_deletions,
            self.validate_insertions,
            self.validate_all_relationship_ids,
            self.validate_id_constraints,
            self.validate_comment_markers,
        ]
        ok = True
        for chk in checks:
            if not chk():
                ok = False

        self.compare_paragraph_counts()
        return ok

    # ──────────────────────────────────────────────────────────────
    # Whitespace
    # ──────────────────────────────────────────────────────────────

    def validate_whitespace_preservation(self):
        """Check that edge-whitespace ``<w:t>`` text carries xml:space="preserve".

        Only files named ``document.xml`` are inspected. Returns True when
        no violation (and no parse error) was recorded.
        """
        errs = []
        t_tag = "{{{}}}t".format(self.WORD_2006_NAMESPACE)
        space_attr = "{{{}}}space".format(self.XML_NAMESPACE)

        for fp in self.xml_files:
            if fp.name != "document.xml":
                continue
            try:
                root = lxml.etree.parse(str(fp)).getroot()
                for t_el in root.iter(t_tag):
                    txt = t_el.text
                    if not txt:
                        continue
                    has_edge_ws = (
                        self._LEADING_WS_RE.search(txt)
                        or self._TRAILING_WS_RE.search(txt)
                    )
                    if not has_edge_ws:
                        continue
                    if t_el.attrib.get(space_attr) != "preserve":
                        errs.append(
                            " {}: Line {}: w:t element with whitespace missing "
                            "xml:space='preserve': {}".format(
                                fp.relative_to(self.unpacked_dir),
                                t_el.sourceline,
                                self._truncated_repr(txt),
                            )
                        )
            except Exception as exc:
                # Best effort: a file that cannot be processed is reported
                # as a violation instead of aborting the whole run.
                errs.append(" {}: Error: {}".format(fp.relative_to(self.unpacked_dir), exc))

        return self._report_outcome(
            errs,
            "FAILED - Found {} whitespace preservation violations:".format(len(errs)),
            "PASSED - All whitespace is properly preserved",
        )

    # ──────────────────────────────────────────────────────────────
    # Deletion integrity
    # ──────────────────────────────────────────────────────────────

    def validate_deletions(self):
        """Flag ``<w:t>`` with text and any ``<w:instrText>`` inside ``<w:del>``.

        Only files named ``document.xml`` are inspected. Returns True when
        nothing was flagged.
        """
        errs = []
        ns_map = {"w": self.WORD_2006_NAMESPACE}

        for fp in self.xml_files:
            if fp.name != "document.xml":
                continue
            try:
                root = lxml.etree.parse(str(fp)).getroot()

                for t in root.xpath(".//w:del//w:t", namespaces=ns_map):
                    # Empty <w:t> elements are tolerated; only real text counts.
                    if t.text:
                        errs.append(
                            " {}: Line {}: <w:t> found within <w:del>: {}".format(
                                fp.relative_to(self.unpacked_dir),
                                t.sourceline,
                                self._truncated_repr(t.text),
                            )
                        )

                for instr in root.xpath(".//w:del//w:instrText", namespaces=ns_map):
                    errs.append(
                        " {}: Line {}: <w:instrText> found within <w:del> "
                        "(use <w:delInstrText>): {}".format(
                            fp.relative_to(self.unpacked_dir),
                            instr.sourceline,
                            self._truncated_repr(instr.text or ""),
                        )
                    )

            except Exception as exc:
                errs.append(" {}: Error: {}".format(fp.relative_to(self.unpacked_dir), exc))

        return self._report_outcome(
            errs,
            "FAILED - Found {} deletion validation violations:".format(len(errs)),
            "PASSED - No w:t elements found within w:del elements",
        )

    # ──────────────────────────────────────────────────────────────
    # Insertion integrity
    # ──────────────────────────────────────────────────────────────

    def validate_insertions(self):
        """Flag ``<w:delText>`` inside ``<w:ins>`` unless it also sits in a ``<w:del>``.

        Only files named ``document.xml`` are inspected. Returns True when
        nothing was flagged.
        """
        errs = []
        ns_map = {"w": self.WORD_2006_NAMESPACE}

        for fp in self.xml_files:
            if fp.name != "document.xml":
                continue
            try:
                root = lxml.etree.parse(str(fp)).getroot()
                offenders = root.xpath(
                    ".//w:ins//w:delText[not(ancestor::w:del)]", namespaces=ns_map
                )
                for nd in offenders:
                    errs.append(
                        " {}: Line {}: <w:delText> within <w:ins>: {}".format(
                            fp.relative_to(self.unpacked_dir),
                            nd.sourceline,
                            self._truncated_repr(nd.text or ""),
                        )
                    )
            except Exception as exc:
                errs.append(" {}: Error: {}".format(fp.relative_to(self.unpacked_dir), exc))

        return self._report_outcome(
            errs,
            "FAILED - Found {} insertion validation violations:".format(len(errs)),
            "PASSED - No w:delText elements within w:ins elements",
        )

    # ──────────────────────────────────────────────────────────────
    # Paragraph counts
    # ──────────────────────────────────────────────────────────────

    def count_paragraphs_in_unpacked(self):
        """Return the number of ``<w:p>`` elements in the unpacked document.xml.

        If several files are named ``document.xml`` the last one that
        parses wins. Parse failures are printed and leave the count as is
        (0 if nothing parsed).
        """
        total = 0
        p_path = ".//{{{}}}p".format(self.WORD_2006_NAMESPACE)
        for fp in self.xml_files:
            if fp.name != "document.xml":
                continue
            try:
                root = lxml.etree.parse(str(fp)).getroot()
                total = len(root.findall(p_path))
            except Exception as exc:
                print("Error counting paragraphs in unpacked document: {}".format(exc))
        return total

    def count_paragraphs_in_original(self):
        """Return the number of ``<w:p>`` elements in the original .docx.

        Returns 0 when ``self.original_file`` is None or when the archive
        cannot be read (the error is printed).
        """
        if self.original_file is None:
            return 0
        n = 0
        try:
            # Read the single member we need straight from the archive;
            # nothing is extracted to disk.
            with zipfile.ZipFile(self.original_file, "r") as zf:
                with zf.open("word/document.xml") as fh:
                    root = lxml.etree.parse(fh).getroot()
            n = len(root.findall(".//{{{}}}p".format(self.WORD_2006_NAMESPACE)))
        except Exception as exc:
            print("Error counting paragraphs in original document: {}".format(exc))
        return n

    def compare_paragraph_counts(self):
        """Print original vs. current paragraph counts with a signed delta."""
        orig = self.count_paragraphs_in_original()
        cur = self.count_paragraphs_in_unpacked()
        delta = cur - orig
        # Positive deltas get an explicit "+"; zero/negative use str() as is.
        sign = "+{}".format(delta) if delta > 0 else str(delta)
        print("\nParagraphs: {} \u2192 {} ({})".format(orig, cur, sign))

    # ──────────────────────────────────────────────────────────────
    # ID numeric constraints (paraId, durableId)
    # ──────────────────────────────────────────────────────────────

    def _parse_id_value(self, raw: str, base: int = 16) -> int:
        """Parse ``raw`` as an integer in ``base``; raises ValueError if malformed."""
        return int(raw, base)

    def validate_id_constraints(self):
        """Check w14:paraId and w16cid:durableId values in every XML file.

        paraId is parsed as hex and must be below 0x80000000. durableId is
        parsed as decimal in ``numbering.xml`` and as hex elsewhere, and
        must be below 0x7FFFFFFF. A value that cannot be parsed is reported
        as a violation and scanning of the file continues, so one bad
        attribute cannot hide later ones.

        Returns True when no violation was found.
        """
        errs = []
        pid_attr = "{{{}}}paraId".format(self.W14_NAMESPACE)
        did_attr = "{{{}}}durableId".format(self.W16CID_NAMESPACE)

        para_ceiling = 0x80000000
        durable_ceiling = 0x7FFFFFFF

        for fp in self.xml_files:
            is_numbering = fp.name == "numbering.xml"
            durable_base = 10 if is_numbering else 16
            try:
                for nd in lxml.etree.parse(str(fp)).iter():
                    pv = nd.get(pid_attr)
                    if pv is not None:
                        try:
                            if self._parse_id_value(pv, 16) >= para_ceiling:
                                errs.append(" {}:{}: paraId={} >= 0x80000000".format(
                                    fp.name, nd.sourceline, pv
                                ))
                        except ValueError:
                            errs.append(" {}:{}: paraId={} must be hexadecimal".format(
                                fp.name, nd.sourceline, pv
                            ))

                    dv = nd.get(did_attr)
                    if dv is None:
                        continue
                    try:
                        too_large = self._parse_id_value(dv, durable_base) >= durable_ceiling
                    except ValueError:
                        if is_numbering:
                            errs.append(" {}:{}: durableId={} must be decimal in numbering.xml".format(
                                fp.name, nd.sourceline, dv
                            ))
                        else:
                            errs.append(" {}:{}: durableId={} must be hexadecimal".format(
                                fp.name, nd.sourceline, dv
                            ))
                    else:
                        if too_large:
                            errs.append(" {}:{}: durableId={} >= 0x7FFFFFFF".format(
                                fp.name, nd.sourceline, dv
                            ))
            except Exception as exc:
                # Unreadable files stay non-fatal for this check; only
                # mention the skip when verbose output was requested.
                if self.verbose:
                    print(" {}: Skipped ID constraint check: {}".format(fp.name, exc))

        return self._report_outcome(
            errs,
            "FAILED - {} ID constraint violations:".format(len(errs)),
            "PASSED - All paraId/durableId values within constraints",
        )

    # ──────────────────────────────────────────────────────────────
    # Comment marker pairing
    # ──────────────────────────────────────────────────────────────

    def validate_comment_markers(self):
        """Check that comment range markers pair up and refer to real comments.

        Reports commentRangeEnd ids without a start, commentRangeStart ids
        without an end and, when a ``comments.xml`` exists, marker ids that
        are not defined there. Returns True when no document.xml is found
        or no violation is recorded.
        """
        doc_xml = None
        cmt_xml = None
        for fp in self.xml_files:
            if fp.name == "document.xml" and "word" in str(fp):
                doc_xml = fp
            elif fp.name == "comments.xml":
                cmt_xml = fp

        if doc_xml is None:
            if self.verbose:
                print("PASSED - No document.xml found (skipping comment validation)")
            return True

        def numeric_order(marker_id):
            # Missing or non-numeric ids sort as 0 so sorting never fails on them.
            return int(marker_id) if marker_id and marker_id.isdigit() else 0

        errs = []
        try:
            dr = lxml.etree.parse(str(doc_xml)).getroot()
            ns = {"w": self.WORD_2006_NAMESPACE}
            wid = "{{{}}}id".format(self.WORD_2006_NAMESPACE)

            starts = {el.get(wid) for el in dr.xpath(".//w:commentRangeStart", namespaces=ns)}
            ends = {el.get(wid) for el in dr.xpath(".//w:commentRangeEnd", namespaces=ns)}
            refs = {el.get(wid) for el in dr.xpath(".//w:commentReference", namespaces=ns)}

            for cid in sorted(ends - starts, key=numeric_order):
                errs.append(' document.xml: commentRangeEnd id="{}" has no matching commentRangeStart'.format(cid))

            for cid in sorted(starts - ends, key=numeric_order):
                errs.append(' document.xml: commentRangeStart id="{}" has no matching commentRangeEnd'.format(cid))

            if cmt_xml and cmt_xml.exists():
                cr = lxml.etree.parse(str(cmt_xml)).getroot()
                defined = {el.get(wid) for el in cr.xpath(".//w:comment", namespaces=ns)}

                for cid in sorted((starts | ends | refs) - defined, key=numeric_order):
                    # Markers with no id at all are not reported here.
                    if cid:
                        errs.append(' document.xml: marker id="{}" references non-existent comment'.format(cid))

        except Exception as exc:
            errs.append(" Error parsing XML: {}".format(exc))

        return self._report_outcome(
            errs,
            "FAILED - {} comment marker violations:".format(len(errs)),
            "PASSED - All comment markers properly paired",
        )

    # ──────────────────────────────────────────────────────────────
    # Repair: durableId overflow
    # ──────────────────────────────────────────────────────────────

    def repair(self) -> int:
        """Run the base-class repairs, then the durableId repair; return the total."""
        n = super().repair()
        n += self._fix_durable_ids()
        return n

    def _fix_durable_ids(self) -> int:
        """Replace out-of-range or unparsable w16cid:durableId values.

        Values are read as decimal in ``numbering.xml`` and as hex
        elsewhere. A bad value is replaced by a random integer in
        ``[1, 0x7FFFFFFE]``, written in the same notation. A file is
        rewritten only if something changed, and its repairs are counted
        and announced only after the write succeeded.

        Returns the number of attributes repaired.
        """
        n_fixed = 0
        limit = 0x7FFFFFFF

        for fp in self.xml_files:
            is_numbering = fp.name == "numbering.xml"
            base = 10 if is_numbering else 16
            try:
                dom = defusedxml.minidom.parseString(fp.read_text(encoding="utf-8"))
                notices = []

                for el in dom.getElementsByTagName("*"):
                    if not el.hasAttribute("w16cid:durableId"):
                        continue
                    old_val = el.getAttribute("w16cid:durableId")
                    try:
                        bad = self._parse_id_value(old_val, base) >= limit
                    except ValueError:
                        bad = True
                    if not bad:
                        continue

                    rv = random.randint(1, limit - 1)
                    new_val = str(rv) if is_numbering else "{:08X}".format(rv)
                    el.setAttribute("w16cid:durableId", new_val)
                    notices.append(
                        " Repaired: {}: durableId {} \u2192 {}".format(fp.name, old_val, new_val)
                    )

                if notices:
                    fp.write_bytes(dom.toxml(encoding="UTF-8"))
                    # Only claim the repairs once they are actually on disk.
                    for notice in notices:
                        print(notice)
                    n_fixed += len(notices)
            except Exception as exc:
                # Repair stays best effort; surface the skip in verbose mode
                # instead of hiding it completely.
                if self.verbose:
                    print(" {}: Skipped durableId repair: {}".format(fp.name, exc))

        return n_fixed

    # keep legacy name
    repair_durableId = _fix_durable_ids
380
+
381
+
382
+ if __name__ == "__main__":
383
+ raise RuntimeError("This module should not be run directly.")
@@ -0,0 +1,250 @@
1
+ #!/usr/bin/env python3
2
+ # ──────────────────────────────────────────────────────────────────
3
+ # PPTX-specific schema and structural validator.
4
+ #
5
+ # Extends BaseSchemaValidator with checks for:
6
+ # • UUID format validation on ID attributes
7
+ # • Slide-layout ↔ slide-master relationship integrity
8
+ # • Notes-slide reference uniqueness
9
+ # • Duplicate slideLayout relationship detection
10
+ # ──────────────────────────────────────────────────────────────────
11
+
12
+ import re
13
+
14
+ from .base import BaseSchemaValidator
15
+
16
+ _UUID_RE = re.compile(
17
+ r"^[\{\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}"
18
+ r"-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\}\)]?$"
19
+ )
20
+
21
+
22
class PPTXSchemaValidator(BaseSchemaValidator):
    """Validator tailored to PowerPoint (.pptx) presentations.

    On top of the generic checks invoked from ``validate`` (which are not
    defined in this class and are presumably supplied by
    ``BaseSchemaValidator``), this class checks UUID-looking ID
    attributes, slide-master layout references, duplicate slideLayout
    relationships and shared notes-slide targets.
    """

    PRESENTATIONML_NAMESPACE = (
        "http://schemas.openxmlformats.org/presentationml/2006/main"
    )

    # Lower-cased element name -> lower-cased relationship type name.
    # NOTE(review): consumed outside this class, presumably by the base
    # validator's relationship-id check - confirm against base.py.
    ELEMENT_RELATIONSHIP_TYPES = {
        "sldid": "slide",
        "sldmasterid": "slidemaster",
        "notesmasterid": "notesmaster",
        "sldlayoutid": "slidelayout",
        "themeid": "theme",
        "tablestyleid": "tablestyles",
    }

    # ── orchestrator ──

    def validate(self):
        """Run every PPTX check and return True only if all of them pass.

        Returns False immediately when ``validate_xml()`` fails; otherwise
        every check runs (no short-circuit) so all problems are printed.
        """
        if not self.validate_xml():
            return False

        ok = True
        for chk in (
            self.validate_namespaces,
            self.validate_unique_ids,
            self.validate_uuid_ids,
            self.validate_file_references,
            self.validate_slide_layout_ids,
            self.validate_content_types,
            self.validate_against_xsd,
            self.validate_notes_slide_references,
            self.validate_all_relationship_ids,
            self.validate_no_duplicate_slide_layouts,
        ):
            if not chk():
                ok = False
        return ok

    # ──────────────────────────────────────────────────────────────
    # UUID-format IDs
    # ──────────────────────────────────────────────────────────────

    def validate_uuid_ids(self):
        """Flag ID attributes shaped like a UUID that contain non-hex characters.

        Every attribute whose local name ends with "id" (case-insensitive)
        is examined in every XML file. Returns True when nothing was
        flagged and every file parsed.
        """
        import lxml.etree

        errs = []
        for fp in self.xml_files:
            try:
                root = lxml.etree.parse(str(fp)).getroot()
                for nd in root.iter():
                    for attr_key, attr_val in nd.attrib.items():
                        # Drop the "{namespace}" prefix; a plain "id" also
                        # satisfies endswith("id").
                        local_name = attr_key.split("}")[-1].lower()
                        if not local_name.endswith("id"):
                            continue
                        if self._resembles_uuid(attr_val) and not _UUID_RE.match(attr_val):
                            errs.append(
                                " {}: Line {}: ID '{}' appears to be a UUID "
                                "but contains invalid hex characters".format(
                                    fp.relative_to(self.unpacked_dir), nd.sourceline, attr_val
                                )
                            )
            except Exception as exc:
                errs.append(" {}: Error: {}".format(fp.relative_to(self.unpacked_dir), exc))

        if errs:
            print("FAILED - Found {} UUID ID validation errors:".format(len(errs)))
            for ln in errs:
                print(ln)
            return False
        if self.verbose:
            print("PASSED - All UUID-like IDs contain valid hex values")
        return True

    @staticmethod
    def _resembles_uuid(val):
        """Return True if ``val`` has the shape of a UUID.

        Surrounding braces/parentheses and all hyphens are ignored; what
        remains must be exactly 32 alphanumeric characters (hex or not).
        """
        stripped = val.strip("{}()").replace("-", "")
        return len(stripped) == 32 and stripped.isalnum()

    # ──────────────────────────────────────────────────────────────
    # Slide-layout IDs ↔ relationships
    # ──────────────────────────────────────────────────────────────

    def validate_slide_layout_ids(self):
        """Check each ``sldLayoutId`` r:id against its slide master's .rels file.

        A master without a relationships file is an error. An r:id that is
        not among that master's slideLayout relationships is an error.
        Returns True when there are no masters or no errors.
        """
        import lxml.etree

        errs = []
        masters = list(self.unpacked_dir.glob("ppt/slideMasters/*.xml"))

        if not masters:
            if self.verbose:
                print("PASSED - No slide masters found")
            return True

        for sm in masters:
            try:
                root = lxml.etree.parse(str(sm)).getroot()
                rels_path = sm.parent / "_rels" / "{}.rels".format(sm.name)

                if not rels_path.exists():
                    errs.append(" {}: Missing relationships file: {}".format(
                        sm.relative_to(self.unpacked_dir),
                        rels_path.relative_to(self.unpacked_dir),
                    ))
                    continue

                rroot = lxml.etree.parse(str(rels_path)).getroot()
                valid_rids = {
                    r.get("Id")
                    for r in rroot.findall(
                        ".//{{{}}}Relationship".format(self.PACKAGE_RELATIONSHIPS_NAMESPACE)
                    )
                    if "slideLayout" in r.get("Type", "")
                }

                layout_tag = ".//{{{}}}sldLayoutId".format(self.PRESENTATIONML_NAMESPACE)
                rid_attr = "{{{}}}id".format(self.OFFICE_RELATIONSHIPS_NAMESPACE)
                for lid in root.findall(layout_tag):
                    rid = lid.get(rid_attr)
                    layout_id = lid.get("id")
                    # Entries without an r:id are not judged by this check.
                    if rid and rid not in valid_rids:
                        errs.append(
                            " {}: Line {}: sldLayoutId with id='{}' "
                            "references r:id='{}' which is not found in slide layout relationships".format(
                                sm.relative_to(self.unpacked_dir), lid.sourceline, layout_id, rid
                            )
                        )
            except Exception as exc:
                errs.append(" {}: Error: {}".format(sm.relative_to(self.unpacked_dir), exc))

        if errs:
            print("FAILED - Found {} slide layout ID validation errors:".format(len(errs)))
            for ln in errs:
                print(ln)
            print("Remove invalid references or add missing slide layouts to the relationships file.")
            return False
        if self.verbose:
            print("PASSED - All slide layout IDs reference valid slide layouts")
        return True

    # ──────────────────────────────────────────────────────────────
    # Duplicate slideLayout refs per slide
    # ──────────────────────────────────────────────────────────────

    def validate_no_duplicate_slide_layouts(self):
        """Flag slide .rels files holding more than one slideLayout relationship.

        Returns True when no file was flagged and every file parsed.
        """
        import lxml.etree

        errs = []
        for rf in self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"):
            try:
                root = lxml.etree.parse(str(rf)).getroot()
                layouts = [
                    r for r in root.findall(
                        ".//{{{}}}Relationship".format(self.PACKAGE_RELATIONSHIPS_NAMESPACE)
                    )
                    if "slideLayout" in r.get("Type", "")
                ]
                if len(layouts) > 1:
                    errs.append(" {}: has {} slideLayout references".format(
                        rf.relative_to(self.unpacked_dir), len(layouts)
                    ))
            except Exception as exc:
                errs.append(" {}: Error: {}".format(rf.relative_to(self.unpacked_dir), exc))

        if errs:
            print("FAILED - Found slides with duplicate slideLayout references:")
            for ln in errs:
                print(ln)
            return False
        if self.verbose:
            print("PASSED - All slides have exactly one slideLayout reference")
        return True

    # ──────────────────────────────────────────────────────────────
    # Notes-slide reference uniqueness
    # ──────────────────────────────────────────────────────────────

    def validate_notes_slide_references(self):
        """Check that no notes slide is the target of more than one slide.

        Scans every slide .rels file for notesSlide relationships, groups
        them by target (with ``../`` removed) and reports each target
        referenced by several slides, followed by the .rels files involved.
        The printed error count covers shared targets and unreadable files,
        not the per-file detail lines. Returns True when there are no slide
        .rels files or no errors.
        """
        import lxml.etree

        errs = []
        # Counted explicitly instead of being inferred from message prefixes.
        problem_count = 0
        target_map = {}

        slide_rels = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))
        if not slide_rels:
            if self.verbose:
                print("PASSED - No slide relationship files found")
            return True

        for rf in slide_rels:
            try:
                root = lxml.etree.parse(str(rf)).getroot()
                for rel in root.findall(
                    ".//{{{}}}Relationship".format(self.PACKAGE_RELATIONSHIPS_NAMESPACE)
                ):
                    if "notesSlide" not in rel.get("Type", ""):
                        continue
                    tgt = rel.get("Target", "")
                    if not tgt:
                        continue
                    norm = tgt.replace("../", "")
                    # "slide1.xml.rels" -> stem "slide1.xml" -> "slide1"
                    slide = rf.stem.replace(".xml", "")
                    target_map.setdefault(norm, []).append((slide, rf))

            except Exception as exc:
                errs.append(" {}: Error: {}".format(rf.relative_to(self.unpacked_dir), exc))
                problem_count += 1

        for tgt, refs in target_map.items():
            if len(refs) < 2:
                continue
            slide_names = ", ".join(slide for slide, _ in refs)
            errs.append(" Notes slide '{}' is referenced by multiple slides: {}".format(
                tgt, slide_names
            ))
            problem_count += 1
            for _, rfile in refs:
                errs.append(" - {}".format(rfile.relative_to(self.unpacked_dir)))

        if errs:
            print("FAILED - Found {} notes slide reference validation errors:".format(problem_count))
            for ln in errs:
                print(ln)
            print("Each slide may optionally have its own notes slide file.")
            return False
        if self.verbose:
            print("PASSED - All notes slide references are unique")
        return True
247
+
248
+
249
+ if __name__ == "__main__":
250
+ raise RuntimeError("This module should not be run directly.")