@panda-agent/panda-cli 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/dist/panda-cli-ink.bundle.mjs +267 -258
  2. package/package.json +6 -4
  3. package/skills/.gitkeep +0 -0
  4. package/skills/README.md +13 -0
  5. package/skills/docx/.skill-metadata.yaml +173 -0
  6. package/skills/docx/LICENSE.txt +30 -0
  7. package/skills/docx/SKILL.md +589 -0
  8. package/skills/docx/scripts/__init__.py +1 -0
  9. package/skills/docx/scripts/accept_changes.py +206 -0
  10. package/skills/docx/scripts/comment.py +442 -0
  11. package/skills/docx/scripts/office/helpers/__init__.py +1 -0
  12. package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
  13. package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
  14. package/skills/docx/scripts/office/pack.py +167 -0
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  21. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  22. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  23. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  24. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  25. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  26. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  27. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  28. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  29. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  30. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  31. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  32. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  33. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  34. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  35. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  36. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  37. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  38. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  39. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  40. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  41. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  42. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  43. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  44. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  45. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  46. package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
  47. package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  48. package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  49. package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  50. package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  51. package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  52. package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  53. package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  54. package/skills/docx/scripts/office/soffice.py +194 -0
  55. package/skills/docx/scripts/office/unpack.py +145 -0
  56. package/skills/docx/scripts/office/validate.py +114 -0
  57. package/skills/docx/scripts/office/validators/__init__.py +16 -0
  58. package/skills/docx/scripts/office/validators/base.py +733 -0
  59. package/skills/docx/scripts/office/validators/docx.py +354 -0
  60. package/skills/docx/scripts/office/validators/pptx.py +230 -0
  61. package/skills/docx/scripts/office/validators/redlining.py +212 -0
  62. package/skills/docx/scripts/templates/comments.xml +3 -0
  63. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  64. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  65. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  66. package/skills/docx/scripts/templates/people.xml +3 -0
  67. package/skills/frontend-design/LICENSE.txt +177 -0
  68. package/skills/frontend-design/SKILL.md +42 -0
  69. package/skills/pdf/.skill-metadata.yaml +273 -0
  70. package/skills/pdf/LICENSE.txt +30 -0
  71. package/skills/pdf/SKILL.md +324 -0
  72. package/skills/pdf/advanced-reference.md +609 -0
  73. package/skills/pdf/form-filling-guide.md +318 -0
  74. package/skills/pdf/forms.md +294 -0
  75. package/skills/pdf/reference.md +612 -0
  76. package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
  77. package/skills/pdf/scripts/check_fillable_fields.py +64 -0
  78. package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
  79. package/skills/pdf/scripts/create_validation_image.py +125 -0
  80. package/skills/pdf/scripts/extract_form_field_info.py +220 -0
  81. package/skills/pdf/scripts/extract_form_structure.py +202 -0
  82. package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
  83. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
  84. package/skills/pptx-generator/SKILL.md +204 -0
  85. package/skills/pptx-generator/assets/styles/business.json +8 -0
  86. package/skills/pptx-generator/assets/styles/minimal.json +8 -0
  87. package/skills/pptx-generator/assets/styles/modern.json +8 -0
  88. package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
  89. package/skills/pptx-generator/references/collaboration_guide.md +381 -0
  90. package/skills/pptx-generator/references/json_format_spec.md +215 -0
  91. package/skills/pptx-generator/references/layout_guide.md +290 -0
  92. package/skills/pptx-generator/scripts/json_validator.py +194 -0
  93. package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
  94. package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
  95. package/skills/skill-creator/LICENSE.txt +202 -0
  96. package/skills/skill-creator/SKILL.md +479 -0
  97. package/skills/skill-creator/agents/analyzer.md +274 -0
  98. package/skills/skill-creator/agents/comparator.md +202 -0
  99. package/skills/skill-creator/agents/grader.md +223 -0
  100. package/skills/skill-creator/assets/eval_review.html +146 -0
  101. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  102. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  103. package/skills/skill-creator/references/schemas.md +430 -0
  104. package/skills/skill-creator/scripts/__init__.py +0 -0
  105. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  106. package/skills/skill-creator/scripts/generate_report.py +326 -0
  107. package/skills/skill-creator/scripts/improve_description.py +248 -0
  108. package/skills/skill-creator/scripts/package_skill.py +136 -0
  109. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  110. package/skills/skill-creator/scripts/run_eval.py +310 -0
  111. package/skills/skill-creator/scripts/run_loop.py +332 -0
  112. package/skills/skill-creator/scripts/utils.py +47 -0
  113. package/skills/xlsx/.skill-metadata.yaml +185 -0
  114. package/skills/xlsx/LICENSE.txt +30 -0
  115. package/skills/xlsx/SKILL.md +233 -0
  116. package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
  117. package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
  118. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
  119. package/skills/xlsx/scripts/office/pack.py +162 -0
  120. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  121. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  122. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  123. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  124. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  125. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  126. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  127. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  128. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  129. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  130. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  131. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  132. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  133. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  134. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  135. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  136. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  137. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  138. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  139. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  140. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  141. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  142. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  143. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  144. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  145. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  146. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  147. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  148. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  149. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  150. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  151. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  152. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  153. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  154. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  155. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  156. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  157. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  158. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  159. package/skills/xlsx/scripts/office/soffice.py +185 -0
  160. package/skills/xlsx/scripts/office/unpack.py +146 -0
  161. package/skills/xlsx/scripts/office/validate.py +108 -0
  162. package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
  163. package/skills/xlsx/scripts/office/validators/base.py +800 -0
  164. package/skills/xlsx/scripts/office/validators/docx.py +383 -0
  165. package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
  166. package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
  167. package/skills/xlsx/scripts/recalc.py +296 -0
@@ -0,0 +1,190 @@
1
+ """Coalesce adjacent ``<w:r>`` elements that share identical formatting.
2
+
3
+ Operates on paragraphs *and* tracked-change containers (``<w:ins>``,
4
+ ``<w:del>``). Additionally strips RSID attributes from runs and removes
5
+ ``proofErr`` spell/grammar markers that would otherwise prevent merging.
6
+ """
7
+
8
+ import pathlib
9
+
10
+ import defusedxml.minidom
11
+
12
+
13
+ # ── DOM traversal utilities ──────────────────────────────────────────────────
14
+
15
def _collect_by_tag(root, tag: str) -> list:
    """Return every element at or below *root* named *tag*, in document order.

    An element matches when its local name equals *tag*, or when the name
    ends with ``:<tag>``.  Only element nodes are descended into, so a
    non-element *root* produces an empty list.
    """
    suffix = ":%s" % tag
    matches: list = []
    pending = [root]
    while pending:
        node = pending.pop()
        if node.nodeType != node.ELEMENT_NODE:
            continue
        name = node.localName or node.tagName
        if name == tag or name.endswith(suffix):
            matches.append(node)
        # Children are pushed reversed so the stack pops them left-to-right,
        # which keeps the walk in pre-order.
        pending.extend(reversed(node.childNodes))
    return matches
27
+
28
+
29
def _child_by_tag(parent, tag: str):
    """Find the first immediate child element of *parent* named *tag*.

    Matching uses the child's local name (or tag name when no local name is
    set); a name ending in ``:<tag>`` also counts.  Returns ``None`` when no
    direct child matches.
    """
    qualified_tail = ":%s" % tag
    named_children = (
        (kid, kid.localName or kid.tagName)
        for kid in parent.childNodes
        if kid.nodeType == kid.ELEMENT_NODE
    )
    return next(
        (
            kid
            for kid, name in named_children
            if name == tag or name.endswith(qualified_tail)
        ),
        None,
    )
38
+
39
+
40
def _children_by_tag(parent, tag: str) -> list:
    """List the immediate child elements of *parent* named *tag*, in order.

    Grandchildren are not considered.  A child matches on its local name
    (or tag name), or when that name ends with ``:<tag>``.
    """
    tail = ":%s" % tag
    selected = []
    for kid in parent.childNodes:
        if kid.nodeType != kid.ELEMENT_NODE:
            continue
        name = kid.localName or kid.tagName
        if name == tag or name.endswith(tail):
            selected.append(kid)
    return selected
47
+
48
+
49
def _directly_adjacent(a, b) -> bool:
    """Tell whether *b* follows *a* with nothing but blank text in between.

    Scans the siblings after *a*.  Any element, or any text node containing
    non-whitespace, found before *b* makes the answer ``False``; so does
    running out of siblings without meeting *b*.
    """
    node = a.nextSibling
    while node is not None and node is not b:
        if node.nodeType == node.ELEMENT_NODE:
            return False
        if node.nodeType == node.TEXT_NODE and node.data.strip():
            return False
        node = node.nextSibling
    # The loop stops either on *b* (adjacent) or at the end of the sibling
    # list (*b* never seen).
    return node is not None
61
+
62
+
63
def _tag_matches_run(nd) -> bool:
    """Report whether *nd* is named ``r`` (bare, or with any ``prefix:``)."""
    name = nd.localName if nd.localName else nd.tagName
    if name == "r":
        return True
    return name.endswith(":r")
66
+
67
+
68
+ # ── Cleanup passes ───────────────────────────────────────────────────────────
69
+
70
def _purge_elements(root, tag: str) -> None:
    """Detach every element named *tag* found at or below *root*.

    Elements that have no parent node are left alone.
    """
    for victim in _collect_by_tag(root, tag):
        owner = victim.parentNode
        if not owner:
            continue
        owner.removeChild(victim)
74
+
75
+
76
def _erase_rsid_attributes(root) -> None:
    """Remove attributes whose name contains ``rsid`` from every run under *root*.

    The name test is case-insensitive.
    """
    for run in _collect_by_tag(root, "r"):
        # Gather the names first, then remove, so the attribute map is not
        # modified while it is being read.
        rsid_names = [
            attr.name
            for attr in run.attributes.values()
            if "rsid" in attr.name.lower()
        ]
        for name in rsid_names:
            run.removeAttribute(name)
81
+
82
+
83
+ # ── Core merging logic ───────────────────────────────────────────────────────
84
+
85
def _formatting_equal(r1, r2) -> bool:
    """Return ``True`` when both runs carry the same ``rPr`` formatting.

    Two runs match if neither has an ``rPr`` child, or if both have one and
    the serialized XML of the two ``rPr`` elements is identical.
    """
    props_1 = _child_by_tag(r1, "rPr")
    props_2 = _child_by_tag(r2, "rPr")
    if props_1 is None and props_2 is None:
        return True
    if props_1 is None or props_2 is None:
        return False
    return props_1.toxml() == props_2.toxml()
91
+
92
+
93
def _absorb_content(dest, src) -> None:
    """Append each element child of *src*, other than its ``rPr``, to *dest*.

    Non-element children of *src* (text, comments) are not moved.
    """
    movable = []
    for kid in src.childNodes:
        if kid.nodeType != kid.ELEMENT_NODE:
            continue
        name = kid.localName or kid.tagName
        if name != "rPr" and not name.endswith(":rPr"):
            movable.append(kid)
    # Move after the scan so *src*'s child list is not mutated mid-iteration.
    for kid in movable:
        dest.appendChild(kid)
102
+
103
+
104
def _next_elem(nd):
    """Return the nearest following sibling of *nd* that is an element, or ``None``."""
    candidate = nd.nextSibling
    while candidate is not None and candidate.nodeType != candidate.ELEMENT_NODE:
        candidate = candidate.nextSibling
    return candidate
111
+
112
+
113
def _next_run_sibling(nd):
    """Return the next following sibling of *nd* that is a run, or ``None``.

    Siblings that are not run elements (text nodes, other elements) are
    stepped over.
    """
    candidate = nd.nextSibling
    while candidate is not None:
        is_element = candidate.nodeType == candidate.ELEMENT_NODE
        if is_element and _tag_matches_run(candidate):
            break
        candidate = candidate.nextSibling
    return candidate
120
+
121
+
122
def _first_run_child(container):
    """Return the first direct child of *container* that is a run, or ``None``."""
    runs = (
        kid
        for kid in container.childNodes
        if kid.nodeType == kid.ELEMENT_NODE and _tag_matches_run(kid)
    )
    return next(runs, None)
127
+
128
+
129
def _squash_text_nodes(run) -> None:
    """Combine consecutive ``<w:t>`` (or ``<w:delText>``) nodes inside *run*.

    Each kind is handled separately: a ``t`` is only ever folded into a
    neighbouring ``t``, a ``delText`` into a neighbouring ``delText``.  Two
    nodes are combined only when nothing but insignificant whitespace sits
    between them.  The text of the right-hand node is appended to the
    left-hand one and the right-hand node is removed from *run*.

    After a merge, ``xml:space="preserve"`` is set on the surviving node when
    the combined text starts or ends with a space, and removed otherwise.
    """
    # Previously only "t" children were collected, so deleted-text nodes were
    # never consolidated despite the documented contract.
    for tag in ("t", "delText"):
        nodes = _children_by_tag(run, tag)
        # Walk right-to-left so every node is folded into its left neighbour
        # and a chain of adjacent nodes ends up in the left-most one.
        for idx in range(len(nodes) - 1, 0, -1):
            cur, prev = nodes[idx], nodes[idx - 1]
            if not _directly_adjacent(prev, cur):
                continue

            txt_prev = prev.firstChild.data if prev.firstChild else ""
            txt_cur = cur.firstChild.data if cur.firstChild else ""
            combined = txt_prev + txt_cur

            if prev.firstChild:
                prev.firstChild.data = combined
            else:
                prev.appendChild(run.ownerDocument.createTextNode(combined))

            if combined[0:1] == " " or combined[-1:] == " ":
                prev.setAttribute("xml:space", "preserve")
            elif prev.hasAttribute("xml:space"):
                prev.removeAttribute("xml:space")

            run.removeChild(cur)
149
+
150
+
151
def _merge_within(container) -> int:
    """Fold neighbouring same-format runs of *container* together.

    Starting at the first run child, the immediately following element is
    absorbed for as long as it is a run with equal formatting; the absorbed
    run is removed from *container*.  The surviving run's text nodes are
    then consolidated before moving to the next run sibling.

    Returns the number of runs that were absorbed.
    """
    absorbed = 0
    run = _first_run_child(container)
    while run is not None:
        neighbour = _next_elem(run)
        while (
            neighbour is not None
            and _tag_matches_run(neighbour)
            and _formatting_equal(run, neighbour)
        ):
            _absorb_content(run, neighbour)
            container.removeChild(neighbour)
            absorbed += 1
            # The removal exposes a new immediate neighbour; look again.
            neighbour = _next_elem(run)
        _squash_text_nodes(run)
        run = _next_run_sibling(run)
    return absorbed
167
+
168
+
169
+ # ── Public entry point ───────────────────────────────────────────────────────
170
+
171
def merge_runs(input_dir: str) -> tuple[int, str]:
    """Merge adjacent, identically formatted runs in ``word/document.xml``.

    *input_dir* is the root of an unpacked document.  The file is parsed,
    ``proofErr`` elements and run RSID attributes are stripped, runs are
    merged inside every element that directly contains a run, and the result
    is written back over the original file as UTF-8.

    Returns ``(merged_count, message)``.  When the file is missing, or any
    exception occurs while processing, the count is ``0`` and the message
    starts with ``"Error: "``.
    """
    doc_path = pathlib.Path(input_dir, "word", "document.xml")
    if not doc_path.exists():
        return 0, "Error: %s not found" % doc_path

    try:
        xml_text = doc_path.read_text(encoding="utf-8")
        dom = defusedxml.minidom.parseString(xml_text)
        top = dom.documentElement

        _purge_elements(top, "proofErr")
        _erase_rsid_attributes(top)

        # Every distinct parent of a run is a container to merge within.
        containers = set()
        for run in _collect_by_tag(top, "r"):
            containers.add(run.parentNode)

        merged = 0
        for container in containers:
            merged += _merge_within(container)

        serialized = dom.toxml(encoding="UTF-8")
        doc_path.write_bytes(serialized)
    except Exception as exc:
        return 0, "Error: %s" % exc

    return merged, "Merged %d runs" % merged
@@ -0,0 +1,185 @@
1
+ """Collapse consecutive tracked-change wrappers from the same reviewer.
2
+
3
+ Adjacent ``<w:ins>`` blocks by the same author are folded into one element;
4
+ likewise for ``<w:del>``. This dramatically reduces clutter in documents
5
+ with heavy revision history.
6
+
7
+ Constraints:
8
+ * Only same-type merges: ``ins`` with ``ins``, ``del`` with ``del``.
9
+ * Author must match (timestamps are ignored).
10
+ * Elements must be truly adjacent — only insignificant whitespace allowed
11
+ between them.
12
+ """
13
+
14
+ import pathlib
15
+ import xml.etree.ElementTree as ET
16
+ import zipfile
17
+
18
+ import defusedxml.minidom
19
+
20
+ _WML_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
21
+
22
+
23
+ # ── DOM helpers (minidom) ────────────────────────────────────────────────────
24
+
25
def _scan_elements(root, tag: str) -> list:
    """Collect every element at or below *root* named *tag*, in document order.

    A name matches when it equals *tag* or ends with ``:<tag>``.  Only
    element nodes are descended into.
    """
    def _descend(node):
        if node.nodeType != node.ELEMENT_NODE:
            return
        name = node.localName or node.tagName
        if name == tag or name.endswith(":%s" % tag):
            yield node
        for kid in node.childNodes:
            yield from _descend(kid)

    return list(_descend(root))
36
+
37
+
38
def _tag_match(nd, tag: str) -> bool:
    """Report whether *nd* is named *tag*, either bare or as ``prefix:tag``."""
    name = nd.localName or nd.tagName
    if name == tag:
        return True
    return name.endswith(":%s" % tag)
41
+
42
+
43
def _extract_author(elem) -> str:
    """Return the author recorded on *elem*, or ``""`` when there is none.

    The ``w:author`` attribute is tried first.  If it is absent or empty,
    the first attribute whose local name is ``author`` (or whose name ends
    with ``:author``) supplies the value.
    """
    direct = elem.getAttribute("w:author")
    if direct:
        return direct
    return next(
        (
            attr.value
            for attr in elem.attributes.values()
            if attr.localName == "author" or attr.name.endswith(":author")
        ),
        "",
    )
51
+
52
+
53
def _only_whitespace_between(first, second) -> bool:
    """Return ``True`` when *second* follows *first* separated only by blank text.

    Walks the siblings after *first*.  An element, or a text node with
    non-whitespace content, encountered before *second* yields ``False``.
    ``False`` is also returned when the sibling list ends without reaching
    *second* (i.e. *second* is not a later sibling of *first*).
    """
    cur = first.nextSibling
    while cur is not None and cur is not second:
        if cur.nodeType == cur.ELEMENT_NODE:
            return False
        if cur.nodeType == cur.TEXT_NODE and cur.data.strip():
            return False
        cur = cur.nextSibling
    # Reaching the end of the siblings means *second* was never found, so
    # the two nodes are not adjacent at all.
    return cur is not None
62
+
63
+
64
def _transplant_children(dest, src) -> None:
    """Move every child node of *src*, in order, to the end of *dest*.

    All node types are moved (elements, text, comments); *src* is left
    empty.
    """
    for node in list(src.childNodes):
        src.removeChild(node)
        dest.appendChild(node)
69
+
70
+
71
def _fold_tracked_in(container, tag: str) -> int:
    """Fold adjacent same-author *tag* children of *container* together.

    Only direct children of *container* are considered.  When two
    consecutive candidates have the same author and nothing but
    insignificant whitespace between them, the right one's children are
    moved into the left one and the right one is removed.  The left element
    keeps absorbing followers until one fails the test, which then becomes
    the new left element.

    Returns the number of elements removed.
    """
    siblings = [
        kid
        for kid in container.childNodes
        if kid.nodeType == kid.ELEMENT_NODE and _tag_match(kid, tag)
    ]
    if len(siblings) < 2:
        return 0

    folded = 0
    keeper = siblings[0]
    for follower in siblings[1:]:
        same_author = _extract_author(keeper) == _extract_author(follower)
        if same_author and _only_whitespace_between(keeper, follower):
            _transplant_children(keeper, follower)
            container.removeChild(follower)
            folded += 1
        else:
            keeper = follower
    return folded
91
+
92
+
93
+ # ── Public API (minidom-based) ───────────────────────────────────────────────
94
+
95
def simplify_redlines(input_dir: str) -> tuple[int, str]:
    """Fold adjacent same-author tracked changes in ``word/document.xml``.

    *input_dir* is the root of an unpacked document.  Every ``p`` and ``tc``
    element is visited and its direct ``ins`` children, then its direct
    ``del`` children, are folded.  The document is written back over the
    original file as UTF-8.

    Returns ``(folded_count, message)``.  When the file is missing, or any
    exception occurs while processing, the count is ``0`` and the message
    starts with ``"Error: "``.
    """
    doc_path = pathlib.Path(input_dir, "word", "document.xml")
    if not doc_path.exists():
        return 0, "Error: %s not found" % doc_path

    try:
        xml_text = doc_path.read_text(encoding="utf-8")
        dom = defusedxml.minidom.parseString(xml_text)
        top = dom.documentElement

        # Both scans run before any folding starts.
        buckets = [*_scan_elements(top, "p"), *_scan_elements(top, "tc")]
        total = 0
        for bkt in buckets:
            for kind in ("ins", "del"):
                total += _fold_tracked_in(bkt, kind)

        doc_path.write_bytes(dom.toxml(encoding="UTF-8"))
    except Exception as exc:
        return 0, "Error: %s" % exc

    return total, "Simplified %d tracked changes" % total
116
+
117
+
118
+ # ── ElementTree-based author analysis ────────────────────────────────────────
119
+
120
def get_tracked_change_authors(doc_xml_path: pathlib.Path) -> dict[str, int]:
    """Count tracked changes per author in an unpacked ``document.xml``.

    Every ``w:ins`` and ``w:del`` element below the root that carries a
    non-empty ``w:author`` attribute adds one to that author's count.

    Returns ``{author: change_count}``; an empty dict when the file does not
    exist or is not well-formed XML.
    """
    if not doc_xml_path.exists():
        return {}
    try:
        root = ET.parse(doc_xml_path).getroot()
    except ET.ParseError:
        return {}

    ns = {"w": _WML_NS}
    attr_key = "{%s}author" % _WML_NS
    tally: dict[str, int] = {}
    for kind in ("ins", "del"):
        authors = (el.get(attr_key) for el in root.findall(".//w:%s" % kind, ns))
        for who in authors:
            if not who:
                continue
            tally[who] = tally.get(who, 0) + 1
    return tally
138
+
139
+
140
def _authors_inside_docx(docx_path: pathlib.Path) -> dict[str, int]:
    """Count tracked changes per author directly from a zipped ``.docx``.

    Reads ``word/document.xml`` from the archive and counts every ``w:ins``
    and ``w:del`` element below the root that has a non-empty ``w:author``.

    Returns ``{author: change_count}``.  An empty dict is returned when the
    file does not exist, is not a valid ZIP archive, has no
    ``word/document.xml`` member, or that member is not well-formed XML.
    """
    try:
        with zipfile.ZipFile(docx_path, "r") as zf:
            if "word/document.xml" not in zf.namelist():
                return {}
            with zf.open("word/document.xml") as fh:
                tree = ET.parse(fh)

        ns = {"w": _WML_NS}
        attr_key = "{%s}author" % _WML_NS
        tally: dict[str, int] = {}
        for kind in ("ins", "del"):
            for el in tree.getroot().findall(".//w:%s" % kind, ns):
                who = el.get(attr_key)
                if who:
                    tally[who] = tally.get(who, 0) + 1
        return tally
    except (FileNotFoundError, zipfile.BadZipFile, ET.ParseError):
        # A missing archive is treated like an unreadable one: no authors.
        # Previously it escaped as FileNotFoundError.
        return {}
160
+
161
+
162
def infer_author(modified_dir: pathlib.Path, original_docx: pathlib.Path, default: str = "Claude") -> str:
    """Work out which single author added tracked changes to a document.

    Per-author change counts in ``<modified_dir>/word/document.xml`` are
    compared with the counts in *original_docx*.  An author counts as having
    added changes when their count in the modified document is higher.

    Returns that author's name when exactly one author added changes, and
    *default* when the modified document has no tracked-change authors or
    nobody's count increased.

    Raises:
        ValueError: more than one author added changes.
    """
    mod_authors = get_tracked_change_authors(modified_dir / "word" / "document.xml")
    if not mod_authors:
        return default

    orig_authors = _authors_inside_docx(original_docx)

    gained: dict[str, int] = {
        who: n - orig_authors.get(who, 0)
        for who, n in mod_authors.items()
        if n - orig_authors.get(who, 0) > 0
    }

    if len(gained) > 1:
        raise ValueError(
            "Multiple authors added new changes: %s. "
            "Cannot infer which author to validate." % gained
        )
    if gained:
        return next(iter(gained))
    return default
@@ -0,0 +1,167 @@
1
+ """Reassemble an unpacked Office directory into a DOCX / PPTX / XLSX archive.
2
+
3
+ The tool validates with automatic repair, strips cosmetic whitespace from XML,
4
+ and produces the final ZIP-based file.
5
+
6
+ Invocation::
7
+
8
+ python pack.py <src_dir> <dest_file> [--original <file>] [--validate true|false]
9
+
10
+ Samples::
11
+
12
+ python pack.py unpacked/ output.docx --original input.docx
13
+ python pack.py unpacked/ output.pptx --validate false
14
+ """
15
+
16
+ import argparse
17
+ import shutil
18
+ import sys
19
+ import tempfile
20
+ import zipfile
21
+ import pathlib
22
+
23
+ import defusedxml.minidom
24
+
25
+ from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
26
+
27
+
28
+ _ALLOWED_EXTENSIONS = {".docx", ".pptx", ".xlsx"}
29
+
30
+
31
def _strip_xml_formatting(fp: pathlib.Path) -> None:
    """Rewrite *fp* in place without pretty-printing whitespace or comments.

    Whitespace-only text nodes and comment nodes are removed from every
    element, except elements whose whitespace is document content:

    * text-bearing elements (local name ``t``, ``delText``, ``instrText`` or
      ``delInstrText``), with or without a namespace prefix;
    * any element carrying ``xml:space="preserve"``.

    The result is written back as UTF-8.

    Raises:
        Exception: any parse or write failure is reported on stderr and then
            re-raised unchanged.
    """
    # Local names of elements that hold literal text.  The earlier check only
    # recognised prefixed ``*:t`` and so erased whitespace-only content from
    # unprefixed ``<t>`` and from deleted-text / field-code elements.
    text_tags = ("t", "delText", "instrText", "delInstrText")
    try:
        with open(fp, encoding="utf-8") as fh:
            parsed = defusedxml.minidom.parse(fh)

        for el in parsed.getElementsByTagName("*"):
            local_name = el.tagName.rpartition(":")[2]
            if local_name in text_tags or el.getAttribute("xml:space") == "preserve":
                continue
            children_to_drop = [
                ch for ch in el.childNodes
                if (ch.nodeType == ch.TEXT_NODE and ch.nodeValue and ch.nodeValue.strip() == "")
                or ch.nodeType == ch.COMMENT_NODE
            ]
            for ch in children_to_drop:
                el.removeChild(ch)

        fp.write_bytes(parsed.toxml(encoding="UTF-8"))
    except Exception as err:
        print("ERROR: Failed to parse {}: {}".format(fp.name, err), file=sys.stderr)
        raise
52
+
53
+
54
+ def _execute_validators(
55
+ src_dir: pathlib.Path,
56
+ orig: pathlib.Path,
57
+ ext: str,
58
+ author_fn=None,
59
+ ) -> tuple[bool, str | None]:
60
+ """Run the appropriate validator chain and return (ok, log_text)."""
61
+ log_parts: list[str] = []
62
+ checkers: list = []
63
+
64
+ if ext == ".docx":
65
+ writer = "Claude"
66
+ if author_fn:
67
+ try:
68
+ writer = author_fn(src_dir, orig)
69
+ except ValueError as ve:
70
+ print("Warning: {} Using default author 'Claude'.".format(ve), file=sys.stderr)
71
+ checkers = [
72
+ DOCXSchemaValidator(src_dir, orig),
73
+ RedliningValidator(src_dir, orig, author=writer),
74
+ ]
75
+ elif ext == ".pptx":
76
+ checkers = [PPTXSchemaValidator(src_dir, orig)]
77
+
78
+ if not checkers:
79
+ return True, None
80
+
81
+ fixed = sum(c.repair() for c in checkers)
82
+ if fixed:
83
+ log_parts.append("Auto-repaired {} issue(s)".format(fixed))
84
+
85
+ ok = all(c.validate() for c in checkers)
86
+ if ok:
87
+ log_parts.append("All validations PASSED!")
88
+
89
+ return ok, "\n".join(log_parts) if log_parts else None
90
+
91
+
92
+ def pack(
93
+ input_directory: str,
94
+ output_file: str,
95
+ original_file: str | None = None,
96
+ validate: bool = True,
97
+ infer_author_func=None,
98
+ ) -> tuple[None, str]:
99
+ """Pack *input_directory* into *output_file* (Office ZIP archive)."""
100
+ src = pathlib.Path(input_directory)
101
+ dest = pathlib.Path(output_file)
102
+ ext = dest.suffix.lower()
103
+
104
+ if not src.is_dir():
105
+ return None, "Error: {} is not a directory".format(src)
106
+
107
+ if ext not in _ALLOWED_EXTENSIONS:
108
+ return None, "Error: {} must be a .docx, .pptx, or .xlsx file".format(output_file)
109
+
110
+ if validate and original_file:
111
+ orig_p = pathlib.Path(original_file)
112
+ if orig_p.exists():
113
+ ok, report = _execute_validators(src, orig_p, ext, infer_author_func)
114
+ if report:
115
+ print(report)
116
+ if not ok:
117
+ return None, "Error: Validation failed for {}".format(src)
118
+
119
+ with tempfile.TemporaryDirectory() as scratch:
120
+ staging = pathlib.Path(scratch) / "content"
121
+ shutil.copytree(src, staging)
122
+
123
+ xml_globs = ("*.xml", "*.rels")
124
+ for g in xml_globs:
125
+ for xf in staging.rglob(g):
126
+ _strip_xml_formatting(xf)
127
+
128
+ dest.parent.mkdir(parents=True, exist_ok=True)
129
+ with zipfile.ZipFile(dest, "w", zipfile.ZIP_DEFLATED) as zf:
130
+ for item in staging.rglob("*"):
131
+ if item.is_file():
132
+ zf.write(item, item.relative_to(staging))
133
+
134
+ return None, "Successfully packed {} to {}".format(src, output_file)
135
+
136
+
137
+ # ── CLI ──────────────────────────────────────────────────────────────────────
138
+
139
if __name__ == "__main__":
    # Command-line entry point: parse arguments, pack, print the outcome,
    # and exit non-zero when pack() reports an error.
    ap = argparse.ArgumentParser(
        description="Pack a directory into a DOCX, PPTX, or XLSX file"
    )
    ap.add_argument("input_directory", help="Unpacked Office document directory")
    ap.add_argument("output_file", help="Output Office file (.docx/.pptx/.xlsx)")
    ap.add_argument(
        "--original",
        help="Original file for validation comparison",
    )
    ap.add_argument(
        "--validate",
        # Only the literal "true" (any case) enables validation.
        type=lambda v: v.lower() == "true",
        default=True,
        metavar="true|false",
        help="Run validation with auto-repair (default: true)",
    )
    cli = ap.parse_args()

    _, message = pack(
        cli.input_directory,
        cli.output_file,
        original_file=cli.original,
        validate=cli.validate,
    )
    print(message)

    # Failure messages from pack() begin with "Error".  A substring test
    # would also fire on a success message whose path contains "Error".
    if message.startswith("Error"):
        sys.exit(1)