@heylemon/lemonade 0.0.4 → 0.0.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/dist/build-info.json +3 -3
  2. package/dist/canvas-host/a2ui/.bundle.hash +1 -1
  3. package/dist/gateway/skills-http.js +74 -19
  4. package/package.json +1 -1
  5. package/skills/docx/SKILL.md +25 -30
  6. package/skills/docx/scripts/accept_changes.py +0 -17
  7. package/skills/docx/scripts/comment.py +10 -39
  8. package/skills/docx/scripts/office/helpers/merge_runs.py +1 -33
  9. package/skills/docx/scripts/office/helpers/simplify_redlines.py +0 -43
  10. package/skills/docx/scripts/office/pack.py +0 -30
  11. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
  12. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
  13. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
  14. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
  21. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
  22. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
  23. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
  24. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
  25. package/skills/docx/scripts/office/soffice.py +0 -55
  26. package/skills/docx/scripts/office/unpack.py +5 -27
  27. package/skills/docx/scripts/office/validate.py +19 -14
  28. package/skills/docx/scripts/office/validators/base.py +48 -224
  29. package/skills/docx/scripts/office/validators/docx.py +44 -117
  30. package/skills/docx/scripts/office/validators/pptx.py +2 -42
  31. package/skills/docx/scripts/office/validators/redlining.py +3 -40
  32. package/skills/pdf/SKILL.md +22 -15
  33. package/skills/pdf/{FORMS.md → forms.md} +0 -14
  34. package/skills/pdf/scripts/check_bounding_boxes.py +0 -5
  35. package/skills/pdf/scripts/check_fillable_fields.py +0 -1
  36. package/skills/pdf/scripts/convert_pdf_to_images.py +0 -2
  37. package/skills/pdf/scripts/create_validation_image.py +0 -4
  38. package/skills/pdf/scripts/extract_form_field_info.py +1 -31
  39. package/skills/pdf/scripts/extract_form_structure.py +0 -9
  40. package/skills/pdf/scripts/fill_fillable_fields.py +0 -23
  41. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +3 -38
  42. package/skills/pptx/SKILL.md +2 -29
  43. package/skills/pptx/editing.md +2 -2
  44. package/skills/pptx/pptxgenjs.md +53 -8
  45. package/skills/pptx/scripts/add_slide.py +0 -30
  46. package/skills/pptx/scripts/clean.py +0 -23
  47. package/skills/pptx/scripts/office/helpers/merge_runs.py +1 -33
  48. package/skills/pptx/scripts/office/helpers/simplify_redlines.py +0 -43
  49. package/skills/pptx/scripts/office/pack.py +0 -30
  50. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
  51. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
  52. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
  53. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
  54. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
  55. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
  56. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
  57. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
  58. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
  59. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
  60. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
  61. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
  62. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
  63. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
  64. package/skills/pptx/scripts/office/soffice.py +0 -55
  65. package/skills/pptx/scripts/office/unpack.py +5 -27
  66. package/skills/pptx/scripts/office/validate.py +19 -14
  67. package/skills/pptx/scripts/office/validators/base.py +48 -224
  68. package/skills/pptx/scripts/office/validators/docx.py +44 -117
  69. package/skills/pptx/scripts/office/validators/pptx.py +2 -42
  70. package/skills/pptx/scripts/office/validators/redlining.py +3 -40
  71. package/skills/pptx/scripts/thumbnail.py +0 -31
  72. package/skills/xlsx/SKILL.md +3 -26
  73. package/skills/xlsx/scripts/office/helpers/merge_runs.py +1 -33
  74. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +0 -43
  75. package/skills/xlsx/scripts/office/pack.py +0 -30
  76. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
  77. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
  78. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
  79. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
  80. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
  81. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
  82. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
  83. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
  84. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
  85. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
  86. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
  87. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
  88. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
  89. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
  90. package/skills/xlsx/scripts/office/soffice.py +0 -55
  91. package/skills/xlsx/scripts/office/unpack.py +5 -27
  92. package/skills/xlsx/scripts/office/validate.py +19 -14
  93. package/skills/xlsx/scripts/office/validators/base.py +48 -224
  94. package/skills/xlsx/scripts/office/validators/docx.py +44 -117
  95. package/skills/xlsx/scripts/office/validators/pptx.py +2 -42
  96. package/skills/xlsx/scripts/office/validators/redlining.py +3 -40
  97. package/skills/xlsx/scripts/recalc.py +2 -26
  98. package/skills/docx/scripts/__init__.py +0 -1
  99. package/skills/docx/scripts/office/helpers/__init__.py +0 -0
  100. package/skills/docx/scripts/office/validators/__init__.py +0 -15
  101. package/skills/pptx/scripts/__init__.py +0 -0
  102. package/skills/pptx/scripts/office/helpers/__init__.py +0 -0
  103. package/skills/pptx/scripts/office/validators/__init__.py +0 -15
  104. package/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
  105. package/skills/xlsx/scripts/office/validators/__init__.py +0 -15
  106. /package/skills/pdf/{REFERENCE.md → reference.md} +0 -0
@@ -14,14 +14,6 @@ import defusedxml.minidom
14
14
 
15
15
 
16
16
  def merge_runs(input_dir: str) -> tuple[int, str]:
17
- """Merge adjacent runs in document.xml.
18
-
19
- Args:
20
- input_dir: Path to unpacked DOCX directory
21
-
22
- Returns:
23
- (merge_count, message)
24
- """
25
17
  doc_xml = Path(input_dir) / "word" / "document.xml"
26
18
 
27
19
  if not doc_xml.exists():
@@ -31,14 +23,11 @@ def merge_runs(input_dir: str) -> tuple[int, str]:
31
23
  dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8"))
32
24
  root = dom.documentElement
33
25
 
34
- # Clean up elements that block merging
35
26
  _remove_elements(root, "proofErr")
36
27
  _strip_run_rsid_attrs(root)
37
28
 
38
- # Find all containers that have runs
39
29
  containers = {run.parentNode for run in _find_elements(root, "r")}
40
30
 
41
- # Merge runs in each container
42
31
  merge_count = 0
43
32
  for container in containers:
44
33
  merge_count += _merge_runs_in(container)
@@ -50,11 +39,9 @@ def merge_runs(input_dir: str) -> tuple[int, str]:
50
39
  return 0, f"Error: {e}"
51
40
 
52
41
 
53
- # --- Element helpers ---
54
42
 
55
43
 
56
44
  def _find_elements(root, tag: str) -> list:
57
- """Find all elements matching tag name (with or without namespace)."""
58
45
  results = []
59
46
 
60
47
  def traverse(node):
@@ -70,7 +57,6 @@ def _find_elements(root, tag: str) -> list:
70
57
 
71
58
 
72
59
  def _get_child(parent, tag: str):
73
- """Get first child element matching tag name."""
74
60
  for child in parent.childNodes:
75
61
  if child.nodeType == child.ELEMENT_NODE:
76
62
  name = child.localName or child.tagName
@@ -80,7 +66,6 @@ def _get_child(parent, tag: str):
80
66
 
81
67
 
82
68
  def _get_children(parent, tag: str) -> list:
83
- """Get all direct child elements matching tag name."""
84
69
  results = []
85
70
  for child in parent.childNodes:
86
71
  if child.nodeType == child.ELEMENT_NODE:
@@ -91,7 +76,6 @@ def _get_children(parent, tag: str) -> list:
91
76
 
92
77
 
93
78
  def _is_adjacent(elem1, elem2) -> bool:
94
- """Check if two elements are adjacent (only whitespace between them)."""
95
79
  node = elem1.nextSibling
96
80
  while node:
97
81
  if node == elem2:
@@ -104,34 +88,28 @@ def _is_adjacent(elem1, elem2) -> bool:
104
88
  return False
105
89
 
106
90
 
107
- # --- Cleanup functions ---
108
91
 
109
92
 
110
93
  def _remove_elements(root, tag: str):
111
- """Remove all elements matching tag name."""
112
94
  for elem in _find_elements(root, tag):
113
95
  if elem.parentNode:
114
96
  elem.parentNode.removeChild(elem)
115
97
 
116
98
 
117
99
  def _strip_run_rsid_attrs(root):
118
- """Remove rsid attributes from all run elements."""
119
100
  for run in _find_elements(root, "r"):
120
101
  for attr in list(run.attributes.values()):
121
102
  if "rsid" in attr.name.lower():
122
103
  run.removeAttribute(attr.name)
123
104
 
124
105
 
125
- # --- Merge functions ---
126
106
 
127
107
 
128
108
  def _merge_runs_in(container) -> int:
129
- """Merge adjacent runs with identical formatting in a container element."""
130
109
  merge_count = 0
131
110
  run = _first_child_run(container)
132
111
 
133
112
  while run:
134
- # Absorb adjacent runs with same formatting
135
113
  while True:
136
114
  next_elem = _next_element_sibling(run)
137
115
  if next_elem and _is_run(next_elem) and _can_merge(run, next_elem):
@@ -148,7 +126,6 @@ def _merge_runs_in(container) -> int:
148
126
 
149
127
 
150
128
  def _first_child_run(container):
151
- """Get the first run child of a container."""
152
129
  for child in container.childNodes:
153
130
  if child.nodeType == child.ELEMENT_NODE and _is_run(child):
154
131
  return child
@@ -156,7 +133,6 @@ def _first_child_run(container):
156
133
 
157
134
 
158
135
  def _next_element_sibling(node):
159
- """Get the next element sibling, skipping text/whitespace nodes."""
160
136
  sibling = node.nextSibling
161
137
  while sibling:
162
138
  if sibling.nodeType == sibling.ELEMENT_NODE:
@@ -166,25 +142,21 @@ def _next_element_sibling(node):
166
142
 
167
143
 
168
144
  def _next_sibling_run(node):
169
- """Get the next sibling that is a run element."""
170
145
  sibling = node.nextSibling
171
146
  while sibling:
172
147
  if sibling.nodeType == sibling.ELEMENT_NODE:
173
148
  if _is_run(sibling):
174
149
  return sibling
175
- # Skip non-run elements (bookmarks, etc.) but keep looking
176
150
  sibling = sibling.nextSibling
177
151
  return None
178
152
 
179
153
 
180
154
  def _is_run(node) -> bool:
181
- """Check if node is a run element."""
182
155
  name = node.localName or node.tagName
183
156
  return name == "r" or name.endswith(":r")
184
157
 
185
158
 
186
159
  def _can_merge(run1, run2) -> bool:
187
- """Check if two runs have identical formatting."""
188
160
  rpr1 = _get_child(run1, "rPr")
189
161
  rpr2 = _get_child(run2, "rPr")
190
162
 
@@ -192,11 +164,10 @@ def _can_merge(run1, run2) -> bool:
192
164
  return False
193
165
  if rpr1 is None:
194
166
  return True
195
- return rpr1.toxml() == rpr2.toxml() # type: ignore
167
+ return rpr1.toxml() == rpr2.toxml()
196
168
 
197
169
 
198
170
  def _merge_run_content(target, source):
199
- """Move content from source run to target run (excluding rPr)."""
200
171
  for child in list(source.childNodes):
201
172
  if child.nodeType == child.ELEMENT_NODE:
202
173
  name = child.localName or child.tagName
@@ -205,10 +176,8 @@ def _merge_run_content(target, source):
205
176
 
206
177
 
207
178
  def _consolidate_text(run):
208
- """Merge adjacent <w:t> elements within a run."""
209
179
  t_elements = _get_children(run, "t")
210
180
 
211
- # Work backwards to safely remove elements
212
181
  for i in range(len(t_elements) - 1, 0, -1):
213
182
  curr, prev = t_elements[i], t_elements[i - 1]
214
183
 
@@ -222,7 +191,6 @@ def _consolidate_text(run):
222
191
  else:
223
192
  prev.appendChild(run.ownerDocument.createTextNode(merged))
224
193
 
225
- # Preserve whitespace if needed
226
194
  if merged.startswith(" ") or merged.endswith(" "):
227
195
  prev.setAttribute("xml:space", "preserve")
228
196
  elif prev.hasAttribute("xml:space"):
@@ -20,14 +20,6 @@ WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
20
20
 
21
21
 
22
22
  def simplify_redlines(input_dir: str) -> tuple[int, str]:
23
- """Merge adjacent tracked changes from the same author in document.xml.
24
-
25
- Args:
26
- input_dir: Path to unpacked DOCX directory
27
-
28
- Returns:
29
- (merge_count, message)
30
- """
31
23
  doc_xml = Path(input_dir) / "word" / "document.xml"
32
24
 
33
25
  if not doc_xml.exists():
@@ -39,7 +31,6 @@ def simplify_redlines(input_dir: str) -> tuple[int, str]:
39
31
 
40
32
  merge_count = 0
41
33
 
42
- # Find all paragraphs and table cells (containers for content)
43
34
  containers = _find_elements(root, "p") + _find_elements(root, "tc")
44
35
 
45
36
  for container in containers:
@@ -54,10 +45,8 @@ def simplify_redlines(input_dir: str) -> tuple[int, str]:
54
45
 
55
46
 
56
47
  def _merge_tracked_changes_in(container, tag: str) -> int:
57
- """Merge adjacent w:ins or w:del elements from the same author."""
58
48
  merge_count = 0
59
49
 
60
- # Get direct children that are tracked changes of this type
61
50
  tracked = [
62
51
  child
63
52
  for child in container.childNodes
@@ -67,7 +56,6 @@ def _merge_tracked_changes_in(container, tag: str) -> int:
67
56
  if len(tracked) < 2:
68
57
  return 0
69
58
 
70
- # Process from front: merge next into current when possible
71
59
  i = 0
72
60
  while i < len(tracked) - 1:
73
61
  curr = tracked[i]
@@ -78,7 +66,6 @@ def _merge_tracked_changes_in(container, tag: str) -> int:
78
66
  container.removeChild(next_elem)
79
67
  tracked.pop(i + 1)
80
68
  merge_count += 1
81
- # Don't increment i - try to merge more into curr
82
69
  else:
83
70
  i += 1
84
71
 
@@ -86,13 +73,11 @@ def _merge_tracked_changes_in(container, tag: str) -> int:
86
73
 
87
74
 
88
75
  def _is_element(node, tag: str) -> bool:
89
- """Check if node matches the given tag name."""
90
76
  name = node.localName or node.tagName
91
77
  return name == tag or name.endswith(f":{tag}")
92
78
 
93
79
 
94
80
  def _get_author(elem) -> str:
95
- """Get the author attribute from a tracked change element."""
96
81
  author = elem.getAttribute("w:author")
97
82
  if not author:
98
83
  for attr in elem.attributes.values():
@@ -102,12 +87,9 @@ def _get_author(elem) -> str:
102
87
 
103
88
 
104
89
  def _can_merge_tracked(elem1, elem2) -> bool:
105
- """Check if two tracked change elements can be merged."""
106
- # Must be same author
107
90
  if _get_author(elem1) != _get_author(elem2):
108
91
  return False
109
92
 
110
- # Must be truly adjacent (only whitespace between them)
111
93
  node = elem1.nextSibling
112
94
  while node and node != elem2:
113
95
  if node.nodeType == node.ELEMENT_NODE:
@@ -120,7 +102,6 @@ def _can_merge_tracked(elem1, elem2) -> bool:
120
102
 
121
103
 
122
104
  def _merge_tracked_content(target, source):
123
- """Move all children from source tracked change to target."""
124
105
  while source.firstChild:
125
106
  child = source.firstChild
126
107
  source.removeChild(child)
@@ -128,7 +109,6 @@ def _merge_tracked_content(target, source):
128
109
 
129
110
 
130
111
  def _find_elements(root, tag: str) -> list:
131
- """Find all elements matching tag name (with or without namespace)."""
132
112
  results = []
133
113
 
134
114
  def traverse(node):
@@ -144,11 +124,6 @@ def _find_elements(root, tag: str) -> list:
144
124
 
145
125
 
146
126
  def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
147
- """Get authors and their tracked change counts from a document.xml file.
148
-
149
- Returns:
150
- Dict mapping author name to count of tracked changes (w:ins + w:del)
151
- """
152
127
  if not doc_xml_path.exists():
153
128
  return {}
154
129
 
@@ -172,7 +147,6 @@ def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
172
147
 
173
148
 
174
149
  def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
175
- """Get authors and counts from a packed DOCX file."""
176
150
  try:
177
151
  with zipfile.ZipFile(docx_path, "r") as zf:
178
152
  if "word/document.xml" not in zf.namelist():
@@ -196,22 +170,6 @@ def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
196
170
 
197
171
 
198
172
  def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str:
199
- """Infer the author to validate by finding who added tracked changes.
200
-
201
- Compares tracked change counts between modified and original documents.
202
- Returns the author who added new tracked changes.
203
-
204
- Args:
205
- modified_dir: Path to unpacked DOCX directory
206
- original_docx: Path to original DOCX file
207
- default: Default author if no new changes found
208
-
209
- Returns:
210
- Author name to use for validation
211
-
212
- Raises:
213
- ValueError: If multiple authors added new changes (ambiguous)
214
- """
215
173
  modified_xml = modified_dir / "word" / "document.xml"
216
174
  modified_authors = get_tracked_change_authors(modified_xml)
217
175
 
@@ -220,7 +178,6 @@ def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude
220
178
 
221
179
  original_authors = _get_authors_from_docx(original_docx)
222
180
 
223
- # Calculate new changes per author (modified count - original count)
224
181
  new_changes: dict[str, int] = {}
225
182
  for author, count in modified_authors.items():
226
183
  original_count = original_authors.get(author, 0)
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env python3
2
1
  """Pack a directory into a DOCX, PPTX, or XLSX file.
3
2
 
4
3
  Validates with auto-repair, condenses XML formatting, and creates the Office file.
@@ -29,18 +28,6 @@ def pack(
29
28
  validate: bool = True,
30
29
  infer_author_func=None,
31
30
  ) -> tuple[None, str]:
32
- """Pack a directory into an Office file (DOCX, PPTX, or XLSX).
33
-
34
- Args:
35
- input_directory: Path to unpacked Office document directory
36
- output_file: Path to output Office file
37
- original_file: Path to original file for validation comparison
38
- validate: If True, run validation with auto-repair before packing
39
- infer_author_func: Optional function to infer author for redlining validation
40
-
41
- Returns:
42
- (None, message) - message indicates success or failure
43
- """
44
31
  input_dir = Path(input_directory)
45
32
  output_path = Path(output_file)
46
33
  suffix = output_path.suffix.lower()
@@ -51,7 +38,6 @@ def pack(
51
38
  if suffix not in {".docx", ".pptx", ".xlsx"}:
52
39
  return None, f"Error: {output_file} must be a .docx, .pptx, or .xlsx file"
53
40
 
54
- # Validate with auto-repair if requested and original file provided
55
41
  if validate and original_file:
56
42
  original_path = Path(original_file)
57
43
  if original_path.exists():
@@ -63,17 +49,14 @@ def pack(
63
49
  if not success:
64
50
  return None, f"Error: Validation failed for {input_dir}"
65
51
 
66
- # Work in temporary directory to avoid modifying original
67
52
  with tempfile.TemporaryDirectory() as temp_dir:
68
53
  temp_content_dir = Path(temp_dir) / "content"
69
54
  shutil.copytree(input_dir, temp_content_dir)
70
55
 
71
- # Process XML files to remove pretty-printing whitespace
72
56
  for pattern in ["*.xml", "*.rels"]:
73
57
  for xml_file in temp_content_dir.rglob(pattern):
74
58
  _condense_xml(xml_file)
75
59
 
76
- # Create final Office file as zip archive
77
60
  output_path.parent.mkdir(parents=True, exist_ok=True)
78
61
  with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
79
62
  for f in temp_content_dir.rglob("*"):
@@ -89,16 +72,10 @@ def _run_validation(
89
72
  suffix: str,
90
73
  infer_author_func=None,
91
74
  ) -> tuple[bool, str | None]:
92
- """Run validation with auto-repair.
93
-
94
- Returns:
95
- (success, output) - success is True if all validations pass
96
- """
97
75
  output_lines = []
98
76
  validators = []
99
77
 
100
78
  if suffix == ".docx":
101
- # Infer author for redlining validation
102
79
  author = "Claude"
103
80
  if infer_author_func:
104
81
  try:
@@ -112,17 +89,14 @@ def _run_validation(
112
89
  ]
113
90
  elif suffix == ".pptx":
114
91
  validators = [PPTXSchemaValidator(unpacked_dir, original_file)]
115
- # xlsx has no schema validator yet
116
92
 
117
93
  if not validators:
118
94
  return True, None
119
95
 
120
- # Run auto-repair
121
96
  total_repairs = sum(v.repair() for v in validators)
122
97
  if total_repairs:
123
98
  output_lines.append(f"Auto-repaired {total_repairs} issue(s)")
124
99
 
125
- # Run validation
126
100
  success = all(v.validate() for v in validators)
127
101
 
128
102
  if success:
@@ -132,18 +106,14 @@ def _run_validation(
132
106
 
133
107
 
134
108
  def _condense_xml(xml_file: Path) -> None:
135
- """Strip unnecessary whitespace and remove comments from XML."""
136
109
  try:
137
110
  with open(xml_file, encoding="utf-8") as f:
138
111
  dom = defusedxml.minidom.parse(f)
139
112
 
140
- # Process each element to remove whitespace and comments
141
113
  for element in dom.getElementsByTagName("*"):
142
- # Skip text elements (w:t, a:t, etc.) - preserve their content
143
114
  if element.tagName.endswith(":t"):
144
115
  continue
145
116
 
146
- # Remove whitespace-only text nodes and comment nodes
147
117
  for child in list(element.childNodes):
148
118
  if (
149
119
  child.nodeType == child.TEXT_NODE