@panda-agent/panda-cli 0.1.29 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/pandacli.mjs +6 -1
- package/bundled-preset-skills/.gitkeep +0 -0
- package/bundled-preset-skills/README.md +17 -0
- package/bundled-preset-skills/docx/.skill-metadata.yaml +173 -0
- package/bundled-preset-skills/docx/LICENSE.txt +30 -0
- package/bundled-preset-skills/docx/SKILL.md +589 -0
- package/bundled-preset-skills/docx/scripts/__init__.py +1 -0
- package/bundled-preset-skills/docx/scripts/accept_changes.py +206 -0
- package/bundled-preset-skills/docx/scripts/comment.py +442 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/__init__.py +1 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/merge_runs.py +190 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
- package/bundled-preset-skills/docx/scripts/office/pack.py +167 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bundled-preset-skills/docx/scripts/office/soffice.py +194 -0
- package/bundled-preset-skills/docx/scripts/office/unpack.py +145 -0
- package/bundled-preset-skills/docx/scripts/office/validate.py +114 -0
- package/bundled-preset-skills/docx/scripts/office/validators/__init__.py +16 -0
- package/bundled-preset-skills/docx/scripts/office/validators/base.py +733 -0
- package/bundled-preset-skills/docx/scripts/office/validators/docx.py +354 -0
- package/bundled-preset-skills/docx/scripts/office/validators/pptx.py +230 -0
- package/bundled-preset-skills/docx/scripts/office/validators/redlining.py +212 -0
- package/bundled-preset-skills/docx/scripts/templates/comments.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/people.xml +3 -0
- package/bundled-preset-skills/frontend-design/LICENSE.txt +177 -0
- package/bundled-preset-skills/frontend-design/SKILL.md +42 -0
- package/bundled-preset-skills/pdf/.skill-metadata.yaml +273 -0
- package/bundled-preset-skills/pdf/LICENSE.txt +30 -0
- package/bundled-preset-skills/pdf/SKILL.md +324 -0
- package/bundled-preset-skills/pdf/advanced-reference.md +609 -0
- package/bundled-preset-skills/pdf/form-filling-guide.md +318 -0
- package/bundled-preset-skills/pdf/forms.md +294 -0
- package/bundled-preset-skills/pdf/reference.md +612 -0
- package/bundled-preset-skills/pdf/scripts/check_bounding_boxes.py +198 -0
- package/bundled-preset-skills/pdf/scripts/check_fillable_fields.py +64 -0
- package/bundled-preset-skills/pdf/scripts/convert_pdf_to_images.py +102 -0
- package/bundled-preset-skills/pdf/scripts/create_validation_image.py +125 -0
- package/bundled-preset-skills/pdf/scripts/extract_form_field_info.py +220 -0
- package/bundled-preset-skills/pdf/scripts/extract_form_structure.py +202 -0
- package/bundled-preset-skills/pdf/scripts/fill_fillable_fields.py +205 -0
- package/bundled-preset-skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
- package/bundled-preset-skills/pptx-generator/SKILL.md +204 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/business.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/minimal.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/modern.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
- package/bundled-preset-skills/pptx-generator/references/collaboration_guide.md +381 -0
- package/bundled-preset-skills/pptx-generator/references/json_format_spec.md +215 -0
- package/bundled-preset-skills/pptx-generator/references/layout_guide.md +290 -0
- package/bundled-preset-skills/pptx-generator/scripts/json_validator.py +194 -0
- package/bundled-preset-skills/pptx-generator/scripts/pptx_builder.py +340 -0
- package/bundled-preset-skills/pptx-generator/scripts/pptx_validator.py +162 -0
- package/bundled-preset-skills/skill-creator/LICENSE.txt +202 -0
- package/bundled-preset-skills/skill-creator/SKILL.md +479 -0
- package/bundled-preset-skills/skill-creator/agents/analyzer.md +274 -0
- package/bundled-preset-skills/skill-creator/agents/comparator.md +202 -0
- package/bundled-preset-skills/skill-creator/agents/grader.md +223 -0
- package/bundled-preset-skills/skill-creator/assets/eval_review.html +146 -0
- package/bundled-preset-skills/skill-creator/eval-viewer/generate_review.py +471 -0
- package/bundled-preset-skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/bundled-preset-skills/skill-creator/references/schemas.md +430 -0
- package/bundled-preset-skills/skill-creator/scripts/__init__.py +0 -0
- package/bundled-preset-skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/bundled-preset-skills/skill-creator/scripts/generate_report.py +326 -0
- package/bundled-preset-skills/skill-creator/scripts/improve_description.py +248 -0
- package/bundled-preset-skills/skill-creator/scripts/package_skill.py +136 -0
- package/bundled-preset-skills/skill-creator/scripts/quick_validate.py +103 -0
- package/bundled-preset-skills/skill-creator/scripts/run_eval.py +310 -0
- package/bundled-preset-skills/skill-creator/scripts/run_loop.py +332 -0
- package/bundled-preset-skills/skill-creator/scripts/utils.py +47 -0
- package/bundled-preset-skills/xlsx/.skill-metadata.yaml +185 -0
- package/bundled-preset-skills/xlsx/LICENSE.txt +30 -0
- package/bundled-preset-skills/xlsx/SKILL.md +233 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/__init__.py +1 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
- package/bundled-preset-skills/xlsx/scripts/office/pack.py +162 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bundled-preset-skills/xlsx/scripts/office/soffice.py +185 -0
- package/bundled-preset-skills/xlsx/scripts/office/unpack.py +146 -0
- package/bundled-preset-skills/xlsx/scripts/office/validate.py +108 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/__init__.py +13 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/base.py +800 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/docx.py +383 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/pptx.py +250 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/redlining.py +229 -0
- package/bundled-preset-skills/xlsx/scripts/recalc.py +296 -0
- package/dist/panda-cli-ink.bundle.mjs +276 -342
- package/package.json +6 -4
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ──────────────────────────────────────────────────────────────────
|
|
3
|
+
# Tracked-change (redlining) consistency validator for DOCX.
|
|
4
|
+
#
|
|
5
|
+
# Verifies that the textual content of the modified document matches
|
|
6
|
+
# the original *after* stripping out all tracked changes attributed
|
|
7
|
+
# to the specified author. If there's a mismatch, a word-level diff
|
|
8
|
+
# is produced via `git diff --word-diff`.
|
|
9
|
+
# ──────────────────────────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
import subprocess
|
|
12
|
+
import tempfile
|
|
13
|
+
import zipfile
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
# WordprocessingML main namespace; every w:* element and attribute lives here.
_W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"


class RedliningValidator:
    """Ensure that an author's changes are fully tracked in the DOCX XML.

    The check removes every tracked insertion and restores every tracked
    deletion attributed to ``author`` in both the modified and the original
    ``word/document.xml``. If the remaining paragraph text differs, some edit
    was made without a matching ``<w:ins>``/``<w:del>`` wrapper.
    """

    def __init__(self, unpacked_dir, original_docx, verbose=False, author="Claude"):
        """Store the inputs; no file is touched until :meth:`validate` runs.

        Args:
            unpacked_dir: Directory holding the unpacked, modified DOCX.
            original_docx: Path to the untouched ``.docx`` archive.
            verbose: When true, also print ``PASSED`` messages.
            author: Value of ``w:author`` whose changes are checked.
        """
        self.work_dir = Path(unpacked_dir)
        self.ref_docx = Path(original_docx)
        self.verbose = verbose
        self.author = author
        self._ns = {"w": _W_NS}

    # kept for interface compat
    @property
    def unpacked_dir(self):
        """Alias of ``work_dir``."""
        return self.work_dir

    @property
    def original_docx(self):
        """Alias of ``ref_docx``."""
        return self.ref_docx

    @property
    def namespaces(self):
        """Prefix-to-URI mapping used for ElementTree path queries."""
        return self._ns

    def repair(self) -> int:
        """Perform no repairs; always returns 0."""
        return 0

    # ──────────────────────────────────────────────────────────────

    def validate(self):
        """Run the tracked-change consistency check.

        Returns:
            ``True`` when the modified document has no tracked changes by the
            author, or when both documents yield the same text after the
            author's changes are stripped; ``False`` otherwise. Failure
            details are printed to stdout.
        """
        import xml.etree.ElementTree as ET

        mod_xml = self.work_dir / "word" / "document.xml"
        if not mod_xml.exists():
            print(f"FAILED - Modified document.xml not found at {mod_xml}")
            return False

        # Quick check: any tracked changes by this author?
        try:
            mod_root = ET.parse(mod_xml).getroot()
        except (ET.ParseError, OSError):
            # Not fatal here: the full comparison below parses the file again
            # and reports the problem after the original has been unpacked.
            mod_root = None

        if mod_root is not None:
            w_author = f"{{{_W_NS}}}author"
            tracked_by_author = any(
                el.get(w_author) == self.author
                for path in (".//w:del", ".//w:ins")
                for el in mod_root.findall(path, self._ns)
            )
            if not tracked_by_author:
                if self.verbose:
                    print(f"PASSED - No tracked changes by {self.author} found.")
                return True

        # Full comparison
        with tempfile.TemporaryDirectory() as td:
            tmp = Path(td)

            try:
                with zipfile.ZipFile(self.ref_docx, "r") as zf:
                    zf.extractall(tmp)
            except Exception as exc:
                # Any failure to read the archive is reported, not raised.
                print(f"FAILED - Error unpacking original docx: {exc}")
                return False

            orig_xml = tmp / "word" / "document.xml"
            if not orig_xml.exists():
                print(f"FAILED - Original document.xml not found in {self.ref_docx}")
                return False

            try:
                if mod_root is None:
                    mod_root = ET.parse(mod_xml).getroot()
                orig_root = ET.parse(orig_xml).getroot()
            except ET.ParseError as exc:
                print(f"FAILED - Error parsing XML files: {exc}")
                return False

        self._strip_author_changes(orig_root)
        self._strip_author_changes(mod_root)

        txt_mod = self._body_text(mod_root)
        txt_orig = self._body_text(orig_root)

        if txt_mod != txt_orig:
            print(self._build_diff_report(txt_orig, txt_mod))
            return False

        if self.verbose:
            print(f"PASSED - All changes by {self.author} are properly tracked")
        return True

    # ──────────────────────────────────────────────────────────────
    # Diff report generation
    # ──────────────────────────────────────────────────────────────

    def _build_diff_report(self, old_text, new_text):
        """Return the multi-line failure message, with a diff when available."""
        parts = [
            f"FAILED - Document text doesn't match after removing {self.author}'s tracked changes",
            "",
            "Likely causes:",
            " 1. Modified text inside another author's <w:ins> or <w:del> tags",
            " 2. Made edits without proper tracked changes",
            " 3. Didn't nest <w:del> inside <w:ins> when deleting another's insertion",
            "",
            "For pre-redlined documents, use correct patterns:",
            " - To reject another's INSERTION: Nest <w:del> inside their <w:ins>",
            " - To restore another's DELETION: Add new <w:ins> AFTER their <w:del>",
            "",
        ]
        diff = self._word_diff(old_text, new_text)
        if diff:
            parts += ["Differences:", "============", diff]
        else:
            parts.append("Unable to generate word diff (git not available)")
        return "\n".join(parts)

    def _word_diff(self, a, b):
        """Diff two texts with ``git diff --word-diff``.

        A character-granular diff is tried first, then git's default word
        granularity. Returns the hunk lines joined by newlines, or ``None``
        when git cannot be run or produces no hunk content.
        """
        try:
            with tempfile.TemporaryDirectory() as td:
                p = Path(td)
                fa, fb = p / "original.txt", p / "modified.txt"
                fa.write_text(a, encoding="utf-8")
                fb.write_text(b, encoding="utf-8")

                for extra_args in (["--word-diff-regex=."], []):
                    proc = subprocess.run(
                        [
                            "git", "diff", "--word-diff=plain", "-U0",
                            "--no-index", str(fa), str(fb),
                        ] + extra_args,
                        capture_output=True, text=True,
                    )
                    if not proc.stdout.strip():
                        continue
                    content = []
                    active = False
                    for line in proc.stdout.split("\n"):
                        if line.startswith("@@"):
                            # Everything before the first hunk header is
                            # file-name boilerplate.
                            active = True
                            continue
                        if active and line.strip():
                            content.append(line)
                    if content:
                        return "\n".join(content)
        except (OSError, subprocess.SubprocessError, ValueError):
            # Best effort only: a missing git binary, an unwritable temp dir
            # or undecodable output just means no diff is shown.
            pass
        return None

    # ──────────────────────────────────────────────────────────────
    # XML manipulation
    # ──────────────────────────────────────────────────────────────

    def _strip_author_changes(self, root):
        """Remove this author's tracked insertions; inline their deletions.

        ``root`` is modified in place. Both passes walk a snapshot of the
        element list because ElementTree leaves the result of ``iter()``
        undefined when the tree is restructured during iteration.
        """
        ins_tag = f"{{{_W_NS}}}ins"
        del_tag = f"{{{_W_NS}}}del"
        deltext_tag = f"{{{_W_NS}}}delText"
        t_tag = f"{{{_W_NS}}}t"
        auth_key = f"{{{_W_NS}}}author"

        # Pass 1: remove <w:ins> by this author entirely
        for parent in list(root.iter()):
            doomed = [
                ch for ch in parent
                if ch.tag == ins_tag and ch.get(auth_key) == self.author
            ]
            for el in doomed:
                parent.remove(el)

        # Pass 2: inline <w:del> by this author (convert delText → t)
        for parent in list(root.iter()):
            targets = [
                (idx, ch)
                for idx, ch in enumerate(parent)
                if ch.tag == del_tag and ch.get(auth_key) == self.author
            ]
            # Back to front, so earlier indices stay valid while splicing.
            for idx, del_el in reversed(targets):
                for nd in list(del_el.iter(deltext_tag)):
                    nd.tag = t_tag
                for kid in reversed(list(del_el)):
                    parent.insert(idx, kid)
                parent.remove(del_el)

    def _body_text(self, root):
        """Return the text of all non-empty paragraphs, one per line.

        Only ``<w:t>`` runs are collected, so text still held in
        ``<w:delText>`` (other authors' deletions) is ignored.
        """
        p_tag = f"{{{_W_NS}}}p"
        t_tag = f"{{{_W_NS}}}t"

        paragraphs = []
        for p in root.findall(f".//{p_tag}"):
            joined = "".join(t.text for t in p.findall(f".//{t_tag}") if t.text)
            if joined:
                paragraphs.append(joined)
        return "\n".join(paragraphs)


if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Spreadsheet formula recalculation via LibreOffice.
|
|
3
|
+
|
|
4
|
+
Deploys a LibreOffice Basic macro, invokes headless recalculation on an Excel
|
|
5
|
+
workbook, then scans all cells for error markers and emits a structured JSON
|
|
6
|
+
report to stdout.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python recalc.py <excel_file> [timeout_seconds]
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import platform
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
from dataclasses import asdict, dataclass, field
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
import openpyxl
|
|
23
|
+
|
|
24
|
+
from office.soffice import get_soffice_env
|
|
25
|
+
|
|
26
|
+
# ─── Platform Constants ──────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
# OS name reported by platform.system(); selects the macro directory and the
# timeout wrapper further down in this module.
_SYSTEM = platform.system()

# Per-OS location of LibreOffice's user-profile "Standard" Basic library.
# Systems not listed here fall back to the "Linux" entry in _deploy_macro().
_MACRO_BASE_DIRS: dict[str, str] = {
    "Darwin": "~/Library/Application Support/LibreOffice/4/user/basic/Standard",
    "Linux": "~/.config/libreoffice/4/user/basic/Standard",
}

# File name of the Basic module written into the directory above.
_MACRO_MODULE_NAME = "Module1.xba"

# Script URL passed to soffice on the command line to run the deployed macro.
_MACRO_ENTRY_POINT = (
    "vnd.sun.star.script:Standard.Module1.RecalculateAndSave"
    "?language=Basic&location=application"
)

# Content of Module1.xba: a StarBasic module whose RecalculateAndSave sub
# recalculates the current document, stores it and closes it.
_MACRO_XML = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<!DOCTYPE script:module PUBLIC '
    '"-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "module.dtd">\n'
    '<script:module xmlns:script="http://openoffice.org/2000/script" '
    'script:name="Module1" script:language="StarBasic">\n'
    " Sub RecalculateAndSave()\n"
    " ThisComponent.calculateAll()\n"
    " ThisComponent.store()\n"
    " ThisComponent.close(True)\n"
    " End Sub\n"
    "</script:module>"
)

# ─── Excel Error Markers ─────────────────────────────────────────────

# Error strings searched for in cached cell values by _scan_errors().
CELL_ERROR_MARKERS: tuple[str, ...] = (
    "#VALUE!", "#DIV/0!", "#REF!", "#NAME?", "#NULL!", "#NUM!", "#N/A",
)

# Upper bound on the cell locations reported per error marker.
_MAX_ERROR_LOCATIONS = 20
# Timeout used when the caller does not supply one.
_DEFAULT_TIMEOUT_SECONDS = 30
# Exit status that recalc() treats as "timed out" rather than as a failure.
_TIMEOUT_EXIT_CODE = 124
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ─── Data Structures ─────────────────────────────────────────────────
|
|
68
|
+
|
|
69
|
+
@dataclass
class ErrorBucket:
    """All occurrences of a single error marker found in a workbook."""

    # Number of cells in which the marker was found.
    count: int
    # Cell references for those occurrences.
    locations: list[str]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class RecalcReport:
    """Outcome of one recalculation run followed by an error scan.

    When ``error`` is set the report describes a failure and all other
    fields are ignored by :meth:`to_dict`.
    """

    status: str = "success"
    total_errors: int = 0
    total_formulas: int = 0
    error_summary: dict[str, ErrorBucket] = field(default_factory=dict)
    error: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert the report into the plain dict that is emitted as JSON.

        Returns ``{"error": ...}`` for a failed run; otherwise a dict with
        the keys ``status``, ``total_errors``, ``error_summary`` and
        ``total_formulas``, in that order.
        """
        if self.error is not None:
            return {"error": self.error}

        summary = {}
        for kind, bucket in self.error_summary.items():
            summary[kind] = asdict(bucket)

        return {
            "status": self.status,
            "total_errors": self.total_errors,
            "error_summary": summary,
            "total_formulas": self.total_formulas,
        }
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ─── Timeout Utility ─────────────────────────────────────────────────
|
|
101
|
+
|
|
102
|
+
def _has_gtimeout() -> bool:
|
|
103
|
+
"""Check whether GNU coreutils ``gtimeout`` binary exists on this host."""
|
|
104
|
+
try:
|
|
105
|
+
subprocess.run(
|
|
106
|
+
["gtimeout", "--version"],
|
|
107
|
+
capture_output=True,
|
|
108
|
+
timeout=1,
|
|
109
|
+
check=False,
|
|
110
|
+
)
|
|
111
|
+
except (FileNotFoundError, subprocess.TimeoutExpired):
|
|
112
|
+
return False
|
|
113
|
+
return True
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ─── Macro Deployment ────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
def _deploy_macro() -> bool:
    """Ensure the RecalculateAndSave Basic macro is present in the LO user dir.

    Returns:
        ``True`` when the macro file already contains ``RecalculateAndSave``
        or was written successfully; ``False`` when ``soffice`` cannot be
        found while bootstrapping the profile, or when the directory or the
        macro file cannot be created.
    """
    base_template = _MACRO_BASE_DIRS.get(_SYSTEM, _MACRO_BASE_DIRS["Linux"])
    base_dir = Path(base_template).expanduser()
    target = base_dir / _MACRO_MODULE_NAME

    # Already deployed? An unreadable or undecodable file is treated as
    # "not deployed" and rewritten below instead of crashing the caller.
    try:
        if target.exists() and "RecalculateAndSave" in target.read_text(
            encoding="utf-8"
        ):
            return True
    except (OSError, UnicodeDecodeError):
        pass

    # Bootstrap LibreOffice profile when the directory tree is missing.
    if not base_dir.exists():
        try:
            subprocess.run(
                ["soffice", "--headless", "--terminate_after_init"],
                capture_output=True,
                timeout=10,
                env=get_soffice_env(),
            )
        except FileNotFoundError:
            return False
        except subprocess.TimeoutExpired:
            # A slow or hung first start must not abort deployment; the
            # directory is created explicitly just below.
            pass

    try:
        base_dir.mkdir(parents=True, exist_ok=True)
        # The module's XML declaration says UTF-8, so write it as such
        # regardless of the locale's default encoding.
        target.write_text(_MACRO_XML, encoding="utf-8")
    except OSError:
        return False
    return True
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ─── Workbook Inspection ─────────────────────────────────────────────
|
|
153
|
+
|
|
154
|
+
def _count_formulas(filepath: Path) -> int:
    """Count the cells in *filepath* whose stored value is a formula string.

    The workbook is loaded with ``data_only=False`` so formula cells expose
    their formula text; a cell counts when its value is a ``str`` starting
    with ``"="``. Only worksheets are visited: chartsheets have no cell grid
    and would otherwise fail on ``iter_rows``.
    """
    wb = openpyxl.load_workbook(str(filepath), data_only=False)
    try:
        total = 0
        for ws in wb.worksheets:
            for row in ws.iter_rows():
                for cell in row:
                    if isinstance(cell.value, str) and cell.value.startswith("="):
                        total += 1
        return total
    finally:
        # Release the workbook even if iteration fails part-way.
        wb.close()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _scan_errors(filepath: Path) -> tuple[int, dict[str, ErrorBucket]]:
    """Scan cached cell values for Excel error strings.

    The workbook is loaded with ``data_only=True`` so formula cells yield
    their cached results. A string cell is counted once, under the first
    marker of ``CELL_ERROR_MARKERS`` that occurs anywhere in its value.
    Only worksheets are visited: chartsheets have no cell grid and would
    otherwise fail on ``iter_rows``.

    Returns a ``(total_error_count, buckets)`` tuple where *buckets* maps each
    error marker that has at least one occurrence to an :class:`ErrorBucket`
    holding the full count and at most ``_MAX_ERROR_LOCATIONS`` locations of
    the form ``"<sheet title>!<cell coordinate>"``.
    """
    wb = openpyxl.load_workbook(str(filepath), data_only=True)
    raw_buckets: dict[str, list[str]] = {m: [] for m in CELL_ERROR_MARKERS}
    total = 0

    try:
        for ws in wb.worksheets:
            for row in ws.iter_rows():
                for cell in row:
                    value = cell.value
                    if not isinstance(value, str):
                        continue
                    for marker in CELL_ERROR_MARKERS:
                        if marker in value:
                            raw_buckets[marker].append(
                                f"{ws.title}!{cell.coordinate}"
                            )
                            total += 1
                            break  # one cell contributes to one bucket only
    finally:
        # Release the workbook even if iteration fails part-way.
        wb.close()

    buckets: dict[str, ErrorBucket] = {
        kind: ErrorBucket(count=len(locs), locations=locs[:_MAX_ERROR_LOCATIONS])
        for kind, locs in raw_buckets.items()
        if locs
    }
    return total, buckets
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# ─── Core Recalculation ──────────────────────────────────────────────
|
|
206
|
+
|
|
207
|
+
def _build_command(filepath: str, timeout: int) -> list[str]:
    """Build the argv that runs the recalculation macro on *filepath*.

    On Linux the command is prefixed with ``timeout <seconds>``; on macOS it
    is prefixed with ``gtimeout <seconds>`` when that binary is available.
    Everywhere else the bare ``soffice`` command is returned.
    """
    soffice_argv = [
        "soffice",
        "--headless",
        "--norestore",
        _MACRO_ENTRY_POINT,
        filepath,
    ]

    if _SYSTEM == "Linux":
        wrapper = "timeout"
    elif _SYSTEM == "Darwin" and _has_gtimeout():
        wrapper = "gtimeout"
    else:
        return soffice_argv

    return [wrapper, str(timeout), *soffice_argv]
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def recalc(filename: str, timeout: int = _DEFAULT_TIMEOUT_SECONDS) -> dict:
    """Run the LO macro on *filename* then inspect the result for errors.

    Args:
        filename: Path to the workbook to recalculate in place.
        timeout: Seconds the LibreOffice run may take. It is enforced by the
            ``timeout``/``gtimeout`` wrapper when the command has one, and by
            ``subprocess`` itself otherwise.

    Returns:
        A plain ``dict`` suitable for JSON serialisation: either
        ``{"error": ...}`` or the status / error-summary report. Failures
        are reported in the dict, not raised.
    """
    source = Path(filename)

    if not source.exists():
        return RecalcReport(error=f"File {filename} does not exist").to_dict()

    absolute_path = str(source.absolute())

    if not _deploy_macro():
        return RecalcReport(error="Failed to setup LibreOffice macro").to_dict()

    argv = _build_command(absolute_path, timeout)

    # Without a wrapper in front of soffice nothing else bounds the run, so
    # apply the limit here; otherwise the timeout argument would be ignored
    # and a stuck soffice would block forever.
    has_wrapper = argv[0] != "soffice"

    try:
        proc = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            env=get_soffice_env(),
            timeout=None if has_wrapper else timeout,
        )
    except FileNotFoundError:
        return RecalcReport(
            error="LibreOffice (soffice) is not installed or not in PATH",
        ).to_dict()
    except subprocess.TimeoutExpired:
        # Same outcome as the wrapper's timeout status: go on and scan.
        returncode, stderr = _TIMEOUT_EXIT_CODE, ""
    else:
        returncode, stderr = proc.returncode, proc.stderr

    if returncode not in (0, _TIMEOUT_EXIT_CODE):
        msg = stderr or "Unknown error during recalculation"
        # NOTE(review): this condition is true for almost every stderr text,
        # so the raw message below is rarely surfaced. Kept as is because
        # callers may match on the string; confirm the intended heuristic.
        if "Module1" in msg or "RecalculateAndSave" not in msg:
            return RecalcReport(
                error="LibreOffice macro not configured properly",
            ).to_dict()
        return RecalcReport(error=msg).to_dict()

    try:
        err_total, buckets = _scan_errors(source)

        report = RecalcReport(
            status="success" if err_total == 0 else "errors_found",
            total_errors=err_total,
            error_summary=buckets,
            total_formulas=_count_formulas(source),
        )
        return report.to_dict()

    except Exception as exc:
        # Top-level boundary of the tool: a workbook that cannot be read
        # becomes an error report rather than a traceback.
        return RecalcReport(error=str(exc)).to_dict()
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# ─── CLI Entry Point ─────────────────────────────────────────────────
|
|
273
|
+
|
|
274
|
+
def main() -> None:
    """Parse CLI arguments, run the recalculation and print the JSON report.

    Exits with status 1 after printing to stderr when the file argument is
    missing or the optional timeout is not an integer.
    """
    if len(sys.argv) < 2:
        sys.stderr.write(
            "Usage: python recalc.py <excel_file> [timeout_seconds]\n\n"
            "Recalculates all formulas in an Excel file using LibreOffice\n\n"
            "Returns JSON with error details:\n"
            " - status: 'success' or 'errors_found'\n"
            " - total_errors: Total number of Excel errors found\n"
            " - total_formulas: Number of formulas in the file\n"
            " - error_summary: Breakdown by error type with locations\n"
            " - #VALUE!, #DIV/0!, #REF!, #NAME?, #NULL!, #NUM!, #N/A\n"
        )
        sys.exit(1)

    filepath = sys.argv[1]

    if len(sys.argv) > 2:
        try:
            timeout = int(sys.argv[2])
        except ValueError:
            # Give a readable message instead of a raw traceback.
            sys.stderr.write(
                f"Invalid timeout_seconds {sys.argv[2]!r}: expected an integer\n"
            )
            sys.exit(1)
    else:
        timeout = _DEFAULT_TIMEOUT_SECONDS

    print(json.dumps(recalc(filepath, timeout), indent=2))


if __name__ == "__main__":
    main()
|