@panda-agent/panda-cli 0.1.29 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/panda-cli-ink.bundle.mjs +258 -247
- package/package.json +6 -4
- package/skills/.gitkeep +0 -0
- package/skills/README.md +13 -0
- package/skills/docx/.skill-metadata.yaml +173 -0
- package/skills/docx/LICENSE.txt +30 -0
- package/skills/docx/SKILL.md +589 -0
- package/skills/docx/scripts/__init__.py +1 -0
- package/skills/docx/scripts/accept_changes.py +206 -0
- package/skills/docx/scripts/comment.py +442 -0
- package/skills/docx/scripts/office/helpers/__init__.py +1 -0
- package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
- package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
- package/skills/docx/scripts/office/pack.py +167 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/docx/scripts/office/soffice.py +194 -0
- package/skills/docx/scripts/office/unpack.py +145 -0
- package/skills/docx/scripts/office/validate.py +114 -0
- package/skills/docx/scripts/office/validators/__init__.py +16 -0
- package/skills/docx/scripts/office/validators/base.py +733 -0
- package/skills/docx/scripts/office/validators/docx.py +354 -0
- package/skills/docx/scripts/office/validators/pptx.py +230 -0
- package/skills/docx/scripts/office/validators/redlining.py +212 -0
- package/skills/docx/scripts/templates/comments.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/skills/docx/scripts/templates/people.xml +3 -0
- package/skills/frontend-design/LICENSE.txt +177 -0
- package/skills/frontend-design/SKILL.md +42 -0
- package/skills/pdf/.skill-metadata.yaml +273 -0
- package/skills/pdf/LICENSE.txt +30 -0
- package/skills/pdf/SKILL.md +324 -0
- package/skills/pdf/advanced-reference.md +609 -0
- package/skills/pdf/form-filling-guide.md +318 -0
- package/skills/pdf/forms.md +294 -0
- package/skills/pdf/reference.md +612 -0
- package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
- package/skills/pdf/scripts/check_fillable_fields.py +64 -0
- package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
- package/skills/pdf/scripts/create_validation_image.py +125 -0
- package/skills/pdf/scripts/extract_form_field_info.py +220 -0
- package/skills/pdf/scripts/extract_form_structure.py +202 -0
- package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
- package/skills/pptx-generator/SKILL.md +204 -0
- package/skills/pptx-generator/assets/styles/business.json +8 -0
- package/skills/pptx-generator/assets/styles/minimal.json +8 -0
- package/skills/pptx-generator/assets/styles/modern.json +8 -0
- package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
- package/skills/pptx-generator/references/collaboration_guide.md +381 -0
- package/skills/pptx-generator/references/json_format_spec.md +215 -0
- package/skills/pptx-generator/references/layout_guide.md +290 -0
- package/skills/pptx-generator/scripts/json_validator.py +194 -0
- package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
- package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
- package/skills/skill-creator/LICENSE.txt +202 -0
- package/skills/skill-creator/SKILL.md +479 -0
- package/skills/skill-creator/agents/analyzer.md +274 -0
- package/skills/skill-creator/agents/comparator.md +202 -0
- package/skills/skill-creator/agents/grader.md +223 -0
- package/skills/skill-creator/assets/eval_review.html +146 -0
- package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
- package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/skills/skill-creator/references/schemas.md +430 -0
- package/skills/skill-creator/scripts/__init__.py +0 -0
- package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/skill-creator/scripts/generate_report.py +326 -0
- package/skills/skill-creator/scripts/improve_description.py +248 -0
- package/skills/skill-creator/scripts/package_skill.py +136 -0
- package/skills/skill-creator/scripts/quick_validate.py +103 -0
- package/skills/skill-creator/scripts/run_eval.py +310 -0
- package/skills/skill-creator/scripts/run_loop.py +332 -0
- package/skills/skill-creator/scripts/utils.py +47 -0
- package/skills/xlsx/.skill-metadata.yaml +185 -0
- package/skills/xlsx/LICENSE.txt +30 -0
- package/skills/xlsx/SKILL.md +233 -0
- package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
- package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
- package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
- package/skills/xlsx/scripts/office/pack.py +162 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/xlsx/scripts/office/soffice.py +185 -0
- package/skills/xlsx/scripts/office/unpack.py +146 -0
- package/skills/xlsx/scripts/office/validate.py +108 -0
- package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
- package/skills/xlsx/scripts/office/validators/base.py +800 -0
- package/skills/xlsx/scripts/office/validators/docx.py +383 -0
- package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
- package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
- package/skills/xlsx/scripts/recalc.py +296 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2018/wordml/cex" targetNamespace="http://schemas.microsoft.com/office/word/2018/wordml/cex">
|
|
2
|
+
<xsd:import id="w16" namespace="http://schemas.microsoft.com/office/word/2018/wordml" schemaLocation="wml-2018.xsd"/>
|
|
3
|
+
<xsd:import id="w" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
4
|
+
<xsd:import id="s" namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" schemaLocation="../ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd"/>
|
|
5
|
+
<xsd:complexType name="CT_CommentsExtensible">
|
|
6
|
+
<xsd:sequence>
|
|
7
|
+
<xsd:element name="commentExtensible" type="CT_CommentExtensible" minOccurs="0" maxOccurs="unbounded"/>
|
|
8
|
+
<xsd:element name="extLst" type="w16:CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
|
|
9
|
+
</xsd:sequence>
|
|
10
|
+
</xsd:complexType>
|
|
11
|
+
<xsd:complexType name="CT_CommentExtensible">
|
|
12
|
+
<xsd:sequence>
|
|
13
|
+
<xsd:element name="extLst" type="w16:CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
|
|
14
|
+
</xsd:sequence>
|
|
15
|
+
<xsd:attribute name="durableId" type="w:ST_LongHexNumber" use="required"/>
|
|
16
|
+
<xsd:attribute name="dateUtc" type="w:ST_DateTime" use="optional"/>
|
|
17
|
+
<xsd:attribute name="intelligentPlaceholder" type="s:ST_OnOff" use="optional"/>
|
|
18
|
+
</xsd:complexType>
|
|
19
|
+
<xsd:element name="commentsExtensible" type="CT_CommentsExtensible"/>
|
|
20
|
+
</xsd:schema>
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2016/wordml/cid" targetNamespace="http://schemas.microsoft.com/office/word/2016/wordml/cid">
|
|
2
|
+
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
+
<xsd:complexType name="CT_CommentsIds">
|
|
4
|
+
<xsd:sequence>
|
|
5
|
+
<xsd:element name="commentId" type="CT_CommentId" minOccurs="0" maxOccurs="unbounded"/>
|
|
6
|
+
</xsd:sequence>
|
|
7
|
+
</xsd:complexType>
|
|
8
|
+
<xsd:complexType name="CT_CommentId">
|
|
9
|
+
<xsd:attribute name="paraId" type="w12:ST_LongHexNumber" use="required"/>
|
|
10
|
+
<xsd:attribute name="durableId" type="w12:ST_LongHexNumber" use="required"/>
|
|
11
|
+
</xsd:complexType>
|
|
12
|
+
<xsd:element name="commentsIds" type="CT_CommentsIds"/>
|
|
13
|
+
</xsd:schema>
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" targetNamespace="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash">
|
|
2
|
+
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
+
<xsd:attribute name="storeItemChecksum" type="w12:ST_String"/>
|
|
4
|
+
</xsd:schema>
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2015/wordml/symex" targetNamespace="http://schemas.microsoft.com/office/word/2015/wordml/symex">
|
|
2
|
+
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
+
<xsd:complexType name="CT_SymEx">
|
|
4
|
+
<xsd:attribute name="font" type="w12:ST_String"/>
|
|
5
|
+
<xsd:attribute name="char" type="w12:ST_LongHexNumber"/>
|
|
6
|
+
</xsd:complexType>
|
|
7
|
+
<xsd:element name="symEx" type="CT_SymEx"/>
|
|
8
|
+
</xsd:schema>
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ──────────────────────────────────────────────────────────────────
|
|
3
|
+
# LibreOffice runner with automatic AF_UNIX socket shim.
|
|
4
|
+
#
|
|
5
|
+
# Provides two public entry-points:
|
|
6
|
+
#
|
|
7
|
+
# get_soffice_env() → dict – environment dict for subprocess calls
|
|
8
|
+
# run_soffice(args) → CompletedProcess
|
|
9
|
+
#
|
|
10
|
+
# When the host kernel blocks AF_UNIX sockets (common in sandboxed VMs),
|
|
11
|
+
# a small C shim is compiled on-the-fly and injected via LD_PRELOAD.
|
|
12
|
+
# ──────────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import socket
|
|
16
|
+
import subprocess
|
|
17
|
+
import tempfile
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ── compiled shim location ──
|
|
22
|
+
_COMPILED_SHIM = Path(tempfile.gettempdir()) / "lo_socket_shim.so"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_soffice_env() -> dict:
    """Build the environment mapping used when launching soffice.

    The result is a fresh dict based on ``os.environ`` in which
    ``SAL_USE_VCLPLUGIN`` is always set to ``"svp"``.  When AF_UNIX
    sockets cannot be created on this host, ``LD_PRELOAD`` is additionally
    set to the path of the compiled socket shim (replacing any value
    inherited from the parent environment).
    """
    overrides = {"SAL_USE_VCLPLUGIN": "svp"}
    if _host_needs_shim():
        overrides["LD_PRELOAD"] = str(_build_shim_if_needed())
    return {**os.environ, **overrides}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def run_soffice(args: list[str], **kw) -> subprocess.CompletedProcess:
    """Run ``soffice`` with *args* under the patched environment.

    Extra keyword arguments are forwarded unchanged to ``subprocess.run``;
    the ``env`` keyword is always supplied by this wrapper.
    """
    command = ["soffice"] + args
    patched_env = get_soffice_env()
    return subprocess.run(command, env=patched_env, **kw)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ──────────────────────────────────────────────────────────────────
|
|
42
|
+
# private helpers
|
|
43
|
+
# ──────────────────────────────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
def _host_needs_shim() -> bool:
|
|
46
|
+
"""Return True when creating AF_UNIX sockets raises OSError."""
|
|
47
|
+
try:
|
|
48
|
+
sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
|
49
|
+
sock.close()
|
|
50
|
+
except OSError:
|
|
51
|
+
return True
|
|
52
|
+
return False
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _build_shim_if_needed() -> Path:
    """Compile the C shim once and cache the resulting .so in the temp dir.

    Returns:
        The path of the cached shared object (``_COMPILED_SHIM``).

    Raises:
        subprocess.CalledProcessError: if gcc exits with a non-zero status.
        OSError: if gcc cannot be started or the files cannot be written.
    """
    if _COMPILED_SHIM.exists():
        return _COMPILED_SHIM

    # Build inside a private scratch directory.  The directory (and the
    # C source in it) is removed even when gcc fails, and no other process
    # can observe the intermediate files.
    with tempfile.TemporaryDirectory(
        prefix="lo_socket_shim_", dir=str(_COMPILED_SHIM.parent)
    ) as scratch:
        c_src = Path(scratch) / "lo_socket_shim.c"
        built = Path(scratch) / "lo_socket_shim.so"
        c_src.write_text(_C_SHIM_CODE)
        subprocess.run(
            ["gcc", "-shared", "-fPIC", "-o", str(built), str(c_src), "-ldl"],
            check=True,
            capture_output=True,
        )
        # Publish with a rename so a concurrent caller never finds a
        # half-written .so at the cache path and preloads it.
        os.replace(built, _COMPILED_SHIM)

    return _COMPILED_SHIM
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ── inline C source for the LD_PRELOAD shim ──
|
|
72
|
+
_C_SHIM_CODE = r"""
|
|
73
|
+
#define _GNU_SOURCE
|
|
74
|
+
#include <dlfcn.h>
|
|
75
|
+
#include <errno.h>
|
|
76
|
+
#include <signal.h>
|
|
77
|
+
#include <stdio.h>
|
|
78
|
+
#include <stdlib.h>
|
|
79
|
+
#include <sys/socket.h>
|
|
80
|
+
#include <unistd.h>
|
|
81
|
+
|
|
82
|
+
static int (*real_socket)(int, int, int);
|
|
83
|
+
static int (*real_socketpair)(int, int, int, int[2]);
|
|
84
|
+
static int (*real_listen)(int, int);
|
|
85
|
+
static int (*real_accept)(int, struct sockaddr *, socklen_t *);
|
|
86
|
+
static int (*real_close)(int);
|
|
87
|
+
static int (*real_read)(int, void *, size_t);
|
|
88
|
+
|
|
89
|
+
/* Per-FD bookkeeping (FDs >= 1024 are passed through unshimmed). */
|
|
90
|
+
static int is_shimmed[1024];
|
|
91
|
+
static int peer_of[1024];
|
|
92
|
+
static int wake_r[1024]; /* accept() blocks reading this */
|
|
93
|
+
static int wake_w[1024]; /* close() writes to this */
|
|
94
|
+
static int listener_fd = -1; /* FD that received listen() */
|
|
95
|
+
|
|
96
|
+
__attribute__((constructor))
|
|
97
|
+
static void init(void) {
|
|
98
|
+
real_socket = dlsym(RTLD_NEXT, "socket");
|
|
99
|
+
real_socketpair = dlsym(RTLD_NEXT, "socketpair");
|
|
100
|
+
real_listen = dlsym(RTLD_NEXT, "listen");
|
|
101
|
+
real_accept = dlsym(RTLD_NEXT, "accept");
|
|
102
|
+
real_close = dlsym(RTLD_NEXT, "close");
|
|
103
|
+
real_read = dlsym(RTLD_NEXT, "read");
|
|
104
|
+
for (int i = 0; i < 1024; i++) {
|
|
105
|
+
peer_of[i] = -1;
|
|
106
|
+
wake_r[i] = -1;
|
|
107
|
+
wake_w[i] = -1;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/* ---- socket ---------------------------------------------------------- */
|
|
112
|
+
int socket(int domain, int type, int protocol) {
|
|
113
|
+
if (domain == AF_UNIX) {
|
|
114
|
+
int fd = real_socket(domain, type, protocol);
|
|
115
|
+
if (fd >= 0) return fd;
|
|
116
|
+
/* socket(AF_UNIX) blocked – fall back to socketpair(). */
|
|
117
|
+
int sv[2];
|
|
118
|
+
if (real_socketpair(domain, type, protocol, sv) == 0) {
|
|
119
|
+
if (sv[0] >= 0 && sv[0] < 1024) {
|
|
120
|
+
is_shimmed[sv[0]] = 1;
|
|
121
|
+
peer_of[sv[0]] = sv[1];
|
|
122
|
+
int wp[2];
|
|
123
|
+
if (pipe(wp) == 0) {
|
|
124
|
+
wake_r[sv[0]] = wp[0];
|
|
125
|
+
wake_w[sv[0]] = wp[1];
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
return sv[0];
|
|
129
|
+
}
|
|
130
|
+
errno = EPERM;
|
|
131
|
+
return -1;
|
|
132
|
+
}
|
|
133
|
+
return real_socket(domain, type, protocol);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/* ---- listen ---------------------------------------------------------- */
|
|
137
|
+
int listen(int sockfd, int backlog) {
|
|
138
|
+
if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {
|
|
139
|
+
listener_fd = sockfd;
|
|
140
|
+
return 0;
|
|
141
|
+
}
|
|
142
|
+
return real_listen(sockfd, backlog);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/* ---- accept ---------------------------------------------------------- */
|
|
146
|
+
int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) {
|
|
147
|
+
if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {
|
|
148
|
+
/* Block until close() writes to the wake pipe. */
|
|
149
|
+
if (wake_r[sockfd] >= 0) {
|
|
150
|
+
char buf;
|
|
151
|
+
real_read(wake_r[sockfd], &buf, 1);
|
|
152
|
+
}
|
|
153
|
+
errno = ECONNABORTED;
|
|
154
|
+
return -1;
|
|
155
|
+
}
|
|
156
|
+
return real_accept(sockfd, addr, addrlen);
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/* ---- close ----------------------------------------------------------- */
|
|
160
|
+
int close(int fd) {
|
|
161
|
+
if (fd >= 0 && fd < 1024 && is_shimmed[fd]) {
|
|
162
|
+
int was_listener = (fd == listener_fd);
|
|
163
|
+
is_shimmed[fd] = 0;
|
|
164
|
+
|
|
165
|
+
if (wake_w[fd] >= 0) { /* unblock accept() */
|
|
166
|
+
char c = 0;
|
|
167
|
+
write(wake_w[fd], &c, 1);
|
|
168
|
+
real_close(wake_w[fd]);
|
|
169
|
+
wake_w[fd] = -1;
|
|
170
|
+
}
|
|
171
|
+
if (wake_r[fd] >= 0) { real_close(wake_r[fd]); wake_r[fd] = -1; }
|
|
172
|
+
if (peer_of[fd] >= 0) { real_close(peer_of[fd]); peer_of[fd] = -1; }
|
|
173
|
+
|
|
174
|
+
if (was_listener)
|
|
175
|
+
_exit(0); /* conversion done – exit */
|
|
176
|
+
}
|
|
177
|
+
return real_close(fd);
|
|
178
|
+
}
|
|
179
|
+
"""
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
if __name__ == "__main__":
    # Script mode: forward the command-line arguments to soffice and
    # propagate its exit status to the caller.
    import sys

    sys.exit(run_soffice(sys.argv[1:]).returncode)
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ──────────────────────────────────────────────────────────────────
|
|
3
|
+
# Extract Office archives (DOCX / PPTX / XLSX) into an editable tree.
|
|
4
|
+
#
|
|
5
|
+
# After extraction, XML is pretty-printed for readability. For DOCX
|
|
6
|
+
# files two optional post-processing passes are available:
|
|
7
|
+
# • run-merging – coalesce adjacent <w:r> with matching properties
|
|
8
|
+
# • redline-simplification – coalesce adjacent <w:ins>/<w:del> tags
|
|
9
|
+
#
|
|
10
|
+
# CLI:
|
|
11
|
+
# python unpack.py <office_file> <out_dir> [--merge-runs true|false]
|
|
12
|
+
# [--simplify-redlines true|false]
|
|
13
|
+
# ──────────────────────────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import sys
|
|
17
|
+
import zipfile
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
import defusedxml.minidom
|
|
21
|
+
|
|
22
|
+
from helpers.merge_runs import merge_runs as _coalesce_runs
|
|
23
|
+
from helpers.simplify_redlines import simplify_redlines as _coalesce_redlines
|
|
24
|
+
|
|
25
|
+
# Unicode curly-quote → XML entity mapping
|
|
26
|
+
_CURLY_QUOTES = {
|
|
27
|
+
"\u201c": "“",
|
|
28
|
+
"\u201d": "”",
|
|
29
|
+
"\u2018": "‘",
|
|
30
|
+
"\u2019": "’",
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
_OFFICE_SUFFIXES = {".docx", ".pptx", ".xlsx"}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def unpack(
    src_file: str,
    out_dir: str,
    coalesce_runs: bool = True,
    coalesce_redlines: bool = True,
) -> tuple[None, str]:
    """Unzip an Office archive into *out_dir* and tidy up its XML parts.

    Every ``*.xml`` and ``*.rels`` file found under *out_dir* is
    pretty-printed and then has its curly quotes rewritten.  For ``.docx``
    inputs the tracked-change and run coalescing passes are applied in
    between, each controlled by its flag.

    Returns:
        ``(None, message)``.  The message starts with ``"Error"`` when the
        source is missing, has an unsupported suffix, is not a valid zip
        archive, or any other exception occurs; otherwise it summarises
        the work done.
    """
    archive_path = Path(src_file)
    target_dir = Path(out_dir)
    suffix = archive_path.suffix.lower()

    # Guard clauses: reject inputs that cannot be processed at all.
    if not archive_path.exists():
        return None, "Error: {} does not exist".format(src_file)
    if suffix not in _OFFICE_SUFFIXES:
        return None, "Error: {} must be a .docx, .pptx, or .xlsx file".format(src_file)

    try:
        target_dir.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(archive_path, "r") as zf:
            zf.extractall(target_dir)

        parts = [*target_dir.rglob("*.xml"), *target_dir.rglob("*.rels")]
        for part in parts:
            _reformat_xml(part)

        # Summary fragments, joined with ", " once all passes have run.
        notes = ["Unpacked {} ({} XML files)".format(src_file, len(parts))]

        if suffix == ".docx":
            if coalesce_redlines:
                count, _ = _coalesce_redlines(str(target_dir))
                notes.append("simplified {} tracked changes".format(count))
            if coalesce_runs:
                count, _ = _coalesce_runs(str(target_dir))
                notes.append("merged {} runs".format(count))

        for part in parts:
            _encode_curly_quotes(part)

        return None, ", ".join(notes)
    except zipfile.BadZipFile:
        return None, "Error: {} is not a valid Office file".format(src_file)
    except Exception as exc:
        return None, "Error unpacking: {}".format(exc)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ──────────────────────────────────────────────────────────────────
|
|
89
|
+
|
|
90
|
+
def _reformat_xml(fp: Path) -> None:
    """Pretty-print an XML file in-place (2-space indent).

    The file is read as UTF-8, parsed with ``defusedxml.minidom`` and
    written back as UTF-8 bytes.  This is a best-effort pass: any exception
    raised while reading, parsing or writing is deliberately swallowed so
    that one unparsable part does not abort the whole unpack.
    """
    try:
        raw = fp.read_text(encoding="utf-8")
        doc = defusedxml.minidom.parseString(raw)
        # Two spaces per nesting level, matching the documented contract.
        fp.write_bytes(doc.toprettyxml(indent="  ", encoding="utf-8"))
    except Exception:
        pass
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _encode_curly_quotes(fp: Path) -> None:
    """Rewrite *fp* in place, applying every ``_CURLY_QUOTES`` substitution.

    The file is read and written as UTF-8.  Best effort: any exception is
    swallowed and the function returns normally.
    """
    try:
        text = fp.read_text(encoding="utf-8")
        for quote_char in _CURLY_QUOTES:
            text = text.replace(quote_char, _CURLY_QUOTES[quote_char])
        fp.write_text(text, encoding="utf-8")
    except Exception:
        return
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ──────────────────────────────────────────────────────────────────
|
|
112
|
+
# CLI
|
|
113
|
+
# ──────────────────────────────────────────────────────────────────
|
|
114
|
+
|
|
115
|
+
if __name__ == "__main__":

    def _parse_bool(text: str) -> bool:
        """Interpret a flag value: only the word "true" (any case) is True."""
        return text.lower() == "true"

    ap = argparse.ArgumentParser(
        description="Unpack an Office file (DOCX, PPTX, XLSX) for editing"
    )
    ap.add_argument("input_file", help="Office file to unpack")
    ap.add_argument("output_directory", help="Output directory")
    ap.add_argument(
        "--merge-runs",
        type=_parse_bool,
        default=True,
        metavar="true|false",
        help="Merge adjacent runs with identical formatting (DOCX only, default: true)",
    )
    ap.add_argument(
        "--simplify-redlines",
        type=_parse_bool,
        default=True,
        metavar="true|false",
        help="Merge adjacent tracked changes from same author (DOCX only, default: true)",
    )
    ns = ap.parse_args()

    _, msg = unpack(
        ns.input_file,
        ns.output_directory,
        coalesce_runs=ns.merge_runs,
        coalesce_redlines=ns.simplify_redlines,
    )
    print(msg)

    # Every failure message returned by unpack() begins with "Error", while
    # the success message embeds the input file name.  Test the prefix so a
    # file whose name merely contains "Error" is not reported as a failure.
    if msg.startswith("Error"):
        sys.exit(1)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# ──────────────────────────────────────────────────────────────────
|
|
3
|
+
# Standalone CLI for validating unpacked Office XML against XSD schemas
|
|
4
|
+
# and checking tracked-change consistency.
|
|
5
|
+
#
|
|
6
|
+
# Accepts either an unpacked directory or a packed .docx/.pptx/.xlsx
|
|
7
|
+
# (the latter is extracted to a temp dir automatically).
|
|
8
|
+
#
|
|
9
|
+
# Auto-repair capabilities:
|
|
10
|
+
# • paraId / durableId values exceeding OOXML limits
|
|
11
|
+
# • Missing xml:space="preserve" on <w:t> elements with whitespace
|
|
12
|
+
# ──────────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import sys
|
|
16
|
+
import tempfile
|
|
17
|
+
import zipfile
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
|
|
21
|
+
|
|
22
|
+
_VALID_SUFFIXES = [".docx", ".pptx", ".xlsx"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def main():
|
|
26
|
+
ap = argparse.ArgumentParser(description="Validate Office document XML files")
|
|
27
|
+
ap.add_argument(
|
|
28
|
+
"path",
|
|
29
|
+
help="Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)",
|
|
30
|
+
)
|
|
31
|
+
ap.add_argument(
|
|
32
|
+
"--original", required=False, default=None,
|
|
33
|
+
help=(
|
|
34
|
+
"Path to original file (.docx/.pptx/.xlsx). "
|
|
35
|
+
"If omitted, all XSD errors are reported and redlining validation is skipped."
|
|
36
|
+
),
|
|
37
|
+
)
|
|
38
|
+
ap.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
|
|
39
|
+
ap.add_argument(
|
|
40
|
+
"--auto-repair", action="store_true",
|
|
41
|
+
help="Automatically repair common issues (hex IDs, whitespace preservation)",
|
|
42
|
+
)
|
|
43
|
+
ap.add_argument(
|
|
44
|
+
"--author", default="Claude",
|
|
45
|
+
help="Author name for redlining validation (default: Claude)",
|
|
46
|
+
)
|
|
47
|
+
opts = ap.parse_args()
|
|
48
|
+
|
|
49
|
+
target = Path(opts.path)
|
|
50
|
+
assert target.exists(), "Error: {} does not exist".format(target)
|
|
51
|
+
|
|
52
|
+
orig = None
|
|
53
|
+
if opts.original is not None:
|
|
54
|
+
orig = Path(opts.original)
|
|
55
|
+
assert orig.is_file(), "Error: {} is not a file".format(orig)
|
|
56
|
+
assert orig.suffix.lower() in _VALID_SUFFIXES, (
|
|
57
|
+
"Error: {} must be a .docx, .pptx, or .xlsx file".format(orig)
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
detected_ext = (orig or target).suffix.lower()
|
|
61
|
+
assert detected_ext in _VALID_SUFFIXES, (
|
|
62
|
+
"Error: Cannot determine file type from {}. "
|
|
63
|
+
"Use --original or provide a .docx/.pptx/.xlsx file.".format(target)
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# Auto-extract packed files into a temp directory
|
|
67
|
+
if target.is_file() and target.suffix.lower() in _VALID_SUFFIXES:
|
|
68
|
+
tmp = tempfile.mkdtemp()
|
|
69
|
+
with zipfile.ZipFile(target, "r") as zf:
|
|
70
|
+
zf.extractall(tmp)
|
|
71
|
+
work_dir = Path(tmp)
|
|
72
|
+
else:
|
|
73
|
+
assert target.is_dir(), "Error: {} is not a directory or Office file".format(target)
|
|
74
|
+
work_dir = target
|
|
75
|
+
|
|
76
|
+
# Build the appropriate validator chain
|
|
77
|
+
match detected_ext:
|
|
78
|
+
case ".docx":
|
|
79
|
+
checkers = [
|
|
80
|
+
DOCXSchemaValidator(work_dir, orig, verbose=opts.verbose),
|
|
81
|
+
]
|
|
82
|
+
if orig is not None:
|
|
83
|
+
checkers.append(
|
|
84
|
+
RedliningValidator(work_dir, orig, verbose=opts.verbose, author=opts.author)
|
|
85
|
+
)
|
|
86
|
+
case ".pptx":
|
|
87
|
+
checkers = [
|
|
88
|
+
PPTXSchemaValidator(work_dir, orig, verbose=opts.verbose),
|
|
89
|
+
]
|
|
90
|
+
case _:
|
|
91
|
+
print("Error: Validation not supported for file type {}".format(detected_ext))
|
|
92
|
+
sys.exit(1)
|
|
93
|
+
|
|
94
|
+
if opts.auto_repair:
|
|
95
|
+
n_fixed = sum(c.repair() for c in checkers)
|
|
96
|
+
if n_fixed:
|
|
97
|
+
print("Auto-repaired {} issue(s)".format(n_fixed))
|
|
98
|
+
|
|
99
|
+
ok = all(c.validate() for c in checkers)
|
|
100
|
+
|
|
101
|
+
if ok:
|
|
102
|
+
print("All validations PASSED!")
|
|
103
|
+
|
|
104
|
+
sys.exit(0 if ok else 1)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
if __name__ == "__main__":
|
|
108
|
+
main()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# ── Office document validation sub-package ──
|
|
2
|
+
|
|
3
|
+
from .base import BaseSchemaValidator
|
|
4
|
+
from .docx import DOCXSchemaValidator
|
|
5
|
+
from .pptx import PPTXSchemaValidator
|
|
6
|
+
from .redlining import RedliningValidator
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"BaseSchemaValidator",
|
|
10
|
+
"DOCXSchemaValidator",
|
|
11
|
+
"PPTXSchemaValidator",
|
|
12
|
+
"RedliningValidator",
|
|
13
|
+
]
|