@panda-agent/panda-cli 0.1.28 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/panda-cli-ink.bundle.mjs +267 -258
- package/package.json +6 -4
- package/skills/.gitkeep +0 -0
- package/skills/README.md +13 -0
- package/skills/docx/.skill-metadata.yaml +173 -0
- package/skills/docx/LICENSE.txt +30 -0
- package/skills/docx/SKILL.md +589 -0
- package/skills/docx/scripts/__init__.py +1 -0
- package/skills/docx/scripts/accept_changes.py +206 -0
- package/skills/docx/scripts/comment.py +442 -0
- package/skills/docx/scripts/office/helpers/__init__.py +1 -0
- package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
- package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
- package/skills/docx/scripts/office/pack.py +167 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/docx/scripts/office/soffice.py +194 -0
- package/skills/docx/scripts/office/unpack.py +145 -0
- package/skills/docx/scripts/office/validate.py +114 -0
- package/skills/docx/scripts/office/validators/__init__.py +16 -0
- package/skills/docx/scripts/office/validators/base.py +733 -0
- package/skills/docx/scripts/office/validators/docx.py +354 -0
- package/skills/docx/scripts/office/validators/pptx.py +230 -0
- package/skills/docx/scripts/office/validators/redlining.py +212 -0
- package/skills/docx/scripts/templates/comments.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/skills/docx/scripts/templates/people.xml +3 -0
- package/skills/frontend-design/LICENSE.txt +177 -0
- package/skills/frontend-design/SKILL.md +42 -0
- package/skills/pdf/.skill-metadata.yaml +273 -0
- package/skills/pdf/LICENSE.txt +30 -0
- package/skills/pdf/SKILL.md +324 -0
- package/skills/pdf/advanced-reference.md +609 -0
- package/skills/pdf/form-filling-guide.md +318 -0
- package/skills/pdf/forms.md +294 -0
- package/skills/pdf/reference.md +612 -0
- package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
- package/skills/pdf/scripts/check_fillable_fields.py +64 -0
- package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
- package/skills/pdf/scripts/create_validation_image.py +125 -0
- package/skills/pdf/scripts/extract_form_field_info.py +220 -0
- package/skills/pdf/scripts/extract_form_structure.py +202 -0
- package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
- package/skills/pptx-generator/SKILL.md +204 -0
- package/skills/pptx-generator/assets/styles/business.json +8 -0
- package/skills/pptx-generator/assets/styles/minimal.json +8 -0
- package/skills/pptx-generator/assets/styles/modern.json +8 -0
- package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
- package/skills/pptx-generator/references/collaboration_guide.md +381 -0
- package/skills/pptx-generator/references/json_format_spec.md +215 -0
- package/skills/pptx-generator/references/layout_guide.md +290 -0
- package/skills/pptx-generator/scripts/json_validator.py +194 -0
- package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
- package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
- package/skills/skill-creator/LICENSE.txt +202 -0
- package/skills/skill-creator/SKILL.md +479 -0
- package/skills/skill-creator/agents/analyzer.md +274 -0
- package/skills/skill-creator/agents/comparator.md +202 -0
- package/skills/skill-creator/agents/grader.md +223 -0
- package/skills/skill-creator/assets/eval_review.html +146 -0
- package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
- package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/skills/skill-creator/references/schemas.md +430 -0
- package/skills/skill-creator/scripts/__init__.py +0 -0
- package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/skill-creator/scripts/generate_report.py +326 -0
- package/skills/skill-creator/scripts/improve_description.py +248 -0
- package/skills/skill-creator/scripts/package_skill.py +136 -0
- package/skills/skill-creator/scripts/quick_validate.py +103 -0
- package/skills/skill-creator/scripts/run_eval.py +310 -0
- package/skills/skill-creator/scripts/run_loop.py +332 -0
- package/skills/skill-creator/scripts/utils.py +47 -0
- package/skills/xlsx/.skill-metadata.yaml +185 -0
- package/skills/xlsx/LICENSE.txt +30 -0
- package/skills/xlsx/SKILL.md +233 -0
- package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
- package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
- package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
- package/skills/xlsx/scripts/office/pack.py +162 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/xlsx/scripts/office/soffice.py +185 -0
- package/skills/xlsx/scripts/office/unpack.py +146 -0
- package/skills/xlsx/scripts/office/validate.py +108 -0
- package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
- package/skills/xlsx/scripts/office/validators/base.py +800 -0
- package/skills/xlsx/scripts/office/validators/docx.py +383 -0
- package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
- package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
- package/skills/xlsx/scripts/recalc.py +296 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2018/wordml/cex" targetNamespace="http://schemas.microsoft.com/office/word/2018/wordml/cex">
|
|
2
|
+
<xsd:import id="w16" namespace="http://schemas.microsoft.com/office/word/2018/wordml" schemaLocation="wml-2018.xsd"/>
|
|
3
|
+
<xsd:import id="w" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
4
|
+
<xsd:import id="s" namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" schemaLocation="../ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd"/>
|
|
5
|
+
<xsd:complexType name="CT_CommentsExtensible">
|
|
6
|
+
<xsd:sequence>
|
|
7
|
+
<xsd:element name="commentExtensible" type="CT_CommentExtensible" minOccurs="0" maxOccurs="unbounded"/>
|
|
8
|
+
<xsd:element name="extLst" type="w16:CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
|
|
9
|
+
</xsd:sequence>
|
|
10
|
+
</xsd:complexType>
|
|
11
|
+
<xsd:complexType name="CT_CommentExtensible">
|
|
12
|
+
<xsd:sequence>
|
|
13
|
+
<xsd:element name="extLst" type="w16:CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
|
|
14
|
+
</xsd:sequence>
|
|
15
|
+
<xsd:attribute name="durableId" type="w:ST_LongHexNumber" use="required"/>
|
|
16
|
+
<xsd:attribute name="dateUtc" type="w:ST_DateTime" use="optional"/>
|
|
17
|
+
<xsd:attribute name="intelligentPlaceholder" type="s:ST_OnOff" use="optional"/>
|
|
18
|
+
</xsd:complexType>
|
|
19
|
+
<xsd:element name="commentsExtensible" type="CT_CommentsExtensible"/>
|
|
20
|
+
</xsd:schema>
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2016/wordml/cid" targetNamespace="http://schemas.microsoft.com/office/word/2016/wordml/cid">
|
|
2
|
+
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
+
<xsd:complexType name="CT_CommentsIds">
|
|
4
|
+
<xsd:sequence>
|
|
5
|
+
<xsd:element name="commentId" type="CT_CommentId" minOccurs="0" maxOccurs="unbounded"/>
|
|
6
|
+
</xsd:sequence>
|
|
7
|
+
</xsd:complexType>
|
|
8
|
+
<xsd:complexType name="CT_CommentId">
|
|
9
|
+
<xsd:attribute name="paraId" type="w12:ST_LongHexNumber" use="required"/>
|
|
10
|
+
<xsd:attribute name="durableId" type="w12:ST_LongHexNumber" use="required"/>
|
|
11
|
+
</xsd:complexType>
|
|
12
|
+
<xsd:element name="commentsIds" type="CT_CommentsIds"/>
|
|
13
|
+
</xsd:schema>
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" targetNamespace="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash">
|
|
2
|
+
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
+
<xsd:attribute name="storeItemChecksum" type="w12:ST_String"/>
|
|
4
|
+
</xsd:schema>
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2015/wordml/symex" targetNamespace="http://schemas.microsoft.com/office/word/2015/wordml/symex">
|
|
2
|
+
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
+
<xsd:complexType name="CT_SymEx">
|
|
4
|
+
<xsd:attribute name="font" type="w12:ST_String"/>
|
|
5
|
+
<xsd:attribute name="char" type="w12:ST_LongHexNumber"/>
|
|
6
|
+
</xsd:complexType>
|
|
7
|
+
<xsd:element name="symEx" type="CT_SymEx"/>
|
|
8
|
+
</xsd:schema>
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Wrapper around LibreOffice (``soffice``) that transparently handles
|
|
3
|
+
sandboxed environments where AF_UNIX sockets are unavailable.
|
|
4
|
+
|
|
5
|
+
The module detects the restriction at startup and, when necessary,
|
|
6
|
+
compiles and injects a tiny C shim via ``LD_PRELOAD``.
|
|
7
|
+
|
|
8
|
+
Public surface::
|
|
9
|
+
|
|
10
|
+
from office.soffice import run_soffice, get_soffice_env
|
|
11
|
+
|
|
12
|
+
# Approach A – call soffice directly
|
|
13
|
+
result = run_soffice(["--headless", "--convert-to", "pdf", "input.docx"])
|
|
14
|
+
|
|
15
|
+
# Approach B – retrieve an env dict for manual subprocess usage
|
|
16
|
+
env = get_soffice_env()
|
|
17
|
+
subprocess.run(["soffice", ...], env=env)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import pathlib
|
|
22
|
+
import socket
|
|
23
|
+
import subprocess
|
|
24
|
+
import tempfile
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ── Shared-object path for the optional shim ────────────────────────────────
|
|
28
|
+
|
|
29
|
+
_COMPILED_SHIM = pathlib.Path(tempfile.gettempdir()) / "lo_socket_shim.so"
|
|
30
|
+
|
|
31
|
+
# ── C source for the LD_PRELOAD shim ────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
_C_SOURCE = r"""
|
|
34
|
+
#define _GNU_SOURCE
|
|
35
|
+
#include <dlfcn.h>
|
|
36
|
+
#include <errno.h>
|
|
37
|
+
#include <signal.h>
|
|
38
|
+
#include <stdio.h>
|
|
39
|
+
#include <stdlib.h>
|
|
40
|
+
#include <sys/socket.h>
|
|
41
|
+
#include <unistd.h>
|
|
42
|
+
|
|
43
|
+
static int (*real_socket)(int, int, int);
|
|
44
|
+
static int (*real_socketpair)(int, int, int, int[2]);
|
|
45
|
+
static int (*real_listen)(int, int);
|
|
46
|
+
static int (*real_accept)(int, struct sockaddr *, socklen_t *);
|
|
47
|
+
static int (*real_close)(int);
|
|
48
|
+
static int (*real_read)(int, void *, size_t);
|
|
49
|
+
|
|
50
|
+
/* Per-FD bookkeeping (FDs >= 1024 are passed through unshimmed). */
|
|
51
|
+
static int is_shimmed[1024];
|
|
52
|
+
static int peer_of[1024];
|
|
53
|
+
static int wake_r[1024]; /* accept() blocks reading this */
|
|
54
|
+
static int wake_w[1024]; /* close() writes to this */
|
|
55
|
+
static int listener_fd = -1; /* FD that received listen() */
|
|
56
|
+
|
|
57
|
+
__attribute__((constructor))
|
|
58
|
+
static void init(void) {
|
|
59
|
+
real_socket = dlsym(RTLD_NEXT, "socket");
|
|
60
|
+
real_socketpair = dlsym(RTLD_NEXT, "socketpair");
|
|
61
|
+
real_listen = dlsym(RTLD_NEXT, "listen");
|
|
62
|
+
real_accept = dlsym(RTLD_NEXT, "accept");
|
|
63
|
+
real_close = dlsym(RTLD_NEXT, "close");
|
|
64
|
+
real_read = dlsym(RTLD_NEXT, "read");
|
|
65
|
+
for (int i = 0; i < 1024; i++) {
|
|
66
|
+
peer_of[i] = -1;
|
|
67
|
+
wake_r[i] = -1;
|
|
68
|
+
wake_w[i] = -1;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/* ---- socket ---------------------------------------------------------- */
|
|
73
|
+
int socket(int domain, int type, int protocol) {
|
|
74
|
+
if (domain == AF_UNIX) {
|
|
75
|
+
int fd = real_socket(domain, type, protocol);
|
|
76
|
+
if (fd >= 0) return fd;
|
|
77
|
+
/* socket(AF_UNIX) blocked – fall back to socketpair(). */
|
|
78
|
+
int sv[2];
|
|
79
|
+
if (real_socketpair(domain, type, protocol, sv) == 0) {
|
|
80
|
+
if (sv[0] >= 0 && sv[0] < 1024) {
|
|
81
|
+
is_shimmed[sv[0]] = 1;
|
|
82
|
+
peer_of[sv[0]] = sv[1];
|
|
83
|
+
int wp[2];
|
|
84
|
+
if (pipe(wp) == 0) {
|
|
85
|
+
wake_r[sv[0]] = wp[0];
|
|
86
|
+
wake_w[sv[0]] = wp[1];
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
return sv[0];
|
|
90
|
+
}
|
|
91
|
+
errno = EPERM;
|
|
92
|
+
return -1;
|
|
93
|
+
}
|
|
94
|
+
return real_socket(domain, type, protocol);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/* ---- listen ---------------------------------------------------------- */
|
|
98
|
+
int listen(int sockfd, int backlog) {
|
|
99
|
+
if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {
|
|
100
|
+
listener_fd = sockfd;
|
|
101
|
+
return 0;
|
|
102
|
+
}
|
|
103
|
+
return real_listen(sockfd, backlog);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/* ---- accept ---------------------------------------------------------- */
|
|
107
|
+
int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) {
|
|
108
|
+
if (sockfd >= 0 && sockfd < 1024 && is_shimmed[sockfd]) {
|
|
109
|
+
/* Block until close() writes to the wake pipe. */
|
|
110
|
+
if (wake_r[sockfd] >= 0) {
|
|
111
|
+
char buf;
|
|
112
|
+
real_read(wake_r[sockfd], &buf, 1);
|
|
113
|
+
}
|
|
114
|
+
errno = ECONNABORTED;
|
|
115
|
+
return -1;
|
|
116
|
+
}
|
|
117
|
+
return real_accept(sockfd, addr, addrlen);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
/* ---- close ----------------------------------------------------------- */
|
|
121
|
+
int close(int fd) {
|
|
122
|
+
if (fd >= 0 && fd < 1024 && is_shimmed[fd]) {
|
|
123
|
+
int was_listener = (fd == listener_fd);
|
|
124
|
+
is_shimmed[fd] = 0;
|
|
125
|
+
|
|
126
|
+
if (wake_w[fd] >= 0) { /* unblock accept() */
|
|
127
|
+
char c = 0;
|
|
128
|
+
write(wake_w[fd], &c, 1);
|
|
129
|
+
real_close(wake_w[fd]);
|
|
130
|
+
wake_w[fd] = -1;
|
|
131
|
+
}
|
|
132
|
+
if (wake_r[fd] >= 0) { real_close(wake_r[fd]); wake_r[fd] = -1; }
|
|
133
|
+
if (peer_of[fd] >= 0) { real_close(peer_of[fd]); peer_of[fd] = -1; }
|
|
134
|
+
|
|
135
|
+
if (was_listener)
|
|
136
|
+
_exit(0); /* conversion done – exit */
|
|
137
|
+
}
|
|
138
|
+
return real_close(fd);
|
|
139
|
+
}
|
|
140
|
+
"""
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# ── Internal helpers ─────────────────────────────────────────────────────────
|
|
144
|
+
|
|
145
|
+
def _unix_sockets_blocked() -> bool:
    """Probe whether AF_UNIX stream sockets can be created.

    Returns:
        ``True`` if creating (or closing) a throw-away AF_UNIX socket raises
        ``OSError``; ``False`` if the probe succeeds.
    """
    try:
        # Create and immediately release a probe socket; only the outcome matters.
        socket.socket(socket.AF_UNIX, socket.SOCK_STREAM).close()
    except OSError:
        return True
    return False
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _build_shim_if_missing() -> pathlib.Path:
    """Compile the C shim to a shared object when it is not yet present.

    The shim is built inside a private scratch directory and then moved onto
    ``_COMPILED_SHIM`` with an atomic rename. Concurrent callers therefore
    never observe a half-written shared object, never trip over each other's
    intermediate ``.c`` file, and no build artefacts are left behind when the
    compiler fails.

    Returns:
        The path of the compiled shared object (``_COMPILED_SHIM``).

    Raises:
        subprocess.CalledProcessError: ``gcc`` exited with a non-zero status.
        OSError: ``gcc`` could not be started, or the build files could not be
            written or moved into place.
    """
    if _COMPILED_SHIM.exists():
        # NOTE(review): the cached object sits at a predictable name in the
        # shared temp directory and is trusted as-is before being handed to
        # LD_PRELOAD. Consider verifying ownership/permissions or moving the
        # cache to a per-user directory.
        return _COMPILED_SHIM

    # The scratch directory is created next to the final location so that
    # os.replace() stays on one filesystem and remains atomic.
    with tempfile.TemporaryDirectory(dir=_COMPILED_SHIM.parent) as build_dir:
        workspace = pathlib.Path(build_dir)
        c_file = workspace / "lo_socket_shim.c"
        built_object = workspace / _COMPILED_SHIM.name

        c_file.write_text(_C_SOURCE)
        subprocess.run(
            ["gcc", "-shared", "-fPIC", "-o", str(built_object), str(c_file), "-ldl"],
            check=True,
            capture_output=True,
        )
        os.replace(built_object, _COMPILED_SHIM)

    return _COMPILED_SHIM
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# ── Public API ───────────────────────────────────────────────────────────────
|
|
172
|
+
|
|
173
|
+
def get_soffice_env() -> dict:
    """Build the environment mapping to pass as ``subprocess.run(env=…)``.

    Returns:
        A copy of the current process environment with ``SAL_USE_VCLPLUGIN``
        set to ``"svp"``. When AF_UNIX sockets are blocked, ``LD_PRELOAD`` is
        additionally set to the path of the compiled socket shim.
    """
    environment = {**os.environ, "SAL_USE_VCLPLUGIN": "svp"}

    if not _unix_sockets_blocked():
        return environment

    # Sockets are unavailable: inject the shim (compiling it on first use).
    environment["LD_PRELOAD"] = str(_build_shim_if_missing())
    return environment
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def run_soffice(args: list[str], **kw) -> subprocess.CompletedProcess:
    """Run ``soffice`` with *args* under the environment from ``get_soffice_env()``.

    Args:
        args: Command-line arguments appended after the ``soffice`` executable.
        **kw: Extra keyword arguments forwarded unchanged to ``subprocess.run``.

    Returns:
        The ``CompletedProcess`` returned by ``subprocess.run``.
    """
    command = ["soffice"] + args
    environment = get_soffice_env()
    return subprocess.run(command, env=environment, **kw)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# ── CLI passthrough ──────────────────────────────────────────────────────────
|
|
190
|
+
|
|
191
|
+
if __name__ == "__main__":
    import sys

    # Forward every CLI argument to soffice and mirror its exit status.
    sys.exit(run_soffice(sys.argv[1:]).returncode)
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Extract and beautify Office archives (DOCX / PPTX / XLSX) for manual XML editing.
|
|
2
|
+
|
|
3
|
+
The ZIP contents are inflated, every XML file is pretty-printed, and — for
|
|
4
|
+
Word documents — adjacent runs may be coalesced and redundant tracked-change
|
|
5
|
+
wrappers collapsed.
|
|
6
|
+
|
|
7
|
+
Invocation::
|
|
8
|
+
|
|
9
|
+
python unpack.py <office_file> <output_dir> [options]
|
|
10
|
+
|
|
11
|
+
Samples::
|
|
12
|
+
|
|
13
|
+
python unpack.py document.docx unpacked/
|
|
14
|
+
python unpack.py presentation.pptx unpacked/
|
|
15
|
+
python unpack.py document.docx unpacked/ --merge-runs false
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import pathlib
|
|
20
|
+
import sys
|
|
21
|
+
import zipfile
|
|
22
|
+
|
|
23
|
+
import defusedxml.minidom
|
|
24
|
+
|
|
25
|
+
from helpers.merge_runs import merge_runs as _coalesce_runs
|
|
26
|
+
from helpers.simplify_redlines import simplify_redlines as _compact_redlines
|
|
27
|
+
|
|
28
|
+
# Maps each Unicode curly quote to its XML numeric character reference.
# The values must be the escaped entity text (not the literal character);
# otherwise the replacement pass that consumes this table is a no-op.
_TYPOGRAPHIC_QUOTES = {
    "\u201c": "&#x201C;",  # left double quotation mark
    "\u201d": "&#x201D;",  # right double quotation mark
    "\u2018": "&#x2018;",  # left single quotation mark
    "\u2019": "&#x2019;",  # right single quotation mark
}
|
|
34
|
+
|
|
35
|
+
_OFFICE_SUFFIXES = {".docx", ".pptx", ".xlsx"}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _beautify_xml(fp: pathlib.Path) -> None:
    """Rewrite *fp* in indented form via minidom.

    The file is handed to the parser as raw bytes so that the encoding named
    by its byte-order mark or XML declaration is honoured. Decoding as UTF-8
    up front would make any non-UTF-8 part (e.g. UTF-16) fail and be skipped.
    The output is always serialised as UTF-8.

    Args:
        fp: Path of the XML file to reformat in place.

    Formatting is best-effort: a file that cannot be read, parsed, or written
    is left untouched and no exception propagates to the caller.
    """
    try:
        dom = defusedxml.minidom.parseString(fp.read_bytes())
        fp.write_bytes(dom.toprettyxml(indent=" ", encoding="utf-8"))
    except Exception:
        # Deliberate best-effort: one malformed part must not abort the unpack.
        pass
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _replace_typographic_quotes(fp: pathlib.Path) -> None:
    """Rewrite *fp*, substituting each key of ``_TYPOGRAPHIC_QUOTES`` with its mapped text.

    Args:
        fp: Path of the UTF-8 text file to rewrite in place.

    The operation is best-effort: any failure while reading, converting, or
    writing is swallowed and the function returns ``None`` regardless.
    """
    try:
        # Single pass over the text; every key is a one-character string.
        translation = str.maketrans(_TYPOGRAPHIC_QUOTES)
        converted = fp.read_text(encoding="utf-8").translate(translation)
        fp.write_text(converted, encoding="utf-8")
    except Exception:
        pass
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def unpack(
    input_file: str,
    output_directory: str,
    merge_runs: bool = True,
    simplify_redlines: bool = True,
) -> tuple[None, str]:
    """Extract an Office archive and post-process the XML it contains.

    Args:
        input_file: Path of the ``.docx``/``.pptx``/``.xlsx`` file to extract.
        output_directory: Directory to extract into (created when missing).
        merge_runs: For ``.docx`` input, run the run-merging helper.
        simplify_redlines: For ``.docx`` input, run the redline-simplifying helper.

    Returns:
        A ``(None, message)`` tuple. ``message`` is a human-readable summary on
        success, or a string beginning with ``"Error"`` on failure; failures
        are reported through the message rather than raised.
    """
    archive = pathlib.Path(input_file)
    target_dir = pathlib.Path(output_directory)
    suffix = archive.suffix.lower()

    # Guard clauses: reject bad input before touching the filesystem.
    if not archive.exists():
        return None, f"Error: {input_file} does not exist"
    if suffix not in _OFFICE_SUFFIXES:
        return None, f"Error: {input_file} must be a .docx, .pptx, or .xlsx file"

    try:
        target_dir.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(archive, "r") as bundle:
            bundle.extractall(target_dir)

        # Snapshot the XML parts once; both passes below walk the same list.
        xml_paths = list(target_dir.rglob("*.xml"))
        xml_paths.extend(target_dir.rglob("*.rels"))

        for xml_path in xml_paths:
            _beautify_xml(xml_path)

        report = [f"Unpacked {input_file} ({len(xml_paths)} XML files)"]

        if suffix == ".docx":
            if simplify_redlines:
                simplified_count, _ = _compact_redlines(str(target_dir))
                report.append(f"simplified {simplified_count} tracked changes")
            if merge_runs:
                merged_count, _ = _coalesce_runs(str(target_dir))
                report.append(f"merged {merged_count} runs")

        # Quote replacement runs last, after the DOCX helpers.
        for xml_path in xml_paths:
            _replace_typographic_quotes(xml_path)

        return None, ", ".join(report)
    except zipfile.BadZipFile:
        return None, f"Error: {input_file} is not a valid Office file"
    except Exception as exc:
        return None, f"Error unpacking: {exc}"
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ── CLI entry point ──────────────────────────────────────────────────────────
|
|
113
|
+
|
|
114
|
+
if __name__ == "__main__":
    # Command-line front end: parse arguments, run unpack(), print its
    # message, and exit non-zero when unpack() reported a failure.
    ap = argparse.ArgumentParser(
        description="Unpack an Office file (DOCX, PPTX, XLSX) for editing"
    )
    ap.add_argument("input_file", help="Office file to unpack")
    ap.add_argument("output_directory", help="Output directory")
    ap.add_argument(
        "--merge-runs",
        type=lambda v: v.lower() == "true",
        default=True,
        metavar="true|false",
        help="Merge adjacent runs with identical formatting (DOCX only, default: true)",
    )
    ap.add_argument(
        "--simplify-redlines",
        type=lambda v: v.lower() == "true",
        default=True,
        metavar="true|false",
        help="Merge adjacent tracked changes from same author (DOCX only, default: true)",
    )
    cli = ap.parse_args()

    _, message = unpack(
        cli.input_file,
        cli.output_directory,
        merge_runs=cli.merge_runs,
        simplify_redlines=cli.simplify_redlines,
    )
    print(message)

    # Every failure message from unpack() begins with "Error", while the
    # success summary echoes the input path. A substring test would therefore
    # report failure for a successfully unpacked file whose path merely
    # contains "Error"; checking the prefix avoids that.
    if message.startswith("Error"):
        sys.exit(1)
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI for checking Office XML against XSD schemas and tracked-change rules.
|
|
3
|
+
|
|
4
|
+
Invocation::
|
|
5
|
+
|
|
6
|
+
python validate.py <path> [--original <original_file>] [--auto-repair] [--author NAME]
|
|
7
|
+
|
|
8
|
+
``<path>`` may be either an already-unpacked directory **or** a packed
|
|
9
|
+
``.docx``/``.pptx``/``.xlsx`` file (which is temporarily inflated).
|
|
10
|
+
|
|
11
|
+
Auto-repair capabilities:
|
|
12
|
+
|
|
13
|
+
* ``paraId`` / ``durableId`` values exceeding OOXML limits are regenerated.
|
|
14
|
+
* Missing ``xml:space="preserve"`` on ``w:t`` nodes with leading/trailing
|
|
15
|
+
whitespace is injected.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import pathlib
|
|
20
|
+
import sys
|
|
21
|
+
import tempfile
|
|
22
|
+
import zipfile
|
|
23
|
+
|
|
24
|
+
from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
|
|
25
|
+
|
|
26
|
+
_SUPPORTED_EXTS = [".docx", ".pptx", ".xlsx"]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def main():
|
|
30
|
+
ap = argparse.ArgumentParser(description="Validate Office document XML files")
|
|
31
|
+
ap.add_argument(
|
|
32
|
+
"path",
|
|
33
|
+
help="Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)",
|
|
34
|
+
)
|
|
35
|
+
ap.add_argument(
|
|
36
|
+
"--original",
|
|
37
|
+
required=False,
|
|
38
|
+
default=None,
|
|
39
|
+
help="Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors are reported and redlining validation is skipped.",
|
|
40
|
+
)
|
|
41
|
+
ap.add_argument(
|
|
42
|
+
"-v", "--verbose",
|
|
43
|
+
action="store_true",
|
|
44
|
+
help="Enable verbose output",
|
|
45
|
+
)
|
|
46
|
+
ap.add_argument(
|
|
47
|
+
"--auto-repair",
|
|
48
|
+
action="store_true",
|
|
49
|
+
help="Automatically repair common issues (hex IDs, whitespace preservation)",
|
|
50
|
+
)
|
|
51
|
+
ap.add_argument(
|
|
52
|
+
"--author",
|
|
53
|
+
default="Claude",
|
|
54
|
+
help="Author name for redlining validation (default: Claude)",
|
|
55
|
+
)
|
|
56
|
+
opts = ap.parse_args()
|
|
57
|
+
|
|
58
|
+
target = pathlib.Path(opts.path)
|
|
59
|
+
assert target.exists(), "Error: %s does not exist" % target
|
|
60
|
+
|
|
61
|
+
ref_file = None
|
|
62
|
+
if opts.original:
|
|
63
|
+
ref_file = pathlib.Path(opts.original)
|
|
64
|
+
assert ref_file.is_file(), "Error: %s is not a file" % ref_file
|
|
65
|
+
assert ref_file.suffix.lower() in _SUPPORTED_EXTS, (
|
|
66
|
+
"Error: %s must be a .docx, .pptx, or .xlsx file" % ref_file
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
ext = (ref_file or target).suffix.lower()
|
|
70
|
+
assert ext in _SUPPORTED_EXTS, (
|
|
71
|
+
"Error: Cannot determine file type from %s. Use --original or provide a .docx/.pptx/.xlsx file." % target
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
if target.is_file() and target.suffix.lower() in _SUPPORTED_EXTS:
|
|
75
|
+
scratch = tempfile.mkdtemp()
|
|
76
|
+
with zipfile.ZipFile(target, "r") as zf:
|
|
77
|
+
zf.extractall(scratch)
|
|
78
|
+
work_dir = pathlib.Path(scratch)
|
|
79
|
+
else:
|
|
80
|
+
assert target.is_dir(), "Error: %s is not a directory or Office file" % target
|
|
81
|
+
work_dir = target
|
|
82
|
+
|
|
83
|
+
match ext:
|
|
84
|
+
case ".docx":
|
|
85
|
+
checkers = [
|
|
86
|
+
DOCXSchemaValidator(work_dir, ref_file, verbose=opts.verbose),
|
|
87
|
+
]
|
|
88
|
+
if ref_file:
|
|
89
|
+
checkers.append(
|
|
90
|
+
RedliningValidator(work_dir, ref_file, verbose=opts.verbose, author=opts.author)
|
|
91
|
+
)
|
|
92
|
+
case ".pptx":
|
|
93
|
+
checkers = [
|
|
94
|
+
PPTXSchemaValidator(work_dir, ref_file, verbose=opts.verbose),
|
|
95
|
+
]
|
|
96
|
+
case _:
|
|
97
|
+
print("Error: Validation not supported for file type %s" % ext)
|
|
98
|
+
sys.exit(1)
|
|
99
|
+
|
|
100
|
+
if opts.auto_repair:
|
|
101
|
+
n_fixed = sum(c.repair() for c in checkers)
|
|
102
|
+
if n_fixed:
|
|
103
|
+
print("Auto-repaired %d issue(s)" % n_fixed)
|
|
104
|
+
|
|
105
|
+
ok = all(c.validate() for c in checkers)
|
|
106
|
+
|
|
107
|
+
if ok:
|
|
108
|
+
print("All validations PASSED!")
|
|
109
|
+
|
|
110
|
+
sys.exit(0 if ok else 1)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
if __name__ == "__main__":
|
|
114
|
+
main()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Office document validation toolkit — exposes checker classes for
|
|
3
|
+
DOCX, PPTX, and tracked-change (redlining) scenarios.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .base import BaseSchemaValidator
|
|
7
|
+
from .docx import DOCXSchemaValidator
|
|
8
|
+
from .pptx import PPTXSchemaValidator
|
|
9
|
+
from .redlining import RedliningValidator
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"BaseSchemaValidator",
|
|
13
|
+
"DOCXSchemaValidator",
|
|
14
|
+
"PPTXSchemaValidator",
|
|
15
|
+
"RedliningValidator",
|
|
16
|
+
]
|