@brookmind/ai-toolkit 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -14
- package/agents/code-reviewer.md +6 -1
- package/agents/code-simplifier.md +52 -0
- package/bin/cli.js +1 -5
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +321 -0
- package/dist/index.js.map +1 -0
- package/mcps/context7/.mcp.json +13 -0
- package/mcps/expo-mcp/.mcp.json +13 -0
- package/mcps/figma-mcp/.mcp.json +4 -6
- package/package.json +22 -11
- package/skills/pdf-processing-pro/FORMS.md +610 -0
- package/skills/pdf-processing-pro/OCR.md +137 -0
- package/skills/pdf-processing-pro/SKILL.md +296 -0
- package/skills/pdf-processing-pro/TABLES.md +626 -0
- package/skills/pdf-processing-pro/scripts/analyze_form.py +307 -0
- package/skills/react-best-practices/AGENTS.md +915 -0
- package/skills/react-best-practices/README.md +127 -0
- package/skills/react-best-practices/SKILL.md +110 -0
- package/skills/react-best-practices/metadata.json +14 -0
- package/skills/react-best-practices/rules/_sections.md +41 -0
- package/skills/react-best-practices/rules/_template.md +28 -0
- package/skills/react-best-practices/rules/advanced-event-handler-refs.md +80 -0
- package/skills/react-best-practices/rules/advanced-use-latest.md +76 -0
- package/skills/react-best-practices/rules/async-defer-await.md +80 -0
- package/skills/react-best-practices/rules/async-dependencies.md +36 -0
- package/skills/react-best-practices/rules/async-parallel.md +28 -0
- package/skills/react-best-practices/rules/async-suspense-boundaries.md +100 -0
- package/skills/react-best-practices/rules/bundle-barrel-imports.md +42 -0
- package/skills/react-best-practices/rules/bundle-conditional.md +106 -0
- package/skills/react-best-practices/rules/bundle-preload.md +44 -0
- package/skills/react-best-practices/rules/client-event-listeners.md +131 -0
- package/skills/react-best-practices/rules/client-swr-dedup.md +133 -0
- package/skills/react-best-practices/rules/js-batch-dom-css.md +82 -0
- package/skills/react-best-practices/rules/js-cache-function-results.md +80 -0
- package/skills/react-best-practices/rules/js-cache-property-access.md +28 -0
- package/skills/react-best-practices/rules/js-cache-storage.md +70 -0
- package/skills/react-best-practices/rules/js-combine-iterations.md +32 -0
- package/skills/react-best-practices/rules/js-early-exit.md +50 -0
- package/skills/react-best-practices/rules/js-hoist-regexp.md +45 -0
- package/skills/react-best-practices/rules/js-index-maps.md +37 -0
- package/skills/react-best-practices/rules/js-length-check-first.md +49 -0
- package/skills/react-best-practices/rules/js-min-max-loop.md +82 -0
- package/skills/react-best-practices/rules/js-set-map-lookups.md +24 -0
- package/skills/react-best-practices/rules/js-tosorted-immutable.md +57 -0
- package/skills/react-best-practices/rules/rendering-activity.md +90 -0
- package/skills/react-best-practices/rules/rendering-animate-svg-wrapper.md +47 -0
- package/skills/react-best-practices/rules/rendering-conditional-render.md +40 -0
- package/skills/react-best-practices/rules/rendering-content-visibility.md +38 -0
- package/skills/react-best-practices/rules/rendering-hoist-jsx.md +65 -0
- package/skills/react-best-practices/rules/rendering-svg-precision.md +28 -0
- package/skills/react-best-practices/rules/rerender-defer-reads.md +39 -0
- package/skills/react-best-practices/rules/rerender-dependencies.md +45 -0
- package/skills/react-best-practices/rules/rerender-derived-state.md +29 -0
- package/skills/react-best-practices/rules/rerender-functional-setstate.md +74 -0
- package/skills/react-best-practices/rules/rerender-lazy-state-init.md +58 -0
- package/skills/react-best-practices/rules/rerender-memo.md +85 -0
- package/skills/react-best-practices/rules/rerender-transitions.md +40 -0
- package/themes/README.md +68 -0
- package/themes/claude-vivid.json +72 -0
- package/mcps/context7/.claude-plugin +0 -1
- package/mcps/context7/README.md +0 -1
- package/mcps/context7/server.json +0 -1
- package/mcps/expo-mcp/README.md +0 -33
- package/mcps/expo-mcp/package.json +0 -30
- package/mcps/figma-mcp/README.md +0 -554
- package/mcps/figma-mcp/server.json +0 -17
- package/mcps/figma-mcp/skills/code-connect-components +0 -1
- package/mcps/figma-mcp/skills/create-design-system-rules +0 -1
- package/mcps/figma-mcp/skills/implement-design +0 -1
- package/mcps/pg-aiguide/.claude-plugin +0 -1
- package/mcps/pg-aiguide/CLAUDE.md +0 -21
- package/mcps/pg-aiguide/README.md +0 -275
- package/mcps/pg-aiguide/skills/design-postgres-tables +0 -1
- package/mcps/pg-aiguide/skills/find-hypertable-candidates +0 -1
- package/mcps/pg-aiguide/skills/migrate-postgres-tables-to-hypertables +0 -1
- package/mcps/pg-aiguide/skills/setup-timescaledb-hypertables +0 -1
- package/mcps/pg-aiguide/skills.yaml +0 -4
- package/skills/cloudflare-cli/SKILL.md +0 -151
- package/skills/docx/LICENSE.txt +0 -30
- package/skills/docx/SKILL.md +0 -197
- package/skills/docx/docx-js.md +0 -350
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
- package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
- package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
- package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
- package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
- package/skills/docx/ooxml/schemas/mce/mc.xsd +0 -75
- package/skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +0 -560
- package/skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +0 -67
- package/skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +0 -14
- package/skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +0 -20
- package/skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +0 -13
- package/skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
- package/skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +0 -8
- package/skills/docx/ooxml/scripts/pack.py +0 -159
- package/skills/docx/ooxml/scripts/unpack.py +0 -29
- package/skills/docx/ooxml/scripts/validate.py +0 -69
- package/skills/docx/ooxml/scripts/validation/__init__.py +0 -15
- package/skills/docx/ooxml/scripts/validation/base.py +0 -951
- package/skills/docx/ooxml/scripts/validation/docx.py +0 -274
- package/skills/docx/ooxml/scripts/validation/pptx.py +0 -315
- package/skills/docx/ooxml/scripts/validation/redlining.py +0 -279
- package/skills/docx/ooxml.md +0 -610
- package/skills/docx/scripts/__init__.py +0 -1
- package/skills/docx/scripts/document.py +0 -1276
- package/skills/docx/scripts/templates/comments.xml +0 -3
- package/skills/docx/scripts/templates/commentsExtended.xml +0 -3
- package/skills/docx/scripts/templates/commentsExtensible.xml +0 -3
- package/skills/docx/scripts/templates/commentsIds.xml +0 -3
- package/skills/docx/scripts/templates/people.xml +0 -3
- package/skills/docx/scripts/utilities.py +0 -374
- package/skills/pdf/LICENSE.txt +0 -30
- package/skills/pdf/SKILL.md +0 -294
- package/skills/pdf/forms.md +0 -205
- package/skills/pdf/reference.md +0 -612
- package/skills/pdf/scripts/check_bounding_boxes.py +0 -70
- package/skills/pdf/scripts/check_bounding_boxes_test.py +0 -226
- package/skills/pdf/scripts/check_fillable_fields.py +0 -12
- package/skills/pdf/scripts/convert_pdf_to_images.py +0 -35
- package/skills/pdf/scripts/create_validation_image.py +0 -41
- package/skills/pdf/scripts/extract_form_field_info.py +0 -152
- package/skills/pdf/scripts/fill_fillable_fields.py +0 -114
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +0 -108
- package/skills/xlsx/LICENSE.txt +0 -30
- package/skills/xlsx/SKILL.md +0 -289
- package/skills/xlsx/recalc.py +0 -178
- package/src/index.js +0 -365
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:s="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2018/wordml/cex" targetNamespace="http://schemas.microsoft.com/office/word/2018/wordml/cex">
|
|
2
|
-
<xsd:import id="w16" namespace="http://schemas.microsoft.com/office/word/2018/wordml" schemaLocation="wml-2018.xsd"/>
|
|
3
|
-
<xsd:import id="w" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
4
|
-
<xsd:import id="s" namespace="http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes" schemaLocation="../ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd"/>
|
|
5
|
-
<xsd:complexType name="CT_CommentsExtensible">
|
|
6
|
-
<xsd:sequence>
|
|
7
|
-
<xsd:element name="commentExtensible" type="CT_CommentExtensible" minOccurs="0" maxOccurs="unbounded"/>
|
|
8
|
-
<xsd:element name="extLst" type="w16:CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
|
|
9
|
-
</xsd:sequence>
|
|
10
|
-
</xsd:complexType>
|
|
11
|
-
<xsd:complexType name="CT_CommentExtensible">
|
|
12
|
-
<xsd:sequence>
|
|
13
|
-
<xsd:element name="extLst" type="w16:CT_ExtensionList" minOccurs="0" maxOccurs="1"/>
|
|
14
|
-
</xsd:sequence>
|
|
15
|
-
<xsd:attribute name="durableId" type="w:ST_LongHexNumber" use="required"/>
|
|
16
|
-
<xsd:attribute name="dateUtc" type="w:ST_DateTime" use="optional"/>
|
|
17
|
-
<xsd:attribute name="intelligentPlaceholder" type="s:ST_OnOff" use="optional"/>
|
|
18
|
-
</xsd:complexType>
|
|
19
|
-
<xsd:element name="commentsExtensible" type="CT_CommentsExtensible"/>
|
|
20
|
-
</xsd:schema>
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2016/wordml/cid" targetNamespace="http://schemas.microsoft.com/office/word/2016/wordml/cid">
|
|
2
|
-
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
-
<xsd:complexType name="CT_CommentsIds">
|
|
4
|
-
<xsd:sequence>
|
|
5
|
-
<xsd:element name="commentId" type="CT_CommentId" minOccurs="0" maxOccurs="unbounded"/>
|
|
6
|
-
</xsd:sequence>
|
|
7
|
-
</xsd:complexType>
|
|
8
|
-
<xsd:complexType name="CT_CommentId">
|
|
9
|
-
<xsd:attribute name="paraId" type="w12:ST_LongHexNumber" use="required"/>
|
|
10
|
-
<xsd:attribute name="durableId" type="w12:ST_LongHexNumber" use="required"/>
|
|
11
|
-
</xsd:complexType>
|
|
12
|
-
<xsd:element name="commentsIds" type="CT_CommentsIds"/>
|
|
13
|
-
</xsd:schema>
|
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" targetNamespace="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash">
|
|
2
|
-
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
-
<xsd:attribute name="storeItemChecksum" type="w12:ST_String"/>
|
|
4
|
-
</xsd:schema>
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:w12="http://schemas.openxmlformats.org/wordprocessingml/2006/main" elementFormDefault="qualified" attributeFormDefault="qualified" blockDefault="#all" xmlns="http://schemas.microsoft.com/office/word/2015/wordml/symex" targetNamespace="http://schemas.microsoft.com/office/word/2015/wordml/symex">
|
|
2
|
-
<xsd:import id="w12" namespace="http://schemas.openxmlformats.org/wordprocessingml/2006/main" schemaLocation="../ISO-IEC29500-4_2016/wml.xsd"/>
|
|
3
|
-
<xsd:complexType name="CT_SymEx">
|
|
4
|
-
<xsd:attribute name="font" type="w12:ST_String"/>
|
|
5
|
-
<xsd:attribute name="char" type="w12:ST_LongHexNumber"/>
|
|
6
|
-
</xsd:complexType>
|
|
7
|
-
<xsd:element name="symEx" type="CT_SymEx"/>
|
|
8
|
-
</xsd:schema>
|
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Tool to pack a directory into a .docx, .pptx, or .xlsx file with XML formatting undone.
|
|
4
|
-
|
|
5
|
-
Example usage:
|
|
6
|
-
python pack.py <input_directory> <office_file> [--force]
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import argparse
|
|
10
|
-
import shutil
|
|
11
|
-
import subprocess
|
|
12
|
-
import sys
|
|
13
|
-
import tempfile
|
|
14
|
-
import defusedxml.minidom
|
|
15
|
-
import zipfile
|
|
16
|
-
from pathlib import Path
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def main():
    """Command-line entry point: parse arguments and pack the directory.

    The document is validated unless ``--force`` is passed. The process exits
    with status 1 when validation rejects the packed file, and exits with an
    ``Error: ...`` message when a ``ValueError`` is raised while packing.
    """
    cli = argparse.ArgumentParser(description="Pack a directory into an Office file")
    cli.add_argument("input_directory", help="Unpacked Office document directory")
    cli.add_argument("output_file", help="Output Office file (.docx/.pptx/.xlsx)")
    cli.add_argument("--force", action="store_true", help="Skip validation")
    args = cli.parse_args()

    try:
        packed_ok = pack_document(
            args.input_directory,
            args.output_file,
            validate=not args.force,
        )

        if args.force:
            # Validation never ran, so the result is unverified.
            print("Warning: Skipped validation, file may be corrupt", file=sys.stderr)
        elif not packed_ok:
            # Validation ran and rejected the packed file.
            for message in (
                "Contents would produce a corrupt file.",
                "Please validate XML before repacking.",
                "Use --force to skip validation and pack anyway.",
            ):
                print(message, file=sys.stderr)
            sys.exit(1)
    except ValueError as err:
        sys.exit(f"Error: {err}")
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def pack_document(input_dir, output_file, validate=False):
    """Pack a directory into an Office file (.docx/.pptx/.xlsx).

    The directory is first copied to a temporary location so the original
    files are never modified. Every ``*.xml`` and ``*.rels`` file in the copy
    is passed through ``condense_xml`` and the copy is then zipped into
    ``output_file``. Archive members are written in sorted order so that the
    same input always produces the same member layout.

    Args:
        input_dir: Path to unpacked Office document directory
        output_file: Path to output Office file; missing parent directories
            are created
        validate: If True, checks the packed file with ``validate_document``
            and deletes it when the check fails (default: False)

    Returns:
        bool: True if successful, False if validation failed

    Raises:
        ValueError: If ``input_dir`` is not a directory or ``output_file``
            does not end in .docx, .pptx, or .xlsx.
    """
    input_dir = Path(input_dir)
    output_file = Path(output_file)

    if not input_dir.is_dir():
        raise ValueError(f"{input_dir} is not a directory")
    if output_file.suffix.lower() not in {".docx", ".pptx", ".xlsx"}:
        raise ValueError(f"{output_file} must be a .docx, .pptx, or .xlsx file")

    # Work in temporary directory to avoid modifying original
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_content_dir = Path(temp_dir) / "content"
        shutil.copytree(input_dir, temp_content_dir)

        # Process XML files to remove pretty-printing whitespace
        for pattern in ("*.xml", "*.rels"):
            for xml_file in temp_content_dir.rglob(pattern):
                condense_xml(xml_file)

        # Create final Office file as zip archive
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf:
            # rglob order depends on the filesystem; sorting makes the
            # archive member order reproducible across runs and machines.
            for member in sorted(temp_content_dir.rglob("*")):
                if member.is_file():
                    arcname = member.relative_to(temp_content_dir).as_posix()
                    zf.write(member, arcname)

    # Validate if requested
    if validate and not validate_document(output_file):
        output_file.unlink()  # Delete the corrupt file
        return False

    return True
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def validate_document(doc_path):
    """Validate document by converting to HTML with soffice.

    The document counts as valid when ``soffice`` produces
    ``<stem>.html`` in a scratch directory.

    Args:
        doc_path: Path (``str`` or ``Path``) of the .docx/.pptx/.xlsx file.

    Returns:
        bool: True when the HTML output was produced, or when ``soffice`` is
        not installed (validation is skipped with a warning). False when the
        file type is unsupported, the conversion times out, no output is
        produced, or any other error occurs.
    """
    doc_path = Path(doc_path)

    # Determine the correct filter based on file extension
    export_filters = {
        ".docx": "html:HTML",
        ".pptx": "html:impress_html_Export",
        ".xlsx": "html:HTML (StarCalc)",
    }
    filter_name = export_filters.get(doc_path.suffix.lower())
    if filter_name is None:
        # Fail with a clear message instead of an unbound-variable error.
        print(
            f"Validation error: Unsupported file type {doc_path.suffix!r}",
            file=sys.stderr,
        )
        return False

    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            result = subprocess.run(
                [
                    "soffice",
                    "--headless",
                    "--convert-to",
                    filter_name,
                    "--outdir",
                    temp_dir,
                    str(doc_path),
                ],
                capture_output=True,
                timeout=10,
                text=True,
            )
            # The presence of the converted file is the success signal.
            if not (Path(temp_dir) / f"{doc_path.stem}.html").exists():
                error_msg = result.stderr.strip() or "Document validation failed"
                print(f"Validation error: {error_msg}", file=sys.stderr)
                return False
            return True
        except FileNotFoundError:
            # soffice is optional: without it the document is accepted as-is.
            print("Warning: soffice not found. Skipping validation.", file=sys.stderr)
            return True
        except subprocess.TimeoutExpired:
            print("Validation error: Timeout during conversion", file=sys.stderr)
            return False
        except Exception as e:
            print(f"Validation error: {e}", file=sys.stderr)
            return False
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def condense_xml(xml_file):
    """Strip unnecessary whitespace and remove comments, rewriting in place.

    Whitespace-only text nodes and comment nodes are removed from every
    element except text-run elements (local name ``t``, with or without a
    namespace prefix) and elements that declare ``xml:space="preserve"``,
    whose content is left untouched. The result is written back to
    ``xml_file`` as UTF-8.

    Args:
        xml_file: Path of the XML file to condense.
    """
    # Binary mode lets the parser pick the encoding from the BOM / XML
    # declaration instead of assuming the file is UTF-8.
    with open(xml_file, "rb") as f:
        dom = defusedxml.minidom.parse(f)

    # Process each element to remove whitespace and comments
    for element in dom.getElementsByTagName("*"):
        # Skip text elements: whitespace inside them is document content.
        local_name = element.tagName.rpartition(":")[2]
        if local_name == "t" or element.getAttribute("xml:space") == "preserve":
            continue

        # Remove whitespace-only text nodes and comment nodes
        for child in list(element.childNodes):
            is_blank_text = (
                child.nodeType == child.TEXT_NODE
                and child.nodeValue
                and child.nodeValue.strip() == ""
            )
            if is_blank_text or child.nodeType == child.COMMENT_NODE:
                element.removeChild(child)

    # Write back the condensed XML
    with open(xml_file, "wb") as f:
        f.write(dom.toxml(encoding="UTF-8"))
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
if __name__ == "__main__":
|
|
159
|
-
main()
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Unpack and format XML contents of Office files (.docx, .pptx, .xlsx)"""
|
|
3
|
-
|
|
4
|
-
import random
|
|
5
|
-
import sys
|
|
6
|
-
import defusedxml.minidom
|
|
7
|
-
import zipfile
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
|
|
10
|
-
# Get command line arguments
if len(sys.argv) != 3:
    # Explicit check rather than assert: asserts are stripped under `python -O`.
    sys.exit("Usage: python unpack.py <office_file> <output_dir>")
input_file, output_dir = sys.argv[1], sys.argv[2]

# Extract and format
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
# Context manager closes the archive handle once extraction is done.
with zipfile.ZipFile(input_file) as archive:
    archive.extractall(output_path)

# Pretty print all XML files
xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
for xml_file in xml_files:
    # Parse raw bytes so the encoding named in the XML declaration is honoured.
    content = xml_file.read_bytes()
    dom = defusedxml.minidom.parseString(content)
    xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="ascii"))

# For .docx files, suggest an RSID for tracked changes
if input_file.lower().endswith(".docx"):
    suggested_rsid = "".join(random.choices("0123456789ABCDEF", k=8))
    print(f"Suggested RSID for edit session: {suggested_rsid}")
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Command line tool to validate Office document XML files against XSD schemas and tracked changes.
|
|
4
|
-
|
|
5
|
-
Usage:
|
|
6
|
-
python validate.py <dir> --original <original_file>
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import argparse
|
|
10
|
-
import sys
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
|
|
13
|
-
from validation import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def main():
|
|
17
|
-
parser = argparse.ArgumentParser(description="Validate Office document XML files")
|
|
18
|
-
parser.add_argument(
|
|
19
|
-
"unpacked_dir",
|
|
20
|
-
help="Path to unpacked Office document directory",
|
|
21
|
-
)
|
|
22
|
-
parser.add_argument(
|
|
23
|
-
"--original",
|
|
24
|
-
required=True,
|
|
25
|
-
help="Path to original file (.docx/.pptx/.xlsx)",
|
|
26
|
-
)
|
|
27
|
-
parser.add_argument(
|
|
28
|
-
"-v",
|
|
29
|
-
"--verbose",
|
|
30
|
-
action="store_true",
|
|
31
|
-
help="Enable verbose output",
|
|
32
|
-
)
|
|
33
|
-
args = parser.parse_args()
|
|
34
|
-
|
|
35
|
-
# Validate paths
|
|
36
|
-
unpacked_dir = Path(args.unpacked_dir)
|
|
37
|
-
original_file = Path(args.original)
|
|
38
|
-
file_extension = original_file.suffix.lower()
|
|
39
|
-
assert unpacked_dir.is_dir(), f"Error: {unpacked_dir} is not a directory"
|
|
40
|
-
assert original_file.is_file(), f"Error: {original_file} is not a file"
|
|
41
|
-
assert file_extension in [".docx", ".pptx", ".xlsx"], (
|
|
42
|
-
f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
# Run validations
|
|
46
|
-
match file_extension:
|
|
47
|
-
case ".docx":
|
|
48
|
-
validators = [DOCXSchemaValidator, RedliningValidator]
|
|
49
|
-
case ".pptx":
|
|
50
|
-
validators = [PPTXSchemaValidator]
|
|
51
|
-
case _:
|
|
52
|
-
print(f"Error: Validation not supported for file type {file_extension}")
|
|
53
|
-
sys.exit(1)
|
|
54
|
-
|
|
55
|
-
# Run validators
|
|
56
|
-
success = True
|
|
57
|
-
for V in validators:
|
|
58
|
-
validator = V(unpacked_dir, original_file, verbose=args.verbose)
|
|
59
|
-
if not validator.validate():
|
|
60
|
-
success = False
|
|
61
|
-
|
|
62
|
-
if success:
|
|
63
|
-
print("All validations PASSED!")
|
|
64
|
-
|
|
65
|
-
sys.exit(0 if success else 1)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
if __name__ == "__main__":
|
|
69
|
-
main()
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Validation modules for Office document processing (Word and PowerPoint).
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from .base import BaseSchemaValidator
|
|
6
|
-
from .docx import DOCXSchemaValidator
|
|
7
|
-
from .pptx import PPTXSchemaValidator
|
|
8
|
-
from .redlining import RedliningValidator
|
|
9
|
-
|
|
10
|
-
__all__ = [
|
|
11
|
-
"BaseSchemaValidator",
|
|
12
|
-
"DOCXSchemaValidator",
|
|
13
|
-
"PPTXSchemaValidator",
|
|
14
|
-
"RedliningValidator",
|
|
15
|
-
]
|