@farazirfan/costar-server-executor 1.7.37 → 1.7.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/agent.d.ts +90 -0
- package/dist/agent/agent.d.ts.map +1 -1
- package/dist/agent/agent.js +606 -0
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/pi-embedded-runner/run.d.ts.map +1 -1
- package/dist/agent/pi-embedded-runner/run.js +2 -1
- package/dist/agent/pi-embedded-runner/run.js.map +1 -1
- package/dist/agent/pi-embedded-runner/system-prompt.d.ts.map +1 -1
- package/dist/agent/pi-embedded-runner/system-prompt.js +16 -37
- package/dist/agent/pi-embedded-runner/system-prompt.js.map +1 -1
- package/dist/agent/pi-embedded-runner/tools.d.ts +4 -1
- package/dist/agent/pi-embedded-runner/tools.d.ts.map +1 -1
- package/dist/agent/pi-embedded-runner/tools.js +3 -1
- package/dist/agent/pi-embedded-runner/tools.js.map +1 -1
- package/dist/agent/pi-embedded-runner/types.d.ts +4 -0
- package/dist/agent/pi-embedded-runner/types.d.ts.map +1 -1
- package/dist/cli/env-loader.d.ts.map +1 -1
- package/dist/cli/env-loader.js +1 -0
- package/dist/cli/env-loader.js.map +1 -1
- package/dist/cli/setup.js +2 -2
- package/dist/cli/setup.js.map +1 -1
- package/dist/cron/normalize.d.ts +31 -0
- package/dist/cron/normalize.d.ts.map +1 -0
- package/dist/cron/normalize.js +211 -0
- package/dist/cron/normalize.js.map +1 -0
- package/dist/cron/scheduler.d.ts +33 -3
- package/dist/cron/scheduler.d.ts.map +1 -1
- package/dist/cron/scheduler.js +253 -48
- package/dist/cron/scheduler.js.map +1 -1
- package/dist/heartbeat/runner.d.ts +27 -12
- package/dist/heartbeat/runner.d.ts.map +1 -1
- package/dist/heartbeat/runner.js +82 -104
- package/dist/heartbeat/runner.js.map +1 -1
- package/dist/infra/heartbeat-events-filter.d.ts +29 -0
- package/dist/infra/heartbeat-events-filter.d.ts.map +1 -0
- package/dist/infra/heartbeat-events-filter.js +80 -0
- package/dist/infra/heartbeat-events-filter.js.map +1 -0
- package/dist/infra/index.d.ts +9 -0
- package/dist/infra/index.d.ts.map +1 -0
- package/dist/infra/index.js +9 -0
- package/dist/infra/index.js.map +1 -0
- package/dist/infra/system-events.d.ts +58 -2
- package/dist/infra/system-events.d.ts.map +1 -1
- package/dist/infra/system-events.js +80 -14
- package/dist/infra/system-events.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +6 -1
- package/dist/server.js.map +1 -1
- package/dist/services/platform-keys.d.ts +19 -0
- package/dist/services/platform-keys.d.ts.map +1 -0
- package/dist/services/platform-keys.js +74 -0
- package/dist/services/platform-keys.js.map +1 -0
- package/dist/subagent/registry.d.ts +96 -0
- package/dist/subagent/registry.d.ts.map +1 -0
- package/dist/subagent/registry.js +180 -0
- package/dist/subagent/registry.js.map +1 -0
- package/dist/tools/complete-turn.d.ts +2 -2
- package/dist/tools/complete-turn.js +10 -10
- package/dist/tools/complete-turn.js.map +1 -1
- package/dist/tools/contacts.d.ts +13 -0
- package/dist/tools/contacts.d.ts.map +1 -0
- package/dist/tools/contacts.js +80 -0
- package/dist/tools/contacts.js.map +1 -0
- package/dist/tools/cron.d.ts +17 -2
- package/dist/tools/cron.d.ts.map +1 -1
- package/dist/tools/cron.js +117 -35
- package/dist/tools/cron.js.map +1 -1
- package/dist/tools/google-maps.d.ts +6 -6
- package/dist/tools/google-maps.d.ts.map +1 -1
- package/dist/tools/google-maps.js +207 -262
- package/dist/tools/google-maps.js.map +1 -1
- package/dist/tools/index.d.ts +17 -7
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +40 -9
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/phone-call.d.ts +11 -0
- package/dist/tools/phone-call.d.ts.map +1 -0
- package/dist/tools/phone-call.js +151 -0
- package/dist/tools/phone-call.js.map +1 -0
- package/dist/tools/sessions-spawn.d.ts +33 -0
- package/dist/tools/sessions-spawn.d.ts.map +1 -0
- package/dist/tools/sessions-spawn.js +164 -0
- package/dist/tools/sessions-spawn.js.map +1 -0
- package/dist/tools/spotify.d.ts +12 -0
- package/dist/tools/spotify.d.ts.map +1 -0
- package/dist/tools/spotify.js +251 -0
- package/dist/tools/spotify.js.map +1 -0
- package/dist/tools/subagents.d.ts +23 -0
- package/dist/tools/subagents.d.ts.map +1 -0
- package/dist/tools/subagents.js +209 -0
- package/dist/tools/subagents.js.map +1 -0
- package/dist/tools/whatsapp.d.ts +13 -0
- package/dist/tools/whatsapp.d.ts.map +1 -0
- package/dist/tools/whatsapp.js +215 -0
- package/dist/tools/whatsapp.js.map +1 -0
- package/dist/tools/youtube.d.ts +12 -0
- package/dist/tools/youtube.d.ts.map +1 -0
- package/dist/tools/youtube.js +218 -0
- package/dist/tools/youtube.js.map +1 -0
- package/dist/utils/asterizk-auth.d.ts +43 -0
- package/dist/utils/asterizk-auth.d.ts.map +1 -0
- package/dist/utils/asterizk-auth.js +125 -0
- package/dist/utils/asterizk-auth.js.map +1 -0
- package/dist/web-server.d.ts.map +1 -1
- package/dist/web-server.js +132 -0
- package/dist/web-server.js.map +1 -1
- package/dist/workspace/index.d.ts +3 -4
- package/dist/workspace/index.d.ts.map +1 -1
- package/dist/workspace/index.js +3 -4
- package/dist/workspace/index.js.map +1 -1
- package/dist/workspace/templates.d.ts +8 -7
- package/dist/workspace/templates.d.ts.map +1 -1
- package/dist/workspace/templates.js +18 -127
- package/dist/workspace/templates.js.map +1 -1
- package/dist/workspace/workspace.d.ts +2 -4
- package/dist/workspace/workspace.d.ts.map +1 -1
- package/dist/workspace/workspace.js +7 -16
- package/dist/workspace/workspace.js.map +1 -1
- package/package.json +1 -1
- package/public/index.html +231 -0
- package/skills/docx/SKILL.md +468 -0
- package/skills/docx/scripts/__init__.py +1 -0
- package/skills/docx/scripts/accept_changes.py +181 -0
- package/skills/docx/scripts/comment.py +347 -0
- package/skills/docx/scripts/helpers/__init__.py +0 -0
- package/skills/docx/scripts/helpers/merge_runs.py +231 -0
- package/skills/docx/scripts/helpers/simplify_redlines.py +240 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/docx/scripts/ooxml/schemas/mce/mc.xsd +75 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/docx/scripts/ooxml/scripts/pack.py +159 -0
- package/skills/docx/scripts/ooxml/scripts/unpack.py +29 -0
- package/skills/docx/scripts/ooxml/scripts/validate.py +106 -0
- package/skills/docx/scripts/ooxml/scripts/validation/__init__.py +15 -0
- package/skills/docx/scripts/ooxml/scripts/validation/base.py +1023 -0
- package/skills/docx/scripts/ooxml/scripts/validation/docx.py +519 -0
- package/skills/docx/scripts/ooxml/scripts/validation/pptx.py +315 -0
- package/skills/docx/scripts/ooxml/scripts/validation/redlining.py +284 -0
- package/skills/docx/scripts/pack.py +166 -0
- package/skills/docx/scripts/templates/comments.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/skills/docx/scripts/templates/people.xml +3 -0
- package/skills/docx/scripts/unpack.py +134 -0
- package/skills/longform-video-generation/SKILL.md +298 -0
- package/skills/longform-video-generation/references/advanced_techniques.md +474 -0
- package/skills/longform-video-generation/references/google_api_guide.md +288 -0
- package/skills/longform-video-generation/scripts/video_generator.py +579 -0
- package/skills/pdf/FORMS.md +305 -0
- package/skills/pdf/REFERENCE.md +612 -0
- package/skills/pdf/SKILL.md +293 -0
- package/skills/pdf/scripts/check_bounding_boxes.py +70 -0
- package/skills/pdf/scripts/check_fillable_fields.py +12 -0
- package/skills/pdf/scripts/convert_pdf_to_images.py +35 -0
- package/skills/pdf/scripts/create_validation_image.py +41 -0
- package/skills/pdf/scripts/extract_form_field_info.py +152 -0
- package/skills/pdf/scripts/extract_form_structure.py +124 -0
- package/skills/pdf/scripts/fill_fillable_fields.py +116 -0
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +136 -0
- package/skills/pptx/SKILL.md +171 -0
- package/skills/pptx/editing.md +205 -0
- package/skills/pptx/pptxgenjs.md +377 -0
- package/skills/pptx/scripts/add_slide.py +225 -0
- package/skills/pptx/scripts/clean.py +309 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/pptx/scripts/ooxml/schemas/mce/mc.xsd +75 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/pptx/scripts/ooxml/scripts/pack.py +159 -0
- package/skills/pptx/scripts/ooxml/scripts/unpack.py +29 -0
- package/skills/pptx/scripts/ooxml/scripts/validate.py +106 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/__init__.py +15 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/base.py +1023 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/docx.py +519 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/pptx.py +315 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/redlining.py +284 -0
- package/skills/pptx/scripts/pack.py +168 -0
- package/skills/pptx/scripts/thumbnail.py +318 -0
- package/skills/pptx/scripts/unpack.py +86 -0
- package/skills/xlsx/SKILL.md +291 -0
- package/skills/xlsx/recalc.py +247 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Pack a directory into a PPTX file.
|
|
3
|
+
|
|
4
|
+
Validates with auto-repair, condenses XML formatting, and creates the PPTX.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import shutil
|
|
9
|
+
import sys
|
|
10
|
+
import tempfile
|
|
11
|
+
import zipfile
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import defusedxml.minidom
|
|
15
|
+
|
|
16
|
+
from ooxml.scripts.validation import PPTXSchemaValidator
|
|
17
|
+
|
|
18
|
+
# Smart quotes to re-encode after DOM serialization (DOM decodes entities to Unicode).
# Each value must be an XML numeric character reference, not the character itself:
# mapping a character to itself would turn the re-encoding step into a no-op.
# The references are generated from the code points so the table cannot be
# corrupted by tooling that decodes entities in source text.
SMART_QUOTE_REPLACEMENTS = {
    char: f"&#x{ord(char):04X};"
    for char in (
        "\u201c",  # Left double quotation mark
        "\u201d",  # Right double quotation mark
        "\u2018",  # Left single quotation mark
        "\u2019",  # Right single quotation mark
    )
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def pack(
|
|
28
|
+
input_directory: str,
|
|
29
|
+
output_file: str,
|
|
30
|
+
original_file: str | None = None,
|
|
31
|
+
validate: bool = True,
|
|
32
|
+
) -> tuple[None, str]:
|
|
33
|
+
"""Pack a directory into a PPTX file.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
input_directory: Path to unpacked PPTX directory
|
|
37
|
+
output_file: Path to output PPTX file
|
|
38
|
+
original_file: Path to original PPTX for validation comparison
|
|
39
|
+
validate: If True, run validation with auto-repair before packing
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
(None, message) - message indicates success or failure
|
|
43
|
+
"""
|
|
44
|
+
input_dir = Path(input_directory)
|
|
45
|
+
output_path = Path(output_file)
|
|
46
|
+
|
|
47
|
+
if not input_dir.is_dir():
|
|
48
|
+
return None, f"Error: {input_dir} is not a directory"
|
|
49
|
+
|
|
50
|
+
if output_path.suffix.lower() != ".pptx":
|
|
51
|
+
return None, f"Error: {output_file} must be a .pptx file"
|
|
52
|
+
|
|
53
|
+
# Validate with auto-repair if requested and original file provided
|
|
54
|
+
if validate and original_file:
|
|
55
|
+
original_path = Path(original_file)
|
|
56
|
+
if original_path.exists():
|
|
57
|
+
success, output = _run_validation(input_dir, original_path)
|
|
58
|
+
if output:
|
|
59
|
+
print(output)
|
|
60
|
+
if not success:
|
|
61
|
+
return None, f"Error: Validation failed for {input_dir}"
|
|
62
|
+
|
|
63
|
+
# Work in temporary directory to avoid modifying original
|
|
64
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
65
|
+
temp_content_dir = Path(temp_dir) / "content"
|
|
66
|
+
shutil.copytree(input_dir, temp_content_dir)
|
|
67
|
+
|
|
68
|
+
# Process XML files to remove pretty-printing whitespace
|
|
69
|
+
for pattern in ["*.xml", "*.rels"]:
|
|
70
|
+
for xml_file in temp_content_dir.rglob(pattern):
|
|
71
|
+
_condense_xml(xml_file)
|
|
72
|
+
|
|
73
|
+
# Create final PPTX file as zip archive
|
|
74
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
75
|
+
with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
|
76
|
+
for f in temp_content_dir.rglob("*"):
|
|
77
|
+
if f.is_file():
|
|
78
|
+
zf.write(f, f.relative_to(temp_content_dir))
|
|
79
|
+
|
|
80
|
+
return None, f"Successfully packed {input_dir} to {output_file}"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _run_validation(unpacked_dir: Path, original_file: Path) -> tuple[bool, str | None]:
    """Auto-repair the unpacked directory, then validate it.

    Args:
        unpacked_dir: Directory holding the unpacked PPTX contents.
        original_file: Original PPTX passed to the validator.

    Returns:
        ``(success, output)`` where ``success`` is the result of
        ``validate()`` and ``output`` is the newline-joined status text, or
        None when there is nothing to report.
    """
    messages = []

    # Repair pass: report only when repair() returns a truthy count.
    repaired = PPTXSchemaValidator(unpacked_dir, original_file).repair()
    if repaired:
        messages.append(f"Auto-repaired {repaired} issue(s)")

    # Validation pass uses a freshly constructed validator, as the repair
    # pass may have changed files on disk.
    passed = PPTXSchemaValidator(unpacked_dir, original_file).validate()
    if passed:
        messages.append("All validations PASSED!")

    if not messages:
        return passed, None
    return passed, "\n".join(messages)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _encode_smart_quotes(text: str) -> str:
    """Return *text* with each key of SMART_QUOTE_REPLACEMENTS replaced by its value.

    Used after DOM serialization, which emits smart quotes as plain Unicode.
    """
    # All keys are single characters, so one translate() pass is equivalent
    # to replacing them one after another.
    return text.translate(str.maketrans(SMART_QUOTE_REPLACEMENTS))
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _condense_xml(xml_file: Path) -> None:
    """Strip pretty-printing whitespace and comments from an XML file in place.

    Whitespace-only text nodes and comment nodes are removed from every
    element except those whose tag name ends in ``:t`` (text runs such as
    ``a:t``), whose content is preserved verbatim. Smart quotes are then
    re-encoded and the file is rewritten as UTF-8.

    This step is best-effort: if the file cannot be read, parsed, or
    rewritten, it is left as-is and a warning is written to stderr so the
    failure is visible instead of being dropped silently.

    Args:
        xml_file: Path of the XML (or .rels) file to condense.
    """
    try:
        with open(xml_file, encoding="utf-8") as f:
            dom = defusedxml.minidom.parse(f)

        for element in dom.getElementsByTagName("*"):
            # Skip text elements (w:t, a:t, etc.) - whitespace there is content.
            if element.tagName.endswith(":t"):
                continue

            # Iterate over a copy: removeChild mutates element.childNodes.
            for child in list(element.childNodes):
                is_blank_text = (
                    child.nodeType == child.TEXT_NODE
                    and child.nodeValue
                    and child.nodeValue.strip() == ""
                )
                if is_blank_text or child.nodeType == child.COMMENT_NODE:
                    element.removeChild(child)

        # Re-encode smart quotes that DOM decoded to Unicode.
        output = _encode_smart_quotes(dom.toxml(encoding="UTF-8").decode("utf-8"))
        xml_file.write_text(output, encoding="utf-8")
    except Exception as exc:
        # Deliberately broad: one bad file must not abort packing, but the
        # reason is reported rather than swallowed.
        print(f"Warning: could not condense {xml_file}: {exc}", file=sys.stderr)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
if __name__ == "__main__":
    # Command-line entry point: parse arguments, run pack(), report the result.
    parser = argparse.ArgumentParser(description="Pack a directory into a PPTX file")
    parser.add_argument("input_directory", help="Unpacked PPTX directory")
    parser.add_argument("output_file", help="Output PPTX file")
    parser.add_argument(
        "--original",
        help="Original PPTX file for validation comparison",
    )
    parser.add_argument(
        "--validate",
        # Only the literal "true" (any case) enables validation.
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Run validation with auto-repair (default: true)",
    )
    args = parser.parse_args()

    _, message = pack(
        args.input_directory,
        args.output_file,
        original_file=args.original,
        validate=args.validate,
    )
    print(message)

    # pack() starts every failure message with "Error". Testing the prefix
    # (not any occurrence) keeps a successful run from exiting non-zero just
    # because one of the paths in the message contains the word "Error".
    if message.startswith("Error"):
        sys.exit(1)
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Create thumbnail grids from PowerPoint presentation slides.
|
|
3
|
+
|
|
4
|
+
Creates a grid layout of slide thumbnails for quick visual analysis.
|
|
5
|
+
Labels each thumbnail with its XML filename (e.g., slide1.xml).
|
|
6
|
+
Hidden slides are shown with a placeholder pattern.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python thumbnail.py input.pptx [output_prefix] [--cols N]
|
|
10
|
+
|
|
11
|
+
Examples:
|
|
12
|
+
python thumbnail.py presentation.pptx
|
|
13
|
+
# Creates: thumbnails.jpg
|
|
14
|
+
|
|
15
|
+
python thumbnail.py template.pptx grid --cols 4
|
|
16
|
+
# Creates: grid.jpg (or grid-1.jpg, grid-2.jpg for large decks)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import subprocess
|
|
21
|
+
import sys
|
|
22
|
+
import tempfile
|
|
23
|
+
import zipfile
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
import defusedxml.minidom
|
|
27
|
+
from PIL import Image, ImageDraw, ImageFont
|
|
28
|
+
|
|
29
|
+
# Constants

# Rendering
THUMBNAIL_WIDTH = 300  # width handed to create_grids() for each thumbnail
CONVERSION_DPI = 100  # resolution passed to pdftoppm via -r
JPEG_QUALITY = 95  # quality used when saving each grid image

# Grid shape
MAX_COLS = 6  # upper limit applied to --cols
DEFAULT_COLS = 3  # --cols value when the option is omitted

# Grid layout
GRID_PADDING = 20  # spacing added around and between grid cells
BORDER_WIDTH = 2  # NOTE(review): not referenced in the code visible here
FONT_SIZE_RATIO = 0.10  # label font size as a fraction of thumbnail width
LABEL_PADDING_RATIO = 0.4  # label padding as a fraction of the font size
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def main():
    """Parse command-line arguments and write the thumbnail grid image(s).

    On success, prints the number of grids created followed by each file
    path. For errors detected here (invalid --cols, invalid input file, no
    slides, or any exception raised while converting), prints an
    "Error: ..." line to stderr and exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Create thumbnail grids from PowerPoint slides."
    )
    parser.add_argument("input", help="Input PowerPoint file (.pptx)")
    parser.add_argument(
        "output_prefix",
        nargs="?",
        default="thumbnails",
        help="Output prefix for image files (default: thumbnails)",
    )
    parser.add_argument(
        "--cols",
        type=int,
        default=DEFAULT_COLS,
        help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
    )

    args = parser.parse_args()

    # Reject non-positive column counts up front: 0 or -1 would give the grid
    # chunking a zero step (a cryptic range() error), and other negative
    # values would produce nonsensical grid dimensions.
    if args.cols < 1:
        print("Error: --cols must be at least 1", file=sys.stderr)
        sys.exit(1)

    cols = min(args.cols, MAX_COLS)
    if args.cols > MAX_COLS:
        print(f"Warning: Columns limited to {MAX_COLS}")

    input_path = Path(args.input)
    if not input_path.exists() or input_path.suffix.lower() != ".pptx":
        print(f"Error: Invalid PowerPoint file: {args.input}", file=sys.stderr)
        sys.exit(1)

    output_path = Path(f"{args.output_prefix}.jpg")

    try:
        # Get slide info (filenames and hidden status) in presentation order
        slide_info = get_slide_info(input_path)

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            visible_images = convert_to_images(input_path, temp_path)

            # Nothing rendered and nothing hidden means there is nothing to show.
            if not visible_images and not any(s["hidden"] for s in slide_info):
                print("Error: No slides found", file=sys.stderr)
                sys.exit(1)

            # Build slide list with images (visible) or placeholders (hidden)
            slides = build_slide_list(slide_info, visible_images, temp_path)

            # Grids must be written before the temporary directory is removed,
            # because the slide images live inside it.
            grid_files = create_grids(slides, cols, THUMBNAIL_WIDTH, output_path)

            print(f"Created {len(grid_files)} grid(s):")
            for grid_file in grid_files:
                print(f"  {grid_file}")

    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass, so it passes through this handler untouched.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def get_slide_info(pptx_path: Path) -> list[dict]:
    """List the presentation's slides in the order given by presentation.xml.

    Args:
        pptx_path: Path to the .pptx archive.

    Returns:
        One dict per ``p:sldId`` entry whose relationship id resolves to a
        slide file, with keys ``'name'`` (slide filename, e.g. ``slide1.xml``)
        and ``'hidden'`` (True when the entry has ``show="0"``).
    """
    with zipfile.ZipFile(pptx_path, "r") as archive:
        # Relationship id -> slide filename, from presentation.xml.rels.
        rels_xml = archive.read("ppt/_rels/presentation.xml.rels").decode("utf-8")
        rels_dom = defusedxml.minidom.parseString(rels_xml)
        slide_file_by_rid = {
            rel.getAttribute("Id"): rel.getAttribute("Target").replace("slides/", "")
            for rel in rels_dom.getElementsByTagName("Relationship")
            if "slide" in rel.getAttribute("Type")
            and rel.getAttribute("Target").startswith("slides/")
        }

        # Slide order and hidden flag, from presentation.xml.
        pres_xml = archive.read("ppt/presentation.xml").decode("utf-8")
        pres_dom = defusedxml.minidom.parseString(pres_xml)
        return [
            {
                "name": slide_file_by_rid[rid],
                "hidden": node.getAttribute("show") == "0",
            }
            for node in pres_dom.getElementsByTagName("p:sldId")
            if (rid := node.getAttribute("r:id")) in slide_file_by_rid
        ]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def build_slide_list(
    slide_info: list[dict],
    visible_images: list[Path],
    temp_dir: Path,
) -> list[tuple[Path, str]]:
    """Pair every slide with an image path and a label, in presentation order.

    Visible slides consume ``visible_images`` in order; a visible slide with
    no remaining image is left out. Hidden slides get a placeholder JPEG
    written into ``temp_dir`` and a label ending in ``" (hidden)"``.

    Args:
        slide_info: Dicts with 'name' and 'hidden' keys, in presentation order.
        visible_images: Rendered images of the visible slides, in order.
        temp_dir: Directory in which placeholder images are created.

    Returns:
        List of ``(image_path, label)`` tuples.
    """
    # Placeholders match the first rendered slide; otherwise fall back to 1920x1080.
    placeholder_size = (1920, 1080)
    if visible_images:
        with Image.open(visible_images[0]) as first_image:
            placeholder_size = first_image.size

    exhausted = object()  # marks "no rendered image left"
    rendered = iter(visible_images)
    paired = []

    for entry in slide_info:
        if not entry["hidden"]:
            image_path = next(rendered, exhausted)
            if image_path is not exhausted:
                paired.append((image_path, entry["name"]))
            continue

        # Hidden slide: write a stand-in image instead of a rendering.
        stand_in = temp_dir / f"hidden-{entry['name']}.jpg"
        create_hidden_placeholder(placeholder_size).save(stand_in, "JPEG")
        paired.append((stand_in, f"{entry['name']} (hidden)"))

    return paired
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def create_hidden_placeholder(size: tuple[int, int]) -> Image.Image:
    """Return a light-gray RGB image of *size* with an X drawn corner to corner.

    Args:
        size: ``(width, height)`` of the image to create.

    Returns:
        The new image, used to stand in for a hidden slide.
    """
    canvas = Image.new("RGB", size, color="#F0F0F0")
    pen = ImageDraw.Draw(canvas)

    # Stroke scales with the shorter side but never drops below 5.
    stroke = max(5, min(size) // 100)

    diagonals = (
        [(0, 0), size],
        [(size[0], 0), (0, size[1])],
    )
    for endpoints in diagonals:
        pen.line(endpoints, fill="#CCCCCC", width=stroke)

    return canvas
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _describe_failure(summary: str, result: subprocess.CompletedProcess) -> str:
    """Return *summary*, followed by the tool's captured stderr (or stdout) if any."""
    detail = (result.stderr or result.stdout or "").strip()
    return f"{summary}: {detail}" if detail else summary


def convert_to_images(pptx_path: Path, temp_dir: Path) -> list[Path]:
    """Convert a PowerPoint file to one JPEG per page, going through PDF.

    Runs ``soffice`` to produce a PDF in ``temp_dir`` and then ``pdftoppm``
    to render that PDF to ``slide-*.jpg`` files in the same directory.

    Args:
        pptx_path: Presentation to convert.
        temp_dir: Directory that receives the PDF and the rendered images.

    Returns:
        The rendered image paths, sorted by filename.

    Raises:
        RuntimeError: If either tool exits non-zero or no PDF was produced.
            The message includes the tool's captured output when there is
            any, so the cause is not lost.
    """
    pdf_path = temp_dir / f"{pptx_path.stem}.pdf"

    # Convert to PDF
    result = subprocess.run(
        [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(temp_dir),
            str(pptx_path),
        ],
        capture_output=True,
        text=True,
    )
    # Also check for the file itself, not just the exit status.
    if result.returncode != 0 or not pdf_path.exists():
        raise RuntimeError(_describe_failure("PDF conversion failed", result))

    # Convert PDF to images
    result = subprocess.run(
        [
            "pdftoppm",
            "-jpeg",
            "-r",
            str(CONVERSION_DPI),
            str(pdf_path),
            str(temp_dir / "slide"),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(_describe_failure("Image conversion failed", result))

    return sorted(temp_dir.glob("slide-*.jpg"))
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def create_grids(
    slides: list[tuple[Path, str]],
    cols: int,
    width: int,
    output_path: Path,
) -> list[str]:
    """Render the slides into one or more grid images and save them.

    Each grid holds at most ``cols * (cols + 1)`` slides. When everything
    fits in one grid it is saved to ``output_path``; otherwise the files are
    named ``<stem>-1<suffix>``, ``<stem>-2<suffix>``, ... next to it.

    Args:
        slides: ``(image_path, label)`` tuples in display order.
        cols: Number of columns per grid.
        width: Thumbnail width passed through to create_grid().
        output_path: Base output path for the grid image(s).

    Returns:
        Paths of the saved grid files, as strings, in creation order.
    """
    per_grid = cols * (cols + 1)
    fits_in_one = len(slides) <= per_grid
    written = []

    for page_number, offset in enumerate(range(0, len(slides), per_grid), start=1):
        batch = slides[offset : offset + per_grid]
        sheet = create_grid(batch, cols, width)

        if fits_in_one:
            target = output_path
        else:
            numbered_name = f"{output_path.stem}-{page_number}{output_path.suffix}"
            target = output_path.parent / numbered_name

        target.parent.mkdir(parents=True, exist_ok=True)
        sheet.save(str(target), quality=JPEG_QUALITY)
        written.append(str(target))

    return written
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def create_grid(
    slides: list[tuple[Path, str]],
    cols: int,
    width: int,
) -> Image.Image:
    """Compose a single labelled thumbnail sheet from the given slides.

    Every cell holds a centred text label above a thumbnail. Cells are laid
    out row by row, ``cols`` per row, separated by ``GRID_PADDING``.

    Args:
        slides: ``(image path, label)`` pairs; must contain at least one
            entry because the first image determines the cell aspect ratio.
        cols: Number of cells per row.
        width: Width of each thumbnail cell.

    Returns:
        A new RGB image on a white background containing all thumbnails.
    """
    label_font_size = int(width * FONT_SIZE_RATIO)
    pad = int(label_font_size * LABEL_PADDING_RATIO)

    # The first slide's aspect ratio fixes the thumbnail height for all cells.
    with Image.open(slides[0][0]) as first_image:
        ratio = first_image.height / first_image.width
    height = int(width * ratio)

    # One cell = thumbnail + label text + padding above and below the label.
    cell_h = height + label_font_size + pad * 2

    row_count = (len(slides) + cols - 1) // cols
    sheet_size = (
        cols * width + (cols + 1) * GRID_PADDING,
        row_count * cell_h + (row_count + 1) * GRID_PADDING,
    )

    sheet = Image.new("RGB", sheet_size, "white")
    pen = ImageDraw.Draw(sheet)

    # NOTE(review): the fallback presumably covers Pillow versions whose
    # load_default() does not accept a size argument - confirm.
    try:
        font = ImageFont.load_default(size=label_font_size)
    except Exception:
        font = ImageFont.load_default()

    for index, (image_path, caption) in enumerate(slides):
        row, col = divmod(index, cols)
        x = col * width + (col + 1) * GRID_PADDING
        y_base = row * cell_h + (row + 1) * GRID_PADDING

        # Label: horizontally centred over the cell, inside the top padding.
        left, _, right, _ = pen.textbbox((0, 0), caption, font=font)
        caption_w = right - left
        caption_pos = (x + (width - caption_w) // 2, y_base + pad)
        pen.text(caption_pos, caption, fill="black", font=font)

        # Thumbnail: starts below the label band, centred within its cell.
        y_thumbnail = y_base + pad + label_font_size + pad

        with Image.open(image_path) as thumb:
            thumb.thumbnail((width, height), Image.Resampling.LANCZOS)
            w, h = thumb.size
            tx = x + (width - w) // 2
            ty = y_thumbnail + (height - h) // 2
            sheet.paste(thumb, (tx, ty))

            if BORDER_WIDTH > 0:
                # Outline sits just outside the pasted thumbnail.
                top_left = (tx - BORDER_WIDTH, ty - BORDER_WIDTH)
                bottom_right = (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1)
                pen.rectangle(
                    [top_left, bottom_right],
                    outline="gray",
                    width=BORDER_WIDTH,
                )

    return sheet
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Unpack and format XML contents of PPTX files.
|
|
3
|
+
|
|
4
|
+
Usage: python unpack.py <pptx_file> <output_dir>
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
python unpack.py presentation.pptx unpacked/
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import sys
|
|
11
|
+
import zipfile
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import defusedxml.minidom
|
|
15
|
+
|
|
16
|
+
# Smart quotes that get mangled by the tokenizer - convert to XML entities.
# Each value must be an ASCII numeric character reference, not the quote
# character itself; mapping a character to itself makes the replacement a no-op.
SMART_QUOTE_REPLACEMENTS = {
    "\u201c": "&#x201C;",  # Left double quotation mark
    "\u201d": "&#x201D;",  # Right double quotation mark
    "\u2018": "&#x2018;",  # Left single quotation mark
    "\u2019": "&#x2019;",  # Right single quotation mark
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def unpack(input_file: str, output_dir: str) -> None:
    """Extract a PPTX archive and reformat the XML parts inside it.

    The archive is extracted into ``output_dir`` (created if missing). Every
    ``*.xml`` and ``*.rels`` file found below it is then pretty-printed and,
    in a second pass, has its smart quotes escaped.

    If ``input_file`` does not exist or does not end in ``.pptx``
    (case-insensitive), an error is printed to stderr and the process exits
    with status 1.

    Args:
        input_file: Path to PPTX file
        output_dir: Path to output directory
    """
    source = Path(input_file)
    destination = Path(output_dir)

    # Validate up front; both failures share one reporting path.
    problem = None
    if not source.exists():
        problem = f"Error: {input_file} not found"
    elif source.suffix.lower() != ".pptx":
        problem = f"Error: {input_file} must be a .pptx file"
    if problem is not None:
        print(problem, file=sys.stderr)
        sys.exit(1)

    destination.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(source, "r") as archive:
        archive.extractall(destination)

    # Collect the XML parts once so both passes operate on the same files.
    xml_parts = [*destination.rglob("*.xml"), *destination.rglob("*.rels")]

    for part in xml_parts:
        _pretty_print_xml(part)

    # Escaping runs after formatting so the rewritten files keep the entities.
    for part in xml_parts:
        _escape_smart_quotes(part)

    print(f"Unpacked {input_file} to {output_dir}")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _pretty_print_xml(xml_file: Path) -> None:
    """Re-serialize an XML file in place with indentation.

    The file is read as UTF-8, parsed with ``defusedxml.minidom`` and written
    back as UTF-8 bytes produced by ``toprettyxml``. This is best effort: any
    exception raised along the way is ignored.

    Args:
        xml_file: Path of the XML file to rewrite.
    """
    try:
        document = defusedxml.minidom.parseString(
            xml_file.read_text(encoding="utf-8")
        )
        formatted = document.toprettyxml(indent=" ", encoding="utf-8")
        xml_file.write_bytes(formatted)
    except Exception:
        # Deliberately silent: files that cannot be parsed are skipped.
        pass
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _escape_smart_quotes(xml_file: Path) -> None:
    """Substitute smart quotes in a file using ``SMART_QUOTE_REPLACEMENTS``.

    The file is read as UTF-8, every key of ``SMART_QUOTE_REPLACEMENTS`` is
    replaced by its mapped value, and the result is written back as UTF-8.
    This is best effort: any exception raised along the way is ignored.

    Args:
        xml_file: Path of the file to rewrite in place.
    """
    try:
        escaped = xml_file.read_text(encoding="utf-8")
        for quote_char in SMART_QUOTE_REPLACEMENTS:
            escaped = escaped.replace(quote_char, SMART_QUOTE_REPLACEMENTS[quote_char])
        xml_file.write_text(escaped, encoding="utf-8")
    except Exception:
        # Deliberately silent, matching the best-effort pretty-printing step.
        pass
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
if __name__ == "__main__":
    # Exactly two positional arguments are required: the PPTX file and the
    # output directory.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python unpack.py <pptx_file> <output_dir>", file=sys.stderr)
        sys.exit(1)

    unpack(*cli_args)
|