marp2pptx 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- marp2pptx-0.1.0/PKG-INFO +167 -0
- marp2pptx-0.1.0/README.md +155 -0
- marp2pptx-0.1.0/marp2pptx/__init__.py +3 -0
- marp2pptx-0.1.0/marp2pptx/__main__.py +345 -0
- marp2pptx-0.1.0/marp2pptx/marp_convert.py +100 -0
- marp2pptx-0.1.0/marp2pptx/postprocessing.py +992 -0
- marp2pptx-0.1.0/marp2pptx/preprocessing.py +92 -0
- marp2pptx-0.1.0/marp2pptx/render_div_as_image.py +385 -0
- marp2pptx-0.1.0/marp2pptx.egg-info/PKG-INFO +167 -0
- marp2pptx-0.1.0/marp2pptx.egg-info/SOURCES.txt +15 -0
- marp2pptx-0.1.0/marp2pptx.egg-info/dependency_links.txt +1 -0
- marp2pptx-0.1.0/marp2pptx.egg-info/entry_points.txt +2 -0
- marp2pptx-0.1.0/marp2pptx.egg-info/requires.txt +5 -0
- marp2pptx-0.1.0/marp2pptx.egg-info/top_level.txt +1 -0
- marp2pptx-0.1.0/pyproject.toml +29 -0
- marp2pptx-0.1.0/setup.cfg +4 -0
- marp2pptx-0.1.0/tests/test_marp_postprocess.py +1107 -0
marp2pptx-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: marp2pptx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Convert Marp Markdown files to polished PPTX presentations
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: beautifulsoup4>=4.14.3
|
|
8
|
+
Requires-Dist: pillow>=12.1.1
|
|
9
|
+
Requires-Dist: pydantic>=2.12.5
|
|
10
|
+
Requires-Dist: python-pptx>=1.0.2
|
|
11
|
+
Requires-Dist: requests>=2.32.5
|
|
12
|
+
|
|
13
|
+
### Marp PPTX Post-Processing: Status & Next Steps
|
|
14
|
+
|
|
15
|
+
#### 1. Marp HTML Structure (Key Concepts)
|
|
16
|
+
|
|
17
|
+
**Slides:**
|
|
18
|
+
- Each slide is represented by a `<svg data-marpit-svg ...>` element.
|
|
19
|
+
- Inside each SVG, there are one or more layers, each as a `<foreignObject>` with width/height attributes.
|
|
20
|
+
- Each `<foreignObject>` contains a `<section>` element, which holds the content for that layer.
|
|
21
|
+
|
|
22
|
+
**Headers:**
|
|
23
|
+
- If a slide has a header, a `<header>` element is placed under the `<section>` element.
|
|
24
|
+
- Split background images are never placed in the header.
|
|
25
|
+
|
|
26
|
+
**Marpit Advanced Backgrounds:**
|
|
27
|
+
- Advanced backgrounds (true and split) are always placed in a `<div data-marpit-advanced-background-container="true">`.
|
|
28
|
+
- This div can contain one or more `<figure>` elements, each with a background image.
|
|
29
|
+
- The direction (horizontal/vertical) is set by `data-marpit-advanced-background-direction`.
|
|
30
|
+
- Split backgrounds are indicated by attributes like `data-marpit-advanced-background-split` on the `<section>`.
|
|
31
|
+
|
|
32
|
+
#### 2. What We're Trying to Achieve
|
|
33
|
+
|
|
34
|
+
We want to post-process a Marp-exported PowerPoint (`.pptx`) so that all background images specified with Marp’s advanced image syntax are visually correct in the exported `.pptx`. This includes:
|
|
35
|
+
|
|
36
|
+
- **True backgrounds** (`![bg](image.jpg)`): Fill the slide, appear behind content, and match Marp’s CSS background-size/position logic (cover, contain, auto, etc.).
|
|
37
|
+
- **Multiple backgrounds** (`![bg](image.jpg)` x N, with `horizontal`/`vertical`): Stack images in the correct order and direction, matching Marp’s advanced backgrounds.
|
|
38
|
+
- **Split backgrounds** (`![bg left]`, `![bg right]`, `![bg left:33%]`, etc.): Place the image as a foreground element in a defined region (not as a slide-wide background), shrinking the content area as Marp does. Only `left` and `right` split directions are supported (not `top` or `bottom`).
|
|
39
|
+
- **Cropping and placement**: All cropping, scaling, and placement (e.g., `right:35%`, `left:38% 70%`, `w:100% h:50%`) should visually match Marp’s HTML/PDF output.
|
|
40
|
+
|
|
41
|
+
Reference: [Marpit image syntax documentation](https://marpit.marp.app/image-syntax)
|
|
42
|
+
|
|
43
|
+
----
|
|
44
|
+
|
|
45
|
+
#### 3. What We've Done So Far
|
|
46
|
+
|
|
47
|
+
- **HTML Parsing:**
|
|
48
|
+
- The script parses Marp HTML output, extracting slide backgrounds, split info, image URLs, and layout instructions, by walking the SVG/foreignObject/section/container/figure structure.
|
|
49
|
+
- It builds a slide model for PPTX generation, replacing the old markdown/image logic.
|
|
50
|
+
|
|
51
|
+
- **True Backgrounds & Multiple Backgrounds:**
|
|
52
|
+
- Images fill the slide or are stacked horizontally/vertically, matching Marp's stacking logic.
|
|
53
|
+
|
|
54
|
+
- **Advanced Background Scaling:**
|
|
55
|
+
- Implemented full support for Marp's `background-size` property, parsed from the generated HTML.
|
|
56
|
+
- **Supported keywords**: `cover` (fills the area, cropping if necessary), `contain` and `fit` (scales to fit within the area), `auto` (uses original image size), and percentage values (e.g., `50%`, which scales the image relative to the container).
|
|
57
|
+
|
|
58
|
+
- **Split Backgrounds:**
|
|
59
|
+
- Split backgrounds (left, right, with percentage) are placed in the correct region and cropped to fill only the split space. Only `left` and `right` split directions are supported.
|
|
60
|
+
- Multiple images in a split region are distributed equally within the split space (always horizontally), matching Marp's stacking logic.
|
|
61
|
+
|
|
62
|
+
- **Debugging and Logging:**
|
|
63
|
+
- Extensive debug logging shows all image shapes, sizes, and the matching process for troubleshooting.
|
|
64
|
+
|
|
65
|
+
- **Robust Image Mapping**: Refactored the processing logic to map all images from the Marp HTML (including headers, content, and backgrounds) one-to-one with the picture shapes in the PPTX slide. This ensures that transformations are applied *only* to the correct background shapes, preventing unintended modifications to other images on the slide.
|
|
66
|
+
|
|
67
|
+
----
|
|
68
|
+
|
|
69
|
+
### 4. CLI & Pipeline Automation
|
|
70
|
+
|
|
71
|
+
The script has been refactored into a full command-line interface to automate the entire Marp to post-processed PPTX pipeline.
|
|
72
|
+
|
|
73
|
+
- **End-to-End Automation**: The script now orchestrates the three main steps of the conversion process:
|
|
74
|
+
1. **HTML Generation**: It calls `npx @marp-team/marp-cli` to convert the source Markdown file into an HTML file.
|
|
75
|
+
2. **Initial PPTX Generation**: It uses the same CLI tool to create a raw, editable PPTX file (`*_raw.pptx`).
|
|
76
|
+
3. **Post-Processing**: It runs the existing background image processing logic on the generated HTML and raw PPTX files.
|
|
77
|
+
- **File Management**:
|
|
78
|
+
- **Smart Naming**: Automatically creates an intermediate `_raw.pptx` file and saves the final output as `<input_name>.pptx`.
|
|
79
|
+
- **Automatic Cleanup**: Deletes the intermediate HTML and `_raw.pptx` files by default to keep the workspace clean.
|
|
80
|
+
- **Keep Intermediates**: A `--debug` flag is available to prevent cleanup for debugging purposes.
|
|
81
|
+
- **Improved Usability**:
|
|
82
|
+
- **CLI Arguments**: The script now uses `argparse` for robust handling of command-line arguments, including the input file, output file, and other options.
|
|
83
|
+
- **Help Documentation**: A `--help` command provides clear instructions on how to use the script and its available options.
|
|
84
|
+
- **Enhanced Typing**: Static typing has been improved throughout the codebase for better maintainability and reliability.
|
|
85
|
+
|
|
86
|
+
----
|
|
87
|
+
|
|
88
|
+
### 5. Testing
|
|
89
|
+
|
|
90
|
+
The new CLI simplifies testing significantly. To process a Marp Markdown file, run the script with the input file path.
|
|
91
|
+
|
|
92
|
+
Set the name of the Marp markdown file:
|
|
93
|
+
```powershell
|
|
94
|
+
$MARP_MARKDOWN_FILE = "sample.marp.md"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Run the end-to-end processing pipeline with a single command:
|
|
98
|
+
```powershell
|
|
99
|
+
uv run marp_pptx_postprocess.py ./${MARP_MARKDOWN_FILE}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The script will handle the intermediate steps and produce a final, post-processed PPTX file named `sample.marp.pptx`.
|
|
103
|
+
|
|
104
|
+
To inspect the final output:
|
|
105
|
+
```powershell
|
|
106
|
+
explorer "${MARP_MARKDOWN_FILE}.pptx"
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
To run the pipeline and keep the intermediate files for debugging:
|
|
110
|
+
```powershell
|
|
111
|
+
uv run marp_pptx_postprocess.py ./${MARP_MARKDOWN_FILE} --debug
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
This will leave the following files for inspection:
|
|
115
|
+
- `${MARP_MARKDOWN_FILE}.html`
|
|
116
|
+
- `${MARP_MARKDOWN_FILE}_raw.pptx`
|
|
117
|
+
- `${MARP_MARKDOWN_FILE}.pptx` (final output)
|
|
118
|
+
|
|
119
|
+
----
|
|
120
|
+
|
|
121
|
+
### 6. Next Steps (Advanced Placement)
|
|
122
|
+
|
|
123
|
+
#### Recent Progress
|
|
124
|
+
- Implemented a fix to widen all text boxes by 4cm (1133 pixels) to prevent unwanted text wrapping in headings when viewed in LibreOffice.
|
|
125
|
+
- This adjustment ensures that headings display correctly without wrapping issues.
|
|
126
|
+
|
|
127
|
+
The next major step is to handle explicit `width` and `height` parameters for background images, which are specified in the markdown but not always available in the final HTML `background-size` property.
|
|
128
|
+
|
|
129
|
+
- **Parameters to Support:**
|
|
130
|
+
- **Explicit `width` and `height`**: Keywords like `width: 300px` or `h: 50%`.
|
|
131
|
+
- **Shorthand `w` and `h`**: e.g., `w:300px`.
|
|
132
|
+
- **Positional arguments**: e.g. `![bg 50%]` or `![bg 300px 200px]`. The script will need to parse the original markdown to get these.
|
|
133
|
+
|
|
134
|
+
- **Goal:** Correctly position and scale images that use these markdown-specific parameters.
|
|
135
|
+
|
|
136
|
+
- **Content Area Shrinking:**
|
|
137
|
+
- For split backgrounds, shrink the content area as Marp does, so content is not covered by split backgrounds.
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# Install and run tool
|
|
141
|
+
## Run as module without installing:
|
|
142
|
+
```bash
|
|
143
|
+
python -m marp2pptx --help
|
|
144
|
+
```
|
|
145
|
+
## Install locally and run:
|
|
146
|
+
```bash
|
|
147
|
+
uv pip install -e .
|
|
148
|
+
```
|
|
149
|
+
```bash
|
|
150
|
+
marp2pptx --help
|
|
151
|
+
```
|
|
152
|
+
To uninstall:
|
|
153
|
+
```bash
|
|
154
|
+
uv pip uninstall marp2pptx
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Install globally and run:
|
|
158
|
+
```bash
|
|
159
|
+
uv pip install -e --global .
|
|
160
|
+
```
|
|
161
|
+
```bash
|
|
162
|
+
marp2pptx --help
|
|
163
|
+
```
|
|
164
|
+
To uninstall:
|
|
165
|
+
```bash
|
|
166
|
+
uv pip uninstall marp2pptx --global
|
|
167
|
+
```
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
### Marp PPTX Post-Processing: Status & Next Steps
|
|
2
|
+
|
|
3
|
+
#### 1. Marp HTML Structure (Key Concepts)
|
|
4
|
+
|
|
5
|
+
**Slides:**
|
|
6
|
+
- Each slide is represented by a `<svg data-marpit-svg ...>` element.
|
|
7
|
+
- Inside each SVG, there are one or more layers, each as a `<foreignObject>` with width/height attributes.
|
|
8
|
+
- Each `<foreignObject>` contains a `<section>` element, which holds the content for that layer.
|
|
9
|
+
|
|
10
|
+
**Headers:**
|
|
11
|
+
- If a slide has a header, a `<header>` element is placed under the `<section>` element.
|
|
12
|
+
- Split background images are never placed in the header.
|
|
13
|
+
|
|
14
|
+
**Marpit Advanced Backgrounds:**
|
|
15
|
+
- Advanced backgrounds (true and split) are always placed in a `<div data-marpit-advanced-background-container="true">`.
|
|
16
|
+
- This div can contain one or more `<figure>` elements, each with a background image.
|
|
17
|
+
- The direction (horizontal/vertical) is set by `data-marpit-advanced-background-direction`.
|
|
18
|
+
- Split backgrounds are indicated by attributes like `data-marpit-advanced-background-split` on the `<section>`.
|
|
19
|
+
|
|
20
|
+
#### 2. What We're Trying to Achieve
|
|
21
|
+
|
|
22
|
+
We want to post-process a Marp-exported PowerPoint (`.pptx`) so that all background images specified with Marp’s advanced image syntax are visually correct in the exported `.pptx`. This includes:
|
|
23
|
+
|
|
24
|
+
- **True backgrounds** (`![bg](image.jpg)`): Fill the slide, appear behind content, and match Marp’s CSS background-size/position logic (cover, contain, auto, etc.).
|
|
25
|
+
- **Multiple backgrounds** (`![bg](image.jpg)` x N, with `horizontal`/`vertical`): Stack images in the correct order and direction, matching Marp’s advanced backgrounds.
|
|
26
|
+
- **Split backgrounds** (`![bg left]`, `![bg right]`, `![bg left:33%]`, etc.): Place the image as a foreground element in a defined region (not as a slide-wide background), shrinking the content area as Marp does. Only `left` and `right` split directions are supported (not `top` or `bottom`).
|
|
27
|
+
- **Cropping and placement**: All cropping, scaling, and placement (e.g., `right:35%`, `left:38% 70%`, `w:100% h:50%`) should visually match Marp’s HTML/PDF output.
|
|
28
|
+
|
|
29
|
+
Reference: [Marpit image syntax documentation](https://marpit.marp.app/image-syntax)
|
|
30
|
+
|
|
31
|
+
----
|
|
32
|
+
|
|
33
|
+
#### 3. What We've Done So Far
|
|
34
|
+
|
|
35
|
+
- **HTML Parsing:**
|
|
36
|
+
- The script parses Marp HTML output, extracting slide backgrounds, split info, image URLs, and layout instructions, by walking the SVG/foreignObject/section/container/figure structure.
|
|
37
|
+
- It builds a slide model for PPTX generation, replacing the old markdown/image logic.
|
|
38
|
+
|
|
39
|
+
- **True Backgrounds & Multiple Backgrounds:**
|
|
40
|
+
- Images fill the slide or are stacked horizontally/vertically, matching Marp's stacking logic.
|
|
41
|
+
|
|
42
|
+
- **Advanced Background Scaling:**
|
|
43
|
+
- Implemented full support for Marp's `background-size` property, parsed from the generated HTML.
|
|
44
|
+
- **Supported keywords**: `cover` (fills the area, cropping if necessary), `contain` and `fit` (scales to fit within the area), `auto` (uses original image size), and percentage values (e.g., `50%`, which scales the image relative to the container).
|
|
45
|
+
|
|
46
|
+
- **Split Backgrounds:**
|
|
47
|
+
- Split backgrounds (left, right, with percentage) are placed in the correct region and cropped to fill only the split space. Only `left` and `right` split directions are supported.
|
|
48
|
+
- Multiple images in a split region are distributed equally within the split space (always horizontally), matching Marp's stacking logic.
|
|
49
|
+
|
|
50
|
+
- **Debugging and Logging:**
|
|
51
|
+
- Extensive debug logging shows all image shapes, sizes, and the matching process for troubleshooting.
|
|
52
|
+
|
|
53
|
+
- **Robust Image Mapping**: Refactored the processing logic to map all images from the Marp HTML (including headers, content, and backgrounds) one-to-one with the picture shapes in the PPTX slide. This ensures that transformations are applied *only* to the correct background shapes, preventing unintended modifications to other images on the slide.
|
|
54
|
+
|
|
55
|
+
----
|
|
56
|
+
|
|
57
|
+
### 4. CLI & Pipeline Automation
|
|
58
|
+
|
|
59
|
+
The script has been refactored into a full command-line interface to automate the entire Marp to post-processed PPTX pipeline.
|
|
60
|
+
|
|
61
|
+
- **End-to-End Automation**: The script now orchestrates the three main steps of the conversion process:
|
|
62
|
+
1. **HTML Generation**: It calls `npx @marp-team/marp-cli` to convert the source Markdown file into an HTML file.
|
|
63
|
+
2. **Initial PPTX Generation**: It uses the same CLI tool to create a raw, editable PPTX file (`*_raw.pptx`).
|
|
64
|
+
3. **Post-Processing**: It runs the existing background image processing logic on the generated HTML and raw PPTX files.
|
|
65
|
+
- **File Management**:
|
|
66
|
+
- **Smart Naming**: Automatically creates an intermediate `_raw.pptx` file and saves the final output as `<input_name>.pptx`.
|
|
67
|
+
- **Automatic Cleanup**: Deletes the intermediate HTML and `_raw.pptx` files by default to keep the workspace clean.
|
|
68
|
+
- **Keep Intermediates**: A `--debug` flag is available to prevent cleanup for debugging purposes.
|
|
69
|
+
- **Improved Usability**:
|
|
70
|
+
- **CLI Arguments**: The script now uses `argparse` for robust handling of command-line arguments, including the input file, output file, and other options.
|
|
71
|
+
- **Help Documentation**: A `--help` command provides clear instructions on how to use the script and its available options.
|
|
72
|
+
- **Enhanced Typing**: Static typing has been improved throughout the codebase for better maintainability and reliability.
|
|
73
|
+
|
|
74
|
+
----
|
|
75
|
+
|
|
76
|
+
### 5. Testing
|
|
77
|
+
|
|
78
|
+
The new CLI simplifies testing significantly. To process a Marp Markdown file, run the script with the input file path.
|
|
79
|
+
|
|
80
|
+
Set the name of the Marp markdown file:
|
|
81
|
+
```powershell
|
|
82
|
+
$MARP_MARKDOWN_FILE = "sample.marp.md"
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Run the end-to-end processing pipeline with a single command:
|
|
86
|
+
```powershell
|
|
87
|
+
uv run marp_pptx_postprocess.py ./${MARP_MARKDOWN_FILE}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The script will handle the intermediate steps and produce a final, post-processed PPTX file named `sample.marp.pptx`.
|
|
91
|
+
|
|
92
|
+
To inspect the final output:
|
|
93
|
+
```powershell
|
|
94
|
+
explorer "${MARP_MARKDOWN_FILE}.pptx"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
To run the pipeline and keep the intermediate files for debugging:
|
|
98
|
+
```powershell
|
|
99
|
+
uv run marp_pptx_postprocess.py ./${MARP_MARKDOWN_FILE} --debug
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
This will leave the following files for inspection:
|
|
103
|
+
- `${MARP_MARKDOWN_FILE}.html`
|
|
104
|
+
- `${MARP_MARKDOWN_FILE}_raw.pptx`
|
|
105
|
+
- `${MARP_MARKDOWN_FILE}.pptx` (final output)
|
|
106
|
+
|
|
107
|
+
----
|
|
108
|
+
|
|
109
|
+
### 6. Next Steps (Advanced Placement)
|
|
110
|
+
|
|
111
|
+
#### Recent Progress
|
|
112
|
+
- Implemented a fix to widen all text boxes by 4cm (1133 pixels) to prevent unwanted text wrapping in headings when viewed in LibreOffice.
|
|
113
|
+
- This adjustment ensures that headings display correctly without wrapping issues.
|
|
114
|
+
|
|
115
|
+
The next major step is to handle explicit `width` and `height` parameters for background images, which are specified in the markdown but not always available in the final HTML `background-size` property.
|
|
116
|
+
|
|
117
|
+
- **Parameters to Support:**
|
|
118
|
+
- **Explicit `width` and `height`**: Keywords like `width: 300px` or `h: 50%`.
|
|
119
|
+
- **Shorthand `w` and `h`**: e.g., `w:300px`.
|
|
120
|
+
- **Positional arguments**: e.g. `![bg 50%]` or `![bg 300px 200px]`. The script will need to parse the original markdown to get these.
|
|
121
|
+
|
|
122
|
+
- **Goal:** Correctly position and scale images that use these markdown-specific parameters.
|
|
123
|
+
|
|
124
|
+
- **Content Area Shrinking:**
|
|
125
|
+
- For split backgrounds, shrink the content area as Marp does, so content is not covered by split backgrounds.
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# Install and run tool
|
|
129
|
+
## Run as module without installing:
|
|
130
|
+
```bash
|
|
131
|
+
python -m marp2pptx --help
|
|
132
|
+
```
|
|
133
|
+
## Install locally and run:
|
|
134
|
+
```bash
|
|
135
|
+
uv pip install -e .
|
|
136
|
+
```
|
|
137
|
+
```bash
|
|
138
|
+
marp2pptx --help
|
|
139
|
+
```
|
|
140
|
+
To uninstall:
|
|
141
|
+
```bash
|
|
142
|
+
uv pip uninstall marp2pptx
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Install globally and run:
|
|
146
|
+
```bash
|
|
147
|
+
uv pip install -e --global .
|
|
148
|
+
```
|
|
149
|
+
```bash
|
|
150
|
+
marp2pptx --help
|
|
151
|
+
```
|
|
152
|
+
To uninstall:
|
|
153
|
+
```bash
|
|
154
|
+
uv pip uninstall marp2pptx --global
|
|
155
|
+
```
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
#!/usr/bin/env -S uv run --script
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from pptx import Presentation
|
|
11
|
+
|
|
12
|
+
# Import from new modules
|
|
13
|
+
from .marp_convert import (
|
|
14
|
+
get_npx_path,
|
|
15
|
+
marp_generate_in_parallel,
|
|
16
|
+
)
|
|
17
|
+
from .preprocessing import (
|
|
18
|
+
preprocess_markdown,
|
|
19
|
+
)
|
|
20
|
+
from .postprocessing import (
|
|
21
|
+
parse_marp_html,
|
|
22
|
+
widen_text_shapes,
|
|
23
|
+
normalize_font_names,
|
|
24
|
+
remove_redundant_marp_white_rectangles,
|
|
25
|
+
process_native_marp_images,
|
|
26
|
+
process_styled_divs,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Module logger
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def open_pptx_file(pptx_path: Path) -> None:
    """Launch the platform's default viewer for ``pptx_path``.

    Windows uses ``os.startfile``; macOS runs ``open`` and every other
    platform runs ``xdg-open``. Any failure is logged as an error rather
    than raised, so callers never see an exception from this function.
    """
    try:
        target = str(pptx_path)
        if sys.platform == "win32":
            os.startfile(target)
        else:
            launcher = "open" if sys.platform == "darwin" else "xdg-open"
            # check=True turns a non-zero launcher exit status into an
            # exception, which is reported by the handler below.
            subprocess.run([launcher, target], check=True)

        logger.info(f"Opening PPTX file: {pptx_path}")
    except Exception as err:
        logger.error(f"Failed to open PPTX file: {err}")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def convert_command(args) -> None:
    """Run the Markdown -> PPTX pipeline for the ``convert`` subcommand.

    The input Markdown is preprocessed, Marp generates the HTML and the raw
    PPTX, and the raw PPTX is post-processed into the final file. Unless
    ``args.debug`` is set, the intermediate files are deleted afterwards,
    whether or not the conversion succeeded.

    Parameters:
        args: parsed CLI namespace; reads ``input_file``, ``output``,
            ``debug``, ``experimental`` and ``open_pptx``.

    Exits the process with status 1 when the input file is missing or any
    pipeline step fails.
    """
    source_md = Path(args.input_file)
    if not source_md.is_file():
        logger.error(f"Input file not found: {source_md}")
        sys.exit(1)

    # Every generated artefact is named after the input path plus "-m2p".
    prefix = f"{args.input_file}-m2p"
    preprocessed_md = Path(f"{prefix}.preprocessed.marp.md")
    html_file = Path(f"{prefix}.html")
    raw_pptx = Path(f"{prefix}_raw.pptx")
    final_pptx = Path(args.output) if args.output else Path(f"{prefix}.pptx")

    # Only these three are ever deleted; the final PPTX is not in the list.
    leftovers = [preprocessed_md, html_file, raw_pptx]
    succeeded = False

    try:
        # Preprocess the Markdown.
        logger.info("Preprocessing Markdown to remove invisible characters...")
        preprocess_markdown(source_md, preprocessed_md)
        logger.debug(f"Preprocessed Markdown created: {preprocessed_md}")

        # HTML and raw PPTX generation are delegated to one helper call.
        marp_generate_in_parallel(preprocessed_md, html_file, raw_pptx)

        # Post-process the raw PPTX with the help of the HTML export.
        logger.info(f"Post-processing raw PPTX to create final file: {final_pptx}")
        keep_rendered_divs = args.debug and logger.isEnabledFor(logging.DEBUG)
        process_pptx_html(
            html_file,
            raw_pptx,
            final_pptx,
            save_rendered_divs=keep_rendered_divs,
            run_styled_divs=args.experimental,
        )
        logger.info(f"Successfully created final PPTX: {final_pptx}")
        succeeded = True

    except FileNotFoundError:
        logger.error(
            f"Error: '{get_npx_path()}' command not found. Is Node.js and npm installed and in your PATH?"
        )
        sys.exit(1)
    except subprocess.CalledProcessError as err:
        logger.error(f"Marp CLI failed to execute. The command was: {' '.join(err.cmd)}")
        # The subprocess output is streamed, not captured, so its own error
        # text is expected to be visible on the console already.
        sys.exit(1)
    except Exception as err:
        logger.error(f"An unexpected error occurred: {err}")
        logger.debug("Full traceback:", exc_info=True)
        sys.exit(1)

    finally:
        # Cleanup runs on success and on failure alike.
        if args.debug:
            logger.info("Intermediate files kept as requested.")
        else:
            logger.info("Cleaning up intermediate files...")
            for leftover in leftovers:
                try:
                    if not leftover.is_file():
                        continue
                    os.remove(leftover)
                    logger.debug(f"Removed {leftover}")
                except OSError as err:
                    logger.warning(f"Could not remove intermediate file {leftover}: {err}")
            logger.info("Cleanup complete.")

    # Open the result only once the whole pipeline, cleanup included, is done.
    if succeeded and args.open_pptx:
        open_pptx_file(final_pptx)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def cleanup_command(args) -> None:
    """Delete the intermediate files left behind by ``convert --debug``.

    The three candidates are derived from ``args.input_file`` by appending
    ``-m2p.preprocessed.marp.md``, ``-m2p.html`` and ``-m2p_raw.pptx``.
    The final ``-m2p.pptx`` output is never touched.

    Parameters:
        args: parsed CLI namespace; only ``input_file`` is read.

    Files that do not exist are skipped, and an ``OSError`` while deleting
    is logged as a warning instead of aborting the remaining deletions.
    """
    # All debug artefacts share the "<input_file>-m2p" prefix.
    prefix = f"{args.input_file}-m2p"
    debug_files = [
        Path(f"{prefix}.preprocessed.marp.md"),
        Path(f"{prefix}.html"),
        Path(f"{prefix}_raw.pptx"),
    ]

    removed_count = 0
    for f in debug_files:
        try:
            if f.is_file():
                os.remove(f)
                logger.info(f"Removed {f}")
                removed_count += 1
            else:
                logger.debug(f"File not found: {f}")
        except OSError as e:
            logger.warning(f"Could not remove file {f}: {e}")

    if removed_count > 0:
        logger.info(f"Cleanup complete. Removed {removed_count} file(s).")
    else:
        logger.info("No debug files found to remove.")
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def open_pptx_command(args) -> None:
    """Open the default ``<input_file>-m2p.pptx`` output in a viewer.

    Parameters:
        args: parsed CLI namespace; only ``input_file`` is read.

    Logs an error and exits with status 1 when that file does not exist.
    """
    generated = Path(f"{args.input_file}-m2p.pptx")

    if generated.is_file():
        open_pptx_file(generated)
        return

    logger.error(f"PPTX file not found: {generated}")
    sys.exit(1)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def process_pptx_html(
    html_path: Path,
    pptx_path: Path,
    output_path: Path,
    save_rendered_divs: bool = False,
    run_styled_divs: bool = True,
) -> None:
    """Post-process a raw Marp PPTX using the matching Marp HTML export.

    Steps, in order:
      1. Parse ``html_path`` into per-slide data.
      2. Load ``pptx_path`` and fix native Marp image/background sizing.
      3. Widen text shapes by 0.7 cm to avoid wrapping in some viewers.
      4. Optionally run the experimental styled-div pipeline.
      5. Normalize font names and remove redundant white rectangles
         (both best-effort: a failure is logged and the run continues).
      6. Save the presentation to ``output_path``.

    Parameters:
        html_path: Marp HTML export of the same deck.
        pptx_path: raw PPTX produced by Marp.
        output_path: destination of the post-processed PPTX.
        save_rendered_divs: forwarded unchanged to ``process_styled_divs``;
            the CLI sets it only when both ``--debug`` and debug logging
            are active.
        run_styled_divs: when False, skip the ``process_styled_divs``
            pipeline. The CLI passes the value of ``--experimental`` here,
            so the step is off unless that flag is given.
    """
    slides_data = parse_marp_html(html_path)
    logger.debug("Parsed Marp HTML -> slides_data length: %d", len(slides_data))
    logger.debug("Slides data content presence:")
    for index, slide in enumerate(slides_data):
        logger.debug(" slide[%d] content present: %s", index, bool(slide.get("content")))

    # The global CSS for styled-div rendering is computed inside
    # `process_styled_divs`, so it is not prepared here.

    prs = Presentation(str(pptx_path))

    # Handle native Marp image/background sizing.
    process_native_marp_images(
        prs=prs,
        slides_data=slides_data,
    )

    # Widen text boxes to avoid wrapping issues in some viewers.
    widen_text_shapes(prs=prs, extra_width_cm=0.7)

    # Styled HTML <div> elements that contain images (rounded portraits,
    # object-fit, background-image on divs, inline border-radius, ...) are
    # handled by `process_styled_divs`. The step is experimental; the CLI
    # enables it with `--experimental`.
    if run_styled_divs:
        process_styled_divs(
            prs=prs,
            slides_data=slides_data,
            html_path=html_path,
            save_rendered_divs=save_rendered_divs,
        )
    else:
        logger.info("Skipping experimental styled-div rendering (disabled)")

    # Normalize font names (fix known Marp->PPTX mismatches such as
    # 'SegoeUI' -> 'Segoe UI'). Best-effort: report the failure, keep going.
    try:
        normalize_font_names(prs)
    except Exception as exc:
        logger.warning("Font name normalization failed; continuing: %s", exc)
        logger.debug("process_pptx_html: normalize_font_names failed", exc_info=True)

    # Best-effort as well: a failure here must not prevent saving the file.
    try:
        removed_shapes = remove_redundant_marp_white_rectangles(prs)
        logger.info("Removed %d redundant white rectangle(s)", removed_shapes)
    except Exception as exc:
        logger.warning("Removing redundant white rectangles failed; continuing: %s", exc)
        logger.debug(
            "process_pptx_html: remove_redundant_marp_white_rectangles failed",
            exc_info=True,
        )

    # Save the modified presentation.
    prs.save(str(output_path))
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def main() -> None:
    """Parse the command line and dispatch to the selected subcommand.

    Subcommands: ``convert``, ``clean-up`` and ``open-pptx``. When no
    subcommand is given, the help text is printed and the process exits
    with status 1.
    """
    # Configure logging here rather than at import time, so importing the
    # module has no side effects on the host application's logging.
    logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

    parser = argparse.ArgumentParser(
        description="Process a Marp Markdown file to create a polished PPTX.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # --- 'convert' subcommand ---
    convert_parser = subparsers.add_parser(
        "convert",
        help=(
            "Convert a Marp Markdown file to a polished PPTX.\n"
            "This script automates the pipeline:\n"
            "1. Preprocess Markdown to remove invisible characters.\n"
            "2. Convert Markdown to HTML using Marp CLI (runs in parallel with step 3).\n"
            "3. Convert Markdown to a raw PPTX using Marp CLI (runs in parallel with step 2).\n"
            "4. Post-process the raw PPTX, using the HTML to fix background image layouts, creating the final PPTX.\n"
            "5. Clean up intermediate files (preprocessed Markdown, HTML, raw PPTX) unless specified otherwise.\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    convert_parser.add_argument(
        "input_file",
        type=str,
        help='Path to the input Marp Markdown file (e.g., "sample.marp.md")',
    )
    convert_parser.add_argument(
        "-o",
        "--output",
        type=str,
        # Must match the default path that convert_command actually builds.
        help='Path for the final output PPTX file. Defaults to "<input_file>-m2p.pptx" (e.g., "sample.marp.md-m2p.pptx").',
    )
    convert_parser.add_argument(
        "--debug",
        action="store_true",
        help="Keep the intermediate HTML and raw PPTX files for debugging.",
    )
    convert_parser.add_argument(
        "--experimental",
        action="store_true",
        help="Enable experimental styled-div rendering (disabled by default).",
    )
    convert_parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose debug logging."
    )
    convert_parser.add_argument(
        "--open-pptx",
        action="store_true",
        help="Open the generated PPTX file in the default viewer after successful conversion.",
    )

    # --- 'clean-up' subcommand ---
    cleanup_parser = subparsers.add_parser(
        "clean-up",
        help="Remove debug files (preprocessed Markdown, HTML, raw PPTX) generated by the convert command.",
    )
    cleanup_parser.add_argument(
        "input_file",
        type=str,
        help='Path to the Marp Markdown file (e.g., "sample.marp.md")',
    )
    cleanup_parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose debug logging."
    )

    # --- 'open-pptx' subcommand ---
    open_parser = subparsers.add_parser(
        "open-pptx",
        help="Open the generated PPTX file in the default viewer.",
    )
    open_parser.add_argument(
        "input_file",
        type=str,
        help='Path to the Marp Markdown file (e.g., "sample.marp.md")',
    )

    args = parser.parse_args()

    # A subcommand is mandatory; show the help instead of doing nothing.
    if not args.command:
        parser.print_help()
        sys.exit(1)

    # 'open-pptx' defines no --verbose option, hence the getattr default.
    if getattr(args, "verbose", False):
        logger.setLevel(logging.DEBUG)
        # Lower the package logger as well so debug records from sibling
        # modules are not filtered at the root's INFO level.
        # NOTE(review): assumes those modules create their loggers with
        # logging.getLogger(__name__) - confirm.
        if __package__:
            logging.getLogger(__package__).setLevel(logging.DEBUG)

    # Dispatch to the function implementing the chosen subcommand.
    if args.command == "convert":
        convert_command(args)
    elif args.command == "clean-up":
        cleanup_command(args)
    elif args.command == "open-pptx":
        open_pptx_command(args)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
if __name__ == "__main__":
|
|
344
|
+
main()
|
|
345
|
+
|