report-compiler 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. report_compiler-0.1.0/PKG-INFO +330 -0
  2. report_compiler-0.1.0/README.md +313 -0
  3. report_compiler-0.1.0/pyproject.toml +29 -0
  4. report_compiler-0.1.0/setup.cfg +4 -0
  5. report_compiler-0.1.0/src/report_compiler/__init__.py +14 -0
  6. report_compiler-0.1.0/src/report_compiler/cli.py +327 -0
  7. report_compiler-0.1.0/src/report_compiler/core/__init__.py +1 -0
  8. report_compiler-0.1.0/src/report_compiler/core/compiler.py +414 -0
  9. report_compiler-0.1.0/src/report_compiler/core/config.py +74 -0
  10. report_compiler-0.1.0/src/report_compiler/document/__init__.py +1 -0
  11. report_compiler-0.1.0/src/report_compiler/document/docx_processor.py +224 -0
  12. report_compiler-0.1.0/src/report_compiler/document/libreoffice_converter.py +44 -0
  13. report_compiler-0.1.0/src/report_compiler/document/placeholder_parser.py +202 -0
  14. report_compiler-0.1.0/src/report_compiler/document/word_converter.py +140 -0
  15. report_compiler-0.1.0/src/report_compiler/pdf/__init__.py +1 -0
  16. report_compiler-0.1.0/src/report_compiler/pdf/content_analyzer.py +239 -0
  17. report_compiler-0.1.0/src/report_compiler/pdf/marker_remover.py +147 -0
  18. report_compiler-0.1.0/src/report_compiler/pdf/merge_processor.py +247 -0
  19. report_compiler-0.1.0/src/report_compiler/pdf/overlay_processor.py +168 -0
  20. report_compiler-0.1.0/src/report_compiler/utils/__init__.py +1 -0
  21. report_compiler-0.1.0/src/report_compiler/utils/conversions.py +12 -0
  22. report_compiler-0.1.0/src/report_compiler/utils/file_manager.py +208 -0
  23. report_compiler-0.1.0/src/report_compiler/utils/logging_config.py +181 -0
  24. report_compiler-0.1.0/src/report_compiler/utils/page_selector.py +182 -0
  25. report_compiler-0.1.0/src/report_compiler/utils/pdf_to_svg.py +116 -0
  26. report_compiler-0.1.0/src/report_compiler/utils/validators.py +287 -0
  27. report_compiler-0.1.0/src/report_compiler.egg-info/PKG-INFO +330 -0
  28. report_compiler-0.1.0/src/report_compiler.egg-info/SOURCES.txt +30 -0
  29. report_compiler-0.1.0/src/report_compiler.egg-info/dependency_links.txt +1 -0
  30. report_compiler-0.1.0/src/report_compiler.egg-info/entry_points.txt +2 -0
  31. report_compiler-0.1.0/src/report_compiler.egg-info/requires.txt +8 -0
  32. report_compiler-0.1.0/src/report_compiler.egg-info/top_level.txt +1 -0
@@ -0,0 +1,330 @@
1
+ Metadata-Version: 2.4
2
+ Name: report_compiler
3
+ Version: 0.1.0
4
+ Summary: A tool for compiling reports from various sources.
5
+ Author-email: YOUR NAME <your@email.com>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.7
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: comtypes>=1.2.1
12
+ Requires-Dist: Pillow>=10.2.0
13
+ Requires-Dist: python-docx>=1.1.0
14
+ Requires-Dist: PyMuPDF>=1.26.3
15
+ Requires-Dist: typer>=0.9.0
16
+ Requires-Dist: pywin32; sys_platform == "win32"
17
+
18
+ # Report Compiler
19
+
20
+ A Python-based automated DOCX and PDF report compiler for engineering teams. This tool allows engineers to write reports in Word, use placeholders to insert external PDFs, and compile everything into a professional PDF with a single command.
21
+
22
+ ## Overview
23
+
24
+ The Report Compiler automates the creation of comprehensive PDF reports by:
25
+
26
+ 1. **Finding PDF placeholders** in Word documents using two types of tags:
27
+ - `[[OVERLAY: path/to/file.pdf, page=5]]` for table-based overlays
28
+ - `[[INSERT: path/to/file.pdf]]` for paragraph-based insertions
29
+ 2. **Modifying the Word document** to create markers and page breaks
30
+ 3. **Converting to PDF** using Word automation (win32com)
31
+ 4. **Processing PDF insertions** with overlays and merges using PyMuPDF
32
+
33
+ ## Features
34
+
35
+ - ✅ **Two insertion types** - Table-based overlays and paragraph-based merges
36
+ - ✅ **Relative path support** - PDF paths resolved relative to the input Word document
37
+ - ✅ **Page selection support** - Specify which pages to include from source PDFs using flexible syntax
38
+ - ✅ **Multi-page PDF support** - Automatic cell replication for multi-page table overlays
39
+ - ✅ **Annotation preservation** - PDF annotations automatically baked into content during processing
40
+ - ✅ **Marker removal** - Automatic removal of placement markers from final PDF
41
+ - ✅ **Robust page breaks** - Proper page breaks for paragraph-based insertions
42
+ - ✅ **Error handling** - Comprehensive error reporting and validation
43
+ - ✅ **Debug support** - `--keep-temp` flag to retain temporary files for debugging
44
+ - ✅ **Table-based overlay** - Precise PDF placement using table dimensions and marker positioning
45
+ - ✅ **Cell replication** - Multi-page PDFs create consecutive table cells automatically
46
+ - ✅ **Intelligent positioning** - Uses table properties for automatic overlay rectangle calculation
47
+ - ✅ **Modular architecture** - Clean separation of concerns with focused classes and modules
48
+
49
+ ## Architecture
50
+
51
+ The Report Compiler uses a modular architecture with clear separation of responsibilities:
52
+
53
+ ### Core Modules
54
+
55
+ - **`report_compiler.core`** - Main orchestration and configuration
56
+ - `ReportCompiler` - Main orchestrator class
57
+ - `Config` - Configuration management and constants
58
+
59
+ - **`report_compiler.document`** - Word document processing
60
+ - `PlaceholderParser` - Detects and parses PDF placeholders
61
+ - `DocxProcessor` - Modifies DOCX files (markers, page breaks, cell replication)
62
+ - `WordConverter` - Converts DOCX to PDF using Word automation
63
+
64
+ - **`report_compiler.pdf`** - PDF processing and manipulation
65
+ - `ContentAnalyzer` - Analyzes PDF content and structure
66
+ - `OverlayProcessor` - Handles table-based PDF overlays
67
+ - `MergeProcessor` - Handles paragraph-based PDF merges
68
+ - `MarkerRemover` - Removes placement markers from final PDF
69
+
70
+ - **`report_compiler.utils`** - Utility classes and helpers
71
+ - `FileManager` - Temporary file management and cleanup
72
+ - `Validators` - Input validation and PDF verification
73
+ - `PageSelector` - Page selection parsing and processing
74
+
75
+ ### Usage as a Library
76
+
77
+ ```python
78
+ from report_compiler.core.compiler import ReportCompiler
79
+
80
+ # Basic usage
81
+ compiler = ReportCompiler("input.docx", "output.pdf")
82
+ compiler.compile()
83
+
84
+ # With debug mode
85
+ compiler = ReportCompiler("input.docx", "output.pdf", keep_temp=True)
86
+ compiler.compile()
87
+ ```
88
+
89
+ ## Quick Start
90
+
91
+ ### Installation
92
+
93
+ ```bash
94
+ pip install report-compiler
95
+ ```
96
+
97
+ ### Basic Usage
98
+
99
+ ```bash
100
+ report-compiler compile input_report.docx output_report.pdf
101
+ ```
102
+
103
+ ### Debug Mode (with temp files)
104
+
105
+ ```bash
106
+ report-compiler compile input_report.docx output_report.pdf --keep-temp
107
+ ```
108
+
109
+ ## Placeholder Format
110
+
111
+ The Report Compiler supports two types of PDF insertion placeholders:
112
+
113
+ ### Table-based Overlays (OVERLAY tags)
114
+
115
+ For inserting PDFs as overlays onto existing pages, preserving the main document's content and layout. Place these in **single-cell (1x1) tables**:
116
+
117
+ ```text
118
+ [[OVERLAY: appendices/sketch.pdf]]
119
+ [[OVERLAY: calculations/diagram.pdf, page=2]]
120
+ [[OVERLAY: C:\Shared\drawing.pdf, page=1-3]]
121
+ [[OVERLAY: diagrams/full_page.pdf, crop=false]]
122
+ [[OVERLAY: sketches/detail.pdf, page=2, crop=false]]
123
+ ```
124
+
125
+ **OVERLAY Parameters:**
126
+
127
+ - `page=` - Page selection (same format as INSERT)
128
+ - `crop=` - Content cropping control:
129
+ - `crop=true` (default): Automatically crops to content bounding box, removing excess whitespace
130
+ - `crop=false`: Uses the full page dimensions without cropping
131
+
132
+ ### Paragraph-based Merges (INSERT tags)
133
+
134
+ For inserting entire PDF pages after a marker position. The original paragraph content is preserved, and PDF pages are inserted immediately after it. Place these in **standalone paragraphs**:
135
+
136
+ ```text
137
+ [[INSERT: appendices/structural_analysis.pdf]]
138
+ [[INSERT: calculations/load_analysis.pdf:1-5]]
139
+ [[INSERT: C:\Shared\external_report.pdf]]
140
+ ```
141
+
142
+ ### Page Selection
143
+
144
+ Both OVERLAY and INSERT tags support page selection:
145
+
146
+ **OVERLAY page selection (using `page=` parameter):**
147
+
148
+ ```text
149
+ [[OVERLAY: appendices/report.pdf, page=5]] # Page 5 only
150
+ [[OVERLAY: appendices/report.pdf, page=1-3]] # Pages 1, 2, and 3
151
+ [[OVERLAY: appendices/report.pdf, page=1,3,5]] # Pages 1, 3, and 5
152
+ [[OVERLAY: appendices/report.pdf, page=2-]] # Pages 2 to end
153
+ ```
154
+
155
+ **INSERT page selection (using `:` separator):**
156
+
157
+ ```text
158
+ [[INSERT: appendices/report.pdf:1-3]] # Pages 1, 2, and 3
159
+ [[INSERT: appendices/report.pdf:5]] # Page 5 only
160
+ [[INSERT: appendices/report.pdf:1,3,5]] # Pages 1, 3, and 5
161
+ [[INSERT: appendices/report.pdf:2-]] # Pages 2 to end
162
+ [[INSERT: appendices/report.pdf:1-3,7,9-]] # Mixed: pages 1-3, 7, and 9 to end
163
+ ```
164
+
165
+ **Page Selection Formats:**
166
+
167
+ - `5` - Single page (page 5)
168
+ - `1-3` - Range of pages (pages 1, 2, 3)
169
+ - `2-` - Open-ended range (pages 2 to end of document)
170
+ - `1,3,5` - Specific pages (pages 1, 3, and 5)
171
+ - `1-3,7,9-12` - Combined specifications
172
+
173
+ **Note:** Page numbers are 1-indexed (first page = 1). Invalid page numbers are automatically filtered out.
174
+
175
+ **Multi-page PDFs**: Automatically handled via cell replication (table-based overlays) or sequential page insertion (paragraph-based merges)
176
+
177
+ **Note**: Relative paths are resolved relative to the Word document's location.
178
+
179
+ ## How It Works
180
+
181
+ ### 1. Placeholder Detection
182
+
183
+ - **Table scanning** - Identifies `[[OVERLAY: ...]]` tags in single-cell tables
184
+ - **Paragraph scanning** - Identifies `[[INSERT: ...]]` tags in standalone paragraphs
185
+ - **Path resolution** - Resolves relative paths relative to Word document location
186
+ - **Page parsing** - Parses page selection syntax (e.g., `:1-3`, `,page=5`)
187
+ - **PDF validation** - Validates that referenced PDF files exist and are readable
188
+ - **Page counting** - Counts effective pages after applying page selection filters
189
+ - **Layout detection** - Identifies single-cell tables vs standalone paragraphs
190
+
191
+ ### 2. Document Modification
192
+
193
+ - **Table placeholders** - Replaces with visible red markers (`%%OVERLAY_START_N%%`)
194
+ - **Cell replication** - Creates additional table cells for multi-page selections
195
+ - **Paragraph placeholders** - Replaces with merge markers and page breaks (`%%MERGE_START_N%%`)
196
+ - **Marker placement** - Places markers first, then page breaks for correct timing
197
+ - **Temporary document** - Saves modified document for PDF conversion
198
+
199
+ ### 3. PDF Conversion
200
+
201
+ - Converts modified Word document to PDF using Word automation
202
+ - Preserves formatting and creates base PDF with markers
203
+
204
+ ### 4. PDF Processing
205
+
206
+ #### Paragraph-based Merges (INSERT)
207
+
208
+ - **Marker location** - Finds merge markers in the base PDF
209
+ - **Marker removal** - Removes markers using redaction (white fill)
210
+ - **Page insertion** - Inserts PDF pages immediately after marker position
211
+ - **Content preservation** - Original document content remains intact
212
+
213
+ #### Table-based Overlays (OVERLAY)
214
+
215
+ - **Page selection** - Processes only the specified pages from source PDFs
216
+ - **Annotation preservation** - Automatically bakes PDF annotations into content using `Document.bake()`
217
+ - **Multi-page support** - Creates additional table cells for multi-page selections
218
+ - **Precise positioning** - Searches for overlay markers in the base PDF
219
+ - **Rectangle calculation** - Uses the marker position as the top-left corner of the overlay area
220
+ - **Marker removal** - Removes markers using redaction (white fill)
221
+ - **Sequential overlay** - Overlays each selected page onto calculated rectangles
222
+ - **Final assembly** - Saves completed PDF with all appendices integrated
223
+
224
+ ## Table-Based Overlay System
225
+
226
+ The Report Compiler uses a precise approach for PDF overlay placement with full support for multi-page PDFs and annotation preservation:
227
+
228
+ ### Single-Page PDF Overlay
229
+
230
+ 1. **Table Detection** - Identifies single-cell tables containing `[[OVERLAY: path.pdf]]` placeholders
231
+ 2. **Page Selection** - Parses page specifications like `,page=1-3` or `,page=5` if provided
232
+ 3. **Dimension Extraction** - Extracts exact table dimensions from Word document metadata
233
+ 4. **Marker Placement** - Places a red marker at the top-left of the table cell
234
+ 5. **Rectangle Calculation** - Uses marker position + table dimensions = overlay area
235
+ 6. **Annotation Preservation** - Bakes PDF annotations into content before overlay
236
+ 7. **Precise Overlay** - Places selected PDF pages exactly within the calculated rectangle
237
+
238
+ ### Multi-Page PDF Overlay
239
+
240
+ For multi-page PDFs or page selections, the system automatically replicates table cells:
241
+
242
+ 1. **Page Detection** - Identifies PDFs with multiple pages or page selections
243
+ 2. **Cell Replication** - Adds consecutive table rows for each selected page
244
+ 3. **Marker Generation** - Creates unique markers for each cell (`%%OVERLAY_START_00_PAGE_02%%`)
245
+ 4. **Sequential Overlay** - Overlays selected pages into consecutive table cells
246
+ 5. **Unified Layout** - All selected PDF pages appear together in the same table area
247
+
248
+ ### Page Selection Examples
249
+
250
+ ```text
251
+ [[OVERLAY: report.pdf, page=1-3]] → 3 table cells with pages 1, 2, 3
252
+ [[OVERLAY: report.pdf, page=2,5,7]] → 3 table cells with pages 2, 5, 7
253
+ [[OVERLAY: report.pdf, page=3-]] → Multiple cells with pages 3 to end
254
+ ```
255
+
256
+ ### Example Output
257
+
258
+ ```text
259
+ Single Table → Page Selection:
260
+ ┌─────────────────┐
261
+ │ PDF Page 2 │ ← Only page 2 (from [[OVERLAY: doc.pdf, page=2]])
262
+ └─────────────────┘
263
+
264
+ Single Table → Multi-Page Selection:
265
+ ┌─────────────────┐
266
+ │ PDF Page 1 │ ← From [[OVERLAY: doc.pdf, page=1,3,5]]
267
+ ├─────────────────┤
268
+ │ PDF Page 3 │ ← Replicated cell
269
+ ├─────────────────┤
270
+ │ PDF Page 5 │ ← Replicated cell
271
+ └─────────────────┘
272
+ ```
273
+
274
+ ### Example Debug Output
275
+
276
+ ```text
277
+ 📋 Table found: 7.50 x 4.00 inches
278
+ 📍 Marker at: (0.50, 1.59) inches
279
+ 📐 Overlay: (0.50, 1.59) to (8.00, 5.59) inches
280
+ 🔥 Baking annotations: 12 found
281
+ ✅ PDF positioned perfectly
282
+ ```
283
+
284
+ ### Key Benefits
285
+
286
+ - **Simple & Reliable** - Single marker approach with cell replication
287
+ - **Flexible Page Selection** - Extract exactly the pages you need from large PDFs
288
+ - **Multi-page Support** - Automatic handling of PDFs with any number of pages
289
+ - **Annotation Preservation** - PDF annotations automatically preserved during overlay
290
+ - **Accurate** - Uses Word's own measurements
291
+ - **Easy to Debug** - Clear inch measurements and detailed logging with page selection info
292
+ - **Consistent** - Predictable placement and unified layout
293
+
294
+ ## Example Workflow
295
+
296
+ ```text
297
+ Input: bridge_report.docx containing [[OVERLAY: appendices/analysis.pdf, page=2-4,7]] in a single-cell table
298
+
299
+ Step 1: Find placeholder and validate analysis.pdf (10 pages)
300
+ Parse page spec "2-4,7" → pages 2, 3, 4, 7 (4 pages selected)
301
+
302
+ Step 2: Replace placeholder with marker + replicate table cells for 4 pages
303
+
304
+ Step 3: Convert modified DOCX to PDF (creates base PDF with 4 table cells)
305
+
306
+ Step 4: Bake annotations, find markers, overlay pages 2,3,4,7 sequentially
307
+
308
+ Output: bridge_report.pdf with selected pages integrated in consecutive cells
309
+ ```
310
+
311
+ ## Requirements
312
+
313
+ - **Windows** (for Word automation via win32com)
314
+ - **Microsoft Word** installed and accessible
315
+ - **Python 3.7+**
316
+ - **Dependencies**: `python-docx`, `PyMuPDF`, `Pillow`, `typer`, `comtypes`, and `pywin32` (Windows only)
317
+
318
+ ## VS Code Debugging
319
+
320
+ The project includes comprehensive VS Code launch configurations:
321
+
322
+ - **Debug Report Compiler - Example File** - Basic debugging with example file
323
+ - **Debug Report Compiler - Example File (Keep Temp)** - Debug with temp files retained
324
+ - **Debug Report Compiler - Custom Input** - Interactive file input debugging
325
+ - **Debug Report Compiler - Step Into All Code** - Detailed debugging with all code
326
+ - **Debug Report Compiler - Error Testing** - Test error handling scenarios
327
+
328
+ ## License
329
+
330
+ This project is licensed under the MIT License - see the LICENSE file for details.
@@ -0,0 +1,313 @@
1
+ # Report Compiler
2
+
3
+ A Python-based automated DOCX and PDF report compiler for engineering teams. This tool allows engineers to write reports in Word, use placeholders to insert external PDFs, and compile everything into a professional PDF with a single command.
4
+
5
+ ## Overview
6
+
7
+ The Report Compiler automates the creation of comprehensive PDF reports by:
8
+
9
+ 1. **Finding PDF placeholders** in Word documents using two types of tags:
10
+ - `[[OVERLAY: path/to/file.pdf, page=5]]` for table-based overlays
11
+ - `[[INSERT: path/to/file.pdf]]` for paragraph-based insertions
12
+ 2. **Modifying the Word document** to create markers and page breaks
13
+ 3. **Converting to PDF** using Word automation (win32com)
14
+ 4. **Processing PDF insertions** with overlays and merges using PyMuPDF
15
+
16
+ ## Features
17
+
18
+ - ✅ **Two insertion types** - Table-based overlays and paragraph-based merges
19
+ - ✅ **Relative path support** - PDF paths resolved relative to the input Word document
20
+ - ✅ **Page selection support** - Specify which pages to include from source PDFs using flexible syntax
21
+ - ✅ **Multi-page PDF support** - Automatic cell replication for multi-page table overlays
22
+ - ✅ **Annotation preservation** - PDF annotations automatically baked into content during processing
23
+ - ✅ **Marker removal** - Automatic removal of placement markers from final PDF
24
+ - ✅ **Robust page breaks** - Proper page breaks for paragraph-based insertions
25
+ - ✅ **Error handling** - Comprehensive error reporting and validation
26
+ - ✅ **Debug support** - `--keep-temp` flag to retain temporary files for debugging
27
+ - ✅ **Table-based overlay** - Precise PDF placement using table dimensions and marker positioning
28
+ - ✅ **Cell replication** - Multi-page PDFs create consecutive table cells automatically
29
+ - ✅ **Intelligent positioning** - Uses table properties for automatic overlay rectangle calculation
30
+ - ✅ **Modular architecture** - Clean separation of concerns with focused classes and modules
31
+
32
+ ## Architecture
33
+
34
+ The Report Compiler uses a modular architecture with clear separation of responsibilities:
35
+
36
+ ### Core Modules
37
+
38
+ - **`report_compiler.core`** - Main orchestration and configuration
39
+ - `ReportCompiler` - Main orchestrator class
40
+ - `Config` - Configuration management and constants
41
+
42
+ - **`report_compiler.document`** - Word document processing
43
+ - `PlaceholderParser` - Detects and parses PDF placeholders
44
+ - `DocxProcessor` - Modifies DOCX files (markers, page breaks, cell replication)
45
+ - `WordConverter` - Converts DOCX to PDF using Word automation
46
+
47
+ - **`report_compiler.pdf`** - PDF processing and manipulation
48
+ - `ContentAnalyzer` - Analyzes PDF content and structure
49
+ - `OverlayProcessor` - Handles table-based PDF overlays
50
+ - `MergeProcessor` - Handles paragraph-based PDF merges
51
+ - `MarkerRemover` - Removes placement markers from final PDF
52
+
53
+ - **`report_compiler.utils`** - Utility classes and helpers
54
+ - `FileManager` - Temporary file management and cleanup
55
+ - `Validators` - Input validation and PDF verification
56
+ - `PageSelector` - Page selection parsing and processing
57
+
58
+ ### Usage as a Library
59
+
60
+ ```python
61
+ from report_compiler.core.compiler import ReportCompiler
62
+
63
+ # Basic usage
64
+ compiler = ReportCompiler("input.docx", "output.pdf")
65
+ compiler.compile()
66
+
67
+ # With debug mode
68
+ compiler = ReportCompiler("input.docx", "output.pdf", keep_temp=True)
69
+ compiler.compile()
70
+ ```
71
+
72
+ ## Quick Start
73
+
74
+ ### Installation
75
+
76
+ ```bash
77
+ pip install report-compiler
78
+ ```
79
+
80
+ ### Basic Usage
81
+
82
+ ```bash
83
+ report-compiler compile input_report.docx output_report.pdf
84
+ ```
85
+
86
+ ### Debug Mode (with temp files)
87
+
88
+ ```bash
89
+ report-compiler compile input_report.docx output_report.pdf --keep-temp
90
+ ```
91
+
92
+ ## Placeholder Format
93
+
94
+ The Report Compiler supports two types of PDF insertion placeholders:
95
+
96
+ ### Table-based Overlays (OVERLAY tags)
97
+
98
+ For inserting PDFs as overlays onto existing pages, preserving the main document's content and layout. Place these in **single-cell (1x1) tables**:
99
+
100
+ ```text
101
+ [[OVERLAY: appendices/sketch.pdf]]
102
+ [[OVERLAY: calculations/diagram.pdf, page=2]]
103
+ [[OVERLAY: C:\Shared\drawing.pdf, page=1-3]]
104
+ [[OVERLAY: diagrams/full_page.pdf, crop=false]]
105
+ [[OVERLAY: sketches/detail.pdf, page=2, crop=false]]
106
+ ```
107
+
108
+ **OVERLAY Parameters:**
109
+
110
+ - `page=` - Page selection (same format as INSERT)
111
+ - `crop=` - Content cropping control:
112
+ - `crop=true` (default): Automatically crops to content bounding box, removing excess whitespace
113
+ - `crop=false`: Uses the full page dimensions without cropping
114
+
115
+ ### Paragraph-based Merges (INSERT tags)
116
+
117
+ For inserting entire PDF pages after a marker position. The original paragraph content is preserved, and PDF pages are inserted immediately after it. Place these in **standalone paragraphs**:
118
+
119
+ ```text
120
+ [[INSERT: appendices/structural_analysis.pdf]]
121
+ [[INSERT: calculations/load_analysis.pdf:1-5]]
122
+ [[INSERT: C:\Shared\external_report.pdf]]
123
+ ```
124
+
125
+ ### Page Selection
126
+
127
+ Both OVERLAY and INSERT tags support page selection:
128
+
129
+ **OVERLAY page selection (using `page=` parameter):**
130
+
131
+ ```text
132
+ [[OVERLAY: appendices/report.pdf, page=5]] # Page 5 only
133
+ [[OVERLAY: appendices/report.pdf, page=1-3]] # Pages 1, 2, and 3
134
+ [[OVERLAY: appendices/report.pdf, page=1,3,5]] # Pages 1, 3, and 5
135
+ [[OVERLAY: appendices/report.pdf, page=2-]] # Pages 2 to end
136
+ ```
137
+
138
+ **INSERT page selection (using `:` separator):**
139
+
140
+ ```text
141
+ [[INSERT: appendices/report.pdf:1-3]] # Pages 1, 2, and 3
142
+ [[INSERT: appendices/report.pdf:5]] # Page 5 only
143
+ [[INSERT: appendices/report.pdf:1,3,5]] # Pages 1, 3, and 5
144
+ [[INSERT: appendices/report.pdf:2-]] # Pages 2 to end
145
+ [[INSERT: appendices/report.pdf:1-3,7,9-]] # Mixed: pages 1-3, 7, and 9 to end
146
+ ```
147
+
148
+ **Page Selection Formats:**
149
+
150
+ - `5` - Single page (page 5)
151
+ - `1-3` - Range of pages (pages 1, 2, 3)
152
+ - `2-` - Open-ended range (pages 2 to end of document)
153
+ - `1,3,5` - Specific pages (pages 1, 3, and 5)
154
+ - `1-3,7,9-12` - Combined specifications
155
+
156
+ **Note:** Page numbers are 1-indexed (first page = 1). Invalid page numbers are automatically filtered out.
157
+
158
+ **Multi-page PDFs**: Automatically handled via cell replication (table-based overlays) or sequential page insertion (paragraph-based merges)
159
+
160
+ **Note**: Relative paths are resolved relative to the Word document's location.
161
+
162
+ ## How It Works
163
+
164
+ ### 1. Placeholder Detection
165
+
166
+ - **Table scanning** - Identifies `[[OVERLAY: ...]]` tags in single-cell tables
167
+ - **Paragraph scanning** - Identifies `[[INSERT: ...]]` tags in standalone paragraphs
168
+ - **Path resolution** - Resolves relative paths relative to Word document location
169
+ - **Page parsing** - Parses page selection syntax (e.g., `:1-3`, `,page=5`)
170
+ - **PDF validation** - Validates that referenced PDF files exist and are readable
171
+ - **Page counting** - Counts effective pages after applying page selection filters
172
+ - **Layout detection** - Identifies single-cell tables vs standalone paragraphs
173
+
174
+ ### 2. Document Modification
175
+
176
+ - **Table placeholders** - Replaces with visible red markers (`%%OVERLAY_START_N%%`)
177
+ - **Cell replication** - Creates additional table cells for multi-page selections
178
+ - **Paragraph placeholders** - Replaces with merge markers and page breaks (`%%MERGE_START_N%%`)
179
+ - **Marker placement** - Places markers first, then page breaks for correct timing
180
+ - **Temporary document** - Saves modified document for PDF conversion
181
+
182
+ ### 3. PDF Conversion
183
+
184
+ - Converts modified Word document to PDF using Word automation
185
+ - Preserves formatting and creates base PDF with markers
186
+
187
+ ### 4. PDF Processing
188
+
189
+ #### Paragraph-based Merges (INSERT)
190
+
191
+ - **Marker location** - Finds merge markers in the base PDF
192
+ - **Marker removal** - Removes markers using redaction (white fill)
193
+ - **Page insertion** - Inserts PDF pages immediately after marker position
194
+ - **Content preservation** - Original document content remains intact
195
+
196
+ #### Table-based Overlays (OVERLAY)
197
+
198
+ - **Page selection** - Processes only the specified pages from source PDFs
199
+ - **Annotation preservation** - Automatically bakes PDF annotations into content using `Document.bake()`
200
+ - **Multi-page support** - Creates additional table cells for multi-page selections
201
+ - **Precise positioning** - Searches for overlay markers in the base PDF
202
+ - **Rectangle calculation** - Uses the marker position as the top-left corner of the overlay area
203
+ - **Marker removal** - Removes markers using redaction (white fill)
204
+ - **Sequential overlay** - Overlays each selected page onto calculated rectangles
205
+ - **Final assembly** - Saves completed PDF with all appendices integrated
206
+
207
+ ## Table-Based Overlay System
208
+
209
+ The Report Compiler uses a precise approach for PDF overlay placement with full support for multi-page PDFs and annotation preservation:
210
+
211
+ ### Single-Page PDF Overlay
212
+
213
+ 1. **Table Detection** - Identifies single-cell tables containing `[[OVERLAY: path.pdf]]` placeholders
214
+ 2. **Page Selection** - Parses page specifications like `,page=1-3` or `,page=5` if provided
215
+ 3. **Dimension Extraction** - Extracts exact table dimensions from Word document metadata
216
+ 4. **Marker Placement** - Places a red marker at the top-left of the table cell
217
+ 5. **Rectangle Calculation** - Uses marker position + table dimensions = overlay area
218
+ 6. **Annotation Preservation** - Bakes PDF annotations into content before overlay
219
+ 7. **Precise Overlay** - Places selected PDF pages exactly within the calculated rectangle
220
+
221
+ ### Multi-Page PDF Overlay
222
+
223
+ For multi-page PDFs or page selections, the system automatically replicates table cells:
224
+
225
+ 1. **Page Detection** - Identifies PDFs with multiple pages or page selections
226
+ 2. **Cell Replication** - Adds consecutive table rows for each selected page
227
+ 3. **Marker Generation** - Creates unique markers for each cell (`%%OVERLAY_START_00_PAGE_02%%`)
228
+ 4. **Sequential Overlay** - Overlays selected pages into consecutive table cells
229
+ 5. **Unified Layout** - All selected PDF pages appear together in the same table area
230
+
231
+ ### Page Selection Examples
232
+
233
+ ```text
234
+ [[OVERLAY: report.pdf, page=1-3]] → 3 table cells with pages 1, 2, 3
235
+ [[OVERLAY: report.pdf, page=2,5,7]] → 3 table cells with pages 2, 5, 7
236
+ [[OVERLAY: report.pdf, page=3-]] → Multiple cells with pages 3 to end
237
+ ```
238
+
239
+ ### Example Output
240
+
241
+ ```text
242
+ Single Table → Page Selection:
243
+ ┌─────────────────┐
244
+ │ PDF Page 2 │ ← Only page 2 (from [[OVERLAY: doc.pdf, page=2]])
245
+ └─────────────────┘
246
+
247
+ Single Table → Multi-Page Selection:
248
+ ┌─────────────────┐
249
+ │ PDF Page 1 │ ← From [[OVERLAY: doc.pdf, page=1,3,5]]
250
+ ├─────────────────┤
251
+ │ PDF Page 3 │ ← Replicated cell
252
+ ├─────────────────┤
253
+ │ PDF Page 5 │ ← Replicated cell
254
+ └─────────────────┘
255
+ ```
256
+
257
+ ### Example Debug Output
258
+
259
+ ```text
260
+ 📋 Table found: 7.50 x 4.00 inches
261
+ 📍 Marker at: (0.50, 1.59) inches
262
+ 📐 Overlay: (0.50, 1.59) to (8.00, 5.59) inches
263
+ 🔥 Baking annotations: 12 found
264
+ ✅ PDF positioned perfectly
265
+ ```
266
+
267
+ ### Key Benefits
268
+
269
+ - **Simple & Reliable** - Single marker approach with cell replication
270
+ - **Flexible Page Selection** - Extract exactly the pages you need from large PDFs
271
+ - **Multi-page Support** - Automatic handling of PDFs with any number of pages
272
+ - **Annotation Preservation** - PDF annotations automatically preserved during overlay
273
+ - **Accurate** - Uses Word's own measurements
274
+ - **Easy to Debug** - Clear inch measurements and detailed logging with page selection info
275
+ - **Consistent** - Predictable placement and unified layout
276
+
277
+ ## Example Workflow
278
+
279
+ ```text
280
+ Input: bridge_report.docx containing [[OVERLAY: appendices/analysis.pdf, page=2-4,7]] in a single-cell table
281
+
282
+ Step 1: Find placeholder and validate analysis.pdf (10 pages)
283
+ Parse page spec "2-4,7" → pages 2, 3, 4, 7 (4 pages selected)
284
+
285
+ Step 2: Replace placeholder with marker + replicate table cells for 4 pages
286
+
287
+ Step 3: Convert modified DOCX to PDF (creates base PDF with 4 table cells)
288
+
289
+ Step 4: Bake annotations, find markers, overlay pages 2,3,4,7 sequentially
290
+
291
+ Output: bridge_report.pdf with selected pages integrated in consecutive cells
292
+ ```
293
+
294
+ ## Requirements
295
+
296
+ - **Windows** (for Word automation via win32com)
297
+ - **Microsoft Word** installed and accessible
298
+ - **Python 3.7+**
299
+ - **Dependencies**: `python-docx`, `PyMuPDF`, `Pillow`, `typer`, `comtypes`, and `pywin32` (Windows only)
300
+
301
+ ## VS Code Debugging
302
+
303
+ The project includes comprehensive VS Code launch configurations:
304
+
305
+ - **Debug Report Compiler - Example File** - Basic debugging with example file
306
+ - **Debug Report Compiler - Example File (Keep Temp)** - Debug with temp files retained
307
+ - **Debug Report Compiler - Custom Input** - Interactive file input debugging
308
+ - **Debug Report Compiler - Step Into All Code** - Detailed debugging with all code
309
+ - **Debug Report Compiler - Error Testing** - Test error handling scenarios
310
+
311
+ ## License
312
+
313
+ This project is licensed under the MIT License - see the LICENSE file for details.
@@ -0,0 +1,29 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "report_compiler"
7
+ version = "0.1.0"
8
+ authors = [
9
+ { name="YOUR NAME", email="your@email.com" },
10
+ ]
11
+ description = "A tool for compiling reports from various sources."
12
+ readme = "README.md"
13
+ requires-python = ">=3.7"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+ dependencies = [
20
+ "comtypes>=1.2.1",
21
+ "Pillow>=10.2.0",
22
+ "python-docx>=1.1.0",
23
+ "PyMuPDF>=1.26.3",
24
+ "typer>=0.9.0",
25
+ "pywin32; sys_platform == 'win32'"
26
+ ]
27
+
28
+ [project.scripts]
29
+ report-compiler = "report_compiler.cli:main"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
+ """
2
+ Report Compiler - A Python-based DOCX+PDF report compiler for engineering teams.
3
+
4
+ This package provides functionality to compile Word documents with embedded PDF placeholders
5
+ into professional PDF reports with precise overlay positioning and merged appendices.
6
+ """
7
+
8
+ __version__ = "2.0.0"
9
+ __author__ = "Report Compiler Team"
10
+
11
+ # from .core.compiler import ReportCompiler # Temporarily commented
12
+ from .core.config import Config
13
+
14
+ __all__ = ['Config'] # 'ReportCompiler'