report-compiler 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- report_compiler/__init__.py +14 -0
- report_compiler/cli.py +327 -0
- report_compiler/core/__init__.py +1 -0
- report_compiler/core/compiler.py +414 -0
- report_compiler/core/config.py +74 -0
- report_compiler/document/__init__.py +1 -0
- report_compiler/document/docx_processor.py +224 -0
- report_compiler/document/libreoffice_converter.py +44 -0
- report_compiler/document/placeholder_parser.py +202 -0
- report_compiler/document/word_converter.py +140 -0
- report_compiler/pdf/__init__.py +1 -0
- report_compiler/pdf/content_analyzer.py +239 -0
- report_compiler/pdf/marker_remover.py +147 -0
- report_compiler/pdf/merge_processor.py +247 -0
- report_compiler/pdf/overlay_processor.py +168 -0
- report_compiler/utils/__init__.py +1 -0
- report_compiler/utils/conversions.py +12 -0
- report_compiler/utils/file_manager.py +208 -0
- report_compiler/utils/logging_config.py +181 -0
- report_compiler/utils/page_selector.py +182 -0
- report_compiler/utils/pdf_to_svg.py +116 -0
- report_compiler/utils/validators.py +287 -0
- report_compiler-0.1.0.dist-info/METADATA +330 -0
- report_compiler-0.1.0.dist-info/RECORD +27 -0
- report_compiler-0.1.0.dist-info/WHEEL +5 -0
- report_compiler-0.1.0.dist-info/entry_points.txt +2 -0
- report_compiler-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Report Compiler - A Python-based DOCX+PDF report compiler for engineering teams.
|
|
3
|
+
|
|
4
|
+
This package provides functionality to compile Word documents with embedded PDF placeholders
|
|
5
|
+
into professional PDF reports with precise overlay positioning and merged appendices.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "2.0.0"
|
|
9
|
+
__author__ = "Report Compiler Team"
|
|
10
|
+
|
|
11
|
+
# from .core.compiler import ReportCompiler # Temporarily commented
|
|
12
|
+
from .core.config import Config
|
|
13
|
+
|
|
14
|
+
__all__ = ['Config'] # 'ReportCompiler'
|
report_compiler/cli.py
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Report Compiler - CLI logic.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import sys
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
from report_compiler.core.compiler import ReportCompiler
|
|
12
|
+
from report_compiler.core.config import Config
|
|
13
|
+
from report_compiler.utils.logging_config import setup_logging, get_logger
|
|
14
|
+
from report_compiler.utils.pdf_to_svg import PdfToSvgConverter
|
|
15
|
+
|
|
16
|
+
# Root Typer application. Two commands ("compile" and "svg-import") are
# registered on it, so every invocation has to name the command explicitly;
# the examples in the help text therefore include the command name.
app = typer.Typer(
    help="""
Report Compiler v2.0 - Compile DOCX documents with embedded PDF placeholders

Examples:
  report-compiler compile report.docx final_report.pdf
  report-compiler compile report.docx output.pdf --keep-temp
  report-compiler svg-import input.pdf output.svg --page 3

Placeholder Types:
  [[OVERLAY: path/file.pdf]]        - Table-based overlay (precise positioning)
  [[OVERLAY: path/file.pdf, crop=false]]  - Overlay without content cropping
  [[INSERT: path/file.pdf]]         - Paragraph-based merge (full document)
  [[INSERT: path/file.pdf:1-3,7]]   - Insert specific pages only
  [[INSERT: path/file.docx]]        - Recursively compile and insert a DOCX file

Features:
  • Recursive compilation of DOCX files
  • Content-aware cropping with border preservation
  • Multi-page overlay support with automatic table replication
  • High-quality PDF to SVG conversion for single or multiple pages
  • Comprehensive validation and error reporting
"""
)
|
|
40
|
+
|
|
41
|
+
def version_callback(value: bool):
    """Print the program version and stop processing when ``--version`` is given.

    Args:
        value: True when the ``--version`` flag was passed on the command line.

    Raises:
        typer.Exit: After printing the version, to end the run without
            executing the command itself.
    """
    if not value:
        return

    # Prefer a version exposed by Config (original behaviour). Config is not
    # visible from this module, so fall back to the package-level __version__
    # rather than printing "Unknown" when Config carries no such attribute.
    version = getattr(Config, '__version__', None)
    if version is None:
        try:
            from report_compiler import __version__ as version
        except ImportError:
            version = 'Unknown'

    typer.echo(f"Report Compiler v{version}")
    raise typer.Exit()
|
|
45
|
+
|
|
46
|
+
@app.command("compile")
|
|
47
|
+
def compile_docx(
|
|
48
|
+
input_file: str = typer.Argument(..., help="Input DOCX file path"),
|
|
49
|
+
output_file: str = typer.Argument(..., help="Output PDF file path"),
|
|
50
|
+
keep_temp: bool = typer.Option(False, help="Keep temporary files for debugging"),
|
|
51
|
+
verbose: bool = typer.Option(False, "-v", "--verbose", "--debug", help="Enable verbose logging (DEBUG level)"),
|
|
52
|
+
log_file: str = typer.Option(None, help="Log to file in addition to console"),
|
|
53
|
+
version: bool = typer.Option(False, "--version", callback=version_callback, is_eager=True, help="Show version and exit")
|
|
54
|
+
):
|
|
55
|
+
"""Compile DOCX to PDF."""
|
|
56
|
+
setup_logging(log_file=log_file, verbose=verbose)
|
|
57
|
+
logger = get_logger()
|
|
58
|
+
logger.info("=" * 60)
|
|
59
|
+
logger.info("Report Compiler v2.0 - Starting compilation")
|
|
60
|
+
logger.info("=" * 60)
|
|
61
|
+
class Args:
|
|
62
|
+
def __init__(self, input_file, output_file, keep_temp, verbose, log_file):
|
|
63
|
+
self.input_file = input_file
|
|
64
|
+
self.output_file = output_file
|
|
65
|
+
self.keep_temp = keep_temp
|
|
66
|
+
self.verbose = verbose
|
|
67
|
+
self.log_file = log_file
|
|
68
|
+
args = Args(input_file, output_file, keep_temp, verbose, log_file)
|
|
69
|
+
return handle_compilation(args, logger)
|
|
70
|
+
|
|
71
|
+
@app.command("svg-import")
|
|
72
|
+
def svg_import(
|
|
73
|
+
input_file: str = typer.Argument(..., help="Input PDF file path"),
|
|
74
|
+
output_file: str = typer.Argument(..., help="Output SVG file path"),
|
|
75
|
+
page: str = typer.Option("all", help="Page(s) to convert: single number, range (1-3), list (1,3,5), or 'all'"),
|
|
76
|
+
verbose: bool = typer.Option(False, "-v", "--verbose", "--debug", help="Enable verbose logging (DEBUG level)"),
|
|
77
|
+
log_file: str = typer.Option(None, help="Log to file in addition to console"),
|
|
78
|
+
version: bool = typer.Option(False, "--version", callback=version_callback, is_eager=True, help="Show version and exit")
|
|
79
|
+
):
|
|
80
|
+
"""Convert PDF page(s) to SVG format."""
|
|
81
|
+
setup_logging(log_file=log_file, verbose=verbose)
|
|
82
|
+
logger = get_logger()
|
|
83
|
+
logger.info("=" * 60)
|
|
84
|
+
logger.info("Report Compiler v2.0 - Starting PDF to SVG conversion")
|
|
85
|
+
logger.info("=" * 60)
|
|
86
|
+
class Args:
|
|
87
|
+
def __init__(self, input_file, output_file, page, verbose, log_file):
|
|
88
|
+
self.input_file = input_file
|
|
89
|
+
self.output_file = output_file
|
|
90
|
+
self.page = page
|
|
91
|
+
self.verbose = verbose
|
|
92
|
+
self.log_file = log_file
|
|
93
|
+
args = Args(input_file, output_file, page, verbose, log_file)
|
|
94
|
+
return handle_svg_import(args, logger)
|
|
95
|
+
|
|
96
|
+
def main():
    """Run the Typer application, which parses the command line and dispatches to a command."""
    app()
|
|
98
|
+
|
|
99
|
+
def handle_svg_import(args, logger) -> int:
    """Convert the requested page(s) of a PDF into SVG file(s).

    A single selected page is written to ``args.output_file`` itself; several
    selected pages are written next to it as ``<stem>_page_<n>.svg``.

    Args:
        args: Object exposing ``input_file``, ``output_file`` and ``page``.
        logger: Logger used for progress and error reporting.

    Returns:
        0 when every requested page was converted; 1 on a validation error,
        an empty page selection, or when at least one page failed to convert.
    """
    logger.info("Mode: PDF to SVG conversion")

    # Validate input file
    input_path = Path(args.input_file)
    if not input_path.exists():
        logger.error(f"Input file not found: {args.input_file}")
        return 1

    if input_path.suffix.lower() != '.pdf':
        logger.error(f"Input file must be a PDF document: {args.input_file}")
        return 1

    logger.info(f"Input PDF: {input_path.absolute()}")

    # Validate output file
    output_path = Path(args.output_file)
    if output_path.suffix.lower() != '.svg':
        logger.error(f"Output file must have .svg extension: {args.output_file}")
        return 1

    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        logger.info(f"Output SVG: {output_path.absolute()}")
        logger.debug(f"Output directory created/verified: {output_path.parent}")
    except Exception as e:
        logger.error(f"Cannot create output directory: {e}", exc_info=True)
        return 1

    # Initialize converter and validate PDF
    pdf_path = str(input_path.absolute())
    converter = PdfToSvgConverter()
    validation_result = converter.validate_pdf(pdf_path)

    if not validation_result['valid']:
        logger.error(f"PDF validation failed: {validation_result['error']}")
        return 1

    logger.info(f"PDF is valid with {validation_result['page_count']} pages")

    # Parse page specification
    try:
        pages_to_convert = parse_page_range(args.page, validation_result['page_count'])
    except ValueError as e:
        logger.error(f"Invalid page specification: {e}")
        return 1

    # An empty selection (e.g. "all" on a document reporting zero pages) would
    # otherwise compare 0 == 0 below and be reported as a success.
    if not pages_to_convert:
        logger.error("No pages to convert")
        return 1

    logger.info(f"Converting {len(pages_to_convert)} page(s): {pages_to_convert}")

    if len(pages_to_convert) == 1:
        # Single page - use the original output path
        page_num = pages_to_convert[0]
        logger.info(f"Converting page {page_num} to SVG...")

        success = converter.convert_page_to_svg(
            pdf_path=pdf_path,
            page_number=page_num,
            output_svg_path=str(output_path.absolute())
        )

        if success:
            logger.info("=" * 60)
            logger.info("🎉 PDF to SVG conversion completed successfully!")
            logger.info(f"📄 Output: {output_path.absolute()}")
            logger.info("=" * 60)
            return 0

        logger.error("=" * 60)
        logger.error("❌ PDF to SVG conversion failed!")
        logger.error("=" * 60)
        return 1

    # Multiple pages - create numbered files
    output_dir = output_path.parent
    output_stem = output_path.stem
    successful_conversions = 0

    for page_num in pages_to_convert:
        # Create filename like "output_page_1.svg", "output_page_2.svg", etc.
        page_output_path = output_dir / f"{output_stem}_page_{page_num}.svg"

        logger.info(f"Converting page {page_num} to {page_output_path.name}...")

        success = converter.convert_page_to_svg(
            pdf_path=pdf_path,
            page_number=page_num,
            output_svg_path=str(page_output_path.absolute())
        )

        if success:
            successful_conversions += 1
        else:
            # Keep going: remaining pages are still attempted after a failure.
            logger.error(f"Failed to convert page {page_num}")

    if successful_conversions == len(pages_to_convert):
        logger.info("=" * 60)
        logger.info("🎉 All PDF pages converted successfully!")
        logger.info(f"📄 {successful_conversions} SVG files created in: {output_dir.absolute()}")
        logger.info("=" * 60)
        return 0

    if successful_conversions > 0:
        # Partial output is kept on disk but still reported as a failure.
        logger.warning("=" * 60)
        logger.warning(f"⚠️  Partial success: {successful_conversions}/{len(pages_to_convert)} pages converted")
        logger.warning(f"📄 {successful_conversions} SVG files created in: {output_dir.absolute()}")
        logger.warning("=" * 60)
        return 1

    logger.error("=" * 60)
    logger.error("❌ All PDF to SVG conversions failed!")
    logger.error("=" * 60)
    return 1
|
|
212
|
+
|
|
213
|
+
def handle_compilation(args, logger) -> int:
    """Compile a DOCX report into a PDF and return a process exit code.

    Args:
        args: Object exposing ``input_file``, ``output_file`` and ``keep_temp``.
        logger: Logger used for progress and error reporting.

    Returns:
        0 when the compiler reports success; 1 on a validation error, a
        failed or interrupted compilation, or an unexpected exception.
    """
    logger.info("Mode: DOCX compilation")

    # Validate input file
    input_path = Path(args.input_file)
    if not input_path.exists():
        logger.error(f"Input file not found: {args.input_file}")
        return 1

    if input_path.suffix.lower() != '.docx':
        logger.error(f"Input file must be a DOCX document: {args.input_file}")
        return 1

    logger.info(f"Input DOCX: {input_path.absolute()}")

    # Validate output directory
    output_path = Path(args.output_file)
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        logger.info(f"Output PDF: {output_path.absolute()}")
        logger.debug(f"Output directory created/verified: {output_path.parent}")
    except Exception as e:
        logger.error(f"Cannot create output directory: {e}", exc_info=True)
        return 1

    # Run the report compiler
    compiler = None
    try:
        compiler = ReportCompiler(
            input_path=str(input_path.absolute()),
            output_path=str(output_path.absolute()),
            keep_temp=args.keep_temp
        )

        if compiler.run():
            logger.info("=" * 60)
            logger.info("🎉 Report compilation completed successfully!")
            logger.info(f"📄 Output: {output_path.absolute()}")
            logger.info("=" * 60)
            return 0

        logger.error("=" * 60)
        logger.error("❌ Report compilation failed!")
        logger.error("=" * 60)
        return 1

    except KeyboardInterrupt:
        logger.warning("\n⚠️  Report compilation interrupted by user.")
        return 1
    except Exception as e:
        logger.error(f"\n❌ An unexpected error occurred during compilation: {e}", exc_info=True)
        return 1
    finally:
        # Best-effort cleanup. It must neither crash when the compiler was
        # never built or has no converter (attribute missing or None), nor
        # replace the result above with an exception raised by disconnect().
        word_converter = getattr(compiler, 'word_converter', None) if compiler else None
        if word_converter is not None:
            try:
                word_converter.disconnect()
            except Exception as e:
                logger.warning(f"Failed to disconnect Word converter: {e}")
|
|
271
|
+
|
|
272
|
+
def parse_page_range(page_spec: str, total_pages: int) -> list:
    """
    Parse page specification into a list of page numbers.

    The specification is case-insensitive and may be ``"all"``, or a
    comma-separated mix of single pages (``"4"``) and inclusive ranges
    (``"1-3"``). Duplicates are collapsed and the result is sorted ascending.

    Args:
        page_spec: Page specification string (e.g., "1", "1-3", "1,3,5", "all")
        total_pages: Total number of pages in the PDF

    Returns:
        List of page numbers (1-based indexing)

    Raises:
        ValueError: If page specification is invalid (not a number, out of
            bounds, or a range whose start exceeds its end)
    """
    page_spec = page_spec.strip().lower()

    if page_spec == "all":
        return list(range(1, total_pages + 1))

    pages = set()

    # Split by commas to handle lists like "1,3,5"
    for part in page_spec.split(','):
        part = part.strip()

        if '-' in part:
            # Handle ranges like "1-3". Only the int() conversion is guarded,
            # so format errors are told apart from bounds errors structurally
            # instead of by inspecting the exception's message text.
            start_text, end_text = part.split('-', 1)
            try:
                start = int(start_text.strip())
                end = int(end_text.strip())
            except ValueError:
                raise ValueError(f"Invalid page range format: {part}") from None

            if start < 1 or end < 1 or start > total_pages or end > total_pages:
                raise ValueError(f"Page range {start}-{end} is out of bounds (1-{total_pages})")
            if start > end:
                raise ValueError(f"Invalid range {start}-{end}: start page must be <= end page")

            pages.update(range(start, end + 1))
        else:
            # Handle single page numbers
            try:
                page_num = int(part)
            except ValueError:
                raise ValueError(f"Invalid page number: {part}") from None

            if page_num < 1 or page_num > total_pages:
                raise ValueError(f"Page {page_num} is out of bounds (1-{total_pages})")

            pages.add(page_num)

    # The set already removed duplicates; return them in ascending order
    return sorted(pages)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core module for report compilation functionality."""
|