ima-claude 2.9.0 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -15
- package/dist/cli.js +385 -17
- package/package.json +1 -1
- package/platforms/gemini/adapter.ts +443 -0
- package/platforms/gemini/gemini-extension.json +17 -0
- package/platforms/gemini/hooks-translator.py +66 -0
- package/platforms/shared/detector.ts +5 -1
- package/plugins/ima-claude/.claude-plugin/plugin.json +2 -2
- package/plugins/ima-claude/skills/gh-cli/SKILL.md +286 -0
- package/plugins/ima-claude/skills/ima-doc2pdf/SKILL.md +242 -0
- package/plugins/ima-claude/skills/ima-doc2pdf/references/formatting-spec.md +88 -0
- package/plugins/ima-claude/skills/ima-doc2pdf/scripts/docx_utils.py +21 -0
- package/plugins/ima-claude/skills/ima-doc2pdf/scripts/extract_docx.py +384 -0
- package/plugins/ima-claude/skills/ima-doc2pdf/scripts/generate_pdf.py +663 -0
- package/plugins/ima-claude/skills/mcp-gitea/SKILL.md +358 -0
- package/plugins/ima-claude/skills/mcp-github/SKILL.md +200 -0
- package/plugins/ima-claude/skills/mcp-qdrant/SKILL.md +21 -10
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Generate a styled IMA branded document PDF directly from a Word doc.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python3 generate_pdf.py <path_to_docx> [--out <output.pdf>]
|
|
6
|
+
|
|
7
|
+
Required: pip install reportlab python-docx
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import sys
|
|
11
|
+
import io
|
|
12
|
+
import re
|
|
13
|
+
import base64
|
|
14
|
+
import tempfile
|
|
15
|
+
import urllib.request
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
sys.path.insert(0, str(Path(__file__).parent))
|
|
19
|
+
from extract_docx import extract_document
|
|
20
|
+
|
|
21
|
+
try:
|
|
22
|
+
from reportlab.lib.pagesizes import LETTER
|
|
23
|
+
from reportlab.lib.units import inch
|
|
24
|
+
from reportlab.lib.colors import HexColor, white, black
|
|
25
|
+
from reportlab.lib.styles import ParagraphStyle
|
|
26
|
+
from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_JUSTIFY
|
|
27
|
+
from reportlab.platypus import (
|
|
28
|
+
BaseDocTemplate, PageTemplate, Frame, Paragraph, Spacer,
|
|
29
|
+
ListFlowable, ListItem, HRFlowable, PageBreak, NextPageTemplate,
|
|
30
|
+
KeepTogether, Image
|
|
31
|
+
)
|
|
32
|
+
from reportlab.platypus.flowables import Flowable
|
|
33
|
+
from reportlab.pdfgen import canvas as pdfcanvas
|
|
34
|
+
from reportlab.pdfbase import pdfmetrics
|
|
35
|
+
from reportlab.pdfbase.ttfonts import TTFont
|
|
36
|
+
except ImportError:
|
|
37
|
+
print("ERROR: reportlab not installed. Run:")
|
|
38
|
+
print(" pip install reportlab")
|
|
39
|
+
sys.exit(1)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ── Register Lato fonts ──────────────────────────────────────────────────────
|
|
43
|
+
# Folder expected to hold the Lato TTF files: "fonts" next to this script's parent directory.
FONT_DIR = Path(__file__).parent.parent.joinpath("fonts")

# File names of the four Lato faces the generator relies on.
LATO_FONTS = (
    "Lato-Regular.ttf",
    "Lato-Bold.ttf",
    "Lato-Italic.ttf",
    "Lato-BoldItalic.ttf",
)

# Base URL the font file names are appended to when a download is needed.
LATO_BASE_URL = "https://github.com/google/fonts/raw/main/ofl/lato/"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def ensure_fonts():
    """Download any missing Lato font files into ``FONT_DIR``.

    Files that already exist are left untouched. Each missing file is fetched
    from ``LATO_BASE_URL`` into a temporary ``.part`` file and renamed to its
    final name only after the download has finished, so an interrupted run
    cannot leave a truncated ``.ttf`` that later runs would accept as valid.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when a download fails
        (for example ``urllib.error.URLError``).
    """
    missing = [name for name in LATO_FONTS if not (FONT_DIR / name).exists()]
    if not missing:
        return

    print("Downloading Lato fonts...")
    FONT_DIR.mkdir(parents=True, exist_ok=True)

    for font_file in missing:
        dest = FONT_DIR / font_file
        partial = dest.with_name(dest.name + ".part")
        try:
            urllib.request.urlretrieve(LATO_BASE_URL + font_file, partial)
            partial.replace(dest)
        finally:
            # Only still present when the download or the rename failed.
            if partial.exists():
                partial.unlink()
        print(f" {font_file}")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def register_fonts():
    """Make the four Lato faces available to ReportLab.

    Calls ensure_fonts() first, then registers every TTF found in FONT_DIR
    under its face name, printing a warning for each file that is absent.
    Finally declares the "Lato" font family.
    """
    ensure_fonts()

    faces = (
        ("Lato", "Lato-Regular.ttf"),
        ("Lato-Bold", "Lato-Bold.ttf"),
        ("Lato-Italic", "Lato-Italic.ttf"),
        ("Lato-BoldItalic", "Lato-BoldItalic.ttf"),
    )
    for face_name, ttf_name in faces:
        ttf_path = FONT_DIR / ttf_name
        if not ttf_path.exists():
            print(f"WARNING: Font not found: {ttf_path}")
            continue
        pdfmetrics.registerFont(TTFont(face_name, str(ttf_path)))

    # The family mapping is what lets <b> and <i> markup in Paragraphs pick
    # the matching bold / italic face.
    pdfmetrics.registerFontFamily(
        "Lato",
        normal="Lato",
        bold="Lato-Bold",
        italic="Lato-Italic",
        boldItalic="Lato-BoldItalic",
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ── Brand colours (IMA Brand Book v4.0) ──────────────────────────────────────
|
|
93
|
+
# Named brand colours.
NAVY = HexColor("#00066F")        # "Trustworthy Indigo"
GOLD = HexColor("#FFCC00")        # "Vital Gold"
BODY_TEXT = HexColor("#000000")
GREY_LIGHT = HexColor("#CCCCCC")
GREY_TEXT = HexColor("#666666")

# US Letter page size in points (612 x 792) and the shared page margin.
PAGE_W = LETTER[0]
PAGE_H = LETTER[1]
MARGIN = inch * 0.5
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ── Styles (confirmed from Canva) ────────────────────────────────────────────
|
|
104
|
+
def build_styles():
    """Create every ParagraphStyle used by the document.

    Returns:
        dict mapping a style name (e.g. "body", "cover_title_1") to its
        ``ParagraphStyle``. All styles use the Lato faces, so the fonts must
        be registered before any paragraph using them is laid out.
    """
    S = {}

    def s(name, **kw):
        # Register one style under its own name.
        S[name] = ParagraphStyle(name=name, **kw)

    # ── Cover (white text, drawn over the navy cover background) ─────────
    s("cover_title_1",
      fontName="Lato-Bold", fontSize=90.5, leading=95,
      textColor=white, alignment=TA_CENTER, spaceAfter=8)

    s("cover_title_2",
      fontName="Lato-Bold", fontSize=67, leading=72,
      textColor=white, alignment=TA_CENTER, spaceAfter=8)

    s("cover_subtitle",
      fontName="Lato-Bold", fontSize=24, leading=30,
      textColor=white, alignment=TA_CENTER, spaceAfter=6)

    s("cover_authors",
      fontName="Lato", fontSize=20, leading=26,
      textColor=white, alignment=TA_CENTER, spaceAfter=4)

    s("cover_disclaimer",
      fontName="Lato", fontSize=12, leading=16,
      textColor=white, alignment=TA_CENTER, spaceAfter=3)

    s("cover_date",
      fontName="Lato-Italic", fontSize=12, leading=16,
      textColor=white, alignment=TA_CENTER)

    # ── Content headings ─────────────────────────────────────────────────
    s("intro_heading",
      fontName="Lato-Bold", fontSize=15, leading=20,
      textColor=NAVY, alignment=TA_CENTER,
      spaceBefore=6, spaceAfter=10)

    s("section_heading",
      fontName="Lato-Bold", fontSize=15, leading=20,
      textColor=NAVY, spaceBefore=16, spaceAfter=5)

    s("sub_heading",
      fontName="Lato-Bold", fontSize=13, leading=17,
      textColor=NAVY, spaceBefore=10, spaceAfter=3)

    # ── Body text ────────────────────────────────────────────────────────
    s("body",
      fontName="Lato", fontSize=12, leading=14.5,
      textColor=BODY_TEXT, spaceBefore=0, spaceAfter=6,
      alignment=TA_JUSTIFY)

    s("bullet",
      fontName="Lato", fontSize=12, leading=14.5,
      textColor=BODY_TEXT, spaceBefore=1, spaceAfter=1)

    # ── Warning box — navy background, white text ────────────────────────
    # The ParagraphStyle attribute is ``borderPadding``; an unknown keyword
    # such as ``borderPad`` is stored on the style but never read when the
    # background box is drawn, so the box would get no padding.
    s("warning",
      fontName="Lato-Bold", fontSize=12, leading=16,
      textColor=white, alignment=TA_CENTER,
      backColor=NAVY, borderColor=NAVY,
      borderWidth=0, borderPadding=10,
      spaceBefore=10, spaceAfter=10)

    # ── Q&A ──────────────────────────────────────────────────────────────
    s("qa_question",
      fontName="Lato-Bold", fontSize=12, leading=16,
      textColor=NAVY, spaceBefore=10, spaceAfter=2)

    s("qa_answer",
      fontName="Lato", fontSize=12, leading=14.5,
      textColor=BODY_TEXT, spaceBefore=2, spaceAfter=4,
      alignment=TA_JUSTIFY)

    # ── References ───────────────────────────────────────────────────────
    s("ref_heading",
      fontName="Lato-Bold", fontSize=13, leading=17,
      textColor=NAVY, spaceBefore=10, spaceAfter=8)

    # Hanging indent: the first line starts 14pt left of the following lines.
    s("reference",
      fontName="Lato", fontSize=8, leading=11,
      textColor=HexColor("#333333"), spaceBefore=1, spaceAfter=1,
      leftIndent=14, firstLineIndent=-14)

    # ── Captions ─────────────────────────────────────────────────────────
    s("caption",
      fontName="Lato", fontSize=12, leading=14.5,
      textColor=BODY_TEXT, spaceBefore=4, spaceAfter=4)

    s("disclaimer",
      fontName="Lato", fontSize=12, leading=16,
      textColor=white, alignment=TA_CENTER,
      spaceBefore=4, spaceAfter=4)

    s("content_disclaimer",
      fontName="Lato", fontSize=12, leading=16,
      textColor=BODY_TEXT, alignment=TA_JUSTIFY,
      spaceBefore=4, spaceAfter=6)

    # ── Footer ───────────────────────────────────────────────────────────
    s("footer",
      fontName="Lato", fontSize=10, leading=12,
      textColor=GREY_TEXT, alignment=TA_CENTER)

    return S
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# ── Page canvas callbacks ─────────────────────────────────────────────────────
|
|
211
|
+
def make_cover_canvas(footer_h):
    """Return the onPage callback that paints the cover page background.

    ``footer_h`` is accepted but not used by the callback.
    """
    def on_cover(canv, doc):
        # Save/restore so the fill colour set here does not leak into
        # whatever is drawn on the page afterwards.
        canv.saveState()
        canv.setFillColor(NAVY)
        canv.rect(0, 0, PAGE_W, PAGE_H, stroke=0, fill=1)  # whole page, navy
        canv.restoreState()

    return on_cover
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def make_content_canvas(title_short, date_str, footer_h):
    """Return the onPage callback that draws the footer on content pages.

    The footer text is ``title_short``; when ``date_str`` is non-empty the
    date is appended in parentheses after removing the "Updated " prefix.
    ``footer_h`` sets the vertical position of the rule and the text.
    """
    footer_text = title_short
    if date_str:
        clean_date = date_str.replace("Updated ", "").strip()
        footer_text = f"{title_short} ({clean_date})"

    def on_content(canv, doc):
        rule_y = footer_h + 8
        text_y = footer_h - 2

        canv.saveState()

        # Thin grey rule spanning the page between the side margins.
        canv.setStrokeColor(GREY_LIGHT)
        canv.setLineWidth(0.5)
        canv.line(MARGIN, rule_y, PAGE_W - MARGIN, rule_y)

        # One centred line of footer text in Lato 10pt.
        canv.setFont("Lato", 10)
        canv.setFillColor(GREY_TEXT)
        canv.drawCentredString(PAGE_W / 2, text_y, footer_text)

        canv.restoreState()

    return on_content
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# ── Markup helpers ────────────────────────────────────────────────────────────
|
|
243
|
+
def safe(text):
    """Escape plain text so it can be embedded in ReportLab Paragraph markup.

    Paragraph text is parsed as XML-like markup, so a literal ampersand,
    less-than or greater-than sign must be replaced by its entity. The
    ampersand is handled first, which keeps the entities produced for the
    other two characters from being escaped a second time.

    Args:
        text: the raw string to escape.

    Returns:
        The escaped string; text without those three characters is returned
        unchanged.
    """
    # Imported locally so this function carries its own dependency.
    from xml.sax.saxutils import escape

    return escape(text)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def runs_to_markup(runs):
    """Join a list of run dicts into one Paragraph markup string.

    Each run's "text" is passed through safe() and wrapped in <b>, <i> or
    both according to its truthy "bold" / "italic" entries. A falsy ``runs``
    yields the empty string.
    """
    fragments = []
    for run in runs or ():
        escaped = safe(run.get("text", ""))
        is_bold = bool(run.get("bold"))
        is_italic = bool(run.get("italic"))

        if is_bold and is_italic:
            escaped = f"<b><i>{escaped}</i></b>"
        elif is_bold:
            escaped = f"<b>{escaped}</b>"
        elif is_italic:
            escaped = f"<i>{escaped}</i>"

        fragments.append(escaped)
    return "".join(fragments)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def para_markup(entry):
    """Return Paragraph markup for an entry dict.

    Uses the entry's "runs" when they produce any markup; otherwise falls
    back to the escaped value of the entry's "text" key.
    """
    return runs_to_markup(entry.get("runs", [])) or safe(entry["text"])
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def warning_markup(text):
    """Return the markup for the text of a warning box.

    Only escaping is applied for now: the whole text is rendered in the
    style's single colour, and no gold emphasis is added here.
    """
    escaped = safe(text)
    return escaped
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# ── Extract cover metadata ────────────────────────────────────────────────────
|
|
279
|
+
def extract_cover_meta(sections):
    """
    Collect the cover fields from the first 30 entries of ``sections``.

    The first "h1" / "heading_bold" entry is the title. Scanning stops at the
    next such heading, or at the first "body" entry after the title, so later
    section headings never end up on the cover. "author" entries accumulate;
    for "disclaimer" and "date" the last one seen wins.

    Returns (title, authors, disclaimer, date_str), where authors is the
    escaped author names joined with "<br/>" ("" when there are none).
    """
    title = ""
    title_seen = False
    author_names = []
    single_fields = {"disclaimer": "", "date": ""}

    for entry in sections[:30]:
        kind = entry["type"]
        text = entry["text"].strip()

        if kind in ("h1", "heading_bold"):
            if title_seen:
                break
            title = text
            title_seen = True
        elif kind == "author":
            author_names.append(text)
        elif kind in single_fields:
            single_fields[kind] = text
        elif kind == "body" and title_seen:
            break

    authors = "<br/>".join(safe(name) for name in author_names)

    return title, authors, single_fields["disclaimer"], single_fields["date"]
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
# ── Image extraction ─────────────────────────────────────────────────────────
|
|
318
|
+
def extract_images_from_docx(docx_path):
    """Write the images embedded in a DOCX to temp files, grouped by paragraph.

    Returns a dict mapping a paragraph's position in ``doc.paragraphs`` to the
    list of temp-file paths for the images referenced from that paragraph's
    runs. The temp files are created with ``delete=False`` and are not removed
    by this function.
    """
    from docx import Document
    from docx.oxml.ns import qn

    doc = Document(docx_path)

    # Relationship id -> (raw bytes, content type) for every image relationship.
    image_parts = {
        rel.rId: (rel.target_part.blob, rel.target_part.content_type)
        for rel in doc.part.rels.values()
        if 'image' in rel.reltype
    }

    drawing_tag = qn('w:drawing')
    blip_query = './/' + qn('a:blip')
    embed_attr = qn('r:embed')

    # First pass: note which paragraphs reference which image parts.
    placements = {}  # para_index -> [(blob, content_type)]
    for para_idx, para in enumerate(doc.paragraphs):
        for run in para.runs:
            for drawing in run._element.findall(drawing_tag):
                for blip in drawing.findall(blip_query):
                    rel_id = blip.get(embed_attr)
                    if rel_id and rel_id in image_parts:
                        placements.setdefault(para_idx, []).append(image_parts[rel_id])

    def _dump(blob, content_type):
        # Anything whose content type does not mention png gets a .jpg suffix.
        suffix = '.png' if 'png' in content_type else '.jpg'
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
            handle.write(blob)
        return handle.name

    # Second pass: materialise each referenced image as a temp file.
    return {
        para_idx: [_dump(blob, content_type) for blob, content_type in parts]
        for para_idx, parts in placements.items()
    }
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# ── Build story helpers ───────────────────────────────────────────────────────
|
|
358
|
+
def group_consecutive_bullets(sections):
    """Collapse each run of adjacent bullet entries into one group entry.

    Every maximal run of {'type': 'bullet'} dicts becomes a single
    {'type': 'bullet_group', 'items': [...]} dict holding the original
    entries in order; all other entries are passed through unchanged.
    The input list is not modified.
    """
    merged = []
    open_group = None  # "items" list of the group being filled, if any
    for entry in sections:
        if entry["type"] == "bullet":
            if open_group is None:
                open_group = []
                merged.append({"type": "bullet_group", "items": open_group})
            open_group.append(entry)
        else:
            open_group = None
            merged.append(entry)
    return merged
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def block_to_flowables(block, styles, intro_added, heading_count):
    """Translate one block dict into the ReportLab flowables that render it.

    Returns (flowables, intro_added, heading_count). The two state values are
    passed in and handed back rather than stored anywhere, so the caller
    threads them from one block to the next.
    """
    kind = block["type"]

    def unchanged(flowables):
        # Result tuple for branches that leave intro_added untouched.
        return flowables, intro_added, heading_count

    # These entry types produce nothing in the content flow.
    if kind in ("h1", "date", "author"):
        return unchanged([])

    if kind in ("heading_bold", "h2"):
        if kind == "heading_bold":
            heading_count += 1
            # The first bold heading is dropped from the content flow.
            if heading_count == 1:
                return unchanged([])

        # Only the first "Introduction" heading gets the centred intro style.
        if not intro_added and block["text"].strip().lower() == "introduction":
            intro = [
                Spacer(1, 0.15 * inch),
                Paragraph("Introduction", styles["intro_heading"]),
                Spacer(1, 0.05 * inch),
            ]
            return intro, True, heading_count

        return unchanged([Paragraph(para_markup(block), styles["section_heading"])])

    if kind == "h3":
        return unchanged([Paragraph(para_markup(block), styles["sub_heading"])])

    if kind == "ref_heading":
        # References start on a new page under a navy rule.
        return unchanged([
            PageBreak(),
            Paragraph("References", styles["ref_heading"]),
            HRFlowable(width="100%", color=NAVY, thickness=1.5, spaceAfter=8),
        ])

    if kind == "warning":
        return unchanged([Paragraph(warning_markup(block["text"]), styles["warning"])])

    if kind == "disclaimer":
        return unchanged([Paragraph(safe(block["text"]), styles["content_disclaimer"])])

    if kind == "bullet_group":
        bullet_items = []
        for entry in block["items"]:
            bullet_items.append(
                ListItem(
                    Paragraph(para_markup(entry), styles["bullet"]),
                    bulletColor=NAVY, bulletFontSize=10, leftIndent=18,
                )
            )
        bullet_list = ListFlowable(
            bullet_items, bulletType="bullet",
            bulletFontName="Lato", bulletFontSize=10,
            leftIndent=18, bulletOffsetY=-1,
            spaceBefore=4, spaceAfter=6,
        )
        return unchanged([bullet_list])

    if kind in ("figure_caption", "table_caption"):
        return unchanged([Paragraph(safe(block["text"]), styles["caption"])])

    # "body" and every unrecognised type render as a body paragraph.
    return unchanged([Paragraph(para_markup(block), styles["body"])])
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
# ── Build story ───────────────────────────────────────────────────────────────
|
|
463
|
+
def _image_flowables(paths, description):
    """Return a Spacer / Image / Spacer triple for every image that loads.

    Images are fitted inside the usable content width and a maximum height
    while keeping their aspect ratio. A file that cannot be loaded is reported
    with a warning naming ``description`` and skipped.
    """
    max_w = PAGE_W - 2 * MARGIN - 0.5 * inch
    max_h = PAGE_H - 3 * inch
    flowables = []
    for img_path in paths:
        try:
            # kind="proportional" scales to fit the max_w x max_h box without
            # distorting the picture (width=max_w with height=None keeps the
            # native height and stretches it). lazy=0 loads the file now, so a
            # bad image is caught here instead of failing later in doc.build().
            img = Image(img_path, width=max_w, height=max_h,
                        kind="proportional", lazy=0)
        except Exception as e:
            # Deliberately best-effort: one bad image must not abort the PDF.
            print(f" Warning: Could not embed {description}: {e}")
            continue
        flowables.extend([Spacer(1, 6), img, Spacer(1, 6)])
    return flowables


def build_story(data, styles, title_short, date_str, image_positions=None):
    """Assemble the full list of flowables for the document.

    Args:
        data: extracted document dict; reads "sections" and, when present,
            "qa_pairs" and "references".
        styles: dict of ParagraphStyle objects keyed by style name.
        title_short: unused here; kept for interface compatibility.
        date_str: unused here; kept for interface compatibility.
        image_positions: optional dict mapping a paragraph index to a list of
            image file paths to embed after the block carrying that index.

    Returns:
        The story list: cover page, content blocks with images, Q&A and
        references, in that order.
    """
    story = []
    if image_positions is None:
        image_positions = {}

    # ── Cover page (all on navy background) ──────────────────────────────
    title, authors, disclaimer, cover_date = extract_cover_meta(
        data["sections"]
    )

    story.append(Spacer(1, 1.2 * inch))
    if title:
        # Split title visually: first half as hero text, second half as subtitle.
        # This is a placeholder cover — ima-cover-creator replaces it for final output.
        words = title.split()
        mid = max(1, len(words) // 2)
        line1 = " ".join(words[:mid]).upper()
        line2 = " ".join(words[mid:]).upper() if len(words) > mid else ""

        story.append(Paragraph(safe(line1), styles["cover_title_1"]))
        if line2:
            story.append(Spacer(1, 0.2 * inch))
            story.append(Paragraph(safe(line2), styles["cover_title_2"]))

    story.append(Spacer(1, 0.5 * inch))
    if authors:
        # Already escaped and joined with <br/> by extract_cover_meta.
        story.append(Paragraph(authors, styles["cover_authors"]))
    if disclaimer:
        story.append(Spacer(1, 0.4 * inch))
        story.append(Paragraph(safe(disclaimer), styles["cover_disclaimer"]))
    if cover_date:
        story.append(Spacer(1, 0.15 * inch))
        story.append(Paragraph(safe(cover_date), styles["cover_date"]))

    # Everything after the cover uses the "content" page template.
    story.append(NextPageTemplate("content"))
    story.append(PageBreak())

    # ── Content — pipeline: group bullets → map blocks → flatten ─────────
    grouped = group_consecutive_bullets(data["sections"])
    intro_added = False
    heading_count = 0
    rendered_images = set()  # paragraph indices whose images are already placed

    for block in grouped:
        flowables, intro_added, heading_count = block_to_flowables(
            block, styles, intro_added, heading_count
        )
        story.extend(flowables)

        # Images attached to this block's own paragraph index.
        para_idx = block.get("index", -1)
        if para_idx in image_positions and para_idx not in rendered_images:
            story.extend(_image_flowables(
                image_positions[para_idx], f"image at para {para_idx}"
            ))
            rendered_images.add(para_idx)

        # A bullet group carries no index itself; check each of its items.
        if block.get("type") == "bullet_group":
            for item in block.get("items", []):
                item_idx = item.get("index", -1)
                if item_idx in image_positions and item_idx not in rendered_images:
                    story.extend(_image_flowables(image_positions[item_idx], "image"))
                    rendered_images.add(item_idx)

    # Insert any remaining images not yet rendered (e.g. image-only paragraphs)
    for para_idx, paths in image_positions.items():
        if para_idx not in rendered_images:
            story.extend(_image_flowables(paths, "orphan image"))
            rendered_images.add(para_idx)

    # ── Q&A ───────────────────────────────────────────────────────────────
    if data.get("qa_pairs"):
        story.append(Spacer(1, 0.1 * inch))
        story.append(HRFlowable(width="100%", color=GREY_LIGHT,
                                thickness=0.5, spaceAfter=8))
        for qa in data["qa_pairs"]:
            # Keep each question with its answer paragraphs on one page.
            qa_block = [Paragraph(para_markup(qa["question"]), styles["qa_question"])]
            qa_block.extend(
                Paragraph(para_markup(ans), styles["qa_answer"])
                for ans in qa["answer_parts"]
            )
            story.append(KeepTogether(qa_block))

    # ── References ────────────────────────────────────────────────────────
    if data.get("references"):
        story.append(PageBreak())
        story.append(Paragraph("References", styles["ref_heading"]))
        story.append(HRFlowable(width="100%", color=NAVY, thickness=1.5,
                                spaceAfter=8))
        for ref in data["references"]:
            story.append(Paragraph(safe(ref["text"]), styles["reference"]))

    return story
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
# ── Main ──────────────────────────────────────────────────────────────────────
|
|
586
|
+
def generate_pdf(docx_path, out_path):
    """Convert a Word document into the styled PDF at ``out_path``.

    Extracts the document structure and embedded images, registers fonts,
    builds the styles, page templates and story, then writes the PDF. The
    temp files holding the extracted images are removed afterwards, whether
    or not the build succeeded.

    Args:
        docx_path: path of the source .docx file.
        out_path: path of the PDF to write.
    """
    print(f"Extracting: {docx_path}")
    data = extract_document(docx_path)

    # Extract images from DOCX
    print("Extracting images...")
    image_positions = extract_images_from_docx(docx_path)

    try:
        print(f" Found images at {len(image_positions)} paragraph positions")

        register_fonts()
        styles = build_styles()

        title, authors, disclaimer, date_str = extract_cover_meta(
            data["sections"]
        )
        # Long titles are cut to 60 characters for the page footer.
        title_short = (title[:60] + "...") if len(title) > 60 else title

        footer_h = 0.4 * inch

        doc = BaseDocTemplate(
            str(out_path),
            pagesize=LETTER,
            leftMargin=MARGIN, rightMargin=MARGIN,
            topMargin=MARGIN, bottomMargin=MARGIN,
        )

        # Both frames sit above the footer area; the content frame starts
        # slightly higher than the cover frame.
        frame_w = PAGE_W - 2 * MARGIN
        frame_h = PAGE_H - 2 * MARGIN - footer_h
        cover_frame = Frame(
            MARGIN, footer_h + 0.3 * inch,
            frame_w, frame_h,
            id="cover_frame"
        )
        content_frame = Frame(
            MARGIN, footer_h + 0.4 * inch,
            frame_w, frame_h,
            id="content_frame"
        )

        # The first template listed ("cover") applies to the first page; the
        # story switches to "content" itself after the cover.
        doc.addPageTemplates([
            PageTemplate(
                id="cover",
                frames=[cover_frame],
                onPage=make_cover_canvas(footer_h)
            ),
            PageTemplate(
                id="content",
                frames=[content_frame],
                onPage=make_content_canvas(title_short, date_str, footer_h)
            ),
        ])

        story = build_story(data, styles, title_short, date_str, image_positions)

        print("Building PDF...")
        doc.build(story)
        print(f"Done → {out_path}")
    finally:
        # The extracted images are only needed while the PDF is being built.
        for paths in image_positions.values():
            for img_path in paths:
                try:
                    Path(img_path).unlink()
                except OSError:
                    # Best-effort cleanup: a file that is already gone or
                    # still locked must not mask the real outcome.
                    pass
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
# ── Entry point ───────────────────────────────────────────────────────────────
|
|
646
|
+
if __name__ == "__main__":
    # Re-wrap stdout as UTF-8 so non-ASCII output (such as the arrow in the
    # "Done" message) cannot raise an encoding error; unencodable characters
    # are replaced.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

    usage = "Usage: python generate_pdf.py <path_to_docx> [--out output.pdf]"

    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    docx_path = Path(sys.argv[1])
    if not docx_path.exists():
        print(f"Error: File not found: {docx_path}")
        sys.exit(1)

    if "--out" in sys.argv:
        out_pos = sys.argv.index("--out") + 1
        # "--out" given as the last argument has no value to read.
        if out_pos >= len(sys.argv):
            print("Error: --out requires an output path")
            print(usage)
            sys.exit(1)
        out_path = Path(sys.argv[out_pos])
    else:
        # Default: same location and name as the input, with a .pdf suffix.
        out_path = docx_path.with_suffix(".pdf")

    generate_pdf(docx_path, out_path)
|