pdfgen-juanipis 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdfgen/__init__.py +17 -0
- pdfgen/api.py +69 -0
- pdfgen/assets/banner-clean.png +0 -0
- pdfgen/assets/banner.png +0 -0
- pdfgen/assets/fonts/BCDEEE_Calibri_5.ttf +0 -0
- pdfgen/assets/fonts/BCDFEE_CenturyGothic-Bold_9.ttf +0 -0
- pdfgen/assets/fonts/BCDGEE_CenturyGothic-Bold_14.ttf +0 -0
- pdfgen/assets/fonts/BCDHEE_Calibri-Bold_20.ttf +0 -0
- pdfgen/assets/fonts/BCDIEE_Calibri-Bold_25.ttf +0 -0
- pdfgen/assets/fonts/BCDJEE_Calibri_27.ttf +0 -0
- pdfgen/assets/fonts/BCDKEE_Calibri-Italic_33.ttf +0 -0
- pdfgen/assets/fonts/BCDLEE_Calibri-Italic_52.ttf +0 -0
- pdfgen/assets/fonts/BCDMEE_SegoeUI_54.ttf +0 -0
- pdfgen/assets/fonts/BCDNEE_SegoeUI_60.ttf +0 -0
- pdfgen/assets/fonts/BCDOEE_Aptos Narrow,Bold_142.ttf +0 -0
- pdfgen/assets/fonts/BCDPEE_Aptos Narrow,Bold_144.ttf +0 -0
- pdfgen/assets/fonts/BCEAEE_Aptos Narrow_149.ttf +0 -0
- pdfgen/assets/fonts/BCEBEE_Aptos Narrow_154.ttf +0 -0
- pdfgen/assets/fonts/TimesNewRomanPS-BoldMT_38.ttf +0 -0
- pdfgen/assets/logo.png +0 -0
- pdfgen/cli.py +106 -0
- pdfgen/pagination.py +1045 -0
- pdfgen/render.py +348 -0
- pdfgen/schema.json +126 -0
- pdfgen/templates/boletin.css +389 -0
- pdfgen/templates/boletin_template.html.jinja +129 -0
- pdfgen/validator.py +247 -0
- pdfgen_juanipis-0.1.3.dist-info/METADATA +170 -0
- pdfgen_juanipis-0.1.3.dist-info/RECORD +33 -0
- pdfgen_juanipis-0.1.3.dist-info/WHEEL +5 -0
- pdfgen_juanipis-0.1.3.dist-info/entry_points.txt +2 -0
- pdfgen_juanipis-0.1.3.dist-info/licenses/LICENSE +21 -0
- pdfgen_juanipis-0.1.3.dist-info/top_level.txt +1 -0
pdfgen/pagination.py
ADDED
|
@@ -0,0 +1,1045 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import logging
|
|
3
|
+
import math
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
try:
    from weasyprint import HTML, CSS

    WEASYPRINT_AVAILABLE = True
except Exception:  # pragma: no cover - optional dependency for measurement
    # WeasyPrint is optional here: if importing it fails for any reason the
    # names are stubbed out and BlockMeasurer falls back to heuristic estimates.
    HTML = None
    CSS = None
    WEASYPRINT_AVAILABLE = False

LOGGER = logging.getLogger(__name__)
# Factor applied to box dimensions reported by WeasyPrint to express them in
# points (96 CSS px per inch vs. 72 pt per inch).
CSS_PX_TO_PT = 72.0 / 96.0
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclasses.dataclass(frozen=True)
class LayoutConfig:
    """Immutable page geometry used by the paginator; every value is in points."""

    # Page size and the horizontal extent of the content column.
    page_width_pt: float = 612.0
    page_height_pt: float = 792.0
    content_left_pt: float = 85.1
    content_width_pt: float = 444.0

    # Minimum vertical offsets for the intro and for the content area on the
    # first page and on continuation pages.
    default_intro_top_pt: float = 122.18
    default_content_top_pt: float = 150.0
    continuation_content_top_pt: float = 110.0

    # Header title / subtitle placement.
    header_title_top_pt: float = 73.7
    header_subtitle_top_pt: float = 90.77
    header_title_left_pt: float = 94.7
    header_title_width_pt: float = 430.0
    header_subtitle_left_pt: float = 260.8
    header_subtitle_width_pt: float = 220.0

    # Header artwork (logo and banner) and the lowest allowed title offset.
    header_logo_top_pt: float = 13.95
    header_logo_height_pt: float = 36.75
    header_banner_height_pt: float = 79.5
    header_title_min_top_pt: float = 100.0

    # Footer offsets measured from the bottom of the page.
    footer_contact_bottom_pt: float = 32.0
    footer_page_bottom_pt: float = 134.0
    footer_meta_bottom_pt: float = 70.0
    footer_meta_gap_pt: float = 6.0

    # Gaps, safety padding and the minimum height granted to content.
    header_gap_pt: float = 6.0
    intro_gap_pt: float = 12.0
    header_subtitle_gap_pt: float = 2.0
    safety_pad_pt: float = 6.0
    min_content_height_pt: float = 48.0

    def to_template(self) -> Dict[str, float]:
        """Return the default ``content_top`` / ``intro_top`` offsets as a dict."""
        return dict(
            content_top=self.default_content_top_pt,
            intro_top=self.default_intro_top_pt,
        )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclasses.dataclass
class PageLayoutState:
    """Vertical layout values resolved for one kind of page (first or continuation)."""

    # Top offset of the intro text.
    intro_top_pt: float
    # Top offset of the content area.
    content_top_pt: float
    # Content height available when only the base footer is reserved.
    content_height_base_pt: float
    # Content height available when the refs/notes footer is reserved as well.
    content_height_meta_pt: float
    # Space reserved at the bottom of the page for the base footer.
    reserved_base_pt: float
    # Bottom offset at which the refs/notes footer is placed.
    footer_meta_bottom_pt: float
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclasses.dataclass
class BlockItem:
    """A content block paired with its measured height and footer metadata."""

    # Block payload handed to the template (carries a "type" key such as
    # "html" or "table").
    data: Dict[str, Any]
    # Measured (or estimated) height of the block.
    height_pt: float
    # When true the paginator tries to keep this block on the same page as the
    # block that follows it.
    keep_with_next: bool = False
    # References and footer notes attached to this block.
    refs: List[str] = dataclasses.field(default_factory=list)
    notes: List[str] = dataclasses.field(default_factory=list)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclasses.dataclass
class PageBuild:
    """Blocks assigned to one output page while paginating."""

    # Blocks placed on the page, in order.
    blocks: List[BlockItem]
    # Sum of the heights of the placed blocks.
    height_pt: float
    # References and footer notes collected from the placed blocks.
    refs: List[str]
    notes: List[str]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class BlockMeasurer:
    """Measure the height, in points, of HTML fragments, text blocks and tables.

    Each probe is laid out with WeasyPrint using the stylesheet at ``css_path``
    plus ``MEASURE_CSS``. When WeasyPrint is unavailable, the render fails or
    the probe element cannot be found, a coarse heuristic estimate is used.

    NOTE(review): every ``measure_*`` method interpolates its arguments into
    markup without escaping, so callers are expected to pass markup-safe text.
    """

    def __init__(self, css_path: str, base_url: str, layout: "LayoutConfig"):
        """Remember the stylesheet path, base URL and layout used for probes."""
        self.css_path = css_path
        self.base_url = base_url
        self.layout = layout
        # Memoised heights, keyed by a tuple that describes the measured content.
        self._height_cache: Dict[Tuple[Any, ...], float] = {}

    def measure_html(self, html_fragment: str) -> float:
        """Return the height of ``html_fragment`` rendered inside ``.content``."""
        key = ("html", html_fragment)
        cached = self._height_cache.get(key)
        if cached is not None:
            return cached

        height = self._measure_with_weasyprint(
            f'<div class="content"><div id="probe">{html_fragment}</div></div>',
            "probe",
        )
        if height is None:
            height = self._estimate_html_height(html_fragment)
        self._height_cache[key] = height
        return height

    def measure_text_block(self, text: str, class_name: str) -> float:
        """Return the height of ``text`` rendered in a div with ``class_name``."""
        key = ("text", class_name, text)
        cached = self._height_cache.get(key)
        if cached is not None:
            return cached

        html = f'<div id="probe" class="{class_name}">{text}</div>'
        height = self._measure_with_weasyprint(html, "probe")
        if height is None:
            height = self._estimate_text_height(text, class_name)
        self._height_cache[key] = height
        return height

    def _table_cache_key(self, table: Dict[str, Any], show_header: bool) -> Tuple[Any, ...]:
        """Build a cache key covering every table field that affects layout.

        Besides the rows, the column groups (titles and months) and both width
        settings are part of the key: they change the generated markup, so two
        tables that differ only in those fields must not share a cached height.
        """
        rows_key = tuple(
            tuple(row.get("vals", [])) + (row.get("dep", ""),)
            for row in table.get("rows") or []
        )
        groups_key = tuple(
            (group.get("title", ""), tuple(group.get("months", [])))
            for group in table.get("groups") or []
        )
        return (
            "table",
            show_header,
            table.get("total_width"),
            table.get("dep_width"),
            groups_key,
            rows_key,
        )

    def measure_table(self, table: Dict[str, Any], show_header: bool) -> float:
        """Return the height of ``table`` rendered with or without its header.

        A table without a ``rows`` entry is treated as having no rows, which
        matches how ``build_table_html`` reads the same dict.
        """
        key = self._table_cache_key(table, show_header)
        cached = self._height_cache.get(key)
        if cached is not None:
            return cached

        html = build_table_html(table, show_header=show_header)
        content_width = table.get("total_width") or self.layout.content_width_pt
        height = self._measure_with_weasyprint(html, "probe-table", content_width=content_width)
        if height is None:
            height = self._estimate_table_height(table, show_header)
        self._height_cache[key] = height
        return height

    def measure_footer_meta(self, refs: List[str], notes: List[str]) -> float:
        """Return the height of the refs/notes footer, or 0.0 when both are empty."""
        if not refs and not notes:
            return 0.0
        refs_html = "".join(f'<div class="refs-text">{ref}</div>' for ref in refs)
        notes_html = "".join(f"<div>{note}</div>" for note in notes)
        html = """
<div id="probe" class="footer-meta">
{refs_block}
{notes_block}
</div>
""".format(
            refs_block=(
                f'<div class="refs"><div class="refs-line"></div>{refs_html}</div>'
                if refs
                else ""
            ),
            notes_block=(f'<div class="footer-notes">{notes_html}</div>' if notes else ""),
        )
        height = self._measure_with_weasyprint(html, "probe")
        if height is None:
            height = self._estimate_refs_height(refs) + self._estimate_notes_height(notes)
        return height

    def measure_footer_contact(self, site: str, phone: str) -> float:
        """Return the height of the contact footer (22.0 when it cannot be measured)."""
        html = f'<div id="probe" class="footer-contact"><div>{site}</div><div>{phone}</div></div>'
        height = self._measure_with_weasyprint(html, "probe")
        if height is None:
            height = 22.0
        return height

    def measure_footer_page(self, page_number: str) -> float:
        """Return the height of the page-number footer.

        An empty ``page_number`` yields 0.0; 8.0 is used when the footer cannot
        be measured.
        """
        if not page_number:
            return 0.0
        html = f'<div id="probe" class="footer-page">{page_number}</div>'
        height = self._measure_with_weasyprint(html, "probe")
        if height is None:
            height = 8.0
        return height

    def _measure_with_weasyprint(
        self, body_html: str, probe_id: str, content_width: Optional[float] = None
    ) -> Optional[float]:
        """Render ``body_html`` and return the height of the element ``probe_id``.

        Returns ``None`` when WeasyPrint is unavailable, rendering raises, the
        document has no pages, or no box with that id is found on the first
        page. ``content_width`` defaults to the layout's content width.
        """
        if not WEASYPRINT_AVAILABLE:
            return None

        if content_width is None:
            content_width = self.layout.content_width_pt
        measure_css = MEASURE_CSS.format(content_width=content_width)
        full_html = f"""
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="utf-8" />
</head>
<body>
<div class="measure-root">{body_html}</div>
</body>
</html>
"""
        try:
            document = HTML(string=full_html, base_url=self.base_url).render(
                stylesheets=[
                    CSS(filename=str(self.css_path)),
                    CSS(string=measure_css),
                ]
            )
        except Exception as exc:  # pragma: no cover - runtime dependency may fail
            # Best effort: a failed render only downgrades to the estimates.
            LOGGER.warning("WeasyPrint measurement failed: %s", exc)
            return None

        if not document.pages:
            return None

        box = _find_box_by_id(document.pages[0], probe_id)
        if box is None:
            return None

        # Box height plus vertical margins and padding; missing or falsy
        # attributes count as zero.
        height = 0.0
        for attr in ("height", "margin_top", "margin_bottom", "padding_top", "padding_bottom"):
            height += getattr(box, attr, 0.0) or 0.0
        return float(height) * CSS_PX_TO_PT

    def _estimate_html_height(self, html_fragment: str) -> float:
        """Estimate a fragment's height from its ``<br``, ``</p>`` and ``</div>`` counts."""
        lines = (
            html_fragment.count("<br")
            + html_fragment.count("</p>")
            + html_fragment.count("</div>")
        )
        if "section-title" in html_fragment:
            return 20 + lines * 14
        return lines * 14 + 10

    def _estimate_text_height(self, text: str, class_name: str) -> float:
        """Estimate text height from its length and per-class line metrics."""
        is_header = class_name in {"header-title", "header-subtitle"}
        chars_per_line = 80
        if is_header:
            chars_per_line = 45
        elif class_name == "intro":
            chars_per_line = 70
        lines = max(1, math.ceil(len(text) / chars_per_line))
        font_size = 14.0 if is_header else 12.0
        line_height = 1.05 if is_header else 1.1
        return lines * font_size * line_height

    def _estimate_table_height(self, table: Dict[str, Any], show_header: bool) -> float:
        """Estimate table height: optional 40pt header, 16pt per row, 16pt padding."""
        num_rows = len(table.get("rows") or [])
        header_height = 40 if show_header else 0
        row_height = 16
        return header_height + (num_rows * row_height) + 16

    def _estimate_refs_height(self, refs: List[str]) -> float:
        """Estimate the references block height (0.0 when there are none)."""
        if not refs:
            return 0.0
        line_height = 8.0 * 1.1
        return 6.0 + len(refs) * line_height

    def _estimate_notes_height(self, notes: List[str]) -> float:
        """Estimate the footer-notes block height (0.0 when there are none)."""
        if not notes:
            return 0.0
        line_height = 8.0 * 1.1
        return len(notes) * line_height
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
MEASURE_CSS = """
|
|
262
|
+
@page {{ size: Letter; margin: 0; }}
|
|
263
|
+
html, body {{ margin: 0; padding: 0; }}
|
|
264
|
+
.measure-root {{ margin: 0; padding: 0; }}
|
|
265
|
+
.page {{ position: static !important; width: auto; height: auto; }}
|
|
266
|
+
.content, .intro, .header-title, .header-subtitle, .footer-contact,
|
|
267
|
+
.footer-page, .footer-meta, .refs, .footer-notes {{
|
|
268
|
+
position: static !important;
|
|
269
|
+
}}
|
|
270
|
+
.content, .intro, .footer-meta, .footer-contact {{ width: {content_width}pt; }}
|
|
271
|
+
.table-wrap {{ margin-left: 0 !important; }}
|
|
272
|
+
"""
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _find_box_by_id(page: Any, element_id: str) -> Optional[Any]:
    """Return the first layout box on ``page`` whose element has ``element_id``.

    The search starts at the page's ``_page_box`` attribute and visits boxes in
    the order produced by ``_iter_boxes``. ``None`` is returned when the page
    has no such attribute or no box matches.
    """
    page_box = getattr(page, "_page_box", None)
    if page_box is None:
        return None

    for candidate in _iter_boxes(page_box):
        node = getattr(candidate, "element", None)
        if node is None:
            # Boxes without a source element cannot carry an id.
            continue
        if node.get("id") == element_id:
            return candidate
    return None
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _iter_boxes(box: Any) -> Iterable[Any]:
|
|
288
|
+
yield box
|
|
289
|
+
for child in getattr(box, "children", []) or []:
|
|
290
|
+
yield from _iter_boxes(child)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def build_table_html(table: Dict[str, Any], show_header: bool = True) -> str:
    """Render ``table`` as the HTML markup used for measuring and output.

    ``table`` may provide ``total_width`` and ``dep_width`` (defaulting to
    532.66 and 120.0), ``groups`` (each with a ``title`` and a list of
    ``months``) and ``rows`` (each with ``dep`` and ``vals``). The remaining
    width is shared equally between the month columns. The two-row ``<thead>``
    is emitted only when ``show_header`` is true.
    """
    total_width = table.get("total_width") or 532.66
    dep_width = table.get("dep_width") or 120.0
    groups = table.get("groups", [])
    month_lists = [group.get("months", []) for group in groups]
    num_cols = sum(len(months) for months in month_lists)
    num_width = (total_width - dep_width) / (num_cols or 1)

    col_tags = (
        f'<col style="width: {dep_width:.2f}pt;">'
        + f'<col style="width: {num_width:.2f}pt;">' * num_cols
    )

    thead = ""
    if show_header:
        group_cells = []
        for group, months in zip(groups, month_lists):
            title = group.get("title", "")
            group_cells.append(f'<th class="col-num" colspan="{len(months)}">{title}</th>')
        month_cells = [
            f'<th class="col-num">{month}</th>'
            for months in month_lists
            for month in months
        ]
        thead = (
            "<thead>"
            "<tr>"
            '<th class="col-dep" rowspan="2">Departamento/Mes</th>'
            + "".join(group_cells)
            + "</tr>"
            "<tr>"
            + "".join(month_cells)
            + "</tr>"
            "</thead>"
        )

    row_tags = []
    for row in table.get("rows", []):
        dep_cell = '<td class="col-dep">{}</td>'.format(row.get("dep", ""))
        value_cells = "".join(f"<td>{val}</td>" for val in row.get("vals", []))
        row_tags.append(f"<tr>{dep_cell}{value_cells}</tr>")
    tbody = "<tbody>" + "".join(row_tags) + "</tbody>"

    pieces = [
        f'<div class="content" style="width: {total_width:.2f}pt;">',
        f'<div class="table-wrap" style="width: {total_width:.2f}pt; margin-left: 0;">',
        '<table id="probe-table" class="tabla-abaco">',
        f"<colgroup>{col_tags}</colgroup>",
        thead,
        tbody,
        "</table>",
        "</div>",
        "</div>",
    ]
    return "".join(pieces)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
class Paginator:
|
|
345
|
+
def __init__(
    self,
    layout: LayoutConfig,
    css_path: str,
    base_url: str,
    fonts_conf_path: Optional[str] = None,
):
    """Create a paginator that measures blocks against ``css_path``.

    When ``fonts_conf_path`` is given it is exported as ``FONTCONFIG_FILE``
    unless that environment variable is already set.
    """
    if fonts_conf_path:
        # setdefault keeps a value the caller's environment already provides.
        os.environ.setdefault("FONTCONFIG_FILE", str(fonts_conf_path))
    self.layout = layout
    self.measurer = BlockMeasurer(css_path, base_url, layout)
    # Height of a one-line header title; used as the baseline for detecting
    # titles that wrap onto several lines.
    self._header_single_line_height = self.measurer.measure_text_block("X", "header-title")
|
|
357
|
+
|
|
358
|
+
def paginate(self, pages_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Expand the logical pages in ``pages_data`` into physical output pages.

    Cover pages are copied as they are, gaining default ``page_number`` and
    ``show_header_titles`` entries when absent. Every other page is delegated
    to ``_paginate_single_page`` together with the pages produced so far.
    """
    paginated: List[Dict[str, Any]] = []
    for source_page in pages_data:
        if not source_page.get("cover"):
            paginated.extend(self._paginate_single_page(source_page, paginated))
            continue

        # Shallow copy so the caller's dict is not modified.
        cover = dict(source_page)
        cover.setdefault("page_number", "")
        cover.setdefault("show_header_titles", False)
        paginated.append(cover)
    return paginated
|
|
369
|
+
|
|
370
|
+
def _paginate_single_page(
    self, page: Dict[str, Any], accumulated_pages: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Split one logical page into the physical pages its blocks require.

    Blocks are measured, then packed greedily into pages. The first page uses
    the layout that includes the intro; later pages use the compact
    continuation layout. Page-level refs and footer notes are attached to the
    last page only. ``accumulated_pages`` holds the pages already produced and
    is used for page numbering and to decide whether header titles are shown.
    """
    blocks = page.get("blocks", [])
    refs = page.get("refs", [])
    notes = page.get("footer_notes", [])
    has_meta = bool(refs or notes)

    # Header geometry with titles shown, and the "_other" variant without them.
    (
        header_title_top,
        header_subtitle_top,
        header_bottom,
        header_title_style,
        header_subtitle_style,
    ) = self._compute_header_positions(page, show_titles=True)
    (
        header_title_top_other,
        header_subtitle_top_other,
        header_bottom_other,
        header_title_style_other,
        header_subtitle_style_other,
    ) = self._compute_header_positions(page, show_titles=False)
    layout_first = self._compute_layout_state(
        page,
        header_bottom,
        include_intro=True,
        compact_top=False,
    )
    layout_other = self._compute_layout_state(
        page,
        header_bottom_other,
        include_intro=False,
        compact_top=True,
    )

    # HTML blocks are pre-split against the smallest content height any page
    # of this run can offer.
    min_page_height = min(
        layout_first.content_height_base_pt,
        layout_first.content_height_meta_pt,
        layout_other.content_height_base_pt,
        layout_other.content_height_meta_pt,
    )

    refs_catalog = page.get("refs_catalog", {})
    normalized_blocks = self._normalize_blocks(blocks, min_page_height, refs_catalog)
    pages_build: List[PageBuild] = []
    idx = 0
    page_idx = 0
    # NOTE(review): the nesting below was reconstructed from a flattened
    # listing; confirm it against the packaged file.
    while idx < len(normalized_blocks):
        layout_state = layout_first if page_idx == 0 else layout_other
        page_refs: List[str] = []
        page_notes: List[str] = []

        if has_meta:
            # Use the smaller, meta-aware limit only when everything that is
            # left fits in it, i.e. this can be the last page.
            remaining_height = sum(block.height_pt for block in normalized_blocks[idx:])
            if remaining_height <= layout_state.content_height_meta_pt:
                limit = layout_state.content_height_meta_pt
            else:
                limit = layout_state.content_height_base_pt
        else:
            limit = layout_state.content_height_base_pt
        limit = max(limit, self.layout.min_content_height_pt)

        used = 0.0
        page_blocks: List[BlockItem] = []
        while idx < len(normalized_blocks):
            block = normalized_blocks[idx]
            block_height = block.height_pt
            block_refs = list(block.refs)
            block_notes = list(block.notes)

            if block.keep_with_next and idx + 1 < len(normalized_blocks):
                next_block = normalized_blocks[idx + 1]
                next_height = next_block.height_pt
                if next_block.data.get("type") == "table":
                    # A following table can be split, so only the part of it
                    # that fits after this block has to be accounted for.
                    available = limit - used - block_height
                    if available <= 0:
                        if page_blocks:
                            break
                    else:
                        next_table = next_block.data.get("table", {})
                        show_header = next_table.get("show_header", True)
                        max_rows = self._max_table_rows_that_fit(
                            next_table,
                            next_table.get("rows", []),
                            available,
                            show_header,
                        )
                        next_height = (
                            self.measurer.measure_table(
                                {
                                    "groups": next_table.get("groups", []),
                                    "rows": next_table.get("rows", [])[: max_rows or 1],
                                    "total_width": next_table.get("total_width"),
                                    "dep_width": next_table.get("dep_width"),
                                },
                                show_header,
                            )
                            if max_rows
                            else next_height
                        )
                if used + block_height + next_height > limit:
                    if page_blocks:
                        break
                    # On an empty first page that has an intro, close the page
                    # without placing the block; it is retried on the next one.
                    if page_idx == 0 and page.get("intro"):
                        break

            if block_refs or block_notes:
                # Block-level refs/notes shrink the space left for content.
                new_limit = min(
                    limit,
                    self._content_height_with_meta(
                        layout_state, page_refs + block_refs, page_notes + block_notes
                    ),
                )
                if used > new_limit and page_blocks:
                    break
                limit = new_limit

            split_table = False
            if block.data.get("type") == "table":
                available_height = limit - used
                if available_height <= 0 and page_blocks:
                    break
                if available_height <= 0:
                    available_height = limit

                block, split_table = self._split_table_to_fit(
                    normalized_blocks,
                    idx,
                    available_height,
                )
                block_height = block.height_pt

            if used + block_height > limit and page_blocks:
                break

            if used + block_height > limit and not page_blocks:
                # An oversized block on an empty page is placed anyway so the
                # loop always makes progress.
                LOGGER.warning(
                    "Block exceeds page height limit (%.2f > %.2f); forcing placement.",
                    block_height,
                    limit,
                )

            page_blocks.append(block)
            used += block_height
            if block_refs:
                page_refs.extend(block_refs)
            if block_notes:
                page_notes.extend(block_notes)
            idx += 1

            # After a table split the current page is closed.
            if split_table:
                break

        pages_build.append(PageBuild(blocks=page_blocks, height_pt=used, refs=page_refs, notes=page_notes))
        page_idx += 1

    output_pages: List[Dict[str, Any]] = []
    for build_idx, build in enumerate(pages_build):
        is_first = build_idx == 0
        is_last = build_idx == len(pages_build) - 1
        layout_state = layout_first if is_first else layout_other

        # Header titles appear only when nothing has been emitted before this
        # run and this is its first page.
        show_header_titles = len(accumulated_pages) == 0 and is_first
        output_pages.append(
            self._build_page_dict(
                page,
                build,
                layout_state,
                include_intro=is_first,
                include_meta=(has_meta and is_last),
                page_number=str(len(accumulated_pages) + len(output_pages) + 1),
                header_title_top=header_title_top if show_header_titles else header_title_top_other,
                header_subtitle_top=header_subtitle_top if show_header_titles else header_subtitle_top_other,
                header_title_style=header_title_style if show_header_titles else header_title_style_other,
                header_subtitle_style=(
                    header_subtitle_style if show_header_titles else header_subtitle_style_other
                ),
                show_header_titles=show_header_titles,
            )
        )

    return output_pages
|
|
552
|
+
|
|
553
|
+
def _build_page_dict(
|
|
554
|
+
self,
|
|
555
|
+
source_page: Dict[str, Any],
|
|
556
|
+
build: PageBuild,
|
|
557
|
+
layout_state: PageLayoutState,
|
|
558
|
+
include_intro: bool,
|
|
559
|
+
include_meta: bool,
|
|
560
|
+
page_number: str,
|
|
561
|
+
header_title_top: float,
|
|
562
|
+
header_subtitle_top: float,
|
|
563
|
+
header_title_style: Dict[str, float],
|
|
564
|
+
header_subtitle_style: Dict[str, float],
|
|
565
|
+
show_header_titles: bool,
|
|
566
|
+
) -> Dict[str, Any]:
|
|
567
|
+
refs = list(build.refs)
|
|
568
|
+
notes = list(build.notes)
|
|
569
|
+
if include_meta:
|
|
570
|
+
refs.extend(source_page.get("refs", []))
|
|
571
|
+
notes.extend(source_page.get("footer_notes", []))
|
|
572
|
+
|
|
573
|
+
banner_path = source_page["header_banner_path"]
|
|
574
|
+
banner_path_cont = source_page.get("header_banner_path_cont")
|
|
575
|
+
if not banner_path_cont:
|
|
576
|
+
try:
|
|
577
|
+
banner_file = Path(banner_path)
|
|
578
|
+
candidate = banner_file.with_name(f"{banner_file.stem}-clean{banner_file.suffix}")
|
|
579
|
+
if candidate.exists():
|
|
580
|
+
banner_path_cont = str(candidate)
|
|
581
|
+
except OSError:
|
|
582
|
+
banner_path_cont = None
|
|
583
|
+
if not banner_path_cont:
|
|
584
|
+
banner_path_cont = banner_path
|
|
585
|
+
|
|
586
|
+
# Prefer clean banner for all pages when available to avoid duplicated titles.
|
|
587
|
+
if banner_path_cont and banner_path_cont != banner_path:
|
|
588
|
+
banner_path = banner_path_cont
|
|
589
|
+
|
|
590
|
+
page_dict = {
|
|
591
|
+
"header_banner_path": banner_path,
|
|
592
|
+
"header_banner_path_cont": banner_path_cont,
|
|
593
|
+
"header_logo_path": source_page["header_logo_path"],
|
|
594
|
+
"title_line1": source_page["title_line1"],
|
|
595
|
+
"title_line2": source_page["title_line2"],
|
|
596
|
+
"intro": source_page.get("intro", "") if include_intro else "",
|
|
597
|
+
"blocks": [block.data for block in build.blocks],
|
|
598
|
+
"refs": refs,
|
|
599
|
+
"footer_notes": notes,
|
|
600
|
+
"page_number": page_number,
|
|
601
|
+
"footer_site": source_page.get("footer_site", ""),
|
|
602
|
+
"footer_phone": source_page.get("footer_phone", ""),
|
|
603
|
+
"intro_top": layout_state.intro_top_pt,
|
|
604
|
+
"content_top": layout_state.content_top_pt,
|
|
605
|
+
"header_title_top": header_title_top,
|
|
606
|
+
"header_subtitle_top": header_subtitle_top,
|
|
607
|
+
"header_title_left": header_title_style["left"],
|
|
608
|
+
"header_title_width": header_title_style["width"],
|
|
609
|
+
"header_title_align": header_title_style["align"],
|
|
610
|
+
"header_subtitle_left": header_subtitle_style["left"],
|
|
611
|
+
"header_subtitle_width": header_subtitle_style["width"],
|
|
612
|
+
"header_subtitle_align": header_subtitle_style["align"],
|
|
613
|
+
"show_header_titles": show_header_titles,
|
|
614
|
+
"footer_meta_bottom": layout_state.footer_meta_bottom_pt,
|
|
615
|
+
}
|
|
616
|
+
return page_dict
|
|
617
|
+
|
|
618
|
+
def _compute_header_positions(
|
|
619
|
+
self, page: Dict[str, Any], show_titles: bool = True
|
|
620
|
+
) -> Tuple[float, float, float, Dict[str, float], Dict[str, float]]:
|
|
621
|
+
title_text = page.get("title_line1", "") if show_titles else ""
|
|
622
|
+
subtitle_text = page.get("title_line2", "") if show_titles else ""
|
|
623
|
+
|
|
624
|
+
title_height = self.measurer.measure_text_block(title_text, "header-title")
|
|
625
|
+
subtitle_height = self.measurer.measure_text_block(subtitle_text, "header-subtitle")
|
|
626
|
+
|
|
627
|
+
title_top = max(self.layout.header_title_top_pt, self.layout.header_title_min_top_pt)
|
|
628
|
+
subtitle_top = max(
|
|
629
|
+
self.layout.header_subtitle_top_pt,
|
|
630
|
+
title_top + title_height + self.layout.header_subtitle_gap_pt,
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
is_multi_line = title_height > self._header_single_line_height * 1.15
|
|
634
|
+
if is_multi_line:
|
|
635
|
+
title_style = {
|
|
636
|
+
"left": self.layout.content_left_pt,
|
|
637
|
+
"width": self.layout.content_width_pt,
|
|
638
|
+
"align": "center",
|
|
639
|
+
}
|
|
640
|
+
subtitle_style = {
|
|
641
|
+
"left": self.layout.content_left_pt,
|
|
642
|
+
"width": self.layout.content_width_pt,
|
|
643
|
+
"align": "center",
|
|
644
|
+
}
|
|
645
|
+
else:
|
|
646
|
+
title_style = {
|
|
647
|
+
"left": self.layout.header_title_left_pt,
|
|
648
|
+
"width": self.layout.header_title_width_pt,
|
|
649
|
+
"align": "left",
|
|
650
|
+
}
|
|
651
|
+
subtitle_style = {
|
|
652
|
+
"left": self.layout.header_subtitle_left_pt,
|
|
653
|
+
"width": self.layout.header_subtitle_width_pt,
|
|
654
|
+
"align": "left",
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
header_bottom = max(
|
|
658
|
+
self.layout.header_banner_height_pt,
|
|
659
|
+
self.layout.header_logo_top_pt + self.layout.header_logo_height_pt,
|
|
660
|
+
title_top + title_height,
|
|
661
|
+
subtitle_top + subtitle_height,
|
|
662
|
+
)
|
|
663
|
+
return title_top, subtitle_top, header_bottom, title_style, subtitle_style
|
|
664
|
+
|
|
665
|
+
def _compute_layout_state(
    self,
    page: Dict[str, Any],
    header_bottom: float,
    include_intro: bool,
    compact_top: bool,
) -> "PageLayoutState":
    """Resolve the vertical offsets and usable content heights for a page.

    ``header_bottom`` is the lowest point of the header. ``include_intro``
    reserves room for the page's intro text and ``compact_top`` selects the
    continuation-page minimum for the content top. Both content heights are
    clamped to the layout's minimum content height.
    """
    cfg = self.layout
    measurer = self.measurer

    intro_text = page.get("intro", "") if include_intro else ""
    intro_height = measurer.measure_text_block(intro_text, "intro") if intro_text else 0.0

    below_header = header_bottom + cfg.header_gap_pt
    intro_top = max(cfg.default_intro_top_pt, below_header)

    if compact_top:
        min_content_top = cfg.continuation_content_top_pt
    else:
        min_content_top = cfg.default_content_top_pt

    # intro_text is only ever non-empty when include_intro is true.
    if intro_text:
        content_top = max(min_content_top, intro_top + intro_height + cfg.intro_gap_pt)
    else:
        content_top = max(min_content_top, below_header)

    footer_contact_height = measurer.measure_footer_contact(
        page.get("footer_site", ""),
        page.get("footer_phone", ""),
    )
    footer_page_height = measurer.measure_footer_page(page.get("page_number", ""))
    footer_meta_height = measurer.measure_footer_meta(
        page.get("refs", []),
        page.get("footer_notes", []),
    )

    # Upper edge of the contact footer, measured from the page bottom.
    contact_edge = cfg.footer_contact_bottom_pt + footer_contact_height
    reserved_base = max(contact_edge, cfg.footer_page_bottom_pt + footer_page_height)
    footer_meta_bottom = max(
        cfg.footer_meta_bottom_pt,
        contact_edge + cfg.footer_meta_gap_pt,
    )
    reserved_meta = max(reserved_base, footer_meta_bottom + footer_meta_height)

    below_content_top = cfg.page_height_pt - content_top
    content_height_base = below_content_top - reserved_base - cfg.safety_pad_pt
    content_height_meta = below_content_top - reserved_meta - cfg.safety_pad_pt

    if content_height_meta < cfg.min_content_height_pt:
        LOGGER.warning(
            "Footer/meta area exceeds available page space; clamping content height to %.2fpt.",
            cfg.min_content_height_pt,
        )

    return PageLayoutState(
        intro_top_pt=intro_top,
        content_top_pt=content_top,
        content_height_base_pt=max(content_height_base, cfg.min_content_height_pt),
        content_height_meta_pt=max(content_height_meta, cfg.min_content_height_pt),
        reserved_base_pt=reserved_base,
        footer_meta_bottom_pt=footer_meta_bottom,
    )
|
|
737
|
+
|
|
738
|
+
def _normalize_blocks(
    self,
    blocks: List[Dict[str, Any]],
    max_height_pt: float,
    refs_catalog: Dict[str, str],
) -> List[BlockItem]:
    """Convert raw block dicts into measured ``BlockItem`` entries.

    Table blocks are passed through whole and measured with
    ``self.measurer.measure_table``.  Every other block is treated as HTML:
    it is handed to ``self._split_html_block`` and each resulting piece
    becomes its own ``BlockItem`` measured with ``self.measurer.measure_html``.

    Args:
        blocks: Raw block dicts; the keys read here are ``type``, ``table``,
            ``html``, ``refs`` and ``footer_notes``.
        max_height_pt: Height limit forwarded to ``_split_html_block``.
        refs_catalog: Mapping forwarded to ``_refs_from_html`` when a block
            carries no explicit ``refs``.

    Returns:
        The measured items, in input order (split pieces stay adjacent).
    """
    items: List[BlockItem] = []
    for raw in blocks:
        explicit_refs = raw.get("refs", [])
        explicit_notes = raw.get("footer_notes", [])

        if raw.get("type") == "table":
            table_spec = raw.get("table", {})
            header_flag = table_spec.get("show_header", True)
            items.append(
                BlockItem(
                    data=raw,
                    height_pt=self.measurer.measure_table(table_spec, header_flag),
                    refs=explicit_refs,
                    notes=explicit_notes,
                )
            )
            continue

        markup = raw.get("html", "")
        # The keep-with-next decision is taken on the unsplit markup and is
        # only attached to the first piece below.
        sticky = _needs_keep_with_next(markup)
        for position, piece in enumerate(self._split_html_block(markup, max_height_pt)):
            is_first = position == 0
            if not explicit_refs:
                # No explicit refs: derive them from the piece's own text.
                piece_refs = _refs_from_html(piece, refs_catalog)
            elif is_first:
                piece_refs = explicit_refs
            else:
                piece_refs = []
            piece_height = self.measurer.measure_html(piece)
            items.append(
                BlockItem(
                    data={"type": "html", "html": piece},
                    height_pt=piece_height,
                    keep_with_next=sticky and is_first,
                    refs=piece_refs,
                    # Explicit footer notes travel with the first piece only.
                    notes=explicit_notes if is_first else [],
                )
            )
    return items
|
|
775
|
+
|
|
776
|
+
def _split_table_block(self, block: Dict[str, Any], max_height_pt: float) -> List[Dict[str, Any]]:
|
|
777
|
+
if block.get("type") != "table":
|
|
778
|
+
return [block]
|
|
779
|
+
|
|
780
|
+
table = block["table"]
|
|
781
|
+
rows = table.get("rows", [])
|
|
782
|
+
if not rows:
|
|
783
|
+
return [block]
|
|
784
|
+
|
|
785
|
+
result_blocks: List[Dict[str, Any]] = []
|
|
786
|
+
start_idx = 0
|
|
787
|
+
first_chunk = True
|
|
788
|
+
|
|
789
|
+
while start_idx < len(rows):
|
|
790
|
+
show_header = first_chunk
|
|
791
|
+
max_rows = self._max_table_rows_that_fit(table, rows[start_idx:], max_height_pt, show_header)
|
|
792
|
+
if max_rows < 1:
|
|
793
|
+
max_rows = 1
|
|
794
|
+
chunk_rows = rows[start_idx : start_idx + max_rows]
|
|
795
|
+
result_blocks.append(
|
|
796
|
+
{
|
|
797
|
+
"type": "table",
|
|
798
|
+
"table": {
|
|
799
|
+
"groups": table.get("groups", []),
|
|
800
|
+
"rows": chunk_rows,
|
|
801
|
+
"total_width": table.get("total_width"),
|
|
802
|
+
"dep_width": table.get("dep_width"),
|
|
803
|
+
"show_header": show_header,
|
|
804
|
+
},
|
|
805
|
+
}
|
|
806
|
+
)
|
|
807
|
+
start_idx += max_rows
|
|
808
|
+
first_chunk = False
|
|
809
|
+
|
|
810
|
+
return result_blocks
|
|
811
|
+
|
|
812
|
+
def _split_table_to_fit(
    self,
    blocks: List[BlockItem],
    idx: int,
    max_height_pt: float,
) -> Tuple[BlockItem, bool]:
    """Shrink the table at ``blocks[idx]`` so it fits ``max_height_pt``.

    When the table has rows and its recorded height exceeds the limit,
    ``blocks`` is modified in place: ``blocks[idx]`` is replaced by a leading
    chunk (keeping copies of the original refs and notes) and, if rows are
    left over, a remainder item without a header is inserted at ``idx + 1``.

    Args:
        blocks: Item list to inspect and possibly mutate.
        idx: Position of the table item within ``blocks``.
        max_height_pt: Height the leading chunk should fit into.

    Returns:
        ``(blocks[idx], True)`` when a remainder item was inserted, otherwise
        ``(blocks[idx], False)``.
    """
    current = blocks[idx]
    table = current.data.get("table", {})
    all_rows = table.get("rows", [])
    # Nothing to do for row-less tables or tables that already fit.
    if not all_rows or current.height_pt <= max_height_pt:
        return current, False

    header_flag = table.get("show_header", True)

    def _wrap(part_rows: List[Dict[str, Any]], with_header: bool) -> Dict[str, Any]:
        # Build a table block dict for a subset of rows, reusing the layout
        # hints of the source table.
        return {
            "type": "table",
            "table": {
                "groups": table.get("groups", []),
                "rows": part_rows,
                "total_width": table.get("total_width"),
                "dep_width": table.get("dep_width"),
                "show_header": with_header,
            },
        }

    # Take at least one row even when the fit search reports none, so the
    # caller always makes progress.
    head_count = max(
        self._max_table_rows_that_fit(table, all_rows, max_height_pt, header_flag),
        1,
    )
    head_rows, tail_rows = all_rows[:head_count], all_rows[head_count:]

    head_block = _wrap(head_rows, header_flag)
    blocks[idx] = BlockItem(
        data=head_block,
        height_pt=self.measurer.measure_table(head_block["table"], header_flag),
        refs=list(current.refs),
        notes=list(current.notes),
    )

    if tail_rows:
        # The continuation never shows the header.
        tail_block = _wrap(tail_rows, False)
        tail_height = self.measurer.measure_table(tail_block["table"], False)
        blocks.insert(idx + 1, BlockItem(data=tail_block, height_pt=tail_height))

    return blocks[idx], bool(tail_rows)
|
|
875
|
+
|
|
876
|
+
def _max_table_rows_that_fit(
|
|
877
|
+
self,
|
|
878
|
+
table: Dict[str, Any],
|
|
879
|
+
remaining_rows: List[Dict[str, Any]],
|
|
880
|
+
max_height_pt: float,
|
|
881
|
+
show_header: bool,
|
|
882
|
+
) -> int:
|
|
883
|
+
low = 1
|
|
884
|
+
high = len(remaining_rows)
|
|
885
|
+
best = 0
|
|
886
|
+
while low <= high:
|
|
887
|
+
mid = (low + high) // 2
|
|
888
|
+
test_table = {
|
|
889
|
+
"groups": table.get("groups", []),
|
|
890
|
+
"rows": remaining_rows[:mid],
|
|
891
|
+
"total_width": table.get("total_width"),
|
|
892
|
+
"dep_width": table.get("dep_width"),
|
|
893
|
+
}
|
|
894
|
+
height = self.measurer.measure_table(test_table, show_header)
|
|
895
|
+
if height <= max_height_pt:
|
|
896
|
+
best = mid
|
|
897
|
+
low = mid + 1
|
|
898
|
+
else:
|
|
899
|
+
high = mid - 1
|
|
900
|
+
return best
|
|
901
|
+
|
|
902
|
+
def _split_html_block(self, html: str, max_height_pt: float) -> List[str]:
|
|
903
|
+
height = self.measurer.measure_html(html)
|
|
904
|
+
if height <= max_height_pt:
|
|
905
|
+
return [html]
|
|
906
|
+
|
|
907
|
+
chunks = split_html_into_chunks(html)
|
|
908
|
+
if len(chunks) == 1:
|
|
909
|
+
return [html]
|
|
910
|
+
|
|
911
|
+
result: List[str] = []
|
|
912
|
+
buffer: List[str] = []
|
|
913
|
+
for chunk in chunks:
|
|
914
|
+
candidate = "".join(buffer + [chunk])
|
|
915
|
+
candidate_height = self.measurer.measure_html(candidate)
|
|
916
|
+
if candidate_height <= max_height_pt or not buffer:
|
|
917
|
+
buffer.append(chunk)
|
|
918
|
+
continue
|
|
919
|
+
|
|
920
|
+
result.append("".join(buffer))
|
|
921
|
+
buffer = [chunk]
|
|
922
|
+
|
|
923
|
+
if buffer:
|
|
924
|
+
result.append("".join(buffer))
|
|
925
|
+
|
|
926
|
+
return result
|
|
927
|
+
|
|
928
|
+
def _content_height_with_meta(
    self, layout_state: PageLayoutState, refs: List[str], notes: List[str]
) -> float:
    """Return the content height left once footer refs/notes are reserved.

    With no refs and no notes the precomputed
    ``layout_state.content_height_base_pt`` is returned.  Otherwise the
    footer meta block is measured and the larger of the base reservation and
    the meta reservation is subtracted from the page height, together with
    the content top offset and the safety padding.

    Args:
        layout_state: Per-page layout measurements.
        refs: Reference texts destined for the footer meta area.
        notes: Footer note texts destined for the footer meta area.

    Returns:
        The available content height, never less than
        ``self.layout.min_content_height_pt`` (a warning is logged when the
        value has to be clamped).
    """
    if not (refs or notes):
        return layout_state.content_height_base_pt

    meta_height = self.measurer.measure_footer_meta(refs, notes)
    reserved = max(
        layout_state.reserved_base_pt,
        layout_state.footer_meta_bottom_pt + meta_height,
    )
    available = (
        self.layout.page_height_pt
        - layout_state.content_top_pt
        - reserved
        - self.layout.safety_pad_pt
    )

    floor = self.layout.min_content_height_pt
    if available < floor:
        LOGGER.warning(
            "Footer/meta area exceeds available page space; clamping content height to %.2fpt.",
            floor,
        )
        return floor
    return available
|
|
952
|
+
|
|
953
|
+
def split_html_into_chunks(html: str) -> List[str]:
    """Split an HTML fragment into smaller fragments at natural boundaries.

    Strategies are tried in order and the first one yielding more than one
    chunk wins:

    1. After a closing block tag, trying ``p``, ``div``, ``li`` and
       ``h1``-``h6`` in that order (case-insensitive).
    2. After ``<br>`` tags.
    3. By sentence, for text longer than 800 characters or text containing a
       ``<p ...>...</p>`` pair.  This fallback discards all markup and wraps
       every sentence in a fresh ``<p>`` element.

    Returns:
        The chunks in document order, or ``[html]`` when no strategy could
        split the fragment.
    """
    lowered = html.lower()

    def _gather(parts, is_boundary) -> List[str]:
        # Concatenate consecutive parts, closing a chunk right after every
        # boundary part; trailing text is kept only if it is not blank.
        gathered: List[str] = []
        pending = ""
        for part in parts:
            pending += part
            if is_boundary(part):
                gathered.append(pending)
                pending = ""
        if pending.strip():
            gathered.append(pending)
        return gathered

    for tag in ("p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"):
        close_tag = f"</{tag}>"
        if close_tag not in lowered:
            continue
        # The capturing group makes re.split keep the closing tags as parts.
        pieces = _gather(
            re.split(f"({re.escape(close_tag)})", html, flags=re.IGNORECASE),
            lambda part: part.lower() == close_tag,
        )
        if len(pieces) > 1:
            return pieces

    if "<br" in lowered:
        pieces = _gather(
            re.split(r"(<br\s*/?>)", html, flags=re.IGNORECASE),
            lambda part: part.lower().startswith("<br"),
        )
        if len(pieces) > 1:
            return pieces

    # Fallback: split very long single-paragraph HTML by sentences.
    text_only = re.sub(r"\s+", " ", re.sub(r"<[^>]+>", " ", html)).strip()
    if text_only:
        is_long = len(text_only) > 800
        has_paragraph = "<p" in lowered and "</p>" in lowered
        if is_long or has_paragraph:
            sentences = re.split(r"(?<=[\.\?\!])\s+", text_only)
            if len(sentences) > 1:
                wrapped = [f"<p>{s.strip()}</p>" for s in sentences if s.strip()]
                if len(wrapped) > 1:
                    return wrapped

    return [html]
|
|
996
|
+
|
|
997
|
+
|
|
998
|
+
def _needs_keep_with_next(html: str) -> bool:
|
|
999
|
+
lowered = html.lower()
|
|
1000
|
+
return "section-title" in lowered or "section-title-serif" in lowered or "section-subtitle" in lowered
|
|
1001
|
+
|
|
1002
|
+
|
|
1003
|
+
def _refs_from_html(html: str, refs_catalog: Dict[str, str]) -> List[str]:
|
|
1004
|
+
if not refs_catalog:
|
|
1005
|
+
return []
|
|
1006
|
+
ids = _extract_ref_ids(html)
|
|
1007
|
+
refs = []
|
|
1008
|
+
for ref_id in ids:
|
|
1009
|
+
ref_text = refs_catalog.get(ref_id)
|
|
1010
|
+
if ref_text:
|
|
1011
|
+
refs.append(ref_text)
|
|
1012
|
+
return refs
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
def _extract_ref_ids(html: str) -> List[str]:
|
|
1016
|
+
ids: List[str] = []
|
|
1017
|
+
seen = set()
|
|
1018
|
+
for match in re.findall(r"\[(.*?)\]", html):
|
|
1019
|
+
for token in re.split(r"[;,]\s*", match.strip()):
|
|
1020
|
+
token = token.strip()
|
|
1021
|
+
if not token:
|
|
1022
|
+
continue
|
|
1023
|
+
range_match = re.match(r"^(\d+)\s*[-–]\s*(\d+)$", token)
|
|
1024
|
+
if range_match:
|
|
1025
|
+
start = int(range_match.group(1))
|
|
1026
|
+
end = int(range_match.group(2))
|
|
1027
|
+
step = 1 if end >= start else -1
|
|
1028
|
+
for val in range(start, end + step, step):
|
|
1029
|
+
key = str(val)
|
|
1030
|
+
if key not in seen:
|
|
1031
|
+
ids.append(key)
|
|
1032
|
+
seen.add(key)
|
|
1033
|
+
continue
|
|
1034
|
+
if re.match(r"^\d+$", token):
|
|
1035
|
+
if token not in seen:
|
|
1036
|
+
ids.append(token)
|
|
1037
|
+
seen.add(token)
|
|
1038
|
+
return ids
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
def _suffix_sums(values: List[float]) -> List[float]:
|
|
1042
|
+
suffix = [0.0] * (len(values) + 1)
|
|
1043
|
+
for idx in range(len(values) - 1, -1, -1):
|
|
1044
|
+
suffix[idx] = suffix[idx + 1] + values[idx]
|
|
1045
|
+
return suffix
|