rxiv-maker 1.16.8__py3-none-any.whl → 1.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxiv_maker/__version__.py +1 -1
- rxiv_maker/cli/commands/build.py +7 -0
- rxiv_maker/cli/framework/workflow_commands.py +69 -3
- rxiv_maker/converters/citation_processor.py +5 -3
- rxiv_maker/core/managers/config_manager.py +1 -0
- rxiv_maker/exporters/docx_citation_mapper.py +18 -0
- rxiv_maker/exporters/docx_content_processor.py +110 -30
- rxiv_maker/exporters/docx_exporter.py +76 -32
- rxiv_maker/exporters/docx_writer.py +345 -67
- rxiv_maker/templates/registry.py +1 -0
- rxiv_maker/tex/style/rxiv_maker_style.cls +33 -33
- rxiv_maker/utils/accent_character_map.py +150 -0
- rxiv_maker/utils/author_affiliation_processor.py +128 -0
- rxiv_maker/utils/citation_range_formatter.py +118 -0
- rxiv_maker/utils/comment_filter.py +46 -0
- rxiv_maker/utils/docx_helpers.py +43 -118
- rxiv_maker/utils/label_extractor.py +185 -0
- rxiv_maker/utils/pdf_splitter.py +116 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/METADATA +2 -1
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/RECORD +23 -17
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/WHEEL +0 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/entry_points.txt +0 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,6 +4,7 @@ This module handles the actual generation of DOCX files using python-docx,
|
|
|
4
4
|
writing structured content with formatting, citations, and references.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import base64
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any, Dict, Optional
|
|
9
10
|
|
|
@@ -11,11 +12,12 @@ from docx import Document
|
|
|
11
12
|
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_COLOR_INDEX
|
|
12
13
|
from docx.oxml import OxmlElement
|
|
13
14
|
from docx.oxml.ns import qn
|
|
14
|
-
from docx.shared import Inches, Pt
|
|
15
|
+
from docx.shared import Inches, Pt, RGBColor
|
|
15
16
|
from latex2mathml.converter import convert as latex_to_mathml
|
|
16
17
|
from lxml import etree
|
|
17
18
|
|
|
18
19
|
from ..core.logging_config import get_logger
|
|
20
|
+
from ..utils.author_affiliation_processor import AuthorAffiliationProcessor
|
|
19
21
|
from ..utils.docx_helpers import convert_pdf_to_image
|
|
20
22
|
|
|
21
23
|
logger = get_logger()
|
|
@@ -24,6 +26,29 @@ logger = get_logger()
|
|
|
24
26
|
class DocxWriter:
|
|
25
27
|
"""Writes structured content to DOCX files using python-docx."""
|
|
26
28
|
|
|
29
|
+
# Color mapping for different reference types
|
|
30
|
+
XREF_COLORS = {
|
|
31
|
+
"fig": WD_COLOR_INDEX.BRIGHT_GREEN, # Figures (bright green - lighter)
|
|
32
|
+
"sfig": WD_COLOR_INDEX.TURQUOISE, # Supplementary figures (turquoise - lighter cyan)
|
|
33
|
+
"stable": WD_COLOR_INDEX.TURQUOISE, # Supplementary tables (turquoise - lighter cyan)
|
|
34
|
+
"table": WD_COLOR_INDEX.BLUE, # Main tables
|
|
35
|
+
"eq": WD_COLOR_INDEX.VIOLET, # Equations
|
|
36
|
+
"snote": WD_COLOR_INDEX.TURQUOISE, # Supplementary notes (turquoise - lighter cyan)
|
|
37
|
+
"cite": WD_COLOR_INDEX.YELLOW, # Citations (yellow)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
@staticmethod
|
|
41
|
+
def get_xref_color(xref_type: str):
|
|
42
|
+
"""Get highlight color for a cross-reference type.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
xref_type: Type of cross-reference (fig, sfig, stable, table, eq, snote, cite)
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
WD_COLOR_INDEX color for the xref type, or YELLOW as default
|
|
49
|
+
"""
|
|
50
|
+
return DocxWriter.XREF_COLORS.get(xref_type, WD_COLOR_INDEX.YELLOW)
|
|
51
|
+
|
|
27
52
|
def write(
|
|
28
53
|
self,
|
|
29
54
|
doc_structure: Dict[str, Any],
|
|
@@ -32,6 +57,10 @@ class DocxWriter:
|
|
|
32
57
|
include_footnotes: bool = True,
|
|
33
58
|
base_path: Optional[Path] = None,
|
|
34
59
|
metadata: Optional[Dict[str, Any]] = None,
|
|
60
|
+
table_map: Optional[Dict[str, int]] = None,
|
|
61
|
+
figures_at_end: bool = False,
|
|
62
|
+
hide_highlighting: bool = False,
|
|
63
|
+
hide_comments: bool = False,
|
|
35
64
|
) -> Path:
|
|
36
65
|
"""Write DOCX file from structured content.
|
|
37
66
|
|
|
@@ -42,6 +71,10 @@ class DocxWriter:
|
|
|
42
71
|
include_footnotes: Whether to add DOI footnotes
|
|
43
72
|
base_path: Base path for resolving relative figure paths
|
|
44
73
|
metadata: Document metadata (title, authors, affiliations)
|
|
74
|
+
table_map: Mapping from table labels to numbers (for supplementary tables)
|
|
75
|
+
figures_at_end: Place main figures at end before SI/bibliography
|
|
76
|
+
hide_highlighting: Disable colored highlighting on references and citations
|
|
77
|
+
hide_comments: Exclude all comments (block and inline) from output
|
|
45
78
|
|
|
46
79
|
Returns:
|
|
47
80
|
Path to created DOCX file
|
|
@@ -49,8 +82,14 @@ class DocxWriter:
|
|
|
49
82
|
self.base_path = base_path or Path.cwd()
|
|
50
83
|
self.bibliography = bibliography
|
|
51
84
|
self.include_footnotes = include_footnotes
|
|
85
|
+
self.table_map = table_map or {}
|
|
86
|
+
self.hide_highlighting = hide_highlighting
|
|
87
|
+
self.hide_comments = hide_comments
|
|
52
88
|
doc = Document()
|
|
53
89
|
|
|
90
|
+
# Set default font to Arial for entire document
|
|
91
|
+
self._set_default_font(doc, "Arial")
|
|
92
|
+
|
|
54
93
|
# Add title and author information if metadata provided
|
|
55
94
|
if metadata:
|
|
56
95
|
self._add_title_page(doc, metadata)
|
|
@@ -69,19 +108,44 @@ class DocxWriter:
|
|
|
69
108
|
# Store figure map for use in text processing
|
|
70
109
|
self.figure_map = figure_map
|
|
71
110
|
|
|
72
|
-
#
|
|
111
|
+
# Collect main figures if figures_at_end is True
|
|
112
|
+
collected_main_figures = []
|
|
113
|
+
|
|
114
|
+
# Process each section
|
|
73
115
|
figure_counter = 0
|
|
116
|
+
sfigure_counter = 0
|
|
74
117
|
for section in doc_structure["sections"]:
|
|
75
118
|
if section["type"] == "figure":
|
|
76
|
-
|
|
77
|
-
|
|
119
|
+
is_supplementary = section.get("is_supplementary", False)
|
|
120
|
+
if is_supplementary:
|
|
121
|
+
# Supplementary figures always go inline (in SI section)
|
|
122
|
+
sfigure_counter += 1
|
|
123
|
+
self._add_figure(doc, section, figure_number=sfigure_counter, is_supplementary=True)
|
|
124
|
+
else:
|
|
125
|
+
# Main figures: collect if figures_at_end, otherwise add inline
|
|
126
|
+
figure_counter += 1
|
|
127
|
+
if figures_at_end:
|
|
128
|
+
collected_main_figures.append((section, figure_counter))
|
|
129
|
+
else:
|
|
130
|
+
self._add_figure(doc, section, figure_number=figure_counter, is_supplementary=False)
|
|
78
131
|
else:
|
|
79
132
|
self._add_section(doc, section, bibliography, include_footnotes)
|
|
80
133
|
|
|
134
|
+
# Add collected main figures at the end (before bibliography)
|
|
135
|
+
if figures_at_end and collected_main_figures:
|
|
136
|
+
doc.add_page_break()
|
|
137
|
+
heading = doc.add_heading("Figures", level=1)
|
|
138
|
+
for run in heading.runs:
|
|
139
|
+
run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
|
|
140
|
+
for section, fig_num in collected_main_figures:
|
|
141
|
+
self._add_figure(doc, section, figure_number=fig_num, is_supplementary=False)
|
|
142
|
+
|
|
81
143
|
# Add bibliography section at the end
|
|
82
144
|
if include_footnotes and bibliography:
|
|
83
145
|
doc.add_page_break()
|
|
84
|
-
doc.add_heading("Bibliography", level=1)
|
|
146
|
+
heading = doc.add_heading("Bibliography", level=1)
|
|
147
|
+
for run in heading.runs:
|
|
148
|
+
run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
|
|
85
149
|
|
|
86
150
|
# Add numbered bibliography entries
|
|
87
151
|
for num in sorted(bibliography.keys()):
|
|
@@ -92,15 +156,15 @@ class DocxWriter:
|
|
|
92
156
|
num_run = para.add_run(f"[{num}] ")
|
|
93
157
|
num_run.bold = True
|
|
94
158
|
|
|
95
|
-
# Add formatted bibliography text (
|
|
159
|
+
# Add formatted bibliography text (without DOI - added separately below)
|
|
96
160
|
para.add_run(bib_entry["formatted"])
|
|
97
161
|
|
|
98
|
-
# Add DOI as hyperlink with yellow highlighting if present
|
|
162
|
+
# Add DOI as hyperlink with yellow highlighting if present (unless hide_highlighting is enabled)
|
|
99
163
|
if bib_entry.get("doi"):
|
|
100
164
|
doi = bib_entry["doi"]
|
|
101
165
|
doi_url = f"https://doi.org/{doi}" if not doi.startswith("http") else doi
|
|
102
|
-
para.add_run(" ")
|
|
103
|
-
self._add_hyperlink(para, doi_url, doi_url, highlight=
|
|
166
|
+
para.add_run("\nDOI: ")
|
|
167
|
+
self._add_hyperlink(para, doi_url, doi_url, highlight=not self.hide_highlighting)
|
|
104
168
|
|
|
105
169
|
# Add spacing between entries
|
|
106
170
|
para.paragraph_format.space_after = Pt(6)
|
|
@@ -109,6 +173,38 @@ class DocxWriter:
|
|
|
109
173
|
doc.save(str(output_path))
|
|
110
174
|
return output_path
|
|
111
175
|
|
|
176
|
+
def _set_default_font(self, doc: Document, font_name: str):
|
|
177
|
+
"""Set the default font for the entire document.
|
|
178
|
+
|
|
179
|
+
Args:
|
|
180
|
+
doc: Document object
|
|
181
|
+
font_name: Font name to use (e.g., "Arial", "Times New Roman")
|
|
182
|
+
"""
|
|
183
|
+
# Set font on Normal style (base style for most content)
|
|
184
|
+
style = doc.styles["Normal"]
|
|
185
|
+
font = style.font
|
|
186
|
+
font.name = font_name
|
|
187
|
+
font.size = Pt(10) # Default body font size
|
|
188
|
+
|
|
189
|
+
# Also set on heading styles to ensure consistency
|
|
190
|
+
for i in range(1, 10):
|
|
191
|
+
try:
|
|
192
|
+
heading_style = doc.styles[f"Heading {i}"]
|
|
193
|
+
heading_style.font.name = font_name
|
|
194
|
+
except KeyError:
|
|
195
|
+
# Heading style doesn't exist, skip
|
|
196
|
+
pass
|
|
197
|
+
|
|
198
|
+
def _apply_highlight(self, run, color: WD_COLOR_INDEX):
|
|
199
|
+
"""Apply highlight color to a run, unless highlighting is disabled.
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
run: The run object to apply highlighting to
|
|
203
|
+
color: The WD_COLOR_INDEX color to apply
|
|
204
|
+
"""
|
|
205
|
+
if not self.hide_highlighting:
|
|
206
|
+
run.font.highlight_color = color
|
|
207
|
+
|
|
112
208
|
def _add_title_page(self, doc: Document, metadata: Dict[str, Any]):
|
|
113
209
|
"""Add title, author and affiliation information.
|
|
114
210
|
|
|
@@ -144,27 +240,16 @@ class DocxWriter:
|
|
|
144
240
|
if not authors:
|
|
145
241
|
return # Nothing more to add
|
|
146
242
|
|
|
147
|
-
#
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
if affil_shortname not in affiliation_map:
|
|
158
|
-
affiliation_map[affil_shortname] = len(affiliation_map) + 1
|
|
159
|
-
# Look up full affiliation info
|
|
160
|
-
affil_info = affiliation_details.get(affil_shortname, {})
|
|
161
|
-
full_name = affil_info.get("full_name", affil_shortname)
|
|
162
|
-
location = affil_info.get("location", "")
|
|
163
|
-
# Format: "Full Name, Location" or just "Full Name" if no location
|
|
164
|
-
affil_text = f"{full_name}, {location}" if location else full_name
|
|
165
|
-
all_affiliations.append(affil_text)
|
|
166
|
-
|
|
167
|
-
# Add authors with superscript affiliation numbers
|
|
243
|
+
# Process author and affiliation metadata using centralized processor
|
|
244
|
+
processor = AuthorAffiliationProcessor()
|
|
245
|
+
processed = processor.process(metadata)
|
|
246
|
+
|
|
247
|
+
affiliation_map = processed["affiliation_map"]
|
|
248
|
+
ordered_affiliations = processed["ordered_affiliations"]
|
|
249
|
+
cofirst_authors = processed["cofirst_authors"]
|
|
250
|
+
corresponding_authors = processed["corresponding_authors"]
|
|
251
|
+
|
|
252
|
+
# Add authors with superscript affiliation numbers and corresponding author markers
|
|
168
253
|
if authors:
|
|
169
254
|
author_para = doc.add_paragraph()
|
|
170
255
|
for i, author in enumerate(authors):
|
|
@@ -182,25 +267,94 @@ class DocxWriter:
|
|
|
182
267
|
sup_run = author_para.add_run(",".join(affil_nums))
|
|
183
268
|
sup_run.font.superscript = True
|
|
184
269
|
|
|
270
|
+
# Add co-first author marker (dagger) if applicable
|
|
271
|
+
is_cofirst = author.get("co_first_author", False)
|
|
272
|
+
if is_cofirst:
|
|
273
|
+
cofirst_run = author_para.add_run("†")
|
|
274
|
+
cofirst_run.font.superscript = True
|
|
275
|
+
|
|
276
|
+
# Add corresponding author marker (asterisk) if applicable
|
|
277
|
+
is_corresponding = author.get("corresponding_author", False)
|
|
278
|
+
if is_corresponding:
|
|
279
|
+
corr_run = author_para.add_run("*")
|
|
280
|
+
corr_run.font.superscript = True
|
|
281
|
+
|
|
185
282
|
author_para.paragraph_format.space_after = Pt(8)
|
|
186
283
|
|
|
187
284
|
# Add affiliations
|
|
188
|
-
if
|
|
189
|
-
for
|
|
285
|
+
if ordered_affiliations:
|
|
286
|
+
for affil_num, _affil_shortname, affil_text in ordered_affiliations:
|
|
190
287
|
affil_para = doc.add_paragraph()
|
|
191
288
|
|
|
192
289
|
# Add superscript number
|
|
193
|
-
num_run = affil_para.add_run(str(
|
|
290
|
+
num_run = affil_para.add_run(str(affil_num))
|
|
194
291
|
num_run.font.superscript = True
|
|
292
|
+
num_run.font.size = Pt(8)
|
|
195
293
|
|
|
196
294
|
# Add affiliation text
|
|
197
|
-
affil_para.add_run(f" {affil_text}")
|
|
295
|
+
affil_run = affil_para.add_run(f" {affil_text}")
|
|
296
|
+
affil_run.font.size = Pt(8)
|
|
198
297
|
affil_para.paragraph_format.space_after = Pt(4)
|
|
199
|
-
affil_para.runs[1].font.size = Pt(10)
|
|
200
298
|
|
|
201
299
|
# Extra space after last affiliation
|
|
202
300
|
affil_para.paragraph_format.space_after = Pt(12)
|
|
203
301
|
|
|
302
|
+
# Add co-first author information if any (already extracted by processor)
|
|
303
|
+
if cofirst_authors:
|
|
304
|
+
cofirst_para = doc.add_paragraph()
|
|
305
|
+
cofirst_marker = cofirst_para.add_run("†")
|
|
306
|
+
cofirst_marker.font.superscript = True
|
|
307
|
+
cofirst_marker.font.size = Pt(8)
|
|
308
|
+
|
|
309
|
+
cofirst_label = cofirst_para.add_run(" These authors contributed equally: ")
|
|
310
|
+
cofirst_label.font.size = Pt(8)
|
|
311
|
+
|
|
312
|
+
for i, author in enumerate(cofirst_authors):
|
|
313
|
+
if i > 0:
|
|
314
|
+
sep_run = cofirst_para.add_run(", ")
|
|
315
|
+
sep_run.font.size = Pt(8)
|
|
316
|
+
|
|
317
|
+
name = author.get("name", "")
|
|
318
|
+
name_run = cofirst_para.add_run(name)
|
|
319
|
+
name_run.font.size = Pt(8)
|
|
320
|
+
|
|
321
|
+
cofirst_para.paragraph_format.space_after = Pt(12)
|
|
322
|
+
|
|
323
|
+
# Add corresponding author information if any (already extracted by processor)
|
|
324
|
+
if corresponding_authors:
|
|
325
|
+
corr_para = doc.add_paragraph()
|
|
326
|
+
corr_marker = corr_para.add_run("*")
|
|
327
|
+
corr_marker.font.superscript = True
|
|
328
|
+
corr_marker.font.size = Pt(8)
|
|
329
|
+
|
|
330
|
+
corr_label = corr_para.add_run(" Correspondence: ")
|
|
331
|
+
corr_label.font.size = Pt(8)
|
|
332
|
+
|
|
333
|
+
for i, author in enumerate(corresponding_authors):
|
|
334
|
+
if i > 0:
|
|
335
|
+
sep_run = corr_para.add_run("; ")
|
|
336
|
+
sep_run.font.size = Pt(8)
|
|
337
|
+
|
|
338
|
+
name = author.get("name", "")
|
|
339
|
+
email = author.get("email", "")
|
|
340
|
+
|
|
341
|
+
# Decode email if it's base64 encoded
|
|
342
|
+
if not email:
|
|
343
|
+
email64 = author.get("email64", "")
|
|
344
|
+
if email64:
|
|
345
|
+
try:
|
|
346
|
+
email = base64.b64decode(email64).decode("utf-8")
|
|
347
|
+
except Exception:
|
|
348
|
+
email = ""
|
|
349
|
+
|
|
350
|
+
if email:
|
|
351
|
+
info_run = corr_para.add_run(f"{name} ({email})")
|
|
352
|
+
else:
|
|
353
|
+
info_run = corr_para.add_run(name)
|
|
354
|
+
info_run.font.size = Pt(8)
|
|
355
|
+
|
|
356
|
+
corr_para.paragraph_format.space_after = Pt(12)
|
|
357
|
+
|
|
204
358
|
def _add_section(
|
|
205
359
|
self,
|
|
206
360
|
doc: Document,
|
|
@@ -228,6 +382,9 @@ class DocxWriter:
|
|
|
228
382
|
self._add_list(doc, section)
|
|
229
383
|
elif section_type == "code_block":
|
|
230
384
|
self._add_code_block(doc, section)
|
|
385
|
+
elif section_type == "comment":
|
|
386
|
+
if not self.hide_comments:
|
|
387
|
+
self._add_comment(doc, section)
|
|
231
388
|
elif section_type == "figure":
|
|
232
389
|
self._add_figure(doc, section)
|
|
233
390
|
elif section_type == "table":
|
|
@@ -253,6 +410,7 @@ class DocxWriter:
|
|
|
253
410
|
run = para.add_run(text)
|
|
254
411
|
run.bold = True
|
|
255
412
|
run.font.size = Pt(12)
|
|
413
|
+
run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
|
|
256
414
|
|
|
257
415
|
def _add_heading(self, doc: Document, section: Dict[str, Any]):
|
|
258
416
|
"""Add heading to document.
|
|
@@ -263,7 +421,10 @@ class DocxWriter:
|
|
|
263
421
|
"""
|
|
264
422
|
level = section["level"]
|
|
265
423
|
text = section["text"]
|
|
266
|
-
doc.add_heading(text, level=level)
|
|
424
|
+
heading = doc.add_heading(text, level=level)
|
|
425
|
+
# Ensure heading text is black (not blue)
|
|
426
|
+
for run in heading.runs:
|
|
427
|
+
run.font.color.rgb = RGBColor(0, 0, 0) # Explicitly set to black
|
|
267
428
|
|
|
268
429
|
def _add_paragraph(
|
|
269
430
|
self,
|
|
@@ -310,13 +471,19 @@ class DocxWriter:
|
|
|
310
471
|
run.italic = True
|
|
311
472
|
if run_data.get("underline"):
|
|
312
473
|
run.underline = True
|
|
474
|
+
if run_data.get("subscript"):
|
|
475
|
+
run.font.subscript = True
|
|
476
|
+
if run_data.get("superscript"):
|
|
477
|
+
run.font.superscript = True
|
|
313
478
|
if run_data.get("code"):
|
|
314
479
|
run.font.name = "Courier New"
|
|
315
480
|
run.font.size = Pt(10)
|
|
316
481
|
if run_data.get("xref"):
|
|
317
|
-
|
|
482
|
+
# Use color based on xref type (fig, sfig, stable, eq, etc.)
|
|
483
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
484
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
318
485
|
if run_data.get("highlight_yellow"):
|
|
319
|
-
run
|
|
486
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
320
487
|
|
|
321
488
|
elif run_data["type"] == "hyperlink":
|
|
322
489
|
# Add hyperlink with yellow highlighting
|
|
@@ -329,11 +496,20 @@ class DocxWriter:
|
|
|
329
496
|
latex_content = run_data.get("latex", "")
|
|
330
497
|
self._add_inline_equation(paragraph, latex_content)
|
|
331
498
|
|
|
499
|
+
elif run_data["type"] == "inline_comment":
|
|
500
|
+
# Add inline comment with gray highlighting (unless hide_comments is enabled)
|
|
501
|
+
if not self.hide_comments:
|
|
502
|
+
comment_text = run_data["text"]
|
|
503
|
+
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
504
|
+
self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
|
|
505
|
+
run.italic = True
|
|
506
|
+
run.font.size = Pt(10)
|
|
507
|
+
|
|
332
508
|
elif run_data["type"] == "citation":
|
|
333
509
|
cite_num = run_data["number"]
|
|
334
510
|
# Add citation as [NN] inline with yellow highlighting
|
|
335
511
|
run = paragraph.add_run(f"[{cite_num}]")
|
|
336
|
-
run
|
|
512
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
337
513
|
run.font.size = Pt(10)
|
|
338
514
|
|
|
339
515
|
def _add_list(self, doc: Document, section: Dict[str, Any]):
|
|
@@ -362,15 +538,21 @@ class DocxWriter:
|
|
|
362
538
|
run.bold = True
|
|
363
539
|
if run_data.get("italic"):
|
|
364
540
|
run.italic = True
|
|
541
|
+
if run_data.get("subscript"):
|
|
542
|
+
run.font.subscript = True
|
|
543
|
+
if run_data.get("superscript"):
|
|
544
|
+
run.font.superscript = True
|
|
365
545
|
if run_data.get("code"):
|
|
366
546
|
run.font.name = "Courier New"
|
|
367
547
|
if run_data.get("xref"):
|
|
368
|
-
|
|
548
|
+
# Use color based on xref type
|
|
549
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
550
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
369
551
|
if run_data.get("highlight_yellow"):
|
|
370
|
-
run
|
|
552
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
371
553
|
run.font.size = Pt(10)
|
|
372
554
|
if run_data.get("highlight_yellow"):
|
|
373
|
-
run
|
|
555
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
374
556
|
elif run_data["type"] == "hyperlink":
|
|
375
557
|
text = run_data.get("text", "")
|
|
376
558
|
url = run_data.get("url", "")
|
|
@@ -379,11 +561,20 @@ class DocxWriter:
|
|
|
379
561
|
# Add inline equation as Office Math
|
|
380
562
|
latex_content = run_data.get("latex", "")
|
|
381
563
|
self._add_inline_equation(paragraph, latex_content)
|
|
564
|
+
elif run_data["type"] == "inline_comment":
|
|
565
|
+
# Add inline comment with gray highlighting (unless hide_comments is enabled)
|
|
566
|
+
if not self.hide_comments:
|
|
567
|
+
comment_text = run_data["text"]
|
|
568
|
+
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
569
|
+
self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
|
|
570
|
+
run.italic = True
|
|
571
|
+
run.font.size = Pt(10)
|
|
382
572
|
elif run_data["type"] == "citation":
|
|
383
573
|
cite_num = run_data["number"]
|
|
384
574
|
run = paragraph.add_run(f"[{cite_num}]")
|
|
385
575
|
run.bold = True
|
|
386
576
|
run.font.size = Pt(10)
|
|
577
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
387
578
|
|
|
388
579
|
def _add_code_block(self, doc: Document, section: Dict[str, Any]):
|
|
389
580
|
"""Add code block to document.
|
|
@@ -404,6 +595,22 @@ class DocxWriter:
|
|
|
404
595
|
paragraph_format = paragraph.paragraph_format
|
|
405
596
|
paragraph_format.left_indent = Pt(36) # Indent code blocks
|
|
406
597
|
|
|
598
|
+
def _add_comment(self, doc: Document, section: Dict[str, Any]):
|
|
599
|
+
"""Add comment to document with gray highlighting.
|
|
600
|
+
|
|
601
|
+
Args:
|
|
602
|
+
doc: Document object
|
|
603
|
+
section: Comment section data with 'text'
|
|
604
|
+
"""
|
|
605
|
+
comment_text = section["text"]
|
|
606
|
+
paragraph = doc.add_paragraph()
|
|
607
|
+
|
|
608
|
+
# Add comment text with light gray highlighting to distinguish from colored xrefs
|
|
609
|
+
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
610
|
+
self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
|
|
611
|
+
run.italic = True
|
|
612
|
+
run.font.size = Pt(10)
|
|
613
|
+
|
|
407
614
|
def _check_poppler_availability(self) -> bool:
|
|
408
615
|
"""Check if poppler is available for PDF conversion.
|
|
409
616
|
|
|
@@ -417,13 +624,16 @@ class DocxWriter:
|
|
|
417
624
|
|
|
418
625
|
return result.status == DependencyStatus.AVAILABLE
|
|
419
626
|
|
|
420
|
-
def _add_figure(
|
|
627
|
+
def _add_figure(
|
|
628
|
+
self, doc: Document, section: Dict[str, Any], figure_number: int = None, is_supplementary: bool = False
|
|
629
|
+
):
|
|
421
630
|
"""Add figure to document with caption.
|
|
422
631
|
|
|
423
632
|
Args:
|
|
424
633
|
doc: Document object
|
|
425
634
|
section: Figure section data with 'path', 'caption', 'label'
|
|
426
635
|
figure_number: Figure number (1-indexed)
|
|
636
|
+
is_supplementary: Whether this is a supplementary figure
|
|
427
637
|
"""
|
|
428
638
|
figure_path = Path(section["path"])
|
|
429
639
|
caption = section.get("caption", "")
|
|
@@ -470,19 +680,45 @@ class DocxWriter:
|
|
|
470
680
|
logger.warning(f"Unsupported image format: {figure_path.suffix}")
|
|
471
681
|
|
|
472
682
|
if img_source:
|
|
473
|
-
# Add image
|
|
683
|
+
# Add image with proper sizing to fit page
|
|
474
684
|
try:
|
|
475
|
-
|
|
476
|
-
|
|
685
|
+
from PIL import Image as PILImage
|
|
686
|
+
|
|
687
|
+
# Get image dimensions
|
|
688
|
+
with PILImage.open(img_source) as img:
|
|
689
|
+
img_width, img_height = img.size
|
|
690
|
+
aspect_ratio = img_width / img_height
|
|
691
|
+
|
|
692
|
+
# Page dimensions with margins (Letter size: 8.5 x 11 inches, 1 inch margins)
|
|
693
|
+
max_width = Inches(6.5) # 8.5 - 2*1
|
|
694
|
+
max_height = Inches(9) # 11 - 2*1
|
|
695
|
+
|
|
696
|
+
# Add figure centered
|
|
697
|
+
# Note: add_picture() creates a paragraph automatically, but we need to add it explicitly
|
|
698
|
+
# to control alignment
|
|
699
|
+
fig_para = doc.add_paragraph()
|
|
700
|
+
fig_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
701
|
+
|
|
702
|
+
# Calculate optimal size maintaining aspect ratio
|
|
703
|
+
if aspect_ratio > (6.5 / 9): # Wide image - constrain by width
|
|
704
|
+
run = fig_para.add_run()
|
|
705
|
+
run.add_picture(img_source, width=max_width)
|
|
706
|
+
else: # Tall image - constrain by height
|
|
707
|
+
run = fig_para.add_run()
|
|
708
|
+
run.add_picture(img_source, height=max_height)
|
|
709
|
+
|
|
710
|
+
logger.debug(f"Embedded figure: {figure_path} ({img_width}x{img_height})")
|
|
477
711
|
except Exception as e:
|
|
478
712
|
logger.warning(f"Failed to embed figure {figure_path}: {e}")
|
|
479
|
-
# Add placeholder text
|
|
713
|
+
# Add placeholder text (centered)
|
|
480
714
|
p = doc.add_paragraph()
|
|
715
|
+
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
481
716
|
run = p.add_run(f"[Figure: {figure_path.name}]")
|
|
482
717
|
run.italic = True
|
|
483
718
|
else:
|
|
484
|
-
# Add placeholder if embedding failed
|
|
719
|
+
# Add placeholder if embedding failed (centered)
|
|
485
720
|
p = doc.add_paragraph()
|
|
721
|
+
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
486
722
|
run = p.add_run(f"[Figure: {figure_path.name}]")
|
|
487
723
|
run.italic = True
|
|
488
724
|
logger.warning(f"Could not embed figure: {figure_path}")
|
|
@@ -494,15 +730,18 @@ class DocxWriter:
|
|
|
494
730
|
# Add small space before caption to separate from figure
|
|
495
731
|
caption_para.paragraph_format.space_before = Pt(3)
|
|
496
732
|
|
|
497
|
-
# Format as "Figure number: "
|
|
733
|
+
# Format as "Figure number: " or "Supp. Fig. number: "
|
|
498
734
|
if figure_number:
|
|
499
|
-
|
|
735
|
+
if is_supplementary:
|
|
736
|
+
run = caption_para.add_run(f"Supp. Fig. S{figure_number}. ")
|
|
737
|
+
else:
|
|
738
|
+
run = caption_para.add_run(f"Fig. {figure_number}. ")
|
|
500
739
|
run.bold = True
|
|
501
|
-
run.font.size = Pt(
|
|
740
|
+
run.font.size = Pt(8)
|
|
502
741
|
else:
|
|
503
742
|
run = caption_para.add_run("Figure: ")
|
|
504
743
|
run.bold = True
|
|
505
|
-
run.font.size = Pt(
|
|
744
|
+
run.font.size = Pt(8)
|
|
506
745
|
|
|
507
746
|
# Parse and add caption with inline formatting
|
|
508
747
|
# Import the processor to parse inline formatting
|
|
@@ -515,28 +754,43 @@ class DocxWriter:
|
|
|
515
754
|
if run_data["type"] == "text":
|
|
516
755
|
text = run_data["text"]
|
|
517
756
|
run = caption_para.add_run(text)
|
|
518
|
-
run.font.size = Pt(
|
|
757
|
+
run.font.size = Pt(8)
|
|
519
758
|
|
|
520
759
|
# Apply formatting
|
|
521
760
|
if run_data.get("bold"):
|
|
522
761
|
run.bold = True
|
|
523
762
|
if run_data.get("italic"):
|
|
524
763
|
run.italic = True
|
|
764
|
+
if run_data.get("subscript"):
|
|
765
|
+
run.font.subscript = True
|
|
766
|
+
if run_data.get("superscript"):
|
|
767
|
+
run.font.superscript = True
|
|
525
768
|
if run_data.get("code"):
|
|
526
769
|
run.font.name = "Courier New"
|
|
527
770
|
if run_data.get("xref"):
|
|
528
|
-
|
|
771
|
+
# Use color based on xref type
|
|
772
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
773
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
529
774
|
if run_data.get("highlight_yellow"):
|
|
530
|
-
run
|
|
775
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
531
776
|
elif run_data["type"] == "inline_equation":
|
|
532
777
|
# Add inline equation as Office Math
|
|
533
778
|
latex_content = run_data.get("latex", "")
|
|
534
779
|
self._add_inline_equation(caption_para, latex_content)
|
|
780
|
+
elif run_data["type"] == "inline_comment":
|
|
781
|
+
# Add inline comment with gray highlighting (unless hide_comments is enabled)
|
|
782
|
+
if not self.hide_comments:
|
|
783
|
+
comment_text = run_data["text"]
|
|
784
|
+
run = caption_para.add_run(f"[Comment: {comment_text}]")
|
|
785
|
+
self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
|
|
786
|
+
run.italic = True
|
|
787
|
+
run.font.size = Pt(8)
|
|
535
788
|
elif run_data["type"] == "citation":
|
|
536
789
|
cite_num = run_data["number"]
|
|
537
790
|
run = caption_para.add_run(f"[{cite_num}]")
|
|
538
791
|
run.bold = True
|
|
539
|
-
run.font.size = Pt(
|
|
792
|
+
run.font.size = Pt(8)
|
|
793
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
540
794
|
|
|
541
795
|
# Add spacing after figure (reduced from 12 to 6 for compactness)
|
|
542
796
|
caption_para.paragraph_format.space_after = Pt(6)
|
|
@@ -602,10 +856,16 @@ class DocxWriter:
|
|
|
602
856
|
run.italic = True
|
|
603
857
|
if run_data.get("underline"):
|
|
604
858
|
run.underline = True
|
|
859
|
+
if run_data.get("subscript"):
|
|
860
|
+
run.font.subscript = True
|
|
861
|
+
if run_data.get("superscript"):
|
|
862
|
+
run.font.superscript = True
|
|
605
863
|
if run_data.get("code"):
|
|
606
864
|
run.font.name = "Courier New"
|
|
607
865
|
if run_data.get("xref"):
|
|
608
|
-
|
|
866
|
+
# Use color based on xref type
|
|
867
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
868
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
609
869
|
|
|
610
870
|
# Add table caption if present
|
|
611
871
|
caption = section.get("caption")
|
|
@@ -616,18 +876,30 @@ class DocxWriter:
|
|
|
616
876
|
# Add small space before caption to separate from table
|
|
617
877
|
caption_para.paragraph_format.space_before = Pt(3)
|
|
618
878
|
|
|
619
|
-
# Determine table number from label
|
|
879
|
+
# Determine table number from label using table_map
|
|
620
880
|
if label and label.startswith("stable:"):
|
|
621
|
-
#
|
|
622
|
-
|
|
623
|
-
#
|
|
624
|
-
|
|
881
|
+
# Extract label name (e.g., "stable:parameters" -> "parameters")
|
|
882
|
+
label_name = label.split(":", 1)[1] if ":" in label else label
|
|
883
|
+
# Look up number in table_map
|
|
884
|
+
table_num = self.table_map.get(label_name)
|
|
885
|
+
if table_num:
|
|
886
|
+
run = caption_para.add_run(f"Supp. Table S{table_num}. ")
|
|
887
|
+
else:
|
|
888
|
+
# Fallback if label not in map
|
|
889
|
+
run = caption_para.add_run("Supp. Table: ")
|
|
625
890
|
run.bold = True
|
|
626
|
-
run.font.size = Pt(
|
|
891
|
+
run.font.size = Pt(8)
|
|
627
892
|
elif label and label.startswith("table:"):
|
|
628
|
-
|
|
893
|
+
# Extract label name for main tables
|
|
894
|
+
label_name = label.split(":", 1)[1] if ":" in label else label
|
|
895
|
+
# Look up number in table_map (though main tables may not be in map)
|
|
896
|
+
table_num = self.table_map.get(label_name)
|
|
897
|
+
if table_num:
|
|
898
|
+
run = caption_para.add_run(f"Table {table_num}. ")
|
|
899
|
+
else:
|
|
900
|
+
run = caption_para.add_run("Table: ")
|
|
629
901
|
run.bold = True
|
|
630
|
-
run.font.size = Pt(
|
|
902
|
+
run.font.size = Pt(8)
|
|
631
903
|
|
|
632
904
|
# Parse and add caption with inline formatting
|
|
633
905
|
caption_runs = processor._parse_inline_formatting(caption, {})
|
|
@@ -635,17 +907,23 @@ class DocxWriter:
|
|
|
635
907
|
if run_data["type"] == "text":
|
|
636
908
|
text = run_data["text"]
|
|
637
909
|
run = caption_para.add_run(text)
|
|
638
|
-
run.font.size = Pt(
|
|
910
|
+
run.font.size = Pt(8)
|
|
639
911
|
if run_data.get("bold"):
|
|
640
912
|
run.bold = True
|
|
641
913
|
if run_data.get("italic"):
|
|
642
914
|
run.italic = True
|
|
643
915
|
if run_data.get("underline"):
|
|
644
916
|
run.underline = True
|
|
917
|
+
if run_data.get("subscript"):
|
|
918
|
+
run.font.subscript = True
|
|
919
|
+
if run_data.get("superscript"):
|
|
920
|
+
run.font.superscript = True
|
|
645
921
|
if run_data.get("code"):
|
|
646
922
|
run.font.name = "Courier New"
|
|
647
923
|
if run_data.get("xref"):
|
|
648
|
-
|
|
924
|
+
# Use color based on xref type
|
|
925
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
926
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
649
927
|
|
|
650
928
|
# Add spacing after table (reduced from 12 to 6 for compactness)
|
|
651
929
|
caption_para.paragraph_format.space_after = Pt(6)
|