rxiv-maker 1.16.8__py3-none-any.whl → 1.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxiv_maker/__version__.py +1 -1
- rxiv_maker/cli/commands/build.py +7 -0
- rxiv_maker/cli/framework/workflow_commands.py +66 -2
- rxiv_maker/converters/citation_processor.py +5 -3
- rxiv_maker/core/managers/config_manager.py +1 -0
- rxiv_maker/exporters/docx_citation_mapper.py +99 -0
- rxiv_maker/exporters/docx_content_processor.py +128 -30
- rxiv_maker/exporters/docx_exporter.py +79 -21
- rxiv_maker/exporters/docx_writer.py +189 -24
- rxiv_maker/templates/registry.py +1 -0
- rxiv_maker/tex/style/rxiv_maker_style.cls +33 -33
- rxiv_maker/utils/docx_helpers.py +39 -1
- rxiv_maker/utils/pdf_splitter.py +116 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.17.0.dist-info}/METADATA +2 -1
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.17.0.dist-info}/RECORD +18 -17
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.17.0.dist-info}/WHEEL +0 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.17.0.dist-info}/entry_points.txt +0 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.17.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -45,11 +45,16 @@ class DocxExporter:
|
|
|
45
45
|
self.resolve_dois = resolve_dois
|
|
46
46
|
self.include_footnotes = include_footnotes
|
|
47
47
|
|
|
48
|
-
# Load config to get author name format preference
|
|
48
|
+
# Load config to get author name format preference and DOCX options
|
|
49
49
|
config_manager = ConfigManager(base_dir=Path(manuscript_path))
|
|
50
50
|
config = config_manager.load_config()
|
|
51
51
|
self.author_format = config.get("bibliography_author_format", "lastname_firstname")
|
|
52
52
|
|
|
53
|
+
# DOCX export options
|
|
54
|
+
docx_config = config.get("docx", {})
|
|
55
|
+
self.hide_si = docx_config.get("hide_si", False) # Default to False (don't hide SI) for backwards compatibility
|
|
56
|
+
self.figures_at_end = docx_config.get("figures_at_end", False) # Default to False (inline figures)
|
|
57
|
+
|
|
53
58
|
# Components
|
|
54
59
|
self.citation_mapper = CitationMapper()
|
|
55
60
|
self.content_processor = DocxContentProcessor()
|
|
@@ -98,6 +103,13 @@ class DocxExporter:
|
|
|
98
103
|
markdown_content = self._load_markdown()
|
|
99
104
|
logger.debug(f"Loaded {len(markdown_content)} characters of markdown")
|
|
100
105
|
|
|
106
|
+
# Step 2.5: If SI is hidden from export, still load it for label mapping
|
|
107
|
+
si_content_for_mapping = ""
|
|
108
|
+
if self.hide_si:
|
|
109
|
+
si_content_for_mapping = self._load_si_for_mapping()
|
|
110
|
+
if si_content_for_mapping:
|
|
111
|
+
logger.info("📋 Loaded SI content for label mapping (SI section hidden from export)")
|
|
112
|
+
|
|
101
113
|
# Step 3: Extract and map citations
|
|
102
114
|
citations = self.citation_mapper.extract_citations_from_markdown(markdown_content)
|
|
103
115
|
citation_map = self.citation_mapper.create_mapping(citations)
|
|
@@ -120,14 +132,14 @@ class DocxExporter:
|
|
|
120
132
|
|
|
121
133
|
# Replace @fig:label with "Fig. X" in text, handling optional panel letters
|
|
122
134
|
# Pattern matches: @fig:label optionally followed by space and panel letter(s)
|
|
123
|
-
# Use special markers <<XREF>> to enable
|
|
135
|
+
# Use special markers <<XREF:type>> to enable color-coded highlighting in DOCX
|
|
124
136
|
for label, num in figure_map.items():
|
|
125
137
|
# Match @fig:label with optional panel letters like " a", " a,b", " a-c"
|
|
126
138
|
# Use negative lookahead (?![a-z]) to prevent matching start of words like " is", " and"
|
|
127
139
|
# Panel letters must be followed by non-letter (space, punctuation, end of string)
|
|
128
140
|
markdown_with_numbers = re.sub(
|
|
129
141
|
rf"@fig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
|
|
130
|
-
lambda m, num=num: f"<<XREF>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
142
|
+
lambda m, num=num: f"<<XREF:fig>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
131
143
|
markdown_with_numbers,
|
|
132
144
|
)
|
|
133
145
|
|
|
@@ -135,7 +147,9 @@ class DocxExporter:
|
|
|
135
147
|
|
|
136
148
|
# Find all supplementary figures and create mapping
|
|
137
149
|
# Allow hyphens and underscores in label names
|
|
138
|
-
|
|
150
|
+
# IMPORTANT: When SI is excluded, extract from SI content (where figures are defined)
|
|
151
|
+
content_to_scan_for_sfigs = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
152
|
+
sfig_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#sfig:([\w-]+)", content_to_scan_for_sfigs)
|
|
139
153
|
sfig_map = {label: i + 1 for i, label in enumerate(sfig_labels)}
|
|
140
154
|
|
|
141
155
|
# Replace @sfig:label with "Supp. Fig. X" in text, handling optional panel letters
|
|
@@ -144,34 +158,51 @@ class DocxExporter:
|
|
|
144
158
|
# Negative lookahead prevents matching start of words
|
|
145
159
|
markdown_with_numbers = re.sub(
|
|
146
160
|
rf"@sfig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
|
|
147
|
-
lambda m, num=num: f"<<XREF>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
161
|
+
lambda m, num=num: f"<<XREF:sfig>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
148
162
|
markdown_with_numbers,
|
|
149
163
|
)
|
|
150
164
|
|
|
151
165
|
logger.debug(f"Mapped {len(sfig_map)} supplementary figure labels to numbers")
|
|
152
166
|
|
|
153
|
-
# Find all tables and create mapping (looking for {#stable:label} tags)
|
|
154
|
-
#
|
|
155
|
-
|
|
167
|
+
# Find all tables and create mapping (looking for {#stable:label} or \label{stable:label} tags)
|
|
168
|
+
# IMPORTANT: PDF uses the order that tables are DEFINED in the document (order of \label{stable:X})
|
|
169
|
+
# NOT the order of caption references (%{#stable:X}) which are just metadata
|
|
170
|
+
# When SI is excluded from export, we still need to extract labels from SI
|
|
171
|
+
|
|
172
|
+
content_to_scan_for_tables = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
173
|
+
|
|
174
|
+
# Extract table labels in document order (both {#stable:label} markdown format and \label{stable:label} LaTeX format)
|
|
175
|
+
# The PDF numbering follows the order these labels appear in the document
|
|
176
|
+
markdown_labels = re.findall(r"\{#stable:([\w-]+)\}", content_to_scan_for_tables)
|
|
177
|
+
latex_labels = re.findall(r"\\label\{stable:([\w-]+)\}", content_to_scan_for_tables)
|
|
178
|
+
|
|
179
|
+
# Combine both formats, preferring LaTeX labels if present (since that's what PDF uses)
|
|
180
|
+
table_labels = latex_labels if latex_labels else markdown_labels
|
|
181
|
+
|
|
182
|
+
# Remove duplicates while preserving order
|
|
183
|
+
seen = set()
|
|
184
|
+
table_labels = [label for label in table_labels if not (label in seen or seen.add(label))]
|
|
185
|
+
|
|
156
186
|
table_map = {label: i + 1 for i, label in enumerate(table_labels)}
|
|
187
|
+
logger.debug(f"Mapped {len(table_map)} supplementary tables: {table_map}")
|
|
157
188
|
|
|
158
189
|
# Replace @stable:label with "Supp. Table X" in text
|
|
159
190
|
for label, num in table_map.items():
|
|
160
191
|
markdown_with_numbers = re.sub(
|
|
161
|
-
rf"@stable:{label}\b", f"<<XREF>>Supp. Table {num}<</XREF>>", markdown_with_numbers
|
|
192
|
+
rf"@stable:{label}\b", f"<<XREF:stable>>Supp. Table {num}<</XREF>>", markdown_with_numbers
|
|
162
193
|
)
|
|
163
194
|
|
|
164
|
-
logger.debug(f"Mapped {len(table_map)} supplementary table labels to numbers")
|
|
165
|
-
|
|
166
195
|
# Find all supplementary notes and create mapping (looking for {#snote:label} tags)
|
|
167
196
|
# Allow hyphens and underscores in label names
|
|
168
|
-
|
|
197
|
+
# IMPORTANT: When SI is excluded, extract from SI content (where notes are defined)
|
|
198
|
+
content_to_scan_for_snotes = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
199
|
+
snote_labels = re.findall(r"\{#snote:([\w-]+)\}", content_to_scan_for_snotes)
|
|
169
200
|
snote_map = {label: i + 1 for i, label in enumerate(snote_labels)}
|
|
170
201
|
|
|
171
202
|
# Replace @snote:label with "Supp. Note X" in text
|
|
172
203
|
for label, num in snote_map.items():
|
|
173
204
|
markdown_with_numbers = re.sub(
|
|
174
|
-
rf"@snote:{label}\b", f"<<XREF>>Supp. Note {num}<</XREF>>", markdown_with_numbers
|
|
205
|
+
rf"@snote:{label}\b", f"<<XREF:snote>>Supp. Note {num}<</XREF>>", markdown_with_numbers
|
|
175
206
|
)
|
|
176
207
|
|
|
177
208
|
logger.debug(f"Mapped {len(snote_map)} supplementary note labels to numbers")
|
|
@@ -186,18 +217,17 @@ class DocxExporter:
|
|
|
186
217
|
for label, num in equation_map.items():
|
|
187
218
|
# Replace (@eq:label) with (Eq. X)
|
|
188
219
|
markdown_with_numbers = re.sub(
|
|
189
|
-
rf"\(@eq:{label}\b\)", f"(<<XREF>>Eq. {num}<</XREF>>)", markdown_with_numbers
|
|
220
|
+
rf"\(@eq:{label}\b\)", f"(<<XREF:eq>>Eq. {num}<</XREF>>)", markdown_with_numbers
|
|
190
221
|
)
|
|
191
222
|
# Replace @eq:label with Eq. X
|
|
192
|
-
markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF>>Eq. {num}<</XREF>>", markdown_with_numbers)
|
|
223
|
+
markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF:eq>>Eq. {num}<</XREF>>", markdown_with_numbers)
|
|
193
224
|
|
|
194
225
|
logger.debug(f"Mapped {len(equation_map)} equation labels to numbers")
|
|
195
226
|
|
|
196
227
|
# Step 5.6: Remove label markers now that mapping is complete
|
|
197
228
|
# These metadata markers should not appear in the final output
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
)
|
|
229
|
+
# NOTE: Keep fig/sfig/stable/table labels - they're needed by content processor and removed during caption parsing
|
|
230
|
+
markdown_with_numbers = re.sub(r"^\{#(?:snote|eq):[^}]+\}\s*", "", markdown_with_numbers, flags=re.MULTILINE)
|
|
201
231
|
|
|
202
232
|
# Step 6: Convert content to DOCX structure
|
|
203
233
|
doc_structure = self.content_processor.parse(markdown_with_numbers, citation_map)
|
|
@@ -215,6 +245,8 @@ class DocxExporter:
|
|
|
215
245
|
include_footnotes=self.include_footnotes,
|
|
216
246
|
base_path=self.path_manager.manuscript_path,
|
|
217
247
|
metadata=metadata,
|
|
248
|
+
table_map=table_map,
|
|
249
|
+
figures_at_end=self.figures_at_end,
|
|
218
250
|
)
|
|
219
251
|
logger.info(f"DOCX exported successfully: {docx_path}")
|
|
220
252
|
|
|
@@ -265,9 +297,9 @@ class DocxExporter:
|
|
|
265
297
|
|
|
266
298
|
content.append(main_content)
|
|
267
299
|
|
|
268
|
-
# Load 02_SUPPLEMENTARY_INFO.md if exists
|
|
300
|
+
# Load 02_SUPPLEMENTARY_INFO.md if exists and not configured to hide SI
|
|
269
301
|
supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
|
|
270
|
-
if supp_md.exists():
|
|
302
|
+
if supp_md.exists() and not self.hide_si:
|
|
271
303
|
logger.info("Including supplementary information")
|
|
272
304
|
supp_content = supp_md.read_text(encoding="utf-8")
|
|
273
305
|
supp_content = remove_yaml_header(supp_content)
|
|
@@ -281,11 +313,36 @@ class DocxExporter:
|
|
|
281
313
|
content.append("<!-- PAGE_BREAK -->")
|
|
282
314
|
content.append("# Supplementary Information")
|
|
283
315
|
content.append(supp_content)
|
|
316
|
+
elif supp_md.exists() and self.hide_si:
|
|
317
|
+
logger.info("Supplementary information exists but hidden per config (docx.hide_si: true)")
|
|
284
318
|
else:
|
|
285
319
|
logger.debug("No supplementary information file found")
|
|
286
320
|
|
|
287
321
|
return "\n\n".join(content)
|
|
288
322
|
|
|
323
|
+
def _load_si_for_mapping(self) -> str:
|
|
324
|
+
r"""Load SI content for label mapping without including in export.
|
|
325
|
+
|
|
326
|
+
This method is used when hide_si is True but we still need to extract
|
|
327
|
+
SI labels (stable, sfig, snote) for cross-references in the main text.
|
|
328
|
+
|
|
329
|
+
IMPORTANT: We return RAW content (before preprocessing) because we need to
|
|
330
|
+
extract LaTeX labels (\label{stable:X}) which determine the PDF numbering order.
|
|
331
|
+
The preprocessor strips out {{tex: blocks, losing this ordering information.
|
|
332
|
+
|
|
333
|
+
Returns:
|
|
334
|
+
SI content as string (raw, before preprocessing), or empty string if SI doesn't exist
|
|
335
|
+
"""
|
|
336
|
+
supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
|
|
337
|
+
if not supp_md.exists():
|
|
338
|
+
return ""
|
|
339
|
+
|
|
340
|
+
# Load RAW SI content (don't preprocess - we need LaTeX labels for ordering)
|
|
341
|
+
supp_content = supp_md.read_text(encoding="utf-8")
|
|
342
|
+
supp_content = remove_yaml_header(supp_content)
|
|
343
|
+
|
|
344
|
+
return supp_content
|
|
345
|
+
|
|
289
346
|
def _build_bibliography(self, citation_map: Dict[str, int]) -> Dict[int, Dict]:
|
|
290
347
|
"""Build bibliography with optional DOI resolution.
|
|
291
348
|
|
|
@@ -327,7 +384,8 @@ class DocxExporter:
|
|
|
327
384
|
logger.info(f"Resolved DOI for {key}: {doi}")
|
|
328
385
|
|
|
329
386
|
# Format entry (full format for DOCX bibliography)
|
|
330
|
-
formatted
|
|
387
|
+
# Don't include DOI in formatted text - it will be added separately as a hyperlink by the writer
|
|
388
|
+
formatted = format_bibliography_entry(entry, doi=None, slim=False, author_format=self.author_format)
|
|
331
389
|
|
|
332
390
|
bibliography[number] = {"key": key, "entry": entry, "doi": doi, "formatted": formatted}
|
|
333
391
|
|
|
@@ -24,6 +24,29 @@ logger = get_logger()
|
|
|
24
24
|
class DocxWriter:
|
|
25
25
|
"""Writes structured content to DOCX files using python-docx."""
|
|
26
26
|
|
|
27
|
+
# Color mapping for different reference types
|
|
28
|
+
XREF_COLORS = {
|
|
29
|
+
"fig": WD_COLOR_INDEX.BRIGHT_GREEN, # Figures (bright green - lighter)
|
|
30
|
+
"sfig": WD_COLOR_INDEX.TURQUOISE, # Supplementary figures (turquoise - lighter cyan)
|
|
31
|
+
"stable": WD_COLOR_INDEX.TURQUOISE, # Supplementary tables (turquoise - lighter cyan)
|
|
32
|
+
"table": WD_COLOR_INDEX.BLUE, # Main tables
|
|
33
|
+
"eq": WD_COLOR_INDEX.VIOLET, # Equations
|
|
34
|
+
"snote": WD_COLOR_INDEX.TURQUOISE, # Supplementary notes (turquoise - lighter cyan)
|
|
35
|
+
"cite": WD_COLOR_INDEX.YELLOW, # Citations (yellow)
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
@staticmethod
|
|
39
|
+
def get_xref_color(xref_type: str):
|
|
40
|
+
"""Get highlight color for a cross-reference type.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
xref_type: Type of cross-reference (fig, sfig, stable, table, eq, snote, cite)
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
WD_COLOR_INDEX color for the xref type, or YELLOW as default
|
|
47
|
+
"""
|
|
48
|
+
return DocxWriter.XREF_COLORS.get(xref_type, WD_COLOR_INDEX.YELLOW)
|
|
49
|
+
|
|
27
50
|
def write(
|
|
28
51
|
self,
|
|
29
52
|
doc_structure: Dict[str, Any],
|
|
@@ -32,6 +55,8 @@ class DocxWriter:
|
|
|
32
55
|
include_footnotes: bool = True,
|
|
33
56
|
base_path: Optional[Path] = None,
|
|
34
57
|
metadata: Optional[Dict[str, Any]] = None,
|
|
58
|
+
table_map: Optional[Dict[str, int]] = None,
|
|
59
|
+
figures_at_end: bool = False,
|
|
35
60
|
) -> Path:
|
|
36
61
|
"""Write DOCX file from structured content.
|
|
37
62
|
|
|
@@ -42,6 +67,8 @@ class DocxWriter:
|
|
|
42
67
|
include_footnotes: Whether to add DOI footnotes
|
|
43
68
|
base_path: Base path for resolving relative figure paths
|
|
44
69
|
metadata: Document metadata (title, authors, affiliations)
|
|
70
|
+
table_map: Mapping from table labels to numbers (for supplementary tables)
|
|
71
|
+
figures_at_end: Place main figures at end before SI/bibliography
|
|
45
72
|
|
|
46
73
|
Returns:
|
|
47
74
|
Path to created DOCX file
|
|
@@ -49,6 +76,7 @@ class DocxWriter:
|
|
|
49
76
|
self.base_path = base_path or Path.cwd()
|
|
50
77
|
self.bibliography = bibliography
|
|
51
78
|
self.include_footnotes = include_footnotes
|
|
79
|
+
self.table_map = table_map or {}
|
|
52
80
|
doc = Document()
|
|
53
81
|
|
|
54
82
|
# Add title and author information if metadata provided
|
|
@@ -69,15 +97,36 @@ class DocxWriter:
|
|
|
69
97
|
# Store figure map for use in text processing
|
|
70
98
|
self.figure_map = figure_map
|
|
71
99
|
|
|
72
|
-
#
|
|
100
|
+
# Collect main figures if figures_at_end is True
|
|
101
|
+
collected_main_figures = []
|
|
102
|
+
|
|
103
|
+
# Process each section
|
|
73
104
|
figure_counter = 0
|
|
105
|
+
sfigure_counter = 0
|
|
74
106
|
for section in doc_structure["sections"]:
|
|
75
107
|
if section["type"] == "figure":
|
|
76
|
-
|
|
77
|
-
|
|
108
|
+
is_supplementary = section.get("is_supplementary", False)
|
|
109
|
+
if is_supplementary:
|
|
110
|
+
# Supplementary figures always go inline (in SI section)
|
|
111
|
+
sfigure_counter += 1
|
|
112
|
+
self._add_figure(doc, section, figure_number=sfigure_counter, is_supplementary=True)
|
|
113
|
+
else:
|
|
114
|
+
# Main figures: collect if figures_at_end, otherwise add inline
|
|
115
|
+
figure_counter += 1
|
|
116
|
+
if figures_at_end:
|
|
117
|
+
collected_main_figures.append((section, figure_counter))
|
|
118
|
+
else:
|
|
119
|
+
self._add_figure(doc, section, figure_number=figure_counter, is_supplementary=False)
|
|
78
120
|
else:
|
|
79
121
|
self._add_section(doc, section, bibliography, include_footnotes)
|
|
80
122
|
|
|
123
|
+
# Add collected main figures at the end (before bibliography)
|
|
124
|
+
if figures_at_end and collected_main_figures:
|
|
125
|
+
doc.add_page_break()
|
|
126
|
+
doc.add_heading("Figures", level=1)
|
|
127
|
+
for section, fig_num in collected_main_figures:
|
|
128
|
+
self._add_figure(doc, section, figure_number=fig_num, is_supplementary=False)
|
|
129
|
+
|
|
81
130
|
# Add bibliography section at the end
|
|
82
131
|
if include_footnotes and bibliography:
|
|
83
132
|
doc.add_page_break()
|
|
@@ -92,14 +141,14 @@ class DocxWriter:
|
|
|
92
141
|
num_run = para.add_run(f"[{num}] ")
|
|
93
142
|
num_run.bold = True
|
|
94
143
|
|
|
95
|
-
# Add formatted bibliography text (
|
|
144
|
+
# Add formatted bibliography text (without DOI - added separately below)
|
|
96
145
|
para.add_run(bib_entry["formatted"])
|
|
97
146
|
|
|
98
147
|
# Add DOI as hyperlink with yellow highlighting if present
|
|
99
148
|
if bib_entry.get("doi"):
|
|
100
149
|
doi = bib_entry["doi"]
|
|
101
150
|
doi_url = f"https://doi.org/{doi}" if not doi.startswith("http") else doi
|
|
102
|
-
para.add_run(" ")
|
|
151
|
+
para.add_run("\nDOI: ")
|
|
103
152
|
self._add_hyperlink(para, doi_url, doi_url, highlight=True)
|
|
104
153
|
|
|
105
154
|
# Add spacing between entries
|
|
@@ -228,6 +277,8 @@ class DocxWriter:
|
|
|
228
277
|
self._add_list(doc, section)
|
|
229
278
|
elif section_type == "code_block":
|
|
230
279
|
self._add_code_block(doc, section)
|
|
280
|
+
elif section_type == "comment":
|
|
281
|
+
self._add_comment(doc, section)
|
|
231
282
|
elif section_type == "figure":
|
|
232
283
|
self._add_figure(doc, section)
|
|
233
284
|
elif section_type == "table":
|
|
@@ -310,11 +361,17 @@ class DocxWriter:
|
|
|
310
361
|
run.italic = True
|
|
311
362
|
if run_data.get("underline"):
|
|
312
363
|
run.underline = True
|
|
364
|
+
if run_data.get("subscript"):
|
|
365
|
+
run.font.subscript = True
|
|
366
|
+
if run_data.get("superscript"):
|
|
367
|
+
run.font.superscript = True
|
|
313
368
|
if run_data.get("code"):
|
|
314
369
|
run.font.name = "Courier New"
|
|
315
370
|
run.font.size = Pt(10)
|
|
316
371
|
if run_data.get("xref"):
|
|
317
|
-
|
|
372
|
+
# Use color based on xref type (fig, sfig, stable, eq, etc.)
|
|
373
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
374
|
+
run.font.highlight_color = self.get_xref_color(xref_type)
|
|
318
375
|
if run_data.get("highlight_yellow"):
|
|
319
376
|
run.font.highlight_color = WD_COLOR_INDEX.YELLOW
|
|
320
377
|
|
|
@@ -329,6 +386,14 @@ class DocxWriter:
|
|
|
329
386
|
latex_content = run_data.get("latex", "")
|
|
330
387
|
self._add_inline_equation(paragraph, latex_content)
|
|
331
388
|
|
|
389
|
+
elif run_data["type"] == "inline_comment":
|
|
390
|
+
# Add inline comment with gray highlighting
|
|
391
|
+
comment_text = run_data["text"]
|
|
392
|
+
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
393
|
+
run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
|
|
394
|
+
run.italic = True
|
|
395
|
+
run.font.size = Pt(10)
|
|
396
|
+
|
|
332
397
|
elif run_data["type"] == "citation":
|
|
333
398
|
cite_num = run_data["number"]
|
|
334
399
|
# Add citation as [NN] inline with yellow highlighting
|
|
@@ -362,10 +427,16 @@ class DocxWriter:
|
|
|
362
427
|
run.bold = True
|
|
363
428
|
if run_data.get("italic"):
|
|
364
429
|
run.italic = True
|
|
430
|
+
if run_data.get("subscript"):
|
|
431
|
+
run.font.subscript = True
|
|
432
|
+
if run_data.get("superscript"):
|
|
433
|
+
run.font.superscript = True
|
|
365
434
|
if run_data.get("code"):
|
|
366
435
|
run.font.name = "Courier New"
|
|
367
436
|
if run_data.get("xref"):
|
|
368
|
-
|
|
437
|
+
# Use color based on xref type
|
|
438
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
439
|
+
run.font.highlight_color = self.get_xref_color(xref_type)
|
|
369
440
|
if run_data.get("highlight_yellow"):
|
|
370
441
|
run.font.highlight_color = WD_COLOR_INDEX.YELLOW
|
|
371
442
|
run.font.size = Pt(10)
|
|
@@ -379,11 +450,19 @@ class DocxWriter:
|
|
|
379
450
|
# Add inline equation as Office Math
|
|
380
451
|
latex_content = run_data.get("latex", "")
|
|
381
452
|
self._add_inline_equation(paragraph, latex_content)
|
|
453
|
+
elif run_data["type"] == "inline_comment":
|
|
454
|
+
# Add inline comment with gray highlighting
|
|
455
|
+
comment_text = run_data["text"]
|
|
456
|
+
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
457
|
+
run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
|
|
458
|
+
run.italic = True
|
|
459
|
+
run.font.size = Pt(10)
|
|
382
460
|
elif run_data["type"] == "citation":
|
|
383
461
|
cite_num = run_data["number"]
|
|
384
462
|
run = paragraph.add_run(f"[{cite_num}]")
|
|
385
463
|
run.bold = True
|
|
386
464
|
run.font.size = Pt(10)
|
|
465
|
+
run.font.highlight_color = WD_COLOR_INDEX.YELLOW
|
|
387
466
|
|
|
388
467
|
def _add_code_block(self, doc: Document, section: Dict[str, Any]):
|
|
389
468
|
"""Add code block to document.
|
|
@@ -404,6 +483,22 @@ class DocxWriter:
|
|
|
404
483
|
paragraph_format = paragraph.paragraph_format
|
|
405
484
|
paragraph_format.left_indent = Pt(36) # Indent code blocks
|
|
406
485
|
|
|
486
|
+
def _add_comment(self, doc: Document, section: Dict[str, Any]):
|
|
487
|
+
"""Add comment to document with gray highlighting.
|
|
488
|
+
|
|
489
|
+
Args:
|
|
490
|
+
doc: Document object
|
|
491
|
+
section: Comment section data with 'text'
|
|
492
|
+
"""
|
|
493
|
+
comment_text = section["text"]
|
|
494
|
+
paragraph = doc.add_paragraph()
|
|
495
|
+
|
|
496
|
+
# Add comment text with light gray highlighting to distinguish from colored xrefs
|
|
497
|
+
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
498
|
+
run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
|
|
499
|
+
run.italic = True
|
|
500
|
+
run.font.size = Pt(10)
|
|
501
|
+
|
|
407
502
|
def _check_poppler_availability(self) -> bool:
|
|
408
503
|
"""Check if poppler is available for PDF conversion.
|
|
409
504
|
|
|
@@ -417,13 +512,16 @@ class DocxWriter:
|
|
|
417
512
|
|
|
418
513
|
return result.status == DependencyStatus.AVAILABLE
|
|
419
514
|
|
|
420
|
-
def _add_figure(
|
|
515
|
+
def _add_figure(
|
|
516
|
+
self, doc: Document, section: Dict[str, Any], figure_number: int = None, is_supplementary: bool = False
|
|
517
|
+
):
|
|
421
518
|
"""Add figure to document with caption.
|
|
422
519
|
|
|
423
520
|
Args:
|
|
424
521
|
doc: Document object
|
|
425
522
|
section: Figure section data with 'path', 'caption', 'label'
|
|
426
523
|
figure_number: Figure number (1-indexed)
|
|
524
|
+
is_supplementary: Whether this is a supplementary figure
|
|
427
525
|
"""
|
|
428
526
|
figure_path = Path(section["path"])
|
|
429
527
|
caption = section.get("caption", "")
|
|
@@ -470,19 +568,45 @@ class DocxWriter:
|
|
|
470
568
|
logger.warning(f"Unsupported image format: {figure_path.suffix}")
|
|
471
569
|
|
|
472
570
|
if img_source:
|
|
473
|
-
# Add image
|
|
571
|
+
# Add image with proper sizing to fit page
|
|
474
572
|
try:
|
|
475
|
-
|
|
476
|
-
|
|
573
|
+
from PIL import Image as PILImage
|
|
574
|
+
|
|
575
|
+
# Get image dimensions
|
|
576
|
+
with PILImage.open(img_source) as img:
|
|
577
|
+
img_width, img_height = img.size
|
|
578
|
+
aspect_ratio = img_width / img_height
|
|
579
|
+
|
|
580
|
+
# Page dimensions with margins (Letter size: 8.5 x 11 inches, 1 inch margins)
|
|
581
|
+
max_width = Inches(6.5) # 8.5 - 2*1
|
|
582
|
+
max_height = Inches(9) # 11 - 2*1
|
|
583
|
+
|
|
584
|
+
# Add figure centered
|
|
585
|
+
# Note: add_picture() creates a paragraph automatically, but we need to add it explicitly
|
|
586
|
+
# to control alignment
|
|
587
|
+
fig_para = doc.add_paragraph()
|
|
588
|
+
fig_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
589
|
+
|
|
590
|
+
# Calculate optimal size maintaining aspect ratio
|
|
591
|
+
if aspect_ratio > (6.5 / 9): # Wide image - constrain by width
|
|
592
|
+
run = fig_para.add_run()
|
|
593
|
+
run.add_picture(img_source, width=max_width)
|
|
594
|
+
else: # Tall image - constrain by height
|
|
595
|
+
run = fig_para.add_run()
|
|
596
|
+
run.add_picture(img_source, height=max_height)
|
|
597
|
+
|
|
598
|
+
logger.debug(f"Embedded figure: {figure_path} ({img_width}x{img_height})")
|
|
477
599
|
except Exception as e:
|
|
478
600
|
logger.warning(f"Failed to embed figure {figure_path}: {e}")
|
|
479
|
-
# Add placeholder text
|
|
601
|
+
# Add placeholder text (centered)
|
|
480
602
|
p = doc.add_paragraph()
|
|
603
|
+
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
481
604
|
run = p.add_run(f"[Figure: {figure_path.name}]")
|
|
482
605
|
run.italic = True
|
|
483
606
|
else:
|
|
484
|
-
# Add placeholder if embedding failed
|
|
607
|
+
# Add placeholder if embedding failed (centered)
|
|
485
608
|
p = doc.add_paragraph()
|
|
609
|
+
p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
486
610
|
run = p.add_run(f"[Figure: {figure_path.name}]")
|
|
487
611
|
run.italic = True
|
|
488
612
|
logger.warning(f"Could not embed figure: {figure_path}")
|
|
@@ -494,9 +618,12 @@ class DocxWriter:
|
|
|
494
618
|
# Add small space before caption to separate from figure
|
|
495
619
|
caption_para.paragraph_format.space_before = Pt(3)
|
|
496
620
|
|
|
497
|
-
# Format as "Figure number: "
|
|
621
|
+
# Format as "Figure number: " or "Supp. Fig. number: "
|
|
498
622
|
if figure_number:
|
|
499
|
-
|
|
623
|
+
if is_supplementary:
|
|
624
|
+
run = caption_para.add_run(f"Supp. Fig. S{figure_number}. ")
|
|
625
|
+
else:
|
|
626
|
+
run = caption_para.add_run(f"Fig. {figure_number}. ")
|
|
500
627
|
run.bold = True
|
|
501
628
|
run.font.size = Pt(7)
|
|
502
629
|
else:
|
|
@@ -522,21 +649,35 @@ class DocxWriter:
|
|
|
522
649
|
run.bold = True
|
|
523
650
|
if run_data.get("italic"):
|
|
524
651
|
run.italic = True
|
|
652
|
+
if run_data.get("subscript"):
|
|
653
|
+
run.font.subscript = True
|
|
654
|
+
if run_data.get("superscript"):
|
|
655
|
+
run.font.superscript = True
|
|
525
656
|
if run_data.get("code"):
|
|
526
657
|
run.font.name = "Courier New"
|
|
527
658
|
if run_data.get("xref"):
|
|
528
|
-
|
|
659
|
+
# Use color based on xref type
|
|
660
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
661
|
+
run.font.highlight_color = self.get_xref_color(xref_type)
|
|
529
662
|
if run_data.get("highlight_yellow"):
|
|
530
663
|
run.font.highlight_color = WD_COLOR_INDEX.YELLOW
|
|
531
664
|
elif run_data["type"] == "inline_equation":
|
|
532
665
|
# Add inline equation as Office Math
|
|
533
666
|
latex_content = run_data.get("latex", "")
|
|
534
667
|
self._add_inline_equation(caption_para, latex_content)
|
|
668
|
+
elif run_data["type"] == "inline_comment":
|
|
669
|
+
# Add inline comment with gray highlighting
|
|
670
|
+
comment_text = run_data["text"]
|
|
671
|
+
run = caption_para.add_run(f"[Comment: {comment_text}]")
|
|
672
|
+
run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
|
|
673
|
+
run.italic = True
|
|
674
|
+
run.font.size = Pt(7)
|
|
535
675
|
elif run_data["type"] == "citation":
|
|
536
676
|
cite_num = run_data["number"]
|
|
537
677
|
run = caption_para.add_run(f"[{cite_num}]")
|
|
538
678
|
run.bold = True
|
|
539
679
|
run.font.size = Pt(7)
|
|
680
|
+
run.font.highlight_color = WD_COLOR_INDEX.YELLOW
|
|
540
681
|
|
|
541
682
|
# Add spacing after figure (reduced from 12 to 6 for compactness)
|
|
542
683
|
caption_para.paragraph_format.space_after = Pt(6)
|
|
@@ -602,10 +743,16 @@ class DocxWriter:
|
|
|
602
743
|
run.italic = True
|
|
603
744
|
if run_data.get("underline"):
|
|
604
745
|
run.underline = True
|
|
746
|
+
if run_data.get("subscript"):
|
|
747
|
+
run.font.subscript = True
|
|
748
|
+
if run_data.get("superscript"):
|
|
749
|
+
run.font.superscript = True
|
|
605
750
|
if run_data.get("code"):
|
|
606
751
|
run.font.name = "Courier New"
|
|
607
752
|
if run_data.get("xref"):
|
|
608
|
-
|
|
753
|
+
# Use color based on xref type
|
|
754
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
755
|
+
run.font.highlight_color = self.get_xref_color(xref_type)
|
|
609
756
|
|
|
610
757
|
# Add table caption if present
|
|
611
758
|
caption = section.get("caption")
|
|
@@ -616,16 +763,28 @@ class DocxWriter:
|
|
|
616
763
|
# Add small space before caption to separate from table
|
|
617
764
|
caption_para.paragraph_format.space_before = Pt(3)
|
|
618
765
|
|
|
619
|
-
# Determine table number from label
|
|
766
|
+
# Determine table number from label using table_map
|
|
620
767
|
if label and label.startswith("stable:"):
|
|
621
|
-
#
|
|
622
|
-
|
|
623
|
-
#
|
|
624
|
-
|
|
768
|
+
# Extract label name (e.g., "stable:parameters" -> "parameters")
|
|
769
|
+
label_name = label.split(":", 1)[1] if ":" in label else label
|
|
770
|
+
# Look up number in table_map
|
|
771
|
+
table_num = self.table_map.get(label_name)
|
|
772
|
+
if table_num:
|
|
773
|
+
run = caption_para.add_run(f"Supp. Table S{table_num}. ")
|
|
774
|
+
else:
|
|
775
|
+
# Fallback if label not in map
|
|
776
|
+
run = caption_para.add_run("Supp. Table: ")
|
|
625
777
|
run.bold = True
|
|
626
778
|
run.font.size = Pt(7)
|
|
627
779
|
elif label and label.startswith("table:"):
|
|
628
|
-
|
|
780
|
+
# Extract label name for main tables
|
|
781
|
+
label_name = label.split(":", 1)[1] if ":" in label else label
|
|
782
|
+
# Look up number in table_map (though main tables may not be in map)
|
|
783
|
+
table_num = self.table_map.get(label_name)
|
|
784
|
+
if table_num:
|
|
785
|
+
run = caption_para.add_run(f"Table {table_num}. ")
|
|
786
|
+
else:
|
|
787
|
+
run = caption_para.add_run("Table: ")
|
|
629
788
|
run.bold = True
|
|
630
789
|
run.font.size = Pt(7)
|
|
631
790
|
|
|
@@ -642,10 +801,16 @@ class DocxWriter:
|
|
|
642
801
|
run.italic = True
|
|
643
802
|
if run_data.get("underline"):
|
|
644
803
|
run.underline = True
|
|
804
|
+
if run_data.get("subscript"):
|
|
805
|
+
run.font.subscript = True
|
|
806
|
+
if run_data.get("superscript"):
|
|
807
|
+
run.font.superscript = True
|
|
645
808
|
if run_data.get("code"):
|
|
646
809
|
run.font.name = "Courier New"
|
|
647
810
|
if run_data.get("xref"):
|
|
648
|
-
|
|
811
|
+
# Use color based on xref type
|
|
812
|
+
xref_type = run_data.get("xref_type", "cite")
|
|
813
|
+
run.font.highlight_color = self.get_xref_color(xref_type)
|
|
649
814
|
|
|
650
815
|
# Add spacing after table (reduced from 12 to 6 for compactness)
|
|
651
816
|
caption_para.paragraph_format.space_after = Pt(6)
|