rxiv-maker 1.17.0__py3-none-any.whl → 1.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxiv_maker/__version__.py +1 -1
- rxiv_maker/cli/framework/workflow_commands.py +3 -1
- rxiv_maker/exporters/docx_citation_mapper.py +3 -84
- rxiv_maker/exporters/docx_content_processor.py +5 -23
- rxiv_maker/exporters/docx_exporter.py +14 -28
- rxiv_maker/exporters/docx_writer.py +184 -71
- rxiv_maker/utils/accent_character_map.py +150 -0
- rxiv_maker/utils/author_affiliation_processor.py +128 -0
- rxiv_maker/utils/citation_range_formatter.py +118 -0
- rxiv_maker/utils/comment_filter.py +46 -0
- rxiv_maker/utils/docx_helpers.py +4 -117
- rxiv_maker/utils/label_extractor.py +185 -0
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.0.dist-info}/METADATA +1 -1
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.0.dist-info}/RECORD +17 -12
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.0.dist-info}/WHEEL +0 -0
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.0.dist-info}/entry_points.txt +0 -0
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,6 +4,7 @@ This module handles the actual generation of DOCX files using python-docx,
|
|
|
4
4
|
writing structured content with formatting, citations, and references.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import base64
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any, Dict, Optional
|
|
9
10
|
|
|
@@ -11,11 +12,12 @@ from docx import Document
|
|
|
11
12
|
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_COLOR_INDEX
|
|
12
13
|
from docx.oxml import OxmlElement
|
|
13
14
|
from docx.oxml.ns import qn
|
|
14
|
-
from docx.shared import Inches, Pt
|
|
15
|
+
from docx.shared import Inches, Pt, RGBColor
|
|
15
16
|
from latex2mathml.converter import convert as latex_to_mathml
|
|
16
17
|
from lxml import etree
|
|
17
18
|
|
|
18
19
|
from ..core.logging_config import get_logger
|
|
20
|
+
from ..utils.author_affiliation_processor import AuthorAffiliationProcessor
|
|
19
21
|
from ..utils.docx_helpers import convert_pdf_to_image
|
|
20
22
|
|
|
21
23
|
logger = get_logger()
|
|
@@ -57,6 +59,8 @@ class DocxWriter:
|
|
|
57
59
|
metadata: Optional[Dict[str, Any]] = None,
|
|
58
60
|
table_map: Optional[Dict[str, int]] = None,
|
|
59
61
|
figures_at_end: bool = False,
|
|
62
|
+
hide_highlighting: bool = False,
|
|
63
|
+
hide_comments: bool = False,
|
|
60
64
|
) -> Path:
|
|
61
65
|
"""Write DOCX file from structured content.
|
|
62
66
|
|
|
@@ -69,6 +73,8 @@ class DocxWriter:
|
|
|
69
73
|
metadata: Document metadata (title, authors, affiliations)
|
|
70
74
|
table_map: Mapping from table labels to numbers (for supplementary tables)
|
|
71
75
|
figures_at_end: Place main figures at end before SI/bibliography
|
|
76
|
+
hide_highlighting: Disable colored highlighting on references and citations
|
|
77
|
+
hide_comments: Exclude all comments (block and inline) from output
|
|
72
78
|
|
|
73
79
|
Returns:
|
|
74
80
|
Path to created DOCX file
|
|
@@ -77,8 +83,13 @@ class DocxWriter:
|
|
|
77
83
|
self.bibliography = bibliography
|
|
78
84
|
self.include_footnotes = include_footnotes
|
|
79
85
|
self.table_map = table_map or {}
|
|
86
|
+
self.hide_highlighting = hide_highlighting
|
|
87
|
+
self.hide_comments = hide_comments
|
|
80
88
|
doc = Document()
|
|
81
89
|
|
|
90
|
+
# Set default font to Arial for entire document
|
|
91
|
+
self._set_default_font(doc, "Arial")
|
|
92
|
+
|
|
82
93
|
# Add title and author information if metadata provided
|
|
83
94
|
if metadata:
|
|
84
95
|
self._add_title_page(doc, metadata)
|
|
@@ -123,14 +134,18 @@ class DocxWriter:
|
|
|
123
134
|
# Add collected main figures at the end (before bibliography)
|
|
124
135
|
if figures_at_end and collected_main_figures:
|
|
125
136
|
doc.add_page_break()
|
|
126
|
-
doc.add_heading("Figures", level=1)
|
|
137
|
+
heading = doc.add_heading("Figures", level=1)
|
|
138
|
+
for run in heading.runs:
|
|
139
|
+
run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
|
|
127
140
|
for section, fig_num in collected_main_figures:
|
|
128
141
|
self._add_figure(doc, section, figure_number=fig_num, is_supplementary=False)
|
|
129
142
|
|
|
130
143
|
# Add bibliography section at the end
|
|
131
144
|
if include_footnotes and bibliography:
|
|
132
145
|
doc.add_page_break()
|
|
133
|
-
doc.add_heading("Bibliography", level=1)
|
|
146
|
+
heading = doc.add_heading("Bibliography", level=1)
|
|
147
|
+
for run in heading.runs:
|
|
148
|
+
run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
|
|
134
149
|
|
|
135
150
|
# Add numbered bibliography entries
|
|
136
151
|
for num in sorted(bibliography.keys()):
|
|
@@ -144,12 +159,12 @@ class DocxWriter:
|
|
|
144
159
|
# Add formatted bibliography text (without DOI - added separately below)
|
|
145
160
|
para.add_run(bib_entry["formatted"])
|
|
146
161
|
|
|
147
|
-
# Add DOI as hyperlink with yellow highlighting if present
|
|
162
|
+
# Add DOI as hyperlink with yellow highlighting if present (unless hide_highlighting is enabled)
|
|
148
163
|
if bib_entry.get("doi"):
|
|
149
164
|
doi = bib_entry["doi"]
|
|
150
165
|
doi_url = f"https://doi.org/{doi}" if not doi.startswith("http") else doi
|
|
151
166
|
para.add_run("\nDOI: ")
|
|
152
|
-
self._add_hyperlink(para, doi_url, doi_url, highlight=
|
|
167
|
+
self._add_hyperlink(para, doi_url, doi_url, highlight=not self.hide_highlighting)
|
|
153
168
|
|
|
154
169
|
# Add spacing between entries
|
|
155
170
|
para.paragraph_format.space_after = Pt(6)
|
|
@@ -158,6 +173,38 @@ class DocxWriter:
|
|
|
158
173
|
doc.save(str(output_path))
|
|
159
174
|
return output_path
|
|
160
175
|
|
|
176
|
+
def _set_default_font(self, doc: Document, font_name: str):
    """Apply ``font_name`` as the typeface of the document's built-in styles.

    Args:
        doc: Document object whose styles are modified in place
        font_name: Font name to use (e.g., "Arial", "Times New Roman")
    """
    # The "Normal" style receives both the typeface and the 10 pt body size
    normal_font = doc.styles["Normal"].font
    normal_font.name = font_name
    normal_font.size = Pt(10)

    # "Heading 1" .. "Heading 9" receive the typeface only; sizes are untouched
    for level in range(1, 10):
        style_key = f"Heading {level}"
        try:
            doc.styles[style_key].font.name = font_name
        except KeyError:
            # No such heading style in this document; move on to the next level
            continue
|
|
197
|
+
|
|
198
|
+
def _apply_highlight(self, run, color: WD_COLOR_INDEX):
|
|
199
|
+
"""Apply highlight color to a run, unless highlighting is disabled.
|
|
200
|
+
|
|
201
|
+
Args:
|
|
202
|
+
run: The run object to apply highlighting to
|
|
203
|
+
color: The WD_COLOR_INDEX color to apply
|
|
204
|
+
"""
|
|
205
|
+
if not self.hide_highlighting:
|
|
206
|
+
run.font.highlight_color = color
|
|
207
|
+
|
|
161
208
|
def _add_title_page(self, doc: Document, metadata: Dict[str, Any]):
|
|
162
209
|
"""Add title, author and affiliation information.
|
|
163
210
|
|
|
@@ -193,27 +240,16 @@ class DocxWriter:
|
|
|
193
240
|
if not authors:
|
|
194
241
|
return # Nothing more to add
|
|
195
242
|
|
|
196
|
-
#
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
if affil_shortname not in affiliation_map:
|
|
207
|
-
affiliation_map[affil_shortname] = len(affiliation_map) + 1
|
|
208
|
-
# Look up full affiliation info
|
|
209
|
-
affil_info = affiliation_details.get(affil_shortname, {})
|
|
210
|
-
full_name = affil_info.get("full_name", affil_shortname)
|
|
211
|
-
location = affil_info.get("location", "")
|
|
212
|
-
# Format: "Full Name, Location" or just "Full Name" if no location
|
|
213
|
-
affil_text = f"{full_name}, {location}" if location else full_name
|
|
214
|
-
all_affiliations.append(affil_text)
|
|
215
|
-
|
|
216
|
-
# Add authors with superscript affiliation numbers
|
|
243
|
+
# Process author and affiliation metadata using centralized processor
|
|
244
|
+
processor = AuthorAffiliationProcessor()
|
|
245
|
+
processed = processor.process(metadata)
|
|
246
|
+
|
|
247
|
+
affiliation_map = processed["affiliation_map"]
|
|
248
|
+
ordered_affiliations = processed["ordered_affiliations"]
|
|
249
|
+
cofirst_authors = processed["cofirst_authors"]
|
|
250
|
+
corresponding_authors = processed["corresponding_authors"]
|
|
251
|
+
|
|
252
|
+
# Add authors with superscript affiliation numbers and corresponding author markers
|
|
217
253
|
if authors:
|
|
218
254
|
author_para = doc.add_paragraph()
|
|
219
255
|
for i, author in enumerate(authors):
|
|
@@ -231,25 +267,94 @@ class DocxWriter:
|
|
|
231
267
|
sup_run = author_para.add_run(",".join(affil_nums))
|
|
232
268
|
sup_run.font.superscript = True
|
|
233
269
|
|
|
270
|
+
# Add co-first author marker (dagger) if applicable
|
|
271
|
+
is_cofirst = author.get("co_first_author", False)
|
|
272
|
+
if is_cofirst:
|
|
273
|
+
cofirst_run = author_para.add_run("†")
|
|
274
|
+
cofirst_run.font.superscript = True
|
|
275
|
+
|
|
276
|
+
# Add corresponding author marker (asterisk) if applicable
|
|
277
|
+
is_corresponding = author.get("corresponding_author", False)
|
|
278
|
+
if is_corresponding:
|
|
279
|
+
corr_run = author_para.add_run("*")
|
|
280
|
+
corr_run.font.superscript = True
|
|
281
|
+
|
|
234
282
|
author_para.paragraph_format.space_after = Pt(8)
|
|
235
283
|
|
|
236
284
|
# Add affiliations
|
|
237
|
-
if
|
|
238
|
-
for
|
|
285
|
+
if ordered_affiliations:
|
|
286
|
+
for affil_num, _affil_shortname, affil_text in ordered_affiliations:
|
|
239
287
|
affil_para = doc.add_paragraph()
|
|
240
288
|
|
|
241
289
|
# Add superscript number
|
|
242
|
-
num_run = affil_para.add_run(str(
|
|
290
|
+
num_run = affil_para.add_run(str(affil_num))
|
|
243
291
|
num_run.font.superscript = True
|
|
292
|
+
num_run.font.size = Pt(8)
|
|
244
293
|
|
|
245
294
|
# Add affiliation text
|
|
246
|
-
affil_para.add_run(f" {affil_text}")
|
|
295
|
+
affil_run = affil_para.add_run(f" {affil_text}")
|
|
296
|
+
affil_run.font.size = Pt(8)
|
|
247
297
|
affil_para.paragraph_format.space_after = Pt(4)
|
|
248
|
-
affil_para.runs[1].font.size = Pt(10)
|
|
249
298
|
|
|
250
299
|
# Extra space after last affiliation
|
|
251
300
|
affil_para.paragraph_format.space_after = Pt(12)
|
|
252
301
|
|
|
302
|
+
# Add co-first author information if any (already extracted by processor)
|
|
303
|
+
if cofirst_authors:
|
|
304
|
+
cofirst_para = doc.add_paragraph()
|
|
305
|
+
cofirst_marker = cofirst_para.add_run("†")
|
|
306
|
+
cofirst_marker.font.superscript = True
|
|
307
|
+
cofirst_marker.font.size = Pt(8)
|
|
308
|
+
|
|
309
|
+
cofirst_label = cofirst_para.add_run(" These authors contributed equally: ")
|
|
310
|
+
cofirst_label.font.size = Pt(8)
|
|
311
|
+
|
|
312
|
+
for i, author in enumerate(cofirst_authors):
|
|
313
|
+
if i > 0:
|
|
314
|
+
sep_run = cofirst_para.add_run(", ")
|
|
315
|
+
sep_run.font.size = Pt(8)
|
|
316
|
+
|
|
317
|
+
name = author.get("name", "")
|
|
318
|
+
name_run = cofirst_para.add_run(name)
|
|
319
|
+
name_run.font.size = Pt(8)
|
|
320
|
+
|
|
321
|
+
cofirst_para.paragraph_format.space_after = Pt(12)
|
|
322
|
+
|
|
323
|
+
# Add corresponding author information if any (already extracted by processor)
|
|
324
|
+
if corresponding_authors:
|
|
325
|
+
corr_para = doc.add_paragraph()
|
|
326
|
+
corr_marker = corr_para.add_run("*")
|
|
327
|
+
corr_marker.font.superscript = True
|
|
328
|
+
corr_marker.font.size = Pt(8)
|
|
329
|
+
|
|
330
|
+
corr_label = corr_para.add_run(" Correspondence: ")
|
|
331
|
+
corr_label.font.size = Pt(8)
|
|
332
|
+
|
|
333
|
+
for i, author in enumerate(corresponding_authors):
|
|
334
|
+
if i > 0:
|
|
335
|
+
sep_run = corr_para.add_run("; ")
|
|
336
|
+
sep_run.font.size = Pt(8)
|
|
337
|
+
|
|
338
|
+
name = author.get("name", "")
|
|
339
|
+
email = author.get("email", "")
|
|
340
|
+
|
|
341
|
+
# Decode email if it's base64 encoded
|
|
342
|
+
if not email:
|
|
343
|
+
email64 = author.get("email64", "")
|
|
344
|
+
if email64:
|
|
345
|
+
try:
|
|
346
|
+
email = base64.b64decode(email64).decode("utf-8")
|
|
347
|
+
except Exception:
|
|
348
|
+
email = ""
|
|
349
|
+
|
|
350
|
+
if email:
|
|
351
|
+
info_run = corr_para.add_run(f"{name} ({email})")
|
|
352
|
+
else:
|
|
353
|
+
info_run = corr_para.add_run(name)
|
|
354
|
+
info_run.font.size = Pt(8)
|
|
355
|
+
|
|
356
|
+
corr_para.paragraph_format.space_after = Pt(12)
|
|
357
|
+
|
|
253
358
|
def _add_section(
|
|
254
359
|
self,
|
|
255
360
|
doc: Document,
|
|
@@ -278,7 +383,8 @@ class DocxWriter:
|
|
|
278
383
|
elif section_type == "code_block":
|
|
279
384
|
self._add_code_block(doc, section)
|
|
280
385
|
elif section_type == "comment":
|
|
281
|
-
self.
|
|
386
|
+
if not self.hide_comments:
|
|
387
|
+
self._add_comment(doc, section)
|
|
282
388
|
elif section_type == "figure":
|
|
283
389
|
self._add_figure(doc, section)
|
|
284
390
|
elif section_type == "table":
|
|
@@ -304,6 +410,7 @@ class DocxWriter:
|
|
|
304
410
|
run = para.add_run(text)
|
|
305
411
|
run.bold = True
|
|
306
412
|
run.font.size = Pt(12)
|
|
413
|
+
run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
|
|
307
414
|
|
|
308
415
|
def _add_heading(self, doc: Document, section: Dict[str, Any]):
|
|
309
416
|
"""Add heading to document.
|
|
@@ -314,7 +421,10 @@ class DocxWriter:
|
|
|
314
421
|
"""
|
|
315
422
|
level = section["level"]
|
|
316
423
|
text = section["text"]
|
|
317
|
-
doc.add_heading(text, level=level)
|
|
424
|
+
heading = doc.add_heading(text, level=level)
|
|
425
|
+
# Ensure heading text is black (not blue)
|
|
426
|
+
for run in heading.runs:
|
|
427
|
+
run.font.color.rgb = RGBColor(0, 0, 0) # Explicitly set to black
|
|
318
428
|
|
|
319
429
|
def _add_paragraph(
|
|
320
430
|
self,
|
|
@@ -371,9 +481,9 @@ class DocxWriter:
|
|
|
371
481
|
if run_data.get("xref"):
|
|
372
482
|
# Use color based on xref type (fig, sfig, stable, eq, etc.)
|
|
373
483
|
xref_type = run_data.get("xref_type", "cite")
|
|
374
|
-
run
|
|
484
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
375
485
|
if run_data.get("highlight_yellow"):
|
|
376
|
-
run
|
|
486
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
377
487
|
|
|
378
488
|
elif run_data["type"] == "hyperlink":
|
|
379
489
|
# Add hyperlink with yellow highlighting
|
|
@@ -387,18 +497,19 @@ class DocxWriter:
|
|
|
387
497
|
self._add_inline_equation(paragraph, latex_content)
|
|
388
498
|
|
|
389
499
|
elif run_data["type"] == "inline_comment":
|
|
390
|
-
# Add inline comment with gray highlighting
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
500
|
+
# Add inline comment with gray highlighting (unless hide_comments is enabled)
|
|
501
|
+
if not self.hide_comments:
|
|
502
|
+
comment_text = run_data["text"]
|
|
503
|
+
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
504
|
+
self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
|
|
505
|
+
run.italic = True
|
|
506
|
+
run.font.size = Pt(10)
|
|
396
507
|
|
|
397
508
|
elif run_data["type"] == "citation":
|
|
398
509
|
cite_num = run_data["number"]
|
|
399
510
|
# Add citation as [NN] inline with yellow highlighting
|
|
400
511
|
run = paragraph.add_run(f"[{cite_num}]")
|
|
401
|
-
run
|
|
512
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
402
513
|
run.font.size = Pt(10)
|
|
403
514
|
|
|
404
515
|
def _add_list(self, doc: Document, section: Dict[str, Any]):
|
|
@@ -436,12 +547,12 @@ class DocxWriter:
|
|
|
436
547
|
if run_data.get("xref"):
|
|
437
548
|
# Use color based on xref type
|
|
438
549
|
xref_type = run_data.get("xref_type", "cite")
|
|
439
|
-
run
|
|
550
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
440
551
|
if run_data.get("highlight_yellow"):
|
|
441
|
-
run
|
|
552
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
442
553
|
run.font.size = Pt(10)
|
|
443
554
|
if run_data.get("highlight_yellow"):
|
|
444
|
-
run
|
|
555
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
445
556
|
elif run_data["type"] == "hyperlink":
|
|
446
557
|
text = run_data.get("text", "")
|
|
447
558
|
url = run_data.get("url", "")
|
|
@@ -451,18 +562,19 @@ class DocxWriter:
|
|
|
451
562
|
latex_content = run_data.get("latex", "")
|
|
452
563
|
self._add_inline_equation(paragraph, latex_content)
|
|
453
564
|
elif run_data["type"] == "inline_comment":
|
|
454
|
-
# Add inline comment with gray highlighting
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
565
|
+
# Add inline comment with gray highlighting (unless hide_comments is enabled)
|
|
566
|
+
if not self.hide_comments:
|
|
567
|
+
comment_text = run_data["text"]
|
|
568
|
+
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
569
|
+
self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
|
|
570
|
+
run.italic = True
|
|
571
|
+
run.font.size = Pt(10)
|
|
460
572
|
elif run_data["type"] == "citation":
|
|
461
573
|
cite_num = run_data["number"]
|
|
462
574
|
run = paragraph.add_run(f"[{cite_num}]")
|
|
463
575
|
run.bold = True
|
|
464
576
|
run.font.size = Pt(10)
|
|
465
|
-
run
|
|
577
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
466
578
|
|
|
467
579
|
def _add_code_block(self, doc: Document, section: Dict[str, Any]):
|
|
468
580
|
"""Add code block to document.
|
|
@@ -495,7 +607,7 @@ class DocxWriter:
|
|
|
495
607
|
|
|
496
608
|
# Add comment text with light gray highlighting to distinguish from colored xrefs
|
|
497
609
|
run = paragraph.add_run(f"[Comment: {comment_text}]")
|
|
498
|
-
run
|
|
610
|
+
self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
|
|
499
611
|
run.italic = True
|
|
500
612
|
run.font.size = Pt(10)
|
|
501
613
|
|
|
@@ -625,11 +737,11 @@ class DocxWriter:
|
|
|
625
737
|
else:
|
|
626
738
|
run = caption_para.add_run(f"Fig. {figure_number}. ")
|
|
627
739
|
run.bold = True
|
|
628
|
-
run.font.size = Pt(
|
|
740
|
+
run.font.size = Pt(8)
|
|
629
741
|
else:
|
|
630
742
|
run = caption_para.add_run("Figure: ")
|
|
631
743
|
run.bold = True
|
|
632
|
-
run.font.size = Pt(
|
|
744
|
+
run.font.size = Pt(8)
|
|
633
745
|
|
|
634
746
|
# Parse and add caption with inline formatting
|
|
635
747
|
# Import the processor to parse inline formatting
|
|
@@ -642,7 +754,7 @@ class DocxWriter:
|
|
|
642
754
|
if run_data["type"] == "text":
|
|
643
755
|
text = run_data["text"]
|
|
644
756
|
run = caption_para.add_run(text)
|
|
645
|
-
run.font.size = Pt(
|
|
757
|
+
run.font.size = Pt(8)
|
|
646
758
|
|
|
647
759
|
# Apply formatting
|
|
648
760
|
if run_data.get("bold"):
|
|
@@ -658,26 +770,27 @@ class DocxWriter:
|
|
|
658
770
|
if run_data.get("xref"):
|
|
659
771
|
# Use color based on xref type
|
|
660
772
|
xref_type = run_data.get("xref_type", "cite")
|
|
661
|
-
run
|
|
773
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
662
774
|
if run_data.get("highlight_yellow"):
|
|
663
|
-
run
|
|
775
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
664
776
|
elif run_data["type"] == "inline_equation":
|
|
665
777
|
# Add inline equation as Office Math
|
|
666
778
|
latex_content = run_data.get("latex", "")
|
|
667
779
|
self._add_inline_equation(caption_para, latex_content)
|
|
668
780
|
elif run_data["type"] == "inline_comment":
|
|
669
|
-
# Add inline comment with gray highlighting
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
781
|
+
# Add inline comment with gray highlighting (unless hide_comments is enabled)
|
|
782
|
+
if not self.hide_comments:
|
|
783
|
+
comment_text = run_data["text"]
|
|
784
|
+
run = caption_para.add_run(f"[Comment: {comment_text}]")
|
|
785
|
+
self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
|
|
786
|
+
run.italic = True
|
|
787
|
+
run.font.size = Pt(8)
|
|
675
788
|
elif run_data["type"] == "citation":
|
|
676
789
|
cite_num = run_data["number"]
|
|
677
790
|
run = caption_para.add_run(f"[{cite_num}]")
|
|
678
791
|
run.bold = True
|
|
679
|
-
run.font.size = Pt(
|
|
680
|
-
run
|
|
792
|
+
run.font.size = Pt(8)
|
|
793
|
+
self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
|
|
681
794
|
|
|
682
795
|
# Add spacing after figure (reduced from 12 to 6 for compactness)
|
|
683
796
|
caption_para.paragraph_format.space_after = Pt(6)
|
|
@@ -752,7 +865,7 @@ class DocxWriter:
|
|
|
752
865
|
if run_data.get("xref"):
|
|
753
866
|
# Use color based on xref type
|
|
754
867
|
xref_type = run_data.get("xref_type", "cite")
|
|
755
|
-
run
|
|
868
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
756
869
|
|
|
757
870
|
# Add table caption if present
|
|
758
871
|
caption = section.get("caption")
|
|
@@ -775,7 +888,7 @@ class DocxWriter:
|
|
|
775
888
|
# Fallback if label not in map
|
|
776
889
|
run = caption_para.add_run("Supp. Table: ")
|
|
777
890
|
run.bold = True
|
|
778
|
-
run.font.size = Pt(
|
|
891
|
+
run.font.size = Pt(8)
|
|
779
892
|
elif label and label.startswith("table:"):
|
|
780
893
|
# Extract label name for main tables
|
|
781
894
|
label_name = label.split(":", 1)[1] if ":" in label else label
|
|
@@ -786,7 +899,7 @@ class DocxWriter:
|
|
|
786
899
|
else:
|
|
787
900
|
run = caption_para.add_run("Table: ")
|
|
788
901
|
run.bold = True
|
|
789
|
-
run.font.size = Pt(
|
|
902
|
+
run.font.size = Pt(8)
|
|
790
903
|
|
|
791
904
|
# Parse and add caption with inline formatting
|
|
792
905
|
caption_runs = processor._parse_inline_formatting(caption, {})
|
|
@@ -794,7 +907,7 @@ class DocxWriter:
|
|
|
794
907
|
if run_data["type"] == "text":
|
|
795
908
|
text = run_data["text"]
|
|
796
909
|
run = caption_para.add_run(text)
|
|
797
|
-
run.font.size = Pt(
|
|
910
|
+
run.font.size = Pt(8)
|
|
798
911
|
if run_data.get("bold"):
|
|
799
912
|
run.bold = True
|
|
800
913
|
if run_data.get("italic"):
|
|
@@ -810,7 +923,7 @@ class DocxWriter:
|
|
|
810
923
|
if run_data.get("xref"):
|
|
811
924
|
# Use color based on xref type
|
|
812
925
|
xref_type = run_data.get("xref_type", "cite")
|
|
813
|
-
run
|
|
926
|
+
self._apply_highlight(run, self.get_xref_color(xref_type))
|
|
814
927
|
|
|
815
928
|
# Add spacing after table (reduced from 12 to 6 for compactness)
|
|
816
929
|
caption_para.paragraph_format.space_after = Pt(6)
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
r"""LaTeX accent character to Unicode conversion map.
|
|
2
|
+
|
|
3
|
+
This module provides centralized mapping of LaTeX accent commands to their
|
|
4
|
+
Unicode equivalents. Used by both DOCX export and LaTeX processing to ensure
|
|
5
|
+
consistent character handling across formats.
|
|
6
|
+
|
|
7
|
+
Examples:
|
|
8
|
+
>>> clean_latex_accents("\\'e")
|
|
9
|
+
'é'
|
|
10
|
+
>>> clean_latex_accents("Caf{\\'e}")
|
|
11
|
+
'Café'
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Dict
|
|
15
|
+
|
|
16
|
+
# LaTeX accent commands to Unicode character mapping
|
|
17
|
+
# Handles both with and without backslashes (BibTeX parser may strip them)
|
|
18
|
+
# Also handles variant forms where backslash is replaced with the literal character
|
|
19
|
+
ACCENT_MAP: Dict[str, str] = {
    # Acute accents (é, á, í, ó, ú) - use non-raw strings for single backslash
    "\\'e": "é",
    "{\\'e}": "é",
    "{'e}": "é",
    "{'é}": "é",
    "\\'a": "á",
    "{\\'a}": "á",
    "{'a}": "á",
    "{'á}": "á",
    "\\'i": "í",
    "{\\'i}": "í",
    "{'i}": "í",
    "{'í}": "í",
    "'{\\i}": "í",  # Acute on dotless i
    "\\'o": "ó",
    "{\\'o}": "ó",
    "{'o}": "ó",
    "{'ó}": "ó",
    "'{o}": "ó",  # Acute o (variant without backslash)
    "\\'u": "ú",
    "{\\'u}": "ú",
    "{'u}": "ú",
    "{'ú}": "ú",
    # Uppercase acute accents
    "\\'E": "É",
    "{\\'E}": "É",
    "{'E}": "É",
    "\\'A": "Á",
    "{\\'A}": "Á",
    "{'A}": "Á",
    "\\'I": "Í",
    "{\\'I}": "Í",
    "{'I}": "Í",
    "'{\\I}": "Í",  # Acute on uppercase dotless I
    "\\'O": "Ó",
    "{\\'O}": "Ó",
    "{'O}": "Ó",
    "'{O}": "Ó",
    "\\'U": "Ú",
    "{\\'U}": "Ú",
    "{'U}": "Ú",
    # Umlaut/diaeresis (ë, ä, ï, ö, ü)
    '\\"e': "ë",
    '{\\"e}': "ë",
    '{"e}': "ë",
    '{"ë}': "ë",
    '\\"a': "ä",
    '{\\"a}': "ä",
    '{"a}': "ä",
    '{"ä}': "ä",
    '\\"i': "ï",
    '{\\"i}': "ï",
    '{"i}': "ï",
    '{"ï}': "ï",
    '\\"o': "ö",
    '{\\"o}': "ö",
    '{"o}': "ö",
    '{"ö}': "ö",
    '\\"u': "ü",
    '{\\"u}': "ü",
    '{"u}': "ü",
    '{"ü}': "ü",
    # Grave accents (è, à)
    "\\`e": "è",
    "{\\`e}": "è",
    "{`e}": "è",
    "{`è}": "è",
    "\\`a": "à",
    "{\\`a}": "à",
    "{`a}": "à",
    "{`à}": "à",
    # Circumflex (ê, â)
    "\\^e": "ê",
    "{\\^e}": "ê",
    "{^e}": "ê",
    "{^ê}": "ê",
    "\\^a": "â",
    "{\\^a}": "â",
    "{^a}": "â",
    "{^â}": "â",
    # Tilde (ñ, ã, õ)
    "\\~n": "ñ",
    "{\\~n}": "ñ",
    "{~n}": "ñ",
    "{~ñ}": "ñ",
    "~{n}": "ñ",
    "\\~a": "ã",
    "{\\~a}": "ã",
    "{~a}": "ã",
    "~{a}": "ã",  # Tilde on a (variant)
    "{~ã}": "ã",
    "\\~o": "õ",
    "{\\~o}": "õ",
    "{~o}": "õ",
    "~{o}": "õ",  # Tilde on o (variant)
    "{~õ}": "õ",
    # Uppercase tilde
    "\\~N": "Ñ",
    "{\\~N}": "Ñ",
    "~{N}": "Ñ",
    "\\~A": "Ã",
    "{\\~A}": "Ã",
    "~{A}": "Ã",
    "\\~O": "Õ",
    "{\\~O}": "Õ",
    "~{O}": "Õ",
    # Cedilla (ç)
    "\\c{c}": "ç",
    "{\\c{c}}": "ç",
    "{\\c{ç}}": "ç",
}


def clean_latex_accents(text: str) -> str:
    r"""Convert LaTeX accent commands to Unicode characters.

    Every key of ``ACCENT_MAP`` found in ``text`` is replaced by its Unicode
    value. Keys are applied longest first, so a braced form such as
    ``{\'e}`` is consumed as a whole before the bare ``\'e`` it contains can
    match and leave the surrounding braces behind.

    Args:
        text: Text containing LaTeX accent commands

    Returns:
        Text with accent commands converted to Unicode

    Examples:
        >>> clean_latex_accents("Caf{\\'e}")
        'Café'
        >>> clean_latex_accents("Se\\~nor")
        'Señor'
    """
    # sorted() is stable, so keys of equal length keep their ACCENT_MAP order.
    # Sorting on each call keeps the function in step with any runtime edits
    # to ACCENT_MAP; the map is small, so the cost is negligible.
    replacements = sorted(ACCENT_MAP.items(), key=lambda item: len(item[0]), reverse=True)
    for latex_cmd, unicode_char in replacements:
        text = text.replace(latex_cmd, unicode_char)
    return text
|