rxiv-maker 1.17.0__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ This module handles the actual generation of DOCX files using python-docx,
4
4
  writing structured content with formatting, citations, and references.
5
5
  """
6
6
 
7
+ import base64
7
8
  from pathlib import Path
8
9
  from typing import Any, Dict, Optional
9
10
 
@@ -11,11 +12,12 @@ from docx import Document
11
12
  from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_COLOR_INDEX
12
13
  from docx.oxml import OxmlElement
13
14
  from docx.oxml.ns import qn
14
- from docx.shared import Inches, Pt
15
+ from docx.shared import Inches, Pt, RGBColor
15
16
  from latex2mathml.converter import convert as latex_to_mathml
16
17
  from lxml import etree
17
18
 
18
19
  from ..core.logging_config import get_logger
20
+ from ..utils.author_affiliation_processor import AuthorAffiliationProcessor
19
21
  from ..utils.docx_helpers import convert_pdf_to_image
20
22
 
21
23
  logger = get_logger()
@@ -57,6 +59,8 @@ class DocxWriter:
57
59
  metadata: Optional[Dict[str, Any]] = None,
58
60
  table_map: Optional[Dict[str, int]] = None,
59
61
  figures_at_end: bool = False,
62
+ hide_highlighting: bool = False,
63
+ hide_comments: bool = False,
60
64
  ) -> Path:
61
65
  """Write DOCX file from structured content.
62
66
 
@@ -69,6 +73,8 @@ class DocxWriter:
69
73
  metadata: Document metadata (title, authors, affiliations)
70
74
  table_map: Mapping from table labels to numbers (for supplementary tables)
71
75
  figures_at_end: Place main figures at end before SI/bibliography
76
+ hide_highlighting: Disable colored highlighting on references and citations
77
+ hide_comments: Exclude all comments (block and inline) from output
72
78
 
73
79
  Returns:
74
80
  Path to created DOCX file
@@ -77,8 +83,13 @@ class DocxWriter:
77
83
  self.bibliography = bibliography
78
84
  self.include_footnotes = include_footnotes
79
85
  self.table_map = table_map or {}
86
+ self.hide_highlighting = hide_highlighting
87
+ self.hide_comments = hide_comments
80
88
  doc = Document()
81
89
 
90
+ # Set default font to Arial for entire document
91
+ self._set_default_font(doc, "Arial")
92
+
82
93
  # Add title and author information if metadata provided
83
94
  if metadata:
84
95
  self._add_title_page(doc, metadata)
@@ -123,14 +134,18 @@ class DocxWriter:
123
134
  # Add collected main figures at the end (before bibliography)
124
135
  if figures_at_end and collected_main_figures:
125
136
  doc.add_page_break()
126
- doc.add_heading("Figures", level=1)
137
+ heading = doc.add_heading("Figures", level=1)
138
+ for run in heading.runs:
139
+ run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
127
140
  for section, fig_num in collected_main_figures:
128
141
  self._add_figure(doc, section, figure_number=fig_num, is_supplementary=False)
129
142
 
130
143
  # Add bibliography section at the end
131
144
  if include_footnotes and bibliography:
132
145
  doc.add_page_break()
133
- doc.add_heading("Bibliography", level=1)
146
+ heading = doc.add_heading("Bibliography", level=1)
147
+ for run in heading.runs:
148
+ run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
134
149
 
135
150
  # Add numbered bibliography entries
136
151
  for num in sorted(bibliography.keys()):
@@ -144,12 +159,12 @@ class DocxWriter:
144
159
  # Add formatted bibliography text (without DOI - added separately below)
145
160
  para.add_run(bib_entry["formatted"])
146
161
 
147
- # Add DOI as hyperlink with yellow highlighting if present
162
+ # Add DOI as hyperlink with yellow highlighting if present (unless hide_highlighting is enabled)
148
163
  if bib_entry.get("doi"):
149
164
  doi = bib_entry["doi"]
150
165
  doi_url = f"https://doi.org/{doi}" if not doi.startswith("http") else doi
151
166
  para.add_run("\nDOI: ")
152
- self._add_hyperlink(para, doi_url, doi_url, highlight=True)
167
+ self._add_hyperlink(para, doi_url, doi_url, highlight=not self.hide_highlighting)
153
168
 
154
169
  # Add spacing between entries
155
170
  para.paragraph_format.space_after = Pt(6)
@@ -158,6 +173,38 @@ class DocxWriter:
158
173
  doc.save(str(output_path))
159
174
  return output_path
160
175
 
176
def _set_default_font(self, doc: Document, font_name: str):
    """Apply ``font_name`` to the document's base style and heading styles.

    Args:
        doc: Document object whose styles are modified in place
        font_name: Font name to use (e.g., "Arial", "Times New Roman")
    """
    # The "Normal" style receives both the font name and the 10 pt body size.
    normal_font = doc.styles["Normal"].font
    normal_font.name = font_name
    normal_font.size = Pt(10)

    # "Heading 1" .. "Heading 9" receive the font name only; a level that is
    # missing from the document's style collection is ignored.
    for level in range(1, 10):
        style_name = f"Heading {level}"
        try:
            doc.styles[style_name].font.name = font_name
        except KeyError:
            continue
197
+
198
def _apply_highlight(self, run, color: WD_COLOR_INDEX):
    """Set the highlight color of a run when highlighting is enabled.

    Args:
        run: The run object whose ``font.highlight_color`` is assigned
        color: The WD_COLOR_INDEX color to assign
    """
    # With hide_highlighting set, the run is left exactly as it was.
    if self.hide_highlighting:
        return
    run.font.highlight_color = color
207
+
161
208
  def _add_title_page(self, doc: Document, metadata: Dict[str, Any]):
162
209
  """Add title, author and affiliation information.
163
210
 
@@ -193,27 +240,16 @@ class DocxWriter:
193
240
  if not authors:
194
241
  return # Nothing more to add
195
242
 
196
- # Collect unique affiliations and build mapping
197
- all_affiliations = []
198
- affiliation_map = {} # Maps affiliation shortname to number
199
-
200
- # Get full affiliation details from metadata
201
- affiliation_details = {a.get("shortname"): a for a in metadata.get("affiliations", [])}
202
-
203
- for author in authors:
204
- author_affils = author.get("affiliations", [])
205
- for affil_shortname in author_affils:
206
- if affil_shortname not in affiliation_map:
207
- affiliation_map[affil_shortname] = len(affiliation_map) + 1
208
- # Look up full affiliation info
209
- affil_info = affiliation_details.get(affil_shortname, {})
210
- full_name = affil_info.get("full_name", affil_shortname)
211
- location = affil_info.get("location", "")
212
- # Format: "Full Name, Location" or just "Full Name" if no location
213
- affil_text = f"{full_name}, {location}" if location else full_name
214
- all_affiliations.append(affil_text)
215
-
216
- # Add authors with superscript affiliation numbers
243
+ # Process author and affiliation metadata using centralized processor
244
+ processor = AuthorAffiliationProcessor()
245
+ processed = processor.process(metadata)
246
+
247
+ affiliation_map = processed["affiliation_map"]
248
+ ordered_affiliations = processed["ordered_affiliations"]
249
+ cofirst_authors = processed["cofirst_authors"]
250
+ corresponding_authors = processed["corresponding_authors"]
251
+
252
+ # Add authors with superscript affiliation numbers and corresponding author markers
217
253
  if authors:
218
254
  author_para = doc.add_paragraph()
219
255
  for i, author in enumerate(authors):
@@ -231,25 +267,94 @@ class DocxWriter:
231
267
  sup_run = author_para.add_run(",".join(affil_nums))
232
268
  sup_run.font.superscript = True
233
269
 
270
+ # Add co-first author marker (dagger) if applicable
271
+ is_cofirst = author.get("co_first_author", False)
272
+ if is_cofirst:
273
+ cofirst_run = author_para.add_run("†")
274
+ cofirst_run.font.superscript = True
275
+
276
+ # Add corresponding author marker (asterisk) if applicable
277
+ is_corresponding = author.get("corresponding_author", False)
278
+ if is_corresponding:
279
+ corr_run = author_para.add_run("*")
280
+ corr_run.font.superscript = True
281
+
234
282
  author_para.paragraph_format.space_after = Pt(8)
235
283
 
236
284
  # Add affiliations
237
- if all_affiliations:
238
- for i, affil_text in enumerate(all_affiliations, start=1):
285
+ if ordered_affiliations:
286
+ for affil_num, _affil_shortname, affil_text in ordered_affiliations:
239
287
  affil_para = doc.add_paragraph()
240
288
 
241
289
  # Add superscript number
242
- num_run = affil_para.add_run(str(i))
290
+ num_run = affil_para.add_run(str(affil_num))
243
291
  num_run.font.superscript = True
292
+ num_run.font.size = Pt(8)
244
293
 
245
294
  # Add affiliation text
246
- affil_para.add_run(f" {affil_text}")
295
+ affil_run = affil_para.add_run(f" {affil_text}")
296
+ affil_run.font.size = Pt(8)
247
297
  affil_para.paragraph_format.space_after = Pt(4)
248
- affil_para.runs[1].font.size = Pt(10)
249
298
 
250
299
  # Extra space after last affiliation
251
300
  affil_para.paragraph_format.space_after = Pt(12)
252
301
 
302
+ # Add co-first author information if any (already extracted by processor)
303
+ if cofirst_authors:
304
+ cofirst_para = doc.add_paragraph()
305
+ cofirst_marker = cofirst_para.add_run("†")
306
+ cofirst_marker.font.superscript = True
307
+ cofirst_marker.font.size = Pt(8)
308
+
309
+ cofirst_label = cofirst_para.add_run(" These authors contributed equally: ")
310
+ cofirst_label.font.size = Pt(8)
311
+
312
+ for i, author in enumerate(cofirst_authors):
313
+ if i > 0:
314
+ sep_run = cofirst_para.add_run(", ")
315
+ sep_run.font.size = Pt(8)
316
+
317
+ name = author.get("name", "")
318
+ name_run = cofirst_para.add_run(name)
319
+ name_run.font.size = Pt(8)
320
+
321
+ cofirst_para.paragraph_format.space_after = Pt(12)
322
+
323
+ # Add corresponding author information if any (already extracted by processor)
324
+ if corresponding_authors:
325
+ corr_para = doc.add_paragraph()
326
+ corr_marker = corr_para.add_run("*")
327
+ corr_marker.font.superscript = True
328
+ corr_marker.font.size = Pt(8)
329
+
330
+ corr_label = corr_para.add_run(" Correspondence: ")
331
+ corr_label.font.size = Pt(8)
332
+
333
+ for i, author in enumerate(corresponding_authors):
334
+ if i > 0:
335
+ sep_run = corr_para.add_run("; ")
336
+ sep_run.font.size = Pt(8)
337
+
338
+ name = author.get("name", "")
339
+ email = author.get("email", "")
340
+
341
+ # Decode email if it's base64 encoded
342
+ if not email:
343
+ email64 = author.get("email64", "")
344
+ if email64:
345
+ try:
346
+ email = base64.b64decode(email64).decode("utf-8")
347
+ except Exception:
348
+ email = ""
349
+
350
+ if email:
351
+ info_run = corr_para.add_run(f"{name} ({email})")
352
+ else:
353
+ info_run = corr_para.add_run(name)
354
+ info_run.font.size = Pt(8)
355
+
356
+ corr_para.paragraph_format.space_after = Pt(12)
357
+
253
358
  def _add_section(
254
359
  self,
255
360
  doc: Document,
@@ -278,7 +383,8 @@ class DocxWriter:
278
383
  elif section_type == "code_block":
279
384
  self._add_code_block(doc, section)
280
385
  elif section_type == "comment":
281
- self._add_comment(doc, section)
386
+ if not self.hide_comments:
387
+ self._add_comment(doc, section)
282
388
  elif section_type == "figure":
283
389
  self._add_figure(doc, section)
284
390
  elif section_type == "table":
@@ -304,6 +410,7 @@ class DocxWriter:
304
410
  run = para.add_run(text)
305
411
  run.bold = True
306
412
  run.font.size = Pt(12)
413
+ run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
307
414
 
308
415
  def _add_heading(self, doc: Document, section: Dict[str, Any]):
309
416
  """Add heading to document.
@@ -314,7 +421,10 @@ class DocxWriter:
314
421
  """
315
422
  level = section["level"]
316
423
  text = section["text"]
317
- doc.add_heading(text, level=level)
424
+ heading = doc.add_heading(text, level=level)
425
+ # Ensure heading text is black (not blue)
426
+ for run in heading.runs:
427
+ run.font.color.rgb = RGBColor(0, 0, 0) # Explicitly set to black
318
428
 
319
429
  def _add_paragraph(
320
430
  self,
@@ -371,9 +481,9 @@ class DocxWriter:
371
481
  if run_data.get("xref"):
372
482
  # Use color based on xref type (fig, sfig, stable, eq, etc.)
373
483
  xref_type = run_data.get("xref_type", "cite")
374
- run.font.highlight_color = self.get_xref_color(xref_type)
484
+ self._apply_highlight(run, self.get_xref_color(xref_type))
375
485
  if run_data.get("highlight_yellow"):
376
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
486
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
377
487
 
378
488
  elif run_data["type"] == "hyperlink":
379
489
  # Add hyperlink with yellow highlighting
@@ -387,18 +497,19 @@ class DocxWriter:
387
497
  self._add_inline_equation(paragraph, latex_content)
388
498
 
389
499
  elif run_data["type"] == "inline_comment":
390
- # Add inline comment with gray highlighting
391
- comment_text = run_data["text"]
392
- run = paragraph.add_run(f"[Comment: {comment_text}]")
393
- run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
394
- run.italic = True
395
- run.font.size = Pt(10)
500
+ # Add inline comment with gray highlighting (unless hide_comments is enabled)
501
+ if not self.hide_comments:
502
+ comment_text = run_data["text"]
503
+ run = paragraph.add_run(f"[Comment: {comment_text}]")
504
+ self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
505
+ run.italic = True
506
+ run.font.size = Pt(10)
396
507
 
397
508
  elif run_data["type"] == "citation":
398
509
  cite_num = run_data["number"]
399
510
  # Add citation as [NN] inline with yellow highlighting
400
511
  run = paragraph.add_run(f"[{cite_num}]")
401
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
512
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
402
513
  run.font.size = Pt(10)
403
514
 
404
515
  def _add_list(self, doc: Document, section: Dict[str, Any]):
@@ -436,12 +547,12 @@ class DocxWriter:
436
547
  if run_data.get("xref"):
437
548
  # Use color based on xref type
438
549
  xref_type = run_data.get("xref_type", "cite")
439
- run.font.highlight_color = self.get_xref_color(xref_type)
550
+ self._apply_highlight(run, self.get_xref_color(xref_type))
440
551
  if run_data.get("highlight_yellow"):
441
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
552
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
442
553
  run.font.size = Pt(10)
443
554
  if run_data.get("highlight_yellow"):
444
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
555
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
445
556
  elif run_data["type"] == "hyperlink":
446
557
  text = run_data.get("text", "")
447
558
  url = run_data.get("url", "")
@@ -451,18 +562,19 @@ class DocxWriter:
451
562
  latex_content = run_data.get("latex", "")
452
563
  self._add_inline_equation(paragraph, latex_content)
453
564
  elif run_data["type"] == "inline_comment":
454
- # Add inline comment with gray highlighting
455
- comment_text = run_data["text"]
456
- run = paragraph.add_run(f"[Comment: {comment_text}]")
457
- run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
458
- run.italic = True
459
- run.font.size = Pt(10)
565
+ # Add inline comment with gray highlighting (unless hide_comments is enabled)
566
+ if not self.hide_comments:
567
+ comment_text = run_data["text"]
568
+ run = paragraph.add_run(f"[Comment: {comment_text}]")
569
+ self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
570
+ run.italic = True
571
+ run.font.size = Pt(10)
460
572
  elif run_data["type"] == "citation":
461
573
  cite_num = run_data["number"]
462
574
  run = paragraph.add_run(f"[{cite_num}]")
463
575
  run.bold = True
464
576
  run.font.size = Pt(10)
465
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
577
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
466
578
 
467
579
  def _add_code_block(self, doc: Document, section: Dict[str, Any]):
468
580
  """Add code block to document.
@@ -495,7 +607,7 @@ class DocxWriter:
495
607
 
496
608
  # Add comment text with light gray highlighting to distinguish from colored xrefs
497
609
  run = paragraph.add_run(f"[Comment: {comment_text}]")
498
- run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
610
+ self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
499
611
  run.italic = True
500
612
  run.font.size = Pt(10)
501
613
 
@@ -625,11 +737,11 @@ class DocxWriter:
625
737
  else:
626
738
  run = caption_para.add_run(f"Fig. {figure_number}. ")
627
739
  run.bold = True
628
- run.font.size = Pt(7)
740
+ run.font.size = Pt(8)
629
741
  else:
630
742
  run = caption_para.add_run("Figure: ")
631
743
  run.bold = True
632
- run.font.size = Pt(7)
744
+ run.font.size = Pt(8)
633
745
 
634
746
  # Parse and add caption with inline formatting
635
747
  # Import the processor to parse inline formatting
@@ -642,7 +754,7 @@ class DocxWriter:
642
754
  if run_data["type"] == "text":
643
755
  text = run_data["text"]
644
756
  run = caption_para.add_run(text)
645
- run.font.size = Pt(7)
757
+ run.font.size = Pt(8)
646
758
 
647
759
  # Apply formatting
648
760
  if run_data.get("bold"):
@@ -658,26 +770,27 @@ class DocxWriter:
658
770
  if run_data.get("xref"):
659
771
  # Use color based on xref type
660
772
  xref_type = run_data.get("xref_type", "cite")
661
- run.font.highlight_color = self.get_xref_color(xref_type)
773
+ self._apply_highlight(run, self.get_xref_color(xref_type))
662
774
  if run_data.get("highlight_yellow"):
663
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
775
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
664
776
  elif run_data["type"] == "inline_equation":
665
777
  # Add inline equation as Office Math
666
778
  latex_content = run_data.get("latex", "")
667
779
  self._add_inline_equation(caption_para, latex_content)
668
780
  elif run_data["type"] == "inline_comment":
669
- # Add inline comment with gray highlighting
670
- comment_text = run_data["text"]
671
- run = caption_para.add_run(f"[Comment: {comment_text}]")
672
- run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
673
- run.italic = True
674
- run.font.size = Pt(7)
781
+ # Add inline comment with gray highlighting (unless hide_comments is enabled)
782
+ if not self.hide_comments:
783
+ comment_text = run_data["text"]
784
+ run = caption_para.add_run(f"[Comment: {comment_text}]")
785
+ self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
786
+ run.italic = True
787
+ run.font.size = Pt(8)
675
788
  elif run_data["type"] == "citation":
676
789
  cite_num = run_data["number"]
677
790
  run = caption_para.add_run(f"[{cite_num}]")
678
791
  run.bold = True
679
- run.font.size = Pt(7)
680
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
792
+ run.font.size = Pt(8)
793
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
681
794
 
682
795
  # Add spacing after figure (reduced from 12 to 6 for compactness)
683
796
  caption_para.paragraph_format.space_after = Pt(6)
@@ -752,7 +865,7 @@ class DocxWriter:
752
865
  if run_data.get("xref"):
753
866
  # Use color based on xref type
754
867
  xref_type = run_data.get("xref_type", "cite")
755
- run.font.highlight_color = self.get_xref_color(xref_type)
868
+ self._apply_highlight(run, self.get_xref_color(xref_type))
756
869
 
757
870
  # Add table caption if present
758
871
  caption = section.get("caption")
@@ -775,7 +888,7 @@ class DocxWriter:
775
888
  # Fallback if label not in map
776
889
  run = caption_para.add_run("Supp. Table: ")
777
890
  run.bold = True
778
- run.font.size = Pt(7)
891
+ run.font.size = Pt(8)
779
892
  elif label and label.startswith("table:"):
780
893
  # Extract label name for main tables
781
894
  label_name = label.split(":", 1)[1] if ":" in label else label
@@ -786,7 +899,7 @@ class DocxWriter:
786
899
  else:
787
900
  run = caption_para.add_run("Table: ")
788
901
  run.bold = True
789
- run.font.size = Pt(7)
902
+ run.font.size = Pt(8)
790
903
 
791
904
  # Parse and add caption with inline formatting
792
905
  caption_runs = processor._parse_inline_formatting(caption, {})
@@ -794,7 +907,7 @@ class DocxWriter:
794
907
  if run_data["type"] == "text":
795
908
  text = run_data["text"]
796
909
  run = caption_para.add_run(text)
797
- run.font.size = Pt(7)
910
+ run.font.size = Pt(8)
798
911
  if run_data.get("bold"):
799
912
  run.bold = True
800
913
  if run_data.get("italic"):
@@ -810,7 +923,7 @@ class DocxWriter:
810
923
  if run_data.get("xref"):
811
924
  # Use color based on xref type
812
925
  xref_type = run_data.get("xref_type", "cite")
813
- run.font.highlight_color = self.get_xref_color(xref_type)
926
+ self._apply_highlight(run, self.get_xref_color(xref_type))
814
927
 
815
928
  # Add spacing after table (reduced from 12 to 6 for compactness)
816
929
  caption_para.paragraph_format.space_after = Pt(6)
@@ -0,0 +1,150 @@
1
r"""LaTeX accent character to Unicode conversion map.

This module provides centralized mapping of LaTeX accent commands to their
Unicode equivalents. Used by both DOCX export and LaTeX processing to ensure
consistent character handling across formats.

Examples:
    >>> clean_latex_accents("\\'e")
    'é'
    >>> clean_latex_accents("Caf{\\'e}")
    'Café'
"""

from typing import Dict

# LaTeX accent commands to Unicode character mapping
# Handles both with and without backslashes (BibTeX parser may strip them)
# Also handles variant forms where backslash is replaced with the literal character
# NOTE: several keys are substrings of longer keys (e.g. "\\'e" inside "{\\'e}"),
# so consumers must apply the longest keys first; see clean_latex_accents().
ACCENT_MAP: Dict[str, str] = {
    # Acute accents (é, á, í, ó, ú) - use non-raw strings for single backslash
    "\\'e": "é",
    "{\\'e}": "é",
    "{'e}": "é",
    "{'é}": "é",
    "\\'a": "á",
    "{\\'a}": "á",
    "{'a}": "á",
    "{'á}": "á",
    "\\'i": "í",
    "{\\'i}": "í",
    "{'i}": "í",
    "{'í}": "í",
    "'{\\i}": "í",  # Acute on dotless i
    "\\'o": "ó",
    "{\\'o}": "ó",
    "{'o}": "ó",
    "{'ó}": "ó",
    "'{o}": "ó",  # Acute o (variant without backslash)
    "\\'u": "ú",
    "{\\'u}": "ú",
    "{'u}": "ú",
    "{'ú}": "ú",
    # Uppercase acute accents
    "\\'E": "É",
    "{\\'E}": "É",
    "{'E}": "É",
    "\\'A": "Á",
    "{\\'A}": "Á",
    "{'A}": "Á",
    "\\'I": "Í",
    "{\\'I}": "Í",
    "{'I}": "Í",
    "'{\\I}": "Í",  # Acute on uppercase dotless I
    "\\'O": "Ó",
    "{\\'O}": "Ó",
    "{'O}": "Ó",
    "'{O}": "Ó",
    "\\'U": "Ú",
    "{\\'U}": "Ú",
    "{'U}": "Ú",
    # Umlaut/diaeresis (ë, ä, ï, ö, ü)
    '\\"e': "ë",
    '{\\"e}': "ë",
    '{"e}': "ë",
    '{"ë}': "ë",
    '\\"a': "ä",
    '{\\"a}': "ä",
    '{"a}': "ä",
    '{"ä}': "ä",
    '\\"i': "ï",
    '{\\"i}': "ï",
    '{"i}': "ï",
    '{"ï}': "ï",
    '\\"o': "ö",
    '{\\"o}': "ö",
    '{"o}': "ö",
    '{"ö}': "ö",
    '\\"u': "ü",
    '{\\"u}': "ü",
    '{"u}': "ü",
    '{"ü}': "ü",
    # Grave accents (è, à)
    "\\`e": "è",
    "{\\`e}": "è",
    "{`e}": "è",
    "{`è}": "è",
    "\\`a": "à",
    "{\\`a}": "à",
    "{`a}": "à",
    "{`à}": "à",
    # Circumflex (ê, â)
    "\\^e": "ê",
    "{\\^e}": "ê",
    "{^e}": "ê",
    "{^ê}": "ê",
    "\\^a": "â",
    "{\\^a}": "â",
    "{^a}": "â",
    "{^â}": "â",
    # Tilde (ñ, ã, õ)
    "\\~n": "ñ",
    "{\\~n}": "ñ",
    "{~n}": "ñ",
    "{~ñ}": "ñ",
    "~{n}": "ñ",
    "\\~a": "ã",
    "{\\~a}": "ã",
    "{~a}": "ã",
    "~{a}": "ã",  # Tilde on a (variant)
    "{~ã}": "ã",
    "\\~o": "õ",
    "{\\~o}": "õ",
    "{~o}": "õ",
    "~{o}": "õ",  # Tilde on o (variant)
    "{~õ}": "õ",
    # Uppercase tilde
    "\\~N": "Ñ",
    "{\\~N}": "Ñ",
    "~{N}": "Ñ",
    "\\~A": "Ã",
    "{\\~A}": "Ã",
    "~{A}": "Ã",
    "\\~O": "Õ",
    "{\\~O}": "Õ",
    "~{O}": "Õ",
    # Cedilla (ç)
    "\\c{c}": "ç",
    "{\\c{c}}": "ç",
    "{\\c{ç}}": "ç",
}


def clean_latex_accents(text: str) -> str:
    r"""Convert LaTeX accent commands to Unicode characters.

    Replacements are applied longest command first, so a braced form such as
    ``{\'e}`` is consumed whole before the bare ``\'e`` inside it can match
    and leave the surrounding braces behind.

    Args:
        text: Text containing LaTeX accent commands

    Returns:
        Text with accent commands converted to Unicode. Empty input is
        returned unchanged.

    Examples:
        >>> clean_latex_accents("Caf{\\'e}")
        'Café'
        >>> clean_latex_accents("Se\\~nor")
        'Señor'
    """
    if not text:
        return text

    # Sort at call time so entries added to ACCENT_MAP at runtime are honored.
    # sorted() is stable, so commands of equal length keep their map order.
    longest_first = sorted(ACCENT_MAP.items(), key=lambda item: len(item[0]), reverse=True)
    for latex_cmd, unicode_char in longest_first:
        text = text.replace(latex_cmd, unicode_char)
    return text