rxiv-maker 1.16.8__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ This module handles the actual generation of DOCX files using python-docx,
4
4
  writing structured content with formatting, citations, and references.
5
5
  """
6
6
 
7
+ import base64
7
8
  from pathlib import Path
8
9
  from typing import Any, Dict, Optional
9
10
 
@@ -11,11 +12,12 @@ from docx import Document
11
12
  from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_COLOR_INDEX
12
13
  from docx.oxml import OxmlElement
13
14
  from docx.oxml.ns import qn
14
- from docx.shared import Inches, Pt
15
+ from docx.shared import Inches, Pt, RGBColor
15
16
  from latex2mathml.converter import convert as latex_to_mathml
16
17
  from lxml import etree
17
18
 
18
19
  from ..core.logging_config import get_logger
20
+ from ..utils.author_affiliation_processor import AuthorAffiliationProcessor
19
21
  from ..utils.docx_helpers import convert_pdf_to_image
20
22
 
21
23
  logger = get_logger()
@@ -24,6 +26,29 @@ logger = get_logger()
24
26
  class DocxWriter:
25
27
  """Writes structured content to DOCX files using python-docx."""
26
28
 
29
+ # Color mapping for different reference types
30
+ XREF_COLORS = {
31
+ "fig": WD_COLOR_INDEX.BRIGHT_GREEN, # Figures (bright green - lighter)
32
+ "sfig": WD_COLOR_INDEX.TURQUOISE, # Supplementary figures (turquoise - lighter cyan)
33
+ "stable": WD_COLOR_INDEX.TURQUOISE, # Supplementary tables (turquoise - lighter cyan)
34
+ "table": WD_COLOR_INDEX.BLUE, # Main tables
35
+ "eq": WD_COLOR_INDEX.VIOLET, # Equations
36
+ "snote": WD_COLOR_INDEX.TURQUOISE, # Supplementary notes (turquoise - lighter cyan)
37
+ "cite": WD_COLOR_INDEX.YELLOW, # Citations (yellow)
38
+ }
39
+
40
+ @staticmethod
41
+ def get_xref_color(xref_type: str):
42
+ """Get highlight color for a cross-reference type.
43
+
44
+ Args:
45
+ xref_type: Type of cross-reference (fig, sfig, stable, table, eq, snote, cite)
46
+
47
+ Returns:
48
+ WD_COLOR_INDEX color for the xref type, or YELLOW as default
49
+ """
50
+ return DocxWriter.XREF_COLORS.get(xref_type, WD_COLOR_INDEX.YELLOW)
51
+
27
52
  def write(
28
53
  self,
29
54
  doc_structure: Dict[str, Any],
@@ -32,6 +57,10 @@ class DocxWriter:
32
57
  include_footnotes: bool = True,
33
58
  base_path: Optional[Path] = None,
34
59
  metadata: Optional[Dict[str, Any]] = None,
60
+ table_map: Optional[Dict[str, int]] = None,
61
+ figures_at_end: bool = False,
62
+ hide_highlighting: bool = False,
63
+ hide_comments: bool = False,
35
64
  ) -> Path:
36
65
  """Write DOCX file from structured content.
37
66
 
@@ -42,6 +71,10 @@ class DocxWriter:
42
71
  include_footnotes: Whether to add DOI footnotes
43
72
  base_path: Base path for resolving relative figure paths
44
73
  metadata: Document metadata (title, authors, affiliations)
74
+ table_map: Mapping from table labels to numbers (for supplementary tables)
75
+ figures_at_end: Place main figures at end before SI/bibliography
76
+ hide_highlighting: Disable colored highlighting on references and citations
77
+ hide_comments: Exclude all comments (block and inline) from output
45
78
 
46
79
  Returns:
47
80
  Path to created DOCX file
@@ -49,8 +82,14 @@ class DocxWriter:
49
82
  self.base_path = base_path or Path.cwd()
50
83
  self.bibliography = bibliography
51
84
  self.include_footnotes = include_footnotes
85
+ self.table_map = table_map or {}
86
+ self.hide_highlighting = hide_highlighting
87
+ self.hide_comments = hide_comments
52
88
  doc = Document()
53
89
 
90
+ # Set default font to Arial for entire document
91
+ self._set_default_font(doc, "Arial")
92
+
54
93
  # Add title and author information if metadata provided
55
94
  if metadata:
56
95
  self._add_title_page(doc, metadata)
@@ -69,19 +108,44 @@ class DocxWriter:
69
108
  # Store figure map for use in text processing
70
109
  self.figure_map = figure_map
71
110
 
72
- # Process each section INCLUDING figures inline
111
+ # Collect main figures if figures_at_end is True
112
+ collected_main_figures = []
113
+
114
+ # Process each section
73
115
  figure_counter = 0
116
+ sfigure_counter = 0
74
117
  for section in doc_structure["sections"]:
75
118
  if section["type"] == "figure":
76
- figure_counter += 1
77
- self._add_figure(doc, section, figure_number=figure_counter)
119
+ is_supplementary = section.get("is_supplementary", False)
120
+ if is_supplementary:
121
+ # Supplementary figures always go inline (in SI section)
122
+ sfigure_counter += 1
123
+ self._add_figure(doc, section, figure_number=sfigure_counter, is_supplementary=True)
124
+ else:
125
+ # Main figures: collect if figures_at_end, otherwise add inline
126
+ figure_counter += 1
127
+ if figures_at_end:
128
+ collected_main_figures.append((section, figure_counter))
129
+ else:
130
+ self._add_figure(doc, section, figure_number=figure_counter, is_supplementary=False)
78
131
  else:
79
132
  self._add_section(doc, section, bibliography, include_footnotes)
80
133
 
134
+ # Add collected main figures at the end (before bibliography)
135
+ if figures_at_end and collected_main_figures:
136
+ doc.add_page_break()
137
+ heading = doc.add_heading("Figures", level=1)
138
+ for run in heading.runs:
139
+ run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
140
+ for section, fig_num in collected_main_figures:
141
+ self._add_figure(doc, section, figure_number=fig_num, is_supplementary=False)
142
+
81
143
  # Add bibliography section at the end
82
144
  if include_footnotes and bibliography:
83
145
  doc.add_page_break()
84
- doc.add_heading("Bibliography", level=1)
146
+ heading = doc.add_heading("Bibliography", level=1)
147
+ for run in heading.runs:
148
+ run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
85
149
 
86
150
  # Add numbered bibliography entries
87
151
  for num in sorted(bibliography.keys()):
@@ -92,15 +156,15 @@ class DocxWriter:
92
156
  num_run = para.add_run(f"[{num}] ")
93
157
  num_run.bold = True
94
158
 
95
- # Add formatted bibliography text (slim format)
159
+ # Add formatted bibliography text (without DOI - added separately below)
96
160
  para.add_run(bib_entry["formatted"])
97
161
 
98
- # Add DOI as hyperlink with yellow highlighting if present
162
+ # Add DOI as hyperlink with yellow highlighting if present (unless hide_highlighting is enabled)
99
163
  if bib_entry.get("doi"):
100
164
  doi = bib_entry["doi"]
101
165
  doi_url = f"https://doi.org/{doi}" if not doi.startswith("http") else doi
102
- para.add_run(" ")
103
- self._add_hyperlink(para, doi_url, doi_url, highlight=True)
166
+ para.add_run("\nDOI: ")
167
+ self._add_hyperlink(para, doi_url, doi_url, highlight=not self.hide_highlighting)
104
168
 
105
169
  # Add spacing between entries
106
170
  para.paragraph_format.space_after = Pt(6)
@@ -109,6 +173,38 @@ class DocxWriter:
109
173
  doc.save(str(output_path))
110
174
  return output_path
111
175
 
176
+ def _set_default_font(self, doc: Document, font_name: str):
177
+ """Set the default font for the entire document.
178
+
179
+ Args:
180
+ doc: Document object
181
+ font_name: Font name to use (e.g., "Arial", "Times New Roman")
182
+ """
183
+ # Set font on Normal style (base style for most content)
184
+ style = doc.styles["Normal"]
185
+ font = style.font
186
+ font.name = font_name
187
+ font.size = Pt(10) # Default body font size
188
+
189
+ # Also set on heading styles to ensure consistency
190
+ for i in range(1, 10):
191
+ try:
192
+ heading_style = doc.styles[f"Heading {i}"]
193
+ heading_style.font.name = font_name
194
+ except KeyError:
195
+ # Heading style doesn't exist, skip
196
+ pass
197
+
198
+ def _apply_highlight(self, run, color: WD_COLOR_INDEX):
199
+ """Apply highlight color to a run, unless highlighting is disabled.
200
+
201
+ Args:
202
+ run: The run object to apply highlighting to
203
+ color: The WD_COLOR_INDEX color to apply
204
+ """
205
+ if not self.hide_highlighting:
206
+ run.font.highlight_color = color
207
+
112
208
  def _add_title_page(self, doc: Document, metadata: Dict[str, Any]):
113
209
  """Add title, author and affiliation information.
114
210
 
@@ -144,27 +240,16 @@ class DocxWriter:
144
240
  if not authors:
145
241
  return # Nothing more to add
146
242
 
147
- # Collect unique affiliations and build mapping
148
- all_affiliations = []
149
- affiliation_map = {} # Maps affiliation shortname to number
150
-
151
- # Get full affiliation details from metadata
152
- affiliation_details = {a.get("shortname"): a for a in metadata.get("affiliations", [])}
153
-
154
- for author in authors:
155
- author_affils = author.get("affiliations", [])
156
- for affil_shortname in author_affils:
157
- if affil_shortname not in affiliation_map:
158
- affiliation_map[affil_shortname] = len(affiliation_map) + 1
159
- # Look up full affiliation info
160
- affil_info = affiliation_details.get(affil_shortname, {})
161
- full_name = affil_info.get("full_name", affil_shortname)
162
- location = affil_info.get("location", "")
163
- # Format: "Full Name, Location" or just "Full Name" if no location
164
- affil_text = f"{full_name}, {location}" if location else full_name
165
- all_affiliations.append(affil_text)
166
-
167
- # Add authors with superscript affiliation numbers
243
+ # Process author and affiliation metadata using centralized processor
244
+ processor = AuthorAffiliationProcessor()
245
+ processed = processor.process(metadata)
246
+
247
+ affiliation_map = processed["affiliation_map"]
248
+ ordered_affiliations = processed["ordered_affiliations"]
249
+ cofirst_authors = processed["cofirst_authors"]
250
+ corresponding_authors = processed["corresponding_authors"]
251
+
252
+ # Add authors with superscript affiliation numbers and corresponding author markers
168
253
  if authors:
169
254
  author_para = doc.add_paragraph()
170
255
  for i, author in enumerate(authors):
@@ -182,25 +267,94 @@ class DocxWriter:
182
267
  sup_run = author_para.add_run(",".join(affil_nums))
183
268
  sup_run.font.superscript = True
184
269
 
270
+ # Add co-first author marker (dagger) if applicable
271
+ is_cofirst = author.get("co_first_author", False)
272
+ if is_cofirst:
273
+ cofirst_run = author_para.add_run("†")
274
+ cofirst_run.font.superscript = True
275
+
276
+ # Add corresponding author marker (asterisk) if applicable
277
+ is_corresponding = author.get("corresponding_author", False)
278
+ if is_corresponding:
279
+ corr_run = author_para.add_run("*")
280
+ corr_run.font.superscript = True
281
+
185
282
  author_para.paragraph_format.space_after = Pt(8)
186
283
 
187
284
  # Add affiliations
188
- if all_affiliations:
189
- for i, affil_text in enumerate(all_affiliations, start=1):
285
+ if ordered_affiliations:
286
+ for affil_num, _affil_shortname, affil_text in ordered_affiliations:
190
287
  affil_para = doc.add_paragraph()
191
288
 
192
289
  # Add superscript number
193
- num_run = affil_para.add_run(str(i))
290
+ num_run = affil_para.add_run(str(affil_num))
194
291
  num_run.font.superscript = True
292
+ num_run.font.size = Pt(8)
195
293
 
196
294
  # Add affiliation text
197
- affil_para.add_run(f" {affil_text}")
295
+ affil_run = affil_para.add_run(f" {affil_text}")
296
+ affil_run.font.size = Pt(8)
198
297
  affil_para.paragraph_format.space_after = Pt(4)
199
- affil_para.runs[1].font.size = Pt(10)
200
298
 
201
299
  # Extra space after last affiliation
202
300
  affil_para.paragraph_format.space_after = Pt(12)
203
301
 
302
+ # Add co-first author information if any (already extracted by processor)
303
+ if cofirst_authors:
304
+ cofirst_para = doc.add_paragraph()
305
+ cofirst_marker = cofirst_para.add_run("†")
306
+ cofirst_marker.font.superscript = True
307
+ cofirst_marker.font.size = Pt(8)
308
+
309
+ cofirst_label = cofirst_para.add_run(" These authors contributed equally: ")
310
+ cofirst_label.font.size = Pt(8)
311
+
312
+ for i, author in enumerate(cofirst_authors):
313
+ if i > 0:
314
+ sep_run = cofirst_para.add_run(", ")
315
+ sep_run.font.size = Pt(8)
316
+
317
+ name = author.get("name", "")
318
+ name_run = cofirst_para.add_run(name)
319
+ name_run.font.size = Pt(8)
320
+
321
+ cofirst_para.paragraph_format.space_after = Pt(12)
322
+
323
+ # Add corresponding author information if any (already extracted by processor)
324
+ if corresponding_authors:
325
+ corr_para = doc.add_paragraph()
326
+ corr_marker = corr_para.add_run("*")
327
+ corr_marker.font.superscript = True
328
+ corr_marker.font.size = Pt(8)
329
+
330
+ corr_label = corr_para.add_run(" Correspondence: ")
331
+ corr_label.font.size = Pt(8)
332
+
333
+ for i, author in enumerate(corresponding_authors):
334
+ if i > 0:
335
+ sep_run = corr_para.add_run("; ")
336
+ sep_run.font.size = Pt(8)
337
+
338
+ name = author.get("name", "")
339
+ email = author.get("email", "")
340
+
341
+ # Decode email if it's base64 encoded
342
+ if not email:
343
+ email64 = author.get("email64", "")
344
+ if email64:
345
+ try:
346
+ email = base64.b64decode(email64).decode("utf-8")
347
+ except Exception:
348
+ email = ""
349
+
350
+ if email:
351
+ info_run = corr_para.add_run(f"{name} ({email})")
352
+ else:
353
+ info_run = corr_para.add_run(name)
354
+ info_run.font.size = Pt(8)
355
+
356
+ corr_para.paragraph_format.space_after = Pt(12)
357
+
204
358
  def _add_section(
205
359
  self,
206
360
  doc: Document,
@@ -228,6 +382,9 @@ class DocxWriter:
228
382
  self._add_list(doc, section)
229
383
  elif section_type == "code_block":
230
384
  self._add_code_block(doc, section)
385
+ elif section_type == "comment":
386
+ if not self.hide_comments:
387
+ self._add_comment(doc, section)
231
388
  elif section_type == "figure":
232
389
  self._add_figure(doc, section)
233
390
  elif section_type == "table":
@@ -253,6 +410,7 @@ class DocxWriter:
253
410
  run = para.add_run(text)
254
411
  run.bold = True
255
412
  run.font.size = Pt(12)
413
+ run.font.color.rgb = RGBColor(0, 0, 0) # Ensure black text
256
414
 
257
415
  def _add_heading(self, doc: Document, section: Dict[str, Any]):
258
416
  """Add heading to document.
@@ -263,7 +421,10 @@ class DocxWriter:
263
421
  """
264
422
  level = section["level"]
265
423
  text = section["text"]
266
- doc.add_heading(text, level=level)
424
+ heading = doc.add_heading(text, level=level)
425
+ # Ensure heading text is black (not blue)
426
+ for run in heading.runs:
427
+ run.font.color.rgb = RGBColor(0, 0, 0) # Explicitly set to black
267
428
 
268
429
  def _add_paragraph(
269
430
  self,
@@ -310,13 +471,19 @@ class DocxWriter:
310
471
  run.italic = True
311
472
  if run_data.get("underline"):
312
473
  run.underline = True
474
+ if run_data.get("subscript"):
475
+ run.font.subscript = True
476
+ if run_data.get("superscript"):
477
+ run.font.superscript = True
313
478
  if run_data.get("code"):
314
479
  run.font.name = "Courier New"
315
480
  run.font.size = Pt(10)
316
481
  if run_data.get("xref"):
317
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
482
+ # Use color based on xref type (fig, sfig, stable, eq, etc.)
483
+ xref_type = run_data.get("xref_type", "cite")
484
+ self._apply_highlight(run, self.get_xref_color(xref_type))
318
485
  if run_data.get("highlight_yellow"):
319
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
486
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
320
487
 
321
488
  elif run_data["type"] == "hyperlink":
322
489
  # Add hyperlink with yellow highlighting
@@ -329,11 +496,20 @@ class DocxWriter:
329
496
  latex_content = run_data.get("latex", "")
330
497
  self._add_inline_equation(paragraph, latex_content)
331
498
 
499
+ elif run_data["type"] == "inline_comment":
500
+ # Add inline comment with gray highlighting (unless hide_comments is enabled)
501
+ if not self.hide_comments:
502
+ comment_text = run_data["text"]
503
+ run = paragraph.add_run(f"[Comment: {comment_text}]")
504
+ self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
505
+ run.italic = True
506
+ run.font.size = Pt(10)
507
+
332
508
  elif run_data["type"] == "citation":
333
509
  cite_num = run_data["number"]
334
510
  # Add citation as [NN] inline with yellow highlighting
335
511
  run = paragraph.add_run(f"[{cite_num}]")
336
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
512
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
337
513
  run.font.size = Pt(10)
338
514
 
339
515
  def _add_list(self, doc: Document, section: Dict[str, Any]):
@@ -362,15 +538,21 @@ class DocxWriter:
362
538
  run.bold = True
363
539
  if run_data.get("italic"):
364
540
  run.italic = True
541
+ if run_data.get("subscript"):
542
+ run.font.subscript = True
543
+ if run_data.get("superscript"):
544
+ run.font.superscript = True
365
545
  if run_data.get("code"):
366
546
  run.font.name = "Courier New"
367
547
  if run_data.get("xref"):
368
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
548
+ # Use color based on xref type
549
+ xref_type = run_data.get("xref_type", "cite")
550
+ self._apply_highlight(run, self.get_xref_color(xref_type))
369
551
  if run_data.get("highlight_yellow"):
370
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
552
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
371
553
  run.font.size = Pt(10)
372
554
  if run_data.get("highlight_yellow"):
373
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
555
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
374
556
  elif run_data["type"] == "hyperlink":
375
557
  text = run_data.get("text", "")
376
558
  url = run_data.get("url", "")
@@ -379,11 +561,20 @@ class DocxWriter:
379
561
  # Add inline equation as Office Math
380
562
  latex_content = run_data.get("latex", "")
381
563
  self._add_inline_equation(paragraph, latex_content)
564
+ elif run_data["type"] == "inline_comment":
565
+ # Add inline comment with gray highlighting (unless hide_comments is enabled)
566
+ if not self.hide_comments:
567
+ comment_text = run_data["text"]
568
+ run = paragraph.add_run(f"[Comment: {comment_text}]")
569
+ self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
570
+ run.italic = True
571
+ run.font.size = Pt(10)
382
572
  elif run_data["type"] == "citation":
383
573
  cite_num = run_data["number"]
384
574
  run = paragraph.add_run(f"[{cite_num}]")
385
575
  run.bold = True
386
576
  run.font.size = Pt(10)
577
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
387
578
 
388
579
  def _add_code_block(self, doc: Document, section: Dict[str, Any]):
389
580
  """Add code block to document.
@@ -404,6 +595,22 @@ class DocxWriter:
404
595
  paragraph_format = paragraph.paragraph_format
405
596
  paragraph_format.left_indent = Pt(36) # Indent code blocks
406
597
 
598
+ def _add_comment(self, doc: Document, section: Dict[str, Any]):
599
+ """Add comment to document with gray highlighting.
600
+
601
+ Args:
602
+ doc: Document object
603
+ section: Comment section data with 'text'
604
+ """
605
+ comment_text = section["text"]
606
+ paragraph = doc.add_paragraph()
607
+
608
+ # Add comment text with light gray highlighting to distinguish from colored xrefs
609
+ run = paragraph.add_run(f"[Comment: {comment_text}]")
610
+ self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
611
+ run.italic = True
612
+ run.font.size = Pt(10)
613
+
407
614
  def _check_poppler_availability(self) -> bool:
408
615
  """Check if poppler is available for PDF conversion.
409
616
 
@@ -417,13 +624,16 @@ class DocxWriter:
417
624
 
418
625
  return result.status == DependencyStatus.AVAILABLE
419
626
 
420
- def _add_figure(self, doc: Document, section: Dict[str, Any], figure_number: int = None):
627
+ def _add_figure(
628
+ self, doc: Document, section: Dict[str, Any], figure_number: int = None, is_supplementary: bool = False
629
+ ):
421
630
  """Add figure to document with caption.
422
631
 
423
632
  Args:
424
633
  doc: Document object
425
634
  section: Figure section data with 'path', 'caption', 'label'
426
635
  figure_number: Figure number (1-indexed)
636
+ is_supplementary: Whether this is a supplementary figure
427
637
  """
428
638
  figure_path = Path(section["path"])
429
639
  caption = section.get("caption", "")
@@ -470,19 +680,45 @@ class DocxWriter:
470
680
  logger.warning(f"Unsupported image format: {figure_path.suffix}")
471
681
 
472
682
  if img_source:
473
- # Add image
683
+ # Add image with proper sizing to fit page
474
684
  try:
475
- doc.add_picture(img_source, width=Inches(6))
476
- logger.debug(f"Embedded figure: {figure_path}")
685
+ from PIL import Image as PILImage
686
+
687
+ # Get image dimensions
688
+ with PILImage.open(img_source) as img:
689
+ img_width, img_height = img.size
690
+ aspect_ratio = img_width / img_height
691
+
692
+ # Page dimensions with margins (Letter size: 8.5 x 11 inches, 1 inch margins)
693
+ max_width = Inches(6.5) # 8.5 - 2*1
694
+ max_height = Inches(9) # 11 - 2*1
695
+
696
+ # Add figure centered
697
+ # Note: add_picture() creates a paragraph automatically, but we need to add it explicitly
698
+ # to control alignment
699
+ fig_para = doc.add_paragraph()
700
+ fig_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
701
+
702
+ # Calculate optimal size maintaining aspect ratio
703
+ if aspect_ratio > (6.5 / 9): # Wide image - constrain by width
704
+ run = fig_para.add_run()
705
+ run.add_picture(img_source, width=max_width)
706
+ else: # Tall image - constrain by height
707
+ run = fig_para.add_run()
708
+ run.add_picture(img_source, height=max_height)
709
+
710
+ logger.debug(f"Embedded figure: {figure_path} ({img_width}x{img_height})")
477
711
  except Exception as e:
478
712
  logger.warning(f"Failed to embed figure {figure_path}: {e}")
479
- # Add placeholder text
713
+ # Add placeholder text (centered)
480
714
  p = doc.add_paragraph()
715
+ p.alignment = WD_ALIGN_PARAGRAPH.CENTER
481
716
  run = p.add_run(f"[Figure: {figure_path.name}]")
482
717
  run.italic = True
483
718
  else:
484
- # Add placeholder if embedding failed
719
+ # Add placeholder if embedding failed (centered)
485
720
  p = doc.add_paragraph()
721
+ p.alignment = WD_ALIGN_PARAGRAPH.CENTER
486
722
  run = p.add_run(f"[Figure: {figure_path.name}]")
487
723
  run.italic = True
488
724
  logger.warning(f"Could not embed figure: {figure_path}")
@@ -494,15 +730,18 @@ class DocxWriter:
494
730
  # Add small space before caption to separate from figure
495
731
  caption_para.paragraph_format.space_before = Pt(3)
496
732
 
497
- # Format as "Figure number: "
733
+ # Format as "Figure number: " or "Supp. Fig. number: "
498
734
  if figure_number:
499
- run = caption_para.add_run(f"Figure {figure_number}: ")
735
+ if is_supplementary:
736
+ run = caption_para.add_run(f"Supp. Fig. S{figure_number}. ")
737
+ else:
738
+ run = caption_para.add_run(f"Fig. {figure_number}. ")
500
739
  run.bold = True
501
- run.font.size = Pt(7)
740
+ run.font.size = Pt(8)
502
741
  else:
503
742
  run = caption_para.add_run("Figure: ")
504
743
  run.bold = True
505
- run.font.size = Pt(7)
744
+ run.font.size = Pt(8)
506
745
 
507
746
  # Parse and add caption with inline formatting
508
747
  # Import the processor to parse inline formatting
@@ -515,28 +754,43 @@ class DocxWriter:
515
754
  if run_data["type"] == "text":
516
755
  text = run_data["text"]
517
756
  run = caption_para.add_run(text)
518
- run.font.size = Pt(7)
757
+ run.font.size = Pt(8)
519
758
 
520
759
  # Apply formatting
521
760
  if run_data.get("bold"):
522
761
  run.bold = True
523
762
  if run_data.get("italic"):
524
763
  run.italic = True
764
+ if run_data.get("subscript"):
765
+ run.font.subscript = True
766
+ if run_data.get("superscript"):
767
+ run.font.superscript = True
525
768
  if run_data.get("code"):
526
769
  run.font.name = "Courier New"
527
770
  if run_data.get("xref"):
528
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
771
+ # Use color based on xref type
772
+ xref_type = run_data.get("xref_type", "cite")
773
+ self._apply_highlight(run, self.get_xref_color(xref_type))
529
774
  if run_data.get("highlight_yellow"):
530
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
775
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
531
776
  elif run_data["type"] == "inline_equation":
532
777
  # Add inline equation as Office Math
533
778
  latex_content = run_data.get("latex", "")
534
779
  self._add_inline_equation(caption_para, latex_content)
780
+ elif run_data["type"] == "inline_comment":
781
+ # Add inline comment with gray highlighting (unless hide_comments is enabled)
782
+ if not self.hide_comments:
783
+ comment_text = run_data["text"]
784
+ run = caption_para.add_run(f"[Comment: {comment_text}]")
785
+ self._apply_highlight(run, WD_COLOR_INDEX.GRAY_25)
786
+ run.italic = True
787
+ run.font.size = Pt(8)
535
788
  elif run_data["type"] == "citation":
536
789
  cite_num = run_data["number"]
537
790
  run = caption_para.add_run(f"[{cite_num}]")
538
791
  run.bold = True
539
- run.font.size = Pt(7)
792
+ run.font.size = Pt(8)
793
+ self._apply_highlight(run, WD_COLOR_INDEX.YELLOW)
540
794
 
541
795
  # Add spacing after figure (reduced from 12 to 6 for compactness)
542
796
  caption_para.paragraph_format.space_after = Pt(6)
@@ -602,10 +856,16 @@ class DocxWriter:
602
856
  run.italic = True
603
857
  if run_data.get("underline"):
604
858
  run.underline = True
859
+ if run_data.get("subscript"):
860
+ run.font.subscript = True
861
+ if run_data.get("superscript"):
862
+ run.font.superscript = True
605
863
  if run_data.get("code"):
606
864
  run.font.name = "Courier New"
607
865
  if run_data.get("xref"):
608
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
866
+ # Use color based on xref type
867
+ xref_type = run_data.get("xref_type", "cite")
868
+ self._apply_highlight(run, self.get_xref_color(xref_type))
609
869
 
610
870
  # Add table caption if present
611
871
  caption = section.get("caption")
@@ -616,18 +876,30 @@ class DocxWriter:
616
876
  # Add small space before caption to separate from table
617
877
  caption_para.paragraph_format.space_before = Pt(3)
618
878
 
619
- # Determine table number from label (e.g., "stable:structural_models" -> "Supp. Table 1")
879
+ # Determine table number from label using table_map
620
880
  if label and label.startswith("stable:"):
621
- # Count how many supplementary tables we've seen so far
622
- # For now, we'll just format as "Supp. Table: caption"
623
- # A more sophisticated approach would track table numbers
624
- run = caption_para.add_run("Supp. Table: ")
881
+ # Extract label name (e.g., "stable:parameters" -> "parameters")
882
+ label_name = label.split(":", 1)[1] if ":" in label else label
883
+ # Look up number in table_map
884
+ table_num = self.table_map.get(label_name)
885
+ if table_num:
886
+ run = caption_para.add_run(f"Supp. Table S{table_num}. ")
887
+ else:
888
+ # Fallback if label not in map
889
+ run = caption_para.add_run("Supp. Table: ")
625
890
  run.bold = True
626
- run.font.size = Pt(7)
891
+ run.font.size = Pt(8)
627
892
  elif label and label.startswith("table:"):
628
- run = caption_para.add_run("Table: ")
893
+ # Extract label name for main tables
894
+ label_name = label.split(":", 1)[1] if ":" in label else label
895
+ # Look up number in table_map (though main tables may not be in map)
896
+ table_num = self.table_map.get(label_name)
897
+ if table_num:
898
+ run = caption_para.add_run(f"Table {table_num}. ")
899
+ else:
900
+ run = caption_para.add_run("Table: ")
629
901
  run.bold = True
630
- run.font.size = Pt(7)
902
+ run.font.size = Pt(8)
631
903
 
632
904
  # Parse and add caption with inline formatting
633
905
  caption_runs = processor._parse_inline_formatting(caption, {})
@@ -635,17 +907,23 @@ class DocxWriter:
635
907
  if run_data["type"] == "text":
636
908
  text = run_data["text"]
637
909
  run = caption_para.add_run(text)
638
- run.font.size = Pt(7)
910
+ run.font.size = Pt(8)
639
911
  if run_data.get("bold"):
640
912
  run.bold = True
641
913
  if run_data.get("italic"):
642
914
  run.italic = True
643
915
  if run_data.get("underline"):
644
916
  run.underline = True
917
+ if run_data.get("subscript"):
918
+ run.font.subscript = True
919
+ if run_data.get("superscript"):
920
+ run.font.superscript = True
645
921
  if run_data.get("code"):
646
922
  run.font.name = "Courier New"
647
923
  if run_data.get("xref"):
648
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
924
+ # Use color based on xref type
925
+ xref_type = run_data.get("xref_type", "cite")
926
+ self._apply_highlight(run, self.get_xref_color(xref_type))
649
927
 
650
928
  # Add spacing after table (reduced from 12 to 6 for compactness)
651
929
  caption_para.paragraph_format.space_after = Pt(6)