rxiv-maker 1.16.8__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -45,11 +45,16 @@ class DocxExporter:
45
45
  self.resolve_dois = resolve_dois
46
46
  self.include_footnotes = include_footnotes
47
47
 
48
- # Load config to get author name format preference
48
+ # Load config to get author name format preference and DOCX options
49
49
  config_manager = ConfigManager(base_dir=Path(manuscript_path))
50
50
  config = config_manager.load_config()
51
51
  self.author_format = config.get("bibliography_author_format", "lastname_firstname")
52
52
 
53
+ # DOCX export options
54
+ docx_config = config.get("docx", {})
55
+ self.hide_si = docx_config.get("hide_si", False) # Default to False (don't hide SI) for backwards compatibility
56
+ self.figures_at_end = docx_config.get("figures_at_end", False) # Default to False (inline figures)
57
+
53
58
  # Components
54
59
  self.citation_mapper = CitationMapper()
55
60
  self.content_processor = DocxContentProcessor()
@@ -98,6 +103,13 @@ class DocxExporter:
98
103
  markdown_content = self._load_markdown()
99
104
  logger.debug(f"Loaded {len(markdown_content)} characters of markdown")
100
105
 
106
+ # Step 2.5: If SI is hidden from export, still load it for label mapping
107
+ si_content_for_mapping = ""
108
+ if self.hide_si:
109
+ si_content_for_mapping = self._load_si_for_mapping()
110
+ if si_content_for_mapping:
111
+ logger.info("📋 Loaded SI content for label mapping (SI section hidden from export)")
112
+
101
113
  # Step 3: Extract and map citations
102
114
  citations = self.citation_mapper.extract_citations_from_markdown(markdown_content)
103
115
  citation_map = self.citation_mapper.create_mapping(citations)
@@ -120,14 +132,14 @@ class DocxExporter:
120
132
 
121
133
  # Replace @fig:label with "Fig. X" in text, handling optional panel letters
122
134
  # Pattern matches: @fig:label optionally followed by space and panel letter(s)
123
- # Use special markers <<XREF>> to enable yellow highlighting in DOCX
135
+ # Use special markers <<XREF:type>> to enable color-coded highlighting in DOCX
124
136
  for label, num in figure_map.items():
125
137
  # Match @fig:label with optional panel letters like " a", " a,b", " a-c"
126
138
  # Use negative lookahead (?![a-z]) to prevent matching start of words like " is", " and"
127
139
  # Panel letters must be followed by non-letter (space, punctuation, end of string)
128
140
  markdown_with_numbers = re.sub(
129
141
  rf"@fig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
130
- lambda m, num=num: f"<<XREF>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
142
+ lambda m, num=num: f"<<XREF:fig>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
131
143
  markdown_with_numbers,
132
144
  )
133
145
 
@@ -135,7 +147,9 @@ class DocxExporter:
135
147
 
136
148
  # Find all supplementary figures and create mapping
137
149
  # Allow hyphens and underscores in label names
138
- sfig_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#sfig:([\w-]+)", markdown_with_numbers)
150
+ # IMPORTANT: When SI is excluded, extract from SI content (where figures are defined)
151
+ content_to_scan_for_sfigs = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
152
+ sfig_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#sfig:([\w-]+)", content_to_scan_for_sfigs)
139
153
  sfig_map = {label: i + 1 for i, label in enumerate(sfig_labels)}
140
154
 
141
155
  # Replace @sfig:label with "Supp. Fig. X" in text, handling optional panel letters
@@ -144,34 +158,51 @@ class DocxExporter:
144
158
  # Negative lookahead prevents matching start of words
145
159
  markdown_with_numbers = re.sub(
146
160
  rf"@sfig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
147
- lambda m, num=num: f"<<XREF>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
161
+ lambda m, num=num: f"<<XREF:sfig>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
148
162
  markdown_with_numbers,
149
163
  )
150
164
 
151
165
  logger.debug(f"Mapped {len(sfig_map)} supplementary figure labels to numbers")
152
166
 
153
- # Find all tables and create mapping (looking for {#stable:label} tags)
154
- # Allow hyphens and underscores in label names
155
- table_labels = re.findall(r"\{#stable:([\w-]+)\}", markdown_with_numbers)
167
+ # Find all tables and create mapping (looking for {#stable:label} or \label{stable:label} tags)
168
+ # IMPORTANT: PDF uses the order that tables are DEFINED in the document (order of \label{stable:X})
169
+ # NOT the order of caption references (%{#stable:X}) which are just metadata
170
+ # When SI is excluded from export, we still need to extract labels from SI
171
+
172
+ content_to_scan_for_tables = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
173
+
174
+ # Extract table labels in document order (both {#stable:label} markdown format and \label{stable:label} LaTeX format)
175
+ # The PDF numbering follows the order these labels appear in the document
176
+ markdown_labels = re.findall(r"\{#stable:([\w-]+)\}", content_to_scan_for_tables)
177
+ latex_labels = re.findall(r"\\label\{stable:([\w-]+)\}", content_to_scan_for_tables)
178
+
179
+ # Combine both formats, preferring LaTeX labels if present (since that's what PDF uses)
180
+ table_labels = latex_labels if latex_labels else markdown_labels
181
+
182
+ # Remove duplicates while preserving order
183
+ seen = set()
184
+ table_labels = [label for label in table_labels if not (label in seen or seen.add(label))]
185
+
156
186
  table_map = {label: i + 1 for i, label in enumerate(table_labels)}
187
+ logger.debug(f"Mapped {len(table_map)} supplementary tables: {table_map}")
157
188
 
158
189
  # Replace @stable:label with "Supp. Table X" in text
159
190
  for label, num in table_map.items():
160
191
  markdown_with_numbers = re.sub(
161
- rf"@stable:{label}\b", f"<<XREF>>Supp. Table {num}<</XREF>>", markdown_with_numbers
192
+ rf"@stable:{label}\b", f"<<XREF:stable>>Supp. Table {num}<</XREF>>", markdown_with_numbers
162
193
  )
163
194
 
164
- logger.debug(f"Mapped {len(table_map)} supplementary table labels to numbers")
165
-
166
195
  # Find all supplementary notes and create mapping (looking for {#snote:label} tags)
167
196
  # Allow hyphens and underscores in label names
168
- snote_labels = re.findall(r"\{#snote:([\w-]+)\}", markdown_with_numbers)
197
+ # IMPORTANT: When SI is excluded, extract from SI content (where notes are defined)
198
+ content_to_scan_for_snotes = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
199
+ snote_labels = re.findall(r"\{#snote:([\w-]+)\}", content_to_scan_for_snotes)
169
200
  snote_map = {label: i + 1 for i, label in enumerate(snote_labels)}
170
201
 
171
202
  # Replace @snote:label with "Supp. Note X" in text
172
203
  for label, num in snote_map.items():
173
204
  markdown_with_numbers = re.sub(
174
- rf"@snote:{label}\b", f"<<XREF>>Supp. Note {num}<</XREF>>", markdown_with_numbers
205
+ rf"@snote:{label}\b", f"<<XREF:snote>>Supp. Note {num}<</XREF>>", markdown_with_numbers
175
206
  )
176
207
 
177
208
  logger.debug(f"Mapped {len(snote_map)} supplementary note labels to numbers")
@@ -186,18 +217,17 @@ class DocxExporter:
186
217
  for label, num in equation_map.items():
187
218
  # Replace (@eq:label) with (Eq. X)
188
219
  markdown_with_numbers = re.sub(
189
- rf"\(@eq:{label}\b\)", f"(<<XREF>>Eq. {num}<</XREF>>)", markdown_with_numbers
220
+ rf"\(@eq:{label}\b\)", f"(<<XREF:eq>>Eq. {num}<</XREF>>)", markdown_with_numbers
190
221
  )
191
222
  # Replace @eq:label with Eq. X
192
- markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF>>Eq. {num}<</XREF>>", markdown_with_numbers)
223
+ markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF:eq>>Eq. {num}<</XREF>>", markdown_with_numbers)
193
224
 
194
225
  logger.debug(f"Mapped {len(equation_map)} equation labels to numbers")
195
226
 
196
227
  # Step 5.6: Remove label markers now that mapping is complete
197
228
  # These metadata markers should not appear in the final output
198
- markdown_with_numbers = re.sub(
199
- r"^\{#(?:fig|sfig|snote|stable|table|eq):[^}]+\}\s*", "", markdown_with_numbers, flags=re.MULTILINE
200
- )
229
+ # NOTE: Keep fig/sfig/stable/table labels - they're needed by content processor and removed during caption parsing
230
+ markdown_with_numbers = re.sub(r"^\{#(?:snote|eq):[^}]+\}\s*", "", markdown_with_numbers, flags=re.MULTILINE)
201
231
 
202
232
  # Step 6: Convert content to DOCX structure
203
233
  doc_structure = self.content_processor.parse(markdown_with_numbers, citation_map)
@@ -215,6 +245,8 @@ class DocxExporter:
215
245
  include_footnotes=self.include_footnotes,
216
246
  base_path=self.path_manager.manuscript_path,
217
247
  metadata=metadata,
248
+ table_map=table_map,
249
+ figures_at_end=self.figures_at_end,
218
250
  )
219
251
  logger.info(f"DOCX exported successfully: {docx_path}")
220
252
 
@@ -265,9 +297,9 @@ class DocxExporter:
265
297
 
266
298
  content.append(main_content)
267
299
 
268
- # Load 02_SUPPLEMENTARY_INFO.md if exists
300
+ # Load 02_SUPPLEMENTARY_INFO.md if exists and not configured to hide SI
269
301
  supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
270
- if supp_md.exists():
302
+ if supp_md.exists() and not self.hide_si:
271
303
  logger.info("Including supplementary information")
272
304
  supp_content = supp_md.read_text(encoding="utf-8")
273
305
  supp_content = remove_yaml_header(supp_content)
@@ -281,11 +313,36 @@ class DocxExporter:
281
313
  content.append("<!-- PAGE_BREAK -->")
282
314
  content.append("# Supplementary Information")
283
315
  content.append(supp_content)
316
+ elif supp_md.exists() and self.hide_si:
317
+ logger.info("Supplementary information exists but hidden per config (docx.hide_si: true)")
284
318
  else:
285
319
  logger.debug("No supplementary information file found")
286
320
 
287
321
  return "\n\n".join(content)
288
322
 
323
+ def _load_si_for_mapping(self) -> str:
324
+ r"""Load SI content for label mapping without including in export.
325
+
326
+ This method is used when hide_si is True but we still need to extract
327
+ SI labels (stable, sfig, snote) for cross-references in the main text.
328
+
329
+ IMPORTANT: We return RAW content (before preprocessing) because we need to
330
+ extract LaTeX labels (\label{stable:X}) which determine the PDF numbering order.
331
+ The preprocessor strips out {{tex: blocks, losing this ordering information.
332
+
333
+ Returns:
334
+ SI content as string (raw, before preprocessing), or empty string if SI doesn't exist
335
+ """
336
+ supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
337
+ if not supp_md.exists():
338
+ return ""
339
+
340
+ # Load RAW SI content (don't preprocess - we need LaTeX labels for ordering)
341
+ supp_content = supp_md.read_text(encoding="utf-8")
342
+ supp_content = remove_yaml_header(supp_content)
343
+
344
+ return supp_content
345
+
289
346
  def _build_bibliography(self, citation_map: Dict[str, int]) -> Dict[int, Dict]:
290
347
  """Build bibliography with optional DOI resolution.
291
348
 
@@ -327,7 +384,8 @@ class DocxExporter:
327
384
  logger.info(f"Resolved DOI for {key}: {doi}")
328
385
 
329
386
  # Format entry (full format for DOCX bibliography)
330
- formatted = format_bibliography_entry(entry, doi, slim=False, author_format=self.author_format)
387
+ # Don't include DOI in formatted text - it will be added separately as a hyperlink by the writer
388
+ formatted = format_bibliography_entry(entry, doi=None, slim=False, author_format=self.author_format)
331
389
 
332
390
  bibliography[number] = {"key": key, "entry": entry, "doi": doi, "formatted": formatted}
333
391
 
@@ -24,6 +24,29 @@ logger = get_logger()
24
24
  class DocxWriter:
25
25
  """Writes structured content to DOCX files using python-docx."""
26
26
 
27
+ # Color mapping for different reference types
28
+ XREF_COLORS = {
29
+ "fig": WD_COLOR_INDEX.BRIGHT_GREEN, # Figures (bright green - lighter)
30
+ "sfig": WD_COLOR_INDEX.TURQUOISE, # Supplementary figures (turquoise - lighter cyan)
31
+ "stable": WD_COLOR_INDEX.TURQUOISE, # Supplementary tables (turquoise - lighter cyan)
32
+ "table": WD_COLOR_INDEX.BLUE, # Main tables
33
+ "eq": WD_COLOR_INDEX.VIOLET, # Equations
34
+ "snote": WD_COLOR_INDEX.TURQUOISE, # Supplementary notes (turquoise - lighter cyan)
35
+ "cite": WD_COLOR_INDEX.YELLOW, # Citations (yellow)
36
+ }
37
+
38
+ @staticmethod
39
+ def get_xref_color(xref_type: str):
40
+ """Get highlight color for a cross-reference type.
41
+
42
+ Args:
43
+ xref_type: Type of cross-reference (fig, sfig, stable, table, eq, snote, cite)
44
+
45
+ Returns:
46
+ WD_COLOR_INDEX color for the xref type, or YELLOW as default
47
+ """
48
+ return DocxWriter.XREF_COLORS.get(xref_type, WD_COLOR_INDEX.YELLOW)
49
+
27
50
  def write(
28
51
  self,
29
52
  doc_structure: Dict[str, Any],
@@ -32,6 +55,8 @@ class DocxWriter:
32
55
  include_footnotes: bool = True,
33
56
  base_path: Optional[Path] = None,
34
57
  metadata: Optional[Dict[str, Any]] = None,
58
+ table_map: Optional[Dict[str, int]] = None,
59
+ figures_at_end: bool = False,
35
60
  ) -> Path:
36
61
  """Write DOCX file from structured content.
37
62
 
@@ -42,6 +67,8 @@ class DocxWriter:
42
67
  include_footnotes: Whether to add DOI footnotes
43
68
  base_path: Base path for resolving relative figure paths
44
69
  metadata: Document metadata (title, authors, affiliations)
70
+ table_map: Mapping from table labels to numbers (for supplementary tables)
71
+ figures_at_end: Place main figures at end before SI/bibliography
45
72
 
46
73
  Returns:
47
74
  Path to created DOCX file
@@ -49,6 +76,7 @@ class DocxWriter:
49
76
  self.base_path = base_path or Path.cwd()
50
77
  self.bibliography = bibliography
51
78
  self.include_footnotes = include_footnotes
79
+ self.table_map = table_map or {}
52
80
  doc = Document()
53
81
 
54
82
  # Add title and author information if metadata provided
@@ -69,15 +97,36 @@ class DocxWriter:
69
97
  # Store figure map for use in text processing
70
98
  self.figure_map = figure_map
71
99
 
72
- # Process each section INCLUDING figures inline
100
+ # Collect main figures if figures_at_end is True
101
+ collected_main_figures = []
102
+
103
+ # Process each section
73
104
  figure_counter = 0
105
+ sfigure_counter = 0
74
106
  for section in doc_structure["sections"]:
75
107
  if section["type"] == "figure":
76
- figure_counter += 1
77
- self._add_figure(doc, section, figure_number=figure_counter)
108
+ is_supplementary = section.get("is_supplementary", False)
109
+ if is_supplementary:
110
+ # Supplementary figures always go inline (in SI section)
111
+ sfigure_counter += 1
112
+ self._add_figure(doc, section, figure_number=sfigure_counter, is_supplementary=True)
113
+ else:
114
+ # Main figures: collect if figures_at_end, otherwise add inline
115
+ figure_counter += 1
116
+ if figures_at_end:
117
+ collected_main_figures.append((section, figure_counter))
118
+ else:
119
+ self._add_figure(doc, section, figure_number=figure_counter, is_supplementary=False)
78
120
  else:
79
121
  self._add_section(doc, section, bibliography, include_footnotes)
80
122
 
123
+ # Add collected main figures at the end (before bibliography)
124
+ if figures_at_end and collected_main_figures:
125
+ doc.add_page_break()
126
+ doc.add_heading("Figures", level=1)
127
+ for section, fig_num in collected_main_figures:
128
+ self._add_figure(doc, section, figure_number=fig_num, is_supplementary=False)
129
+
81
130
  # Add bibliography section at the end
82
131
  if include_footnotes and bibliography:
83
132
  doc.add_page_break()
@@ -92,14 +141,14 @@ class DocxWriter:
92
141
  num_run = para.add_run(f"[{num}] ")
93
142
  num_run.bold = True
94
143
 
95
- # Add formatted bibliography text (slim format)
144
+ # Add formatted bibliography text (without DOI - added separately below)
96
145
  para.add_run(bib_entry["formatted"])
97
146
 
98
147
  # Add DOI as hyperlink with yellow highlighting if present
99
148
  if bib_entry.get("doi"):
100
149
  doi = bib_entry["doi"]
101
150
  doi_url = f"https://doi.org/{doi}" if not doi.startswith("http") else doi
102
- para.add_run(" ")
151
+ para.add_run("\nDOI: ")
103
152
  self._add_hyperlink(para, doi_url, doi_url, highlight=True)
104
153
 
105
154
  # Add spacing between entries
@@ -228,6 +277,8 @@ class DocxWriter:
228
277
  self._add_list(doc, section)
229
278
  elif section_type == "code_block":
230
279
  self._add_code_block(doc, section)
280
+ elif section_type == "comment":
281
+ self._add_comment(doc, section)
231
282
  elif section_type == "figure":
232
283
  self._add_figure(doc, section)
233
284
  elif section_type == "table":
@@ -310,11 +361,17 @@ class DocxWriter:
310
361
  run.italic = True
311
362
  if run_data.get("underline"):
312
363
  run.underline = True
364
+ if run_data.get("subscript"):
365
+ run.font.subscript = True
366
+ if run_data.get("superscript"):
367
+ run.font.superscript = True
313
368
  if run_data.get("code"):
314
369
  run.font.name = "Courier New"
315
370
  run.font.size = Pt(10)
316
371
  if run_data.get("xref"):
317
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
372
+ # Use color based on xref type (fig, sfig, stable, eq, etc.)
373
+ xref_type = run_data.get("xref_type", "cite")
374
+ run.font.highlight_color = self.get_xref_color(xref_type)
318
375
  if run_data.get("highlight_yellow"):
319
376
  run.font.highlight_color = WD_COLOR_INDEX.YELLOW
320
377
 
@@ -329,6 +386,14 @@ class DocxWriter:
329
386
  latex_content = run_data.get("latex", "")
330
387
  self._add_inline_equation(paragraph, latex_content)
331
388
 
389
+ elif run_data["type"] == "inline_comment":
390
+ # Add inline comment with gray highlighting
391
+ comment_text = run_data["text"]
392
+ run = paragraph.add_run(f"[Comment: {comment_text}]")
393
+ run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
394
+ run.italic = True
395
+ run.font.size = Pt(10)
396
+
332
397
  elif run_data["type"] == "citation":
333
398
  cite_num = run_data["number"]
334
399
  # Add citation as [NN] inline with yellow highlighting
@@ -362,10 +427,16 @@ class DocxWriter:
362
427
  run.bold = True
363
428
  if run_data.get("italic"):
364
429
  run.italic = True
430
+ if run_data.get("subscript"):
431
+ run.font.subscript = True
432
+ if run_data.get("superscript"):
433
+ run.font.superscript = True
365
434
  if run_data.get("code"):
366
435
  run.font.name = "Courier New"
367
436
  if run_data.get("xref"):
368
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
437
+ # Use color based on xref type
438
+ xref_type = run_data.get("xref_type", "cite")
439
+ run.font.highlight_color = self.get_xref_color(xref_type)
369
440
  if run_data.get("highlight_yellow"):
370
441
  run.font.highlight_color = WD_COLOR_INDEX.YELLOW
371
442
  run.font.size = Pt(10)
@@ -379,11 +450,19 @@ class DocxWriter:
379
450
  # Add inline equation as Office Math
380
451
  latex_content = run_data.get("latex", "")
381
452
  self._add_inline_equation(paragraph, latex_content)
453
+ elif run_data["type"] == "inline_comment":
454
+ # Add inline comment with gray highlighting
455
+ comment_text = run_data["text"]
456
+ run = paragraph.add_run(f"[Comment: {comment_text}]")
457
+ run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
458
+ run.italic = True
459
+ run.font.size = Pt(10)
382
460
  elif run_data["type"] == "citation":
383
461
  cite_num = run_data["number"]
384
462
  run = paragraph.add_run(f"[{cite_num}]")
385
463
  run.bold = True
386
464
  run.font.size = Pt(10)
465
+ run.font.highlight_color = WD_COLOR_INDEX.YELLOW
387
466
 
388
467
  def _add_code_block(self, doc: Document, section: Dict[str, Any]):
389
468
  """Add code block to document.
@@ -404,6 +483,22 @@ class DocxWriter:
404
483
  paragraph_format = paragraph.paragraph_format
405
484
  paragraph_format.left_indent = Pt(36) # Indent code blocks
406
485
 
486
+ def _add_comment(self, doc: Document, section: Dict[str, Any]):
487
+ """Add comment to document with gray highlighting.
488
+
489
+ Args:
490
+ doc: Document object
491
+ section: Comment section data with 'text'
492
+ """
493
+ comment_text = section["text"]
494
+ paragraph = doc.add_paragraph()
495
+
496
+ # Add comment text with light gray highlighting to distinguish from colored xrefs
497
+ run = paragraph.add_run(f"[Comment: {comment_text}]")
498
+ run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
499
+ run.italic = True
500
+ run.font.size = Pt(10)
501
+
407
502
  def _check_poppler_availability(self) -> bool:
408
503
  """Check if poppler is available for PDF conversion.
409
504
 
@@ -417,13 +512,16 @@ class DocxWriter:
417
512
 
418
513
  return result.status == DependencyStatus.AVAILABLE
419
514
 
420
- def _add_figure(self, doc: Document, section: Dict[str, Any], figure_number: int = None):
515
+ def _add_figure(
516
+ self, doc: Document, section: Dict[str, Any], figure_number: int = None, is_supplementary: bool = False
517
+ ):
421
518
  """Add figure to document with caption.
422
519
 
423
520
  Args:
424
521
  doc: Document object
425
522
  section: Figure section data with 'path', 'caption', 'label'
426
523
  figure_number: Figure number (1-indexed)
524
+ is_supplementary: Whether this is a supplementary figure
427
525
  """
428
526
  figure_path = Path(section["path"])
429
527
  caption = section.get("caption", "")
@@ -470,19 +568,45 @@ class DocxWriter:
470
568
  logger.warning(f"Unsupported image format: {figure_path.suffix}")
471
569
 
472
570
  if img_source:
473
- # Add image
571
+ # Add image with proper sizing to fit page
474
572
  try:
475
- doc.add_picture(img_source, width=Inches(6))
476
- logger.debug(f"Embedded figure: {figure_path}")
573
+ from PIL import Image as PILImage
574
+
575
+ # Get image dimensions
576
+ with PILImage.open(img_source) as img:
577
+ img_width, img_height = img.size
578
+ aspect_ratio = img_width / img_height
579
+
580
+ # Page dimensions with margins (Letter size: 8.5 x 11 inches, 1 inch margins)
581
+ max_width = Inches(6.5) # 8.5 - 2*1
582
+ max_height = Inches(9) # 11 - 2*1
583
+
584
+ # Add figure centered
585
+ # Note: add_picture() creates a paragraph automatically, but we need to add it explicitly
586
+ # to control alignment
587
+ fig_para = doc.add_paragraph()
588
+ fig_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
589
+
590
+ # Calculate optimal size maintaining aspect ratio
591
+ if aspect_ratio > (6.5 / 9): # Wide image - constrain by width
592
+ run = fig_para.add_run()
593
+ run.add_picture(img_source, width=max_width)
594
+ else: # Tall image - constrain by height
595
+ run = fig_para.add_run()
596
+ run.add_picture(img_source, height=max_height)
597
+
598
+ logger.debug(f"Embedded figure: {figure_path} ({img_width}x{img_height})")
477
599
  except Exception as e:
478
600
  logger.warning(f"Failed to embed figure {figure_path}: {e}")
479
- # Add placeholder text
601
+ # Add placeholder text (centered)
480
602
  p = doc.add_paragraph()
603
+ p.alignment = WD_ALIGN_PARAGRAPH.CENTER
481
604
  run = p.add_run(f"[Figure: {figure_path.name}]")
482
605
  run.italic = True
483
606
  else:
484
- # Add placeholder if embedding failed
607
+ # Add placeholder if embedding failed (centered)
485
608
  p = doc.add_paragraph()
609
+ p.alignment = WD_ALIGN_PARAGRAPH.CENTER
486
610
  run = p.add_run(f"[Figure: {figure_path.name}]")
487
611
  run.italic = True
488
612
  logger.warning(f"Could not embed figure: {figure_path}")
@@ -494,9 +618,12 @@ class DocxWriter:
494
618
  # Add small space before caption to separate from figure
495
619
  caption_para.paragraph_format.space_before = Pt(3)
496
620
 
497
- # Format as "Figure number: "
621
+ # Format as "Figure number: " or "Supp. Fig. number: "
498
622
  if figure_number:
499
- run = caption_para.add_run(f"Figure {figure_number}: ")
623
+ if is_supplementary:
624
+ run = caption_para.add_run(f"Supp. Fig. S{figure_number}. ")
625
+ else:
626
+ run = caption_para.add_run(f"Fig. {figure_number}. ")
500
627
  run.bold = True
501
628
  run.font.size = Pt(7)
502
629
  else:
@@ -522,21 +649,35 @@ class DocxWriter:
522
649
  run.bold = True
523
650
  if run_data.get("italic"):
524
651
  run.italic = True
652
+ if run_data.get("subscript"):
653
+ run.font.subscript = True
654
+ if run_data.get("superscript"):
655
+ run.font.superscript = True
525
656
  if run_data.get("code"):
526
657
  run.font.name = "Courier New"
527
658
  if run_data.get("xref"):
528
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
659
+ # Use color based on xref type
660
+ xref_type = run_data.get("xref_type", "cite")
661
+ run.font.highlight_color = self.get_xref_color(xref_type)
529
662
  if run_data.get("highlight_yellow"):
530
663
  run.font.highlight_color = WD_COLOR_INDEX.YELLOW
531
664
  elif run_data["type"] == "inline_equation":
532
665
  # Add inline equation as Office Math
533
666
  latex_content = run_data.get("latex", "")
534
667
  self._add_inline_equation(caption_para, latex_content)
668
+ elif run_data["type"] == "inline_comment":
669
+ # Add inline comment with gray highlighting
670
+ comment_text = run_data["text"]
671
+ run = caption_para.add_run(f"[Comment: {comment_text}]")
672
+ run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
673
+ run.italic = True
674
+ run.font.size = Pt(7)
535
675
  elif run_data["type"] == "citation":
536
676
  cite_num = run_data["number"]
537
677
  run = caption_para.add_run(f"[{cite_num}]")
538
678
  run.bold = True
539
679
  run.font.size = Pt(7)
680
+ run.font.highlight_color = WD_COLOR_INDEX.YELLOW
540
681
 
541
682
  # Add spacing after figure (reduced from 12 to 6 for compactness)
542
683
  caption_para.paragraph_format.space_after = Pt(6)
@@ -602,10 +743,16 @@ class DocxWriter:
602
743
  run.italic = True
603
744
  if run_data.get("underline"):
604
745
  run.underline = True
746
+ if run_data.get("subscript"):
747
+ run.font.subscript = True
748
+ if run_data.get("superscript"):
749
+ run.font.superscript = True
605
750
  if run_data.get("code"):
606
751
  run.font.name = "Courier New"
607
752
  if run_data.get("xref"):
608
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
753
+ # Use color based on xref type
754
+ xref_type = run_data.get("xref_type", "cite")
755
+ run.font.highlight_color = self.get_xref_color(xref_type)
609
756
 
610
757
  # Add table caption if present
611
758
  caption = section.get("caption")
@@ -616,16 +763,28 @@ class DocxWriter:
616
763
  # Add small space before caption to separate from table
617
764
  caption_para.paragraph_format.space_before = Pt(3)
618
765
 
619
- # Determine table number from label (e.g., "stable:structural_models" -> "Supp. Table 1")
766
+ # Determine table number from label using table_map
620
767
  if label and label.startswith("stable:"):
621
- # Count how many supplementary tables we've seen so far
622
- # For now, we'll just format as "Supp. Table: caption"
623
- # A more sophisticated approach would track table numbers
624
- run = caption_para.add_run("Supp. Table: ")
768
+ # Extract label name (e.g., "stable:parameters" -> "parameters")
769
+ label_name = label.split(":", 1)[1] if ":" in label else label
770
+ # Look up number in table_map
771
+ table_num = self.table_map.get(label_name)
772
+ if table_num:
773
+ run = caption_para.add_run(f"Supp. Table S{table_num}. ")
774
+ else:
775
+ # Fallback if label not in map
776
+ run = caption_para.add_run("Supp. Table: ")
625
777
  run.bold = True
626
778
  run.font.size = Pt(7)
627
779
  elif label and label.startswith("table:"):
628
- run = caption_para.add_run("Table: ")
780
+ # Extract label name for main tables
781
+ label_name = label.split(":", 1)[1] if ":" in label else label
782
+ # Look up number in table_map (though main tables may not be in map)
783
+ table_num = self.table_map.get(label_name)
784
+ if table_num:
785
+ run = caption_para.add_run(f"Table {table_num}. ")
786
+ else:
787
+ run = caption_para.add_run("Table: ")
629
788
  run.bold = True
630
789
  run.font.size = Pt(7)
631
790
 
@@ -642,10 +801,16 @@ class DocxWriter:
642
801
  run.italic = True
643
802
  if run_data.get("underline"):
644
803
  run.underline = True
804
+ if run_data.get("subscript"):
805
+ run.font.subscript = True
806
+ if run_data.get("superscript"):
807
+ run.font.superscript = True
645
808
  if run_data.get("code"):
646
809
  run.font.name = "Courier New"
647
810
  if run_data.get("xref"):
648
- run.font.highlight_color = WD_COLOR_INDEX.YELLOW
811
+ # Use color based on xref type
812
+ xref_type = run_data.get("xref_type", "cite")
813
+ run.font.highlight_color = self.get_xref_color(xref_type)
649
814
 
650
815
  # Add spacing after table (reduced from 12 to 6 for compactness)
651
816
  caption_para.paragraph_format.space_after = Pt(6)
@@ -286,6 +286,7 @@ output/
286
286
  .rxiv_cache/
287
287
  *.pdf
288
288
  *.docx
289
+ *.zip
289
290
  *.log
290
291
  *.aux
291
292
  *.fdb_latexmk