rxiv-maker 1.16.8__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,11 +45,18 @@ class DocxExporter:
45
45
  self.resolve_dois = resolve_dois
46
46
  self.include_footnotes = include_footnotes
47
47
 
48
- # Load config to get author name format preference
48
+ # Load config to get author name format preference and DOCX options
49
49
  config_manager = ConfigManager(base_dir=Path(manuscript_path))
50
50
  config = config_manager.load_config()
51
51
  self.author_format = config.get("bibliography_author_format", "lastname_firstname")
52
52
 
53
+ # DOCX export options
54
+ docx_config = config.get("docx", {})
55
+ self.hide_si = docx_config.get("hide_si", False) # Default to False (don't hide SI) for backwards compatibility
56
+ self.figures_at_end = docx_config.get("figures_at_end", False) # Default to False (inline figures)
57
+ self.hide_highlighting = docx_config.get("hide_highlighting", False) # Default to False (show highlights)
58
+ self.hide_comments = docx_config.get("hide_comments", False) # Default to False (include comments)
59
+
53
60
  # Components
54
61
  self.citation_mapper = CitationMapper()
55
62
  self.content_processor = DocxContentProcessor()
@@ -98,6 +105,13 @@ class DocxExporter:
98
105
  markdown_content = self._load_markdown()
99
106
  logger.debug(f"Loaded {len(markdown_content)} characters of markdown")
100
107
 
108
+ # Step 2.5: If SI is hidden from export, still load it for label mapping
109
+ si_content_for_mapping = ""
110
+ if self.hide_si:
111
+ si_content_for_mapping = self._load_si_for_mapping()
112
+ if si_content_for_mapping:
113
+ logger.info("📋 Loaded SI content for label mapping (SI section hidden from export)")
114
+
101
115
  # Step 3: Extract and map citations
102
116
  citations = self.citation_mapper.extract_citations_from_markdown(markdown_content)
103
117
  citation_map = self.citation_mapper.create_mapping(citations)
@@ -113,30 +127,33 @@ class DocxExporter:
113
127
  # Step 5.5: Replace figure and equation references with numbers
114
128
  import re
115
129
 
130
+ # Extract all labels using centralized utility
131
+ from ..utils.label_extractor import LabelExtractor
132
+
133
+ label_extractor = LabelExtractor()
134
+
116
135
  # Find all figures and create mapping
117
- # Allow hyphens and underscores in label names
118
- figure_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#fig:([\w-]+)", markdown_with_numbers)
119
- figure_map = {label: i + 1 for i, label in enumerate(figure_labels)}
136
+ figure_map = label_extractor.extract_figure_labels(markdown_with_numbers)
120
137
 
121
138
  # Replace @fig:label with "Fig. X" in text, handling optional panel letters
122
139
  # Pattern matches: @fig:label optionally followed by space and panel letter(s)
123
- # Use special markers <<XREF>> to enable yellow highlighting in DOCX
140
+ # Use special markers <<XREF:type>> to enable color-coded highlighting in DOCX
124
141
  for label, num in figure_map.items():
125
142
  # Match @fig:label with optional panel letters like " a", " a,b", " a-c"
126
143
  # Use negative lookahead (?![a-z]) to prevent matching start of words like " is", " and"
127
144
  # Panel letters must be followed by non-letter (space, punctuation, end of string)
128
145
  markdown_with_numbers = re.sub(
129
146
  rf"@fig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
130
- lambda m, num=num: f"<<XREF>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
147
+ lambda m, num=num: f"<<XREF:fig>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
131
148
  markdown_with_numbers,
132
149
  )
133
150
 
134
151
  logger.debug(f"Mapped {len(figure_map)} figure labels to numbers")
135
152
 
136
153
  # Find all supplementary figures and create mapping
137
- # Allow hyphens and underscores in label names
138
- sfig_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#sfig:([\w-]+)", markdown_with_numbers)
139
- sfig_map = {label: i + 1 for i, label in enumerate(sfig_labels)}
154
+ # IMPORTANT: When SI is excluded, extract from SI content (where figures are defined)
155
+ content_to_scan_for_sfigs = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
156
+ sfig_map = label_extractor.extract_supplementary_figure_labels(content_to_scan_for_sfigs)
140
157
 
141
158
  # Replace @sfig:label with "Supp. Fig. X" in text, handling optional panel letters
142
159
  for label, num in sfig_map.items():
@@ -144,60 +161,57 @@ class DocxExporter:
144
161
  # Negative lookahead prevents matching start of words
145
162
  markdown_with_numbers = re.sub(
146
163
  rf"@sfig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
147
- lambda m, num=num: f"<<XREF>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
164
+ lambda m, num=num: f"<<XREF:sfig>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
148
165
  markdown_with_numbers,
149
166
  )
150
167
 
151
168
  logger.debug(f"Mapped {len(sfig_map)} supplementary figure labels to numbers")
152
169
 
153
- # Find all tables and create mapping (looking for {#stable:label} tags)
154
- # Allow hyphens and underscores in label names
155
- table_labels = re.findall(r"\{#stable:([\w-]+)\}", markdown_with_numbers)
156
- table_map = {label: i + 1 for i, label in enumerate(table_labels)}
170
+ # Find all tables and create mapping (looking for {#stable:label} or \label{stable:label} tags)
171
+ # IMPORTANT: PDF uses the order that tables are DEFINED in the document (order of \label{stable:X})
172
+ # When SI is excluded from export, we still need to extract labels from SI
173
+ content_to_scan_for_tables = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
174
+ table_map = label_extractor.extract_supplementary_table_labels(content_to_scan_for_tables)
175
+ logger.debug(f"Mapped {len(table_map)} supplementary tables: {table_map}")
157
176
 
158
177
  # Replace @stable:label with "Supp. Table X" in text
159
178
  for label, num in table_map.items():
160
179
  markdown_with_numbers = re.sub(
161
- rf"@stable:{label}\b", f"<<XREF>>Supp. Table {num}<</XREF>>", markdown_with_numbers
180
+ rf"@stable:{label}\b", f"<<XREF:stable>>Supp. Table {num}<</XREF>>", markdown_with_numbers
162
181
  )
163
182
 
164
- logger.debug(f"Mapped {len(table_map)} supplementary table labels to numbers")
165
-
166
183
  # Find all supplementary notes and create mapping (looking for {#snote:label} tags)
167
- # Allow hyphens and underscores in label names
168
- snote_labels = re.findall(r"\{#snote:([\w-]+)\}", markdown_with_numbers)
169
- snote_map = {label: i + 1 for i, label in enumerate(snote_labels)}
184
+ # IMPORTANT: When SI is excluded, extract from SI content (where notes are defined)
185
+ content_to_scan_for_snotes = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
186
+ snote_map = label_extractor.extract_supplementary_note_labels(content_to_scan_for_snotes)
170
187
 
171
188
  # Replace @snote:label with "Supp. Note X" in text
172
189
  for label, num in snote_map.items():
173
190
  markdown_with_numbers = re.sub(
174
- rf"@snote:{label}\b", f"<<XREF>>Supp. Note {num}<</XREF>>", markdown_with_numbers
191
+ rf"@snote:{label}\b", f"<<XREF:snote>>Supp. Note {num}<</XREF>>", markdown_with_numbers
175
192
  )
176
193
 
177
194
  logger.debug(f"Mapped {len(snote_map)} supplementary note labels to numbers")
178
195
 
179
196
  # Find all equations and create mapping (looking for {#eq:label} tags)
180
- # Allow hyphens and underscores in label names
181
- equation_labels = re.findall(r"\{#eq:([\w-]+)\}", markdown_with_numbers)
182
- equation_map = {label: i + 1 for i, label in enumerate(equation_labels)}
197
+ equation_map = label_extractor.extract_equation_labels(markdown_with_numbers)
183
198
 
184
199
  # Replace @eq:label with "Eq. X"
185
200
  # Handle both @eq:label and (@eq:label) formats
186
201
  for label, num in equation_map.items():
187
202
  # Replace (@eq:label) with (Eq. X)
188
203
  markdown_with_numbers = re.sub(
189
- rf"\(@eq:{label}\b\)", f"(<<XREF>>Eq. {num}<</XREF>>)", markdown_with_numbers
204
+ rf"\(@eq:{label}\b\)", f"(<<XREF:eq>>Eq. {num}<</XREF>>)", markdown_with_numbers
190
205
  )
191
206
  # Replace @eq:label with Eq. X
192
- markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF>>Eq. {num}<</XREF>>", markdown_with_numbers)
207
+ markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF:eq>>Eq. {num}<</XREF>>", markdown_with_numbers)
193
208
 
194
209
  logger.debug(f"Mapped {len(equation_map)} equation labels to numbers")
195
210
 
196
211
  # Step 5.6: Remove label markers now that mapping is complete
197
212
  # These metadata markers should not appear in the final output
198
- markdown_with_numbers = re.sub(
199
- r"^\{#(?:fig|sfig|snote|stable|table|eq):[^}]+\}\s*", "", markdown_with_numbers, flags=re.MULTILINE
200
- )
213
+ # NOTE: Keep fig/sfig/stable/table labels - they're needed by content processor and removed during caption parsing
214
+ markdown_with_numbers = re.sub(r"^\{#(?:snote|eq):[^}]+\}\s*", "", markdown_with_numbers, flags=re.MULTILINE)
201
215
 
202
216
  # Step 6: Convert content to DOCX structure
203
217
  doc_structure = self.content_processor.parse(markdown_with_numbers, citation_map)
@@ -215,6 +229,10 @@ class DocxExporter:
215
229
  include_footnotes=self.include_footnotes,
216
230
  base_path=self.path_manager.manuscript_path,
217
231
  metadata=metadata,
232
+ table_map=table_map,
233
+ figures_at_end=self.figures_at_end,
234
+ hide_highlighting=self.hide_highlighting,
235
+ hide_comments=self.hide_comments,
218
236
  )
219
237
  logger.info(f"DOCX exported successfully: {docx_path}")
220
238
 
@@ -265,9 +283,9 @@ class DocxExporter:
265
283
 
266
284
  content.append(main_content)
267
285
 
268
- # Load 02_SUPPLEMENTARY_INFO.md if exists
286
+ # Load 02_SUPPLEMENTARY_INFO.md if exists and not configured to hide SI
269
287
  supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
270
- if supp_md.exists():
288
+ if supp_md.exists() and not self.hide_si:
271
289
  logger.info("Including supplementary information")
272
290
  supp_content = supp_md.read_text(encoding="utf-8")
273
291
  supp_content = remove_yaml_header(supp_content)
@@ -281,11 +299,36 @@ class DocxExporter:
281
299
  content.append("<!-- PAGE_BREAK -->")
282
300
  content.append("# Supplementary Information")
283
301
  content.append(supp_content)
302
+ elif supp_md.exists() and self.hide_si:
303
+ logger.info("Supplementary information exists but hidden per config (docx.hide_si: true)")
284
304
  else:
285
305
  logger.debug("No supplementary information file found")
286
306
 
287
307
  return "\n\n".join(content)
288
308
 
309
+ def _load_si_for_mapping(self) -> str:
310
+ r"""Load SI content for label mapping without including in export.
311
+
312
+ This method is used when hide_si is True but we still need to extract
313
+ SI labels (stable, sfig, snote) for cross-references in the main text.
314
+
315
+ IMPORTANT: We return RAW content (before preprocessing) because we need to
316
+ extract LaTeX labels (\label{stable:X}) which determine the PDF numbering order.
317
+ The preprocessor strips out {{tex: blocks, losing this ordering information.
318
+
319
+ Returns:
320
+ SI content as string (raw, before preprocessing), or empty string if SI doesn't exist
321
+ """
322
+ supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
323
+ if not supp_md.exists():
324
+ return ""
325
+
326
+ # Load RAW SI content (don't preprocess - we need LaTeX labels for ordering)
327
+ supp_content = supp_md.read_text(encoding="utf-8")
328
+ supp_content = remove_yaml_header(supp_content)
329
+
330
+ return supp_content
331
+
289
332
  def _build_bibliography(self, citation_map: Dict[str, int]) -> Dict[int, Dict]:
290
333
  """Build bibliography with optional DOI resolution.
291
334
 
@@ -327,7 +370,8 @@ class DocxExporter:
327
370
  logger.info(f"Resolved DOI for {key}: {doi}")
328
371
 
329
372
  # Format entry (full format for DOCX bibliography)
330
- formatted = format_bibliography_entry(entry, doi, slim=False, author_format=self.author_format)
373
+ # Don't include DOI in formatted text - it will be added separately as a hyperlink by the writer
374
+ formatted = format_bibliography_entry(entry, doi=None, slim=False, author_format=self.author_format)
331
375
 
332
376
  bibliography[number] = {"key": key, "entry": entry, "doi": doi, "formatted": formatted}
333
377