rxiv-maker 1.16.7__py3-none-any.whl → 1.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxiv_maker/__version__.py +1 -1
- rxiv_maker/cli/commands/build.py +7 -0
- rxiv_maker/cli/commands/docx.py +74 -0
- rxiv_maker/cli/framework/workflow_commands.py +66 -2
- rxiv_maker/converters/citation_processor.py +5 -3
- rxiv_maker/core/managers/config_manager.py +1 -0
- rxiv_maker/core/managers/dependency_manager.py +12 -0
- rxiv_maker/exporters/docx_citation_mapper.py +99 -0
- rxiv_maker/exporters/docx_content_processor.py +128 -30
- rxiv_maker/exporters/docx_exporter.py +179 -24
- rxiv_maker/exporters/docx_writer.py +227 -27
- rxiv_maker/templates/registry.py +1 -0
- rxiv_maker/tex/style/rxiv_maker_style.cls +33 -33
- rxiv_maker/utils/bst_generator.py +27 -7
- rxiv_maker/utils/docx_helpers.py +62 -3
- rxiv_maker/utils/pdf_splitter.py +116 -0
- {rxiv_maker-1.16.7.dist-info → rxiv_maker-1.17.0.dist-info}/METADATA +2 -1
- {rxiv_maker-1.16.7.dist-info → rxiv_maker-1.17.0.dist-info}/RECORD +21 -20
- {rxiv_maker-1.16.7.dist-info → rxiv_maker-1.17.0.dist-info}/WHEEL +0 -0
- {rxiv_maker-1.16.7.dist-info → rxiv_maker-1.17.0.dist-info}/entry_points.txt +0 -0
- {rxiv_maker-1.16.7.dist-info → rxiv_maker-1.17.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -45,11 +45,16 @@ class DocxExporter:
|
|
|
45
45
|
self.resolve_dois = resolve_dois
|
|
46
46
|
self.include_footnotes = include_footnotes
|
|
47
47
|
|
|
48
|
-
# Load config to get author name format preference
|
|
48
|
+
# Load config to get author name format preference and DOCX options
|
|
49
49
|
config_manager = ConfigManager(base_dir=Path(manuscript_path))
|
|
50
50
|
config = config_manager.load_config()
|
|
51
51
|
self.author_format = config.get("bibliography_author_format", "lastname_firstname")
|
|
52
52
|
|
|
53
|
+
# DOCX export options
|
|
54
|
+
docx_config = config.get("docx", {})
|
|
55
|
+
self.hide_si = docx_config.get("hide_si", False) # Default to False (don't hide SI) for backwards compatibility
|
|
56
|
+
self.figures_at_end = docx_config.get("figures_at_end", False) # Default to False (inline figures)
|
|
57
|
+
|
|
53
58
|
# Components
|
|
54
59
|
self.citation_mapper = CitationMapper()
|
|
55
60
|
self.content_processor = DocxContentProcessor()
|
|
@@ -98,6 +103,13 @@ class DocxExporter:
|
|
|
98
103
|
markdown_content = self._load_markdown()
|
|
99
104
|
logger.debug(f"Loaded {len(markdown_content)} characters of markdown")
|
|
100
105
|
|
|
106
|
+
# Step 2.5: If SI is hidden from export, still load it for label mapping
|
|
107
|
+
si_content_for_mapping = ""
|
|
108
|
+
if self.hide_si:
|
|
109
|
+
si_content_for_mapping = self._load_si_for_mapping()
|
|
110
|
+
if si_content_for_mapping:
|
|
111
|
+
logger.info("📋 Loaded SI content for label mapping (SI section hidden from export)")
|
|
112
|
+
|
|
101
113
|
# Step 3: Extract and map citations
|
|
102
114
|
citations = self.citation_mapper.extract_citations_from_markdown(markdown_content)
|
|
103
115
|
citation_map = self.citation_mapper.create_mapping(citations)
|
|
@@ -120,14 +132,14 @@ class DocxExporter:
|
|
|
120
132
|
|
|
121
133
|
# Replace @fig:label with "Fig. X" in text, handling optional panel letters
|
|
122
134
|
# Pattern matches: @fig:label optionally followed by space and panel letter(s)
|
|
123
|
-
# Use special markers <<XREF>> to enable
|
|
135
|
+
# Use special markers <<XREF:type>> to enable color-coded highlighting in DOCX
|
|
124
136
|
for label, num in figure_map.items():
|
|
125
137
|
# Match @fig:label with optional panel letters like " a", " a,b", " a-c"
|
|
126
138
|
# Use negative lookahead (?![a-z]) to prevent matching start of words like " is", " and"
|
|
127
139
|
# Panel letters must be followed by non-letter (space, punctuation, end of string)
|
|
128
140
|
markdown_with_numbers = re.sub(
|
|
129
141
|
rf"@fig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
|
|
130
|
-
lambda m, num=num: f"<<XREF>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
142
|
+
lambda m, num=num: f"<<XREF:fig>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
131
143
|
markdown_with_numbers,
|
|
132
144
|
)
|
|
133
145
|
|
|
@@ -135,7 +147,9 @@ class DocxExporter:
|
|
|
135
147
|
|
|
136
148
|
# Find all supplementary figures and create mapping
|
|
137
149
|
# Allow hyphens and underscores in label names
|
|
138
|
-
|
|
150
|
+
# IMPORTANT: When SI is excluded, extract from SI content (where figures are defined)
|
|
151
|
+
content_to_scan_for_sfigs = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
152
|
+
sfig_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#sfig:([\w-]+)", content_to_scan_for_sfigs)
|
|
139
153
|
sfig_map = {label: i + 1 for i, label in enumerate(sfig_labels)}
|
|
140
154
|
|
|
141
155
|
# Replace @sfig:label with "Supp. Fig. X" in text, handling optional panel letters
|
|
@@ -144,34 +158,51 @@ class DocxExporter:
|
|
|
144
158
|
# Negative lookahead prevents matching start of words
|
|
145
159
|
markdown_with_numbers = re.sub(
|
|
146
160
|
rf"@sfig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
|
|
147
|
-
lambda m, num=num: f"<<XREF>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
161
|
+
lambda m, num=num: f"<<XREF:sfig>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
148
162
|
markdown_with_numbers,
|
|
149
163
|
)
|
|
150
164
|
|
|
151
165
|
logger.debug(f"Mapped {len(sfig_map)} supplementary figure labels to numbers")
|
|
152
166
|
|
|
153
|
-
# Find all tables and create mapping (looking for {#stable:label} tags)
|
|
154
|
-
#
|
|
155
|
-
|
|
167
|
+
# Find all tables and create mapping (looking for {#stable:label} or \label{stable:label} tags)
|
|
168
|
+
# IMPORTANT: PDF uses the order that tables are DEFINED in the document (order of \label{stable:X})
|
|
169
|
+
# NOT the order of caption references (%{#stable:X}) which are just metadata
|
|
170
|
+
# When SI is excluded from export, we still need to extract labels from SI
|
|
171
|
+
|
|
172
|
+
content_to_scan_for_tables = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
173
|
+
|
|
174
|
+
# Extract table labels in document order (both {#stable:label} markdown format and \label{stable:label} LaTeX format)
|
|
175
|
+
# The PDF numbering follows the order these labels appear in the document
|
|
176
|
+
markdown_labels = re.findall(r"\{#stable:([\w-]+)\}", content_to_scan_for_tables)
|
|
177
|
+
latex_labels = re.findall(r"\\label\{stable:([\w-]+)\}", content_to_scan_for_tables)
|
|
178
|
+
|
|
179
|
+
# Combine both formats, preferring LaTeX labels if present (since that's what PDF uses)
|
|
180
|
+
table_labels = latex_labels if latex_labels else markdown_labels
|
|
181
|
+
|
|
182
|
+
# Remove duplicates while preserving order
|
|
183
|
+
seen = set()
|
|
184
|
+
table_labels = [label for label in table_labels if not (label in seen or seen.add(label))]
|
|
185
|
+
|
|
156
186
|
table_map = {label: i + 1 for i, label in enumerate(table_labels)}
|
|
187
|
+
logger.debug(f"Mapped {len(table_map)} supplementary tables: {table_map}")
|
|
157
188
|
|
|
158
189
|
# Replace @stable:label with "Supp. Table X" in text
|
|
159
190
|
for label, num in table_map.items():
|
|
160
191
|
markdown_with_numbers = re.sub(
|
|
161
|
-
rf"@stable:{label}\b", f"<<XREF>>Supp. Table {num}<</XREF>>", markdown_with_numbers
|
|
192
|
+
rf"@stable:{label}\b", f"<<XREF:stable>>Supp. Table {num}<</XREF>>", markdown_with_numbers
|
|
162
193
|
)
|
|
163
194
|
|
|
164
|
-
logger.debug(f"Mapped {len(table_map)} supplementary table labels to numbers")
|
|
165
|
-
|
|
166
195
|
# Find all supplementary notes and create mapping (looking for {#snote:label} tags)
|
|
167
196
|
# Allow hyphens and underscores in label names
|
|
168
|
-
|
|
197
|
+
# IMPORTANT: When SI is excluded, extract from SI content (where notes are defined)
|
|
198
|
+
content_to_scan_for_snotes = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
199
|
+
snote_labels = re.findall(r"\{#snote:([\w-]+)\}", content_to_scan_for_snotes)
|
|
169
200
|
snote_map = {label: i + 1 for i, label in enumerate(snote_labels)}
|
|
170
201
|
|
|
171
202
|
# Replace @snote:label with "Supp. Note X" in text
|
|
172
203
|
for label, num in snote_map.items():
|
|
173
204
|
markdown_with_numbers = re.sub(
|
|
174
|
-
rf"@snote:{label}\b", f"<<XREF>>Supp. Note {num}<</XREF>>", markdown_with_numbers
|
|
205
|
+
rf"@snote:{label}\b", f"<<XREF:snote>>Supp. Note {num}<</XREF>>", markdown_with_numbers
|
|
175
206
|
)
|
|
176
207
|
|
|
177
208
|
logger.debug(f"Mapped {len(snote_map)} supplementary note labels to numbers")
|
|
@@ -186,18 +217,17 @@ class DocxExporter:
|
|
|
186
217
|
for label, num in equation_map.items():
|
|
187
218
|
# Replace (@eq:label) with (Eq. X)
|
|
188
219
|
markdown_with_numbers = re.sub(
|
|
189
|
-
rf"\(@eq:{label}\b\)", f"(<<XREF>>Eq. {num}<</XREF>>)", markdown_with_numbers
|
|
220
|
+
rf"\(@eq:{label}\b\)", f"(<<XREF:eq>>Eq. {num}<</XREF>>)", markdown_with_numbers
|
|
190
221
|
)
|
|
191
222
|
# Replace @eq:label with Eq. X
|
|
192
|
-
markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF>>Eq. {num}<</XREF>>", markdown_with_numbers)
|
|
223
|
+
markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF:eq>>Eq. {num}<</XREF>>", markdown_with_numbers)
|
|
193
224
|
|
|
194
225
|
logger.debug(f"Mapped {len(equation_map)} equation labels to numbers")
|
|
195
226
|
|
|
196
227
|
# Step 5.6: Remove label markers now that mapping is complete
|
|
197
228
|
# These metadata markers should not appear in the final output
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
)
|
|
229
|
+
# NOTE: Keep fig/sfig/stable/table labels - they're needed by content processor and removed during caption parsing
|
|
230
|
+
markdown_with_numbers = re.sub(r"^\{#(?:snote|eq):[^}]+\}\s*", "", markdown_with_numbers, flags=re.MULTILINE)
|
|
201
231
|
|
|
202
232
|
# Step 6: Convert content to DOCX structure
|
|
203
233
|
doc_structure = self.content_processor.parse(markdown_with_numbers, citation_map)
|
|
@@ -215,6 +245,8 @@ class DocxExporter:
|
|
|
215
245
|
include_footnotes=self.include_footnotes,
|
|
216
246
|
base_path=self.path_manager.manuscript_path,
|
|
217
247
|
metadata=metadata,
|
|
248
|
+
table_map=table_map,
|
|
249
|
+
figures_at_end=self.figures_at_end,
|
|
218
250
|
)
|
|
219
251
|
logger.info(f"DOCX exported successfully: {docx_path}")
|
|
220
252
|
|
|
@@ -265,9 +297,9 @@ class DocxExporter:
|
|
|
265
297
|
|
|
266
298
|
content.append(main_content)
|
|
267
299
|
|
|
268
|
-
# Load 02_SUPPLEMENTARY_INFO.md if exists
|
|
300
|
+
# Load 02_SUPPLEMENTARY_INFO.md if exists and not configured to hide SI
|
|
269
301
|
supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
|
|
270
|
-
if supp_md.exists():
|
|
302
|
+
if supp_md.exists() and not self.hide_si:
|
|
271
303
|
logger.info("Including supplementary information")
|
|
272
304
|
supp_content = supp_md.read_text(encoding="utf-8")
|
|
273
305
|
supp_content = remove_yaml_header(supp_content)
|
|
@@ -281,11 +313,36 @@ class DocxExporter:
|
|
|
281
313
|
content.append("<!-- PAGE_BREAK -->")
|
|
282
314
|
content.append("# Supplementary Information")
|
|
283
315
|
content.append(supp_content)
|
|
316
|
+
elif supp_md.exists() and self.hide_si:
|
|
317
|
+
logger.info("Supplementary information exists but hidden per config (docx.hide_si: true)")
|
|
284
318
|
else:
|
|
285
319
|
logger.debug("No supplementary information file found")
|
|
286
320
|
|
|
287
321
|
return "\n\n".join(content)
|
|
288
322
|
|
|
323
|
+
def _load_si_for_mapping(self) -> str:
|
|
324
|
+
r"""Load SI content for label mapping without including in export.
|
|
325
|
+
|
|
326
|
+
This method is used when hide_si is True but we still need to extract
|
|
327
|
+
SI labels (stable, sfig, snote) for cross-references in the main text.
|
|
328
|
+
|
|
329
|
+
IMPORTANT: We return RAW content (before preprocessing) because we need to
|
|
330
|
+
extract LaTeX labels (\label{stable:X}) which determine the PDF numbering order.
|
|
331
|
+
The preprocessor strips out {{tex: blocks, losing this ordering information.
|
|
332
|
+
|
|
333
|
+
Returns:
|
|
334
|
+
SI content as string (raw, before preprocessing), or empty string if SI doesn't exist
|
|
335
|
+
"""
|
|
336
|
+
supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
|
|
337
|
+
if not supp_md.exists():
|
|
338
|
+
return ""
|
|
339
|
+
|
|
340
|
+
# Load RAW SI content (don't preprocess - we need LaTeX labels for ordering)
|
|
341
|
+
supp_content = supp_md.read_text(encoding="utf-8")
|
|
342
|
+
supp_content = remove_yaml_header(supp_content)
|
|
343
|
+
|
|
344
|
+
return supp_content
|
|
345
|
+
|
|
289
346
|
def _build_bibliography(self, citation_map: Dict[str, int]) -> Dict[int, Dict]:
|
|
290
347
|
"""Build bibliography with optional DOI resolution.
|
|
291
348
|
|
|
@@ -318,12 +375,17 @@ class DocxExporter:
|
|
|
318
375
|
# Get DOI from entry
|
|
319
376
|
doi = entry.fields.get("doi")
|
|
320
377
|
|
|
321
|
-
#
|
|
322
|
-
|
|
323
|
-
|
|
378
|
+
# Attempt DOI resolution if requested and DOI missing
|
|
379
|
+
if self.resolve_dois and not doi:
|
|
380
|
+
doi = self._resolve_doi_from_metadata(entry)
|
|
381
|
+
if doi:
|
|
382
|
+
# Store in entry for this export
|
|
383
|
+
entry.fields["doi"] = doi
|
|
384
|
+
logger.info(f"Resolved DOI for {key}: {doi}")
|
|
324
385
|
|
|
325
386
|
# Format entry (full format for DOCX bibliography)
|
|
326
|
-
formatted
|
|
387
|
+
# Don't include DOI in formatted text - it will be added separately as a hyperlink by the writer
|
|
388
|
+
formatted = format_bibliography_entry(entry, doi=None, slim=False, author_format=self.author_format)
|
|
327
389
|
|
|
328
390
|
bibliography[number] = {"key": key, "entry": entry, "doi": doi, "formatted": formatted}
|
|
329
391
|
|
|
@@ -332,6 +394,99 @@ class DocxExporter:
|
|
|
332
394
|
|
|
333
395
|
return bibliography
|
|
334
396
|
|
|
397
|
+
def _resolve_doi_from_metadata(self, entry) -> str | None:
    """Resolve a missing DOI by searching the CrossRef API for the entry's title.

    The lookup is best-effort: timeouts, connection errors and any other
    failure are logged at debug level and reported as "not found", so a
    failed lookup never aborts the export.

    Args:
        entry: Bibliography entry to resolve a DOI for. Its ``fields`` mapping
            is read for ``title`` and ``year``; its ``key`` is used in log
            messages.

    Returns:
        Resolved DOI if one of the top CrossRef results matches the title
        (and the year, when the entry has one), None otherwise.
    """
    # Try to construct a search query from available fields
    title = entry.fields.get("title", "").strip()
    year = entry.fields.get("year", "").strip()

    if not title:
        logger.debug(f"Cannot resolve DOI for {entry.key}: no title")
        return None

    # Clean title for search (remove LaTeX commands, braces, etc.)
    search_title = self._clean_title_for_search(title)
    if not search_title:
        # An empty string is a substring of every candidate title, so without
        # this guard the first CrossRef hit would be accepted as a "match".
        logger.debug(f"Cannot resolve DOI for {entry.key}: no searchable title")
        return None
    search_title_lower = search_title.lower()

    # Imported only once a network lookup is actually going to happen.
    import requests

    # Try CrossRef search API
    try:
        url = "https://api.crossref.org/works"
        params = {
            "query.title": search_title,
            "rows": 5,  # Get top 5 results
        }

        response = requests.get(url, params=params, timeout=10)

        if response.status_code == 200:
            data = response.json()
            items = data.get("message", {}).get("items", [])

            # Find best match
            for item in items:
                # "title" may be missing, null or an empty list; treat all of
                # those as "no title" instead of raising and losing the
                # remaining candidates.
                candidate_titles = item.get("title") or [""]
                # Normalize the candidate the same way as the query so that
                # punctuation (":", ",", "'") does not prevent a match.
                item_title = self._clean_title_for_search(candidate_titles[0] or "").lower()

                # Simple similarity check - titles should be very similar
                if not item_title:
                    continue
                if search_title_lower not in item_title and item_title not in search_title_lower:
                    continue

                # Verify year matches if available
                if year:
                    date_parts = (item.get("published") or {}).get("date-parts") or [[None]]
                    first_date = date_parts[0] or [None]
                    item_year = first_date[0]
                    if item_year and str(item_year) != year:
                        continue

                doi = item.get("DOI")
                if doi:
                    logger.info(f"Resolved DOI for {entry.key}: {doi}")
                    return doi

        logger.debug(f"Could not resolve DOI for {entry.key} via CrossRef")
        return None

    except requests.exceptions.Timeout:
        logger.debug(f"CrossRef API timeout resolving DOI for {entry.key}")
        return None
    except requests.exceptions.ConnectionError:
        logger.debug(f"CrossRef API connection error for {entry.key}")
        return None
    except Exception as e:
        # Deliberate best-effort boundary: DOI resolution is optional, so any
        # unexpected failure is logged and treated as "not found".
        logger.debug(f"Error resolving DOI for {entry.key}: {e}")
        return None
|
|
463
|
+
|
|
464
|
+
def _clean_title_for_search(self, title: str) -> str:
|
|
465
|
+
"""Clean title for CrossRef search by removing LaTeX commands.
|
|
466
|
+
|
|
467
|
+
Args:
|
|
468
|
+
title: Raw title from BibTeX entry
|
|
469
|
+
|
|
470
|
+
Returns:
|
|
471
|
+
Cleaned title suitable for search
|
|
472
|
+
"""
|
|
473
|
+
import re
|
|
474
|
+
|
|
475
|
+
# Remove LaTeX commands
|
|
476
|
+
title = re.sub(r"\\[a-zA-Z]+\{([^}]*)\}", r"\1", title) # \textit{foo} -> foo
|
|
477
|
+
title = re.sub(r"\\[a-zA-Z]+", "", title) # \LaTeX -> LaTeX
|
|
478
|
+
|
|
479
|
+
# Remove braces
|
|
480
|
+
title = title.replace("{", "").replace("}", "")
|
|
481
|
+
|
|
482
|
+
# Remove special characters
|
|
483
|
+
title = re.sub(r"[^a-zA-Z0-9\s\-]", " ", title)
|
|
484
|
+
|
|
485
|
+
# Normalize whitespace
|
|
486
|
+
title = " ".join(title.split())
|
|
487
|
+
|
|
488
|
+
return title.strip()
|
|
489
|
+
|
|
335
490
|
def _get_metadata(self) -> Dict[str, Any]:
|
|
336
491
|
"""Extract metadata for title page.
|
|
337
492
|
|