rxiv-maker 1.16.8__py3-none-any.whl → 1.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxiv_maker/__version__.py +1 -1
- rxiv_maker/cli/commands/build.py +7 -0
- rxiv_maker/cli/framework/workflow_commands.py +69 -3
- rxiv_maker/converters/citation_processor.py +5 -3
- rxiv_maker/core/managers/config_manager.py +1 -0
- rxiv_maker/exporters/docx_citation_mapper.py +18 -0
- rxiv_maker/exporters/docx_content_processor.py +110 -30
- rxiv_maker/exporters/docx_exporter.py +76 -32
- rxiv_maker/exporters/docx_writer.py +345 -67
- rxiv_maker/templates/registry.py +1 -0
- rxiv_maker/tex/style/rxiv_maker_style.cls +33 -33
- rxiv_maker/utils/accent_character_map.py +150 -0
- rxiv_maker/utils/author_affiliation_processor.py +128 -0
- rxiv_maker/utils/citation_range_formatter.py +118 -0
- rxiv_maker/utils/comment_filter.py +46 -0
- rxiv_maker/utils/docx_helpers.py +43 -118
- rxiv_maker/utils/label_extractor.py +185 -0
- rxiv_maker/utils/pdf_splitter.py +116 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/METADATA +2 -1
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/RECORD +23 -17
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/WHEEL +0 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/entry_points.txt +0 -0
- {rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -45,11 +45,18 @@ class DocxExporter:
|
|
|
45
45
|
self.resolve_dois = resolve_dois
|
|
46
46
|
self.include_footnotes = include_footnotes
|
|
47
47
|
|
|
48
|
-
# Load config to get author name format preference
|
|
48
|
+
# Load config to get author name format preference and DOCX options
|
|
49
49
|
config_manager = ConfigManager(base_dir=Path(manuscript_path))
|
|
50
50
|
config = config_manager.load_config()
|
|
51
51
|
self.author_format = config.get("bibliography_author_format", "lastname_firstname")
|
|
52
52
|
|
|
53
|
+
# DOCX export options
|
|
54
|
+
docx_config = config.get("docx", {})
|
|
55
|
+
self.hide_si = docx_config.get("hide_si", False) # Default to False (don't hide SI) for backwards compatibility
|
|
56
|
+
self.figures_at_end = docx_config.get("figures_at_end", False) # Default to False (inline figures)
|
|
57
|
+
self.hide_highlighting = docx_config.get("hide_highlighting", False) # Default to False (show highlights)
|
|
58
|
+
self.hide_comments = docx_config.get("hide_comments", False) # Default to False (include comments)
|
|
59
|
+
|
|
53
60
|
# Components
|
|
54
61
|
self.citation_mapper = CitationMapper()
|
|
55
62
|
self.content_processor = DocxContentProcessor()
|
|
@@ -98,6 +105,13 @@ class DocxExporter:
|
|
|
98
105
|
markdown_content = self._load_markdown()
|
|
99
106
|
logger.debug(f"Loaded {len(markdown_content)} characters of markdown")
|
|
100
107
|
|
|
108
|
+
# Step 2.5: If SI is hidden from export, still load it for label mapping
|
|
109
|
+
si_content_for_mapping = ""
|
|
110
|
+
if self.hide_si:
|
|
111
|
+
si_content_for_mapping = self._load_si_for_mapping()
|
|
112
|
+
if si_content_for_mapping:
|
|
113
|
+
logger.info("📋 Loaded SI content for label mapping (SI section hidden from export)")
|
|
114
|
+
|
|
101
115
|
# Step 3: Extract and map citations
|
|
102
116
|
citations = self.citation_mapper.extract_citations_from_markdown(markdown_content)
|
|
103
117
|
citation_map = self.citation_mapper.create_mapping(citations)
|
|
@@ -113,30 +127,33 @@ class DocxExporter:
|
|
|
113
127
|
# Step 5.5: Replace figure and equation references with numbers
|
|
114
128
|
import re
|
|
115
129
|
|
|
130
|
+
# Extract all labels using centralized utility
|
|
131
|
+
from ..utils.label_extractor import LabelExtractor
|
|
132
|
+
|
|
133
|
+
label_extractor = LabelExtractor()
|
|
134
|
+
|
|
116
135
|
# Find all figures and create mapping
|
|
117
|
-
|
|
118
|
-
figure_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#fig:([\w-]+)", markdown_with_numbers)
|
|
119
|
-
figure_map = {label: i + 1 for i, label in enumerate(figure_labels)}
|
|
136
|
+
figure_map = label_extractor.extract_figure_labels(markdown_with_numbers)
|
|
120
137
|
|
|
121
138
|
# Replace @fig:label with "Fig. X" in text, handling optional panel letters
|
|
122
139
|
# Pattern matches: @fig:label optionally followed by space and panel letter(s)
|
|
123
|
-
# Use special markers <<XREF>> to enable
|
|
140
|
+
# Use special markers <<XREF:type>> to enable color-coded highlighting in DOCX
|
|
124
141
|
for label, num in figure_map.items():
|
|
125
142
|
# Match @fig:label with optional panel letters like " a", " a,b", " a-c"
|
|
126
143
|
# Use negative lookahead (?![a-z]) to prevent matching start of words like " is", " and"
|
|
127
144
|
# Panel letters must be followed by non-letter (space, punctuation, end of string)
|
|
128
145
|
markdown_with_numbers = re.sub(
|
|
129
146
|
rf"@fig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
|
|
130
|
-
lambda m, num=num: f"<<XREF>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
147
|
+
lambda m, num=num: f"<<XREF:fig>>Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
131
148
|
markdown_with_numbers,
|
|
132
149
|
)
|
|
133
150
|
|
|
134
151
|
logger.debug(f"Mapped {len(figure_map)} figure labels to numbers")
|
|
135
152
|
|
|
136
153
|
# Find all supplementary figures and create mapping
|
|
137
|
-
#
|
|
138
|
-
|
|
139
|
-
sfig_map =
|
|
154
|
+
# IMPORTANT: When SI is excluded, extract from SI content (where figures are defined)
|
|
155
|
+
content_to_scan_for_sfigs = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
156
|
+
sfig_map = label_extractor.extract_supplementary_figure_labels(content_to_scan_for_sfigs)
|
|
140
157
|
|
|
141
158
|
# Replace @sfig:label with "Supp. Fig. X" in text, handling optional panel letters
|
|
142
159
|
for label, num in sfig_map.items():
|
|
@@ -144,60 +161,57 @@ class DocxExporter:
|
|
|
144
161
|
# Negative lookahead prevents matching start of words
|
|
145
162
|
markdown_with_numbers = re.sub(
|
|
146
163
|
rf"@sfig:{label}\b(\s+[a-z](?:[,\-][a-z])*(?![a-z]))?",
|
|
147
|
-
lambda m, num=num: f"<<XREF>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
164
|
+
lambda m, num=num: f"<<XREF:sfig>>Supp. Fig. {num}{m.group(1) if m.group(1) else ''}<</XREF>>",
|
|
148
165
|
markdown_with_numbers,
|
|
149
166
|
)
|
|
150
167
|
|
|
151
168
|
logger.debug(f"Mapped {len(sfig_map)} supplementary figure labels to numbers")
|
|
152
169
|
|
|
153
|
-
# Find all tables and create mapping (looking for {#stable:label} tags)
|
|
154
|
-
#
|
|
155
|
-
|
|
156
|
-
|
|
170
|
+
# Find all tables and create mapping (looking for {#stable:label} or \label{stable:label} tags)
|
|
171
|
+
# IMPORTANT: PDF uses the order that tables are DEFINED in the document (order of \label{stable:X})
|
|
172
|
+
# When SI is excluded from export, we still need to extract labels from SI
|
|
173
|
+
content_to_scan_for_tables = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
174
|
+
table_map = label_extractor.extract_supplementary_table_labels(content_to_scan_for_tables)
|
|
175
|
+
logger.debug(f"Mapped {len(table_map)} supplementary tables: {table_map}")
|
|
157
176
|
|
|
158
177
|
# Replace @stable:label with "Supp. Table X" in text
|
|
159
178
|
for label, num in table_map.items():
|
|
160
179
|
markdown_with_numbers = re.sub(
|
|
161
|
-
rf"@stable:{label}\b", f"<<XREF>>Supp. Table {num}<</XREF>>", markdown_with_numbers
|
|
180
|
+
rf"@stable:{label}\b", f"<<XREF:stable>>Supp. Table {num}<</XREF>>", markdown_with_numbers
|
|
162
181
|
)
|
|
163
182
|
|
|
164
|
-
logger.debug(f"Mapped {len(table_map)} supplementary table labels to numbers")
|
|
165
|
-
|
|
166
183
|
# Find all supplementary notes and create mapping (looking for {#snote:label} tags)
|
|
167
|
-
#
|
|
168
|
-
|
|
169
|
-
snote_map =
|
|
184
|
+
# IMPORTANT: When SI is excluded, extract from SI content (where notes are defined)
|
|
185
|
+
content_to_scan_for_snotes = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
186
|
+
snote_map = label_extractor.extract_supplementary_note_labels(content_to_scan_for_snotes)
|
|
170
187
|
|
|
171
188
|
# Replace @snote:label with "Supp. Note X" in text
|
|
172
189
|
for label, num in snote_map.items():
|
|
173
190
|
markdown_with_numbers = re.sub(
|
|
174
|
-
rf"@snote:{label}\b", f"<<XREF>>Supp. Note {num}<</XREF>>", markdown_with_numbers
|
|
191
|
+
rf"@snote:{label}\b", f"<<XREF:snote>>Supp. Note {num}<</XREF>>", markdown_with_numbers
|
|
175
192
|
)
|
|
176
193
|
|
|
177
194
|
logger.debug(f"Mapped {len(snote_map)} supplementary note labels to numbers")
|
|
178
195
|
|
|
179
196
|
# Find all equations and create mapping (looking for {#eq:label} tags)
|
|
180
|
-
|
|
181
|
-
equation_labels = re.findall(r"\{#eq:([\w-]+)\}", markdown_with_numbers)
|
|
182
|
-
equation_map = {label: i + 1 for i, label in enumerate(equation_labels)}
|
|
197
|
+
equation_map = label_extractor.extract_equation_labels(markdown_with_numbers)
|
|
183
198
|
|
|
184
199
|
# Replace @eq:label with "Eq. X"
|
|
185
200
|
# Handle both @eq:label and (@eq:label) formats
|
|
186
201
|
for label, num in equation_map.items():
|
|
187
202
|
# Replace (@eq:label) with (Eq. X)
|
|
188
203
|
markdown_with_numbers = re.sub(
|
|
189
|
-
rf"\(@eq:{label}\b\)", f"(<<XREF>>Eq. {num}<</XREF>>)", markdown_with_numbers
|
|
204
|
+
rf"\(@eq:{label}\b\)", f"(<<XREF:eq>>Eq. {num}<</XREF>>)", markdown_with_numbers
|
|
190
205
|
)
|
|
191
206
|
# Replace @eq:label with Eq. X
|
|
192
|
-
markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF>>Eq. {num}<</XREF>>", markdown_with_numbers)
|
|
207
|
+
markdown_with_numbers = re.sub(rf"@eq:{label}\b", f"<<XREF:eq>>Eq. {num}<</XREF>>", markdown_with_numbers)
|
|
193
208
|
|
|
194
209
|
logger.debug(f"Mapped {len(equation_map)} equation labels to numbers")
|
|
195
210
|
|
|
196
211
|
# Step 5.6: Remove label markers now that mapping is complete
|
|
197
212
|
# These metadata markers should not appear in the final output
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
)
|
|
213
|
+
# NOTE: Keep fig/sfig/stable/table labels - they're needed by content processor and removed during caption parsing
|
|
214
|
+
markdown_with_numbers = re.sub(r"^\{#(?:snote|eq):[^}]+\}\s*", "", markdown_with_numbers, flags=re.MULTILINE)
|
|
201
215
|
|
|
202
216
|
# Step 6: Convert content to DOCX structure
|
|
203
217
|
doc_structure = self.content_processor.parse(markdown_with_numbers, citation_map)
|
|
@@ -215,6 +229,10 @@ class DocxExporter:
|
|
|
215
229
|
include_footnotes=self.include_footnotes,
|
|
216
230
|
base_path=self.path_manager.manuscript_path,
|
|
217
231
|
metadata=metadata,
|
|
232
|
+
table_map=table_map,
|
|
233
|
+
figures_at_end=self.figures_at_end,
|
|
234
|
+
hide_highlighting=self.hide_highlighting,
|
|
235
|
+
hide_comments=self.hide_comments,
|
|
218
236
|
)
|
|
219
237
|
logger.info(f"DOCX exported successfully: {docx_path}")
|
|
220
238
|
|
|
@@ -265,9 +283,9 @@ class DocxExporter:
|
|
|
265
283
|
|
|
266
284
|
content.append(main_content)
|
|
267
285
|
|
|
268
|
-
# Load 02_SUPPLEMENTARY_INFO.md if exists
|
|
286
|
+
# Load 02_SUPPLEMENTARY_INFO.md if exists and not configured to hide SI
|
|
269
287
|
supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
|
|
270
|
-
if supp_md.exists():
|
|
288
|
+
if supp_md.exists() and not self.hide_si:
|
|
271
289
|
logger.info("Including supplementary information")
|
|
272
290
|
supp_content = supp_md.read_text(encoding="utf-8")
|
|
273
291
|
supp_content = remove_yaml_header(supp_content)
|
|
@@ -281,11 +299,36 @@ class DocxExporter:
|
|
|
281
299
|
content.append("<!-- PAGE_BREAK -->")
|
|
282
300
|
content.append("# Supplementary Information")
|
|
283
301
|
content.append(supp_content)
|
|
302
|
+
elif supp_md.exists() and self.hide_si:
|
|
303
|
+
logger.info("Supplementary information exists but hidden per config (docx.hide_si: true)")
|
|
284
304
|
else:
|
|
285
305
|
logger.debug("No supplementary information file found")
|
|
286
306
|
|
|
287
307
|
return "\n\n".join(content)
|
|
288
308
|
|
|
309
|
+
def _load_si_for_mapping(self) -> str:
|
|
310
|
+
r"""Load SI content for label mapping without including in export.
|
|
311
|
+
|
|
312
|
+
This method is used when hide_si is True but we still need to extract
|
|
313
|
+
SI labels (stable, sfig, snote) for cross-references in the main text.
|
|
314
|
+
|
|
315
|
+
IMPORTANT: We return RAW content (before preprocessing) because we need to
|
|
316
|
+
extract LaTeX labels (\label{stable:X}) which determine the PDF numbering order.
|
|
317
|
+
The preprocessor strips out {{tex: blocks, losing this ordering information.
|
|
318
|
+
|
|
319
|
+
Returns:
|
|
320
|
+
SI content as string (raw, before preprocessing), or empty string if SI doesn't exist
|
|
321
|
+
"""
|
|
322
|
+
supp_md = self.path_manager.manuscript_path / "02_SUPPLEMENTARY_INFO.md"
|
|
323
|
+
if not supp_md.exists():
|
|
324
|
+
return ""
|
|
325
|
+
|
|
326
|
+
# Load RAW SI content (don't preprocess - we need LaTeX labels for ordering)
|
|
327
|
+
supp_content = supp_md.read_text(encoding="utf-8")
|
|
328
|
+
supp_content = remove_yaml_header(supp_content)
|
|
329
|
+
|
|
330
|
+
return supp_content
|
|
331
|
+
|
|
289
332
|
def _build_bibliography(self, citation_map: Dict[str, int]) -> Dict[int, Dict]:
|
|
290
333
|
"""Build bibliography with optional DOI resolution.
|
|
291
334
|
|
|
@@ -327,7 +370,8 @@ class DocxExporter:
|
|
|
327
370
|
logger.info(f"Resolved DOI for {key}: {doi}")
|
|
328
371
|
|
|
329
372
|
# Format entry (full format for DOCX bibliography)
|
|
330
|
-
formatted
|
|
373
|
+
# Don't include DOI in formatted text - it will be added separately as a hyperlink by the writer
|
|
374
|
+
formatted = format_bibliography_entry(entry, doi=None, slim=False, author_format=self.author_format)
|
|
331
375
|
|
|
332
376
|
bibliography[number] = {"key": key, "entry": entry, "doi": doi, "formatted": formatted}
|
|
333
377
|
|