rxiv-maker 1.17.0__py3-none-any.whl → 1.18.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rxiv_maker/__version__.py +1 -1
- rxiv_maker/cli/framework/workflow_commands.py +3 -1
- rxiv_maker/exporters/docx_citation_mapper.py +3 -84
- rxiv_maker/exporters/docx_content_processor.py +5 -23
- rxiv_maker/exporters/docx_exporter.py +14 -28
- rxiv_maker/exporters/docx_writer.py +201 -75
- rxiv_maker/processors/template_processor.py +10 -0
- rxiv_maker/templates/registry.py +52 -12
- rxiv_maker/tex/template.tex +2 -0
- rxiv_maker/utils/accent_character_map.py +150 -0
- rxiv_maker/utils/author_affiliation_processor.py +128 -0
- rxiv_maker/utils/citation_range_formatter.py +118 -0
- rxiv_maker/utils/comment_filter.py +46 -0
- rxiv_maker/utils/docx_helpers.py +4 -117
- rxiv_maker/utils/label_extractor.py +185 -0
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.1.dist-info}/METADATA +1 -1
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.1.dist-info}/RECORD +20 -15
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.1.dist-info}/WHEEL +0 -0
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.1.dist-info}/entry_points.txt +0 -0
- {rxiv_maker-1.17.0.dist-info → rxiv_maker-1.18.1.dist-info}/licenses/LICENSE +0 -0
rxiv_maker/__version__.py
CHANGED
|
@@ -30,8 +30,10 @@ class InitCommand(BaseCommand):
|
|
|
30
30
|
self.engine = "local" # Only local engine is supported
|
|
31
31
|
|
|
32
32
|
# Store manuscript path without PathManager validation since we're creating the directory
|
|
33
|
+
# NOTE: For init command, we should NOT use environment variable MANUSCRIPT_PATH
|
|
34
|
+
# as it's meant for finding existing manuscripts, not determining where to initialize
|
|
33
35
|
if manuscript_path is None:
|
|
34
|
-
manuscript_path =
|
|
36
|
+
manuscript_path = "MANUSCRIPT"
|
|
35
37
|
|
|
36
38
|
# Store the raw path for use in execute_operation
|
|
37
39
|
self.raw_manuscript_path = manuscript_path
|
|
@@ -8,6 +8,7 @@ import re
|
|
|
8
8
|
from typing import Dict, List
|
|
9
9
|
|
|
10
10
|
from ..converters.citation_processor import extract_citations_from_text
|
|
11
|
+
from ..utils.citation_range_formatter import format_citation_ranges
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class CitationMapper:
|
|
@@ -17,97 +18,15 @@ class CitationMapper:
|
|
|
17
18
|
def _format_citation_ranges(text: str) -> str:
|
|
18
19
|
"""Format consecutive citations as ranges.
|
|
19
20
|
|
|
20
|
-
|
|
21
|
-
Also formats comma-separated lists like [1, 2, 3] to [1-3].
|
|
21
|
+
Uses centralized citation range formatter from utils module.
|
|
22
22
|
|
|
23
23
|
Args:
|
|
24
24
|
text: Text with numbered citations
|
|
25
25
|
|
|
26
26
|
Returns:
|
|
27
27
|
Text with consecutive citations formatted as ranges
|
|
28
|
-
|
|
29
|
-
Example:
|
|
30
|
-
>>> CitationMapper._format_citation_ranges("text [1][2][3] more")
|
|
31
|
-
'text [1-3] more'
|
|
32
|
-
>>> CitationMapper._format_citation_ranges("text [1, 2, 3] more")
|
|
33
|
-
'text [1-3] more'
|
|
34
|
-
>>> CitationMapper._format_citation_ranges("text [1][3][4] more")
|
|
35
|
-
'text [1][3-4] more'
|
|
36
|
-
"""
|
|
37
|
-
|
|
38
|
-
# Pattern 1: Handle adjacent bracketed citations [1][2][3] or [1] [2] [3]
|
|
39
|
-
def combine_adjacent(match_obj):
|
|
40
|
-
# Extract all numbers from consecutive brackets (allowing spaces between)
|
|
41
|
-
numbers = [int(n) for n in re.findall(r"\[(\d+)\]", match_obj.group(0))]
|
|
42
|
-
return CitationMapper._format_number_list(numbers)
|
|
43
|
-
|
|
44
|
-
# Find sequences of adjacent bracketed numbers (with optional spaces between)
|
|
45
|
-
text = re.sub(r"(?:\[\d+\]\s*){2,}", combine_adjacent, text)
|
|
46
|
-
|
|
47
|
-
# Pattern 2: Handle comma-separated citations within single brackets [1, 2, 3]
|
|
48
|
-
def combine_comma_separated(match_obj):
|
|
49
|
-
# Extract all numbers from comma-separated list
|
|
50
|
-
numbers_str = match_obj.group(1)
|
|
51
|
-
numbers = [int(n.strip()) for n in numbers_str.split(",")]
|
|
52
|
-
return CitationMapper._format_number_list(numbers)
|
|
53
|
-
|
|
54
|
-
text = re.sub(r"\[([\d,\s]+)\]", combine_comma_separated, text)
|
|
55
|
-
|
|
56
|
-
return text
|
|
57
|
-
|
|
58
|
-
@staticmethod
|
|
59
|
-
def _format_number_list(numbers: List[int]) -> str:
|
|
60
|
-
"""Format a list of citation numbers as ranges.
|
|
61
|
-
|
|
62
|
-
Args:
|
|
63
|
-
numbers: List of citation numbers
|
|
64
|
-
|
|
65
|
-
Returns:
|
|
66
|
-
Formatted string with ranges
|
|
67
|
-
|
|
68
|
-
Example:
|
|
69
|
-
>>> CitationMapper._format_number_list([1, 2, 3, 5, 6, 8])
|
|
70
|
-
'[1-3, 5-6, 8]'
|
|
71
|
-
>>> CitationMapper._format_number_list([15, 16])
|
|
72
|
-
'[15-16]'
|
|
73
|
-
>>> CitationMapper._format_number_list([1, 3, 5])
|
|
74
|
-
'[1, 3, 5]'
|
|
75
28
|
"""
|
|
76
|
-
|
|
77
|
-
return "[]"
|
|
78
|
-
|
|
79
|
-
# Sort numbers
|
|
80
|
-
sorted_nums = sorted(set(numbers))
|
|
81
|
-
|
|
82
|
-
# Build ranges
|
|
83
|
-
ranges = []
|
|
84
|
-
start = sorted_nums[0]
|
|
85
|
-
end = sorted_nums[0]
|
|
86
|
-
|
|
87
|
-
for num in sorted_nums[1:]:
|
|
88
|
-
if num == end + 1:
|
|
89
|
-
# Continue current range
|
|
90
|
-
end = num
|
|
91
|
-
else:
|
|
92
|
-
# End current range and start new one
|
|
93
|
-
if start == end:
|
|
94
|
-
# Single number
|
|
95
|
-
ranges.append(str(start))
|
|
96
|
-
else:
|
|
97
|
-
# Range (including 2 consecutive numbers like 15-16)
|
|
98
|
-
ranges.append(f"{start}-{end}")
|
|
99
|
-
start = num
|
|
100
|
-
end = num
|
|
101
|
-
|
|
102
|
-
# Add final range
|
|
103
|
-
if start == end:
|
|
104
|
-
# Single number
|
|
105
|
-
ranges.append(str(start))
|
|
106
|
-
else:
|
|
107
|
-
# Range (including 2 consecutive numbers like 15-16)
|
|
108
|
-
ranges.append(f"{start}-{end}")
|
|
109
|
-
|
|
110
|
-
return f"[{', '.join(ranges)}]"
|
|
29
|
+
return format_citation_ranges(text)
|
|
111
30
|
|
|
112
31
|
def create_mapping(self, citations: List[str]) -> Dict[str, int]:
|
|
113
32
|
"""Create citation key → number mapping.
|
|
@@ -7,30 +7,12 @@ DOCX generation with python-docx.
|
|
|
7
7
|
import re
|
|
8
8
|
from typing import Any, Dict, List, Optional
|
|
9
9
|
|
|
10
|
+
from ..utils.comment_filter import is_metadata_comment
|
|
11
|
+
|
|
10
12
|
|
|
11
13
|
class DocxContentProcessor:
|
|
12
14
|
"""Parses markdown content into structured format for DOCX writing."""
|
|
13
15
|
|
|
14
|
-
@staticmethod
|
|
15
|
-
def _is_metadata_comment(comment_text: str) -> bool:
|
|
16
|
-
"""Check if a comment is metadata/informational and should be skipped.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
comment_text: The comment text to check
|
|
20
|
-
|
|
21
|
-
Returns:
|
|
22
|
-
True if comment should be skipped, False if it should be included
|
|
23
|
-
"""
|
|
24
|
-
if not comment_text:
|
|
25
|
-
return True
|
|
26
|
-
|
|
27
|
-
# Normalize to lowercase for case-insensitive matching
|
|
28
|
-
normalized = comment_text.lower().strip()
|
|
29
|
-
|
|
30
|
-
# Skip comments that start with common metadata keywords
|
|
31
|
-
metadata_prefixes = ["note:", "note ", "comment:", "comment "]
|
|
32
|
-
return any(normalized.startswith(prefix) for prefix in metadata_prefixes)
|
|
33
|
-
|
|
34
16
|
def parse(self, markdown: str, citation_map: Dict[str, int]) -> Dict[str, Any]:
|
|
35
17
|
"""Parse markdown into structured sections for DOCX.
|
|
36
18
|
|
|
@@ -83,7 +65,7 @@ class DocxContentProcessor:
|
|
|
83
65
|
# Single-line comment
|
|
84
66
|
comment_text = line.strip()[4:-3].strip()
|
|
85
67
|
# Skip metadata comments (e.g., "note that...", "Comment: ...")
|
|
86
|
-
if comment_text and not
|
|
68
|
+
if comment_text and not is_metadata_comment(comment_text):
|
|
87
69
|
sections.append({"type": "comment", "text": comment_text})
|
|
88
70
|
i += 1
|
|
89
71
|
continue
|
|
@@ -104,7 +86,7 @@ class DocxContentProcessor:
|
|
|
104
86
|
# Join and add comment
|
|
105
87
|
comment_text = " ".join(comment_lines).strip()
|
|
106
88
|
# Skip metadata comments (e.g., "note that...", "Comment: ...")
|
|
107
|
-
if comment_text and not
|
|
89
|
+
if comment_text and not is_metadata_comment(comment_text):
|
|
108
90
|
sections.append({"type": "comment", "text": comment_text})
|
|
109
91
|
continue
|
|
110
92
|
|
|
@@ -445,7 +427,7 @@ class DocxContentProcessor:
|
|
|
445
427
|
elif match.group(6): # Inline HTML comment
|
|
446
428
|
comment_text = match.group(7).strip()
|
|
447
429
|
# Skip metadata comments (e.g., "note that...", "Comment: ...")
|
|
448
|
-
if comment_text and not
|
|
430
|
+
if comment_text and not is_metadata_comment(comment_text):
|
|
449
431
|
runs.append({"type": "inline_comment", "text": comment_text})
|
|
450
432
|
elif match.group(8): # Markdown link [text](url)
|
|
451
433
|
runs.append(
|
|
@@ -54,6 +54,8 @@ class DocxExporter:
|
|
|
54
54
|
docx_config = config.get("docx", {})
|
|
55
55
|
self.hide_si = docx_config.get("hide_si", False) # Default to False (don't hide SI) for backwards compatibility
|
|
56
56
|
self.figures_at_end = docx_config.get("figures_at_end", False) # Default to False (inline figures)
|
|
57
|
+
self.hide_highlighting = docx_config.get("hide_highlighting", False) # Default to False (show highlights)
|
|
58
|
+
self.hide_comments = docx_config.get("hide_comments", False) # Default to False (include comments)
|
|
57
59
|
|
|
58
60
|
# Components
|
|
59
61
|
self.citation_mapper = CitationMapper()
|
|
@@ -125,10 +127,13 @@ class DocxExporter:
|
|
|
125
127
|
# Step 5.5: Replace figure and equation references with numbers
|
|
126
128
|
import re
|
|
127
129
|
|
|
130
|
+
# Extract all labels using centralized utility
|
|
131
|
+
from ..utils.label_extractor import LabelExtractor
|
|
132
|
+
|
|
133
|
+
label_extractor = LabelExtractor()
|
|
134
|
+
|
|
128
135
|
# Find all figures and create mapping
|
|
129
|
-
|
|
130
|
-
figure_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#fig:([\w-]+)", markdown_with_numbers)
|
|
131
|
-
figure_map = {label: i + 1 for i, label in enumerate(figure_labels)}
|
|
136
|
+
figure_map = label_extractor.extract_figure_labels(markdown_with_numbers)
|
|
132
137
|
|
|
133
138
|
# Replace @fig:label with "Fig. X" in text, handling optional panel letters
|
|
134
139
|
# Pattern matches: @fig:label optionally followed by space and panel letter(s)
|
|
@@ -146,11 +151,9 @@ class DocxExporter:
|
|
|
146
151
|
logger.debug(f"Mapped {len(figure_map)} figure labels to numbers")
|
|
147
152
|
|
|
148
153
|
# Find all supplementary figures and create mapping
|
|
149
|
-
# Allow hyphens and underscores in label names
|
|
150
154
|
# IMPORTANT: When SI is excluded, extract from SI content (where figures are defined)
|
|
151
155
|
content_to_scan_for_sfigs = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
152
|
-
|
|
153
|
-
sfig_map = {label: i + 1 for i, label in enumerate(sfig_labels)}
|
|
156
|
+
sfig_map = label_extractor.extract_supplementary_figure_labels(content_to_scan_for_sfigs)
|
|
154
157
|
|
|
155
158
|
# Replace @sfig:label with "Supp. Fig. X" in text, handling optional panel letters
|
|
156
159
|
for label, num in sfig_map.items():
|
|
@@ -166,24 +169,9 @@ class DocxExporter:
|
|
|
166
169
|
|
|
167
170
|
# Find all tables and create mapping (looking for {#stable:label} or \label{stable:label} tags)
|
|
168
171
|
# IMPORTANT: PDF uses the order that tables are DEFINED in the document (order of \label{stable:X})
|
|
169
|
-
# NOT the order of caption references (%{#stable:X}) which are just metadata
|
|
170
172
|
# When SI is excluded from export, we still need to extract labels from SI
|
|
171
|
-
|
|
172
173
|
content_to_scan_for_tables = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
173
|
-
|
|
174
|
-
# Extract table labels in document order (both {#stable:label} markdown format and \label{stable:label} LaTeX format)
|
|
175
|
-
# The PDF numbering follows the order these labels appear in the document
|
|
176
|
-
markdown_labels = re.findall(r"\{#stable:([\w-]+)\}", content_to_scan_for_tables)
|
|
177
|
-
latex_labels = re.findall(r"\\label\{stable:([\w-]+)\}", content_to_scan_for_tables)
|
|
178
|
-
|
|
179
|
-
# Combine both formats, preferring LaTeX labels if present (since that's what PDF uses)
|
|
180
|
-
table_labels = latex_labels if latex_labels else markdown_labels
|
|
181
|
-
|
|
182
|
-
# Remove duplicates while preserving order
|
|
183
|
-
seen = set()
|
|
184
|
-
table_labels = [label for label in table_labels if not (label in seen or seen.add(label))]
|
|
185
|
-
|
|
186
|
-
table_map = {label: i + 1 for i, label in enumerate(table_labels)}
|
|
174
|
+
table_map = label_extractor.extract_supplementary_table_labels(content_to_scan_for_tables)
|
|
187
175
|
logger.debug(f"Mapped {len(table_map)} supplementary tables: {table_map}")
|
|
188
176
|
|
|
189
177
|
# Replace @stable:label with "Supp. Table X" in text
|
|
@@ -193,11 +181,9 @@ class DocxExporter:
|
|
|
193
181
|
)
|
|
194
182
|
|
|
195
183
|
# Find all supplementary notes and create mapping (looking for {#snote:label} tags)
|
|
196
|
-
# Allow hyphens and underscores in label names
|
|
197
184
|
# IMPORTANT: When SI is excluded, extract from SI content (where notes are defined)
|
|
198
185
|
content_to_scan_for_snotes = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
|
|
199
|
-
|
|
200
|
-
snote_map = {label: i + 1 for i, label in enumerate(snote_labels)}
|
|
186
|
+
snote_map = label_extractor.extract_supplementary_note_labels(content_to_scan_for_snotes)
|
|
201
187
|
|
|
202
188
|
# Replace @snote:label with "Supp. Note X" in text
|
|
203
189
|
for label, num in snote_map.items():
|
|
@@ -208,9 +194,7 @@ class DocxExporter:
|
|
|
208
194
|
logger.debug(f"Mapped {len(snote_map)} supplementary note labels to numbers")
|
|
209
195
|
|
|
210
196
|
# Find all equations and create mapping (looking for {#eq:label} tags)
|
|
211
|
-
|
|
212
|
-
equation_labels = re.findall(r"\{#eq:([\w-]+)\}", markdown_with_numbers)
|
|
213
|
-
equation_map = {label: i + 1 for i, label in enumerate(equation_labels)}
|
|
197
|
+
equation_map = label_extractor.extract_equation_labels(markdown_with_numbers)
|
|
214
198
|
|
|
215
199
|
# Replace @eq:label with "Eq. X"
|
|
216
200
|
# Handle both @eq:label and (@eq:label) formats
|
|
@@ -247,6 +231,8 @@ class DocxExporter:
|
|
|
247
231
|
metadata=metadata,
|
|
248
232
|
table_map=table_map,
|
|
249
233
|
figures_at_end=self.figures_at_end,
|
|
234
|
+
hide_highlighting=self.hide_highlighting,
|
|
235
|
+
hide_comments=self.hide_comments,
|
|
250
236
|
)
|
|
251
237
|
logger.info(f"DOCX exported successfully: {docx_path}")
|
|
252
238
|
|