rxiv-maker 1.17.0__py3-none-any.whl → 1.18.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
rxiv_maker/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Version information."""
2
2
 
3
- __version__ = "1.17.0"
3
+ __version__ = "1.18.1"
@@ -30,8 +30,10 @@ class InitCommand(BaseCommand):
30
30
  self.engine = "local" # Only local engine is supported
31
31
 
32
32
  # Store manuscript path without PathManager validation since we're creating the directory
33
+ # NOTE: For init command, we should NOT use environment variable MANUSCRIPT_PATH
34
+ # as it's meant for finding existing manuscripts, not determining where to initialize
33
35
  if manuscript_path is None:
34
- manuscript_path = EnvironmentManager.get_manuscript_path() or "MANUSCRIPT"
36
+ manuscript_path = "MANUSCRIPT"
35
37
 
36
38
  # Store the raw path for use in execute_operation
37
39
  self.raw_manuscript_path = manuscript_path
@@ -8,6 +8,7 @@ import re
8
8
  from typing import Dict, List
9
9
 
10
10
  from ..converters.citation_processor import extract_citations_from_text
11
+ from ..utils.citation_range_formatter import format_citation_ranges
11
12
 
12
13
 
13
14
  class CitationMapper:
@@ -17,97 +18,15 @@ class CitationMapper:
17
18
  def _format_citation_ranges(text: str) -> str:
18
19
  """Format consecutive citations as ranges.
19
20
 
20
- Converts patterns like [1][2][3] to [1-3], [15][16] to [15-16], etc.
21
- Also formats comma-separated lists like [1, 2, 3] to [1-3].
21
+ Uses centralized citation range formatter from utils module.
22
22
 
23
23
  Args:
24
24
  text: Text with numbered citations
25
25
 
26
26
  Returns:
27
27
  Text with consecutive citations formatted as ranges
28
-
29
- Example:
30
- >>> CitationMapper._format_citation_ranges("text [1][2][3] more")
31
- 'text [1-3] more'
32
- >>> CitationMapper._format_citation_ranges("text [1, 2, 3] more")
33
- 'text [1-3] more'
34
- >>> CitationMapper._format_citation_ranges("text [1][3][4] more")
35
- 'text [1][3-4] more'
36
- """
37
-
38
- # Pattern 1: Handle adjacent bracketed citations [1][2][3] or [1] [2] [3]
39
- def combine_adjacent(match_obj):
40
- # Extract all numbers from consecutive brackets (allowing spaces between)
41
- numbers = [int(n) for n in re.findall(r"\[(\d+)\]", match_obj.group(0))]
42
- return CitationMapper._format_number_list(numbers)
43
-
44
- # Find sequences of adjacent bracketed numbers (with optional spaces between)
45
- text = re.sub(r"(?:\[\d+\]\s*){2,}", combine_adjacent, text)
46
-
47
- # Pattern 2: Handle comma-separated citations within single brackets [1, 2, 3]
48
- def combine_comma_separated(match_obj):
49
- # Extract all numbers from comma-separated list
50
- numbers_str = match_obj.group(1)
51
- numbers = [int(n.strip()) for n in numbers_str.split(",")]
52
- return CitationMapper._format_number_list(numbers)
53
-
54
- text = re.sub(r"\[([\d,\s]+)\]", combine_comma_separated, text)
55
-
56
- return text
57
-
58
- @staticmethod
59
- def _format_number_list(numbers: List[int]) -> str:
60
- """Format a list of citation numbers as ranges.
61
-
62
- Args:
63
- numbers: List of citation numbers
64
-
65
- Returns:
66
- Formatted string with ranges
67
-
68
- Example:
69
- >>> CitationMapper._format_number_list([1, 2, 3, 5, 6, 8])
70
- '[1-3, 5-6, 8]'
71
- >>> CitationMapper._format_number_list([15, 16])
72
- '[15-16]'
73
- >>> CitationMapper._format_number_list([1, 3, 5])
74
- '[1, 3, 5]'
75
28
  """
76
- if not numbers:
77
- return "[]"
78
-
79
- # Sort numbers
80
- sorted_nums = sorted(set(numbers))
81
-
82
- # Build ranges
83
- ranges = []
84
- start = sorted_nums[0]
85
- end = sorted_nums[0]
86
-
87
- for num in sorted_nums[1:]:
88
- if num == end + 1:
89
- # Continue current range
90
- end = num
91
- else:
92
- # End current range and start new one
93
- if start == end:
94
- # Single number
95
- ranges.append(str(start))
96
- else:
97
- # Range (including 2 consecutive numbers like 15-16)
98
- ranges.append(f"{start}-{end}")
99
- start = num
100
- end = num
101
-
102
- # Add final range
103
- if start == end:
104
- # Single number
105
- ranges.append(str(start))
106
- else:
107
- # Range (including 2 consecutive numbers like 15-16)
108
- ranges.append(f"{start}-{end}")
109
-
110
- return f"[{', '.join(ranges)}]"
29
+ return format_citation_ranges(text)
111
30
 
112
31
  def create_mapping(self, citations: List[str]) -> Dict[str, int]:
113
32
  """Create citation key → number mapping.
@@ -7,30 +7,12 @@ DOCX generation with python-docx.
7
7
  import re
8
8
  from typing import Any, Dict, List, Optional
9
9
 
10
+ from ..utils.comment_filter import is_metadata_comment
11
+
10
12
 
11
13
  class DocxContentProcessor:
12
14
  """Parses markdown content into structured format for DOCX writing."""
13
15
 
14
- @staticmethod
15
- def _is_metadata_comment(comment_text: str) -> bool:
16
- """Check if a comment is metadata/informational and should be skipped.
17
-
18
- Args:
19
- comment_text: The comment text to check
20
-
21
- Returns:
22
- True if comment should be skipped, False if it should be included
23
- """
24
- if not comment_text:
25
- return True
26
-
27
- # Normalize to lowercase for case-insensitive matching
28
- normalized = comment_text.lower().strip()
29
-
30
- # Skip comments that start with common metadata keywords
31
- metadata_prefixes = ["note:", "note ", "comment:", "comment "]
32
- return any(normalized.startswith(prefix) for prefix in metadata_prefixes)
33
-
34
16
  def parse(self, markdown: str, citation_map: Dict[str, int]) -> Dict[str, Any]:
35
17
  """Parse markdown into structured sections for DOCX.
36
18
 
@@ -83,7 +65,7 @@ class DocxContentProcessor:
83
65
  # Single-line comment
84
66
  comment_text = line.strip()[4:-3].strip()
85
67
  # Skip metadata comments (e.g., "note that...", "Comment: ...")
86
- if comment_text and not self._is_metadata_comment(comment_text):
68
+ if comment_text and not is_metadata_comment(comment_text):
87
69
  sections.append({"type": "comment", "text": comment_text})
88
70
  i += 1
89
71
  continue
@@ -104,7 +86,7 @@ class DocxContentProcessor:
104
86
  # Join and add comment
105
87
  comment_text = " ".join(comment_lines).strip()
106
88
  # Skip metadata comments (e.g., "note that...", "Comment: ...")
107
- if comment_text and not self._is_metadata_comment(comment_text):
89
+ if comment_text and not is_metadata_comment(comment_text):
108
90
  sections.append({"type": "comment", "text": comment_text})
109
91
  continue
110
92
 
@@ -445,7 +427,7 @@ class DocxContentProcessor:
445
427
  elif match.group(6): # Inline HTML comment
446
428
  comment_text = match.group(7).strip()
447
429
  # Skip metadata comments (e.g., "note that...", "Comment: ...")
448
- if comment_text and not self._is_metadata_comment(comment_text):
430
+ if comment_text and not is_metadata_comment(comment_text):
449
431
  runs.append({"type": "inline_comment", "text": comment_text})
450
432
  elif match.group(8): # Markdown link [text](url)
451
433
  runs.append(
@@ -54,6 +54,8 @@ class DocxExporter:
54
54
  docx_config = config.get("docx", {})
55
55
  self.hide_si = docx_config.get("hide_si", False) # Default to False (don't hide SI) for backwards compatibility
56
56
  self.figures_at_end = docx_config.get("figures_at_end", False) # Default to False (inline figures)
57
+ self.hide_highlighting = docx_config.get("hide_highlighting", False) # Default to False (show highlights)
58
+ self.hide_comments = docx_config.get("hide_comments", False) # Default to False (include comments)
57
59
 
58
60
  # Components
59
61
  self.citation_mapper = CitationMapper()
@@ -125,10 +127,13 @@ class DocxExporter:
125
127
  # Step 5.5: Replace figure and equation references with numbers
126
128
  import re
127
129
 
130
+ # Extract all labels using centralized utility
131
+ from ..utils.label_extractor import LabelExtractor
132
+
133
+ label_extractor = LabelExtractor()
134
+
128
135
  # Find all figures and create mapping
129
- # Allow hyphens and underscores in label names
130
- figure_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#fig:([\w-]+)", markdown_with_numbers)
131
- figure_map = {label: i + 1 for i, label in enumerate(figure_labels)}
136
+ figure_map = label_extractor.extract_figure_labels(markdown_with_numbers)
132
137
 
133
138
  # Replace @fig:label with "Fig. X" in text, handling optional panel letters
134
139
  # Pattern matches: @fig:label optionally followed by space and panel letter(s)
@@ -146,11 +151,9 @@ class DocxExporter:
146
151
  logger.debug(f"Mapped {len(figure_map)} figure labels to numbers")
147
152
 
148
153
  # Find all supplementary figures and create mapping
149
- # Allow hyphens and underscores in label names
150
154
  # IMPORTANT: When SI is excluded, extract from SI content (where figures are defined)
151
155
  content_to_scan_for_sfigs = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
152
- sfig_labels = re.findall(r"!\[[^\]]*\]\([^)]+\)\s*\n\s*\{#sfig:([\w-]+)", content_to_scan_for_sfigs)
153
- sfig_map = {label: i + 1 for i, label in enumerate(sfig_labels)}
156
+ sfig_map = label_extractor.extract_supplementary_figure_labels(content_to_scan_for_sfigs)
154
157
 
155
158
  # Replace @sfig:label with "Supp. Fig. X" in text, handling optional panel letters
156
159
  for label, num in sfig_map.items():
@@ -166,24 +169,9 @@ class DocxExporter:
166
169
 
167
170
  # Find all tables and create mapping (looking for {#stable:label} or \label{stable:label} tags)
168
171
  # IMPORTANT: PDF uses the order that tables are DEFINED in the document (order of \label{stable:X})
169
- # NOT the order of caption references (%{#stable:X}) which are just metadata
170
172
  # When SI is excluded from export, we still need to extract labels from SI
171
-
172
173
  content_to_scan_for_tables = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
173
-
174
- # Extract table labels in document order (both {#stable:label} markdown format and \label{stable:label} LaTeX format)
175
- # The PDF numbering follows the order these labels appear in the document
176
- markdown_labels = re.findall(r"\{#stable:([\w-]+)\}", content_to_scan_for_tables)
177
- latex_labels = re.findall(r"\\label\{stable:([\w-]+)\}", content_to_scan_for_tables)
178
-
179
- # Combine both formats, preferring LaTeX labels if present (since that's what PDF uses)
180
- table_labels = latex_labels if latex_labels else markdown_labels
181
-
182
- # Remove duplicates while preserving order
183
- seen = set()
184
- table_labels = [label for label in table_labels if not (label in seen or seen.add(label))]
185
-
186
- table_map = {label: i + 1 for i, label in enumerate(table_labels)}
174
+ table_map = label_extractor.extract_supplementary_table_labels(content_to_scan_for_tables)
187
175
  logger.debug(f"Mapped {len(table_map)} supplementary tables: {table_map}")
188
176
 
189
177
  # Replace @stable:label with "Supp. Table X" in text
@@ -193,11 +181,9 @@ class DocxExporter:
193
181
  )
194
182
 
195
183
  # Find all supplementary notes and create mapping (looking for {#snote:label} tags)
196
- # Allow hyphens and underscores in label names
197
184
  # IMPORTANT: When SI is excluded, extract from SI content (where notes are defined)
198
185
  content_to_scan_for_snotes = si_content_for_mapping if si_content_for_mapping else markdown_with_numbers
199
- snote_labels = re.findall(r"\{#snote:([\w-]+)\}", content_to_scan_for_snotes)
200
- snote_map = {label: i + 1 for i, label in enumerate(snote_labels)}
186
+ snote_map = label_extractor.extract_supplementary_note_labels(content_to_scan_for_snotes)
201
187
 
202
188
  # Replace @snote:label with "Supp. Note X" in text
203
189
  for label, num in snote_map.items():
@@ -208,9 +194,7 @@ class DocxExporter:
208
194
  logger.debug(f"Mapped {len(snote_map)} supplementary note labels to numbers")
209
195
 
210
196
  # Find all equations and create mapping (looking for {#eq:label} tags)
211
- # Allow hyphens and underscores in label names
212
- equation_labels = re.findall(r"\{#eq:([\w-]+)\}", markdown_with_numbers)
213
- equation_map = {label: i + 1 for i, label in enumerate(equation_labels)}
197
+ equation_map = label_extractor.extract_equation_labels(markdown_with_numbers)
214
198
 
215
199
  # Replace @eq:label with "Eq. X"
216
200
  # Handle both @eq:label and (@eq:label) formats
@@ -247,6 +231,8 @@ class DocxExporter:
247
231
  metadata=metadata,
248
232
  table_map=table_map,
249
233
  figures_at_end=self.figures_at_end,
234
+ hide_highlighting=self.hide_highlighting,
235
+ hide_comments=self.hide_comments,
250
236
  )
251
237
  logger.info(f"DOCX exported successfully: {docx_path}")
252
238