rxiv-maker 1.16.8__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
rxiv_maker/__version__.py CHANGED
@@ -1,3 +1,3 @@
  """Version information."""
 
- __version__ = "1.16.8"
+ __version__ = "1.17.0"
@@ -30,6 +30,7 @@ from ..framework import BuildCommand
  @click.option("--keep-output", is_flag=True, help="Preserve existing output directory (default: clear before build)")
  @click.option("--docx", is_flag=True, help="Also export to DOCX format for collaborative review")
  @click.option("--resolve-dois", "-r", is_flag=True, help="Attempt to resolve missing DOIs (when using --docx)")
+ @click.option("--split-si", is_flag=True, help="Split PDF into main and SI sections (__main.pdf and __si.pdf)")
  @click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
  @click.option("--quiet", "-q", is_flag=True, help="Suppress non-essential output")
  @click.option("--debug", "-d", is_flag=True, help="Enable debug output")
@@ -49,6 +50,7 @@ def build(
  keep_output: bool,
  docx: bool,
  resolve_dois: bool,
+ split_si: bool,
  verbose: bool,
  quiet: bool,
  debug: bool,
@@ -81,6 +83,10 @@ def build(
 
  $ rxiv pdf --docx --resolve-dois
 
+ **Split PDF into main and SI sections:**
+
+ $ rxiv pdf --split-si
+
  **Force regenerate all figures:**
 
  $ rxiv pdf --force-figures
@@ -108,6 +114,7 @@ def build(
  keep_output=keep_output,
  docx=docx,
  resolve_dois=resolve_dois,
+ split_si=split_si,
  debug=debug or verbose,
  quiet=quiet,
  container_mode=container_mode,
@@ -142,6 +142,7 @@ class BuildCommand(BaseCommand):
  keep_output: bool = False,
  docx: bool = False,
  resolve_dois: bool = False,
+ split_si: bool = False,
  debug: bool = False,
  quiet: bool = False,
  container_mode: Optional[str] = None,
@@ -156,6 +157,7 @@ class BuildCommand(BaseCommand):
  keep_output: Preserve existing output directory
  docx: Also export to DOCX format
  resolve_dois: Attempt to resolve missing DOIs (for DOCX export)
+ split_si: Split PDF into main and SI sections
  debug: Enable debug output
  quiet: Suppress non-critical warnings
  container_mode: Container behavior mode
@@ -223,6 +225,10 @@ class BuildCommand(BaseCommand):
  if docx:
  self._export_docx(resolve_dois=resolve_dois, quiet=quiet, debug=debug)
 
+ # Split PDF if requested
+ if split_si:
+ self._split_pdf(pdf_path, quiet=quiet, debug=debug)
+
  # Show helpful tips after successful build
  self._show_build_tips()
 
@@ -252,11 +258,69 @@ class BuildCommand(BaseCommand):
  self.console.print(f"[green]✅ DOCX exported:[/green] {docx_path}")
 
  except Exception as e:
- self.console.print(f"[yellow]⚠️ DOCX export failed:[/yellow] {e}", err=True)
+ self.console.print(f"[yellow]⚠️ DOCX export failed:[/yellow] {e}")
+ if debug:
+ import traceback
+
+ self.console.print(f"[dim]{traceback.format_exc()}[/dim]")
+
+ def _split_pdf(self, pdf_path: Path, quiet: bool = False, debug: bool = False) -> None:
+ """Split PDF into main and SI sections after successful PDF build.
+
+ Args:
+ pdf_path: Path to the generated PDF
+ quiet: Suppress non-essential output
+ debug: Enable debug output
+ """
+ try:
+ from ...processors.yaml_processor import extract_yaml_metadata
+ from ...utils.file_helpers import find_manuscript_md
+ from ...utils.pdf_splitter import split_pdf
+ from ...utils.pdf_utils import get_custom_pdf_filename
+
+ if not quiet:
+ self.console.print("\n[cyan]✂️ Splitting PDF into main and SI sections...[/cyan]")
+
+ # Split the PDF
+ main_path, si_path = split_pdf(pdf_path)
+
+ if main_path and si_path:
+ # Extract metadata to generate custom filename
+ manuscript_md = find_manuscript_md(str(self.path_manager.manuscript_path))
+ yaml_metadata = extract_yaml_metadata(str(manuscript_md))
+
+ # Get base filename (e.g., "2025__saraiva_et_al__rxiv.pdf")
+ base_filename = get_custom_pdf_filename(yaml_metadata)
+ base_name = base_filename.replace(".pdf", "")
+
+ # Generate final filenames with __main and __si suffixes
+ main_filename = f"{base_name}__main.pdf"
+ si_filename = f"{base_name}__si.pdf"
+
+ # Copy split files to manuscript directory
+ final_main_path = self.path_manager.manuscript_path / main_filename
+ final_si_path = self.path_manager.manuscript_path / si_filename
+
+ shutil.copy2(main_path, final_main_path)
+ shutil.copy2(si_path, final_si_path)
+
+ if not quiet:
+ self.console.print("[green]✅ PDF split successfully:[/green]")
+ self.console.print(f" 📄 Main: {final_main_path}")
+ self.console.print(f" 📄 SI: {final_si_path}")
+ elif main_path is None and si_path is None:
+ if not quiet:
+ self.console.print("[yellow]⚠️ Could not split PDF: SI section marker not found[/yellow]")
+ else:
+ if not quiet:
+ self.console.print("[yellow]⚠️ PDF splitting partially failed[/yellow]")
+
+ except Exception as e:
+ self.console.print(f"[yellow]⚠️ PDF splitting failed:[/yellow] {e}")
  if debug:
  import traceback
 
- self.console.print(f"[dim]{traceback.format_exc()}[/dim]", err=True)
+ self.console.print(f"[dim]{traceback.format_exc()}[/dim]")
 
  def _show_build_tips(self) -> None:
  """Show helpful tips after successful PDF build."""
@@ -202,9 +202,11 @@ def extract_citations_from_text(text: MarkdownContent) -> list[CitationKey]:
  backtick_patterns.append(match.group(0))
  return f"__BACKTICK_PATTERN_{len(backtick_patterns) - 1}__"
 
- # Match both single backticks `...` and triple backticks ```...```
- text_cleaned = re.sub(r"`[^`]+`", protect_backticks, text)
- text_cleaned = re.sub(r"```.*?```", protect_backticks, text_cleaned, flags=re.DOTALL)
+ # IMPORTANT: Match triple backticks FIRST, then single backticks
+ # This prevents the single-backtick pattern from matching across triple-backtick blocks
+ # (e.g., from a ` before ```latex to the first ` inside the code block)
+ text_cleaned = re.sub(r"```.*?```", protect_backticks, text, flags=re.DOTALL)
+ text_cleaned = re.sub(r"`[^`]+`", protect_backticks, text_cleaned)
 
  # Find bracketed multiple citations
  bracketed_matches = re.findall(r"\[(@[^]]+)\]", text_cleaned)
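The reordered substitutions above are the substance of this change. A standalone, hedged illustration (not the package's code) of how the old order could hide a genuine citation from the extractor:

```python
import re

# With the old order, single-backtick matches can pair backticks inside a fenced block
# with the fence markers themselves, so a later `...` match can swallow real prose --
# including a citation that follows the block. Protecting fenced blocks first avoids this.
def protect_code_spans(text: str, triple_first: bool) -> str:
    saved: list[str] = []

    def protect(m: re.Match) -> str:
        saved.append(m.group(0))
        return f"__BACKTICK_PATTERN_{len(saved) - 1}__"

    subs = [(r"```.*?```", re.DOTALL), (r"`[^`]+`", 0)]
    if not triple_first:
        subs.reverse()
    for pattern, flags in subs:
        text = re.sub(pattern, protect, text, flags=flags)
    return text


sample = "```latex\n`\n```\nAs shown by [@smith2021] and the `--docx` flag."

print("@smith2021" in protect_code_spans(sample, triple_first=False))  # False: citation lost
print("@smith2021" in protect_code_spans(sample, triple_first=True))   # True: citation survives
```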
@@ -343,6 +343,7 @@ class ConfigManager:
  "bibliography": {"file": "03_REFERENCES.bib", "style": "nature"},
  "citation_style": "numbered",
  "enable_inline_doi_resolution": False,
+ "docx": {"hide_si": False, "figures_at_end": False},
  "cache": {"enabled": True, "ttl_hours": 24},
  "version": "1.0",
  "acknowledge_rxiv_maker": True,
@@ -13,6 +13,102 @@ from ..converters.citation_processor import extract_citations_from_text
  class CitationMapper:
  """Maps citation keys to sequential numbers for DOCX export."""
 
+ @staticmethod
+ def _format_citation_ranges(text: str) -> str:
+ """Format consecutive citations as ranges.
+
+ Converts patterns like [1][2][3] to [1-3], [15][16] to [15-16], etc.
+ Also formats comma-separated lists like [1, 2, 3] to [1-3].
+
+ Args:
+ text: Text with numbered citations
+
+ Returns:
+ Text with consecutive citations formatted as ranges
+
+ Example:
+ >>> CitationMapper._format_citation_ranges("text [1][2][3] more")
+ 'text [1-3] more'
+ >>> CitationMapper._format_citation_ranges("text [1, 2, 3] more")
+ 'text [1-3] more'
+ >>> CitationMapper._format_citation_ranges("text [1][3][4] more")
+ 'text [1][3-4] more'
+ """
+
+ # Pattern 1: Handle adjacent bracketed citations [1][2][3] or [1] [2] [3]
+ def combine_adjacent(match_obj):
+ # Extract all numbers from consecutive brackets (allowing spaces between)
+ numbers = [int(n) for n in re.findall(r"\[(\d+)\]", match_obj.group(0))]
+ return CitationMapper._format_number_list(numbers)
+
+ # Find sequences of adjacent bracketed numbers (with optional spaces between)
+ text = re.sub(r"(?:\[\d+\]\s*){2,}", combine_adjacent, text)
+
+ # Pattern 2: Handle comma-separated citations within single brackets [1, 2, 3]
+ def combine_comma_separated(match_obj):
+ # Extract all numbers from comma-separated list
+ numbers_str = match_obj.group(1)
+ numbers = [int(n.strip()) for n in numbers_str.split(",")]
+ return CitationMapper._format_number_list(numbers)
+
+ text = re.sub(r"\[([\d,\s]+)\]", combine_comma_separated, text)
+
+ return text
+
+ @staticmethod
+ def _format_number_list(numbers: List[int]) -> str:
+ """Format a list of citation numbers as ranges.
+
+ Args:
+ numbers: List of citation numbers
+
+ Returns:
+ Formatted string with ranges
+
+ Example:
+ >>> CitationMapper._format_number_list([1, 2, 3, 5, 6, 8])
+ '[1-3, 5-6, 8]'
+ >>> CitationMapper._format_number_list([15, 16])
+ '[15-16]'
+ >>> CitationMapper._format_number_list([1, 3, 5])
+ '[1, 3, 5]'
+ """
+ if not numbers:
+ return "[]"
+
+ # Sort numbers
+ sorted_nums = sorted(set(numbers))
+
+ # Build ranges
+ ranges = []
+ start = sorted_nums[0]
+ end = sorted_nums[0]
+
+ for num in sorted_nums[1:]:
+ if num == end + 1:
+ # Continue current range
+ end = num
+ else:
+ # End current range and start new one
+ if start == end:
+ # Single number
+ ranges.append(str(start))
+ else:
+ # Range (including 2 consecutive numbers like 15-16)
+ ranges.append(f"{start}-{end}")
+ start = num
+ end = num
+
+ # Add final range
+ if start == end:
+ # Single number
+ ranges.append(str(start))
+ else:
+ # Range (including 2 consecutive numbers like 15-16)
+ ranges.append(f"{start}-{end}")
+
+ return f"[{', '.join(ranges)}]"
+
  def create_mapping(self, citations: List[str]) -> Dict[str, int]:
  """Create citation key → number mapping.
 
@@ -121,4 +217,7 @@ class CitationMapper:
  for i, pattern in enumerate(email_patterns):
  text = text.replace(f"__EMAIL_PATTERN_{i}__", pattern)
 
+ # Format consecutive citations as ranges (e.g., [1][2][3] -> [1-3])
+ text = self._format_citation_ranges(text)
+
  return text
@@ -11,6 +11,26 @@ from typing import Any, Dict, List, Optional
  class DocxContentProcessor:
  """Parses markdown content into structured format for DOCX writing."""
 
+ @staticmethod
+ def _is_metadata_comment(comment_text: str) -> bool:
+ """Check if a comment is metadata/informational and should be skipped.
+
+ Args:
+ comment_text: The comment text to check
+
+ Returns:
+ True if comment should be skipped, False if it should be included
+ """
+ if not comment_text:
+ return True
+
+ # Normalize to lowercase for case-insensitive matching
+ normalized = comment_text.lower().strip()
+
+ # Skip comments that start with common metadata keywords
+ metadata_prefixes = ["note:", "note ", "comment:", "comment "]
+ return any(normalized.startswith(prefix) for prefix in metadata_prefixes)
+
  def parse(self, markdown: str, citation_map: Dict[str, int]) -> Dict[str, Any]:
  """Parse markdown into structured sections for DOCX.
 
@@ -55,10 +75,38 @@ class DocxContentProcessor:
  i += 1
  continue
 
- # Skip HTML/markdown comments
+ # Parse HTML/markdown comments (single-line and multi-line)
+ # Skip informational/metadata comments (those starting with "Note:")
  if line.strip().startswith("<!--"):
- i += 1
- continue
+ # Check if it's a single-line comment
+ if line.strip().endswith("-->"):
+ # Single-line comment
+ comment_text = line.strip()[4:-3].strip()
+ # Skip metadata comments (e.g., "note that...", "Comment: ...")
+ if comment_text and not self._is_metadata_comment(comment_text):
+ sections.append({"type": "comment", "text": comment_text})
+ i += 1
+ continue
+ else:
+ # Multi-line comment - collect all lines until -->
+ comment_lines = [line.strip()[4:]] # Remove <!--
+ i += 1
+ while i < len(lines):
+ if lines[i].strip().endswith("-->"):
+ # Last line of comment
+ comment_lines.append(lines[i].strip()[:-3]) # Remove -->
+ i += 1
+ break
+ else:
+ comment_lines.append(lines[i].strip())
+ i += 1
+
+ # Join and add comment
+ comment_text = " ".join(comment_lines).strip()
+ # Skip metadata comments (e.g., "note that...", "Comment: ...")
+ if comment_text and not self._is_metadata_comment(comment_text):
+ sections.append({"type": "comment", "text": comment_text})
+ continue
 
  # Skip LaTeX commands like <clearpage>
  if line.strip().startswith("<") and line.strip().endswith(">") and " " not in line.strip():
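For reference, the `_is_metadata_comment` rule above means comments beginning with "note" or "comment" never reach the DOCX output. A standalone check, with the predicate copied here purely for illustration:

```python
# Standalone demo of the skip rule (logic mirrored from _is_metadata_comment above).
def is_metadata_comment(comment: str) -> bool:
    normalized = comment.lower().strip()
    return (not comment) or any(
        normalized.startswith(p) for p in ("note:", "note ", "comment:", "comment ")
    )

for c in ["Note: internal draft", "note that X depends on Y", "Reviewer 2 asked about stats"]:
    print(c, "->", "skipped" if is_metadata_comment(c) else "kept as a DOCX comment")
# The first two are skipped; only the last one is carried into the document.
```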
@@ -335,18 +383,21 @@ class DocxContentProcessor:
  runs = []
 
  # Find all formatting markers, links, and citations
- # Pattern to match: <<HIGHLIGHT_YELLOW>>text<</HIGHLIGHT_YELLOW>>, <<XREF>>text<</XREF>>, [text](url), **bold**, __underlined__, *italic*, _italic_, `code`, $math$, [number]
+ # Pattern to match: <<HIGHLIGHT_YELLOW>>text<</HIGHLIGHT_YELLOW>>, <<XREF:type>>text<</XREF>>, <!-- comment -->, [text](url), **bold**, __underlined__, *italic*, _italic_, ~subscript~, ^superscript^, `code`, $math$, [number]
  pattern = re.compile(
  r"(<<HIGHLIGHT_YELLOW>>([^<]+)<</HIGHLIGHT_YELLOW>>)" # Yellow highlight (must be first)
- r"|(<<XREF>>([^<]+)<</XREF>>)" # Cross-reference
+ r"|(<<XREF:(\w+)>>([^<]+)<</XREF>>)" # Cross-reference with type
+ r"|(<!--\s*(.+?)\s*-->)" # HTML comments (inline)
  r"|(\[([^\]]+)\]\(([^)]+)\))" # Markdown link [text](url) (before citations)
  r"|(\*\*([^*]+)\*\*)" # Bold
  r"|(__([^_]+)__)" # Underline with double underscores (must come before single underscore)
  r"|(\*([^*]+)\*)" # Italic with asterisks
  r"|(_([^_]+)_)" # Italic with underscores
+ r"|(~([^~]+)~)" # Subscript
+ r"|(\^([^^]+)\^)" # Superscript
  r"|(`([^`]+)`)" # Code
  r"|(\$([^\$]+)\$)" # Inline math
- r"|(\[(\d+(?:,\s*\d+)*)\])" # Citation numbers
+ r"|(\[(\d+(?:[-,]\s*\d+)*)\])" # Citation numbers (supports both ranges [1-3] and lists [1, 2])
  )
 
  last_end = 0
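The new alternatives can be exercised in isolation. A standalone sketch using the subscript, superscript, and citation sub-patterns exactly as written above (only the inner capture of each alternative is used here):

```python
import re

# Standalone check of the new pieces added to the inline-formatting pattern.
subscript = re.compile(r"~([^~]+)~")
superscript = re.compile(r"\^([^^]+)\^")
citation = re.compile(r"\[(\d+(?:[-,]\s*\d+)*)\]")

print(subscript.findall("H~2~O"))                        # ['2']
print(superscript.findall("10^-3^ M"))                   # ['-3']
print(citation.findall("see [1-3], [4, 7] and [12]"))    # ['1-3', '4, 7', '12']
```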
@@ -378,67 +429,99 @@ class DocxContentProcessor:
  if run["type"] == "text":
  run["highlight_yellow"] = True
  runs.append(run)
- elif match.group(3): # Cross-reference
+ elif match.group(3): # Cross-reference with type
  runs.append(
  {
  "type": "text",
- "text": match.group(4),
+ "text": match.group(5), # Text is now in group 5
  "bold": False,
  "italic": False,
  "underline": False,
  "code": False,
  "xref": True,
+ "xref_type": match.group(4), # Type is in group 4
  }
  )
- elif match.group(5): # Markdown link [text](url)
+ elif match.group(6): # Inline HTML comment
+ comment_text = match.group(7).strip()
+ # Skip metadata comments (e.g., "note that...", "Comment: ...")
+ if comment_text and not self._is_metadata_comment(comment_text):
+ runs.append({"type": "inline_comment", "text": comment_text})
+ elif match.group(8): # Markdown link [text](url)
  runs.append(
  {
  "type": "hyperlink",
- "text": match.group(6),
- "url": match.group(7),
+ "text": match.group(9),
+ "url": match.group(10),
  }
  )
- elif match.group(8): # Bold
+ elif match.group(11): # Bold
  # Recursively parse inner text for underline/italic/other formatting
- inner_text = match.group(9)
+ inner_text = match.group(12)
  inner_runs = self._parse_inline_formatting(inner_text, citation_map)
  # Add bold to all inner runs
  for run in inner_runs:
  if run["type"] == "text":
  run["bold"] = True
  runs.append(run)
- elif match.group(10): # Underline
+ elif match.group(13): # Underline
  # Recursively parse inner text for bold/italic/other formatting
- inner_text = match.group(11)
+ inner_text = match.group(14)
  inner_runs = self._parse_inline_formatting(inner_text, citation_map)
  # Add underline to all inner runs
  for run in inner_runs:
  if run["type"] == "text":
  run["underline"] = True
  runs.append(run)
- elif match.group(12): # Italic with asterisks
+ elif match.group(15): # Italic with asterisks
  # Recursively parse inner text for bold/underline/other formatting
- inner_text = match.group(13)
+ inner_text = match.group(16)
  inner_runs = self._parse_inline_formatting(inner_text, citation_map)
  # Add italic to all inner runs
  for run in inner_runs:
  if run["type"] == "text":
  run["italic"] = True
  runs.append(run)
- elif match.group(14): # Italic with underscores
+ elif match.group(17): # Italic with underscores
  # Recursively parse inner text for bold/underline/other formatting
- inner_text = match.group(15)
+ inner_text = match.group(18)
  inner_runs = self._parse_inline_formatting(inner_text, citation_map)
  # Add italic to all inner runs
  for run in inner_runs:
  if run["type"] == "text":
  run["italic"] = True
  runs.append(run)
- elif match.group(16): # Code
+ elif match.group(19): # Subscript
+ runs.append(
+ {
+ "type": "text",
+ "text": match.group(20),
+ "bold": False,
+ "italic": False,
+ "underline": False,
+ "code": False,
+ "xref": False,
+ "subscript": True,
+ }
+ )
+ elif match.group(21): # Superscript
+ runs.append(
+ {
+ "type": "text",
+ "text": match.group(22),
+ "bold": False,
+ "italic": False,
+ "underline": False,
+ "code": False,
+ "xref": False,
+ "superscript": True,
+ }
+ )
+ elif match.group(23): # Code
  runs.append(
  {
  "type": "text",
- "text": match.group(17),
+ "text": match.group(24),
  "bold": False,
  "italic": False,
  "underline": False,
@@ -446,14 +529,23 @@ class DocxContentProcessor:
  "xref": False,
  }
  )
- elif match.group(18): # Inline math
- runs.append({"type": "inline_equation", "latex": match.group(19)})
- elif match.group(20): # Citation
- # Parse citation numbers (may be multiple: [1, 2, 3])
- numbers_str = match.group(21)
- numbers = [int(n.strip()) for n in numbers_str.split(",")]
- for num in numbers:
- runs.append({"type": "citation", "number": num})
+ elif match.group(25): # Inline math
+ runs.append({"type": "inline_equation", "latex": match.group(26)})
+ elif match.group(27): # Citation
+ # Keep citation as formatted text with yellow highlighting
+ # The citation mapper has already formatted ranges (e.g., [1-3], [1, 4-6, 8])
+ citation_text = match.group(0) # Full match including brackets
+ runs.append(
+ {
+ "type": "text",
+ "text": citation_text,
+ "bold": False,
+ "italic": False,
+ "underline": False,
+ "code": False,
+ "highlight_yellow": True, # Highlight citations in yellow
+ }
+ )
 
  last_end = match.end()
 
@@ -516,6 +608,7 @@ class DocxContentProcessor:
  # Look ahead for caption line (skip empty lines)
  caption = ""
  label = ""
+ is_supplementary = False # Default to main figure
  next_i = start_idx + 1
 
  # Skip empty lines to find caption
@@ -529,6 +622,9 @@ class DocxContentProcessor:
 
  # Check for {#fig:label ...} or {#sfig:label ...} **Caption**
  if next_line and (next_line.startswith("{#fig:") or next_line.startswith("{#sfig:")):
+ # Detect if it's a supplementary figure
+ is_supplementary = next_line.startswith("{#sfig:")
+
  # Extract label if present
  label_match = re.match(r"\{#s?fig:(\w+)[^}]*\}", next_line)
  if label_match:
@@ -572,6 +668,7 @@ class DocxContentProcessor:
  "alt": alt_text,
  "caption": caption,
  "label": label,
+ "is_supplementary": is_supplementary,
  }, next_i
 
  def _parse_table(self, lines: List[str], start_idx: int) -> tuple[Optional[Dict[str, Any]], int]:
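A small standalone check of the supplementary-figure detection added in the three hunks above; the label regex and prefix test are copied from the diff, while the sample caption lines are invented:

```python
import re

# Mirror of the detection logic: {#sfig:...} marks a supplementary figure,
# and the same s?fig pattern extracts the label either way.
for line in ["{#fig:workflow} **Main figure caption**", "{#sfig:benchmark width=0.8} **SI figure caption**"]:
    is_supplementary = line.startswith("{#sfig:")
    label_match = re.match(r"\{#s?fig:(\w+)[^}]*\}", line)
    print(is_supplementary, label_match.group(1) if label_match else None)
# False workflow
# True benchmark
```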
@@ -626,7 +723,8 @@ class DocxContentProcessor:
  if i < len(lines):
  caption_line = lines[i].strip()
  # Match {#stable:label} Caption or {#table:label} Caption
- caption_match = re.match(r"^\{#(stable|table):(\w+)\}\s*(.+)$", caption_line)
+ # Allow hyphens and underscores in label names (e.g., "tool-comparison")
+ caption_match = re.match(r"^\{#(stable|table):([\w-]+)\}\s*(.+)$", caption_line)
  if caption_match:
  label = f"{caption_match.group(1)}:{caption_match.group(2)}"
  caption = caption_match.group(3).strip()
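Finally, a quick standalone comparison of the old and relaxed table-label patterns; both regexes are taken from the hunk above, and the sample caption line is invented:

```python
import re

# Hyphenated labels are rejected by the old \w+ pattern but accepted by [\w-]+.
old_pattern = re.compile(r"^\{#(stable|table):(\w+)\}\s*(.+)$")
new_pattern = re.compile(r"^\{#(stable|table):([\w-]+)\}\s*(.+)$")

line = "{#stable:tool-comparison} Comparison of manuscript preparation tools."
print(bool(old_pattern.match(line)))   # False: hyphen breaks the match
print(new_pattern.match(line).groups())  # ('stable', 'tool-comparison', 'Comparison of manuscript preparation tools.')
```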