PyPI - rxiv-maker - Versions diffs - 1.16.8__py3-none-any.whl → 1.18.0__py3-none-any.whl - Mend

rxiv-maker 1.16.8__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

rxiv_maker/__version__.py +1 -1
rxiv_maker/cli/commands/build.py +7 -0
rxiv_maker/cli/framework/workflow_commands.py +69 -3
rxiv_maker/converters/citation_processor.py +5 -3
rxiv_maker/core/managers/config_manager.py +1 -0
rxiv_maker/exporters/docx_citation_mapper.py +18 -0
rxiv_maker/exporters/docx_content_processor.py +110 -30
rxiv_maker/exporters/docx_exporter.py +76 -32
rxiv_maker/exporters/docx_writer.py +345 -67
rxiv_maker/templates/registry.py +1 -0
rxiv_maker/tex/style/rxiv_maker_style.cls +33 -33
rxiv_maker/utils/accent_character_map.py +150 -0
rxiv_maker/utils/author_affiliation_processor.py +128 -0
rxiv_maker/utils/citation_range_formatter.py +118 -0
rxiv_maker/utils/comment_filter.py +46 -0
rxiv_maker/utils/docx_helpers.py +43 -118
rxiv_maker/utils/label_extractor.py +185 -0
rxiv_maker/utils/pdf_splitter.py +116 -0
{rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/METADATA +2 -1
{rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/RECORD +23 -17
{rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/WHEEL +0 -0
{rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/entry_points.txt +0 -0
{rxiv_maker-1.16.8.dist-info → rxiv_maker-1.18.0.dist-info}/licenses/LICENSE +0 -0

rxiv_maker/__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information."""
-__version__ = "1.16.8"
+__version__ = "1.18.0"

rxiv_maker/cli/commands/build.py CHANGED Viewed

@@ -30,6 +30,7 @@ from ..framework import BuildCommand
 @click.option("--keep-output", is_flag=True, help="Preserve existing output directory (default: clear before build)")
 @click.option("--docx", is_flag=True, help="Also export to DOCX format for collaborative review")
 @click.option("--resolve-dois", "-r", is_flag=True, help="Attempt to resolve missing DOIs (when using --docx)")
+@click.option("--split-si", is_flag=True, help="Split PDF into main and SI sections (__main.pdf and __si.pdf)")
 @click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
 @click.option("--quiet", "-q", is_flag=True, help="Suppress non-essential output")
 @click.option("--debug", "-d", is_flag=True, help="Enable debug output")
@@ -49,6 +50,7 @@ def build(
     keep_output: bool,
     docx: bool,
     resolve_dois: bool,
+    split_si: bool,
     verbose: bool,
     quiet: bool,
     debug: bool,
@@ -81,6 +83,10 @@ def build(
         $ rxiv pdf --docx --resolve-dois
+    **Split PDF into main and SI sections:**
+        $ rxiv pdf --split-si
     **Force regenerate all figures:**
         $ rxiv pdf --force-figures
@@ -108,6 +114,7 @@ def build(
         keep_output=keep_output,
         docx=docx,
         resolve_dois=resolve_dois,
+        split_si=split_si,
         debug=debug or verbose,
         quiet=quiet,
         container_mode=container_mode,

rxiv_maker/cli/framework/workflow_commands.py CHANGED Viewed

@@ -30,8 +30,10 @@ class InitCommand(BaseCommand):
         self.engine = "local"  # Only local engine is supported
         # Store manuscript path without PathManager validation since we're creating the directory
+        # NOTE: For init command, we should NOT use environment variable MANUSCRIPT_PATH
+        # as it's meant for finding existing manuscripts, not determining where to initialize
         if manuscript_path is None:
-            manuscript_path = EnvironmentManager.get_manuscript_path() or "MANUSCRIPT"
+            manuscript_path = "MANUSCRIPT"
         # Store the raw path for use in execute_operation
         self.raw_manuscript_path = manuscript_path
@@ -142,6 +144,7 @@ class BuildCommand(BaseCommand):
         keep_output: bool = False,
         docx: bool = False,
         resolve_dois: bool = False,
+        split_si: bool = False,
         debug: bool = False,
         quiet: bool = False,
         container_mode: Optional[str] = None,
@@ -156,6 +159,7 @@ class BuildCommand(BaseCommand):
             keep_output: Preserve existing output directory
             docx: Also export to DOCX format
             resolve_dois: Attempt to resolve missing DOIs (for DOCX export)
+            split_si: Split PDF into main and SI sections
             debug: Enable debug output
             quiet: Suppress non-critical warnings
             container_mode: Container behavior mode
@@ -223,6 +227,10 @@ class BuildCommand(BaseCommand):
             if docx:
                 self._export_docx(resolve_dois=resolve_dois, quiet=quiet, debug=debug)
+            # Split PDF if requested
+            if split_si:
+                self._split_pdf(pdf_path, quiet=quiet, debug=debug)
             # Show helpful tips after successful build
             self._show_build_tips()
@@ -252,11 +260,69 @@ class BuildCommand(BaseCommand):
                 self.console.print(f"[green]✅ DOCX exported:[/green] {docx_path}")
         except Exception as e:
-            self.console.print(f"[yellow]⚠️  DOCX export failed:[/yellow] {e}", err=True)
+            self.console.print(f"[yellow]⚠️  DOCX export failed:[/yellow] {e}")
+            if debug:
+                import traceback
+                self.console.print(f"[dim]{traceback.format_exc()}[/dim]")
+    def _split_pdf(self, pdf_path: Path, quiet: bool = False, debug: bool = False) -> None:
+        """Split PDF into main and SI sections after successful PDF build.
+        Args:
+            pdf_path: Path to the generated PDF
+            quiet: Suppress non-essential output
+            debug: Enable debug output
+        """
+        try:
+            from ...processors.yaml_processor import extract_yaml_metadata
+            from ...utils.file_helpers import find_manuscript_md
+            from ...utils.pdf_splitter import split_pdf
+            from ...utils.pdf_utils import get_custom_pdf_filename
+            if not quiet:
+                self.console.print("\n[cyan]✂️  Splitting PDF into main and SI sections...[/cyan]")
+            # Split the PDF
+            main_path, si_path = split_pdf(pdf_path)
+            if main_path and si_path:
+                # Extract metadata to generate custom filename
+                manuscript_md = find_manuscript_md(str(self.path_manager.manuscript_path))
+                yaml_metadata = extract_yaml_metadata(str(manuscript_md))
+                # Get base filename (e.g., "2025__saraiva_et_al__rxiv.pdf")
+                base_filename = get_custom_pdf_filename(yaml_metadata)
+                base_name = base_filename.replace(".pdf", "")
+                # Generate final filenames with __main and __si suffixes
+                main_filename = f"{base_name}__main.pdf"
+                si_filename = f"{base_name}__si.pdf"
+                # Copy split files to manuscript directory
+                final_main_path = self.path_manager.manuscript_path / main_filename
+                final_si_path = self.path_manager.manuscript_path / si_filename
+                shutil.copy2(main_path, final_main_path)
+                shutil.copy2(si_path, final_si_path)
+                if not quiet:
+                    self.console.print("[green]✅ PDF split successfully:[/green]")
+                    self.console.print(f"   📄 Main: {final_main_path}")
+                    self.console.print(f"   📄 SI: {final_si_path}")
+            elif main_path is None and si_path is None:
+                if not quiet:
+                    self.console.print("[yellow]⚠️  Could not split PDF: SI section marker not found[/yellow]")
+            else:
+                if not quiet:
+                    self.console.print("[yellow]⚠️  PDF splitting partially failed[/yellow]")
+        except Exception as e:
+            self.console.print(f"[yellow]⚠️  PDF splitting failed:[/yellow] {e}")
             if debug:
                 import traceback
-                self.console.print(f"[dim]{traceback.format_exc()}[/dim]", err=True)
+                self.console.print(f"[dim]{traceback.format_exc()}[/dim]")
     def _show_build_tips(self) -> None:
         """Show helpful tips after successful PDF build."""

rxiv_maker/converters/citation_processor.py CHANGED Viewed

@@ -202,9 +202,11 @@ def extract_citations_from_text(text: MarkdownContent) -> list[CitationKey]:
         backtick_patterns.append(match.group(0))
         return f"__BACKTICK_PATTERN_{len(backtick_patterns) - 1}__"
-    # Match both single backticks `...` and triple backticks ```...```
-    text_cleaned = re.sub(r"`[^`]+`", protect_backticks, text)
-    text_cleaned = re.sub(r"```.*?```", protect_backticks, text_cleaned, flags=re.DOTALL)
+    # IMPORTANT: Match triple backticks FIRST, then single backticks
+    # This prevents the single-backtick pattern from matching across triple-backtick blocks
+    # (e.g., from a ` before ```latex to the first ` inside the code block)
+    text_cleaned = re.sub(r"```.*?```", protect_backticks, text, flags=re.DOTALL)
+    text_cleaned = re.sub(r"`[^`]+`", protect_backticks, text_cleaned)
     # Find bracketed multiple citations
     bracketed_matches = re.findall(r"\[(@[^]]+)\]", text_cleaned)

rxiv_maker/core/managers/config_manager.py CHANGED Viewed

@@ -343,6 +343,7 @@ class ConfigManager:
             "bibliography": {"file": "03_REFERENCES.bib", "style": "nature"},
             "citation_style": "numbered",
             "enable_inline_doi_resolution": False,
+            "docx": {"hide_si": False, "figures_at_end": False},
             "cache": {"enabled": True, "ttl_hours": 24},
             "version": "1.0",
             "acknowledge_rxiv_maker": True,

rxiv_maker/exporters/docx_citation_mapper.py CHANGED Viewed

@@ -8,11 +8,26 @@ import re
 from typing import Dict, List
 from ..converters.citation_processor import extract_citations_from_text
+from ..utils.citation_range_formatter import format_citation_ranges
 class CitationMapper:
     """Maps citation keys to sequential numbers for DOCX export."""
+    @staticmethod
+    def _format_citation_ranges(text: str) -> str:
+        """Format consecutive citations as ranges.
+        Uses centralized citation range formatter from utils module.
+        Args:
+            text: Text with numbered citations
+        Returns:
+            Text with consecutive citations formatted as ranges
+        """
+        return format_citation_ranges(text)
     def create_mapping(self, citations: List[str]) -> Dict[str, int]:
         """Create citation key → number mapping.
@@ -121,4 +136,7 @@ class CitationMapper:
         for i, pattern in enumerate(email_patterns):
             text = text.replace(f"__EMAIL_PATTERN_{i}__", pattern)
+        # Format consecutive citations as ranges (e.g., [1][2][3] -> [1-3])
+        text = self._format_citation_ranges(text)
         return text

rxiv_maker/exporters/docx_content_processor.py CHANGED Viewed

@@ -7,6 +7,8 @@ DOCX generation with python-docx.
 import re
 from typing import Any, Dict, List, Optional
+from ..utils.comment_filter import is_metadata_comment
 class DocxContentProcessor:
     """Parses markdown content into structured format for DOCX writing."""
@@ -55,10 +57,38 @@ class DocxContentProcessor:
                 i += 1
                 continue
-            # Skip HTML/markdown comments
+            # Parse HTML/markdown comments (single-line and multi-line)
+            # Skip informational/metadata comments (those starting with "Note:")
             if line.strip().startswith("<!--"):
-                i += 1
-                continue
+                # Check if it's a single-line comment
+                if line.strip().endswith("-->"):
+                    # Single-line comment
+                    comment_text = line.strip()[4:-3].strip()
+                    # Skip metadata comments (e.g., "note that...", "Comment: ...")
+                    if comment_text and not is_metadata_comment(comment_text):
+                        sections.append({"type": "comment", "text": comment_text})
+                    i += 1
+                    continue
+                else:
+                    # Multi-line comment - collect all lines until -->
+                    comment_lines = [line.strip()[4:]]  # Remove <!--
+                    i += 1
+                    while i < len(lines):
+                        if lines[i].strip().endswith("-->"):
+                            # Last line of comment
+                            comment_lines.append(lines[i].strip()[:-3])  # Remove -->
+                            i += 1
+                            break
+                        else:
+                            comment_lines.append(lines[i].strip())
+                            i += 1
+                    # Join and add comment
+                    comment_text = " ".join(comment_lines).strip()
+                    # Skip metadata comments (e.g., "note that...", "Comment: ...")
+                    if comment_text and not is_metadata_comment(comment_text):
+                        sections.append({"type": "comment", "text": comment_text})
+                    continue
             # Skip LaTeX commands like <clearpage>
             if line.strip().startswith("<") and line.strip().endswith(">") and " " not in line.strip():
@@ -335,18 +365,21 @@ class DocxContentProcessor:
         runs = []
         # Find all formatting markers, links, and citations
-        # Pattern to match: <<HIGHLIGHT_YELLOW>>text<</HIGHLIGHT_YELLOW>>, <<XREF>>text<</XREF>>, [text](url), **bold**, __underlined__, *italic*, _italic_, `code`, $math$, [number]
+        # Pattern to match: <<HIGHLIGHT_YELLOW>>text<</HIGHLIGHT_YELLOW>>, <<XREF:type>>text<</XREF>>, <!-- comment -->, [text](url), **bold**, __underlined__, *italic*, _italic_, ~subscript~, ^superscript^, `code`, $math$, [number]
         pattern = re.compile(
             r"(<<HIGHLIGHT_YELLOW>>([^<]+)<</HIGHLIGHT_YELLOW>>)"  # Yellow highlight (must be first)
-            r"|(<<XREF>>([^<]+)<</XREF>>)"  # Cross-reference
+            r"|(<<XREF:(\w+)>>([^<]+)<</XREF>>)"  # Cross-reference with type
+            r"|(<!--\s*(.+?)\s*-->)"  # HTML comments (inline)
             r"|(\[([^\]]+)\]\(([^)]+)\))"  # Markdown link [text](url) (before citations)
             r"|(\*\*([^*]+)\*\*)"  # Bold
             r"|(__([^_]+)__)"  # Underline with double underscores (must come before single underscore)
             r"|(\*([^*]+)\*)"  # Italic with asterisks
             r"|(_([^_]+)_)"  # Italic with underscores
+            r"|(~([^~]+)~)"  # Subscript
+            r"|(\^([^^]+)\^)"  # Superscript
             r"|(`([^`]+)`)"  # Code
             r"|(\$([^\$]+)\$)"  # Inline math
-            r"|(\[(\d+(?:,\s*\d+)*)\])"  # Citation numbers
+            r"|(\[(\d+(?:[-,]\s*\d+)*)\])"  # Citation numbers (supports both ranges [1-3] and lists [1, 2])
         )
         last_end = 0
@@ -378,67 +411,99 @@ class DocxContentProcessor:
                     if run["type"] == "text":
                         run["highlight_yellow"] = True
                     runs.append(run)
-            elif match.group(3):  # Cross-reference
+            elif match.group(3):  # Cross-reference with type
                 runs.append(
                     {
                         "type": "text",
-                        "text": match.group(4),
+                        "text": match.group(5),  # Text is now in group 5
                         "bold": False,
                         "italic": False,
                         "underline": False,
                         "code": False,
                         "xref": True,
+                        "xref_type": match.group(4),  # Type is in group 4
                     }
                 )
-            elif match.group(5):  # Markdown link [text](url)
+            elif match.group(6):  # Inline HTML comment
+                comment_text = match.group(7).strip()
+                # Skip metadata comments (e.g., "note that...", "Comment: ...")
+                if comment_text and not is_metadata_comment(comment_text):
+                    runs.append({"type": "inline_comment", "text": comment_text})
+            elif match.group(8):  # Markdown link [text](url)
                 runs.append(
                     {
                         "type": "hyperlink",
-                        "text": match.group(6),
-                        "url": match.group(7),
+                        "text": match.group(9),
+                        "url": match.group(10),
                     }
                 )
-            elif match.group(8):  # Bold
+            elif match.group(11):  # Bold
                 # Recursively parse inner text for underline/italic/other formatting
-                inner_text = match.group(9)
+                inner_text = match.group(12)
                 inner_runs = self._parse_inline_formatting(inner_text, citation_map)
                 # Add bold to all inner runs
                 for run in inner_runs:
                     if run["type"] == "text":
                         run["bold"] = True
                     runs.append(run)
-            elif match.group(10):  # Underline
+            elif match.group(13):  # Underline
                 # Recursively parse inner text for bold/italic/other formatting
-                inner_text = match.group(11)
+                inner_text = match.group(14)
                 inner_runs = self._parse_inline_formatting(inner_text, citation_map)
                 # Add underline to all inner runs
                 for run in inner_runs:
                     if run["type"] == "text":
                         run["underline"] = True
                     runs.append(run)
-            elif match.group(12):  # Italic with asterisks
+            elif match.group(15):  # Italic with asterisks
                 # Recursively parse inner text for bold/underline/other formatting
-                inner_text = match.group(13)
+                inner_text = match.group(16)
                 inner_runs = self._parse_inline_formatting(inner_text, citation_map)
                 # Add italic to all inner runs
                 for run in inner_runs:
                     if run["type"] == "text":
                         run["italic"] = True
                     runs.append(run)
-            elif match.group(14):  # Italic with underscores
+            elif match.group(17):  # Italic with underscores
                 # Recursively parse inner text for bold/underline/other formatting
-                inner_text = match.group(15)
+                inner_text = match.group(18)
                 inner_runs = self._parse_inline_formatting(inner_text, citation_map)
                 # Add italic to all inner runs
                 for run in inner_runs:
                     if run["type"] == "text":
                         run["italic"] = True
                     runs.append(run)
-            elif match.group(16):  # Code
+            elif match.group(19):  # Subscript
                 runs.append(
                     {
                         "type": "text",
-                        "text": match.group(17),
+                        "text": match.group(20),
+                        "bold": False,
+                        "italic": False,
+                        "underline": False,
+                        "code": False,
+                        "xref": False,
+                        "subscript": True,
+                    }
+                )
+            elif match.group(21):  # Superscript
+                runs.append(
+                    {
+                        "type": "text",
+                        "text": match.group(22),
+                        "bold": False,
+                        "italic": False,
+                        "underline": False,
+                        "code": False,
+                        "xref": False,
+                        "superscript": True,
+                    }
+                )
+            elif match.group(23):  # Code
+                runs.append(
+                    {
+                        "type": "text",
+                        "text": match.group(24),
                         "bold": False,
                         "italic": False,
                         "underline": False,
@@ -446,14 +511,23 @@ class DocxContentProcessor:
                         "xref": False,
                     }
                 )
-            elif match.group(18):  # Inline math
-                runs.append({"type": "inline_equation", "latex": match.group(19)})
-            elif match.group(20):  # Citation
-                # Parse citation numbers (may be multiple: [1, 2, 3])
-                numbers_str = match.group(21)
-                numbers = [int(n.strip()) for n in numbers_str.split(",")]
-                for num in numbers:
-                    runs.append({"type": "citation", "number": num})
+            elif match.group(25):  # Inline math
+                runs.append({"type": "inline_equation", "latex": match.group(26)})
+            elif match.group(27):  # Citation
+                # Keep citation as formatted text with yellow highlighting
+                # The citation mapper has already formatted ranges (e.g., [1-3], [1, 4-6, 8])
+                citation_text = match.group(0)  # Full match including brackets
+                runs.append(
+                    {
+                        "type": "text",
+                        "text": citation_text,
+                        "bold": False,
+                        "italic": False,
+                        "underline": False,
+                        "code": False,
+                        "highlight_yellow": True,  # Highlight citations in yellow
+                    }
+                )
             last_end = match.end()
@@ -516,6 +590,7 @@ class DocxContentProcessor:
         # Look ahead for caption line (skip empty lines)
         caption = ""
         label = ""
+        is_supplementary = False  # Default to main figure
         next_i = start_idx + 1
         # Skip empty lines to find caption
@@ -529,6 +604,9 @@ class DocxContentProcessor:
             # Check for {#fig:label ...} or {#sfig:label ...} **Caption**
             if next_line and (next_line.startswith("{#fig:") or next_line.startswith("{#sfig:")):
+                # Detect if it's a supplementary figure
+                is_supplementary = next_line.startswith("{#sfig:")
                 # Extract label if present
                 label_match = re.match(r"\{#s?fig:(\w+)[^}]*\}", next_line)
                 if label_match:
@@ -572,6 +650,7 @@ class DocxContentProcessor:
             "alt": alt_text,
             "caption": caption,
             "label": label,
+            "is_supplementary": is_supplementary,
         }, next_i
     def _parse_table(self, lines: List[str], start_idx: int) -> tuple[Optional[Dict[str, Any]], int]:
@@ -626,7 +705,8 @@ class DocxContentProcessor:
         if i < len(lines):
             caption_line = lines[i].strip()
             # Match {#stable:label} Caption or {#table:label} Caption
-            caption_match = re.match(r"^\{#(stable|table):(\w+)\}\s*(.+)$", caption_line)
+            # Allow hyphens and underscores in label names (e.g., "tool-comparison")
+            caption_match = re.match(r"^\{#(stable|table):([\w-]+)\}\s*(.+)$", caption_line)
             if caption_match:
                 label = f"{caption_match.group(1)}:{caption_match.group(2)}"
                 caption = caption_match.group(3).strip()

rxiv-maker 1.16.8__py3-none-any.whl → 1.18.0__py3-none-any.whl

rxiv-maker 1.16.8__py3-none-any.whl → 1.18.0__py3-none-any.whl