PyPI - arxiv-to-prompt - Versions diffs - 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

arxiv-to-prompt 0.2.2-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

arxiv_to_prompt/__init__.py CHANGED Viewed

@@ -1,14 +1,18 @@
 """
-arxiv-to-prompt: A tool to download and process LaTeX source from arXiv papers.
+arxiv-to-prompt: A tool to download and process LaTeX source from arXiv papers or local folders.
 This package provides functionality to:
 - Download source files from any arXiv paper using its ID
+- Process LaTeX source files from a local folder
 - Smart concatenation of multiple LaTeX files into a single coherent source
 - Option to remove LaTeX comments
 Example:
     >>> from arxiv_to_prompt import process_latex_source
+    >>> # From arXiv
     >>> latex_source = process_latex_source("2303.08774")
+    >>> # From local folder
+    >>> latex_source = process_latex_source(local_folder="/path/to/tex/files")
 """
 from .core import process_latex_source, download_arxiv_source, get_default_cache_dir

arxiv_to_prompt/cli.py CHANGED Viewed

@@ -5,11 +5,13 @@ def main():
     default_cache = str(get_default_cache_dir())
     parser = argparse.ArgumentParser(
-        description="Download and display LaTeX source from arXiv papers."
+        description="Download and display LaTeX source from arXiv papers or process local TeX files."
     )
     parser.add_argument(
         "arxiv_id",
-        help="The arXiv ID of the paper (do not include the version, e.g. v1, v2)"
+        nargs="?",
+        default=None,
+        help="The arXiv ID of the paper (do not include the version, e.g. v1, v2). Not needed if --local-folder is provided."
     )
     parser.add_argument(
         "--no-comments",
@@ -27,14 +29,28 @@ def main():
         action="store_true",
         help="Remove the appendix section and everything after it"
     )
+    parser.add_argument(
+        "--local-folder",
+        type=str,
+        help="Path to a local folder containing TeX files (alternative to arxiv_id)",
+        default=None
+    )
     args = parser.parse_args()
+    # Validate that either arxiv_id or local_folder is provided
+    if not args.arxiv_id and not args.local_folder:
+        parser.error("Either provide an arXiv ID or use --local-folder to specify a local folder")
+    if args.arxiv_id and args.local_folder:
+        parser.error("Cannot specify both arXiv ID and --local-folder")
     content = process_latex_source(
-        args.arxiv_id,
+        arxiv_id=args.arxiv_id,
         keep_comments=not args.no_comments,
         cache_dir=args.cache_dir,
-        remove_appendix_section=args.no_appendix
+        remove_appendix_section=args.no_appendix,
+        local_folder=args.local_folder
     )
     if content:
         print(content)

arxiv_to_prompt/core.py CHANGED Viewed

@@ -92,14 +92,28 @@ def download_arxiv_source(arxiv_id: str, cache_dir: Optional[str] = None, use_ca
 def find_main_tex(directory: str) -> Optional[str]:
     """
-    Find the main .tex file containing documentclass. If there are multiple files,
-    returns the filename of the longest .tex file containing documentclass, since shorter
-    files are typically conference templates or supplementary documents rather than the
-    main manuscript.
+    Find the main .tex file containing documentclass.
+    First checks for common naming conventions (main.tex, paper.tex, index.tex).
+    If none found, returns the filename of the longest .tex file containing documentclass,
+    since shorter files are typically conference templates or supplementary documents
+    rather than the main manuscript.
     """
+    common_names = ['main.tex', 'paper.tex', 'index.tex']
     main_tex_file = None
     max_line_count = 0
+    # First pass: check for common naming conventions
+    for file_name in os.listdir(directory):
+        if file_name in common_names:
+            try:
+                with open(os.path.join(directory, file_name), 'r', encoding='utf-8') as file:
+                    lines = file.readlines()
+                    if any('\\documentclass' in line for line in lines):
+                        return file_name
+            except Exception as e:
+                logging.warning(f"Could not read file {file_name}: {e}")
+    # Second pass: find the longest .tex file containing documentclass
     for file_name in os.listdir(directory):
         if file_name.endswith('.tex'):
             try:
@@ -208,37 +222,57 @@ def flatten_tex(directory: str, main_file: str) -> str:
     main_file_path = os.path.join(directory, main_file)
     return process_file(main_file_path, set())
-def process_latex_source(arxiv_id: str, keep_comments: bool = True,
+def process_latex_source(arxiv_id: Optional[str] = None, keep_comments: bool = True,
                         cache_dir: Optional[str] = None,
-                        use_cache: bool = False, remove_appendix_section: bool = False) -> Optional[str]:
+                        use_cache: bool = False, remove_appendix_section: bool = False,
+                        local_folder: Optional[str] = None) -> Optional[str]:
     """
-    Process LaTeX source files from arXiv and return the combined content.
+    Process LaTeX source files from arXiv or a local folder and return the combined content.
     Args:
-        arxiv_id: The arXiv ID of the paper
+        arxiv_id: The arXiv ID of the paper (required if local_folder is not provided)
         keep_comments: Whether to keep LaTeX comments in the output
-        cache_dir: Custom directory to store downloaded files
-        use_cache: Whether to use cached files if they exist (default: False)
+        cache_dir: Custom directory to store downloaded files (only used for arXiv)
+        use_cache: Whether to use cached files if they exist (default: False, only used for arXiv)
         remove_appendix_section: Whether to remove the appendix section and everything after it
+        local_folder: Path to a local folder containing TeX files (alternative to arxiv_id)
     Returns:
         The processed LaTeX content or None if processing fails
     """
-    base_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
-    # Download the latest version
-    if not download_arxiv_source(arxiv_id, cache_dir, use_cache):
+    # Determine the directory to process
+    if local_folder:
+        directory = Path(local_folder).expanduser().resolve()
+        # Validate the folder exists
+        if not directory.exists():
+            logging.error(f"Local folder does not exist: {directory}")
+            return None
+        if not directory.is_dir():
+            logging.error(f"Path is not a directory: {directory}")
+            return None
+        logging.info(f"Processing local folder: {directory}")
+    elif arxiv_id:
+        base_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
+        # Download the latest version
+        if not download_arxiv_source(arxiv_id, cache_dir, use_cache):
+            return None
+        directory = base_dir / arxiv_id
+    else:
+        logging.error("Either arxiv_id or local_folder must be provided")
         return None
-    directory = base_dir / arxiv_id
-    main_file = find_main_tex(directory)
+    main_file = find_main_tex(str(directory))
     if not main_file:
         logging.error("Main .tex file not found.")
         return None
     # Get the content
-    content = flatten_tex(directory, main_file)
+    content = flatten_tex(str(directory), main_file)
     # Process comments if requested
     if not keep_comments:

{arxiv_to_prompt-0.2.2.dist-info → arxiv_to_prompt-0.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arxiv-to-prompt
-Version: 0.2.2
+Version: 0.3.0
 Summary: transform arXiv papers into a single latex prompt for LLMs
 Author: Takashi Ishida
 License: MIT
@@ -17,12 +17,15 @@ Requires-Dist: pytest>=7.0.0; extra == "test"
 Requires-Dist: pytest-cov>=4.0.0; extra == "test"
 Dynamic: license-file
-# arxiv-to-prompt
+<div align="center">
+<img src="logo.png#gh-light-mode-only" alt="" width="475"><img src="logo.png#gh-dark-mode-only" alt="" width="475">
-[![PyPI version](https://badge.fury.io/py/arxiv-to-prompt.svg?update=20250307)](https://pypi.org/project/arxiv-to-prompt/)
+[![PyPI version](https://badge.fury.io/py/arxiv-to-prompt.svg)](https://pypi.org/project/arxiv-to-prompt/)
 [![Tests](https://github.com/takashiishida/arxiv-to-prompt/actions/workflows/tests.yml/badge.svg)](https://github.com/takashiishida/arxiv-to-prompt/actions)
 [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Changelog](https://img.shields.io/github/v/release/takashiishida/arxiv-to-prompt?label=changelog)](https://github.com/takashiishida/arxiv-to-prompt/releases)
+[![Downloads](https://static.pepy.tech/badge/arxiv-to-prompt)](https://pepy.tech/project/arxiv-to-prompt)
+</div>
 A command-line tool to transform arXiv papers into a single LaTeX source that can be used as a prompt for asking LLMs questions about the paper. It downloads the source files, automatically finds the main tex file containing `\documentclass`, and flattens multiple files into a single coherent source by resolving `\input` and `\include` commands. The tool also provides options to remove LaTeX comments and appendix sections from the output (which can be useful to shorten the prompt).
@@ -48,6 +51,9 @@ arxiv-to-prompt 2303.08774 --no-appendix
 # Combine options (no comments and no appendix)
 arxiv-to-prompt 2303.08774 --no-comments --no-appendix
+# Process a local folder containing TeX files (instead of downloading from arXiv)
+arxiv-to-prompt --local-folder /path/to/tex/files
 # Copy to clipboard
 arxiv-to-prompt 2303.08774 | pbcopy
@@ -75,18 +81,18 @@ latex_source = process_latex_source("2303.08774", remove_appendix_section=True)
 # Combine options (no comments and no appendix)
 latex_source = process_latex_source("2303.08774", keep_comments=False, remove_appendix_section=True)
+# Process LaTeX sources from a local folder (instead of downloading from arXiv)
+latex_source = process_latex_source(local_folder="/path/to/tex/files")
 ```
 ### Projects Using arxiv-to-prompt
 Here are some projects and use cases that leverage arxiv-to-prompt:
-- [arxiv-latex-mcp](https://github.com/takashiishida/arxiv-latex-mcp): MCP server that uses arxiv-to-prompt to fetch and process arXiv LaTeX sources for precise interpretation of mathematical expressions in scientific papers.
-- [arxiv-tex-ui](https://github.com/takashiishida/arxiv-tex-ui): chat with an llm about an arxiv paper by using the latex source.
+- [arxiv-latex-mcp](https://github.com/takashiishida/arxiv-latex-mcp): MCP server that fetches and processes arXiv LaTeX sources for precise interpretation of mathematical expressions in papers.
+- [arxiv-tex-ui](https://github.com/takashiishida/arxiv-tex-ui): chat with an LLM about an arXiv paper by using the LaTeX source.
+- [paper2slides](https://github.com/takashiishida/paper2slides): transform an arXiv paper into slides.
+- [ArXivToPrompt](https://apps.apple.com/jp/app/arxivtoprompt/id6751013390): iOS app that allows users to easily extract LaTeX source from arXiv papers on their iPhone and copy it to the clipboard for use with LLM apps.
 If you're using arxiv-to-prompt in your project, please submit a pull request to add it to this list!
-### References
-- Inspired by [files-to-prompt](https://github.com/simonw/files-to-prompt).
-- Reused some code from [paper2slides](https://github.com/takashiishida/paper2slides).

arxiv_to_prompt-0.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+arxiv_to_prompt/__init__.py,sha256=riK7TcTaKDleP5g5rjf2jkmLtXZu7irNZDujyAVDnKM,1093
+arxiv_to_prompt/cli.py,sha256=TUnHsGolF5zhiexW5RXBPhNL0HODmfppEkXzC8z65NE,1861
+arxiv_to_prompt/core.py,sha256=pgb8PGiOqgbPTW5rIJwLlmS9n3nnlYa5UVQ5YSvCIuo,12077
+arxiv_to_prompt-0.3.0.dist-info/licenses/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
+arxiv_to_prompt-0.3.0.dist-info/METADATA,sha256=CNOBI0du7Yj9Hyr6JElWhohvLgd4jLUvlZEn9c8NexU,4608
+arxiv_to_prompt-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+arxiv_to_prompt-0.3.0.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
+arxiv_to_prompt-0.3.0.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
+arxiv_to_prompt-0.3.0.dist-info/RECORD,,

arxiv_to_prompt-0.2.2.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-arxiv_to_prompt/__init__.py,sha256=UjbXdsTGX7eT6O1RvqGB1-wMv_Kj-pM-7M5FZUUzVIQ,899
-arxiv_to_prompt/cli.py,sha256=2ZVmxNcygFpOFROfCo-FtXzcRpLVVRUOkIhASL0iD7o,1179
-arxiv_to_prompt/core.py,sha256=0XwG9hqljQ3FHDOmmR7C8CX4ge1CJJAqSosVzTXhkes,10425
-arxiv_to_prompt-0.2.2.dist-info/licenses/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
-arxiv_to_prompt-0.2.2.dist-info/METADATA,sha256=MwmVCm5oLxKS8L-gQeFHK5W-YCidjepwebVesWHskco,3998
-arxiv_to_prompt-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-arxiv_to_prompt-0.2.2.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
-arxiv_to_prompt-0.2.2.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
-arxiv_to_prompt-0.2.2.dist-info/RECORD,,

{arxiv_to_prompt-0.2.2.dist-info → arxiv_to_prompt-0.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{arxiv_to_prompt-0.2.2.dist-info → arxiv_to_prompt-0.3.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{arxiv_to_prompt-0.2.2.dist-info → arxiv_to_prompt-0.3.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{arxiv_to_prompt-0.2.2.dist-info → arxiv_to_prompt-0.3.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

arxiv-to-prompt 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

arxiv-to-prompt 0.2.2-py3-none-any.whl → 0.3.0-py3-none-any.whl