arxiv-to-prompt 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arxiv-to-prompt
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: transform arXiv papers into a single latex prompt for LLMs
5
5
  Author: Takashi Ishida
6
6
  License: MIT
@@ -61,7 +61,7 @@ arxiv-to-prompt 2303.08774 | pbcopy
61
61
  arxiv-to-prompt 1706.03762 | llm -s "explain this paper"
62
62
  ```
63
63
 
64
- The arXiv ID can be found in the paper's URL. For example, for `https://arxiv.org/abs/2303.08774`, the ID is `2303.08774`. It will automatically download the latest version of the paper, so you don't need to specify the version.
64
+ You can use either the arXiv ID (e.g., `2303.08774`) or the full URL (e.g., `https://arxiv.org/abs/2303.08774`). It will automatically download the latest version of the paper, so you don't need to specify the version.
65
65
 
66
66
  ### Python API
67
67
 
@@ -42,7 +42,7 @@ arxiv-to-prompt 2303.08774 | pbcopy
42
42
  arxiv-to-prompt 1706.03762 | llm -s "explain this paper"
43
43
  ```
44
44
 
45
- The arXiv ID can be found in the paper's URL. For example, for `https://arxiv.org/abs/2303.08774`, the ID is `2303.08774`. It will automatically download the latest version of the paper, so you don't need to specify the version.
45
+ You can use either the arXiv ID (e.g., `2303.08774`) or the full URL (e.g., `https://arxiv.org/abs/2303.08774`). It will automatically download the latest version of the paper, so you don't need to specify the version.
46
46
 
47
47
  ### Python API
48
48
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "arxiv-to-prompt"
7
- version = "0.3.0"
7
+ version = "0.4.0"
8
8
  description = "transform arXiv papers into a single latex prompt for LLMs"
9
9
  readme = "README.md"
10
10
  authors = [{ name = "Takashi Ishida" }]
@@ -1,6 +1,16 @@
1
1
  import argparse
2
+ import re
2
3
  from .core import process_latex_source, get_default_cache_dir
3
4
 
5
+
6
def extract_arxiv_id(input_str: str) -> str:
    """Extract the arXiv ID from an arXiv URL, or return the input unchanged.

    Recognises ``abs``, ``pdf`` and ``html`` URLs on any ``arxiv.org`` host
    (matched case-insensitively), for both new-style IDs (``2303.08774``)
    and old-style IDs (``hep-th/9901001``, ``math.GT/0309136``). A trailing
    version suffix (``v2``) or ``.pdf`` extension is not part of the result.

    Args:
        input_str: An arXiv URL or a bare arXiv ID.

    Returns:
        The version-less arXiv ID when ``input_str`` contains a recognised
        arXiv URL; otherwise ``input_str`` exactly as given.
    """
    match = re.search(
        r"arxiv\.org/(?:abs|pdf|html)/"
        # New-style ID (YYMM.NNNNN) or old-style ID (archive[.SC]/YYMMNNN).
        r"(\d{4}\.\d{4,5}|[a-z-]+(?:\.[a-z]{2})?/\d{7})"
        # Refuse to truncate an over-long digit run into a different ID.
        r"(?!\d)",
        input_str,
        re.IGNORECASE,
    )
    return match.group(1) if match else input_str
13
+
4
14
  def main():
5
15
  default_cache = str(get_default_cache_dir())
6
16
 
@@ -11,7 +21,7 @@ def main():
11
21
  "arxiv_id",
12
22
  nargs="?",
13
23
  default=None,
14
- help="The arXiv ID of the paper (do not include the version, e.g. v1, v2). Not needed if --local-folder is provided."
24
+ help="The arXiv ID (e.g. 2303.08774) or URL (e.g. https://arxiv.org/abs/2303.08774). Not needed if --local-folder is provided."
15
25
  )
16
26
  parser.add_argument(
17
27
  "--no-comments",
@@ -44,9 +54,11 @@ def main():
44
54
 
45
55
  if args.arxiv_id and args.local_folder:
46
56
  parser.error("Cannot specify both arXiv ID and --local-folder")
47
-
57
+
58
+ arxiv_id = extract_arxiv_id(args.arxiv_id) if args.arxiv_id else None
59
+
48
60
  content = process_latex_source(
49
- arxiv_id=args.arxiv_id,
61
+ arxiv_id=arxiv_id,
50
62
  keep_comments=not args.no_comments,
51
63
  cache_dir=args.cache_dir,
52
64
  remove_appendix_section=args.no_appendix,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arxiv-to-prompt
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: transform arXiv papers into a single latex prompt for LLMs
5
5
  Author: Takashi Ishida
6
6
  License: MIT
@@ -61,7 +61,7 @@ arxiv-to-prompt 2303.08774 | pbcopy
61
61
  arxiv-to-prompt 1706.03762 | llm -s "explain this paper"
62
62
  ```
63
63
 
64
- The arXiv ID can be found in the paper's URL. For example, for `https://arxiv.org/abs/2303.08774`, the ID is `2303.08774`. It will automatically download the latest version of the paper, so you don't need to specify the version.
64
+ You can use either the arXiv ID (e.g., `2303.08774`) or the full URL (e.g., `https://arxiv.org/abs/2303.08774`). It will automatically download the latest version of the paper, so you don't need to specify the version.
65
65
 
66
66
  ### Python API
67
67
 
@@ -11,6 +11,7 @@ from arxiv_to_prompt.core import (
11
11
  flatten_tex,
12
12
  remove_appendix,
13
13
  )
14
+ from arxiv_to_prompt.cli import extract_arxiv_id
14
15
 
15
16
  # Test fixtures
16
17
  @pytest.fixture
@@ -271,3 +272,27 @@ def test_input_file_extensions(temp_cache_dir):
271
272
  assert "Style content" in result
272
273
  assert "Class content" in result
273
274
  assert "Already tex content" in result
275
+
276
+
277
def test_extract_arxiv_id():
    """Check ID extraction for bare IDs, arXiv URLs, and unrelated input."""
    expected_by_input = {
        # Bare IDs pass through untouched.
        "2505.18102": "2505.18102",
        "2401.12345": "2401.12345",
        # Abstract-page URLs, both schemes.
        "https://arxiv.org/abs/2505.18102": "2505.18102",
        "http://arxiv.org/abs/2505.18102": "2505.18102",
        # PDF URLs, with and without the file extension.
        "https://arxiv.org/pdf/2505.18102": "2505.18102",
        "https://arxiv.org/pdf/2505.18102.pdf": "2505.18102",
        # Version suffixes are dropped.
        "https://arxiv.org/abs/2505.18102v1": "2505.18102",
        "https://arxiv.org/abs/2505.18102v2": "2505.18102",
        "https://arxiv.org/pdf/2505.18102v3.pdf": "2505.18102",
        # Anything that is not an arXiv URL comes back unchanged.
        "invalid": "invalid",
        "https://example.com/2505.18102": "https://example.com/2505.18102",
    }
    for raw_input, expected_id in expected_by_input.items():
        assert extract_arxiv_id(raw_input) == expected_id
File without changes