arxiv-to-prompt 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arxiv_to_prompt/cli.py CHANGED
@@ -22,13 +22,19 @@ def main():
22
22
  help=f"Custom directory to store downloaded files (default: {default_cache})",
23
23
  default=None
24
24
  )
25
+ parser.add_argument(
26
+ "--no-appendix",
27
+ action="store_true",
28
+ help="Remove the appendix section and everything after it"
29
+ )
25
30
 
26
31
  args = parser.parse_args()
27
32
 
28
33
  content = process_latex_source(
29
34
  args.arxiv_id,
30
35
  keep_comments=not args.no_comments,
31
- cache_dir=args.cache_dir
36
+ cache_dir=args.cache_dir,
37
+ remove_appendix_section=args.no_appendix
32
38
  )
33
39
  if content:
34
40
  print(content)
arxiv_to_prompt/core.py CHANGED
@@ -140,6 +140,14 @@ def remove_comments_from_lines(text: str) -> str:
140
140
  result.append(''.join(cleaned_line).rstrip())
141
141
  return '\n'.join(result)
142
142
 
143
+ def remove_appendix(text: str) -> str:
144
+ """Remove appendix section and everything after it."""
145
+ # Find the position of \appendix command
146
+ appendix_match = re.search(r'\\appendix\b', text)
147
+ if appendix_match:
148
+ return text[:appendix_match.start()].rstrip()
149
+ return text
150
+
143
151
  def flatten_tex(directory: str, main_file: str) -> str:
144
152
  """Combine all tex files into one, resolving inputs."""
145
153
  def process_file(file_path: str, processed_files: set) -> str:
@@ -184,7 +192,8 @@ def flatten_tex(directory: str, main_file: str) -> str:
184
192
 
185
193
  # Process the command normally
186
194
  input_file = match.group(1)
187
- if not input_file.endswith('.tex'):
195
+ # Only add .tex extension if the file has no extension at all
196
+ if not os.path.splitext(input_file)[1]:
188
197
  input_file += '.tex'
189
198
  input_path = os.path.join(directory, input_file)
190
199
  return process_file(input_path, processed_files)
@@ -201,7 +210,7 @@ def flatten_tex(directory: str, main_file: str) -> str:
201
210
 
202
211
  def process_latex_source(arxiv_id: str, keep_comments: bool = True,
203
212
  cache_dir: Optional[str] = None,
204
- use_cache: bool = False) -> Optional[str]:
213
+ use_cache: bool = False, remove_appendix_section: bool = False) -> Optional[str]:
205
214
  """
206
215
  Process LaTeX source files from arXiv and return the combined content.
207
216
 
@@ -210,6 +219,7 @@ def process_latex_source(arxiv_id: str, keep_comments: bool = True,
210
219
  keep_comments: Whether to keep LaTeX comments in the output
211
220
  cache_dir: Custom directory to store downloaded files
212
221
  use_cache: Whether to use cached files if they exist (default: False)
222
+ remove_appendix_section: Whether to remove the appendix section and everything after it
213
223
 
214
224
  Returns:
215
225
  The processed LaTeX content or None if processing fails
@@ -234,6 +244,10 @@ def process_latex_source(arxiv_id: str, keep_comments: bool = True,
234
244
  if not keep_comments:
235
245
  content = remove_comments_from_lines(content)
236
246
 
247
+ # Remove appendix if requested
248
+ if remove_appendix_section:
249
+ content = remove_appendix(content)
250
+
237
251
  return content
238
252
 
239
253
  def check_source_available(arxiv_id: str) -> bool:
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: arxiv-to-prompt
3
- Version: 0.1.1
3
+ Version: 0.2.1
4
4
  Summary: transform arXiv papers into a single latex prompt for LLMs
5
5
  Author: Takashi Ishida
6
6
  License: MIT
@@ -15,15 +15,16 @@ Requires-Dist: requests>=2.25.0
15
15
  Provides-Extra: test
16
16
  Requires-Dist: pytest>=7.0.0; extra == "test"
17
17
  Requires-Dist: pytest-cov>=4.0.0; extra == "test"
18
+ Dynamic: license-file
18
19
 
19
20
  # arxiv-to-prompt
20
21
 
21
- [![PyPI version](https://badge.fury.io/py/arxiv-to-prompt.svg?update=20250202)](https://pypi.org/project/arxiv-to-prompt/)
22
+ [![PyPI version](https://badge.fury.io/py/arxiv-to-prompt.svg?update=20250307)](https://pypi.org/project/arxiv-to-prompt/)
22
23
  [![Tests](https://github.com/takashiishida/arxiv-to-prompt/actions/workflows/tests.yml/badge.svg)](https://github.com/takashiishida/arxiv-to-prompt/actions)
23
24
  [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
24
25
  [![Changelog](https://img.shields.io/github/v/release/takashiishida/arxiv-to-prompt?label=changelog)](https://github.com/takashiishida/arxiv-to-prompt/releases)
25
26
 
26
- A command-line tool to transform arXiv papers into a single LaTeX source that can be used as a prompt for asking LLMs questions about the paper. It downloads the source files, automatically finds the main tex file containing `\documentclass`, and flattens multiple files into a single coherent source by resolving `\input` and `\include` commands. The tool also provides an option to remove LaTeX comments from the output (which can be useful to shorten the prompt).
27
+ A command-line tool to transform arXiv papers into a single LaTeX source that can be used as a prompt for asking LLMs questions about the paper. It downloads the source files, automatically finds the main tex file containing `\documentclass`, and flattens multiple files into a single coherent source by resolving `\input` and `\include` commands. The tool also provides options to remove LaTeX comments and appendix sections from the output (which can be useful to shorten the prompt).
27
28
 
28
29
  ### Installation
29
30
 
@@ -41,6 +42,12 @@ arxiv-to-prompt 2303.08774
41
42
  # Display LaTeX source without comments
42
43
  arxiv-to-prompt 2303.08774 --no-comments
43
44
 
45
+ # Display LaTeX source without appendix sections
46
+ arxiv-to-prompt 2303.08774 --no-appendix
47
+
48
+ # Combine options (no comments and no appendix)
49
+ arxiv-to-prompt 2303.08774 --no-comments --no-appendix
50
+
44
51
  # Copy to clipboard
45
52
  arxiv-to-prompt 2303.08774 | pbcopy
46
53
 
@@ -62,8 +69,23 @@ latex_source = process_latex_source("2303.08774")
62
69
 
63
70
  # Get LaTeX source without comments
64
71
  latex_source = process_latex_source("2303.08774", keep_comments=False)
72
+
73
+ # Get LaTeX source without appendix sections
74
+ latex_source = process_latex_source("2303.08774", remove_appendix_section=True)
75
+
76
+ # Combine options (no comments and no appendix)
77
+ latex_source = process_latex_source("2303.08774", keep_comments=False, remove_appendix_section=True)
65
78
  ```
66
79
 
80
+ ### Projects Using arxiv-to-prompt
81
+
82
+ Here are some projects and use cases that leverage arxiv-to-prompt:
83
+
84
+ - [arxiv-latex-mcp](https://github.com/takashiishida/arxiv-latex-mcp): MCP server that uses arxiv-to-prompt to fetch and process arXiv LaTeX sources for precise interpretation of mathematical expressions in scientific papers.
85
+ - [arxiv-tex-ui](https://github.com/takashiishida/arxiv-tex-ui): Chat with an LLM about an arXiv paper using its LaTeX source.
86
+
87
+ If you're using arxiv-to-prompt in your project, please submit a pull request to add it to this list!
88
+
67
89
  ### References
68
90
 
69
91
  - Inspired by [files-to-prompt](https://github.com/simonw/files-to-prompt).
@@ -0,0 +1,9 @@
1
+ arxiv_to_prompt/__init__.py,sha256=oL2bEzZhiFoMqCF-84Xmljqw55lgRkwInBFpExRPCTY,609
2
+ arxiv_to_prompt/cli.py,sha256=2ZVmxNcygFpOFROfCo-FtXzcRpLVVRUOkIhASL0iD7o,1179
3
+ arxiv_to_prompt/core.py,sha256=0XwG9hqljQ3FHDOmmR7C8CX4ge1CJJAqSosVzTXhkes,10425
4
+ arxiv_to_prompt-0.2.1.dist-info/licenses/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
5
+ arxiv_to_prompt-0.2.1.dist-info/METADATA,sha256=lWDgxqAXcdZs7OjKrq5QN2gDBemZtRYqcoCjdIIfw-o,3998
6
+ arxiv_to_prompt-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ arxiv_to_prompt-0.2.1.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
8
+ arxiv_to_prompt-0.2.1.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
9
+ arxiv_to_prompt-0.2.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,9 +0,0 @@
1
- arxiv_to_prompt/__init__.py,sha256=oL2bEzZhiFoMqCF-84Xmljqw55lgRkwInBFpExRPCTY,609
2
- arxiv_to_prompt/cli.py,sha256=WafgKxxpgJrLyeuQ-tnUASoknoNXiaQRWLP-Emsr-ug,977
3
- arxiv_to_prompt/core.py,sha256=UOFbivdGx470Myh5wzq4SVzh0AOPFldU3thBmwfsd6g,9802
4
- arxiv_to_prompt-0.1.1.dist-info/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
5
- arxiv_to_prompt-0.1.1.dist-info/METADATA,sha256=IKVSFdYmNdt-YUHA3JBltJsOvLCkxyd90d4_gbcf2ok,2920
6
- arxiv_to_prompt-0.1.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
7
- arxiv_to_prompt-0.1.1.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
8
- arxiv_to_prompt-0.1.1.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
9
- arxiv_to_prompt-0.1.1.dist-info/RECORD,,