arxiv-to-prompt 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arxiv_to_prompt/cli.py CHANGED
@@ -22,13 +22,19 @@ def main():
22
22
  help=f"Custom directory to store downloaded files (default: {default_cache})",
23
23
  default=None
24
24
  )
25
+ parser.add_argument(
26
+ "--no-appendix",
27
+ action="store_true",
28
+ help="Remove the appendix section and everything after it"
29
+ )
25
30
 
26
31
  args = parser.parse_args()
27
32
 
28
33
  content = process_latex_source(
29
34
  args.arxiv_id,
30
35
  keep_comments=not args.no_comments,
31
- cache_dir=args.cache_dir
36
+ cache_dir=args.cache_dir,
37
+ remove_appendix_section=args.no_appendix
32
38
  )
33
39
  if content:
34
40
  print(content)
arxiv_to_prompt/core.py CHANGED
@@ -140,6 +140,14 @@ def remove_comments_from_lines(text: str) -> str:
140
140
  result.append(''.join(cleaned_line).rstrip())
141
141
  return '\n'.join(result)
142
142
 
143
+ def remove_appendix(text: str) -> str:
144
+ """Remove appendix section and everything after it."""
145
+ # Find the position of \appendix command
146
+ appendix_match = re.search(r'\\appendix\b', text)
147
+ if appendix_match:
148
+ return text[:appendix_match.start()].rstrip()
149
+ return text
150
+
143
151
  def flatten_tex(directory: str, main_file: str) -> str:
144
152
  """Combine all tex files into one, resolving inputs."""
145
153
  def process_file(file_path: str, processed_files: set) -> str:
@@ -151,8 +159,38 @@ def flatten_tex(directory: str, main_file: str) -> str:
151
159
  with open(file_path, 'r', encoding='utf-8') as f:
152
160
  content = f.read()
153
161
 
154
- # Process \input and \include commands
162
+ # Process \input and \include commands that are not commented out
155
163
  def replace_input(match):
164
+ # Check if the match is preceded by a comment character
165
+ line_start = content.rfind('\n', 0, match.start()) + 1
166
+ line_prefix = content[line_start:match.start()]
167
+
168
+ # If there's a % character in the line prefix that's not escaped,
169
+ # this command is commented out, so return the original text
170
+ comment_pos = -1
171
+ i = 0
172
+ while i < len(line_prefix):
173
+ if line_prefix[i] == '%':
174
+ # Check if the % is escaped with a backslash
175
+ if i > 0 and line_prefix[i-1] == '\\':
176
+ # Count backslashes before %
177
+ backslash_count = 0
178
+ j = i - 1
179
+ while j >= 0 and line_prefix[j] == '\\':
180
+ backslash_count += 1
181
+ j -= 1
182
+ # If odd number of backslashes, % is escaped
183
+ if backslash_count % 2 == 1:
184
+ i += 1
185
+ continue
186
+ comment_pos = i
187
+ break
188
+ i += 1
189
+
190
+ if comment_pos != -1:
191
+ return match.group(0) # Return the original text without processing
192
+
193
+ # Process the command normally
156
194
  input_file = match.group(1)
157
195
  if not input_file.endswith('.tex'):
158
196
  input_file += '.tex'
@@ -171,7 +209,7 @@ def flatten_tex(directory: str, main_file: str) -> str:
171
209
 
172
210
  def process_latex_source(arxiv_id: str, keep_comments: bool = True,
173
211
  cache_dir: Optional[str] = None,
174
- use_cache: bool = False) -> Optional[str]:
212
+ use_cache: bool = False, remove_appendix_section: bool = False) -> Optional[str]:
175
213
  """
176
214
  Process LaTeX source files from arXiv and return the combined content.
177
215
 
@@ -180,6 +218,7 @@ def process_latex_source(arxiv_id: str, keep_comments: bool = True,
180
218
  keep_comments: Whether to keep LaTeX comments in the output
181
219
  cache_dir: Custom directory to store downloaded files
182
220
  use_cache: Whether to use cached files if they exist (default: False)
221
+ remove_appendix_section: Whether to remove the appendix section and everything after it
183
222
 
184
223
  Returns:
185
224
  The processed LaTeX content or None if processing fails
@@ -204,6 +243,10 @@ def process_latex_source(arxiv_id: str, keep_comments: bool = True,
204
243
  if not keep_comments:
205
244
  content = remove_comments_from_lines(content)
206
245
 
246
+ # Remove appendix if requested
247
+ if remove_appendix_section:
248
+ content = remove_appendix(content)
249
+
207
250
  return content
208
251
 
209
252
  def check_source_available(arxiv_id: str) -> bool:
@@ -1,41 +1,13 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: arxiv-to-prompt
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: transform arXiv papers into a single latex prompt for LLMs
5
5
  Author: Takashi Ishida
6
- License: MIT License
7
-
8
- Copyright (c) 2025 Takashi Ishida
9
-
10
- Permission is hereby granted, free of charge, to any person obtaining a copy
11
- of this software and associated documentation files (the "Software"), to deal
12
- in the Software without restriction, including without limitation the rights
13
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
- copies of the Software, and to permit persons to whom the Software is
15
- furnished to do so, subject to the following conditions:
16
-
17
- The above copyright notice and this permission notice shall be included in all
18
- copies or substantial portions of the Software.
19
-
20
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
- SOFTWARE.
27
-
6
+ License: MIT
28
7
  Project-URL: Homepage, https://github.com/takashiishida/arxiv-to-prompt
29
8
  Project-URL: Changelog, https://github.com/takashiishida/arxiv-to-prompt/releases
30
9
  Project-URL: Issues, https://github.com/takashiishida/arxiv-to-prompt/issues
31
10
  Project-URL: CI, https://github.com/takashiishida/arxiv-to-prompt/actions
32
- Classifier: License :: OSI Approved :: MIT License
33
- Classifier: Programming Language :: Python :: 3
34
- Classifier: Programming Language :: Python :: 3.8
35
- Classifier: Programming Language :: Python :: 3.9
36
- Classifier: Programming Language :: Python :: 3.10
37
- Classifier: Programming Language :: Python :: 3.11
38
- Classifier: Operating System :: OS Independent
39
11
  Requires-Python: >=3.8
40
12
  Description-Content-Type: text/markdown
41
13
  License-File: LICENSE
@@ -43,15 +15,16 @@ Requires-Dist: requests>=2.25.0
43
15
  Provides-Extra: test
44
16
  Requires-Dist: pytest>=7.0.0; extra == "test"
45
17
  Requires-Dist: pytest-cov>=4.0.0; extra == "test"
18
+ Dynamic: license-file
46
19
 
47
20
  # arxiv-to-prompt
48
21
 
49
- [![PyPI version](https://badge.fury.io/py/arxiv-to-prompt.svg)](https://pypi.org/project/arxiv-to-prompt/)
22
+ [![PyPI version](https://badge.fury.io/py/arxiv-to-prompt.svg?update=20250307)](https://pypi.org/project/arxiv-to-prompt/)
50
23
  [![Tests](https://github.com/takashiishida/arxiv-to-prompt/actions/workflows/tests.yml/badge.svg)](https://github.com/takashiishida/arxiv-to-prompt/actions)
51
24
  [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
52
25
  [![Changelog](https://img.shields.io/github/v/release/takashiishida/arxiv-to-prompt?label=changelog)](https://github.com/takashiishida/arxiv-to-prompt/releases)
53
26
 
54
- A command-line tool to transform arXiv papers into a single LaTeX source that can be used as a prompt for asking LLMs questions about the paper. It downloads the source files, automatically finds the main tex file containing `\documentclass`, and flattens multiple files into a single coherent source by resolving `\input` and `\include` commands. The tool also provides an option to remove LaTeX comments from the output (which can be useful to shorten the prompt).
27
+ A command-line tool to transform arXiv papers into a single LaTeX source that can be used as a prompt for asking LLMs questions about the paper. It downloads the source files, automatically finds the main tex file containing `\documentclass`, and flattens multiple files into a single coherent source by resolving `\input` and `\include` commands. The tool also provides options to remove LaTeX comments and appendix sections from the output (which can be useful to shorten the prompt).
55
28
 
56
29
  ### Installation
57
30
 
@@ -69,8 +42,17 @@ arxiv-to-prompt 2303.08774
69
42
  # Display LaTeX source without comments
70
43
  arxiv-to-prompt 2303.08774 --no-comments
71
44
 
45
+ # Display LaTeX source without appendix sections
46
+ arxiv-to-prompt 2303.08774 --no-appendix
47
+
48
+ # Combine options (no comments and no appendix)
49
+ arxiv-to-prompt 2303.08774 --no-comments --no-appendix
50
+
72
51
  # Copy to clipboard
73
52
  arxiv-to-prompt 2303.08774 | pbcopy
53
+
54
+ # Combine with the `llm` library from https://github.com/simonw/llm to chat about the paper
55
+ arxiv-to-prompt 1706.03762 | llm -s "explain this paper"
74
56
  ```
75
57
 
76
58
  The arXiv ID can be found in the paper's URL. For example, for `https://arxiv.org/abs/2303.08774`, the ID is `2303.08774`. It will automatically download the latest version of the paper, so you don't need to specify the version.
@@ -87,8 +69,23 @@ latex_source = process_latex_source("2303.08774")
87
69
 
88
70
  # Get LaTeX source without comments
89
71
  latex_source = process_latex_source("2303.08774", keep_comments=False)
72
+
73
+ # Get LaTeX source without appendix sections
74
+ latex_source = process_latex_source("2303.08774", remove_appendix_section=True)
75
+
76
+ # Combine options (no comments and no appendix)
77
+ latex_source = process_latex_source("2303.08774", keep_comments=False, remove_appendix_section=True)
90
78
  ```
91
79
 
80
+ ### Projects Using arxiv-to-prompt
81
+
82
+ Here are some projects and use cases that leverage arxiv-to-prompt:
83
+
84
+ - [arxiv-latex-mcp](https://github.com/takashiishida/arxiv-latex-mcp): MCP server that uses arxiv-to-prompt to fetch and process arXiv LaTeX sources for precise interpretation of mathematical expressions in scientific papers.
85
+ - [arxiv-tex-ui](https://github.com/takashiishida/arxiv-tex-ui): Chat with an LLM about an arXiv paper using its LaTeX source.
86
+
87
+ If you're using arxiv-to-prompt in your project, please submit a pull request to add it to this list!
88
+
92
89
  ### References
93
90
 
94
91
  - Inspired by [files-to-prompt](https://github.com/simonw/files-to-prompt).
@@ -0,0 +1,9 @@
1
+ arxiv_to_prompt/__init__.py,sha256=oL2bEzZhiFoMqCF-84Xmljqw55lgRkwInBFpExRPCTY,609
2
+ arxiv_to_prompt/cli.py,sha256=2ZVmxNcygFpOFROfCo-FtXzcRpLVVRUOkIhASL0iD7o,1179
3
+ arxiv_to_prompt/core.py,sha256=pVsUzpplBBTLBxxjYQ6AbR667XlZ9TMz3RFNS8bX7X8,10343
4
+ arxiv_to_prompt-0.2.0.dist-info/licenses/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
5
+ arxiv_to_prompt-0.2.0.dist-info/METADATA,sha256=iD0c2HDslUJzc-1xWN1-0X95TUZtxBpLPc36NIicF_A,3998
6
+ arxiv_to_prompt-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ arxiv_to_prompt-0.2.0.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
8
+ arxiv_to_prompt-0.2.0.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
9
+ arxiv_to_prompt-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,9 +0,0 @@
1
- arxiv_to_prompt/__init__.py,sha256=oL2bEzZhiFoMqCF-84Xmljqw55lgRkwInBFpExRPCTY,609
2
- arxiv_to_prompt/cli.py,sha256=WafgKxxpgJrLyeuQ-tnUASoknoNXiaQRWLP-Emsr-ug,977
3
- arxiv_to_prompt/core.py,sha256=cQcMNQJSrRVQAQsy2ULeLVlQlKIDDdgVLHFKJNMR0Sg,8296
4
- arxiv_to_prompt-0.1.0.dist-info/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
5
- arxiv_to_prompt-0.1.0.dist-info/METADATA,sha256=H8T6HFkP199SK19Jy66MgrVE2S8kTBr-2yYzC9qpQBs,4338
6
- arxiv_to_prompt-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
7
- arxiv_to_prompt-0.1.0.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
8
- arxiv_to_prompt-0.1.0.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
9
- arxiv_to_prompt-0.1.0.dist-info/RECORD,,