arxiv-to-prompt 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arxiv_to_prompt/cli.py +7 -1
- arxiv_to_prompt/core.py +16 -2
- {arxiv_to_prompt-0.1.1.dist-info → arxiv_to_prompt-0.2.1.dist-info}/METADATA +26 -4
- arxiv_to_prompt-0.2.1.dist-info/RECORD +9 -0
- {arxiv_to_prompt-0.1.1.dist-info → arxiv_to_prompt-0.2.1.dist-info}/WHEEL +1 -1
- arxiv_to_prompt-0.1.1.dist-info/RECORD +0 -9
- {arxiv_to_prompt-0.1.1.dist-info → arxiv_to_prompt-0.2.1.dist-info}/entry_points.txt +0 -0
- {arxiv_to_prompt-0.1.1.dist-info → arxiv_to_prompt-0.2.1.dist-info/licenses}/LICENSE +0 -0
- {arxiv_to_prompt-0.1.1.dist-info → arxiv_to_prompt-0.2.1.dist-info}/top_level.txt +0 -0
arxiv_to_prompt/cli.py
CHANGED
|
@@ -22,13 +22,19 @@ def main():
|
|
|
22
22
|
help=f"Custom directory to store downloaded files (default: {default_cache})",
|
|
23
23
|
default=None
|
|
24
24
|
)
|
|
25
|
+
parser.add_argument(
|
|
26
|
+
"--no-appendix",
|
|
27
|
+
action="store_true",
|
|
28
|
+
help="Remove the appendix section and everything after it"
|
|
29
|
+
)
|
|
25
30
|
|
|
26
31
|
args = parser.parse_args()
|
|
27
32
|
|
|
28
33
|
content = process_latex_source(
|
|
29
34
|
args.arxiv_id,
|
|
30
35
|
keep_comments=not args.no_comments,
|
|
31
|
-
cache_dir=args.cache_dir
|
|
36
|
+
cache_dir=args.cache_dir,
|
|
37
|
+
remove_appendix_section=args.no_appendix
|
|
32
38
|
)
|
|
33
39
|
if content:
|
|
34
40
|
print(content)
|
arxiv_to_prompt/core.py
CHANGED
|
@@ -140,6 +140,14 @@ def remove_comments_from_lines(text: str) -> str:
|
|
|
140
140
|
result.append(''.join(cleaned_line).rstrip())
|
|
141
141
|
return '\n'.join(result)
|
|
142
142
|
|
|
143
|
+
def remove_appendix(text: str) -> str:
|
|
144
|
+
"""Remove appendix section and everything after it."""
|
|
145
|
+
# Find the position of \appendix command
|
|
146
|
+
appendix_match = re.search(r'\\appendix\b', text)
|
|
147
|
+
if appendix_match:
|
|
148
|
+
return text[:appendix_match.start()].rstrip()
|
|
149
|
+
return text
|
|
150
|
+
|
|
143
151
|
def flatten_tex(directory: str, main_file: str) -> str:
|
|
144
152
|
"""Combine all tex files into one, resolving inputs."""
|
|
145
153
|
def process_file(file_path: str, processed_files: set) -> str:
|
|
@@ -184,7 +192,8 @@ def flatten_tex(directory: str, main_file: str) -> str:
|
|
|
184
192
|
|
|
185
193
|
# Process the command normally
|
|
186
194
|
input_file = match.group(1)
|
|
187
|
-
|
|
195
|
+
# Only add .tex extension if the file has no extension at all
|
|
196
|
+
if not os.path.splitext(input_file)[1]:
|
|
188
197
|
input_file += '.tex'
|
|
189
198
|
input_path = os.path.join(directory, input_file)
|
|
190
199
|
return process_file(input_path, processed_files)
|
|
@@ -201,7 +210,7 @@ def flatten_tex(directory: str, main_file: str) -> str:
|
|
|
201
210
|
|
|
202
211
|
def process_latex_source(arxiv_id: str, keep_comments: bool = True,
|
|
203
212
|
cache_dir: Optional[str] = None,
|
|
204
|
-
use_cache: bool = False) -> Optional[str]:
|
|
213
|
+
use_cache: bool = False, remove_appendix_section: bool = False) -> Optional[str]:
|
|
205
214
|
"""
|
|
206
215
|
Process LaTeX source files from arXiv and return the combined content.
|
|
207
216
|
|
|
@@ -210,6 +219,7 @@ def process_latex_source(arxiv_id: str, keep_comments: bool = True,
|
|
|
210
219
|
keep_comments: Whether to keep LaTeX comments in the output
|
|
211
220
|
cache_dir: Custom directory to store downloaded files
|
|
212
221
|
use_cache: Whether to use cached files if they exist (default: False)
|
|
222
|
+
remove_appendix_section: Whether to remove the appendix section and everything after it
|
|
213
223
|
|
|
214
224
|
Returns:
|
|
215
225
|
The processed LaTeX content or None if processing fails
|
|
@@ -234,6 +244,10 @@ def process_latex_source(arxiv_id: str, keep_comments: bool = True,
|
|
|
234
244
|
if not keep_comments:
|
|
235
245
|
content = remove_comments_from_lines(content)
|
|
236
246
|
|
|
247
|
+
# Remove appendix if requested
|
|
248
|
+
if remove_appendix_section:
|
|
249
|
+
content = remove_appendix(content)
|
|
250
|
+
|
|
237
251
|
return content
|
|
238
252
|
|
|
239
253
|
def check_source_available(arxiv_id: str) -> bool:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: arxiv-to-prompt
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: transform arXiv papers into a single latex prompt for LLMs
|
|
5
5
|
Author: Takashi Ishida
|
|
6
6
|
License: MIT
|
|
@@ -15,15 +15,16 @@ Requires-Dist: requests>=2.25.0
|
|
|
15
15
|
Provides-Extra: test
|
|
16
16
|
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
17
17
|
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
18
|
+
Dynamic: license-file
|
|
18
19
|
|
|
19
20
|
# arxiv-to-prompt
|
|
20
21
|
|
|
21
|
-
[](https://pypi.org/project/arxiv-to-prompt/)
|
|
22
23
|
[](https://github.com/takashiishida/arxiv-to-prompt/actions)
|
|
23
24
|
[](https://opensource.org/licenses/MIT)
|
|
24
25
|
[](https://github.com/takashiishida/arxiv-to-prompt/releases)
|
|
25
26
|
|
|
26
|
-
A command-line tool to transform arXiv papers into a single LaTeX source that can be used as a prompt for asking LLMs questions about the paper. It downloads the source files, automatically finds the main tex file containing `\documentclass`, and flattens multiple files into a single coherent source by resolving `\input` and `\include` commands. The tool also provides
|
|
27
|
+
A command-line tool to transform arXiv papers into a single LaTeX source that can be used as a prompt for asking LLMs questions about the paper. It downloads the source files, automatically finds the main tex file containing `\documentclass`, and flattens multiple files into a single coherent source by resolving `\input` and `\include` commands. The tool also provides options to remove LaTeX comments and appendix sections from the output (which can be useful to shorten the prompt).
|
|
27
28
|
|
|
28
29
|
### Installation
|
|
29
30
|
|
|
@@ -41,6 +42,12 @@ arxiv-to-prompt 2303.08774
|
|
|
41
42
|
# Display LaTeX source without comments
|
|
42
43
|
arxiv-to-prompt 2303.08774 --no-comments
|
|
43
44
|
|
|
45
|
+
# Display LaTeX source without appendix sections
|
|
46
|
+
arxiv-to-prompt 2303.08774 --no-appendix
|
|
47
|
+
|
|
48
|
+
# Combine options (no comments and no appendix)
|
|
49
|
+
arxiv-to-prompt 2303.08774 --no-comments --no-appendix
|
|
50
|
+
|
|
44
51
|
# Copy to clipboard
|
|
45
52
|
arxiv-to-prompt 2303.08774 | pbcopy
|
|
46
53
|
|
|
@@ -62,8 +69,23 @@ latex_source = process_latex_source("2303.08774")
|
|
|
62
69
|
|
|
63
70
|
# Get LaTeX source without comments
|
|
64
71
|
latex_source = process_latex_source("2303.08774", keep_comments=False)
|
|
72
|
+
|
|
73
|
+
# Get LaTeX source without appendix sections
|
|
74
|
+
latex_source = process_latex_source("2303.08774", remove_appendix_section=True)
|
|
75
|
+
|
|
76
|
+
# Combine options (no comments and no appendix)
|
|
77
|
+
latex_source = process_latex_source("2303.08774", keep_comments=False, remove_appendix_section=True)
|
|
65
78
|
```
|
|
66
79
|
|
|
80
|
+
### Projects Using arxiv-to-prompt
|
|
81
|
+
|
|
82
|
+
Here are some projects and use cases that leverage arxiv-to-prompt:
|
|
83
|
+
|
|
84
|
+
- [arxiv-latex-mcp](https://github.com/takashiishida/arxiv-latex-mcp): MCP server that uses arxiv-to-prompt to fetch and process arXiv LaTeX sources for precise interpretation of mathematical expressions in scientific papers.
|
|
85
|
+
- [arxiv-tex-ui](https://github.com/takashiishida/arxiv-tex-ui): chat with an llm about an arxiv paper by using the latex source.
|
|
86
|
+
|
|
87
|
+
If you're using arxiv-to-prompt in your project, please submit a pull request to add it to this list!
|
|
88
|
+
|
|
67
89
|
### References
|
|
68
90
|
|
|
69
91
|
- Inspired by [files-to-prompt](https://github.com/simonw/files-to-prompt).
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
arxiv_to_prompt/__init__.py,sha256=oL2bEzZhiFoMqCF-84Xmljqw55lgRkwInBFpExRPCTY,609
|
|
2
|
+
arxiv_to_prompt/cli.py,sha256=2ZVmxNcygFpOFROfCo-FtXzcRpLVVRUOkIhASL0iD7o,1179
|
|
3
|
+
arxiv_to_prompt/core.py,sha256=0XwG9hqljQ3FHDOmmR7C8CX4ge1CJJAqSosVzTXhkes,10425
|
|
4
|
+
arxiv_to_prompt-0.2.1.dist-info/licenses/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
|
|
5
|
+
arxiv_to_prompt-0.2.1.dist-info/METADATA,sha256=lWDgxqAXcdZs7OjKrq5QN2gDBemZtRYqcoCjdIIfw-o,3998
|
|
6
|
+
arxiv_to_prompt-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
arxiv_to_prompt-0.2.1.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
|
|
8
|
+
arxiv_to_prompt-0.2.1.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
|
|
9
|
+
arxiv_to_prompt-0.2.1.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
arxiv_to_prompt/__init__.py,sha256=oL2bEzZhiFoMqCF-84Xmljqw55lgRkwInBFpExRPCTY,609
|
|
2
|
-
arxiv_to_prompt/cli.py,sha256=WafgKxxpgJrLyeuQ-tnUASoknoNXiaQRWLP-Emsr-ug,977
|
|
3
|
-
arxiv_to_prompt/core.py,sha256=UOFbivdGx470Myh5wzq4SVzh0AOPFldU3thBmwfsd6g,9802
|
|
4
|
-
arxiv_to_prompt-0.1.1.dist-info/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
|
|
5
|
-
arxiv_to_prompt-0.1.1.dist-info/METADATA,sha256=IKVSFdYmNdt-YUHA3JBltJsOvLCkxyd90d4_gbcf2ok,2920
|
|
6
|
-
arxiv_to_prompt-0.1.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
7
|
-
arxiv_to_prompt-0.1.1.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
|
|
8
|
-
arxiv_to_prompt-0.1.1.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
|
|
9
|
-
arxiv_to_prompt-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|