arxiv-to-prompt 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arxiv_to_prompt/core.py +40 -25
- {arxiv_to_prompt-0.5.0.dist-info → arxiv_to_prompt-0.5.1.dist-info}/METADATA +1 -1
- arxiv_to_prompt-0.5.1.dist-info/RECORD +9 -0
- arxiv_to_prompt-0.5.0.dist-info/RECORD +0 -9
- {arxiv_to_prompt-0.5.0.dist-info → arxiv_to_prompt-0.5.1.dist-info}/WHEEL +0 -0
- {arxiv_to_prompt-0.5.0.dist-info → arxiv_to_prompt-0.5.1.dist-info}/entry_points.txt +0 -0
- {arxiv_to_prompt-0.5.0.dist-info → arxiv_to_prompt-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {arxiv_to_prompt-0.5.0.dist-info → arxiv_to_prompt-0.5.1.dist-info}/top_level.txt +0 -0
arxiv_to_prompt/core.py
CHANGED
|
@@ -92,40 +92,55 @@ def download_arxiv_source(arxiv_id: str, cache_dir: Optional[str] = None, use_ca
|
|
|
92
92
|
|
|
93
93
|
def find_main_tex(directory: str) -> Optional[str]:
|
|
94
94
|
"""
|
|
95
|
-
Find the main .tex file containing documentclass.
|
|
95
|
+
Find the main .tex file containing documentclass.
|
|
96
|
+
Searches recursively through subdirectories.
|
|
96
97
|
First checks for common naming conventions (main.tex, paper.tex, index.tex).
|
|
97
|
-
If none found, returns the
|
|
98
|
-
since shorter files are typically conference templates or supplementary documents
|
|
98
|
+
If none found, returns the path of the longest .tex file containing documentclass,
|
|
99
|
+
since shorter files are typically conference templates or supplementary documents
|
|
99
100
|
rather than the main manuscript.
|
|
100
101
|
"""
|
|
101
102
|
common_names = ['main.tex', 'paper.tex', 'index.tex']
|
|
102
103
|
main_tex_file = None
|
|
103
104
|
max_line_count = 0
|
|
104
105
|
|
|
105
|
-
#
|
|
106
|
-
for
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
106
|
+
# Walk through directory and subdirectories
|
|
107
|
+
for root, dirs, files in os.walk(directory):
|
|
108
|
+
rel_root = os.path.relpath(root, directory)
|
|
109
|
+
|
|
110
|
+
# First pass: check for common naming conventions
|
|
111
|
+
for file_name in files:
|
|
112
|
+
if file_name in common_names:
|
|
113
|
+
file_path = os.path.join(root, file_name)
|
|
114
|
+
try:
|
|
115
|
+
with open(file_path, 'r', encoding='utf-8') as file:
|
|
116
|
+
lines = file.readlines()
|
|
117
|
+
if any('\\documentclass' in line for line in lines):
|
|
118
|
+
if rel_root == '.':
|
|
119
|
+
return file_name
|
|
120
|
+
return os.path.join(rel_root, file_name)
|
|
121
|
+
except Exception as e:
|
|
122
|
+
logging.warning(f"Could not read file {file_path}: {e}")
|
|
115
123
|
|
|
116
124
|
# Second pass: find the longest .tex file containing documentclass
|
|
117
|
-
for
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
125
|
+
for root, dirs, files in os.walk(directory):
|
|
126
|
+
rel_root = os.path.relpath(root, directory)
|
|
127
|
+
|
|
128
|
+
for file_name in files:
|
|
129
|
+
if file_name.endswith('.tex'):
|
|
130
|
+
file_path = os.path.join(root, file_name)
|
|
131
|
+
try:
|
|
132
|
+
with open(file_path, 'r', encoding='utf-8') as file:
|
|
133
|
+
lines = file.readlines()
|
|
134
|
+
if any('\\documentclass' in line for line in lines):
|
|
135
|
+
line_count = len(lines)
|
|
136
|
+
if line_count > max_line_count:
|
|
137
|
+
if rel_root == '.':
|
|
138
|
+
main_tex_file = file_name
|
|
139
|
+
else:
|
|
140
|
+
main_tex_file = os.path.join(rel_root, file_name)
|
|
141
|
+
max_line_count = line_count
|
|
142
|
+
except Exception as e:
|
|
143
|
+
logging.warning(f"Could not read file {file_path}: {e}")
|
|
129
144
|
|
|
130
145
|
return main_tex_file
|
|
131
146
|
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
arxiv_to_prompt/__init__.py,sha256=LbfYhirPwhaMpwV4-YgMwW6hA0GOQDHVCPYCPKabjw0,1169
|
|
2
|
+
arxiv_to_prompt/cli.py,sha256=IwT64A-lf5PrxCxs2e1adN09USkf7ji31uzO8YAegpU,3203
|
|
3
|
+
arxiv_to_prompt/core.py,sha256=ln67k1MT-l8PalwGsszU6IwCZ15GAOiX0yfLgyKvySA,13837
|
|
4
|
+
arxiv_to_prompt-0.5.1.dist-info/licenses/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
|
|
5
|
+
arxiv_to_prompt-0.5.1.dist-info/METADATA,sha256=VKK7my5pxFuVLTejMV3vS8BLhk_kV62HHPWxC84_80Q,4786
|
|
6
|
+
arxiv_to_prompt-0.5.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
7
|
+
arxiv_to_prompt-0.5.1.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
|
|
8
|
+
arxiv_to_prompt-0.5.1.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
|
|
9
|
+
arxiv_to_prompt-0.5.1.dist-info/RECORD,,
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
arxiv_to_prompt/__init__.py,sha256=LbfYhirPwhaMpwV4-YgMwW6hA0GOQDHVCPYCPKabjw0,1169
|
|
2
|
-
arxiv_to_prompt/cli.py,sha256=IwT64A-lf5PrxCxs2e1adN09USkf7ji31uzO8YAegpU,3203
|
|
3
|
-
arxiv_to_prompt/core.py,sha256=GafxYeE0dNg70hNG8BrSM7S99dIpHiy1KoNp5oW8niA,13119
|
|
4
|
-
arxiv_to_prompt-0.5.0.dist-info/licenses/LICENSE,sha256=np8L3--VyxwVJa_8D_mfK4RYrtnRMM_eeYN3rM4PMHo,1071
|
|
5
|
-
arxiv_to_prompt-0.5.0.dist-info/METADATA,sha256=4a66cO6DpNdd0dz3U_79QhL60Q1cAhHHyExWUqhL4eo,4786
|
|
6
|
-
arxiv_to_prompt-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
7
|
-
arxiv_to_prompt-0.5.0.dist-info/entry_points.txt,sha256=iYEEn8xZ_5OkhNIs5HCyHSQBpDRJkbD5h0tlAb16lL0,61
|
|
8
|
-
arxiv_to_prompt-0.5.0.dist-info/top_level.txt,sha256=JClbu_lGGWu3RaTHZlNqTKB1-DUSbYXQNIYmJ9_F7fY,16
|
|
9
|
-
arxiv_to_prompt-0.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|