arxiv-to-prompt 0.5.0__tar.gz → 0.5.1__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arxiv-to-prompt
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: transform arXiv papers into a single latex prompt for LLMs
5
5
  Author: Takashi Ishida
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "arxiv-to-prompt"
7
- version = "0.5.0"
7
+ version = "0.5.1"
8
8
  description = "transform arXiv papers into a single latex prompt for LLMs"
9
9
  readme = "README.md"
10
10
  authors = [{ name = "Takashi Ishida" }]
@@ -92,40 +92,55 @@ def download_arxiv_source(arxiv_id: str, cache_dir: Optional[str] = None, use_ca
92
92
 
93
93
  def find_main_tex(directory: str) -> Optional[str]:
94
94
  """
95
- Find the main .tex file containing documentclass.
95
+ Find the main .tex file containing documentclass.
96
+ Searches recursively through subdirectories.
96
97
  First checks for common naming conventions (main.tex, paper.tex, index.tex).
97
- If none found, returns the filename of the longest .tex file containing documentclass,
98
- since shorter files are typically conference templates or supplementary documents
98
+ If none found, returns the path of the longest .tex file containing documentclass,
99
+ since shorter files are typically conference templates or supplementary documents
99
100
  rather than the main manuscript.
100
101
  """
101
102
  common_names = ['main.tex', 'paper.tex', 'index.tex']
102
103
  main_tex_file = None
103
104
  max_line_count = 0
104
105
 
105
- # First pass: check for common naming conventions
106
- for file_name in os.listdir(directory):
107
- if file_name in common_names:
108
- try:
109
- with open(os.path.join(directory, file_name), 'r', encoding='utf-8') as file:
110
- lines = file.readlines()
111
- if any('\\documentclass' in line for line in lines):
112
- return file_name
113
- except Exception as e:
114
- logging.warning(f"Could not read file {file_name}: {e}")
106
+ # Walk through directory and subdirectories
107
+ for root, dirs, files in os.walk(directory):
108
+ rel_root = os.path.relpath(root, directory)
109
+
110
+ # First pass: check for common naming conventions
111
+ for file_name in files:
112
+ if file_name in common_names:
113
+ file_path = os.path.join(root, file_name)
114
+ try:
115
+ with open(file_path, 'r', encoding='utf-8') as file:
116
+ lines = file.readlines()
117
+ if any('\\documentclass' in line for line in lines):
118
+ if rel_root == '.':
119
+ return file_name
120
+ return os.path.join(rel_root, file_name)
121
+ except Exception as e:
122
+ logging.warning(f"Could not read file {file_path}: {e}")
115
123
 
116
124
  # Second pass: find the longest .tex file containing documentclass
117
- for file_name in os.listdir(directory):
118
- if file_name.endswith('.tex'):
119
- try:
120
- with open(os.path.join(directory, file_name), 'r', encoding='utf-8') as file:
121
- lines = file.readlines()
122
- if any('\\documentclass' in line for line in lines):
123
- line_count = len(lines)
124
- if line_count > max_line_count:
125
- main_tex_file = file_name
126
- max_line_count = line_count
127
- except Exception as e:
128
- logging.warning(f"Could not read file {file_name}: {e}")
125
+ for root, dirs, files in os.walk(directory):
126
+ rel_root = os.path.relpath(root, directory)
127
+
128
+ for file_name in files:
129
+ if file_name.endswith('.tex'):
130
+ file_path = os.path.join(root, file_name)
131
+ try:
132
+ with open(file_path, 'r', encoding='utf-8') as file:
133
+ lines = file.readlines()
134
+ if any('\\documentclass' in line for line in lines):
135
+ line_count = len(lines)
136
+ if line_count > max_line_count:
137
+ if rel_root == '.':
138
+ main_tex_file = file_name
139
+ else:
140
+ main_tex_file = os.path.join(rel_root, file_name)
141
+ max_line_count = line_count
142
+ except Exception as e:
143
+ logging.warning(f"Could not read file {file_path}: {e}")
129
144
 
130
145
  return main_tex_file
131
146
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: arxiv-to-prompt
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: transform arXiv papers into a single latex prompt for LLMs
5
5
  Author: Takashi Ishida
6
6
  License: MIT
@@ -153,6 +153,23 @@ def test_find_main_tex(temp_cache_dir):
153
153
  assert found_main == "main.tex"
154
154
 
155
155
 
156
+ def test_find_main_tex_in_subdirectory(temp_cache_dir):
157
+ """Test finding main tex file in a subdirectory."""
158
+ # Create test directory with subdirectory
159
+ tex_dir = temp_cache_dir / "test_tex_subdir"
160
+ tex_dir.mkdir(parents=True)
161
+ subdir = tex_dir / "paper"
162
+ subdir.mkdir()
163
+
164
+ # Create main.tex in subdirectory
165
+ main_file = subdir / "main.tex"
166
+ main_file.write_text("\\documentclass{article}\n\\begin{document}\nHello\n\\end{document}")
167
+
168
+ # Test finding main file in subdirectory
169
+ found_main = find_main_tex(str(tex_dir))
170
+ assert found_main == os.path.join("paper", "main.tex")
171
+
172
+
156
173
  def test_commented_input_commands(temp_cache_dir):
157
174
  """Test that commented-out \\include and \\input commands are ignored."""
158
175
  # Create test directory and files
File without changes