pytexmd 1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytexmd/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ __all__ = ['filter','file_loader','sphinx_doc','process_file',"config"]
2
+
3
+ from . import filter,file_loader,sphinx_doc,config
4
+ from .core import process_file
pytexmd/cli.py ADDED
@@ -0,0 +1,52 @@
1
+ """Command-line interface for pytexmd.
2
+
3
+ This module provides a CLI for processing LaTeX files and generating documentation.
4
+ """
5
+
6
+ __all__ = ['process_file']
7
+
8
+ from .core import process_file
9
+ import argparse
10
+
11
def main(argv: "list[str] | None" = None) -> None:
    """Run the pytexmd command-line interface.

    Parses the command line and forwards the parsed values, in order, to
    ``process_file``.

    Command-line arguments:
        input_file: File to process (positional).
        output_folder: Output folder (positional).
        --depth: How many sub files should be created according to sections
            and paragraphs etc. The help text marks this as not supported
            yet. Defaults to 0.
        --output_suffix: Suffix for output files. Defaults to ".md".
        --project_name: Project name. Defaults to "My Project".
        --author: Author name. Defaults to "Author".
        --version: Version string. Defaults to "1.0".

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the behaviour of the
            installed command; passing a list allows programmatic use.

    Returns:
        None

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.

    Example:
        python -m pytexmd.cli main.tex output_folder --depth 3 --output_suffix .md --project_name "My Project" --author "Author" --version "1.0"
    """
    parser = argparse.ArgumentParser(description="My Library CLI")
    parser.add_argument("input_file", help="File to process", type=str)
    parser.add_argument("output_folder", help="Output folder", type=str)
    parser.add_argument("--depth", help="(not supported yet)How many sub files should be created according to sections and paragraphs etc.", default=0, type=int)
    parser.add_argument("--output_suffix", help="Suffix for output files", default=".md", type=str)
    parser.add_argument("--project_name", help="Project name", default="My Project", type=str)
    parser.add_argument("--author", help="Author name", default="Author", type=str)
    parser.add_argument("--version", help="Version string", default="1.0", type=str)

    # parse_args(None) falls back to sys.argv[1:], so the default call is unchanged.
    args = parser.parse_args(argv)
    print(f"Processing {args.input_file}")
    process_file(
        args.input_file,
        args.output_folder,
        args.depth,
        args.output_suffix,
        args.project_name,
        args.author,
        args.version,
    )


if __name__ == "__main__":
    main()
pytexmd/config.py ADDED
@@ -0,0 +1,9 @@
1
+ __all__ = ["set_latex_replacements"]
2
+
3
# Module-wide table of LaTeX replacements; starts empty and is filled through
# set_latex_replacements().
LATEX_REPLACEMENTS = []


def set_latex_replacements(replacements):
    """Install ``replacements`` as the module-wide ``LATEX_REPLACEMENTS``.

    When both the current table and the new value are lists, the existing
    list object is updated in place. Rebinding the global would leave every
    module that did ``from .config import LATEX_REPLACEMENTS`` holding the
    old, stale list; the in-place update is visible through both that form
    and ``config.LATEX_REPLACEMENTS``.

    Any other value (for example ``None`` or a non-list container) is bound
    to the global as-is.

    Args:
        replacements: The new replacement table. Its element format is not
            defined in this module -- presumably what the filter code
            consuming ``LATEX_REPLACEMENTS`` expects; confirm against it.

    Returns:
        None
    """
    global LATEX_REPLACEMENTS
    if isinstance(replacements, list) and isinstance(LATEX_REPLACEMENTS, list):
        # Slice assignment copies the items into the existing list object.
        LATEX_REPLACEMENTS[:] = replacements
    else:
        LATEX_REPLACEMENTS = replacements
9
+
pytexmd/core.py ADDED
@@ -0,0 +1,80 @@
1
+ """Core utilities for processing LaTeX files and generating documentation.
2
+
3
+ This module provides the main entry point for converting LaTeX files to Markdown and generating Sphinx documentation.
4
+ """
5
+
6
+ __all__ = ["process_file"]
7
+
8
+ import os
9
+ from .filter import process_string
10
+ from .file_loader import load_tex_file, convert_bbl_to_bib
11
+ from .sphinx_doc import create_sphinx_documentation, make_html, create_config_file
12
+ from .filter.splitting import split_rename
13
+ from .filter.text import CUSTOM_THEOREM_TYPES
14
+
15
def _export_bibliography(bib_paths, source_folder: str) -> "list[str]":
    """Place every bibliography file into ``source_folder``.

    ``.bbl`` files are read, passed through ``convert_bbl_to_bib`` and written
    as ``<stem>.bib``; every other file is copied unchanged under its own
    basename. A file that cannot be read, written or copied is reported with
    a warning on stdout and skipped.

    Args:
        bib_paths: Iterable of paths to bibliography files.
        source_folder (str): Destination folder.

    Returns:
        list[str]: File names (not paths) that were written to
        ``source_folder``, in processing order.
    """
    import shutil

    exported: list[str] = []
    for abs_path in bib_paths:
        base_name = os.path.basename(abs_path)
        stem, ext = os.path.splitext(base_name)
        if ext.lower() == '.bbl':
            # Convert \begin{thebibliography} format → BibTeX database format
            dest_name = stem + '.bib'
            dest = os.path.join(source_folder, dest_name)
            try:
                with open(abs_path, 'r', encoding='utf-8', errors='replace') as f:
                    bbl_content = f.read()
                bib_content = convert_bbl_to_bib(bbl_content)
                with open(dest, 'w', encoding='utf-8') as f:
                    f.write(bib_content)
                exported.append(dest_name)
                print(f"Bibliography converted .bbl → .bib: {dest}")
            except OSError as exc:
                print(f"Warning: could not convert {abs_path}: {exc}")
        else:
            dest = os.path.join(source_folder, base_name)
            try:
                shutil.copy2(abs_path, dest)
                exported.append(base_name)
                print(f"Bibliography file copied: {dest}")
            except OSError as exc:
                print(f"Warning: could not copy {abs_path}: {exc}")
    return exported


def process_file(
    input_file: str,
    output_folder: str,
    depth: int = 3,
    output_suffix: str = ".md",
    project_name: str = "My Project",
    author: str = "Author",
    version: str = "1.0",
) -> None:
    """Process a LaTeX file and generate documentation.

    Loads the LaTeX file, creates the Sphinx documentation skeleton, places
    the project's bibliography files in the Sphinx ``source`` folder, converts
    the LaTeX content via ``process_string`` and finally re-writes the Sphinx
    configuration.

    Args:
        input_file (str): Path to the input LaTeX file.
        output_folder (str): Path to the output folder for documentation.
        depth (int, optional): Depth for processing sections. Defaults to 3.
        output_suffix (str, optional): Suffix for output files. Defaults to ".md".
        project_name (str, optional): Project name passed to the Sphinx
            helpers. Defaults to "My Project".
        author (str, optional): Author name passed to the Sphinx helpers.
            Defaults to "Author".
        version (str, optional): Version string passed to the Sphinx helpers.
            Defaults to "1.0".

    Returns:
        None

    Example:
        process_file("main.tex", "docs")
    """
    latex_content = load_tex_file(input_file)
    file_string = latex_content.content
    create_sphinx_documentation(output_folder, project_name, author, version)
    source_folder = os.path.join(output_folder, "source")

    # Copy every bibliography file found in the project to the Sphinx source
    # folder; .bbl files are converted to .bib first.
    copied_bib_names = _export_bibliography(
        latex_content.bib_files.values(), source_folder
    )

    process_string(source_folder, file_string, depth, output_suffix)
    # Re-write conf.py now that custom theorem types are known.
    create_config_file(output_folder, project_name, author, version,
                       custom_types=CUSTOM_THEOREM_TYPES,
                       bib_filenames=copied_bib_names)
    # NOTE: the HTML build step (make_html(output_folder)) is currently disabled.
pytexmd/file_loader.py ADDED
@@ -0,0 +1,300 @@
1
+ r"""File loader utilities for LaTeX projects.
2
+
3
+ This module provides functions and classes to load LaTeX files and their associated resources
4
+ (recursively), such as .tex, .bib, and image files. It also expands \input{} commands in the main
5
+ LaTeX file.
6
+
7
+ Typical usage example:
8
+ latex_file = load_tex_file("main.tex")
9
+ """
10
+
11
+ __all__ = ["load_tex_file", "LatexFile", "merge_bib_files", "convert_bbl_to_bib"]
12
+
13
+ import os
14
+ import re
15
+ import regex
16
+ from typing import List, Dict, Tuple, Optional, Any, NamedTuple
17
+ from pytexmd.filter.bibtex.core import convert_bbl_to_bib
18
+
19
class LatexFile(NamedTuple):
    r"""Immutable record describing a loaded LaTeX project.

    Attributes:
        content (str): Text of the main LaTeX file after \input{} expansion.
        tex_files (Dict[str, str]): Absolute paths of .tex/.sty/.cls files,
            keyed by base filename without extension.
        bib_files (Dict[str, str]): Absolute paths of
            .bib/.bbl/.bibtex/.biblatex files, keyed by base filename without
            extension.
        image_files (Dict[str, str]): Absolute paths of image files, keyed by
            base filename without extension.
        all_files (Dict[str, str]): The three mappings above combined.
        merged_bib_content (str): Merged, de-duplicated text of all
            bibliography files that were found. Defaults to "".
    """

    # Expanded main document.
    content: str
    # Per-category lookup tables: base name -> absolute path.
    tex_files: Dict[str, str]
    bib_files: Dict[str, str]
    image_files: Dict[str, str]
    # Union of the per-category tables.
    all_files: Dict[str, str]
    # Optional so that callers may construct the record without it.
    merged_bib_content: str = ""
36
+
37
+
38
+ def _split_bib_entries(content: str) -> List[str]:
39
+ """Split .bib file content into individual top-level @-entries."""
40
+ entries = []
41
+ i = 0
42
+ n = len(content)
43
+ while i < n:
44
+ if content[i] != '@':
45
+ i += 1
46
+ continue
47
+ start = i
48
+ while i < n and content[i] != '{':
49
+ i += 1
50
+ if i >= n:
51
+ break
52
+ depth = 1
53
+ i += 1
54
+ while i < n and depth > 0:
55
+ if content[i] == '{':
56
+ depth += 1
57
+ elif content[i] == '}':
58
+ depth -= 1
59
+ i += 1
60
+ entries.append(content[start:i])
61
+ return entries
62
+
63
+
64
+ def _extract_bib_key(entry: str) -> Optional[str]:
65
+ """Return the citation key from a bib entry, or None for @string/@preamble/@comment."""
66
+ m = re.match(r'@(\w+)\s*\{\s*([^,\s}]+)', entry, re.IGNORECASE)
67
+ if not m:
68
+ return None
69
+ if m.group(1).lower() in ('string', 'preamble', 'comment'):
70
+ return None
71
+ return m.group(2)
72
+
73
+
74
def merge_bib_files(bib_paths: List[str]) -> str:
    """Concatenate several bibliography files, keeping one entry per key.

    Files are read in the given order. Entries that carry a citation key are
    kept only the first time that key is seen; entries without a key
    (@string/@preamble/@comment) are always kept. A file that cannot be read
    is reported with a warning on stdout and skipped.

    Args:
        bib_paths: List of absolute paths to .bib/.bbl files.

    Returns:
        str: The kept entries, stripped and joined by blank lines.
    """
    known_keys: set = set()
    kept: List[str] = []
    for bib_path in bib_paths:
        try:
            with open(bib_path, 'r', encoding='utf-8', errors='replace') as handle:
                text = handle.read()
        except OSError as exc:
            print(f"Warning: could not read {bib_path}: {exc}")
            continue
        for chunk in _split_bib_entries(text):
            cite_key = _extract_bib_key(chunk)
            if cite_key is not None:
                # Keyed entry: first occurrence wins.
                if cite_key in known_keys:
                    continue
                known_keys.add(cite_key)
            kept.append(chunk.strip())
    return "\n\n".join(filter(None, kept))
100
+
101
+
102
+ def _clean_latex(text: str) -> str:
103
+ """Strip common LaTeX markup from a string, leaving plain text."""
104
+ # Unwrap braced groups: {text} -> text (but keep content)
105
+ text = re.sub(r'\{([^{}]*)\}', r'\1', text)
106
+ # Remove remaining LaTeX commands (e.g. \newblock, \em, \textbf)
107
+ text = re.sub(r'\\[a-zA-Z]+\*?', ' ', text)
108
+ # Remove lone backslashes
109
+ text = text.replace('\\', '')
110
+ # Collapse whitespace
111
+ return re.sub(r'\s+', ' ', text).strip()
112
+
113
+
114
def load_tex_file(file_name: str) -> "LatexFile":
    r"""Load a LaTeX file and its associated resources recursively.

    Expands all \input{} commands in the main file, and collects all .tex, .bib, and image files
    in the same directory tree. Bibliography files found in directories outside that tree
    that were reached through \input{} are collected as well. The folder and the collected
    file lists are printed to stdout.

    Args:
        file_name (str): Path to the main LaTeX file.

    Returns:
        LatexFile: A named tuple containing the expanded content and dictionaries of found files.

    Raises:
        FileNotFoundError: If the main file does not exist.
        OSError: If there is an error reading files from disk.

    Example:
        latex_file = load_tex_file("main.tex")
        print(latex_file.content)
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.svg', '.pdf', '.eps')
    tex_extensions = ('.tex', '.sty', '.cls')
    bib_extensions = ('.bib', '.bbl', '.bibtex', '.biblatex')
    target_extensions = frozenset(tex_extensions + image_extensions + bib_extensions)

    def load_file(path: str) -> str:
        """Return the UTF-8 decoded contents of ``path``."""
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()

    # Folder where file_name resides; it is treated as the project root.
    absolute_folder = os.path.dirname(os.path.abspath(file_name))

    tex_files: List[str] = []
    bib_files: List[str] = []
    image_files: List[str] = []

    # Collect all supported files below the project root (recursively).
    if os.path.exists(absolute_folder):
        for root, _dirs, files in os.walk(absolute_folder):
            for file in files:
                abs_path = os.path.abspath(os.path.join(root, file))
                file_ext = os.path.splitext(file)[1].lower()
                if file_ext in tex_extensions:
                    tex_files.append(abs_path)
                elif file_ext in bib_extensions:
                    bib_files.append(abs_path)
                elif file_ext in image_extensions:
                    image_files.append(abs_path)

    print(f"Folder (recursive): {absolute_folder}")
    print(f"TEX files: {tex_files}")
    print(f"BIB files: {bib_files}")
    print(f"Image files: {image_files}")

    content = load_file(file_name)

    def remove_extensions(name: str) -> str:
        """Strip a single trailing known extension from a basename."""
        root, ext = os.path.splitext(name)
        if ext.lower() in target_extensions:
            return root
        return name

    def _basename_key(abs_path: str) -> str:
        """Return the bare basename (no extension) used as dict key."""
        return remove_extensions(os.path.basename(abs_path))

    # Files sharing a basename collide in these dicts; the last one found wins.
    _tex_files = {_basename_key(f): f for f in tex_files}
    _bib_files = {_basename_key(f): f for f in bib_files}
    _image_files = {_basename_key(f): f for f in image_files}
    all_files = {**_tex_files, **_bib_files, **_image_files}

    def input_to_filename(input_name: str) -> str:
        r"""Convert LaTeX input name to absolute filename.

        Tries direct relative-path resolution first (handles paths like
        ``../sibling/file`` or ``sections/foo``), then falls back to a
        basename-only dict lookup for plain names like ``foo``.

        Args:
            input_name (str): Name from \input{} command.

        Returns:
            str: Absolute path to the file.

        Raises:
            FileNotFoundError: If no matching file can be located.
        """
        # Normalise separators so os.path works cross-platform
        norm = input_name.replace("\\", "/")

        # Strategy 1: resolve as a path relative to the project root
        candidate = os.path.normpath(os.path.join(absolute_folder, norm))
        if os.path.isfile(candidate):
            return candidate
        # Try appending each tex extension (LaTeX omits .tex in \input)
        for ext in tex_extensions:
            if os.path.isfile(candidate + ext):
                return candidate + ext

        # Strategy 2: bare-basename dict lookup (legacy fallback).
        bare = remove_extensions(norm.split("/")[-1])
        # Prefer TeX sources: in the merged dict a same-named image or bib
        # file overrides the .tex file, and \input would then read that.
        if bare in _tex_files:
            return _tex_files[bare]
        if bare in all_files:
            return all_files[bare]

        raise FileNotFoundError(
            f"Cannot resolve \\input{{{input_name}}}: tried '{candidate}' "
            f"and basename key '{bare}' in scanned files."
        )

    # Directories of every file pulled in through \input{}.
    _resolved_input_dirs: set = set()

    def get_input_file(input_name: str) -> str:
        r"""Get the contents of an input file referenced in LaTeX.

        Args:
            input_name (str): Name from \input{} command.

        Returns:
            str: Contents of the input file, or empty string if not found.
        """
        try:
            filename = input_to_filename(input_name)
            _resolved_input_dirs.add(os.path.dirname(os.path.abspath(filename)))
            return load_file(filename)
        except (KeyError, FileNotFoundError) as exc:
            print(f"File not found for input: {input_name} ({exc})")
            return ""

    # Expand \input{filename} until the content stops changing. Each distinct
    # name is expanded only once, which also stops cyclic inputs from looping
    # forever.
    input_pattern = r'\\input\{([^}]+)\}'
    content_old = content
    done_matches = []

    while True:
        matches = regex.findall(input_pattern, content)
        for match in matches:
            if match in done_matches:
                continue
            content = content.replace(r"\input{"+match+"}", get_input_file(match))
            done_matches.append(match)
        if content == content_old:
            break
        content_old = content

    # Collect .bib files from directories outside the project root that were
    # touched by \input{} resolution (the initial os.walk already covers the
    # tree rooted at absolute_folder). Sorted so the result does not depend
    # on set iteration order.
    known_bib_files = set(bib_files)
    for input_dir in sorted(_resolved_input_dirs):
        input_dir = os.path.normpath(input_dir)
        try:
            rel = os.path.relpath(input_dir, absolute_folder)
        except ValueError:
            rel = None  # different drive on Windows — definitely outside project root
        if rel is not None:
            # Compare whole path components so a folder merely *named*
            # "..something" inside the project is not taken for an outside one.
            outside = rel == os.pardir or rel.startswith(os.pardir + os.sep)
            if not outside:
                continue  # already covered by the initial recursive walk
        if os.path.isdir(input_dir):
            for walk_root, _walk_dirs, walk_files in os.walk(input_dir):
                for walk_file in walk_files:
                    if os.path.splitext(walk_file)[1].lower() in bib_extensions:
                        found = os.path.abspath(os.path.join(walk_root, walk_file))
                        if found not in known_bib_files:
                            known_bib_files.add(found)
                            bib_files.append(found)

    # Rebuild bib dict in case extra files were found
    _bib_files = {_basename_key(f): f for f in bib_files}
    all_files = {**_tex_files, **_bib_files, **_image_files}

    # Merge all collected .bib files, deduplicating by citation key
    merged_bib_content = merge_bib_files(bib_files)

    return LatexFile(
        content=content,
        tex_files=_tex_files,
        bib_files=_bib_files,
        image_files=_image_files,
        all_files=all_files,
        merged_bib_content=merged_bib_content,
    )
300
+
@@ -0,0 +1,20 @@
1
# Public API of the package: the submodules plus the helpers re-exported from
# file_maker below.
# NOTE: "element_to_file_only_begin" used to be listed here, but it is not
# imported into this namespace, so `from <package> import *` failed on it.
# Presumably it lives in file_maker -- if so, add it to the import below and
# list it here again.
__all__ = [
    "string_to_tree",
    "process_string",
    "element_to_file_whole",
    "split_document_to_files",
    "split_by_sections",
    "verify_content_integrity",
    "string_to_filename",
    "preprocessor",
    "text",
    "enumitem",
    "equations",
    "antibugs",
    "core",
    "splitting",
]


from . import preprocessor,enumitem,equations,antibugs,core,splitting, text
from .file_maker import string_to_tree, process_string, element_to_file_whole, split_document_to_files, split_by_sections, verify_content_integrity, string_to_filename