PyPI - pytexmd - Versions diffs - 1.1__py3-none-any.whl - Mend

pytexmd 1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

pytexmd/__init__.py +4 -0
pytexmd/cli.py +52 -0
pytexmd/config.py +9 -0
pytexmd/core.py +80 -0
pytexmd/file_loader.py +300 -0
pytexmd/filter/__init__.py +20 -0
pytexmd/filter/antibugs.py +231 -0
pytexmd/filter/bibtex/__init__.py +3 -0
pytexmd/filter/bibtex/core.py +255 -0
pytexmd/filter/core.py +967 -0
pytexmd/filter/enumitem.py +503 -0
pytexmd/filter/equations.py +522 -0
pytexmd/filter/file_maker.py +574 -0
pytexmd/filter/notworking_preprocessor.py +452 -0
pytexmd/filter/preprocessor.py +243 -0
pytexmd/filter/splitting.py +308 -0
pytexmd/filter/text.py +803 -0
pytexmd/sphinx_doc.py +284 -0
pytexmd-1.1.dist-info/METADATA +208 -0
pytexmd-1.1.dist-info/RECORD +23 -0
pytexmd-1.1.dist-info/WHEEL +5 -0
pytexmd-1.1.dist-info/entry_points.txt +2 -0
pytexmd-1.1.dist-info/top_level.txt +1 -0

pytexmd/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+__all__ = ['filter','file_loader','sphinx_doc','process_file',"config"]
+from . import filter,file_loader,sphinx_doc,config
+from .core import process_file

pytexmd/cli.py ADDED Viewed

@@ -0,0 +1,52 @@
+"""Command-line interface for pytexmd.
+This module provides a CLI for processing LaTeX files and generating documentation.
+"""
+__all__ = ['process_file']
+from .core import process_file
+import argparse
+def main(argv=None) -> None:
+    """Run the pytexmd command-line interface.
+    Parses the command line and forwards the parsed values to ``process_file``.
+    Args:
+        argv (list[str], optional): Arguments to parse, without the program name.
+            Defaults to None, in which case argparse reads ``sys.argv[1:]``.
+    Command-line arguments:
+        input_file (str): File to process.
+        output_folder (str): Output folder.
+        --depth (int): How many sub files should be created according to sections and paragraphs etc. (the help text marks it as not supported yet; default 0).
+        --output_suffix (str): Suffix for output files (default ".md").
+        --project_name (str): Project name (default "My Project").
+        --author (str): Author name (default "Author").
+        --version (str): Version string (default "1.0").
+    Returns:
+        None
+    Example:
+        python -m pytexmd.cli main.tex output_folder --depth 3 --output_suffix .md --project_name "My Project" --author "Author" --version "1.0"
+    """
+    parser = argparse.ArgumentParser(description="My Library CLI")
+    parser.add_argument("input_file", help="File to process", type=str)
+    parser.add_argument("output_folder", help="Output folder", type=str)
+    parser.add_argument("--depth", help="(not supported yet)How many sub files should be created according to sections and paragraphs etc.", default=0, type=int)
+    parser.add_argument("--output_suffix", help="Suffix for output files", default=".md", type=str)
+    parser.add_argument("--project_name", help="Project name", default="My Project", type=str)
+    parser.add_argument("--author", help="Author name", default="Author", type=str)
+    parser.add_argument("--version", help="Version string", default="1.0", type=str)
+    # Passing argv through lets callers and tests drive the CLI without
+    # patching sys.argv; None keeps the original "read sys.argv" behaviour.
+    args = parser.parse_args(argv)
+    print(f"Processing {args.input_file}")
+    process_file(
+        args.input_file,
+        args.output_folder,
+        args.depth,
+        args.output_suffix,
+        args.project_name,
+        args.author,
+        args.version
+    )
+if __name__ == "__main__":
+    main()

pytexmd/config.py ADDED Viewed

@@ -0,0 +1,9 @@
+__all__ = ["set_latex_replacements"]
+# Module-level list of LaTeX replacements; starts empty.
+LATEX_REPLACEMENTS = []
+def set_latex_replacements(replacements):
+    """Replace the contents of ``LATEX_REPLACEMENTS``.
+    The list object is updated in place instead of being rebound, so code that
+    obtained it through ``from ... import LATEX_REPLACEMENTS`` sees the new
+    entries as well as code that reads it as a module attribute.
+    Args:
+        replacements: Iterable of replacement entries; ``None`` clears the list.
+            The entries are copied, so later changes to the caller's own
+            container do not affect the configuration.
+    Returns:
+        None
+    """
+    LATEX_REPLACEMENTS[:] = [] if replacements is None else replacements

pytexmd/core.py ADDED Viewed

@@ -0,0 +1,80 @@
+"""Core utilities for processing LaTeX files and generating documentation.
+This module provides the main entry point for converting LaTeX files to Markdown and generating Sphinx documentation.
+"""
+__all__ = ["process_file"]
+import os
+from .filter import process_string
+from .file_loader import load_tex_file, convert_bbl_to_bib
+from .sphinx_doc import create_sphinx_documentation, make_html, create_config_file
+from .filter.splitting import split_rename
+from .filter.text import CUSTOM_THEOREM_TYPES
+def process_file(
+    input_file: str,
+    output_folder: str,
+    depth: int = 3,
+    output_suffix: str = ".md",
+    project_name: str = "My Project",
+    author: str = "Author",
+    version: str = "1.0",
+) -> None:
+    """Convert a LaTeX project into a Sphinx documentation tree.
+    Loads and expands the LaTeX file, creates the Sphinx project, stages the
+    bibliography files in ``<output_folder>/source``, converts the content via
+    ``process_string`` and finally rewrites the Sphinx configuration.
+    Args:
+        input_file (str): Path to the input LaTeX file.
+        output_folder (str): Path to the output folder for documentation.
+        depth (int, optional): Depth forwarded to ``process_string``. Defaults to 3.
+        output_suffix (str, optional): Suffix for output files. Defaults to ".md".
+        project_name (str, optional): Project name passed to the Sphinx helpers. Defaults to "My Project".
+        author (str, optional): Author passed to the Sphinx helpers. Defaults to "Author".
+        version (str, optional): Version passed to the Sphinx helpers. Defaults to "1.0".
+    Returns:
+        None
+    Example:
+        process_file("main.tex", "docs")
+    """
+    loaded = load_tex_file(input_file)
+    tex_source = loaded.content
+    create_sphinx_documentation(output_folder,project_name,author,version)
+    source_folder = os.path.join(output_folder, "source")
+    import shutil
+    # Names (not paths) of the bibliography files that ended up in the Sphinx
+    # source folder; they are handed to the configuration writer below.
+    staged_names: list[str] = []
+    for bib_path in loaded.bib_files.values():
+        base_name = os.path.basename(bib_path)
+        stem, suffix = os.path.splitext(base_name)
+        if suffix.lower() != '.bbl':
+            # Anything that is not a .bbl file is copied verbatim.
+            target = os.path.join(source_folder, base_name)
+            try:
+                shutil.copy2(bib_path, target)
+                staged_names.append(base_name)
+                print(f"Bibliography file copied: {target}")
+            except OSError as exc:
+                print(f"Warning: could not copy {bib_path}: {exc}")
+            continue
+        # .bbl files hold \begin{thebibliography} output, so they are
+        # converted to a BibTeX database and stored under a .bib name.
+        converted_name = stem + '.bib'
+        target = os.path.join(source_folder, converted_name)
+        try:
+            with open(bib_path, 'r', encoding='utf-8', errors='replace') as handle:
+                raw_bbl = handle.read()
+            converted = convert_bbl_to_bib(raw_bbl)
+            with open(target, 'w', encoding='utf-8') as handle:
+                handle.write(converted)
+            staged_names.append(converted_name)
+            print(f"Bibliography converted .bbl → .bib: {target}")
+        except OSError as exc:
+            print(f"Warning: could not convert {bib_path}: {exc}")
+    process_string(source_folder, tex_source, depth, output_suffix)
+    # conf.py is written again here, after process_string has run, passing
+    # CUSTOM_THEOREM_TYPES and the staged bibliography names.
+    create_config_file(
+        output_folder,
+        project_name,
+        author,
+        version,
+        custom_types=CUSTOM_THEOREM_TYPES,
+        bib_filenames=staged_names,
+    )
+    # The HTML build step (make_html) is not invoked from here.

pytexmd/file_loader.py ADDED Viewed

@@ -0,0 +1,300 @@
+r"""File loader utilities for LaTeX projects.
+This module provides functions and classes to load LaTeX files and their associated resources
+(recursively), such as .tex, .bib, and image files. It also expands \input{} commands in the main
+LaTeX file.
+Typical usage example:
+    latex_file = load_tex_file("main.tex")
+"""
+__all__ = ["load_tex_file", "LatexFile", "merge_bib_files", "convert_bbl_to_bib"]
+import os
+import re
+import regex
+from typing import List, Dict, Tuple, Optional, Any, NamedTuple
+from pytexmd.filter.bibtex.core import convert_bbl_to_bib
+class LatexFile(NamedTuple):
+    r"""Immutable record describing a loaded LaTeX project.
+    The mapping fields are keyed by the file's base name with its known
+    extension removed, and map to the absolute path of that file.
+    Attributes:
+        content (str): The expanded content of the main LaTeX file, with \input{} resolved.
+        tex_files (Dict[str, str]): Mapping for .tex/.sty/.cls files.
+        bib_files (Dict[str, str]): Mapping for .bib/.bbl/.bibtex/.biblatex files.
+        image_files (Dict[str, str]): Mapping for image files.
+        all_files (Dict[str, str]): Combined mapping of all supported files.
+        merged_bib_content (str): Merged and key-deduplicated text of the collected bibliography files. Defaults to an empty string.
+    """
+    content: str
+    tex_files: Dict[str, str]
+    bib_files: Dict[str, str]
+    image_files: Dict[str, str]
+    all_files: Dict[str, str]
+    merged_bib_content: str = ""
+def _split_bib_entries(content: str) -> List[str]:
+    """Split .bib file content into individual top-level @-entries.
+    An entry runs from its ``@`` to the brace that closes the first ``{``
+    after it; text between entries is dropped.
+    Args:
+        content: Raw text of a bibliography file.
+    Returns:
+        List[str]: The entries in file order. If the input ends before the
+        braces balance, the unterminated remainder is returned as the last entry.
+    """
+    entries: List[str] = []
+    i = 0
+    n = len(content)
+    while i < n:
+        if content[i] != '@':
+            i += 1
+            continue
+        start = i
+        i += 1
+        # Look for the opening brace of this entry. Meeting another '@' first
+        # means the current one was a stray character in free text (for
+        # example "mail me @ home"); restart there so the stray '@' does not
+        # swallow the real entry that follows it.
+        while i < n and content[i] not in '{@':
+            i += 1
+        if i >= n:
+            break
+        if content[i] == '@':
+            continue
+        depth = 1
+        i += 1
+        # Consume up to the matching closing brace, tracking nesting depth.
+        while i < n and depth > 0:
+            if content[i] == '{':
+                depth += 1
+            elif content[i] == '}':
+                depth -= 1
+            i += 1
+        entries.append(content[start:i])
+    return entries
+def _extract_bib_key(entry: str) -> Optional[str]:
+    """Extract the citation key from a single bib entry.
+    Args:
+        entry: Text of one entry, starting at its ``@``.
+    Returns:
+        Optional[str]: The citation key, or None when the text does not start
+        with an entry header or the entry is @string, @preamble or @comment.
+    """
+    header = re.match(r'@(\w+)\s*\{\s*([^,\s}]+)', entry, re.IGNORECASE)
+    if header is None:
+        return None
+    entry_type, cite_key = header.groups()
+    # These entry types carry no citation key of their own.
+    keyless = entry_type.lower() in ('string', 'preamble', 'comment')
+    return None if keyless else cite_key
+def merge_bib_files(bib_paths: List[str]) -> str:
+    """Concatenate the entries of several bibliography files without duplicates.
+    Files that cannot be read are reported with a warning and skipped.
+    Entries without a citation key are always kept; keyed entries are kept
+    only the first time their key is seen.
+    Args:
+        bib_paths: List of absolute paths to .bib/.bbl files.
+    Returns:
+        str: The kept entries, stripped and joined by blank lines.
+    """
+    known_keys: set = set()
+    kept: List[str] = []
+    for path in bib_paths:
+        try:
+            with open(path, 'r', encoding='utf-8', errors='replace') as handle:
+                text = handle.read()
+        except OSError as exc:
+            print(f"Warning: could not read {path}: {exc}")
+            continue
+        for raw_entry in _split_bib_entries(text):
+            cite_key = _extract_bib_key(raw_entry)
+            if cite_key is not None:
+                if cite_key in known_keys:
+                    # Duplicate citation key: the first occurrence wins.
+                    continue
+                known_keys.add(cite_key)
+            kept.append(raw_entry.strip())
+    return "\n\n".join(filter(None, kept))
+def _clean_latex(text: str) -> str:
+    r"""Strip common LaTeX markup from a string, leaving plain text.
+    Commands are removed before braces are unwrapped. Unwrapping first would
+    glue a command to its argument (``\textbf{bold}`` -> ``\textbfbold``) and
+    the command pattern would then delete the argument text as well.
+    Args:
+        text: String that may contain LaTeX commands and braced groups.
+    Returns:
+        str: The text with commands, backslashes and braces around balanced
+        groups removed and runs of whitespace collapsed to single spaces.
+    """
+    # Remove LaTeX commands (e.g. \newblock, \em, \textbf), keeping arguments
+    text = re.sub(r'\\[a-zA-Z]+\*?', ' ', text)
+    # Remove lone backslashes
+    text = text.replace('\\', '')
+    # Unwrap braced groups: {text} -> text. Each pass removes the innermost
+    # level only, so repeat until nested groups are fully unwrapped.
+    previous = None
+    while previous != text:
+        previous = text
+        text = re.sub(r'\{([^{}]*)\}', r'\1', text)
+    # Collapse whitespace
+    return re.sub(r'\s+', ' ', text).strip()
+def load_tex_file(file_name: str) -> LatexFile:
+    r"""Load a LaTeX file and its associated resources recursively.
+    Scans the directory tree of the main file for .tex/.sty/.cls, bibliography and image
+    files, expands the \input{} commands found in the main file (including those that
+    appear inside included files) and merges the collected bibliography files.
+    The returned mappings are keyed by base name without extension, so two files that
+    share a base name end up under one key and the one stored last wins.
+    Args:
+        file_name (str): Path to the main LaTeX file.
+    Returns:
+        LatexFile: A named tuple containing the expanded content and dictionaries of found files.
+    Raises:
+        FileNotFoundError: If the main file does not exist.
+        OSError: If there is an error reading files from disk.
+    Example:
+        latex_file = load_tex_file("main.tex")
+        print(latex_file.content)
+    """
+    def load_file(file_name: str) -> str:
+        r"""Read the contents of a file as UTF-8 text.
+        Args:
+            file_name (str): Path to the file.
+        Returns:
+            str: Contents of the file.
+        """
+        data = None
+        with open(file_name, 'r', encoding='utf-8') as f:
+            data = f.read()
+        return data
+    # Absolute folder of the main file; this is the project root for the scan
+    # and for resolving \input{} paths.
+    absolute_folder = os.path.dirname(os.path.abspath(file_name))
+    # Get all image files, .bib files, and .tex files in the folder (recursively)
+    image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.svg', '.pdf', '.eps']
+    tex_extensions = [ '.tex', '.sty', '.cls']
+    bib_extensions = ['.bib', '.bbl',".bibtex", '.biblatex']
+    target_extensions = tex_extensions + image_extensions + bib_extensions
+    # NOTE(review): this empty list is never used; all_files is rebound to a
+    # dict further down.
+    all_files = []
+    tex_files = []
+    bib_files = []
+    image_files = []
+    if os.path.exists(absolute_folder):
+        # Walk through all subdirectories recursively
+        for root, dirs, files in os.walk(absolute_folder):
+            for file in files:
+                file_path = os.path.join(root, file)
+                # Despite its name this holds an absolute path.
+                relative_path = os.path.abspath(file_path)
+                file_ext = os.path.splitext(file)[1].lower()
+                if file_ext in tex_extensions:
+                    tex_files.append(relative_path)
+                elif file_ext in bib_extensions:
+                    bib_files.append(relative_path)
+                elif file_ext in image_extensions:
+                    image_files.append(relative_path)
+    print(f"Folder (recursive): {absolute_folder}")
+    print(f"TEX files: {tex_files}")
+    print(f"BIB files: {bib_files}")
+    print(f"Image files: {image_files}")
+    # Reading the main file is not guarded: errors propagate to the caller.
+    content = load_file(file_name)
+    def remove_extensions(file_name: str) -> str:
+        """Strip a single trailing known extension from a basename.
+        Names whose extension is not one of the scanned extensions are returned unchanged.
+        """
+        root, ext = os.path.splitext(file_name)
+        if ext.lower() in target_extensions:
+            return root
+        return file_name
+    def _basename_key(abs_path: str) -> str:
+        """Return the bare basename (no extension) used as dict key."""
+        return remove_extensions(os.path.basename(abs_path))
+    # Files sharing a base name collapse onto one key; the last one wins.
+    _tex_files = {_basename_key(f): f for f in tex_files}
+    _bib_files = {_basename_key(f): f for f in bib_files}
+    _image_files = {_basename_key(f): f for f in image_files}
+    # On key clashes between categories, later mappings override earlier ones
+    # (tex, then bib, then image).
+    all_files = {**_tex_files, **_bib_files, **_image_files}
+    def input_to_filename(input_name: str) -> str:
+        r"""Convert LaTeX input name to absolute filename.
+        Tries direct relative-path resolution first (handles paths like
+        ``../sibling/file`` or ``sections/foo``), then falls back to a
+        basename-only dict lookup for plain names like ``foo``.
+        Args:
+            input_name (str): Name from \input{} command.
+        Returns:
+            str: Absolute path to the file.
+        Raises:
+            FileNotFoundError: If no matching file can be located.
+        """
+        # Normalise separators so os.path works cross-platform
+        norm = input_name.replace("\\", "/")
+        # Strategy 1: resolve as a path relative to the project root
+        candidate = os.path.normpath(os.path.join(absolute_folder, norm))
+        if os.path.isfile(candidate):
+            return candidate
+        # Try appending each tex extension (LaTeX omits .tex in \input)
+        for ext in tex_extensions:
+            if os.path.isfile(candidate + ext):
+                return candidate + ext
+        # Strategy 2: bare-basename dict lookup (legacy fallback). all_files
+        # is read from the enclosing scope at call time; all calls happen in
+        # the expansion loop below, before all_files is rebuilt.
+        bare = remove_extensions(norm.split("/")[-1])
+        if bare in all_files:
+            return all_files[bare]
+        raise FileNotFoundError(
+            f"Cannot resolve \\input{{{input_name}}}: tried '{candidate}' "
+            f"and basename key '{bare}' in scanned files."
+        )
+    def get_input_file(input_name: str) -> str:
+        r"""Get the contents of an input file referenced in LaTeX.
+        Also records the directory of the resolved file so that bibliography
+        files next to it can be collected afterwards.
+        Args:
+            input_name (str): Name from \input{} command.
+        Returns:
+            str: Contents of the input file, or empty string if not found.
+        """
+        try:
+            filename = input_to_filename(input_name)
+            # _resolved_input_dirs is bound below, before the first call.
+            _resolved_input_dirs.add(os.path.dirname(os.path.abspath(filename)))
+            return load_file(filename)
+        except (KeyError, FileNotFoundError) as exc:
+            # Only "not found" is handled here; other read errors propagate.
+            print(f"File not found for input: {input_name} ({exc})")
+            return ""
+    # Search for \input{filename} patterns in the content
+    _resolved_input_dirs: set = set()
+    input_pattern = r'\\input\{([^}]+)\}'
+    content_old = content
+    done_matches = []
+    # Repeat until a pass leaves the text unchanged, so \input commands that
+    # arrive with included files are expanded too. Each distinct argument is
+    # expanded only once: str.replace substitutes every occurrence present at
+    # that moment, and an argument that shows up again later is left in the
+    # text, which also ends the loop for files that include each other.
+    while True:
+        matches = regex.findall(input_pattern, content)
+        for match in matches:
+            if match in done_matches:
+                continue
+            content = content.replace(r"\input{"+match+"}", get_input_file(match))
+            done_matches.append(match)
+        if content == content_old:
+            break
+        content_old = content
+    # Collect .bib files from directories outside the project root that were
+    # touched by \input{} resolution (the initial os.walk already covers the
+    # tree rooted at absolute_folder).
+    for _d in _resolved_input_dirs:
+        _d = os.path.normpath(_d)
+        try:
+            _rel = os.path.relpath(_d, absolute_folder)
+            # NOTE(review): a prefix test also matches an in-project folder
+            # whose name starts with "..", which is then walked again; the
+            # membership check below keeps the list free of duplicates.
+            if not _rel.startswith('..'):
+                continue  # already covered by the initial recursive walk
+        except ValueError:
+            pass  # different drive on Windows — definitely outside project root
+        if os.path.isdir(_d):
+            for _root, _dirs, _fls in os.walk(_d):
+                for _fl in _fls:
+                    if os.path.splitext(_fl)[1].lower() in bib_extensions:
+                        _abs = os.path.abspath(os.path.join(_root, _fl))
+                        if _abs not in bib_files:
+                            bib_files.append(_abs)
+    # Rebuild bib dict in case extra files were found
+    _bib_files = {_basename_key(f): f for f in bib_files}
+    all_files = {**_tex_files, **_bib_files, **_image_files}
+    # Merge all collected .bib files, deduplicating by citation key. The full
+    # path list is used here, not the basename-keyed dict.
+    merged_bib_content = merge_bib_files(bib_files)
+    out = {"content": content, "tex_files": _tex_files, "bib_files": _bib_files, "image_files": _image_files, "all_files": all_files, "merged_bib_content": merged_bib_content}
+    return LatexFile(**out)

pytexmd/filter/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+# Only names that this module actually binds are listed: a name in __all__
+# that is not defined makes ``from pytexmd.filter import *`` raise
+# AttributeError. "element_to_file_only_begin" is not imported by this
+# module, so it is left out of the list.
+# TODO(review): if .file_maker defines it, import it and list it again.
+__all__ = ["string_to_tree",
+           "process_string",
+           "element_to_file_whole",
+           "split_document_to_files",
+           "split_by_sections",
+           "verify_content_integrity",
+           "string_to_filename",
+           "preprocessor",
+           "text",
+           "enumitem",
+           "equations",
+           "antibugs",
+           "core",
+           "splitting"
+           ]
+from . import preprocessor,enumitem,equations,antibugs,core,splitting, text
+from .file_maker import string_to_tree, process_string, element_to_file_whole, split_document_to_files, split_by_sections, verify_content_integrity, string_to_filename