PyPI - markdown_convert - Versions diffs - 1.2.12__py3-none-any.whl → 1.2.32__py3-none-any.whl - Mend

markdown_convert 1.2.12__py3-none-any.whl → 1.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

markdown_convert/__init__.py +11 -0
markdown_convert/__main__.py +95 -0
markdown_convert/code.css +73 -73
markdown_convert/default.css +229 -373
markdown_convert/modules/__init__.py +4 -0
markdown_convert/modules/constants.py +25 -0
markdown_convert/modules/convert.py +338 -0
markdown_convert/modules/resources.py +101 -0
markdown_convert/modules/transform.py +107 -0
markdown_convert/modules/utils.py +38 -0
markdown_convert/modules/validate.py +61 -0
markdown_convert-1.2.32.dist-info/METADATA +105 -0
markdown_convert-1.2.32.dist-info/RECORD +16 -0
{markdown_convert-1.2.12.dist-info → markdown_convert-1.2.32.dist-info}/WHEEL +1 -1
{markdown_convert-1.2.12.dist-info → markdown_convert-1.2.32.dist-info}/licenses/LICENSE +339 -339
markdown_convert-1.2.12.dist-info/METADATA +0 -80
markdown_convert-1.2.12.dist-info/RECORD +0 -7
{markdown_convert-1.2.12.dist-info → markdown_convert-1.2.32.dist-info}/entry_points.txt +0 -0

markdown_convert/modules/convert.py ADDED Viewed

@@ -0,0 +1,338 @@
+"""
+Module to convert a markdown file to a pdf file.
+Author: @julynx
+"""
+import os
+import secrets
+import time
+from datetime import datetime
+from pathlib import Path
+import markdown2
+from playwright.sync_api import sync_playwright
+from .constants import MARKDOWN_EXTENSIONS
+from .resources import get_code_css_path, get_css_path, get_output_path
+from .transform import (
+    create_sections,
+    render_mermaid_diagrams,
+    create_html_document,
+    render_checkboxes,
+)
+from .utils import drop_duplicates
def _generate_pdf_with_playwright(
    html_content,
    output_path,
    *,
    css_content=None,
    base_dir=None,
    dump_html=False,
    nonce=None,
):
    """
    Generate a PDF from HTML content using Playwright.

    Args:
        html_content (str): HTML content to convert.
        output_path (str | Path | None): Path to save the PDF file. When it
            is falsy no file is written and the PDF bytes are returned.
        css_content (str, optional): CSS content to inject.
        base_dir (Path, optional): Base directory for resolving relative paths
            in HTML. When given, the document is written to a temporary file
            in that directory and loaded from there.
        dump_html (bool, optional): Whether to keep the temporary HTML file
            instead of deleting it (only meaningful together with base_dir).
        nonce (str, optional): Nonce carried by the only inline script that is
            allowed to run. When omitted, a random throwaway nonce is used so
            the policy never contains a predictable value.

    Returns:
        bytes | None: The PDF bytes when output_path is falsy, otherwise None.
    """
    # Never emit a guessable "nonce-None" into the policy: without a caller
    # supplied nonce no inline script should be able to match it.
    if nonce is None:
        nonce = secrets.token_urlsafe(16)

    # Content Security Policy using the nonce to whitelist only the Mermaid
    # initialization script. This prevents arbitrary JavaScript injection
    # while allowing Mermaid to work.
    csp = (
        "default-src 'none'; "
        f"script-src 'nonce-{nonce}' https://cdn.jsdelivr.net; "
        f"script-src-elem 'nonce-{nonce}' https://cdn.jsdelivr.net; "
        "style-src 'unsafe-inline'; "
        "img-src data: https: file:; "
        "font-src data: https:; "
        "connect-src https://cdn.jsdelivr.net;"
    )

    # Always wrap the content so the CSP is applied even when there is no CSS;
    # otherwise scripts embedded in the markdown would run unrestricted.
    full_html = create_html_document(html_content, css_content or "", csp)

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(
            headless=True,
            args=[
                "--disable-dev-shm-usage",
                "--disable-extensions",
                "--disable-plugins",
                "--disable-gpu",
                "--no-first-run",
                "--no-default-browser-check",
            ],
        )
        temp_html = None
        try:
            context = browser.new_context(
                java_script_enabled=True,
                permissions=[],
                geolocation=None,
                accept_downloads=False,
            )
            page = context.new_page()

            # Loading from a file inside base_dir lets relative resources
            # (images, fonts) referenced by the markdown resolve correctly.
            if base_dir:
                temp_html = base_dir / f".temp_{os.getpid()}.html"
                temp_html.write_text(full_html, encoding="utf-8")
                page.goto(temp_html.as_uri(), wait_until="networkidle", timeout=30000)
            else:
                page.set_content(full_html, wait_until="networkidle", timeout=30000)

            pdf_params = {
                "format": "A4",
                "print_background": True,
                "margin": {
                    "top": "20mm",
                    "bottom": "20mm",
                    "left": "20mm",
                    "right": "20mm",
                },
                "path": output_path,
            }  # Playwright ignores None paths
            pdf_bytes = page.pdf(**pdf_params)
            return None if output_path else pdf_bytes
        finally:
            try:
                browser.close()
            finally:
                # Clean up the temporary file even if closing the browser fails.
                if temp_html is not None and not dump_html and temp_html.exists():
                    temp_html.unlink()
+def _get_css_content(css_sources):
+    """
+    Get the CSS content from a list of CSS file paths.
+    Args:
+        css_sources (list): List of CSS file paths.
+    Returns:
+        str: Combined CSS content.
+    """
+    css_buffer = ""
+    for css_file in css_sources:
+        css_buffer += Path(css_file).read_text(encoding="utf-8") + "\n"
+    return css_buffer
def convert(
    markdown_path,
    css_path=None,
    output_path=None,
    *,
    extend_default_css=True,
    dump_html=False,
):
    """
    Render a markdown file as a PDF file.

    Args:
        markdown_path (str): Path to the markdown file.
        css_path (str=None): Path to the CSS file; the packaged default
            stylesheet is used when omitted.
        output_path (str=None): Path to the output file; derived from the
            markdown path when omitted.
        extend_default_css (bool=True): Apply the default stylesheet in
            addition to css_path.
        dump_html (bool=False): Keep the intermediate HTML file.

    Raises:
        RuntimeError: Wraps any exception raised while rendering.
    """
    css_path = get_css_path() if css_path is None else css_path
    if output_path is None:
        output_path = get_output_path(markdown_path, None)

    # Code highlighting first, then (optionally) the defaults, then the
    # user's stylesheet so that it wins the cascade.
    stylesheets = [get_code_css_path()]
    if extend_default_css:
        stylesheets.append(get_css_path())
    stylesheets.append(css_path)
    stylesheets = drop_duplicates(stylesheets)

    try:
        script_nonce = secrets.token_urlsafe(16)
        document = markdown2.markdown_path(markdown_path, extras=MARKDOWN_EXTENSIONS)
        for transform in (
            create_sections,
            lambda markup: render_mermaid_diagrams(markup, nonce=script_nonce),
            render_checkboxes,
        ):
            document = transform(document)

        combined_css = _get_css_content(stylesheets)
        markdown_dir = Path(markdown_path).resolve().parent
        _generate_pdf_with_playwright(
            document,
            output_path,
            css_content=combined_css,
            base_dir=markdown_dir,
            dump_html=dump_html,
            nonce=script_nonce,
        )
    except Exception as exc:
        raise RuntimeError(exc) from exc
def live_convert(
    markdown_path, css_path=None, output_path=None, *, extend_default_css=True
):
    """
    Render a markdown file as a PDF and re-render whenever it changes.

    Args:
        markdown_path (str): Path to the markdown file.
        css_path (str=None): Path to the CSS file; the packaged default
            stylesheet is used when omitted.
        output_path (str=None): Path to the output file; derived from the
            markdown path when omitted.
        extend_default_css (bool=True): Extend the default CSS file.
    """
    stylesheet = get_css_path() if css_path is None else css_path
    destination = (
        get_output_path(markdown_path, None) if output_path is None else output_path
    )
    LiveConverter(
        markdown_path,
        stylesheet,
        destination,
        extend_default_css=extend_default_css,
        loud=True,
    ).observe()
def convert_text(markdown_text, css_text=None, *, extend_default_css=True):
    """
    Convert markdown text to a pdf file.

    Args:
        markdown_text (str): Markdown text.
        css_text (str=None): CSS text (the stylesheet itself, not a path).
            The packaged default stylesheet is used when omitted.
        extend_default_css (bool=True): Extend the default CSS file.

    Returns:
        PDF file as bytes.

    Raises:
        RuntimeError: Wraps any exception raised while rendering.
    """
    default_css = Path(get_css_path()).read_text(encoding="utf-8")
    code_css = Path(get_code_css_path()).read_text(encoding="utf-8")

    if css_text is None:
        css_text = default_css

    if extend_default_css:
        css_sources = [code_css, default_css, css_text]
    else:
        css_sources = [code_css, css_text]

    # These entries are stylesheet *contents*, so they are joined directly;
    # handing them to _get_css_content would try to open them as file paths.
    # Duplicates are dropped so the default sheet is not emitted twice when
    # no custom CSS was supplied.
    css_content = "".join(f"{sheet}\n" for sheet in drop_duplicates(css_sources))

    try:
        nonce = secrets.token_urlsafe(16)
        html = markdown2.markdown(markdown_text, extras=MARKDOWN_EXTENSIONS)
        html = create_sections(html)
        html = render_mermaid_diagrams(html, nonce=nonce)
        html = render_checkboxes(html)

        return _generate_pdf_with_playwright(
            html,
            None,
            css_content=css_content,
            nonce=nonce,
        )
    except Exception as exc:
        raise RuntimeError(exc) from exc
class LiveConverter:
    """
    Class to convert a markdown file to a pdf file and watch for changes.
    """

    def __init__(
        self,
        markdown_path,
        css_path,
        output_path,
        *,
        extend_default_css=True,
        loud=False,
    ):
        """
        Initialize the LiveConverter class.

        Args:
            markdown_path (str): Path to the markdown file.
            css_path (str): Path to the CSS file.
            output_path (str): Path to the output file.
            extend_default_css (bool): Extend the default CSS file.
            loud (bool): Print a message after every render and when the
                watcher is interrupted.
        """
        self.md_path = Path(markdown_path).absolute()
        self.css_path = Path(css_path).absolute()
        self.output_path = output_path
        self.extend_default_css = extend_default_css
        self.loud = loud

        # Modification times seen at the last render; set by observe().
        self.md_last_modified = None
        self.css_last_modified = None

    def get_last_modified_date(self, file_path):
        """
        Get the last modified date of a file.

        Args:
            file_path (str): Path to the file.

        Returns:
            Last modified date of the file, as returned by os.path.getmtime.
        """
        return os.path.getmtime(file_path)

    def write_pdf(self):
        """
        Write the pdf file.
        """
        convert(
            self.md_path,
            self.css_path,
            self.output_path,
            extend_default_css=self.extend_default_css,
        )
        if self.loud:
            print(f"- PDF file updated: {datetime.now()}", flush=True)

    def observe(self, poll_interval=1):
        """
        Observe the markdown and CSS files. Calls write_pdf() when a file is
        modified. Returns when interrupted with Ctrl+C.

        Args:
            poll_interval (int | float): Seconds to sleep between checks.
        """
        # Snapshot the timestamps *before* the first render: a save that
        # lands while the render is in progress must still be seen as a
        # change on the next poll.
        self.md_last_modified = self.get_last_modified_date(self.md_path)
        self.css_last_modified = self.get_last_modified_date(self.css_path)
        self.write_pdf()

        try:
            while True:
                markdown_modified = self.get_last_modified_date(self.md_path)
                css_modified = self.get_last_modified_date(self.css_path)

                if (
                    markdown_modified != self.md_last_modified
                    or css_modified != self.css_last_modified
                ):
                    self.write_pdf()
                    self.md_last_modified = markdown_modified
                    self.css_last_modified = css_modified

                time.sleep(poll_interval)
        except KeyboardInterrupt:
            if self.loud:
                print("\nInterrupted by user.\n", flush=True)

markdown_convert/modules/resources.py ADDED Viewed

@@ -0,0 +1,101 @@
+"""
+This module contains functions that are used to get the output path, the CSS
+path, and the usage message.
+Author: @julynx
+"""
+from pathlib import Path
+try:
+    # Python 3.9+
+    from importlib.resources import files
+except ImportError:
+    # Fallback for older Python versions
+    from importlib_resources import files
+from .constants import BLUE, CYAN, GREEN, YELLOW, OPTIONS, OPTIONS_MODES
+from .utils import color
def get_output_path(markdown_path, output_dir=None):
    """
    Get the output path for the pdf file.

    Args:
        markdown_path (str): The path to the markdown file.
        output_dir (str): Either the output directory or the full path of the
            PDF file (recognised by its ".pdf" suffix). When omitted, the PDF
            is placed next to the markdown file.

    Returns:
        Path: The output path.
    """
    markdown_path = Path(markdown_path)
    pdf_name = f"{markdown_path.stem}.pdf"

    if output_dir is None:
        return markdown_path.parent / pdf_name

    output_dir = Path(output_dir)
    if output_dir.suffix == ".pdf":
        return output_dir

    # The argument names a directory, so the file goes inside it (not
    # alongside it in the parent directory).
    return output_dir / pdf_name
def get_css_path():
    """
    Locate the default stylesheet shipped inside the package.

    Returns:
        str: The path to the default CSS file.
    """
    return str(files("markdown_convert") / "default.css")
def get_code_css_path():
    """
    Locate the code-highlighting stylesheet shipped inside the package.

    Returns:
        str: The path to the code CSS file.
    """
    return str(files("markdown_convert") / "code.css")
def get_usage():
    """
    Build the command-line help text.

    Returns:
        str: The usage message.
    """
    equals = color(CYAN, "=")

    command = (
        f"{color(GREEN, 'markdown-convert')} "
        f"[{color(YELLOW, OPTIONS[0])}] [{color(BLUE, 'options')}]"
    )
    mode_option = (
        f"{color(BLUE, OPTIONS[1])}{equals}{color(CYAN, '|'.join(OPTIONS_MODES))}"
    )
    css_option = f"{color(BLUE, OPTIONS[2])}{equals}[{color(CYAN, 'css_file_path')}]"
    out_option = (
        f"{color(BLUE, OPTIONS[3])}{equals}[{color(CYAN, 'output_file_path')}]"
    )

    # The empty first and last entries give the leading and trailing newline.
    lines = [
        "",
        "Usage:",
        f"  {command}",
        "",
        "Options:",
        f"  {mode_option}",
        "    Convert the markdown file once (default) or live.",
        f"  {css_option}",
        "    Use a custom CSS file.",
        f"  {out_option}",
        "    Specify the output file path.",
        "",
    ]
    return "\n".join(lines)

markdown_convert/modules/transform.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""
+Module for transforming HTML content.
+"""
+import re
def create_html_document(html_content, css_content, csp):
    """
    Assemble a standalone HTML page from a body fragment, a stylesheet and a
    Content Security Policy.

    Args:
        html_content (str): The HTML content to include in the body.
        css_content (str): The CSS styles to include in the head.
        csp (str): The Content Security Policy string, emitted as a
            http-equiv meta tag.

    Returns:
        str: A complete HTML document as a string.
    """
    head = (
        "<head>\n"
        '<meta charset="UTF-8">\n'
        f'<meta http-equiv="Content-Security-Policy" content="{csp}">\n'
        "<style>\n"
        f"{css_content}\n"
        "</style>\n"
        "</head>\n"
    )
    body = f"<body>\n{html_content}\n</body>\n"
    return "<!DOCTYPE html>\n<html>\n" + head + body + "</html>"
def create_sections(html):
    """
    Creates h2 sections, from each h2 to the next h2 (or the end of the
    document), wrapping them in <section> tags.

    Text that merely looks like an h2 tag inside a <code> element is neither
    treated as a heading nor as the end of a section. Inline code inside a
    section stays inside that section.

    Args:
        html (str): HTML content.

    Returns:
        HTML content with sections wrapped in <section> tags.
    """
    # Locate code elements once so heading candidates inside them can be
    # ignored without cutting the document into independent fragments
    # (fragmenting would close a section at every inline <code>).
    code_spans = [
        match.span()
        for match in re.finditer(r"<code\b[^>]*>.*?</code>", html, flags=re.DOTALL)
    ]

    def inside_code(position):
        return any(start <= position < end for start, end in code_spans)

    headings = [
        match.span()
        for match in re.finditer(r"<h2.*?>", html, flags=re.DOTALL)
        if not inside_code(match.start())
    ]
    if not headings:
        return html

    # A single trailing newline is left outside the last section.
    document_end = len(html) - 1 if html.endswith("\n") else len(html)

    section_ends = [start for start, _ in headings[1:]] + [document_end]
    pieces = [html[: headings[0][0]]]

    for (start, tag_end), end in zip(headings, section_ends):
        segment = html[start:end]
        closing = segment.find("</h2>", tag_end - start)
        if closing == -1:
            # Unterminated heading: leave the markup untouched.
            pieces.append(segment)
            continue
        split_at = closing + len("</h2>")
        heading, body = segment[:split_at], segment[split_at:]
        pieces.append(f"<section>\n{heading}\n{body}\n</section>\n")

    pieces.append(html[document_end:])
    return "".join(pieces)
def render_mermaid_diagrams(html, *, nonce):
    """
    Prepend the Mermaid loader script when the content contains a diagram.

    Args:
        html (str): HTML content.
        nonce (str): Cryptographic nonce for CSP, placed on the script tag.

    Returns:
        str: The content unchanged when it has no `<div class="mermaid">`,
        otherwise the loader script followed by the content.
    """
    if '<div class="mermaid">' not in html:
        return html

    # The empty first and last entries give the leading and trailing newline.
    loader = "\n".join(
        [
            "",
            f'<script type="module" nonce="{nonce}">',
            "  import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';",
            "  mermaid.initialize({",
            "    startOnLoad: true,",
            "    theme: 'default',",
            "    themeVariables: {},",
            "    fontFamily: 'arial, verdana, sans-serif'",
            "  });",
            "</script>",
            "",
        ]
    )
    return loader + html
def render_checkboxes(html):
    """
    Renders task-list checkboxes in the HTML content by replacing the text
    markers "[ ]" and "[x]" with checkbox input elements.

    Markers inside <code> elements (with or without attributes, e.g. a
    language class) are left untouched.

    Args:
        html (str): HTML content.

    Returns:
        str: HTML content with rendered checkboxes.
    """
    replacements = (
        ("[ ]", "<input type='checkbox'>"),
        ("[x]", "<input type='checkbox' checked>"),
    )

    # With one capturing group, re.split alternates between text outside code
    # (even indexes) and the captured code elements (odd indexes).
    parts = re.split(r"(<code\b[^>]*>.*?</code>)", html, flags=re.DOTALL)

    rendered = []
    for index, part in enumerate(parts):
        if index % 2 == 0:
            for marker, markup in replacements:
                part = part.replace(marker, markup)
        rendered.append(part)
    return "".join(rendered)

markdown_convert/modules/utils.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""
+Utility functions for string manipulation.
+Author: @julynx
+"""
+import platform
def color(color_code, text):
    """
    Wrap text in an ANSI colour escape sequence.

    Args:
        color_code (str): The color code.
        text (str): The text to colorize.

    Returns:
        str: The colorized text, or the text unchanged on Windows.
    """
    # Escape sequences are not emitted on Windows.
    on_windows = platform.system() == "Windows"
    return text if on_windows else f"\033[{color_code}m{text}\033[0m"
def drop_duplicates(lst):
    """
    Return the items of a list with repeats removed, keeping the position of
    each item's first occurrence.

    Args:
        lst: List to remove duplicates from.

    Returns:
        List without duplicates.
    """
    # Dictionary keys are unique and remember insertion order.
    first_seen = {item: None for item in lst}
    return list(first_seen)

markdown_convert/modules/validate.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""
+This module contains functions to validate the input paths.
+Author: @julynx
+"""
+from pathlib import Path
def validate_markdown_path(markdown_path):
    """
    Validate the markdown file path.

    Args:
        markdown_path (str | Path): The path to the markdown file.

    Raises:
        FileNotFoundError: If the file is not found.
        ValueError: If the file is not a Markdown file.
    """
    if not Path(markdown_path).is_file():
        raise FileNotFoundError(f"File not found: '{markdown_path}'")

    # str() so that pathlib.Path arguments are accepted as well as strings.
    if not str(markdown_path).endswith(".md"):
        raise ValueError("File must be a Markdown file.")
def validate_css_path(css_path):
    """
    Validate the CSS file path.

    Args:
        css_path (str | Path): The path to the CSS file.

    Raises:
        FileNotFoundError: If the file is not found.
        ValueError: If the file is not a CSS file.
    """
    if not Path(css_path).is_file():
        raise FileNotFoundError(f"File not found: '{css_path}'")

    # str() so that pathlib.Path arguments are accepted as well as strings.
    if not str(css_path).endswith(".css"):
        raise ValueError("File must be a CSS file.")
def validate_output_path(output_dir):
    """
    Validate the output directory path.

    Args:
        output_dir (str | Path): The path to the output directory, or to the
            output ".pdf" file, in which case its parent directory is checked.

    Raises:
        FileNotFoundError: If the directory is not found.
    """
    check_dir = Path(output_dir)

    # str() so that pathlib.Path arguments are accepted as well as strings.
    if str(output_dir).endswith(".pdf"):
        check_dir = check_dir.parent

    if not check_dir.is_dir():
        raise FileNotFoundError(f"Directory not found: '{check_dir}'")

markdown_convert 1.2.12__py3-none-any.whl → 1.2.32__py3-none-any.whl

markdown_convert 1.2.12__py3-none-any.whl → 1.2.32__py3-none-any.whl