PyPI - sheetah - Versions diffs - 0.1.0__tar.gz - Mend

sheetah 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

sheetah-0.1.0/PKG-INFO +96 -0
sheetah-0.1.0/README.md +82 -0
sheetah-0.1.0/pyproject.toml +23 -0
sheetah-0.1.0/src/sheetah/__init__.py +19 -0
sheetah-0.1.0/src/sheetah/cli.py +118 -0
sheetah-0.1.0/src/sheetah/sheetah.py +220 -0

sheetah-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,96 @@
+Metadata-Version: 2.3
+Name: sheetah
+Version: 0.1.0
+Summary: Split Markdown documents into H2 segments with fuzzy search and an interactive CLI
+Author: Carsten Engelke
+Author-email: Carsten Engelke <carsten.engelke@gmail.com>
+Requires-Dist: pygubu>=0.40
+Requires-Dist: markdown>=3.0
+Requires-Dist: html2text>=2020.1.16
+Requires-Dist: prompt-toolkit>=3.0
+Requires-Dist: pyperclip>=1.8
+Requires-Python: >=3.13
+Description-Content-Type: text/markdown
+# sheetah
+`sheetah` ist ein einfaches Python-Paket zur Verarbeitung von Markdown-Dokumenten.
+Es gliedert den Text an H2-Überschriften ("##") in **Segmente**, erlaubt fuzzy Suche
+und bietet ein interaktives CLI mit Clipboard‑Kopie.
+## Installation
+```bash
+pip install sheetah
+```
+Oder lokal im Quellcode (virtuelle Umgebung):
+```bash
+cd sheetah
+python -m venv .venv
+. .venv/Scripts/activate  # oder source .venv/bin/activate
+python -m pip install -e .
+```
+> `prompt_toolkit` und `pyperclip` für die interaktive UI werden als reguläre Abhängigkeiten automatisch mitinstalliert.
+## Nutzung
+### Programmatisch
+```python
+from sheetah import Document
+txt = """Einführung
+## Abschnitt 1
+Text1
+## Abschnitt 2
+Text2
+"""
+doc = Document.from_markdown(txt)
+print(doc.description)
+for seg in doc.search("Text"):
+    print(seg.name)
+    print(seg.text())
+```
+### Kommandozeile
+```bash
+sheetah pfad/zur/datei.md
+```
+Eine interaktive Oberfläche erscheint:
+1. Eingabe der Suchanfrage oben
+2. Ergebnisse werden aufgelistet, bester Treffer ist vorgeschlagen
+3. Mit Pfeil hoch/runter navigieren; Vorschau rechts
+4. `<Enter>` kopiert den Text des ausgewählten Segments in die Zwischenablage
+5. `<Ctrl-C>` oder `<Ctrl-Q>` beendet das Programm
+Die Beschreibung des Dokuments (alles vor dem ersten `##`) wird über der
+Suchzeile angezeigt.
+## API
+- `Document.from_markdown(markdown: str) -> Document` – erstellt ein Dokument.
+- `Document.items` – Liste der `Segment`-Instanzen.
+- `Document.description` – Markdown-Text vor dem ersten Abschnitt.
+- `Document.search(query: str, limit: Optional[int]=None)` – fuzzy Suche.
+- `Segment.name` – Name (Header) des Segments.
+- `Segment.text()` – reiner Text.
+- `Segment.html()` – HTML-Konvertierung.
+## Tests
+```bash
+pytest
+```
+## Lizenz
+MIT, siehe `LICENSE`.

sheetah-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,82 @@
+# sheetah
+`sheetah` ist ein einfaches Python-Paket zur Verarbeitung von Markdown-Dokumenten.
+Es gliedert den Text an H2-Überschriften ("##") in **Segmente**, erlaubt fuzzy Suche
+und bietet ein interaktives CLI mit Clipboard‑Kopie.
+## Installation
+```bash
+pip install sheetah
+```
+Oder lokal im Quellcode (virtuelle Umgebung):
+```bash
+cd sheetah
+python -m venv .venv
+. .venv/Scripts/activate  # oder source .venv/bin/activate
+python -m pip install -e .
+```
+> `prompt_toolkit` und `pyperclip` für die interaktive UI werden als reguläre Abhängigkeiten automatisch mitinstalliert.
+## Nutzung
+### Programmatisch
+```python
+from sheetah import Document
+txt = """Einführung
+## Abschnitt 1
+Text1
+## Abschnitt 2
+Text2
+"""
+doc = Document.from_markdown(txt)
+print(doc.description)
+for seg in doc.search("Text"):
+    print(seg.name)
+    print(seg.text())
+```
+### Kommandozeile
+```bash
+sheetah pfad/zur/datei.md
+```
+Eine interaktive Oberfläche erscheint:
+1. Eingabe der Suchanfrage oben
+2. Ergebnisse werden aufgelistet, bester Treffer ist vorgeschlagen
+3. Mit Pfeil hoch/runter navigieren; Vorschau rechts
+4. `<Enter>` kopiert den Text des ausgewählten Segments in die Zwischenablage
+5. `<Ctrl-C>` oder `<Ctrl-Q>` beendet das Programm
+Die Beschreibung des Dokuments (alles vor dem ersten `##`) wird über der
+Suchzeile angezeigt.
+## API
+- `Document.from_markdown(markdown: str) -> Document` – erstellt ein Dokument.
+- `Document.items` – Liste der `Segment`-Instanzen.
+- `Document.description` – Markdown-Text vor dem ersten Abschnitt.
+- `Document.search(query: str, limit: Optional[int]=None)` – fuzzy Suche.
+- `Segment.name` – Name (Header) des Segments.
+- `Segment.text()` – reiner Text.
+- `Segment.html()` – HTML-Konvertierung.
+## Tests
+```bash
+pytest
+```
+## Lizenz
+MIT, siehe `LICENSE`.

sheetah-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,23 @@
+[project]
+name = "sheetah"
+version = "0.1.0"
+description = "Split Markdown documents into H2 segments with fuzzy search and an interactive CLI"
+readme = "README.md"
+authors = [
+    { name = "Carsten Engelke", email = "carsten.engelke@gmail.com" }
+]
+requires-python = ">=3.13"
+dependencies = [
+    "pygubu>=0.40",
+    "markdown>=3.0",
+    "html2text>=2020.1.16",
+    "prompt_toolkit>=3.0",
+    "pyperclip>=1.8",
+]
+[project.scripts]
+sheetah = "sheetah.cli:main"
+[build-system]
+requires = ["uv_build>=0.10.2,<0.11.0"]
+build-backend = "uv_build"

sheetah-0.1.0/src/sheetah/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""Package entrypoint for :mod:`sheetah`.
+The top-level ``main`` function is simply routed to the command‑line
+implementation so installing the package and invoking ``sheetah`` will
+launch the interactive search UI.  Helper classes are re-exported here as a
+convenience for programmatic use.
+"""
+from __future__ import annotations
+from .sheetah import Document, Segment
+from . import cli
+__all__ = ["Document", "Segment", "cli", "main"]
+def main() -> None:
+    """Run the command‑line interface.
+    Thin wrapper that forwards to ``cli.main()`` with no arguments, so the
+    CLI parses ``sys.argv`` itself.
+    """
+    cli.main()

sheetah-0.1.0/src/sheetah/cli.py ADDED Viewed

@@ -0,0 +1,118 @@
+"""Command-line interface for searching documents created from Markdown."""
+from __future__ import annotations
+import argparse
+import sys
+from typing import List
+from prompt_toolkit import Application
+from prompt_toolkit.key_binding import KeyBindings
+from prompt_toolkit.layout import Layout, HSplit, VSplit
+from prompt_toolkit.widgets import TextArea, Label, Frame
+import pyperclip
+from sheetah.sheetah import Document, Segment
+class DocSearchUI:
+    """Full-screen prompt_toolkit UI for searching the segments of a document.
+    The layout stacks, top to bottom: the document description, a one-line
+    search field, a results list next to a preview pane, and a status bar.
+    Editing the search text re-runs ``Document.search``; up/down move the
+    selection; Enter copies the selected segment's plain text to the
+    clipboard; Ctrl-C or Ctrl-Q exits.
+    """
+    def __init__(self, document: Document):
+        """Create widgets, key bindings and the application for *document*."""
+        self.document = document
+        # start with every segment listed; replaced on each search change
+        self.results: List[Segment] = document.items
+        self.selected = 0
+        # description shown above search field (if available)
+        desc_text = document.description or ""
+        self.description_label = Label(text=desc_text)
+        self.search_field = TextArea(height=1, prompt="Search: ", multiline=False)
+        self.result_area = TextArea(focusable=False, scrollbar=True)
+        self.detail_area = TextArea(focusable=False, scrollbar=True)
+        self.status_bar = Label(text="Use up/down to navigate, Enter to copy, Ctrl-C to quit.")
+        # bind events
+        self.search_field.buffer.on_text_changed += self._on_search_change
+        self._update_display()
+        self.kb = KeyBindings()
+        self.kb.add("up")(self._go_up)
+        self.kb.add("down")(self._go_down)
+        self.kb.add("enter")(self._copy_current)
+        self.kb.add("c-c")(self._exit)
+        self.kb.add("c-q")(self._exit)
+        root = HSplit([
+            self.description_label,
+            self.search_field,
+            VSplit([
+                Frame(self.result_area, title="Segments", width=40),
+                Frame(self.detail_area, title="Preview"),
+            ]),
+            self.status_bar,
+        ])
+        self.app = Application(layout=Layout(root), key_bindings=self.kb, full_screen=True)
+    def _on_search_change(self, _):
+        """Re-run the search for the current field text and reset the selection."""
+        text = self.search_field.text
+        self.results = self.document.search(text)
+        self.selected = 0
+        self._update_display()
+    def _format_results(self) -> str:
+        """Return one line per result, marking the selected one with ``> ``."""
+        lines = []
+        for i, seg in enumerate(self.results):
+            prefix = "> " if i == self.selected else "  "
+            lines.append(prefix + seg.name)
+        return "\n".join(lines)
+    def _update_display(self):
+        """Refresh the results list and the preview of the selected segment."""
+        self.result_area.text = self._format_results()
+        if self.results:
+            self.detail_area.text = self.results[self.selected].text()
+        else:
+            self.detail_area.text = "<no results>"
+    def _go_up(self, event):
+        """Move the selection one entry up, stopping at the first result."""
+        if self.results:
+            self.selected = max(0, self.selected - 1)
+            self._update_display()
+    def _go_down(self, event):
+        """Move the selection one entry down, stopping at the last result."""
+        if self.results:
+            self.selected = min(len(self.results) - 1, self.selected + 1)
+            self._update_display()
+    def _copy_current(self, event):
+        """Copy the selected segment's plain text to the clipboard.
+        A clipboard failure (``pyperclip.PyperclipException``, e.g. no
+        copy/paste backend on the system) is shown in the status bar instead
+        of propagating out of the key handler and tearing down the UI.
+        """
+        if not self.results:
+            return
+        try:
+            pyperclip.copy(self.results[self.selected].text())
+        except pyperclip.PyperclipException as exc:
+            # collapse whitespace: the message may span several lines
+            reason = " ".join(str(exc).split())
+            self.status_bar.text = f"Copy failed: {reason}"
+        else:
+            # show a little confirmation in status
+            self.status_bar.text = "Copied to clipboard! (Ctrl-C to quit)"
+    def _exit(self, event):
+        """Leave the application event loop."""
+        event.app.exit()
+    def run(self):
+        """Run the application until the user exits."""
+        self.app.run()
+def main(argv=None):
+    """Parse the command line, load the markdown file and run the search UI.
+    Args:
+        argv: Argument list without the program name; defaults to
+            ``sys.argv[1:]`` when ``None``.
+    Exits with status 1 (after printing a message to stderr) when the file
+    cannot be opened or decoded as UTF-8.
+    """
+    argv = argv if argv is not None else sys.argv[1:]
+    parser = argparse.ArgumentParser(description="Search markdown document interactively.")
+    parser.add_argument("file", help="Path to markdown file")
+    args = parser.parse_args(argv)
+    try:
+        # the context manager closes the handle even when read() fails
+        with open(args.file, encoding="utf-8") as fh:
+            text = fh.read()
+    except (OSError, ValueError) as e:
+        # OSError: missing/unreadable path; ValueError covers
+        # UnicodeDecodeError and invalid paths (embedded NUL byte)
+        print(f"Unable to read {args.file}: {e}", file=sys.stderr)
+        sys.exit(1)
+    doc = Document.from_markdown(text)
+    ui = DocSearchUI(doc)
+    ui.run()
+if __name__ == "__main__":
+    main()

sheetah-0.1.0/src/sheetah/sheetah.py ADDED Viewed

@@ -0,0 +1,220 @@
+from dataclasses import dataclass
+from typing import List, Optional
+import re
+# use established third-party libraries for conversions
+try:
+    import markdown as _markdown_lib
+except ImportError:  # pragma: no cover
+    _markdown_lib = None
+try:
+    import html2text as _html2text_lib
+except ImportError:  # pragma: no cover
+    _html2text_lib = None
+from difflib import SequenceMatcher
+@dataclass
+class Segment:
+    """A named section of a markdown document.
+    ``name`` holds the header text and ``_markdown`` the raw markdown body
+    that followed the header.
+    """
+    name: str
+    _markdown: str
+    def markdown(self) -> str:
+        """Return the raw markdown body unchanged."""
+        return self._markdown
+    def text(self) -> str:
+        """Return a plain-text version of the markdown content.
+        When the ``markdown`` package is importable the body is rendered to
+        HTML, the tags are stripped and HTML entities are decoded, so that
+        characters such as ``&`` or ``<`` appear literally rather than as
+        ``&amp;`` / ``&lt;``.  Otherwise a crude regex-based cleanup of the
+        markdown syntax is applied so the method always returns something.
+        """
+        import html as _htmlmod
+        md = self._markdown
+        # lazy import to avoid module-level state problems when tests install
+        # dependencies later.
+        try:
+            import markdown as _m
+        except ImportError:
+            _m = None
+        if _m:
+            html_text = _m.markdown(md)
+            # strip HTML tags first, then decode the entities that markdown
+            # produced for &, < and > so the result is real plain text
+            text = re.sub(r"<[^>]+>", "", html_text)
+            return _htmlmod.unescape(text).strip()
+        # fallback: crude regex-based cleanup
+        md = re.sub(r"```[\s\S]*?```", "", md)
+        md = re.sub(r"`([^`]*)`", r"\1", md)
+        md = re.sub(r"!\[.*?\]\(.*?\)", "", md)
+        md = re.sub(r"\[(.*?)\]\(.*?\)", r"\1", md)
+        md = re.sub(r"\*\*(.*?)\*\*", r"\1", md)
+        md = re.sub(r"\*(.*?)\*", r"\1", md)
+        md = re.sub(r"__(.*?)__", r"\1", md)
+        md = re.sub(r"_(.*?)_", r"\1", md)
+        md = re.sub(r"^#+\s*", "", md, flags=re.MULTILINE)
+        md = re.sub(r"^>\s?", "", md, flags=re.MULTILINE)
+        md = re.sub(r"[-*_]{3,}", "", md)
+        md = re.sub(r"^[\s]*[-*+]\s+", "", md, flags=re.MULTILINE)
+        md = re.sub(r"\n{2,}", "\n\n", md)
+        return md.strip()
+    def html(self) -> str:
+        """Return HTML generated from the markdown payload.
+        When the ``markdown`` package is importable it does the conversion.
+        Otherwise a minimal built-in converter handles fenced/inline code,
+        links, bold/italic, H1-H3 headers, bullet lists and paragraphs so
+        the method never fails.
+        """
+        md = self._markdown
+        # lazy lookup to keep behaviour consistent if libs are installed later
+        try:
+            import markdown as _m
+        except ImportError:
+            _m = None
+        if _m:
+            return _m.markdown(md)
+        # fallback minimal converter
+        import html as _htmlmod
+        # escape exactly once up front; the substitutions below re-insert
+        # already-escaped text, so escaping again would yield "&amp;lt;"
+        text = _htmlmod.escape(md)
+        text = re.sub(r"```([\s\S]*?)```", r"<pre><code>\1</code></pre>", text)
+        text = re.sub(r"`([^`]*)`", r"<code>\1</code>", text)
+        # plain quotes in the template: a backslash before a quote would be
+        # copied verbatim into the output by re.sub
+        text = re.sub(r"\[(.*?)\]\((.*?)\)", r'<a href="\2">\1</a>', text)
+        text = re.sub(r"\*\*(.*?)\*\*", r"<strong>\1</strong>", text)
+        text = re.sub(r"\*(.*?)\*", r"<em>\1</em>", text)
+        text = re.sub(r"^###\s*(.*?)$", r"<h3>\1</h3>", text, flags=re.MULTILINE)
+        text = re.sub(r"^##\s*(.*?)$", r"<h2>\1</h2>", text, flags=re.MULTILINE)
+        text = re.sub(r"^#\s*(.*?)$", r"<h1>\1</h1>", text, flags=re.MULTILINE)
+        # simple list handling remains as before
+        def _ulify(text: str) -> str:
+            """Wrap consecutive ``-``/``*``/``+`` bullet lines in ``<ul>``."""
+            lines = text.splitlines()
+            out = []
+            in_ul = False
+            for line in lines:
+                m = re.match(r"^[\s]*[-*+]\s+(.*)$", line)
+                if m:
+                    if not in_ul:
+                        out.append("<ul>")
+                        in_ul = True
+                    out.append(f"<li>{m.group(1)}</li>")
+                else:
+                    if in_ul:
+                        out.append("</ul>")
+                        in_ul = False
+                    out.append(line)
+            if in_ul:
+                out.append("</ul>")
+            return "\n".join(out)
+        text = _ulify(text)
+        # blank-line separated chunks become paragraphs unless they already
+        # start with a block-level tag produced above
+        parts = [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]
+        block_tags = ("<h1>", "<h2>", "<h3>", "<ul>", "<pre>")
+        html_parts = []
+        for p in parts:
+            if p.startswith(block_tags):
+                html_parts.append(p)
+            else:
+                html_parts.append(f"<p>{p}</p>")
+        return "\n".join(html_parts)
+class Document:
+    """A markdown document split into a description and named segments."""
+    def __init__(self, items: Optional[List[Segment]] = None, description: str = ""):
+        """Store *items* (an empty list when omitted) and *description*."""
+        self.items: List[Segment] = items or []
+        self.description: str = description
+    @classmethod
+    def from_markdown(cls, md: str) -> "Document":
+        """Create a document from a markdown string.
+        Everything **before** the first header line is treated as the
+        document description; each header starts a new segment whose body
+        runs up to the next header.  With no header at all, the whole input
+        becomes the description and there are no segments.
+        NOTE: the header pattern is ``##+``, so deeper headers (``###`` and
+        beyond) also start a new segment, not only H2.
+        """
+        header_re = re.compile(r"^##+\s*(.*)")
+        items: List[Segment] = []
+        description_lines: List[str] = []
+        # None until the first header is seen; afterwards the open segment's name
+        current_name: Optional[str] = None
+        buffer: List[str] = []
+        for line in md.splitlines(keepends=True):
+            m = header_re.match(line)
+            if m:
+                # close the segment that was open, if any
+                if current_name is not None:
+                    items.append(Segment(name=current_name, _markdown="".join(buffer).rstrip()))
+                current_name = m.group(1).strip()
+                buffer = []
+            elif current_name is None:
+                description_lines.append(line)
+            else:
+                buffer.append(line)
+        # finalize last
+        if current_name is not None:
+            items.append(Segment(name=current_name, _markdown="".join(buffer).rstrip()))
+        desc = "".join(description_lines).strip()
+        return cls(items, description=desc)
+    def list(self, names_only: bool = True) -> List:
+        """Return the segment names, or a copy of the segments themselves."""
+        return [it.name for it in self.items] if names_only else list(self.items)
+    def search(self, query: str, limit: Optional[int] = None) -> List[Segment]:
+        """Return segments ranked by relevance to *query*.
+        Args:
+            query: Search text; matching is case-insensitive.  An empty or
+                whitespace-only query returns the segments in document order.
+            limit: Maximum number of results; ``None`` means no limit and
+                ``0`` yields an empty list.
+        Returns:
+            Segments with a positive score, best first.  Substring hits in
+            the name weigh more than hits in the body, earlier hits score
+            higher, and ``SequenceMatcher`` ratios add a fuzzy component.
+        """
+        q = (query or "").strip().lower()
+        if not q:
+            # compare against None so that limit=0 is honoured
+            return list(self.items) if limit is None else self.items[:limit]
+        scored: List[tuple] = []
+        for it in self.items:
+            name = (it.name or "").lower()
+            content = (it._markdown or "").lower()
+            score = 0
+            if q in name:
+                score += 200
+                score += max(0, 50 - name.index(q))
+            if q in content:
+                score += 100
+                score += max(0, 25 - content.index(q))
+            # fuzzy similarity
+            score += int(SequenceMatcher(None, q, name).ratio() * 50)
+            score += int(SequenceMatcher(None, q, content).ratio() * 10)
+            if score > 0:
+                scored.append((score, it))
+        # the sort is stable, so equal scores keep document order
+        scored.sort(key=lambda x: x[0], reverse=True)
+        results = [it for _, it in scored]
+        return results if limit is None else results[:limit]
+# expose CLI entrypoint for setuptools
+# The relative import only works when this module is loaded as part of the
+# package; it is placed at the end of the module, after Document and Segment
+# are defined, because cli itself imports those names from this module.
+try:
+    from . import cli
+    def main():
+        """Run the command-line interface and return its result."""
+        return cli.main()
+except ImportError:  # pragma: no cover - running without package context
+    def main():
+        """Placeholder used when the ``cli`` module could not be imported."""
+        # fallback: no cli available
+        raise RuntimeError("CLI not available; install prompt_toolkit first")