sheetah 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sheetah-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,96 @@
1
+ Metadata-Version: 2.3
2
+ Name: sheetah
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Author: Carsten Engelke
6
+ Author-email: Carsten Engelke <carsten.engelke@gmail.com>
7
+ Requires-Dist: pygubu>=0.40
8
+ Requires-Dist: markdown>=3.0
9
+ Requires-Dist: html2text>=2020.1.16
10
+ Requires-Dist: prompt-toolkit>=3.0
11
+ Requires-Dist: pyperclip>=1.8
12
+ Requires-Python: >=3.13
13
+ Description-Content-Type: text/markdown
14
+
15
+ # sheetah
16
+
17
+ `sheetah` ist ein einfaches Python-Paket zur Verarbeitung von Markdown-Dokumenten.
18
+ Es gliedert den Text an H2-Überschriften ("##") in **Segmente**, erlaubt fuzzy Suche
19
+ und bietet ein interaktives CLI mit Clipboard‑Kopie.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install sheetah
25
+ ```
26
+
27
+ Oder lokal aus dem Quellcode (in einer virtuellen Umgebung):
28
+
29
+ ```bash
30
+ cd sheetah
31
+ python -m venv .venv
32
+ . .venv/Scripts/activate # oder source .venv/bin/activate
33
+ python -m pip install -e .[all]
34
+ ```
35
+
36
+ > `prompt_toolkit` und `pyperclip` werden für die interaktive UI benötigt und als reguläre Abhängigkeiten automatisch mitinstalliert.
37
+
38
+ ## Nutzung
39
+
40
+ ### Programmatisch
41
+
42
+ ```python
43
+ from sheetah import Document
44
+
45
+ txt = """Einführung
46
+
47
+ ## Abschnitt 1
48
+ Text1
49
+
50
+ ## Abschnitt 2
51
+ Text2
52
+ """
53
+
54
+ doc = Document.from_markdown(txt)
55
+ print(doc.description)
56
+ for seg in doc.search("Text"):
57
+ print(seg.name)
58
+ print(seg.text())
59
+ ```
60
+
61
+ ### Kommandozeile
62
+
63
+ ```bash
64
+ sheetah pfad/zur/datei.md
65
+ ```
66
+
67
+ Eine interaktive Oberfläche erscheint:
68
+
69
+ 1. Eingabe der Suchanfrage oben
70
+ 2. Ergebnisse werden aufgelistet, bester Treffer ist vorgeschlagen
71
+ 3. Mit Pfeil hoch/runter navigieren; Vorschau rechts
72
+ 4. `<Enter>` kopiert den Text des ausgewählten Segments in die Zwischenablage
73
+ 5. `<Ctrl-C>` oder `<Ctrl-Q>` beendet das Programm
74
+
75
+ Die Beschreibung des Dokuments (alles vor dem ersten `##`) wird über der
76
+ Suchzeile angezeigt.
77
+
78
+ ## API
79
+
80
+ - `Document.from_markdown(md: str) -> Document` – erstellt ein Dokument aus einem Markdown-String.
81
+ - `Document.items` – Liste der `Segment`-Instanzen.
82
+ - `Document.description` – Markdown-Text vor dem ersten Abschnitt.
83
+ - `Document.search(query: str, limit: Optional[int]=None)` – fuzzy Suche.
84
+ - `Segment.name` – Name (Header) des Segments.
85
+ - `Segment.text()` – reiner Text.
86
+ - `Segment.html()` – HTML-Konvertierung.
87
+
88
+ ## Tests
89
+
90
+ ```bash
91
+ pytest
92
+ ```
93
+
94
+ ## Lizenz
95
+
96
+ MIT, siehe `LICENSE`.
@@ -0,0 +1,82 @@
1
+ # sheetah
2
+
3
+ `sheetah` ist ein einfaches Python-Paket zur Verarbeitung von Markdown-Dokumenten.
4
+ Es gliedert den Text an H2-Überschriften ("##") in **Segmente**, erlaubt fuzzy Suche
5
+ und bietet ein interaktives CLI mit Clipboard‑Kopie.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install sheetah
11
+ ```
12
+
13
+ Oder lokal aus dem Quellcode (in einer virtuellen Umgebung):
14
+
15
+ ```bash
16
+ cd sheetah
17
+ python -m venv .venv
18
+ . .venv/Scripts/activate # oder source .venv/bin/activate
19
+ python -m pip install -e .[all]
20
+ ```
21
+
22
+ > `prompt_toolkit` und `pyperclip` werden für die interaktive UI benötigt und als reguläre Abhängigkeiten automatisch mitinstalliert.
23
+
24
+ ## Nutzung
25
+
26
+ ### Programmatisch
27
+
28
+ ```python
29
+ from sheetah import Document
30
+
31
+ txt = """Einführung
32
+
33
+ ## Abschnitt 1
34
+ Text1
35
+
36
+ ## Abschnitt 2
37
+ Text2
38
+ """
39
+
40
+ doc = Document.from_markdown(txt)
41
+ print(doc.description)
42
+ for seg in doc.search("Text"):
43
+ print(seg.name)
44
+ print(seg.text())
45
+ ```
46
+
47
+ ### Kommandozeile
48
+
49
+ ```bash
50
+ sheetah pfad/zur/datei.md
51
+ ```
52
+
53
+ Eine interaktive Oberfläche erscheint:
54
+
55
+ 1. Eingabe der Suchanfrage oben
56
+ 2. Ergebnisse werden aufgelistet, bester Treffer ist vorgeschlagen
57
+ 3. Mit Pfeil hoch/runter navigieren; Vorschau rechts
58
+ 4. `<Enter>` kopiert den Text des ausgewählten Segments in die Zwischenablage
59
+ 5. `<Ctrl-C>` oder `<Ctrl-Q>` beendet das Programm
60
+
61
+ Die Beschreibung des Dokuments (alles vor dem ersten `##`) wird über der
62
+ Suchzeile angezeigt.
63
+
64
+ ## API
65
+
66
+ - `Document.from_markdown(md: str) -> Document` – erstellt ein Dokument aus einem Markdown-String.
67
+ - `Document.items` – Liste der `Segment`-Instanzen.
68
+ - `Document.description` – Markdown-Text vor dem ersten Abschnitt.
69
+ - `Document.search(query: str, limit: Optional[int]=None)` – fuzzy Suche.
70
+ - `Segment.name` – Name (Header) des Segments.
71
+ - `Segment.text()` – reiner Text.
72
+ - `Segment.html()` – HTML-Konvertierung.
73
+
74
+ ## Tests
75
+
76
+ ```bash
77
+ pytest
78
+ ```
79
+
80
+ ## Lizenz
81
+
82
+ MIT, siehe `LICENSE`.
@@ -0,0 +1,23 @@
1
+ [project]
2
+ name = "sheetah"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Carsten Engelke", email = "carsten.engelke@gmail.com" }
8
+ ]
9
+ requires-python = ">=3.13"
10
+ dependencies = [
11
+ "pygubu>=0.40",
12
+ "markdown>=3.0",
13
+ "html2text>=2020.1.16",
14
+ "prompt_toolkit>=3.0",
15
+ "pyperclip>=1.8",
16
+ ]
17
+
18
+ [project.scripts]
19
+ sheetah = "sheetah.cli:main"
20
+
21
+ [build-system]
22
+ requires = ["uv_build>=0.10.2,<0.11.0"]
23
+ build-backend = "uv_build"
@@ -0,0 +1,19 @@
1
+ """Package entrypoint for :mod:`sheetah`.
2
+
3
+ The top-level ``main`` function is simply routed to the command‑line
4
+ implementation so installing the package and invoking ``sheetah`` will
5
+ launch the interactive search UI. Helper classes are re-exported here as a
6
+ convenience for programmatic use.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .sheetah import Document, Segment
12
+ from . import cli
13
+
14
+ __all__ = ["Document", "Segment", "cli", "main"]
15
+
16
+
17
def main() -> None:
    """Launch the interactive search UI.

    Convenience wrapper that hands control to ``cli.main`` so that calling
    ``sheetah.main()`` starts the same command-line interface as the
    imported ``cli`` module does. Arguments are taken from ``sys.argv`` by
    the delegate; nothing is returned.
    """
    run_cli = cli.main
    run_cli()
@@ -0,0 +1,118 @@
1
+ """Command-line interface for searching documents created from Markdown."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+ from typing import List
8
+
9
+ from prompt_toolkit import Application
10
+ from prompt_toolkit.key_binding import KeyBindings
11
+ from prompt_toolkit.layout import Layout, HSplit, VSplit
12
+ from prompt_toolkit.widgets import TextArea, Label, Frame
13
+
14
+ import pyperclip
15
+
16
+ from sheetah.sheetah import Document, Segment
17
+
18
+
19
class DocSearchUI:
    """Full-screen terminal UI for searching the segments of a document.

    Layout (top to bottom): the document description, a one-line search
    field, a split view with the result list on the left and a preview of
    the selected segment on the right, and a status bar.

    Key bindings: up/down move the selection, Enter copies the selected
    segment's plain text to the clipboard, Ctrl-C / Ctrl-Q quit.
    """

    def __init__(self, document: Document):
        """Build widgets, key bindings and the application for *document*."""
        self.document = document
        # Initially every segment is listed; searching replaces this list.
        self.results: List[Segment] = document.items
        self.selected = 0

        # Description shown above the search field (empty if there is none).
        self.description_label = Label(text=document.description or "")

        self.search_field = TextArea(height=1, prompt="Search: ", multiline=False)
        self.result_area = TextArea(focusable=False, scrollbar=True)
        self.detail_area = TextArea(focusable=False, scrollbar=True)
        self.status_bar = Label(text="Use up/down to navigate, Enter to copy, Ctrl-C to quit.")

        # Re-run the search whenever the query text changes.
        self.search_field.buffer.on_text_changed += self._on_search_change
        self._update_display()

        self.kb = KeyBindings()
        self.kb.add("up")(self._go_up)
        self.kb.add("down")(self._go_down)
        self.kb.add("enter")(self._copy_current)
        self.kb.add("c-c")(self._exit)
        self.kb.add("c-q")(self._exit)

        root = HSplit([
            self.description_label,
            self.search_field,
            VSplit([
                Frame(self.result_area, title="Segments", width=40),
                Frame(self.detail_area, title="Preview"),
            ]),
            self.status_bar,
        ])

        self.app = Application(layout=Layout(root), key_bindings=self.kb, full_screen=True)

    def _on_search_change(self, _) -> None:
        """Refresh the result list for the current query and reset selection."""
        self.results = self.document.search(self.search_field.text)
        self.selected = 0
        self._update_display()

    def _format_results(self) -> str:
        """Return the result names, one per line, marking the selected one."""
        return "\n".join(
            ("> " if index == self.selected else " ") + seg.name
            for index, seg in enumerate(self.results)
        )

    def _update_display(self) -> None:
        """Redraw the result list and the preview of the selected segment."""
        self.result_area.text = self._format_results()
        if self.results:
            self.detail_area.text = self.results[self.selected].text()
        else:
            self.detail_area.text = "<no results>"

    def _go_up(self, event) -> None:
        """Move the selection one entry up, stopping at the first entry."""
        if self.results:
            self.selected = max(0, self.selected - 1)
            self._update_display()

    def _go_down(self, event) -> None:
        """Move the selection one entry down, stopping at the last entry."""
        if self.results:
            self.selected = min(len(self.results) - 1, self.selected + 1)
            self._update_display()

    def _copy_current(self, event) -> None:
        """Copy the selected segment's plain text to the clipboard.

        Does nothing when there are no results. A clipboard failure (for
        example no copy/paste backend on a headless system) is reported in
        the status bar instead of propagating out of the key handler.
        """
        if not self.results:
            return
        try:
            pyperclip.copy(self.results[self.selected].text())
        except pyperclip.PyperclipException:
            self.status_bar.text = "Could not copy to clipboard. (Ctrl-C to quit)"
            return
        # Show a little confirmation in the status bar.
        self.status_bar.text = "Copied to clipboard! (Ctrl-C to quit)"

    def _exit(self, event) -> None:
        """Leave the application loop."""
        event.app.exit()

    def run(self) -> None:
        """Run the UI until the user quits."""
        self.app.run()
98
+
99
+
100
def main(argv=None):
    """Parse the command line, load the Markdown file and start the UI.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Exits with status 1 (after printing a message to stderr) when the file
    cannot be opened, read, or decoded as UTF-8.
    """
    argv = argv if argv is not None else sys.argv[1:]
    parser = argparse.ArgumentParser(description="Search markdown document interactively.")
    parser.add_argument("file", help="Path to markdown file")
    args = parser.parse_args(argv)

    try:
        # Context manager guarantees the handle is closed even on decode errors.
        with open(args.file, encoding="utf-8") as handle:
            text = handle.read()
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file; ValueError covers UnicodeDecodeError
        # and invalid paths (e.g. embedded NUL).
        print(f"Unable to read {args.file}: {e}", file=sys.stderr)
        sys.exit(1)

    doc = Document.from_markdown(text)
    ui = DocSearchUI(doc)
    ui.run()
115
+
116
+
117
+ if __name__ == "__main__":
118
+ main()
@@ -0,0 +1,220 @@
1
+ from dataclasses import dataclass
2
+ from typing import List, Optional
3
+ import re
4
+
5
+ # use established third-party libraries for conversions
6
+ try:
7
+ import markdown as _markdown_lib
8
+ except ImportError: # pragma: no cover
9
+ _markdown_lib = None
10
+
11
+ try:
12
+ import html2text as _html2text_lib
13
+ except ImportError: # pragma: no cover
14
+ _html2text_lib = None
15
+
16
+ from difflib import SequenceMatcher
17
+
18
+
19
@dataclass
class Segment:
    """A named section of a Markdown document.

    ``name`` is the header text of the section and ``_markdown`` holds the
    raw Markdown body that follows the header.
    """

    name: str  # header text of the section
    _markdown: str  # raw Markdown body of the section

    def markdown(self) -> str:
        """Return the raw Markdown body unchanged."""
        return self._markdown

    def text(self) -> str:
        """Return a plain-text version of the Markdown content.

        When the ``markdown`` package is importable, the content is rendered
        to HTML, the tags are removed, and HTML entities are decoded so that
        characters such as ``<`` and ``&`` appear literally. Otherwise a
        crude regex-based cleanup of the Markdown source is used so the
        method always returns something.
        """
        import html as _htmlmod

        md = self._markdown

        # Lazy import so a package installed after module import is picked up.
        try:
            import markdown as _m
        except ImportError:
            _m = None

        if _m:
            rendered = _m.markdown(md)
            # Remove tags first, then decode entities: decoding first would
            # turn an escaped "&lt;b&gt;" into something that looks like a tag.
            without_tags = re.sub(r"<[^>]+>", "", rendered)
            return _htmlmod.unescape(without_tags).strip()

        # Fallback: crude regex-based cleanup of the Markdown source.
        md = re.sub(r"```[\s\S]*?```", "", md)
        md = re.sub(r"`([^`]*)`", r"\1", md)
        md = re.sub(r"!\[.*?\]\(.*?\)", "", md)
        md = re.sub(r"\[(.*?)\]\(.*?\)", r"\1", md)
        md = re.sub(r"\*\*(.*?)\*\*", r"\1", md)
        md = re.sub(r"\*(.*?)\*", r"\1", md)
        md = re.sub(r"__(.*?)__", r"\1", md)
        md = re.sub(r"_(.*?)_", r"\1", md)
        md = re.sub(r"^#+\s*", "", md, flags=re.MULTILINE)
        md = re.sub(r"^>\s?", "", md, flags=re.MULTILINE)
        md = re.sub(r"[-*_]{3,}", "", md)
        md = re.sub(r"^[\s]*[-*+]\s+", "", md, flags=re.MULTILINE)
        md = re.sub(r"\n{2,}", "\n\n", md)
        return md.strip()

    def html(self) -> str:
        """Return HTML generated from the Markdown content.

        Uses the ``markdown`` package when it is importable. Otherwise a
        minimal built-in converter handles fenced/inline code, links,
        bold/italic, ``#``–``###`` headers, bullet lists and paragraphs.
        """
        md = self._markdown

        # Lazy import so a package installed after module import is picked up.
        try:
            import markdown as _m
        except ImportError:
            _m = None

        if _m:
            return _m.markdown(md)

        # Fallback minimal converter.
        import html as _htmlmod

        # Escape exactly once, up front. The code-span substitutions below
        # must not escape again, or "<" would end up as "&amp;lt;".
        text = _htmlmod.escape(md)
        text = re.sub(r"```([\s\S]*?)```", lambda m: f"<pre><code>{m.group(1)}</code></pre>", text)
        text = re.sub(r"`([^`]*)`", lambda m: f"<code>{m.group(1)}</code>", text)
        # Single-quoted template: a backslash-escaped double quote in a raw
        # replacement string would be emitted literally as \" in the output.
        text = re.sub(r"\[(.*?)\]\((.*?)\)", r'<a href="\2">\1</a>', text)
        text = re.sub(r"\*\*(.*?)\*\*", r"<strong>\1</strong>", text)
        text = re.sub(r"\*(.*?)\*", r"<em>\1</em>", text)
        # Deepest header level first so "###" is not consumed by the "#" rule.
        text = re.sub(r"^###\s*(.*?)$", r"<h3>\1</h3>", text, flags=re.MULTILINE)
        text = re.sub(r"^##\s*(.*?)$", r"<h2>\1</h2>", text, flags=re.MULTILINE)
        text = re.sub(r"^#\s*(.*?)$", r"<h1>\1</h1>", text, flags=re.MULTILINE)

        def _ulify(source: str) -> str:
            """Wrap consecutive bullet lines in a single ``<ul>`` element."""
            out = []
            in_ul = False
            for line in source.splitlines():
                bullet = re.match(r"^[\s]*[-*+]\s+(.*)$", line)
                if bullet:
                    if not in_ul:
                        out.append("<ul>")
                        in_ul = True
                    out.append(f"<li>{bullet.group(1)}</li>")
                else:
                    if in_ul:
                        out.append("</ul>")
                        in_ul = False
                    out.append(line)
            if in_ul:
                out.append("</ul>")
            return "\n".join(out)

        text = _ulify(text)

        # Blank-line separated chunks become paragraphs unless they already
        # start with a block-level element produced above.
        block_starts = ("<h1>", "<h2>", "<h3>", "<ul>", "<pre>")
        chunks = [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]
        return "\n".join(
            chunk if chunk.startswith(block_starts) else f"<p>{chunk}</p>"
            for chunk in chunks
        )
130
+
131
+
132
class Document:
    """A Markdown document split into a description and named segments."""

    def __init__(self, items: Optional[List["Segment"]] = None, description: str = ""):
        """Store the segments (a new empty list if none given) and description."""
        self.items: List["Segment"] = items or []
        self.description: str = description

    @classmethod
    def from_markdown(cls, md: str) -> "Document":
        """Create a document from a Markdown string.

        Everything before the first section header is the document
        description; each header starts a new segment named after the
        header text. A header is any line beginning with two or more ``#``
        characters, so level-3 and deeper headers also start segments.
        Lines inside fenced code blocks (``` or ~~~) are never treated as
        headers, so shell comments such as ``## note`` in an example stay
        part of the surrounding text.
        """
        header_re = re.compile(r"^##+\s*(.*)")
        fence_re = re.compile(r"^\s{0,3}(`{3,}|~{3,})")

        items: List["Segment"] = []
        description_lines: List[str] = []
        buffer: List[str] = []
        current_name: Optional[str] = None  # None until the first header
        open_fence: Optional[str] = None  # fence character while inside a code block

        for line in md.splitlines(keepends=True):
            fence = fence_re.match(line)
            if fence:
                marker = fence.group(1)[0]
                if open_fence is None:
                    open_fence = marker
                elif marker == open_fence:
                    # Only the same fence character closes the block.
                    open_fence = None

            header = header_re.match(line) if open_fence is None else None
            if header is None:
                target = description_lines if current_name is None else buffer
                target.append(line)
                continue

            # A header closes the segment collected so far, if any.
            if current_name is not None:
                items.append(Segment(name=current_name, _markdown="".join(buffer).rstrip()))
            current_name = header.group(1).strip()
            buffer = []

        # Finalize the last open segment.
        if current_name is not None:
            items.append(Segment(name=current_name, _markdown="".join(buffer).rstrip()))

        return cls(items, description="".join(description_lines).strip())

    def list(self, names_only: bool = True) -> List:
        """Return the segment names, or a copy of the segments themselves."""
        return [it.name for it in self.items] if names_only else list(self.items)

    def search(self, query: str, limit: Optional[int] = None) -> List["Segment"]:
        """Return segments ranked by relevance to *query*, best first.

        Matching is case-insensitive. A substring hit in the name scores
        highest, a hit in the content next; earlier positions score more. A
        small fuzzy-similarity bonus is added for both. Segments scoring
        zero are dropped. An empty query returns all segments in document
        order.

        Args:
            query: Search text; ``None`` or blank means "no filter".
            limit: Maximum number of results. ``None`` means unlimited and
                ``0`` yields an empty list.
        """
        q = (query or "").strip().lower()
        if not q:
            ranked = list(self.items)
        else:
            scored: List[tuple] = []
            for it in self.items:
                name = (it.name or "").lower()
                content = (it._markdown or "").lower()
                score = 0

                name_pos = name.find(q)
                if name_pos != -1:
                    score += 200 + max(0, 50 - name_pos)
                content_pos = content.find(q)
                if content_pos != -1:
                    score += 100 + max(0, 25 - content_pos)

                # Fuzzy similarity.
                score += int(SequenceMatcher(None, q, name).ratio() * 50)
                score += int(SequenceMatcher(None, q, content).ratio() * 10)

                if score > 0:
                    scored.append((score, it))

            # Stable sort keeps document order among equally scored segments.
            scored.sort(key=lambda pair: pair[0], reverse=True)
            ranked = [it for _, it in scored]

        # Compare against None so that limit=0 is honoured rather than ignored.
        return ranked if limit is None else ranked[:limit]
209
+
210
+
211
+ # expose CLI entrypoint for setuptools
212
# Module-level ``main`` shim: forwards to the CLI when it can be imported,
# otherwise raises a descriptive error when called.
try:
    from . import cli
except ImportError:  # pragma: no cover - running without package context
    def main():
        """Fallback used when the CLI module cannot be imported."""
        raise RuntimeError("CLI not available; install prompt_toolkit first")
else:
    def main():
        """Run the command-line interface and return its result."""
        return cli.main()