lorewiki 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lorewiki-0.2.1 → lorewiki-0.2.2}/PKG-INFO +1 -1
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/__init__.py +1 -1
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/add.py +37 -1
- {lorewiki-0.2.1 → lorewiki-0.2.2}/pyproject.toml +1 -1
- {lorewiki-0.2.1 → lorewiki-0.2.2}/.gitignore +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/LICENSE +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/README.md +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/__main__.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/__init__.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/apps.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/commands.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/config_cmds.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/helpers.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/topic_cmds.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/config.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/db/__init__.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/db/connection.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/db/models.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/db/schema.sql +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/__init__.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/chunker.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/cleaning.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/indexer.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/parser.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/llm/__init__.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/llm/client.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/llm/generator.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/py.typed +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/__init__.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/base.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/bm25.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/fusion.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/hierarchy.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/search.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/vector.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/topic.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/utils/__init__.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/utils/logger.py +0 -0
- {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/utils/topic_shared.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lorewiki
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Local-first knowledge base for LLM-assisted coding, with hybrid retrieval (BM25 + hierarchy + optional vector) over SQLite FTS5.
|
|
5
5
|
Project-URL: Documentation, https://github.com/JochenYang/Lore-wiki
|
|
6
6
|
Project-URL: Source, https://github.com/JochenYang/Lore-wiki
|
|
@@ -88,6 +88,14 @@ def _read_body(body: str | None, file: Path | None) -> str:
|
|
|
88
88
|
Priority: ``--body`` → ``--file`` → ``sys.stdin`` (if not a TTY).
|
|
89
89
|
Returns the body text with a single trailing newline so the
|
|
90
90
|
frontmatter/body separator renders cleanly.
|
|
91
|
+
|
|
92
|
+
stdin content goes through :func:`_strip_surrogates` because
|
|
93
|
+
Windows PowerShell pipes strings as UTF-16 LE, which Python's
|
|
94
|
+
stdin reader can surface as **lone** surrogate codepoints
|
|
95
|
+
(U+D800..U+DFFF). UTF-8 cannot encode lone surrogates, so
|
|
96
|
+
leaving them in would crash the downstream ``write_text(..., 'utf-8')``
|
|
97
|
+
call with ``UnicodeEncodeError``. ``--body`` and ``--file`` paths
|
|
98
|
+
are normally valid str, so they skip the scrub here; ``add`` scrubs the final body again.
|
|
91
99
|
"""
|
|
92
100
|
if body is not None and body.strip():
|
|
93
101
|
return body.rstrip() + "\n"
|
|
@@ -104,6 +112,22 @@ def _read_body(body: str | None, file: Path | None) -> str:
|
|
|
104
112
|
raise typer.BadParameter(msg)
|
|
105
113
|
|
|
106
114
|
|
|
115
|
+
# Lone-surrogate scrub. See the docstring of ``_read_body`` for why
|
|
116
|
+
# this is needed. Python's ``str`` type has no built-in method for
|
|
117
|
+
# replacing lone surrogates, so we do the regex replacement
|
|
118
|
+
|
+
# ourselves.
|
|
119
|
+
_SURROGATE_RE = re.compile(r"[\ud800-\udfff]")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _strip_surrogates(text: str) -> str:
|
|
123
|
+
"""Replace every lone UTF-16 surrogate codepoint with U+FFFD.
|
|
124
|
+
|
|
125
|
+
Used by :func:`_read_body` on the stdin path. Idempotent: running
|
|
126
|
+
it twice produces the same output as running it once.
|
|
127
|
+
"""
|
|
128
|
+
return _SURROGATE_RE.sub("\ufffd", text)
|
|
129
|
+
|
|
130
|
+
|
|
107
131
|
# ---------------------------------------------------------------------------
|
|
108
132
|
# Title inference
|
|
109
133
|
# ---------------------------------------------------------------------------
|
|
@@ -228,6 +252,12 @@ def add(
|
|
|
228
252
|
"""
|
|
229
253
|
# ---- 1. body & title ----------------------------------------------------
|
|
230
254
|
raw_body = _read_body(body, file)
|
|
255
|
+
# Scrub lone UTF-16 surrogates that may have entered through any
|
|
256
|
+
# path (stdin on Windows + PowerShell is the common case, but a
|
|
257
|
+
# buggy ``--body`` from a script that decoded UTF-16 LE with the
|
|
258
|
+
# wrong codec would hit the same problem). Idempotent — safe to
|
|
259
|
+
# run on bodies that are already clean.
|
|
260
|
+
raw_body = _strip_surrogates(raw_body)
|
|
231
261
|
final_title = title.strip() or _extract_h1(raw_body) or slugify(raw_body[:64])
|
|
232
262
|
|
|
233
263
|
# ---- 2. resolve paths ---------------------------------------------------
|
|
@@ -274,9 +304,15 @@ def add(
|
|
|
274
304
|
target_dir.mkdir(parents=True, exist_ok=True)
|
|
275
305
|
try:
|
|
276
306
|
target_path.write_text(frontmatter.dumps(post) + "\n", encoding="utf-8")
|
|
277
|
-
except OSError as exc:
|
|
307
|
+
except (OSError, UnicodeEncodeError) as exc:
|
|
308
|
+
# ``UnicodeEncodeError`` is NOT a subclass of ``OSError``, so
|
|
309
|
+
# the original ``except OSError`` silently let it through and
|
|
310
|
+
# left a 0-byte file on disk. Belt-and-braces: clean up so a
|
|
311
|
+
# subsequent ``add`` invocation doesn't trip the
|
|
312
|
+
# "target exists" check against an empty file.
|
|
278
313
|
log.error("write failed {}: {}", target_path, exc)
|
|
279
314
|
console.print(f"[red]write failed:[/red] {exc}")
|
|
315
|
+
target_path.unlink(missing_ok=True)
|
|
280
316
|
raise typer.Exit(code=5) from exc
|
|
281
317
|
|
|
282
318
|
# ---- 6. re-index --------------------------------------------------------
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "lorewiki"
|
|
7
|
-
version = "0.2.1"
|
|
7
|
+
version = "0.2.2"
|
|
8
8
|
description = "Local-first knowledge base for LLM-assisted coding, with hybrid retrieval (BM25 + hierarchy + optional vector) over SQLite FTS5."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|