lorewiki 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {lorewiki-0.2.1 → lorewiki-0.2.2}/PKG-INFO +1 -1
  2. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/__init__.py +1 -1
  3. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/add.py +37 -1
  4. {lorewiki-0.2.1 → lorewiki-0.2.2}/pyproject.toml +1 -1
  5. {lorewiki-0.2.1 → lorewiki-0.2.2}/.gitignore +0 -0
  6. {lorewiki-0.2.1 → lorewiki-0.2.2}/LICENSE +0 -0
  7. {lorewiki-0.2.1 → lorewiki-0.2.2}/README.md +0 -0
  8. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/__main__.py +0 -0
  9. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/__init__.py +0 -0
  10. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/apps.py +0 -0
  11. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/commands.py +0 -0
  12. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/config_cmds.py +0 -0
  13. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/helpers.py +0 -0
  14. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/cli/topic_cmds.py +0 -0
  15. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/config.py +0 -0
  16. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/db/__init__.py +0 -0
  17. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/db/connection.py +0 -0
  18. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/db/models.py +0 -0
  19. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/db/schema.sql +0 -0
  20. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/__init__.py +0 -0
  21. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/chunker.py +0 -0
  22. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/cleaning.py +0 -0
  23. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/indexer.py +0 -0
  24. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/indexer/parser.py +0 -0
  25. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/llm/__init__.py +0 -0
  26. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/llm/client.py +0 -0
  27. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/llm/generator.py +0 -0
  28. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/py.typed +0 -0
  29. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/__init__.py +0 -0
  30. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/base.py +0 -0
  31. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/bm25.py +0 -0
  32. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/fusion.py +0 -0
  33. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/hierarchy.py +0 -0
  34. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/search.py +0 -0
  35. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/retriever/vector.py +0 -0
  36. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/topic.py +0 -0
  37. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/utils/__init__.py +0 -0
  38. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/utils/logger.py +0 -0
  39. {lorewiki-0.2.1 → lorewiki-0.2.2}/lorewiki/utils/topic_shared.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lorewiki
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Local-first knowledge base for LLM-assisted coding, with hybrid retrieval (BM25 + hierarchy + optional vector) over SQLite FTS5.
5
5
  Project-URL: Documentation, https://github.com/JochenYang/Lore-wiki
6
6
  Project-URL: Source, https://github.com/JochenYang/Lore-wiki
@@ -1,4 +1,4 @@
1
1
  """LoreWiki - Local-first knowledge base for LLM-assisted coding."""
2
2
 
3
- __version__ = "0.2.1"
3
+ __version__ = "0.2.2"
4
4
  __all__ = ["__version__"]
@@ -88,6 +88,14 @@ def _read_body(body: str | None, file: Path | None) -> str:
88
88
  Priority: ``--body`` → ``--file`` → ``sys.stdin`` (if not a TTY).
89
89
  Returns the body text with a single trailing newline so the
90
90
  frontmatter/body separator renders cleanly.
91
+
92
+ stdin content may carry lone surrogates: Windows PowerShell
92
+ pipes strings as UTF-16 LE, which Python's stdin reader can
93
+ surface as **lone** surrogate codepoints (U+D800..U+DFFF).
94
+ UTF-8 cannot encode lone surrogates, so leaving them in would
95
+ crash the downstream ``write_text(..., 'utf-8')`` call with
96
+ ``UnicodeEncodeError``. This function does not scrub them; the
97
+ caller runs every body through :func:`_strip_surrogates`.
91
99
  """
92
100
  if body is not None and body.strip():
93
101
  return body.rstrip() + "\n"
@@ -104,6 +112,22 @@ def _read_body(body: str | None, file: Path | None) -> str:
104
112
  raise typer.BadParameter(msg)
105
113
 
106
114
 
115
+ # Lone-surrogate scrub. See the docstring of ``_read_body`` for why
116
+ # this is needed. Python's ``str`` has no built-in method for
117
+ # removing lone surrogates, so we do the regex replacement
118
+ # ourselves.
119
+ _SURROGATE_RE = re.compile(r"[\ud800-\udfff]")
120
+
121
+
122
+ def _strip_surrogates(text: str) -> str:
123
+ """Replace every lone UTF-16 surrogate codepoint with U+FFFD.
124
+
125
+ Used by ``add`` on every body, whatever its source. Idempotent: running
126
+ it twice produces the same output as running it once.
127
+ """
128
+ return _SURROGATE_RE.sub("\ufffd", text)
129
+
130
+
107
131
  # ---------------------------------------------------------------------------
108
132
  # Title inference
109
133
  # ---------------------------------------------------------------------------
@@ -228,6 +252,12 @@ def add(
228
252
  """
229
253
  # ---- 1. body & title ----------------------------------------------------
230
254
  raw_body = _read_body(body, file)
255
+ # Scrub lone UTF-16 surrogates that may have entered through any
256
+ # path (stdin on Windows + PowerShell is the common case, but a
257
+ # buggy ``--body`` from a script that decoded UTF-16 LE with the
258
+ # wrong codec would hit the same problem). Idempotent — safe to
259
+ # run on bodies that are already clean.
260
+ raw_body = _strip_surrogates(raw_body)
231
261
  final_title = title.strip() or _extract_h1(raw_body) or slugify(raw_body[:64])
232
262
 
233
263
  # ---- 2. resolve paths ---------------------------------------------------
@@ -274,9 +304,15 @@ def add(
274
304
  target_dir.mkdir(parents=True, exist_ok=True)
275
305
  try:
276
306
  target_path.write_text(frontmatter.dumps(post) + "\n", encoding="utf-8")
277
- except OSError as exc:
307
+ except (OSError, UnicodeEncodeError) as exc:
308
+ # ``UnicodeEncodeError`` is NOT a subclass of ``OSError``, so
309
+ # the original ``except OSError`` silently let it through and
310
+ # left a 0-byte file on disk. Belt-and-braces: clean up so a
311
+ # subsequent ``add`` invocation doesn't trip the
312
+ # "target exists" check against an empty file.
278
313
  log.error("write failed {}: {}", target_path, exc)
279
314
  console.print(f"[red]write failed:[/red] {exc}")
315
+ target_path.unlink(missing_ok=True)
280
316
  raise typer.Exit(code=5) from exc
281
317
 
282
318
  # ---- 6. re-index --------------------------------------------------------
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "lorewiki"
7
- version = "0.2.1"
7
+ version = "0.2.2"
8
8
  description = "Local-first knowledge base for LLM-assisted coding, with hybrid retrieval (BM25 + hierarchy + optional vector) over SQLite FTS5."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes