lorewiki 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. lorewiki-0.2.1/.gitignore +59 -0
  2. lorewiki-0.2.1/LICENSE +21 -0
  3. lorewiki-0.2.1/PKG-INFO +363 -0
  4. lorewiki-0.2.1/README.md +319 -0
  5. lorewiki-0.2.1/lorewiki/__init__.py +4 -0
  6. lorewiki-0.2.1/lorewiki/__main__.py +6 -0
  7. lorewiki-0.2.1/lorewiki/cli/__init__.py +25 -0
  8. lorewiki-0.2.1/lorewiki/cli/add.py +324 -0
  9. lorewiki-0.2.1/lorewiki/cli/apps.py +181 -0
  10. lorewiki-0.2.1/lorewiki/cli/commands.py +659 -0
  11. lorewiki-0.2.1/lorewiki/cli/config_cmds.py +92 -0
  12. lorewiki-0.2.1/lorewiki/cli/helpers.py +177 -0
  13. lorewiki-0.2.1/lorewiki/cli/topic_cmds.py +350 -0
  14. lorewiki-0.2.1/lorewiki/config.py +265 -0
  15. lorewiki-0.2.1/lorewiki/db/__init__.py +25 -0
  16. lorewiki-0.2.1/lorewiki/db/connection.py +125 -0
  17. lorewiki-0.2.1/lorewiki/db/models.py +57 -0
  18. lorewiki-0.2.1/lorewiki/db/schema.sql +104 -0
  19. lorewiki-0.2.1/lorewiki/indexer/__init__.py +20 -0
  20. lorewiki-0.2.1/lorewiki/indexer/chunker.py +229 -0
  21. lorewiki-0.2.1/lorewiki/indexer/cleaning.py +402 -0
  22. lorewiki-0.2.1/lorewiki/indexer/indexer.py +275 -0
  23. lorewiki-0.2.1/lorewiki/indexer/parser.py +113 -0
  24. lorewiki-0.2.1/lorewiki/llm/__init__.py +24 -0
  25. lorewiki-0.2.1/lorewiki/llm/client.py +290 -0
  26. lorewiki-0.2.1/lorewiki/llm/generator.py +203 -0
  27. lorewiki-0.2.1/lorewiki/py.typed +0 -0
  28. lorewiki-0.2.1/lorewiki/retriever/__init__.py +18 -0
  29. lorewiki-0.2.1/lorewiki/retriever/base.py +21 -0
  30. lorewiki-0.2.1/lorewiki/retriever/bm25.py +226 -0
  31. lorewiki-0.2.1/lorewiki/retriever/fusion.py +88 -0
  32. lorewiki-0.2.1/lorewiki/retriever/hierarchy.py +248 -0
  33. lorewiki-0.2.1/lorewiki/retriever/search.py +89 -0
  34. lorewiki-0.2.1/lorewiki/retriever/vector.py +51 -0
  35. lorewiki-0.2.1/lorewiki/topic.py +675 -0
  36. lorewiki-0.2.1/lorewiki/utils/__init__.py +5 -0
  37. lorewiki-0.2.1/lorewiki/utils/logger.py +81 -0
  38. lorewiki-0.2.1/lorewiki/utils/topic_shared.py +39 -0
  39. lorewiki-0.2.1/pyproject.toml +193 -0
@@ -0,0 +1,59 @@
1
+ # Python build / cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ build/
6
+ dist/
7
+ .eggs/
8
+
9
+ # Virtual env
10
+ .venv/
11
+ venv/
12
+ env/
13
+
14
+ # Test / coverage
15
+ .pytest_cache/
16
+ .coverage
17
+ .coverage.*
18
+ htmlcov/
19
+ coverage.xml
20
+ .tox/
21
+
22
+ # Lint
23
+ .ruff_cache/
24
+
25
+ # OS
26
+ .DS_Store
27
+ Thumbs.db
28
+ desktop.ini
29
+
30
+ # Editor
31
+ .vscode/
32
+ .idea/
33
+ *.swp
34
+
35
+ # lorewiki — every project's built index is regenerated by
36
+ # ``lorewiki index``. Keeping them out of git avoids spurious diffs
37
+ # and keeps the repo small.
38
+ **/.lorewiki/index.db
39
+ **/.lorewiki/index.db-*
40
+ **/.lorewiki/clean-backup/
41
+ example_wiki/.lorewiki/
42
+
43
+ # uv — uv.lock is *ignored* here (library-style project). uv's default is
44
+ # to commit it for reproducible installs; if you want that, delete the
45
+ # `uv.lock` line below (or this section entirely).
46
+ uv.lock
47
+
48
+ # Build artefacts that aren't covered by the rules above.
49
+ # `.tgz` is what `npm pack` writes (a tarball, not a build dir).
50
+ # `.zip` covers GitHub release assets that some devs stage locally.
51
+ *.tgz
52
+ *.zip
53
+
54
+ # secrets / local overrides that should never be committed
55
+ .env
56
+ .env.*
57
+ !.env.example
58
+ *.local.toml
59
+
lorewiki-0.2.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 LoreWiki contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,363 @@
1
+ Metadata-Version: 2.4
2
+ Name: lorewiki
3
+ Version: 0.2.1
4
+ Summary: Local-first knowledge base for LLM-assisted coding, with hybrid retrieval (BM25 + hierarchy + optional vector) over SQLite FTS5.
5
+ Project-URL: Documentation, https://github.com/JochenYang/Lore-wiki
6
+ Project-URL: Source, https://github.com/JochenYang/Lore-wiki
7
+ Author: LoreWiki Team
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: cli,fts5,knowledge-base,llm,rag,sqlite
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Software Development :: Documentation
20
+ Classifier: Topic :: Text Processing :: Indexing
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: click>=8.0
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: loguru>=0.7
25
+ Requires-Dist: pydantic-settings>=2.2
26
+ Requires-Dist: pydantic>=2.6
27
+ Requires-Dist: python-frontmatter>=1.1
28
+ Requires-Dist: rich>=13.7
29
+ Requires-Dist: tomli-w>=1.0
30
+ Requires-Dist: tomli>=2.0; python_version < '3.11'
31
+ Requires-Dist: typer>=0.12
32
+ Provides-Extra: all
33
+ Requires-Dist: sentence-transformers>=2.7; extra == 'all'
34
+ Requires-Dist: sqlite-vec>=0.1.6; extra == 'all'
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
37
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
38
+ Requires-Dist: pytest>=8.0; extra == 'dev'
39
+ Requires-Dist: ruff>=0.4; extra == 'dev'
40
+ Provides-Extra: vector
41
+ Requires-Dist: sentence-transformers>=2.7; extra == 'vector'
42
+ Requires-Dist: sqlite-vec>=0.1.6; extra == 'vector'
43
+ Description-Content-Type: text/markdown
44
+
45
+ <p align="center">
46
+ <img src="https://raw.githubusercontent.com/JochenYang/Lore-wiki/main/assets/logo.png" alt="LoreWiki" width="320" />
47
+ </p>
48
+
49
+ <p align="center">
50
+ <b><a href="README.md">English</a></b> · <a href="docs/README_zh-CN.md">中文</a>
51
+ </p>
52
+
53
+ > Local-first knowledge base for LLM-assisted coding, with hybrid retrieval
54
+ > over SQLite FTS5.
55
+
56
+ ### Built with
57
+
58
+ [![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-3776AB?logo=python&logoColor=white&style=for-the-badge)](https://www.python.org/)
59
+ [![SQLite](https://img.shields.io/badge/SQLite-+FTS5-003B57?logo=sqlite&logoColor=white&style=for-the-badge)](https://www.sqlite.org/)
60
+
61
+ ### Tools
62
+
63
+ [![uv](https://img.shields.io/badge/uv-pkg%20%2B%20tool-5C2D91?logo=astral&logoColor=white&style=for-the-badge)](https://docs.astral.sh/uv/)
64
+ [![ruff](https://img.shields.io/badge/ruff-0%20errors-D7FF64?logo=ruff&logoColor=black&style=for-the-badge)](https://docs.astral.sh/ruff/)
65
+ [![pytest](https://img.shields.io/badge/pytest-240%20passed-0A9EDC?logo=pytest&logoColor=white&style=for-the-badge)](tests/)
66
+ [![License](https://img.shields.io/badge/License-MIT-22B14C?style=for-the-badge)](LICENSE)
67
+
68
+ ---
69
+
70
+ LoreWiki indexes your team's Markdown wiki and exposes it through a single
71
+ CLI plus an [opencode](https://opencode.ai) skill consumable by Codex /
72
+ Aider / Claude Code / any shell-using LLM agent. The vault is also a
73
+ plain folder of `.md` files, so Obsidian / Logseq / VS Code can open
74
+ it directly.
75
+
76
+ **Key numbers from the example_wiki benchmark** (10 hand-authored queries):
77
+
78
+ | Mode | Recall@5 | Avg latency |
79
+ |---------------|----------|-------------|
80
+ | BM25 | 80% | 1.7 ms |
81
+ | Hierarchy | 90% | 0.8 ms |
82
+ | **Mix (RRF)** | **100%** | 3.0 ms |
83
+
84
+ ## Features
85
+
86
+ - **Hybrid retrieval**: FTS5 BM25 + hierarchy tree navigation, fused via
87
+ Reciprocal Rank Fusion (no score normalisation needed).
88
+ - **Chinese + English friendly**: trigram tokenizer + bigram/LIKE fallback for
89
+ short CJK queries (e.g. `"幂等"` (idempotent), `"认证"` (auth)).
90
+ - **Optional LLM integration** (Ollama or OpenAI-compatible). Gracefully
91
+ degrades to "return the top-k chunks" when the LLM is offline.
92
+ - **Single-binary CLI + opencode skill**: one command surface, one
93
+ opencode skill (or any shell-using agent) for AI consumption, and
94
+ the on-disk vault as the "UI". No server processes, no extra
95
+ dependencies.
96
+ - **One `lorewiki add`** to author a note end-to-end (body via
97
+ `--body` / `--file` / stdin) with auto-reindex so the new doc is
98
+ immediately retrievable.
99
+ - **Second-brain / topics**: one isolated vault per knowledge domain
100
+ under `~/lorewiki/topics/`, shared across every project.
101
+ - **Zero external services**: SQLite is the only dependency for retrieval.
102
+ LLM is opt-in.
103
+ - **Single-package install**: `pip install lorewiki` and you have
104
+ everything; the data lives in your home directory and is fully yours.
105
+
106
+ ## Installation
107
+
108
+ ```bash
109
+ # Editable install (recommended for active development)
110
+ uv tool install --editable .
111
+
112
+ # Or plain pip
113
+ pip install -e . # core CLI
114
+ pip install -e ".[dev]" # add pytest / ruff / coverage
115
+ pip install -e ".[vector]" # opt-in: vector retrieval (sqlite-vec)
116
+ ```
117
+
118
+ Python **3.10+** required. After install, `lorewiki --version`
119
+ should print `LoreWiki 0.2.1`.
120
+
121
+ > Windows PowerShell users: if CJK characters show as `?` in
122
+ > `lorewiki search --raw` output, prefix the command with
123
+ > `chcp 65001 |` to force the shell code page to UTF-8, or upgrade
124
+ > to v0.1.1+ which forces UTF-8 stdout automatically.
125
+
126
+ ## Quickstart
127
+
128
+ ```bash
129
+ # 1. Create a wiki + config
130
+ lorewiki init --path ./my-wiki
131
+
132
+ # 2. Author Markdown under ./my-wiki/, then index it
133
+ lorewiki index --path ./my-wiki
134
+
135
+ # 3. Search
136
+ lorewiki search "用户登录接口" --path ./my-wiki --mode mix --top-k 5
137
+
138
+ # 4. Ask (LLM-assisted answer, gracefully falls back to top chunks)
139
+ lorewiki ask "如何实现幂等重试" --path ./my-wiki
140
+
141
+ # 5. Or author a note from the CLI (writes + re-indexes in one go)
142
+ echo "Some deep details about Python design pattern." \
143
+ | lorewiki add --title "Python Design" --module "patterns" --tag python,design
144
+
145
+ # 6. Browse the index status
146
+ lorewiki status --path ./my-wiki
147
+ ```
148
+
149
+ The default config lives in `<wiki>/.lorewiki/config.toml`; user-wide
150
+ overrides live in `~/.lorewiki/config.toml`; env vars `LOREWIKI_*` override
151
+ both.
152
+
153
+ ## Topics — your second brain
154
+
155
+ The per-wiki mode above is fine for a single project. The
156
+ **shared-brain** workflow is **topics** — isolated vaults under
157
+ `~/lorewiki/topics/`, queryable from any project:
158
+
159
+ ```bash
160
+ lorewiki topic create react # empty vault
161
+ lorewiki topic create react --source ~/notes/react # copy mode (default)
162
+ lorewiki topic create react --source ~/notes/react --link # symlink mode
163
+ lorewiki topic use react # activate
164
+ lorewiki index # index the active topic
165
+ lorewiki search "useState closure" # query the active topic
166
+ lorewiki ask "props drilling 对比" # LLM answer from active topic
167
+ ```
168
+
169
+ Layout produced:
170
+
171
+ ```
172
+ ~/lorewiki/ # central root
173
+ ├── config.toml # global: LLM key, retrieval mode
174
+ ├── current # text: name of active topic
175
+ └── topics/
176
+ └── react/ # one topic = one vault
177
+ ├── .lorewiki/index.db # hidden lorewiki metadata
178
+ ├── api/auth.md
179
+ └── architecture.md
180
+ ```
181
+
182
+ **Topic resolution priority** (first match wins): `--topic` flag →
183
+ `LOREWIKI_TOPIC` env → `~/lorewiki/current` file → `--path` (legacy
184
+ per-wiki mode) → cwd `.lorewiki/config.toml` (legacy per-project mode).
185
+
186
+ The legacy per-project mode is **permanently supported** — no
187
+ migration required. Topics are a convenience, not a replacement.
188
+
189
+ The vault root is plain Markdown with a hidden `.lorewiki/`
190
+ directory, so **Obsidian / Logseq / VS Code can open it directly**
191
+ without lorewiki installed. That cross-tool friendliness is the
192
+ whole point of the "second brain" framing.
193
+
194
+ Topic names: lowercase ASCII, digits, hyphens, 1-64 chars, no
195
+ leading/trailing hyphens. Reserved names (`init`, `index`,
196
+ `current`, Windows device names) are rejected.
197
+
198
+ ## How it works
199
+
200
+ For a one-query end-to-end walkthrough (CLI dispatch → config
201
+ resolution → retriever selection → RRF fusion → output) plus a
202
+ deep dive on **how the LLM config actually takes effect**
203
+ (three configuration paths, `build_client` dispatch, why pure
204
+ `httpx` instead of SDKs), see [`docs/how-it-works.md`](docs/how-it-works.md).
205
+
206
+ A higher-level architecture overview lives in
207
+ [`docs/architecture.md`](docs/architecture.md). Per-phase
208
+ self-critique notes are in `docs/critique/phase-{0..6}.md`.
209
+
210
+ ## Configuration
211
+
212
+ ```toml
213
+ # ./my-wiki/.lorewiki/config.toml
214
+
215
+ retrieval_mode = "mix" # mix | bm25 | hierarchy | vector
216
+ rrf_k = 60
217
+ chunk_max_tokens = 800
218
+ chunk_overlap_tokens = 100
219
+ chunk_min_chars = 40
220
+ snippet_chars = 240
221
+
222
+ [mix_weights]
223
+ bm25 = 1.0
224
+ hierarchy = 0.8
225
+ vector = 0.5
226
+
227
+ [llm]
228
+ enabled = false # set true to enable `ask`'s LLM path
229
+ backend = "ollama" # ollama | openai
230
+ ollama_url = "http://localhost:11434"
231
+ ollama_model = "llama3.2"
232
+ openai_api_key = ""
233
+ openai_base_url = "" # leave blank for api.openai.com
234
+ openai_model = "gpt-4o-mini"
235
+ timeout_seconds = 30.0
236
+ ```
237
+
238
+ Programmatic access:
239
+
240
+ ```bash
241
+ lorewiki config list --path ./my-wiki
242
+ lorewiki config get llm.backend --path ./my-wiki
243
+ lorewiki config set retrieval_mode '"bm25"' --path ./my-wiki
244
+ ```
245
+
246
+ ## LLM setup
247
+
248
+ ### Ollama (local, recommended)
249
+
250
+ ```bash
251
+ ollama pull llama3.2
252
+ lorewiki config set llm.enabled true --path ./my-wiki
253
+ lorewiki config set llm.backend '"ollama"' --path ./my-wiki
254
+ lorewiki ask "what's our retry policy?" --path ./my-wiki
255
+ ```
256
+
257
+ ### OpenAI-compatible (any provider that speaks the `/v1/chat/completions` schema)
258
+
259
+ > **Note on Azure OpenAI**: Azure's path is different
260
+ > (`/openai/deployments/<deployment>/chat/completions?api-version=...`)
261
+ > and is **not** currently supported. Use OpenRouter or a self-hosted
262
+ > vLLM-compatible endpoint, or wait for the phase-7 Azure support
263
+ > (or open an issue if you need it sooner).
264
+
265
+ ```bash
266
+ lorewiki config set llm.enabled true --path ./my-wiki
267
+ lorewiki config set llm.backend '"openai"' --path ./my-wiki
268
+ lorewiki config set llm.openai_api_key '"sk-..."' --path ./my-wiki
269
+ # Optional: point at a compatible proxy (OpenRouter, vLLM, ...). Azure OpenAI is not supported.
270
+ lorewiki config set llm.openai_base_url '"https://openrouter.ai/api/v1"' --path ./my-wiki
271
+ ```
272
+
273
+ If the LLM is unreachable, `ask` returns the top-K chunks with a clear
274
+ "degraded" notice — your workflow never breaks because the model is down.
275
+
276
+ ## REST API
277
+
278
+ The FastAPI / REST surface was removed in 0.2.0. The CLI is the only
279
+ programmatic surface; agents consume it through the opencode skill
280
+ (see below) or by shelling out.
281
+
282
+ ## The Markdown vault as your "UI"
283
+
284
+ LoreWiki no longer ships a built-in web UI in 0.1.0. The recommended
285
+ ways to consume the data are:
286
+
287
+ - **The CLI** (this document) — the single source of truth.
288
+ - **The active topic's vault directory** — every topic is a plain
289
+ folder of `.md` files under `~/lorewiki/topics/<name>/` (or
290
+ `<wiki>/.lorewiki/...` in per-wiki mode). Open it in Obsidian,
291
+ VS Code, Cursor, or any Markdown editor for the full rendered
292
+ view, no extra tooling required.
293
+ - **The opencode skill** (below) — for AI agents.
294
+
295
+ ## opencode skill (Codex / Aider / any shell-using agent)
296
+
297
+ For agents that can already run shell commands, the CLI is lighter-weight
298
+ than MCP. LoreWiki ships an official [opencode](https://opencode.ai) skill
299
+ in [`skills/lorewiki/SKILL.md`](skills/lorewiki/SKILL.md).
300
+
301
+ One-time install (after `uv tool install --editable .` puts `lorewiki` on your PATH):
302
+
303
+ ```powershell
304
+ # Windows
305
+ .\skills\install.ps1 # copy mode
306
+ .\skills\install.ps1 -Symlink # symlink mode (lets you edit SKILL.md live)
307
+ ```
308
+
309
+ ```bash
310
+ # macOS / Linux
311
+ ./skills/install.sh # copy mode
312
+ ./skills/install.sh --symlink # symlink mode
313
+ ```
314
+
315
+ Restart opencode and the agent will auto-trigger the skill on cues like
316
+ `查 wiki` / `search the wiki` / `lorewiki ...`. See
317
+ [`skills/README.md`](skills/README.md) for full details.
318
+
319
+ ## Architecture
320
+
321
+ ```
322
+ ┌─────────────────────────────────────────────────────────────┐
323
+ │ CLI + opencode skill · vault-as-folder │
324
+ ├─────────────────────────────────────────────────────────────┤
325
+ │ Indexer │ Retriever (BM25 + Hierarchy + RRF) │ LLM │
326
+ ├─────────────────────────────────────────────────────────────┤
327
+ │ SQLite + FTS5 (documents · docs_fts · hierarchy) │
328
+ └─────────────────────────────────────────────────────────────┘
329
+ ```
330
+
331
+ See `docs/lorewiki dev document.md` for the full design plan and
332
+ `docs/critique/phase-{0..6}.md` for per-phase self-critique notes.
333
+
334
+ ## Development
335
+
336
+ ```bash
337
+ pip install -e ".[dev]"
338
+ ruff check lorewiki skills tests # lint
339
+ pytest -q # 240 unit + integration tests
340
+ pytest --cov=lorewiki # coverage report
341
+ ```
342
+
343
+ The `example_wiki/` directory is a curated 5-file benchmark
344
+ fixture — not a starter. See `example_wiki/README.md` for what
345
+ it is and how to use it.
346
+
347
+ ## Roadmap
348
+
349
+ - **Vector retrieval** (sqlite-vec + sentence-transformers) — opt-in,
350
+ via `pip install lorewiki[vector]`.
351
+ - **Incremental file-watcher** (`lorewiki update --watch`).
352
+ - **PDF / Word ingestion** beyond Markdown.
353
+ - **Atomic write of `~/lorewiki/current`** (currently best-effort).
354
+
355
+ ## Contributing
356
+
357
+ See [`CONTRIBUTING.md`](CONTRIBUTING.md) for the workflow. Bug
358
+ reports and feature requests go to the issue tracker; PRs are
359
+ welcome — see the testing / linting commands above.
360
+
361
+ ## License
362
+
363
+ [MIT](LICENSE) · Copyright (c) 2026 LoreWiki contributors.