guanlan-wiki 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. guanlan_wiki-0.1.0/.github/workflows/ci.yml +41 -0
  2. guanlan_wiki-0.1.0/.github/workflows/release.yml +38 -0
  3. guanlan_wiki-0.1.0/.gitignore +37 -0
  4. guanlan_wiki-0.1.0/.python-version +1 -0
  5. guanlan_wiki-0.1.0/CLAUDE.md +62 -0
  6. guanlan_wiki-0.1.0/LICENSE +201 -0
  7. guanlan_wiki-0.1.0/PKG-INFO +123 -0
  8. guanlan_wiki-0.1.0/README.md +95 -0
  9. guanlan_wiki-0.1.0/docs/DESIGN.md +276 -0
  10. guanlan_wiki-0.1.0/docs/P2-最小闭环.md +335 -0
  11. guanlan_wiki-0.1.0/docs/P3-健康与图谱.md +283 -0
  12. guanlan_wiki-0.1.0/docs/P3.1-别名解析.md +129 -0
  13. guanlan_wiki-0.1.0/docs/P3.2-缺失实体物化.md +226 -0
  14. guanlan_wiki-0.1.0/docs/P4-Web宿主.md +393 -0
  15. guanlan_wiki-0.1.0/docs/P4.1-Web投喂.md +160 -0
  16. guanlan_wiki-0.1.0/docs/P4.2-会话落盘.md +270 -0
  17. guanlan_wiki-0.1.0/docs/backlog/notes/broken-link-handling-survey.md +86 -0
  18. guanlan_wiki-0.1.0/docs/backlog/notes/cjk-retrieval-enhancements.md +22 -0
  19. guanlan_wiki-0.1.0/docs/guanlan.png +0 -0
  20. guanlan_wiki-0.1.0/docs/发布到-PyPI.md +55 -0
  21. guanlan_wiki-0.1.0/examples/AGENTAO.md +28 -0
  22. guanlan_wiki-0.1.0/examples/SCHEMA.md +33 -0
  23. guanlan_wiki-0.1.0/examples/wiki/index.md +25 -0
  24. guanlan_wiki-0.1.0/examples/wiki/log.md +6 -0
  25. guanlan_wiki-0.1.0/examples/wiki/overview.md +14 -0
  26. guanlan_wiki-0.1.0/guanlan/__init__.py +7 -0
  27. guanlan_wiki-0.1.0/guanlan/__main__.py +6 -0
  28. guanlan_wiki-0.1.0/guanlan/check.py +279 -0
  29. guanlan_wiki-0.1.0/guanlan/cli.py +228 -0
  30. guanlan_wiki-0.1.0/guanlan/errors.py +36 -0
  31. guanlan_wiki-0.1.0/guanlan/gate.py +387 -0
  32. guanlan_wiki-0.1.0/guanlan/graph.py +321 -0
  33. guanlan_wiki-0.1.0/guanlan/health.py +182 -0
  34. guanlan_wiki-0.1.0/guanlan/ingest.py +73 -0
  35. guanlan_wiki-0.1.0/guanlan/init.py +104 -0
  36. guanlan_wiki-0.1.0/guanlan/lint.py +130 -0
  37. guanlan_wiki-0.1.0/guanlan/pages.py +286 -0
  38. guanlan_wiki-0.1.0/guanlan/paths.py +48 -0
  39. guanlan_wiki-0.1.0/guanlan/query.py +80 -0
  40. guanlan_wiki-0.1.0/guanlan/runtime.py +144 -0
  41. guanlan_wiki-0.1.0/guanlan/skill.py +95 -0
  42. guanlan_wiki-0.1.0/guanlan/web/__init__.py +16 -0
  43. guanlan_wiki-0.1.0/guanlan/web/app.py +386 -0
  44. guanlan_wiki-0.1.0/guanlan/web/chat.py +428 -0
  45. guanlan_wiki-0.1.0/guanlan/web/jobs.py +81 -0
  46. guanlan_wiki-0.1.0/guanlan/web/render.py +258 -0
  47. guanlan_wiki-0.1.0/guanlan/web/server.py +98 -0
  48. guanlan_wiki-0.1.0/guanlan/web/static/app.css +174 -0
  49. guanlan_wiki-0.1.0/guanlan/web/static/app.js +547 -0
  50. guanlan_wiki-0.1.0/guanlan/web/static/index.html +69 -0
  51. guanlan_wiki-0.1.0/guanlan/web/static/logo.png +0 -0
  52. guanlan_wiki-0.1.0/pyproject.toml +68 -0
  53. guanlan_wiki-0.1.0/skills/guanlan-wiki/SKILL.md +74 -0
  54. guanlan_wiki-0.1.0/skills/guanlan-wiki/references/conventions.md +184 -0
  55. guanlan_wiki-0.1.0/tests/conftest.py +46 -0
  56. guanlan_wiki-0.1.0/tests/test_aliases.py +203 -0
  57. guanlan_wiki-0.1.0/tests/test_check.py +210 -0
  58. guanlan_wiki-0.1.0/tests/test_cli.py +117 -0
  59. guanlan_wiki-0.1.0/tests/test_gate.py +283 -0
  60. guanlan_wiki-0.1.0/tests/test_graph.py +284 -0
  61. guanlan_wiki-0.1.0/tests/test_health.py +170 -0
  62. guanlan_wiki-0.1.0/tests/test_ingest.py +156 -0
  63. guanlan_wiki-0.1.0/tests/test_init.py +51 -0
  64. guanlan_wiki-0.1.0/tests/test_lint.py +163 -0
  65. guanlan_wiki-0.1.0/tests/test_pages.py +181 -0
  66. guanlan_wiki-0.1.0/tests/test_query.py +109 -0
  67. guanlan_wiki-0.1.0/tests/test_runtime.py +64 -0
  68. guanlan_wiki-0.1.0/tests/test_skill.py +84 -0
  69. guanlan_wiki-0.1.0/tests/test_web.py +1335 -0
  70. guanlan_wiki-0.1.0/uv.lock +1190 -0
@@ -0,0 +1,41 @@
1
+ name: CI
2
+
3
+ # 推到 main 或对 main 发 PR 时跑:lint + 全量测试。
4
+ # 全程离线——测试用 fake runner / monkeypatch,不打真实 LLM,无需任何 API key。
5
+ on:
6
+ push:
7
+ branches: [main]
8
+ pull_request:
9
+ branches: [main]
10
+
11
+ # 同一分支/PR 的新推送取消旧的在跑任务,省额度。
12
+ concurrency:
13
+ group: ci-${{ github.ref }}
14
+ cancel-in-progress: true
15
+
16
+ jobs:
17
+ test:
18
+ runs-on: ubuntu-latest
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: 装 uv(带依赖缓存)
23
+ uses: astral-sh/setup-uv@v5
24
+ with:
25
+ enable-cache: true
26
+ cache-dependency-glob: uv.lock
27
+
28
+ - name: 装 Python 3.12
29
+ run: uv python install 3.12
30
+
31
+ # ruff 用 uvx 临时拉,不进 lock;与本地 `uvx ruff check` 一致。
32
+ - name: Lint(ruff)
33
+ run: uvx ruff check guanlan tests
34
+
35
+ # --extra web 装上 fastapi/uvicorn/markdown,否则 test_web.py / test_aliases.py
36
+ # 会被 importorskip 跳过;dev 组(pytest)uv sync 默认带上。
37
+ - name: 装依赖
38
+ run: uv sync --extra web
39
+
40
+ - name: 跑测试
41
+ run: uv run --extra web pytest -q
@@ -0,0 +1,38 @@
1
+ name: Release (PyPI)
2
+
3
+ # 推 v* tag 时构建并发布到 PyPI。用 Trusted Publishing(OIDC)——仓库不存任何 token:
4
+ # 需先在 PyPI 为项目 guanlan-wiki 配好 pending publisher(绑定本仓库 + 本 workflow +
5
+ # environment 名 pypi),见 docs/发布到-PyPI.md。
6
+ on:
7
+ push:
8
+ tags: ["v*"]
9
+
10
+ # 默认不给任何权限;发布 job 单独申明 id-token: write 换取 OIDC 凭据。
11
+ permissions: {}
12
+
13
+ jobs:
14
+ pypi-publish:
15
+ runs-on: ubuntu-latest
16
+ environment:
17
+ name: pypi
18
+ url: https://pypi.org/p/guanlan-wiki
19
+ permissions:
20
+ id-token: write # Trusted Publishing 必需
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+
24
+ - name: 装 uv
25
+ uses: astral-sh/setup-uv@v5
26
+ with:
27
+ enable-cache: true
28
+ cache-dependency-glob: uv.lock
29
+
30
+ - name: 构建 sdist + wheel
31
+ run: uv build
32
+
33
+ # 发布前再跑一遍 twine check,产物不合规就别上传。
34
+ - name: 校验产物
35
+ run: uvx twine check dist/*
36
+
37
+ - name: 发布到 PyPI(Trusted Publishing,免 token)
38
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,37 @@
1
+ # macOS
2
+ .DS_Store
3
+
4
+ # Python-generated files
5
+ __pycache__/
6
+ *.py[oc]
7
+ build/
8
+ dist/
9
+ wheels/
10
+ *.egg-info
11
+
12
+ # Virtual environments
13
+ .venv
14
+
15
+ # Local agentao residue (from previous experiments in this dir, if any)
16
+ .agentao/
17
+ agentao.log
18
+ agentao.log.*
19
+
20
+ # Claude Code 本机运行时状态(scheduled_tasks.lock 含 sessionId / pid / 进程起始时间),
21
+ # per-machine 不入仓——否则别的 checkout 会继承陈旧锁 + 本地会话元数据。
22
+ .claude/
23
+
24
+ # Qoder
25
+ .qoder/
26
+
27
+ # Dev-only sample wiki data & derived (local testing only — never pushed; see docs/DESIGN.md §4.7)
28
+ # Templates live in examples/; only the repo-root sample config + data/derived dirs are ignored.
29
+ /raw/
30
+ /wiki/
31
+ /graph/
32
+ /workspace/
33
+
34
+ # Repo-root sample config copied from examples/ (may carry machine paths / private dev settings).
35
+ # Anchored to root so examples/AGENTAO.md and examples/SCHEMA.md stay tracked.
36
+ /AGENTAO.md
37
+ /SCHEMA.md
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,62 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## What this is
6
+
7
+ 观澜 (GuānLán) is an implementation of the [Karpathy LLM Wiki pattern](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f): an Agent incrementally builds and maintains a structured, cross-linked markdown knowledge wiki instead of doing fresh RAG retrieval on every query. The full design (in Chinese) is the authoritative spec — read [`docs/DESIGN.md`](docs/DESIGN.md) before any non-trivial change.
8
+
9
+ **Current status: P4 (optional local Web host).** P2's closed loop (`guanlan init` / `ingest` / `query` / `check` / `install-skill`, wired through Agentao) + P3's three on-demand zero-LLM maintenance tools (`guanlan health` / `lint` / `graph`) + P4's **optional** Web host (`guanlan web`, needs `pip install 'guanlan[web]'`) are all implemented. P4 puts the existing commands in a browser via a thin FastAPI/uvicorn layer (127.0.0.1-only, `workers=1`) under a `guanlan/web/` subpackage that carries *no business intelligence*: it reuses `run_ingest` / `run_check` / `run_health` / `run_lint` / `build_and_write_graph` / `pages.*` and an embedded read-only `Agentao` for chat. **Read/write split** (DESIGN §5.2): the only write job `ingest` reuses the P2 subprocess + single-writer gate (one background worker, FIFO); all Q&A (one-shot + multi-turn) goes through a read-only in-process embedded `Agentao` (`.arun`, token-streamed, memory-only). Web-side `raw/` writes, `query --backfill`, writable work-sessions, session persistence, multi-format ingest remain post-P4 (DESIGN §8). When adding features, match the phase boundaries in DESIGN §4.4 / §7.
10
+
11
+ The **P2 spec** (module layout, deterministic gate, `raw/` snapshot + `check` contracts, exit codes, Agentao integration) is [`docs/P2-最小闭环.md`](docs/P2-最小闭环.md); the **P3 spec** (`pages.py` shared primitives with strict/lenient frontmatter tiers, `graph`/`health`/`lint` contracts, `EXIT_LINT_FINDINGS`, advisory-not-gate exit semantics) is [`docs/P3-健康与图谱.md`](docs/P3-健康与图谱.md); the **P4 spec** (`guanlan/web/` layout, the read/write split, single-worker `ingest` job, read-only embedded chat with the four embedding pitfalls + token-streaming transport contract, HTTP API + SSE contracts, no new exit codes) is [`docs/P4-Web宿主.md`](docs/P4-Web宿主.md) — together they document how the current code is structured. The **P3.1 spec** (optional `aliases` frontmatter feeding the single `pages.py` resolution point: `alias_index` / `link_resolution_index` / alias-aware `link_target_stems`, `check` uniqueness validation `aliases.collides_stem`/`aliases.duplicate`, alias edges in `graph`, Web `[[alias]]` linking — a zero-LLM half-phase enhancement, not a new milestone; P5 still = multi-format) is [`docs/P3.1-别名解析.md`](docs/P3.1-别名解析.md). 
Two **landing designs for P4 §10 deferred items** (specced, not yet implemented) are [`docs/P4.1-Web投喂.md`](docs/P4.1-Web投喂.md) (**P4.1** — `POST /api/raw` paste-to-save: a human source-add, *not* a gated agent write; serialized through the existing single-writer `JobQueue` to avoid the `run_guarded_write` `raw/` snapshot window; default no-overwrite, slug + `.md`) and [`docs/P4.2-会话落盘.md`](docs/P4.2-会话落盘.md) (**P4.2** — session persistence/restore: reuse `agentao.embedding.{save_session,load_session,list_sessions,delete_session}` under `<kb>/.agentao/sessions/`, conversation id → agentao UUID, lazy restore that re-applies the read-only two-point posture; thin adaptation over agentao's *snapshot-file* semantics — dedup the catalog by `session_id` and prune to one snapshot per session *before* each save (since `save_session` rotates by file-count right after writing) so the 10-*file* rotation cap behaves as a 10-*session* cap). Both are zero-LLM Web-host half-phases that stay inside the P4 boundary (no new exit codes, no SSE changes, no writable sessions).
12
+
13
+ ## Commands
14
+
15
+ ```bash
16
+ uv run guanlan init /tmp/demo # scaffold a knowledge base (deterministic, zero-LLM)
17
+ uv run guanlan -C /tmp/demo check # deterministic validation (frontmatter / broken links / sources)
18
+ uv run guanlan -C /tmp/demo health # P3: stub pages + index↔disk sync (advisory; --strict → exit 6)
19
+ uv run guanlan -C /tmp/demo lint # P3: orphans / broken links / missing entities (advisory)
20
+ uv run guanlan -C /tmp/demo graph # P3: write graph/graph.json + graph.html (--json-only skips html)
21
+ uv run guanlan -C /tmp/demo web --no-browser # P4: optional local Web host (needs guanlan[web]; 127.0.0.1 only)
22
+ uv run pytest # run all tests
23
+ uv run pytest tests/test_web.py # P4 Web host tests (skipped if guanlan[web] absent)
24
+ uv run pytest tests/test_init.py::test_init_is_idempotent_and_non_destructive # single test
25
+ ```
26
+
27
+ (`ingest` / `query` and the Web host's chat drive Agentao + the skill and need a configured model; `init` / `check` / `health` / `lint` / `graph` are the zero-LLM ones runnable offline. `guanlan web` needs the optional `web` extra: `uv pip install 'fastapi>=0.110' 'uvicorn>=0.29' 'markdown>=3'`, or `pip install 'guanlan-wiki[web]'`.)
28
+
29
+ Python 3.12+, dependencies managed by `uv` (see `uv.lock`). The package depends on `agentao` (the governed Agent runtime executing LLM-driven `ingest`/`query` via subprocess, and — in P4 — embedded read-only for Web chat). The `web` extra (fastapi/uvicorn/markdown) is optional and not part of the core install.
30
+
31
+ ## Architecture
32
+
33
+ The project deliberately separates three concerns. Internalize this split before editing — most design decisions follow from it.
34
+
35
+ 1. **`guanlan/` — the thin CLI wrapper (this package).** It carries *no business intelligence*. Its only jobs are: (a) `init` (deterministic template generation, zero LLM), and (b) in P2, orchestrating Agentao + the skill and enforcing deterministic gates on write operations. `cli.py` is argparse-only; `init.py` does the file generation.
36
+
37
+ 2. **`skills/guanlan-wiki/` — the maintenance engine.** This is where the actual wiki-maintenance workflows live (`SKILL.md` = workflows, `references/conventions.md` = default page/frontmatter/naming conventions, and `scripts/*.py` = deterministic checks, to be added in P2/P3). The engine is shipped/installed *once* and is **not** copied into each knowledge base. It is intended to run under Agentao's skill discovery.
38
+
39
+ 3. **User knowledge base (generated by `guanlan init`).** Holds only data + per-base config: `AGENTAO.md` (Agent behavior hard-constraints + pointers), `SCHEMA.md` (this base's domain/page-types/custom rules), `raw/` (read-only sources), `wiki/` (Agent-owned generated layer: `index.md`, `log.md`, `overview.md`, plus `sources/ entities/ concepts/ syntheses/`).
40
+
41
+ ### Two run modes (do not mix them)
42
+
43
+ - **Development = repo root *is* a sample wiki.** Set Agentao's `working_directory` to this repo root; `skills/guanlan-wiki/` then hits Agentao's repo-root discovery path (`<wd>/skills/`), so the engine is found with no install. Sample wiki data (`raw/`, `wiki/`, `graph/`) and the dev-copied `AGENTAO.md`/`SCHEMA.md` are `.gitignore`d — they may contain machine-local paths and never get committed.
44
+ - **External real wiki = global install.** The skill is installed to `~/.agentao/skills/guanlan-wiki/` (cwd-independent), with the user's base as `working_directory`. There is intentionally no "discover repo skills/ from an external wiki" path.
45
+
46
+ ### init template duality (`guanlan/init.py`)
47
+
48
+ `init` copies a template tree. `_templates_dir()` resolves two locations by priority: bundled `guanlan/_templates/` (installed wheel) → repo-root `examples/` (development). The wheel's `force-include` in `pyproject.toml` copies `examples/{AGENTAO.md,SCHEMA.md,wiki}` into `guanlan/_templates/` at build time — so **`examples/` is the single source of truth for init templates**; edit templates there, not in `_templates/`. `init` never overwrites existing files (idempotent) and substitutes a `__DATE__` token in `wiki/` seed files.
49
+
50
+ ## Invariants that drive the design
51
+
52
+ These come up repeatedly in DESIGN and the skill; preserve them in any change:
53
+
54
+ - **Markdown is the only source of truth.** Any index / graph / cache is a derivative that must be idempotently rebuildable from markdown — it never becomes authoritative.
55
+ - **`raw/` is read-only and immutable.** In P2 this is enforced *deterministically by the wrapper* via a before/after snapshot (filename + size + mtime, SHA256 if needed) around the Agentao call — not by permission config, since a snapshot also catches shell `mv`/`rm`/`python` writes that bypass `write_file`.
56
+ - **Zero-LLM scripts vs. LLM-only workflows.** Deterministic work (frontmatter/wikilink/structure checks, graph building) is plain Python scripts with no LLM. LLM is used *only* for `ingest` and `query`, and always via the Agentao runtime — scripts must never carry their own LLM client or API keys.
57
+ - **`SCHEMA.md` / `AGENTAO.md` / `index.md` / `log.md` / `overview.md` are config, not content** — exclude them from index/graph/lint scans.
58
+ - **Data conventions** (frontmatter fields, kebab-case vs TitleCase naming, `[[wikilink]]` resolution, `index.md`/`log.md` formats, the `## ⚠️ 矛盾与存疑` contradiction-marking format) are specified in `skills/guanlan-wiki/references/conventions.md` and DESIGN §4.5. A base's `SCHEMA.md` may override defaults.
59
+
60
+ ## Conventions
61
+
62
+ The codebase (code comments, docstrings, design docs, user-facing CLI output) is written in **Chinese**. Match that when editing existing files.
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2026 jin-bo
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: guanlan-wiki
3
+ Version: 0.1.0
4
+ Summary: 观澜 — 增量构建并维护结构化、互链的 markdown 知识 wiki(Karpathy LLM Wiki 模式)
5
+ Project-URL: Homepage, https://github.com/jin-bo/guanlan
6
+ Project-URL: Repository, https://github.com/jin-bo/guanlan
7
+ Project-URL: Issues, https://github.com/jin-bo/guanlan/issues
8
+ Author-email: jin-bo <jinbobo@gmail.com>
9
+ License-Expression: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: agent,agentao,karpathy,knowledge-base,llm,markdown,rag,wiki
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Natural Language :: Chinese (Simplified)
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Documentation
19
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: agentao[cli]>=0.4.8
22
+ Requires-Dist: pyyaml>=6
23
+ Provides-Extra: web
24
+ Requires-Dist: fastapi>=0.110; extra == 'web'
25
+ Requires-Dist: markdown>=3; extra == 'web'
26
+ Requires-Dist: uvicorn>=0.29; extra == 'web'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # 观澜 (GuānLán)
30
+
31
+ > 《孟子·尽心上》"观水有术,必观其澜"——在信息的汪洋中洞察脉络与趋势。
32
+
33
+ 观澜是 [Karpathy LLM Wiki 模式](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) 的一个实现:让 Agent **增量地构建并持续维护一个结构化、互相链接的知识 wiki**,而不是每次提问都从原始文档临时检索(传统 RAG)。知识被"编译"一次后持续保鲜,随每篇新资料、每次提问而复利增长。
34
+
35
+ - **markdown 始终是唯一事实来源**——整个知识库就是一组本地 markdown 文件,任何索引/图谱/缓存都是可幂等重建的派生物。
36
+ - **Agent 全权拥有 wiki 层,人不直接写**——人负责投喂资料、提问、给方向;摘要、交叉引用、归档全交给 Agent。
37
+ - **`raw/` 只读不可变**——Agent 只读原始资料,永不修改,保证事实可追溯。
38
+ - **确定性优先**——结构检查、断链、frontmatter 校验走脚本(零 LLM);需 LLM 的 ingest/query 统一经 Agentao 运行时治理。
39
+
40
+ 完整设计见 [`docs/DESIGN.md`](docs/DESIGN.md)。
41
+
42
+ ## 状态
43
+
44
+ 🚀 **P4(Web 宿主,可选叠加层)** —— 在 P2 最小闭环(`guanlan init` / `ingest` / `query` / `check` / `install-skill`)与 P3 维护工具(`health` / `lint` / `graph`)之上,新增一个**可选**的本地 Web 宿主,把上述命令搬进浏览器:
45
+
46
+ - `guanlan web` —— 起一个仅监听 `127.0.0.1` 的本地 Web 宿主(需 `pip install 'guanlan-wiki[web]'`)。浏览 wiki(`[[wikilink]]` 可点击导航)/ 跑 check·health·lint 看报告 / 看 graph / 从 `raw/` 选一篇触发 ingest(单 worker 串行,轮询结果)/ 与 agent **只读多轮对话**(token 流式)。
47
+ - 它是 **MVP 之后的可选叠加层**:不装 `guanlan[web]`、不起 `guanlan web`,整套东西照旧用 CLI 跑通。markdown 仍是唯一事实来源,Web 只是 ingest 与问答的另一个入口、wiki 的只读浏览器。
48
+ - **读写分线**:唯一写作业 `ingest` 复用 P2 子进程 + 单写者门禁;所有问答(一次性单轮 + 多轮)走只读进程内嵌入 `Agentao`(默认只读、不过门禁、仅内存)。
49
+
50
+ P3 三个零-LLM 维护工具(advisory):
51
+
52
+ - `guanlan health` —— stub 页面 + index↔disk 同步(`--strict` → 退出码 6)。
53
+ - `guanlan lint` —— 孤儿页 / 断链 / 缺失实体。
54
+ - `guanlan graph` —— 确定性 `[[wikilink]]` 图谱 → `graph/graph.json` + 自包含 `graph/graph.html`(`--json-only` 跳过 html)。
55
+
56
+ **P3.1 别名解析(零-LLM 增强)** —— entity/concept 页可在 frontmatter 声明可选 `aliases`,让别名进入 `[[wikilink]]` 解析命名空间(与页名同口径、大小写不敏感):`[[大模型]]` / `[[LLM]]` 都解析到声明它们的页,**消假断链**(check / lint / graph / Web 一致)、**补 CJK 同义召回**(别名纳入 query 2-gram 与 ingest 去重)。别名全局唯一由 `check` 确定性校验(撞页名 / 重复 → 阻断写门禁)。这不是新里程碑,P5 仍是多格式与自动化。细化见 [`docs/P3.1-别名解析.md`](docs/P3.1-别名解析.md)。
57
+
58
+ Web 端写 `raw/`、`query --backfill`、可写多轮工作会话、会话落盘、多格式 ingest 留待 P4 之后(见 DESIGN §8 与 `docs/P4-Web宿主.md` §10)。别名自动物化建页(`heal`)、同义词表、向量检索按需驱动、另开方案。
59
+
60
+ ## 快速开始
61
+
62
+ ```bash
63
+ # 在空目录初始化一个知识库(生成 AGENTAO.md / SCHEMA.md / raw/ / wiki/)
64
+ guanlan init my-wiki
65
+
66
+ # 或就地初始化当前目录
67
+ guanlan init
68
+ ```
69
+
70
+ `init` 是确定性的(零 LLM),已存在的文件不会被覆盖,可安全重复运行。
71
+
72
+ 投喂资料、提问、维护:
73
+
74
+ ```bash
75
+ # 投喂资料 / 提问(需配置模型,经 Agentao 运行时)
76
+ guanlan -C my-wiki ingest path/to/source.md
77
+ guanlan -C my-wiki query "..."
78
+
79
+ # 零-LLM、可离线运行的确定性工具
80
+ guanlan -C my-wiki check # frontmatter / 断链 / 来源校验
81
+ guanlan -C my-wiki health # stub 页面 + index↔disk 同步(--strict → exit 6)
82
+ guanlan -C my-wiki lint # 孤儿页 / 断链 / 缺失实体
83
+ guanlan -C my-wiki graph # 写出 graph/graph.json + graph.html(--json-only 跳过 html)
84
+ ```
85
+
86
+ 可选 Web 宿主(叠加层,需先装 `guanlan[web]`):
87
+
88
+ ```bash
89
+ pip install 'guanlan-wiki[web]' # 装可选依赖(fastapi / uvicorn / markdown)
90
+ guanlan -C my-wiki web # 起本地 Web 宿主,仅监听 127.0.0.1,默认开浏览器
91
+ guanlan -C my-wiki web --port 9000 --no-browser # 换端口 / 不开浏览器
92
+ ```
93
+
94
+ 浏览器里可:浏览 wiki 并跟随 `[[wikilink]]` 导航、跑 check·health·lint 看报告、看 graph、
95
+ 从 `raw/` 选一篇触发 ingest(轮询结果)、与 agent 只读多轮对话(token 流式)。
96
+ **仅供本机单用户**——绝不要把该端口暴露到网络。
97
+
98
+ 生成结构:
99
+
100
+ ```
101
+ my-wiki/
102
+ ├── AGENTAO.md # Agent 行为约束 + 指针
103
+ ├── SCHEMA.md # 本库 Schema:领域 / 启用页面类型 / 自定义规则
104
+ ├── raw/ # 原始资料(只读,事实来源)
105
+ └── wiki/ # Agent 全权生成的知识层
106
+ ├── index.md # 全量页面目录
107
+ ├── log.md # append-only 时间线
108
+ └── overview.md # 跨资料活体综述
109
+ ```
110
+
111
+ ## 开发
112
+
113
+ ```bash
114
+ uv run guanlan init /tmp/demo # 跑 CLI
115
+ uv run pytest # 跑测试
116
+ ```
117
+
118
+ 维护引擎是 `skills/guanlan-wiki/`(`SKILL.md` + `references/conventions.md` + 脚本),
119
+ 开发期命中 Agentao 的 repo-root skill 发现路径(`<工作目录>/skills/`),免安装。
120
+
121
+ ## 许可证
122
+
123
+ [Apache License 2.0](LICENSE)
@@ -0,0 +1,95 @@
1
+ # 观澜 (GuānLán)
2
+
3
+ > 《孟子·尽心上》"观水有术,必观其澜"——在信息的汪洋中洞察脉络与趋势。
4
+
5
+ 观澜是 [Karpathy LLM Wiki 模式](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) 的一个实现:让 Agent **增量地构建并持续维护一个结构化、互相链接的知识 wiki**,而不是每次提问都从原始文档临时检索(传统 RAG)。知识被"编译"一次后持续保鲜,随每篇新资料、每次提问而复利增长。
6
+
7
+ - **markdown 始终是唯一事实来源**——整个知识库就是一组本地 markdown 文件,任何索引/图谱/缓存都是可幂等重建的派生物。
8
+ - **Agent 全权拥有 wiki 层,人不直接写**——人负责投喂资料、提问、给方向;摘要、交叉引用、归档全交给 Agent。
9
+ - **`raw/` 只读不可变**——Agent 只读原始资料,永不修改,保证事实可追溯。
10
+ - **确定性优先**——结构检查、断链、frontmatter 校验走脚本(零 LLM);需 LLM 的 ingest/query 统一经 Agentao 运行时治理。
11
+
12
+ 完整设计见 [`docs/DESIGN.md`](docs/DESIGN.md)。
13
+
14
+ ## 状态
15
+
16
+ 🚀 **P4(Web 宿主,可选叠加层)** —— 在 P2 最小闭环(`guanlan init` / `ingest` / `query` / `check` / `install-skill`)与 P3 维护工具(`health` / `lint` / `graph`)之上,新增一个**可选**的本地 Web 宿主,把上述命令搬进浏览器:
17
+
18
+ - `guanlan web` —— 起一个仅监听 `127.0.0.1` 的本地 Web 宿主(需 `pip install 'guanlan[web]'`)。浏览 wiki(`[[wikilink]]` 可点击导航)/ 跑 check·health·lint 看报告 / 看 graph / 从 `raw/` 选一篇触发 ingest(单 worker 串行,轮询结果)/ 与 agent **只读多轮对话**(token 流式)。
19
+ - 它是 **MVP 之后的可选叠加层**:不装 `guanlan[web]`、不起 `guanlan web`,整套东西照旧用 CLI 跑通。markdown 仍是唯一事实来源,Web 只是 ingest 与问答的另一个入口、wiki 的只读浏览器。
20
+ - **读写分线**:唯一写作业 `ingest` 复用 P2 子进程 + 单写者门禁;所有问答(一次性单轮 + 多轮)走只读进程内嵌入 `Agentao`(默认只读、不过门禁、仅内存)。
21
+
22
+ P3 三个零-LLM 维护工具(advisory):
23
+
24
+ - `guanlan health` —— stub 页面 + index↔disk 同步(`--strict` → 退出码 6)。
25
+ - `guanlan lint` —— 孤儿页 / 断链 / 缺失实体。
26
+ - `guanlan graph` —— 确定性 `[[wikilink]]` 图谱 → `graph/graph.json` + 自包含 `graph/graph.html`(`--json-only` 跳过 html)。
27
+
28
+ **P3.1 别名解析(零-LLM 增强)** —— entity/concept 页可在 frontmatter 声明可选 `aliases`,让别名进入 `[[wikilink]]` 解析命名空间(与页名同口径、大小写不敏感):`[[大模型]]` / `[[LLM]]` 都解析到声明它们的页,**消假断链**(check / lint / graph / Web 一致)、**补 CJK 同义召回**(别名纳入 query 2-gram 与 ingest 去重)。别名全局唯一由 `check` 确定性校验(撞页名 / 重复 → 阻断写门禁)。这不是新里程碑,P5 仍是多格式与自动化。细化见 [`docs/P3.1-别名解析.md`](docs/P3.1-别名解析.md)。
29
+
30
+ Web 端写 `raw/`、`query --backfill`、可写多轮工作会话、会话落盘、多格式 ingest 留待 P4 之后(见 DESIGN §8 与 `docs/P4-Web宿主.md` §10)。别名自动物化建页(`heal`)、同义词表、向量检索按需驱动,另开方案。
31
+
32
+ ## 快速开始
33
+
34
+ ```bash
35
+ # 在空目录初始化一个知识库(生成 AGENTAO.md / SCHEMA.md / raw/ / wiki/)
36
+ guanlan init my-wiki
37
+
38
+ # 或就地初始化当前目录
39
+ guanlan init
40
+ ```
41
+
42
+ `init` 是确定性的(零 LLM),已存在的文件不会被覆盖,可安全重复运行。
43
+
44
+ 投喂资料、提问、维护:
45
+
46
+ ```bash
47
+ # 投喂资料 / 提问(需配置模型,经 Agentao 运行时)
48
+ guanlan -C my-wiki ingest path/to/source.md
49
+ guanlan -C my-wiki query "..."
50
+
51
+ # 零-LLM、可离线运行的确定性工具
52
+ guanlan -C my-wiki check # frontmatter / 断链 / 来源校验
53
+ guanlan -C my-wiki health # stub 页面 + index↔disk 同步(--strict → exit 6)
54
+ guanlan -C my-wiki lint # 孤儿页 / 断链 / 缺失实体
55
+ guanlan -C my-wiki graph # 写出 graph/graph.json + graph.html(--json-only 跳过 html)
56
+ ```
57
+
58
+ 可选 Web 宿主(叠加层,需先装 `guanlan[web]`):
59
+
60
+ ```bash
61
+ pip install 'guanlan[web]' # 装可选依赖(fastapi / uvicorn / markdown)
62
+ guanlan -C my-wiki web # 起本地 Web 宿主,仅监听 127.0.0.1,默认开浏览器
63
+ guanlan -C my-wiki web --port 9000 --no-browser # 换端口 / 不开浏览器
64
+ ```
65
+
66
+ 浏览器里可:浏览 wiki 并跟随 `[[wikilink]]` 导航、跑 check·health·lint 看报告、看 graph、
67
+ 从 `raw/` 选一篇触发 ingest(轮询结果)、与 agent 只读多轮对话(token 流式)。
68
+ **仅供本机单用户**——绝不要把该端口暴露到网络。
69
+
70
+ 生成结构:
71
+
72
+ ```
73
+ my-wiki/
74
+ ├── AGENTAO.md # Agent 行为约束 + 指针
75
+ ├── SCHEMA.md # 本库 Schema:领域 / 启用页面类型 / 自定义规则
76
+ ├── raw/ # 原始资料(只读,事实来源)
77
+ └── wiki/ # Agent 全权生成的知识层
78
+ ├── index.md # 全量页面目录
79
+ ├── log.md # append-only 时间线
80
+ └── overview.md # 跨资料活体综述
81
+ ```
82
+
83
+ ## 开发
84
+
85
+ ```bash
86
+ uv run guanlan init /tmp/demo # 跑 CLI
87
+ uv run pytest # 跑测试
88
+ ```
89
+
90
+ 维护引擎是 `skills/guanlan-wiki/`(`SKILL.md` + `references/conventions.md` + 脚本),
91
+ 开发期命中 Agentao 的 repo-root skill 发现路径(`<工作目录>/skills/`),免安装。
92
+
93
+ ## 许可证
94
+
95
+ [Apache License 2.0](LICENSE)