guanlan-wiki 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guanlan_wiki-0.1.0/.github/workflows/ci.yml +41 -0
- guanlan_wiki-0.1.0/.github/workflows/release.yml +38 -0
- guanlan_wiki-0.1.0/.gitignore +37 -0
- guanlan_wiki-0.1.0/.python-version +1 -0
- guanlan_wiki-0.1.0/CLAUDE.md +62 -0
- guanlan_wiki-0.1.0/LICENSE +201 -0
- guanlan_wiki-0.1.0/PKG-INFO +123 -0
- guanlan_wiki-0.1.0/README.md +95 -0
- guanlan_wiki-0.1.0/docs/DESIGN.md +276 -0
- guanlan_wiki-0.1.0/docs/P2-最小闭环.md +335 -0
- guanlan_wiki-0.1.0/docs/P3-健康与图谱.md +283 -0
- guanlan_wiki-0.1.0/docs/P3.1-别名解析.md +129 -0
- guanlan_wiki-0.1.0/docs/P3.2-缺失实体物化.md +226 -0
- guanlan_wiki-0.1.0/docs/P4-Web宿主.md +393 -0
- guanlan_wiki-0.1.0/docs/P4.1-Web投喂.md +160 -0
- guanlan_wiki-0.1.0/docs/P4.2-会话落盘.md +270 -0
- guanlan_wiki-0.1.0/docs/backlog/notes/broken-link-handling-survey.md +86 -0
- guanlan_wiki-0.1.0/docs/backlog/notes/cjk-retrieval-enhancements.md +22 -0
- guanlan_wiki-0.1.0/docs/guanlan.png +0 -0
- guanlan_wiki-0.1.0/docs/发布到-PyPI.md +55 -0
- guanlan_wiki-0.1.0/examples/AGENTAO.md +28 -0
- guanlan_wiki-0.1.0/examples/SCHEMA.md +33 -0
- guanlan_wiki-0.1.0/examples/wiki/index.md +25 -0
- guanlan_wiki-0.1.0/examples/wiki/log.md +6 -0
- guanlan_wiki-0.1.0/examples/wiki/overview.md +14 -0
- guanlan_wiki-0.1.0/guanlan/__init__.py +7 -0
- guanlan_wiki-0.1.0/guanlan/__main__.py +6 -0
- guanlan_wiki-0.1.0/guanlan/check.py +279 -0
- guanlan_wiki-0.1.0/guanlan/cli.py +228 -0
- guanlan_wiki-0.1.0/guanlan/errors.py +36 -0
- guanlan_wiki-0.1.0/guanlan/gate.py +387 -0
- guanlan_wiki-0.1.0/guanlan/graph.py +321 -0
- guanlan_wiki-0.1.0/guanlan/health.py +182 -0
- guanlan_wiki-0.1.0/guanlan/ingest.py +73 -0
- guanlan_wiki-0.1.0/guanlan/init.py +104 -0
- guanlan_wiki-0.1.0/guanlan/lint.py +130 -0
- guanlan_wiki-0.1.0/guanlan/pages.py +286 -0
- guanlan_wiki-0.1.0/guanlan/paths.py +48 -0
- guanlan_wiki-0.1.0/guanlan/query.py +80 -0
- guanlan_wiki-0.1.0/guanlan/runtime.py +144 -0
- guanlan_wiki-0.1.0/guanlan/skill.py +95 -0
- guanlan_wiki-0.1.0/guanlan/web/__init__.py +16 -0
- guanlan_wiki-0.1.0/guanlan/web/app.py +386 -0
- guanlan_wiki-0.1.0/guanlan/web/chat.py +428 -0
- guanlan_wiki-0.1.0/guanlan/web/jobs.py +81 -0
- guanlan_wiki-0.1.0/guanlan/web/render.py +258 -0
- guanlan_wiki-0.1.0/guanlan/web/server.py +98 -0
- guanlan_wiki-0.1.0/guanlan/web/static/app.css +174 -0
- guanlan_wiki-0.1.0/guanlan/web/static/app.js +547 -0
- guanlan_wiki-0.1.0/guanlan/web/static/index.html +69 -0
- guanlan_wiki-0.1.0/guanlan/web/static/logo.png +0 -0
- guanlan_wiki-0.1.0/pyproject.toml +68 -0
- guanlan_wiki-0.1.0/skills/guanlan-wiki/SKILL.md +74 -0
- guanlan_wiki-0.1.0/skills/guanlan-wiki/references/conventions.md +184 -0
- guanlan_wiki-0.1.0/tests/conftest.py +46 -0
- guanlan_wiki-0.1.0/tests/test_aliases.py +203 -0
- guanlan_wiki-0.1.0/tests/test_check.py +210 -0
- guanlan_wiki-0.1.0/tests/test_cli.py +117 -0
- guanlan_wiki-0.1.0/tests/test_gate.py +283 -0
- guanlan_wiki-0.1.0/tests/test_graph.py +284 -0
- guanlan_wiki-0.1.0/tests/test_health.py +170 -0
- guanlan_wiki-0.1.0/tests/test_ingest.py +156 -0
- guanlan_wiki-0.1.0/tests/test_init.py +51 -0
- guanlan_wiki-0.1.0/tests/test_lint.py +163 -0
- guanlan_wiki-0.1.0/tests/test_pages.py +181 -0
- guanlan_wiki-0.1.0/tests/test_query.py +109 -0
- guanlan_wiki-0.1.0/tests/test_runtime.py +64 -0
- guanlan_wiki-0.1.0/tests/test_skill.py +84 -0
- guanlan_wiki-0.1.0/tests/test_web.py +1335 -0
- guanlan_wiki-0.1.0/uv.lock +1190 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
# 推到 main 或对 main 发 PR 时跑:lint + 全量测试。
|
|
4
|
+
# 全程离线——测试用 fake runner / monkeypatch,不打真实 LLM,无需任何 API key。
|
|
5
|
+
on:
|
|
6
|
+
push:
|
|
7
|
+
branches: [main]
|
|
8
|
+
pull_request:
|
|
9
|
+
branches: [main]
|
|
10
|
+
|
|
11
|
+
# 同一分支/PR 的新推送取消旧的在跑任务,省额度。
|
|
12
|
+
concurrency:
|
|
13
|
+
group: ci-${{ github.ref }}
|
|
14
|
+
cancel-in-progress: true
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
test:
|
|
18
|
+
runs-on: ubuntu-latest
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: 装 uv(带依赖缓存)
|
|
23
|
+
uses: astral-sh/setup-uv@v5
|
|
24
|
+
with:
|
|
25
|
+
enable-cache: true
|
|
26
|
+
cache-dependency-glob: uv.lock
|
|
27
|
+
|
|
28
|
+
- name: 装 Python 3.12
|
|
29
|
+
run: uv python install 3.12
|
|
30
|
+
|
|
31
|
+
# ruff 用 uvx 临时拉,不进 lock;与本地 `uvx ruff check` 一致。
|
|
32
|
+
- name: Lint(ruff)
|
|
33
|
+
run: uvx ruff check guanlan tests
|
|
34
|
+
|
|
35
|
+
# --extra web 装上 fastapi/uvicorn/markdown,否则 test_web.py / test_aliases.py
|
|
36
|
+
# 会被 importorskip 跳过;dev 组(pytest)uv sync 默认带上。
|
|
37
|
+
- name: 装依赖
|
|
38
|
+
run: uv sync --extra web
|
|
39
|
+
|
|
40
|
+
- name: 跑测试
|
|
41
|
+
run: uv run --extra web pytest -q
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: Release (PyPI)
|
|
2
|
+
|
|
3
|
+
# 推 v* tag 时构建并发布到 PyPI。用 Trusted Publishing(OIDC)——仓库不存任何 token:
|
|
4
|
+
# 需先在 PyPI 为项目 guanlan-wiki 配好 pending publisher(绑定本仓库 + 本 workflow +
|
|
5
|
+
# environment 名 pypi),见 docs/发布到-PyPI.md。
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
tags: ["v*"]
|
|
9
|
+
|
|
10
|
+
# 默认不给任何权限;发布 job 单独申明 id-token: write 换取 OIDC 凭据。
|
|
11
|
+
permissions: {}
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
pypi-publish:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
environment:
|
|
17
|
+
name: pypi
|
|
18
|
+
url: https://pypi.org/p/guanlan-wiki
|
|
19
|
+
permissions:
|
|
20
|
+
id-token: write # Trusted Publishing 必需
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v4
|
|
23
|
+
|
|
24
|
+
- name: 装 uv
|
|
25
|
+
uses: astral-sh/setup-uv@v5
|
|
26
|
+
with:
|
|
27
|
+
enable-cache: true
|
|
28
|
+
cache-dependency-glob: uv.lock
|
|
29
|
+
|
|
30
|
+
- name: 构建 sdist + wheel
|
|
31
|
+
run: uv build
|
|
32
|
+
|
|
33
|
+
# 发布前再跑一遍 twine check,产物不合规就别上传。
|
|
34
|
+
- name: 校验产物
|
|
35
|
+
run: uvx twine check dist/*
|
|
36
|
+
|
|
37
|
+
- name: 发布到 PyPI(Trusted Publishing,免 token)
|
|
38
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# macOS
|
|
2
|
+
.DS_Store
|
|
3
|
+
|
|
4
|
+
# Python-generated files
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[oc]
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
wheels/
|
|
10
|
+
*.egg-info
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv
|
|
14
|
+
|
|
15
|
+
# Local agentao residue (from previous experiments in this dir, if any)
|
|
16
|
+
.agentao/
|
|
17
|
+
agentao.log
|
|
18
|
+
agentao.log.*
|
|
19
|
+
|
|
20
|
+
# Claude Code 本机运行时状态(scheduled_tasks.lock 含 sessionId / pid / 进程起始时间),
|
|
21
|
+
# per-machine 不入仓——否则别的 checkout 会继承陈旧锁 + 本地会话元数据。
|
|
22
|
+
.claude/
|
|
23
|
+
|
|
24
|
+
# Qoder
|
|
25
|
+
.qoder/
|
|
26
|
+
|
|
27
|
+
# Dev-only sample wiki data & derived (local testing only — never pushed; see docs/DESIGN.md §4.7)
|
|
28
|
+
# Templates live in examples/; only the repo-root sample config + data/derived dirs are ignored.
|
|
29
|
+
/raw/
|
|
30
|
+
/wiki/
|
|
31
|
+
/graph/
|
|
32
|
+
/workspace/
|
|
33
|
+
|
|
34
|
+
# Repo-root sample config copied from examples/ (may carry machine paths / private dev settings).
|
|
35
|
+
# Anchored to root so examples/AGENTAO.md and examples/SCHEMA.md stay tracked.
|
|
36
|
+
/AGENTAO.md
|
|
37
|
+
/SCHEMA.md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## What this is
|
|
6
|
+
|
|
7
|
+
观澜 (GuānLán) is an implementation of the [Karpathy LLM Wiki pattern](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f): an Agent incrementally builds and maintains a structured, cross-linked markdown knowledge wiki instead of doing fresh RAG retrieval on every query. The full design (in Chinese) is the authoritative spec — read [`docs/DESIGN.md`](docs/DESIGN.md) before any non-trivial change.
|
|
8
|
+
|
|
9
|
+
**Current status: P4 (optional local Web host).** P2's closed loop (`guanlan init` / `ingest` / `query` / `check` / `install-skill`, wired through Agentao) + P3's three on-demand zero-LLM maintenance tools (`guanlan health` / `lint` / `graph`) + P4's **optional** Web host (`guanlan web`, needs `pip install 'guanlan-wiki[web]'`) are all implemented. P4 puts the existing commands in a browser via a thin FastAPI/uvicorn layer (127.0.0.1-only, `workers=1`) under a `guanlan/web/` subpackage that carries *no business intelligence*: it reuses `run_ingest` / `run_check` / `run_health` / `run_lint` / `build_and_write_graph` / `pages.*` and an embedded read-only `Agentao` for chat. **Read/write split** (DESIGN §5.2): the only write job `ingest` reuses the P2 subprocess + single-writer gate (one background worker, FIFO); all Q&A (one-shot + multi-turn) goes through a read-only in-process embedded `Agentao` (`.arun`, token-streamed, memory-only). Web-side `raw/` writes, `query --backfill`, writable work-sessions, session persistence, multi-format ingest remain post-P4 (DESIGN §8). When adding features, match the phase boundaries in DESIGN §4.4 / §7.
|
|
10
|
+
|
|
11
|
+
The **P2 spec** (module layout, deterministic gate, `raw/` snapshot + `check` contracts, exit codes, Agentao integration) is [`docs/P2-最小闭环.md`](docs/P2-最小闭环.md); the **P3 spec** (`pages.py` shared primitives with strict/lenient frontmatter tiers, `graph`/`health`/`lint` contracts, `EXIT_LINT_FINDINGS`, advisory-not-gate exit semantics) is [`docs/P3-健康与图谱.md`](docs/P3-健康与图谱.md); the **P4 spec** (`guanlan/web/` layout, the read/write split, single-worker `ingest` job, read-only embedded chat with the four embedding pitfalls + token-streaming transport contract, HTTP API + SSE contracts, no new exit codes) is [`docs/P4-Web宿主.md`](docs/P4-Web宿主.md) — together they document how the current code is structured. The **P3.1 spec** (optional `aliases` frontmatter feeding the single `pages.py` resolution point: `alias_index` / `link_resolution_index` / alias-aware `link_target_stems`, `check` uniqueness validation `aliases.collides_stem`/`aliases.duplicate`, alias edges in `graph`, Web `[[alias]]` linking — a zero-LLM half-phase enhancement, not a new milestone; P5 still = multi-format) is [`docs/P3.1-别名解析.md`](docs/P3.1-别名解析.md). 
Two **landing designs for P4 §10 deferred items** (specced, not yet implemented) are [`docs/P4.1-Web投喂.md`](docs/P4.1-Web投喂.md) (**P4.1** — `POST /api/raw` paste-to-save: a human source-add, *not* a gated agent write; serialized through the existing single-writer `JobQueue` to avoid the `run_guarded_write` `raw/` snapshot window; default no-overwrite, slug + `.md`) and [`docs/P4.2-会话落盘.md`](docs/P4.2-会话落盘.md) (**P4.2** — session persistence/restore: reuse `agentao.embedding.{save_session,load_session,list_sessions,delete_session}` under `<kb>/.agentao/sessions/`, conversation id → agentao UUID, lazy restore that re-applies the read-only two-point posture; thin adaptation over agentao's *snapshot-file* semantics — dedup the catalog by `session_id` and prune to one snapshot per session *before* each save (since `save_session` rotates by file-count right after writing) so the 10-*file* rotation cap behaves as a 10-*session* cap). Both are zero-LLM Web-host half-phases that stay inside the P4 boundary (no new exit codes, no SSE changes, no writable sessions).
|
|
12
|
+
|
|
13
|
+
## Commands
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uv run guanlan init /tmp/demo # scaffold a knowledge base (deterministic, zero-LLM)
|
|
17
|
+
uv run guanlan -C /tmp/demo check # deterministic validation (frontmatter / broken links / sources)
|
|
18
|
+
uv run guanlan -C /tmp/demo health # P3: stub pages + index↔disk sync (advisory; --strict → exit 6)
|
|
19
|
+
uv run guanlan -C /tmp/demo lint # P3: orphans / broken links / missing entities (advisory)
|
|
20
|
+
uv run guanlan -C /tmp/demo graph # P3: write graph/graph.json + graph.html (--json-only skips html)
|
|
21
|
+
uv run guanlan -C /tmp/demo web --no-browser # P4: optional local Web host (needs guanlan-wiki[web]; 127.0.0.1 only)
|
|
22
|
+
uv run pytest # run all tests
|
|
23
|
+
uv run pytest tests/test_web.py # P4 Web host tests (skipped if guanlan-wiki[web] absent)
|
|
24
|
+
uv run pytest tests/test_init.py::test_init_is_idempotent_and_non_destructive # single test
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
(`ingest` / `query` and the Web host's chat drive Agentao + the skill and need a configured model; `init` / `check` / `health` / `lint` / `graph` are the zero-LLM ones runnable offline. `guanlan web` needs the optional `web` extra: `uv pip install 'fastapi>=0.110' 'uvicorn>=0.29' 'markdown>=3'`, or `pip install 'guanlan-wiki[web]'`.)
|
|
28
|
+
|
|
29
|
+
Python 3.12+, dependencies managed by `uv` (see `uv.lock`). The package depends on `agentao` (the governed Agent runtime executing LLM-driven `ingest`/`query` via subprocess, and — in P4 — embedded read-only for Web chat). The `web` extra (fastapi/uvicorn/markdown) is optional and not part of the core install.
|
|
30
|
+
|
|
31
|
+
## Architecture
|
|
32
|
+
|
|
33
|
+
The project deliberately separates three concerns. Internalize this split before editing — most design decisions follow from it.
|
|
34
|
+
|
|
35
|
+
1. **`guanlan/` — the thin CLI wrapper (this package).** It carries *no business intelligence*. Its only jobs are: (a) `init` (deterministic template generation, zero LLM), and (b) in P2, orchestrating Agentao + the skill and enforcing deterministic gates on write operations. `cli.py` is argparse-only; `init.py` does the file generation.
|
|
36
|
+
|
|
37
|
+
2. **`skills/guanlan-wiki/` — the maintenance engine.** This is where the actual wiki-maintenance workflows live (`SKILL.md` = workflows, `references/conventions.md` = default page/frontmatter/naming conventions, and `scripts/*.py` = deterministic checks, to be added in P2/P3). The engine is shipped/installed *once* and is **not** copied into each knowledge base. It is intended to run under Agentao's skill discovery.
|
|
38
|
+
|
|
39
|
+
3. **User knowledge base (generated by `guanlan init`).** Holds only data + per-base config: `AGENTAO.md` (Agent behavior hard-constraints + pointers), `SCHEMA.md` (this base's domain/page-types/custom rules), `raw/` (read-only sources), `wiki/` (Agent-owned generated layer: `index.md`, `log.md`, `overview.md`, plus `sources/ entities/ concepts/ syntheses/`).
|
|
40
|
+
|
|
41
|
+
### Two run modes (do not mix them)
|
|
42
|
+
|
|
43
|
+
- **Development = repo root *is* a sample wiki.** Set Agentao's `working_directory` to this repo root; `skills/guanlan-wiki/` then hits Agentao's repo-root discovery path (`<wd>/skills/`), so the engine is found with no install. Sample wiki data (`raw/`, `wiki/`, `graph/`) and the dev-copied `AGENTAO.md`/`SCHEMA.md` are `.gitignore`d — they may contain machine-local paths and never get committed.
|
|
44
|
+
- **External real wiki = global install.** The skill is installed to `~/.agentao/skills/guanlan-wiki/` (cwd-independent), with the user's base as `working_directory`. There is intentionally no "discover repo skills/ from an external wiki" path.
|
|
45
|
+
|
|
46
|
+
### init template duality (`guanlan/init.py`)
|
|
47
|
+
|
|
48
|
+
`init` copies a template tree. `_templates_dir()` resolves two locations by priority: bundled `guanlan/_templates/` (installed wheel) → repo-root `examples/` (development). The wheel's `force-include` in `pyproject.toml` copies `examples/{AGENTAO.md,SCHEMA.md,wiki}` into `guanlan/_templates/` at build time — so **`examples/` is the single source of truth for init templates**; edit templates there, not in `_templates/`. `init` never overwrites existing files (idempotent) and substitutes a `__DATE__` token in `wiki/` seed files.
|
|
49
|
+
|
|
50
|
+
## Invariants that drive the design
|
|
51
|
+
|
|
52
|
+
These come up repeatedly in DESIGN and the skill; preserve them in any change:
|
|
53
|
+
|
|
54
|
+
- **Markdown is the only source of truth.** Any index / graph / cache is a derivative that must be idempotently rebuildable from markdown — it never becomes authoritative.
|
|
55
|
+
- **`raw/` is read-only and immutable.** In P2 this is enforced *deterministically by the wrapper* via a before/after snapshot (filename + size + mtime, SHA256 if needed) around the Agentao call — not by permission config, since a snapshot also catches shell `mv`/`rm`/`python` writes that bypass `write_file`.
|
|
56
|
+
- **Zero-LLM scripts vs. LLM-only workflows.** Deterministic work (frontmatter/wikilink/structure checks, graph building) is plain Python scripts with no LLM. LLM is used *only* for `ingest` and `query`, and always via the Agentao runtime — scripts must never carry their own LLM client or API keys.
|
|
57
|
+
- **`SCHEMA.md` / `AGENTAO.md` / `index.md` / `log.md` / `overview.md` are config, not content** — exclude them from index/graph/lint scans.
|
|
58
|
+
- **Data conventions** (frontmatter fields, kebab-case vs TitleCase naming, `[[wikilink]]` resolution, `index.md`/`log.md` formats, the `## ⚠️ 矛盾与存疑` contradiction-marking format) are specified in `skills/guanlan-wiki/references/conventions.md` and DESIGN §4.5. A base's `SCHEMA.md` may override defaults.
|
|
59
|
+
|
|
60
|
+
## Conventions
|
|
61
|
+
|
|
62
|
+
The codebase (code comments, docstrings, design docs, user-facing CLI output) is written in **Chinese**. Match that when editing existing files.
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or
|
|
95
|
+
Derivative Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and
|
|
117
|
+
do not modify the License. You may add Your own attribution
|
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
120
|
+
that such additional attribution notices cannot be construed
|
|
121
|
+
as modifying the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for reasonable and customary use in describing the
|
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
168
|
+
or other liability obligations and/or rights consistent with this
|
|
169
|
+
License. However, in accepting such obligations, You may act only
|
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
174
|
+
of your accepting any such warranty or additional liability.
|
|
175
|
+
|
|
176
|
+
END OF TERMS AND CONDITIONS
|
|
177
|
+
|
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
179
|
+
|
|
180
|
+
To apply the Apache License to your work, attach the following
|
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
182
|
+
replaced with your own identifying information. (Don't include
|
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
184
|
+
comment syntax for the file format. We also recommend that a
|
|
185
|
+
file or class name and description of purpose be included on the
|
|
186
|
+
same "printed page" as the copyright notice for easier
|
|
187
|
+
identification within third-party archives.
|
|
188
|
+
|
|
189
|
+
Copyright 2026 jin-bo
|
|
190
|
+
|
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
|
+
you may not use this file except in compliance with the License.
|
|
193
|
+
You may obtain a copy of the License at
|
|
194
|
+
|
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
196
|
+
|
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
200
|
+
See the License for the specific language governing permissions and
|
|
201
|
+
limitations under the License.
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: guanlan-wiki
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: 观澜 — 增量构建并维护结构化、互链的 markdown 知识 wiki(Karpathy LLM Wiki 模式)
|
|
5
|
+
Project-URL: Homepage, https://github.com/jin-bo/guanlan
|
|
6
|
+
Project-URL: Repository, https://github.com/jin-bo/guanlan
|
|
7
|
+
Project-URL: Issues, https://github.com/jin-bo/guanlan/issues
|
|
8
|
+
Author-email: jin-bo <jinbobo@gmail.com>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent,agentao,karpathy,knowledge-base,llm,markdown,rag,wiki
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Natural Language :: Chinese (Simplified)
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
19
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Requires-Dist: agentao[cli]>=0.4.8
|
|
22
|
+
Requires-Dist: pyyaml>=6
|
|
23
|
+
Provides-Extra: web
|
|
24
|
+
Requires-Dist: fastapi>=0.110; extra == 'web'
|
|
25
|
+
Requires-Dist: markdown>=3; extra == 'web'
|
|
26
|
+
Requires-Dist: uvicorn>=0.29; extra == 'web'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# 观澜 (GuānLán)
|
|
30
|
+
|
|
31
|
+
> 《孟子·尽心上》"观水有术,必观其澜"——在信息的汪洋中洞察脉络与趋势。
|
|
32
|
+
|
|
33
|
+
观澜是 [Karpathy LLM Wiki 模式](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) 的一个实现:让 Agent **增量地构建并持续维护一个结构化、互相链接的知识 wiki**,而不是每次提问都从原始文档临时检索(传统 RAG)。知识被"编译"一次后持续保鲜,随每篇新资料、每次提问而复利增长。
|
|
34
|
+
|
|
35
|
+
- **markdown 始终是唯一事实来源**——整个知识库就是一组本地 markdown 文件,任何索引/图谱/缓存都是可幂等重建的派生物。
|
|
36
|
+
- **Agent 全权拥有 wiki 层,人不直接写**——人负责投喂资料、提问、给方向;摘要、交叉引用、归档全交给 Agent。
|
|
37
|
+
- **`raw/` 只读不可变**——Agent 只读原始资料,永不修改,保证事实可追溯。
|
|
38
|
+
- **确定性优先**——结构检查、断链、frontmatter 校验走脚本(零 LLM);需 LLM 的 ingest/query 统一经 Agentao 运行时治理。
|
|
39
|
+
|
|
40
|
+
完整设计见 [`docs/DESIGN.md`](docs/DESIGN.md)。
|
|
41
|
+
|
|
42
|
+
## 状态
|
|
43
|
+
|
|
44
|
+
🚀 **P4(Web 宿主,可选叠加层)** —— 在 P2 最小闭环(`guanlan init` / `ingest` / `query` / `check` / `install-skill`)与 P3 维护工具(`health` / `lint` / `graph`)之上,新增一个**可选**的本地 Web 宿主,把上述命令搬进浏览器:
|
|
45
|
+
|
|
46
|
+
- `guanlan web` —— 起一个仅监听 `127.0.0.1` 的本地 Web 宿主(需 `pip install 'guanlan-wiki[web]'`)。浏览 wiki(`[[wikilink]]` 可点击导航)/ 跑 check·health·lint 看报告 / 看 graph / 从 `raw/` 选一篇触发 ingest(单 worker 串行,轮询结果)/ 与 agent **只读多轮对话**(token 流式)。
|
|
47
|
+
- 它是 **MVP 之后的可选叠加层**:不装 `guanlan-wiki[web]`、不起 `guanlan web`,整套东西照旧用 CLI 跑通。markdown 仍是唯一事实来源,Web 只是 ingest 与问答的另一个入口、wiki 的只读浏览器。
|
|
48
|
+
- **读写分线**:唯一写作业 `ingest` 复用 P2 子进程 + 单写者门禁;所有问答(一次性单轮 + 多轮)走只读进程内嵌入 `Agentao`(默认只读、不过门禁、仅内存)。
|
|
49
|
+
|
|
50
|
+
P3 三个零-LLM 维护工具(advisory):
|
|
51
|
+
|
|
52
|
+
- `guanlan health` —— stub 页面 + index↔disk 同步(`--strict` → 退出码 6)。
|
|
53
|
+
- `guanlan lint` —— 孤儿页 / 断链 / 缺失实体。
|
|
54
|
+
- `guanlan graph` —— 确定性 `[[wikilink]]` 图谱 → `graph/graph.json` + 自包含 `graph/graph.html`(`--json-only` 跳过 html)。
|
|
55
|
+
|
|
56
|
+
**P3.1 别名解析(零-LLM 增强)** —— entity/concept 页可在 frontmatter 声明可选 `aliases`,让别名进入 `[[wikilink]]` 解析命名空间(与页名同口径、大小写不敏感):`[[大模型]]` / `[[LLM]]` 都解析到声明它们的页,**消假断链**(check / lint / graph / Web 一致)、**补 CJK 同义召回**(别名纳入 query 2-gram 与 ingest 去重)。别名全局唯一由 `check` 确定性校验(撞页名 / 重复 → 阻断写门禁)。这不是新里程碑,P5 仍是多格式与自动化。细化见 [`docs/P3.1-别名解析.md`](docs/P3.1-别名解析.md)。
|
|
57
|
+
|
|
58
|
+
Web 端写 `raw/`、`query --backfill`、可写多轮工作会话、会话落盘、多格式 ingest 留待 P4 之后(见 DESIGN §8 与 `docs/P4-Web宿主.md` §10)。别名自动物化建页(`heal`)、同义词表、向量检索按需驱动,另开方案。
|
|
59
|
+
|
|
60
|
+
## 快速开始
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# 在空目录初始化一个知识库(生成 AGENTAO.md / SCHEMA.md / raw/ / wiki/)
|
|
64
|
+
guanlan init my-wiki
|
|
65
|
+
|
|
66
|
+
# 或就地初始化当前目录
|
|
67
|
+
guanlan init
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
`init` 是确定性的(零 LLM),已存在的文件不会被覆盖,可安全重复运行。
|
|
71
|
+
|
|
72
|
+
投喂资料、提问、维护:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
# 投喂资料 / 提问(需配置模型,经 Agentao 运行时)
|
|
76
|
+
guanlan -C my-wiki ingest path/to/source.md
|
|
77
|
+
guanlan -C my-wiki query "..."
|
|
78
|
+
|
|
79
|
+
# 零-LLM、可离线运行的确定性工具
|
|
80
|
+
guanlan -C my-wiki check # frontmatter / 断链 / 来源校验
|
|
81
|
+
guanlan -C my-wiki health # stub 页面 + index↔disk 同步(--strict → exit 6)
|
|
82
|
+
guanlan -C my-wiki lint # 孤儿页 / 断链 / 缺失实体
|
|
83
|
+
guanlan -C my-wiki graph # 写出 graph/graph.json + graph.html(--json-only 跳过 html)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
可选 Web 宿主(叠加层,需先装 `guanlan-wiki[web]`):
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install 'guanlan-wiki[web]' # 装可选依赖(fastapi / uvicorn / markdown)
|
|
90
|
+
guanlan -C my-wiki web # 起本地 Web 宿主,仅监听 127.0.0.1,默认开浏览器
|
|
91
|
+
guanlan -C my-wiki web --port 9000 --no-browser # 换端口 / 不开浏览器
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
浏览器里可:浏览 wiki 并跟随 `[[wikilink]]` 导航、跑 check·health·lint 看报告、看 graph、
|
|
95
|
+
从 `raw/` 选一篇触发 ingest(轮询结果)、与 agent 只读多轮对话(token 流式)。
|
|
96
|
+
**仅供本机单用户**——绝不要把该端口暴露到网络。
|
|
97
|
+
|
|
98
|
+
生成结构:
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
my-wiki/
|
|
102
|
+
├── AGENTAO.md # Agent 行为约束 + 指针
|
|
103
|
+
├── SCHEMA.md # 本库 Schema:领域 / 启用页面类型 / 自定义规则
|
|
104
|
+
├── raw/ # 原始资料(只读,事实来源)
|
|
105
|
+
└── wiki/ # Agent 全权生成的知识层
|
|
106
|
+
    ├── index.md # 全量页面目录
|
|
107
|
+
    ├── log.md # append-only 时间线
|
|
108
|
+
    └── overview.md # 跨资料活体综述
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## 开发
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
uv run guanlan init /tmp/demo # 跑 CLI
|
|
115
|
+
uv run pytest # 跑测试
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
维护引擎是 `skills/guanlan-wiki/`(`SKILL.md` + `references/conventions.md` + 脚本),
|
|
119
|
+
开发期命中 Agentao 的 repo-root skill 发现路径(`<工作目录>/skills/`),免安装。
|
|
120
|
+
|
|
121
|
+
## 许可证
|
|
122
|
+
|
|
123
|
+
[Apache License 2.0](LICENSE)
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# 观澜 (GuānLán)
|
|
2
|
+
|
|
3
|
+
> 《孟子·尽心上》"观水有术,必观其澜"——在信息的汪洋中洞察脉络与趋势。
|
|
4
|
+
|
|
5
|
+
观澜是 [Karpathy LLM Wiki 模式](https://gist.github.com/karpathy/442a6bf555914893e9891c11519de94f) 的一个实现:让 Agent **增量地构建并持续维护一个结构化、互相链接的知识 wiki**,而不是每次提问都从原始文档临时检索(传统 RAG)。知识被"编译"一次后持续保鲜,随每篇新资料、每次提问而复利增长。
|
|
6
|
+
|
|
7
|
+
- **markdown 始终是唯一事实来源**——整个知识库就是一组本地 markdown 文件,任何索引/图谱/缓存都是可幂等重建的派生物。
|
|
8
|
+
- **Agent 全权拥有 wiki 层,人不直接写**——人负责投喂资料、提问、给方向;摘要、交叉引用、归档全交给 Agent。
|
|
9
|
+
- **`raw/` 只读不可变**——Agent 只读原始资料,永不修改,保证事实可追溯。
|
|
10
|
+
- **确定性优先**——结构检查、断链、frontmatter 校验走脚本(零 LLM);需 LLM 的 ingest/query 统一经 Agentao 运行时治理。
|
|
11
|
+
|
|
12
|
+
完整设计见 [`docs/DESIGN.md`](docs/DESIGN.md)。
|
|
13
|
+
|
|
14
|
+
## 状态
|
|
15
|
+
|
|
16
|
+
🚀 **P4(Web 宿主,可选叠加层)** —— 在 P2 最小闭环(`guanlan init` / `ingest` / `query` / `check` / `install-skill`)与 P3 维护工具(`health` / `lint` / `graph`)之上,新增一个**可选**的本地 Web 宿主,把上述命令搬进浏览器:
|
|
17
|
+
|
|
18
|
+
- `guanlan web` —— 起一个仅监听 `127.0.0.1` 的本地 Web 宿主(需 `pip install 'guanlan-wiki[web]'`)。浏览 wiki(`[[wikilink]]` 可点击导航)/ 跑 check·health·lint 看报告 / 看 graph / 从 `raw/` 选一篇触发 ingest(单 worker 串行,轮询结果)/ 与 agent **只读多轮对话**(token 流式)。
|
|
19
|
+
- 它是 **MVP 之后的可选叠加层**:不装 `guanlan-wiki[web]`、不起 `guanlan web`,整套东西照旧用 CLI 跑通。markdown 仍是唯一事实来源,Web 只是 ingest 与问答的另一个入口、wiki 的只读浏览器。
|
|
20
|
+
- **读写分线**:唯一写作业 `ingest` 复用 P2 子进程 + 单写者门禁;所有问答(一次性单轮 + 多轮)走只读进程内嵌入 `Agentao`(默认只读、不过门禁、仅内存)。
|
|
21
|
+
|
|
22
|
+
P3 三个零-LLM 维护工具(advisory):
|
|
23
|
+
|
|
24
|
+
- `guanlan health` —— stub 页面 + index↔disk 同步(`--strict` → 退出码 6)。
|
|
25
|
+
- `guanlan lint` —— 孤儿页 / 断链 / 缺失实体。
|
|
26
|
+
- `guanlan graph` —— 确定性 `[[wikilink]]` 图谱 → `graph/graph.json` + 自包含 `graph/graph.html`(`--json-only` 跳过 html)。
|
|
27
|
+
|
|
28
|
+
**P3.1 别名解析(零-LLM 增强)** —— entity/concept 页可在 frontmatter 声明可选 `aliases`,让别名进入 `[[wikilink]]` 解析命名空间(与页名同口径、大小写不敏感):`[[大模型]]` / `[[LLM]]` 都解析到声明它们的页,**消假断链**(check / lint / graph / Web 一致)、**补 CJK 同义召回**(别名纳入 query 2-gram 与 ingest 去重)。别名全局唯一由 `check` 确定性校验(撞页名 / 重复 → 阻断写门禁)。这不是新里程碑,P5 仍是多格式与自动化。细化见 [`docs/P3.1-别名解析.md`](docs/P3.1-别名解析.md)。
|
|
29
|
+
|
|
30
|
+
Web 端写 `raw/`、`query --backfill`、可写多轮工作会话、会话落盘、多格式 ingest 留待 P4 之后(见 DESIGN §8 与 `docs/P4-Web宿主.md` §10)。别名自动物化建页(`heal`)、同义词表、向量检索按需驱动,另开方案。
|
|
31
|
+
|
|
32
|
+
## 快速开始
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
# 在空目录初始化一个知识库(生成 AGENTAO.md / SCHEMA.md / raw/ / wiki/)
|
|
36
|
+
guanlan init my-wiki
|
|
37
|
+
|
|
38
|
+
# 或就地初始化当前目录
|
|
39
|
+
guanlan init
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
`init` 是确定性的(零 LLM),已存在的文件不会被覆盖,可安全重复运行。
|
|
43
|
+
|
|
44
|
+
投喂资料、提问、维护:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# 投喂资料 / 提问(需配置模型,经 Agentao 运行时)
|
|
48
|
+
guanlan -C my-wiki ingest path/to/source.md
|
|
49
|
+
guanlan -C my-wiki query "..."
|
|
50
|
+
|
|
51
|
+
# 零-LLM、可离线运行的确定性工具
|
|
52
|
+
guanlan -C my-wiki check # frontmatter / 断链 / 来源校验
|
|
53
|
+
guanlan -C my-wiki health # stub 页面 + index↔disk 同步(--strict → exit 6)
|
|
54
|
+
guanlan -C my-wiki lint # 孤儿页 / 断链 / 缺失实体
|
|
55
|
+
guanlan -C my-wiki graph # 写出 graph/graph.json + graph.html(--json-only 跳过 html)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
可选 Web 宿主(叠加层,需先装 `guanlan-wiki[web]`):
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install 'guanlan-wiki[web]' # 装可选依赖(fastapi / uvicorn / markdown)
|
|
62
|
+
guanlan -C my-wiki web # 起本地 Web 宿主,仅监听 127.0.0.1,默认开浏览器
|
|
63
|
+
guanlan -C my-wiki web --port 9000 --no-browser # 换端口 / 不开浏览器
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
浏览器里可:浏览 wiki 并跟随 `[[wikilink]]` 导航、跑 check·health·lint 看报告、看 graph、
|
|
67
|
+
从 `raw/` 选一篇触发 ingest(轮询结果)、与 agent 只读多轮对话(token 流式)。
|
|
68
|
+
**仅供本机单用户**——绝不要把该端口暴露到网络。
|
|
69
|
+
|
|
70
|
+
生成结构:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
my-wiki/
|
|
74
|
+
├── AGENTAO.md # Agent 行为约束 + 指针
|
|
75
|
+
├── SCHEMA.md # 本库 Schema:领域 / 启用页面类型 / 自定义规则
|
|
76
|
+
├── raw/ # 原始资料(只读,事实来源)
|
|
77
|
+
└── wiki/ # Agent 全权生成的知识层
|
|
78
|
+
    ├── index.md # 全量页面目录
|
|
79
|
+
    ├── log.md # append-only 时间线
|
|
80
|
+
    └── overview.md # 跨资料活体综述
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## 开发
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
uv run guanlan init /tmp/demo # 跑 CLI
|
|
87
|
+
uv run pytest # 跑测试
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
维护引擎是 `skills/guanlan-wiki/`(`SKILL.md` + `references/conventions.md` + 脚本),
|
|
91
|
+
开发期命中 Agentao 的 repo-root skill 发现路径(`<工作目录>/skills/`),免安装。
|
|
92
|
+
|
|
93
|
+
## 许可证
|
|
94
|
+
|
|
95
|
+
[Apache License 2.0](LICENSE)
|