openkb 0.1.4.dev0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openkb-0.2.0/.claude-plugin/marketplace.json +31 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/.github/workflows/publish.yml +14 -17
- {openkb-0.1.4.dev0 → openkb-0.2.0}/PKG-INFO +34 -3
- {openkb-0.1.4.dev0 → openkb-0.2.0}/README.md +32 -2
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/agent/chat.py +0 -1
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/agent/compiler.py +177 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/agent/linter.py +1 -1
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/agent/query.py +1 -1
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/agent/tools.py +5 -6
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/cli.py +595 -11
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/converter.py +1 -1
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/state.py +9 -0
- openkb-0.2.0/openkb/url_ingest.py +280 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/pyproject.toml +6 -2
- openkb-0.2.0/skills/openkb/SKILL.md +175 -0
- openkb-0.2.0/skills/openkb/references/commands.md +69 -0
- openkb-0.2.0/skills/openkb/references/wiki-schema.md +119 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_add_command.py +4 -6
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_agent_tools.py +0 -2
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_cli.py +117 -4
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_compiler.py +0 -1
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_config.py +0 -2
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_converter.py +1 -4
- openkb-0.2.0/tests/test_feedback.py +241 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_images.py +0 -3
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_indexer.py +0 -2
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_lint.py +2 -3
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_linter.py +0 -1
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_list_status.py +19 -1
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_query.py +0 -1
- openkb-0.2.0/tests/test_remove.py +986 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_state.py +0 -2
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_tree_renderer.py +0 -1
- openkb-0.2.0/tests/test_url_ingest.py +621 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_watcher.py +1 -2
- {openkb-0.1.4.dev0 → openkb-0.2.0}/.env.example +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/.gitignore +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/LICENSE +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/config.yaml.example +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/__init__.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/__main__.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/agent/__init__.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/agent/_markdown.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/agent/chat_session.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/config.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/images.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/indexer.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/lint.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/log.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/schema.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/tree_renderer.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/openkb/watcher.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/conftest.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_chat_session.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_chat_slash_commands.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_lint_cli.py +0 -0
- {openkb-0.1.4.dev0 → openkb-0.2.0}/tests/test_markdown_renderer.py +0 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "vectify",
|
|
3
|
+
"owner": {
|
|
4
|
+
"name": "Ray",
|
|
5
|
+
"email": "ray@vectify.ai"
|
|
6
|
+
},
|
|
7
|
+
"metadata": {
|
|
8
|
+
"description": "Skills for navigating an OpenKB-compiled knowledge base from agent CLIs (Claude Code, Codex, Gemini CLI).",
|
|
9
|
+
"version": "0.1.4"
|
|
10
|
+
},
|
|
11
|
+
"plugins": [
|
|
12
|
+
{
|
|
13
|
+
"name": "openkb",
|
|
14
|
+
"description": "Navigate an OpenKB-compiled wiki: discover documents and concepts via openkb CLI commands, read concept and summary pages directly, and follow wikilinks across the knowledge graph.",
|
|
15
|
+
"source": "./",
|
|
16
|
+
"strict": false,
|
|
17
|
+
"version": "0.1.4",
|
|
18
|
+
"author": {
|
|
19
|
+
"name": "Ray",
|
|
20
|
+
"email": "ray@vectify.ai"
|
|
21
|
+
},
|
|
22
|
+
"homepage": "https://github.com/VectifyAI/OpenKB",
|
|
23
|
+
"repository": "https://github.com/VectifyAI/OpenKB",
|
|
24
|
+
"license": "Apache-2.0",
|
|
25
|
+
"keywords": ["knowledge-base", "wiki", "openkb", "rag", "agent-skill"],
|
|
26
|
+
"skills": [
|
|
27
|
+
"./skills/openkb"
|
|
28
|
+
]
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
name: Publish to PyPI
|
|
2
2
|
|
|
3
3
|
# Release flow:
|
|
4
|
-
# 1.
|
|
5
|
-
# 2.
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
4
|
+
# 1. `git tag -a vX.Y.Z -m "Release X.Y.Z" && git push origin vX.Y.Z`
|
|
5
|
+
# 2. This workflow builds the package (hatch-vcs derives the version from
|
|
6
|
+
# the tag automatically — pyproject.toml has no static version field),
|
|
7
|
+
# publishes to PyPI via OIDC trusted publishing, and creates a GitHub
|
|
8
|
+
# Release with auto-generated notes.
|
|
9
|
+
#
|
|
10
|
+
# Tag must follow PEP 440: `v0.1.4`, `v0.2.0rc1`, `v0.1.4.dev0`. The
|
|
11
|
+
# leading `v` is stripped by hatch-vcs when computing the package version.
|
|
9
12
|
#
|
|
10
13
|
# Do not run `python -m build && twine upload` locally — that bypasses the
|
|
11
|
-
#
|
|
12
|
-
# version uploads, so if the workflow
|
|
13
|
-
# manually create the missing GitHub
|
|
14
|
+
# GitHub Release creation and produces a release without an attached
|
|
15
|
+
# changelog. PyPI rejects duplicate version uploads, so if the workflow
|
|
16
|
+
# fails after PyPI publish succeeded, manually create the missing GitHub
|
|
17
|
+
# Release with `gh release create vX.Y.Z`.
|
|
14
18
|
|
|
15
19
|
on:
|
|
16
20
|
push:
|
|
@@ -26,20 +30,13 @@ jobs:
|
|
|
26
30
|
contents: write # Create GitHub Release
|
|
27
31
|
steps:
|
|
28
32
|
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.2.2
|
|
33
|
+
with:
|
|
34
|
+
fetch-depth: 0 # hatch-vcs needs full history + tags
|
|
29
35
|
|
|
30
36
|
- uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
|
|
31
37
|
with:
|
|
32
38
|
python-version: "3.12"
|
|
33
39
|
|
|
34
|
-
- name: Verify tag matches pyproject.toml version
|
|
35
|
-
run: |
|
|
36
|
-
tag="${GITHUB_REF_NAME#v}"
|
|
37
|
-
pkg_version=$(python -c "import tomllib,pathlib; print(tomllib.loads(pathlib.Path('pyproject.toml').read_text())['project']['version'])")
|
|
38
|
-
if [ "$tag" != "$pkg_version" ]; then
|
|
39
|
-
echo "::error::Tag v$tag does not match pyproject.toml version $pkg_version"
|
|
40
|
-
exit 1
|
|
41
|
-
fi
|
|
42
|
-
|
|
43
40
|
- name: Install build tools
|
|
44
41
|
run: pip install build
|
|
45
42
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openkb
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: OpenKB: Open LLM Knowledge Base, powered by PageIndex
|
|
5
5
|
Project-URL: Repository, https://github.com/VectifyAI/OpenKB
|
|
6
6
|
Project-URL: Homepage, https://github.com/VectifyAI/OpenKB
|
|
@@ -29,6 +29,7 @@ Requires-Dist: prompt-toolkit>=3.0
|
|
|
29
29
|
Requires-Dist: python-dotenv
|
|
30
30
|
Requires-Dist: pyyaml
|
|
31
31
|
Requires-Dist: rich>=13.0
|
|
32
|
+
Requires-Dist: trafilatura>=2.0
|
|
32
33
|
Requires-Dist: watchdog>=3.0
|
|
33
34
|
Provides-Extra: dev
|
|
34
35
|
Requires-Dist: pytest; extra == 'dev'
|
|
@@ -109,7 +110,8 @@ openkb init
|
|
|
109
110
|
|
|
110
111
|
# 3. Add documents
|
|
111
112
|
openkb add paper.pdf
|
|
112
|
-
openkb add ~/papers/
|
|
113
|
+
openkb add ~/papers/ # Add a whole directory
|
|
114
|
+
openkb add https://arxiv.org/pdf/2509.11420 # Or fetch from a URL
|
|
113
115
|
|
|
114
116
|
# 4. Ask a question
|
|
115
117
|
openkb query "What are the main findings?"
|
|
@@ -185,13 +187,15 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
|
|
|
185
187
|
| Command | Description |
|
|
186
188
|
|---|---|
|
|
187
189
|
| `openkb init` | Initialize a new knowledge base (interactive) |
|
|
188
|
-
| <code>openkb add <
|
|
190
|
+
| <code>openkb add &lt;file_or_dir_or_URL&gt;</code> | Add documents and compile to wiki. URL ingest auto-detects PDF (saved as `.pdf` → PageIndex / markitdown) vs HTML (trafilatura main-content extract → `.md`) |
|
|
191
|
+
| <code>openkb remove &lt;doc&gt;</code> | Remove a document and clean up its wiki pages, images, registry, and PageIndex state (use `--dry-run` to preview, `--keep-raw` / `--keep-empty-concepts` to retain artifacts) |
|
|
189
192
|
| <code>openkb query "question"</code> | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) |
|
|
190
193
|
| `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) |
|
|
191
194
|
| `openkb watch` | Watch `raw/` and auto-compile new files |
|
|
192
195
|
| `openkb lint` | Run structural + knowledge health checks |
|
|
193
196
|
| `openkb list` | List indexed documents and concepts |
|
|
194
197
|
| `openkb status` | Show knowledge base stats |
|
|
198
|
+
| <code>openkb feedback ["msg"]</code> | File feedback by opening a prefilled GitHub issue (use `--type bug/feature/question` to tag the issue) |
|
|
195
199
|
|
|
196
200
|
<!-- | `openkb lint --fix` | Auto-fix what it can | -->
|
|
197
201
|
|
|
@@ -272,6 +276,33 @@ OpenKB's wiki is a directory of Markdown files with `[[wikilinks]]`. Obsidian re
|
|
|
272
276
|
3. Use graph view to see knowledge connections
|
|
273
277
|
4. Use Obsidian Web Clipper to add web articles to `raw/`
|
|
274
278
|
|
|
279
|
+
### Using with Claude Code / Codex / Gemini CLI
|
|
280
|
+
|
|
281
|
+
OpenKB ships a `SKILL.md` so any agent CLI can read your compiled wiki — no extra runtime, no MCP setup, just install the skill once.
|
|
282
|
+
|
|
283
|
+
**Claude Code**:
|
|
284
|
+
|
|
285
|
+
```
|
|
286
|
+
/plugin marketplace add VectifyAI/OpenKB
|
|
287
|
+
/plugin install openkb@vectify
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
**Gemini CLI**:
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
gemini skills install https://github.com/VectifyAI/OpenKB.git --path skills/openkb --consent
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
**OpenAI Codex CLI** (no marketplace command yet — manual symlink):
|
|
297
|
+
|
|
298
|
+
```bash
|
|
299
|
+
git clone https://github.com/VectifyAI/OpenKB.git ~/openkb-src
|
|
300
|
+
mkdir -p ~/.agents/skills
|
|
301
|
+
ln -s ~/openkb-src/skills/openkb ~/.agents/skills/openkb
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
The skill is read-only — it won't run `openkb add`, `remove`, or `lint --fix` without you asking. See [`skills/openkb/SKILL.md`](skills/openkb/SKILL.md) for the full instruction set.
|
|
305
|
+
|
|
275
306
|
# 🧭 Learn More
|
|
276
307
|
|
|
277
308
|
### Compared to Karpathy's Approach
|
|
@@ -72,7 +72,8 @@ openkb init
|
|
|
72
72
|
|
|
73
73
|
# 3. Add documents
|
|
74
74
|
openkb add paper.pdf
|
|
75
|
-
openkb add ~/papers/
|
|
75
|
+
openkb add ~/papers/ # Add a whole directory
|
|
76
|
+
openkb add https://arxiv.org/pdf/2509.11420 # Or fetch from a URL
|
|
76
77
|
|
|
77
78
|
# 4. Ask a question
|
|
78
79
|
openkb query "What are the main findings?"
|
|
@@ -148,13 +149,15 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
|
|
|
148
149
|
| Command | Description |
|
|
149
150
|
|---|---|
|
|
150
151
|
| `openkb init` | Initialize a new knowledge base (interactive) |
|
|
151
|
-
| <code>openkb add <
|
|
152
|
+
| <code>openkb add &lt;file_or_dir_or_URL&gt;</code> | Add documents and compile to wiki. URL ingest auto-detects PDF (saved as `.pdf` → PageIndex / markitdown) vs HTML (trafilatura main-content extract → `.md`) |
|
|
153
|
+
| <code>openkb remove &lt;doc&gt;</code> | Remove a document and clean up its wiki pages, images, registry, and PageIndex state (use `--dry-run` to preview, `--keep-raw` / `--keep-empty-concepts` to retain artifacts) |
|
|
152
154
|
| <code>openkb query "question"</code> | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) |
|
|
153
155
|
| `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) |
|
|
154
156
|
| `openkb watch` | Watch `raw/` and auto-compile new files |
|
|
155
157
|
| `openkb lint` | Run structural + knowledge health checks |
|
|
156
158
|
| `openkb list` | List indexed documents and concepts |
|
|
157
159
|
| `openkb status` | Show knowledge base stats |
|
|
160
|
+
| <code>openkb feedback ["msg"]</code> | File feedback by opening a prefilled GitHub issue (use `--type bug/feature/question` to tag the issue) |
|
|
158
161
|
|
|
159
162
|
<!-- | `openkb lint --fix` | Auto-fix what it can | -->
|
|
160
163
|
|
|
@@ -235,6 +238,33 @@ OpenKB's wiki is a directory of Markdown files with `[[wikilinks]]`. Obsidian re
|
|
|
235
238
|
3. Use graph view to see knowledge connections
|
|
236
239
|
4. Use Obsidian Web Clipper to add web articles to `raw/`
|
|
237
240
|
|
|
241
|
+
### Using with Claude Code / Codex / Gemini CLI
|
|
242
|
+
|
|
243
|
+
OpenKB ships a `SKILL.md` so any agent CLI can read your compiled wiki — no extra runtime, no MCP setup, just install the skill once.
|
|
244
|
+
|
|
245
|
+
**Claude Code**:
|
|
246
|
+
|
|
247
|
+
```
|
|
248
|
+
/plugin marketplace add VectifyAI/OpenKB
|
|
249
|
+
/plugin install openkb@vectify
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
**Gemini CLI**:
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
gemini skills install https://github.com/VectifyAI/OpenKB.git --path skills/openkb --consent
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
**OpenAI Codex CLI** (no marketplace command yet — manual symlink):
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
git clone https://github.com/VectifyAI/OpenKB.git ~/openkb-src
|
|
262
|
+
mkdir -p ~/.agents/skills
|
|
263
|
+
ln -s ~/openkb-src/skills/openkb ~/.agents/skills/openkb
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
The skill is read-only — it won't run `openkb add`, `remove`, or `lint --fix` without you asking. See [`skills/openkb/SKILL.md`](skills/openkb/SKILL.md) for the full instruction set.
|
|
267
|
+
|
|
238
268
|
# 🧭 Learn More
|
|
239
269
|
|
|
240
270
|
### Compared to Karpathy's Approach
|
|
@@ -445,6 +445,28 @@ def _insert_section_entry(lines: list[str], heading: str, entry: str) -> bool:
|
|
|
445
445
|
return True
|
|
446
446
|
|
|
447
447
|
|
|
448
|
+
def _remove_section_entry(lines: list[str], heading: str, link: str) -> bool:
    """Delete the first bullet for ``link`` inside the section ``heading``.

    A line counts as a match only when it *begins* with ``- {link}``, i.e.
    the canonical bullet shape. Matching on a prefix rather than a substring
    is deliberate: a substring match could delete a sibling bullet that
    merely mentions the link in its descriptive text.

    Args:
        lines: The page split into lines; mutated in place on a match.
        heading: Section heading whose bounds are looked up through
            ``_get_section_bounds``.
        link: The link text expected directly after the ``- `` marker.

    Returns:
        True if a line was deleted; False when the section bounds lookup
        returned ``None`` or no line in the section matched.
    """
    section = _get_section_bounds(lines, heading)
    if section is None:
        return False

    first, stop = section
    wanted = f"- {link}"
    # Locate the first matching bullet within the section's line range.
    hit = next(
        (idx for idx in range(first, stop) if lines[idx].startswith(wanted)),
        None,
    )
    if hit is None:
        return False

    del lines[hit]
    return True
|
|
468
|
+
|
|
469
|
+
|
|
448
470
|
|
|
449
471
|
def _write_summary(wiki_dir: Path, doc_name: str, summary: str,
|
|
450
472
|
doc_type: str = "short") -> None:
|
|
@@ -562,6 +584,44 @@ def _prepend_source_to_frontmatter(text: str, source_file: str) -> str:
|
|
|
562
584
|
return "\n".join(fm_lines) + body
|
|
563
585
|
|
|
564
586
|
|
|
587
|
+
def _remove_source_from_frontmatter(text: str, source_file: str) -> tuple[str, bool]:
|
|
588
|
+
"""Remove ``source_file`` from the inline ``sources:`` list in YAML frontmatter.
|
|
589
|
+
|
|
590
|
+
Returns ``(rewritten_text, sources_now_empty)``. ``sources_now_empty`` is
|
|
591
|
+
True when ``source_file`` was the only remaining item in the list (callers
|
|
592
|
+
can use this to decide whether to delete the page entirely).
|
|
593
|
+
|
|
594
|
+
If the frontmatter is missing, malformed, has no ``sources:`` line, or
|
|
595
|
+
the source is not present in the list, returns ``(text, False)``.
|
|
596
|
+
"""
|
|
597
|
+
if not text.startswith("---"):
|
|
598
|
+
return text, False
|
|
599
|
+
|
|
600
|
+
fm_end = text.find("---", 3)
|
|
601
|
+
if fm_end == -1:
|
|
602
|
+
return text, False
|
|
603
|
+
|
|
604
|
+
fm_block = text[:fm_end]
|
|
605
|
+
body = text[fm_end:]
|
|
606
|
+
fm_lines = fm_block.split("\n")
|
|
607
|
+
|
|
608
|
+
for i, line in enumerate(fm_lines):
|
|
609
|
+
if not line.lstrip().startswith("sources:"):
|
|
610
|
+
continue
|
|
611
|
+
lb = line.find("[")
|
|
612
|
+
rb = line.rfind("]")
|
|
613
|
+
if lb == -1 or rb == -1 or rb < lb:
|
|
614
|
+
return text, False
|
|
615
|
+
items = [s.strip() for s in line[lb + 1:rb].split(",") if s.strip()]
|
|
616
|
+
if source_file not in items:
|
|
617
|
+
return text, False
|
|
618
|
+
items.remove(source_file)
|
|
619
|
+
fm_lines[i] = f"sources: [{', '.join(items)}]"
|
|
620
|
+
return "\n".join(fm_lines) + body, len(items) == 0
|
|
621
|
+
|
|
622
|
+
return text, False
|
|
623
|
+
|
|
624
|
+
|
|
565
625
|
def _add_related_link(wiki_dir: Path, concept_slug: str, doc_name: str, source_file: str) -> None:
|
|
566
626
|
"""Add a cross-reference link to an existing concept page (no LLM call)."""
|
|
567
627
|
concepts_dir = wiki_dir / "concepts"
|
|
@@ -631,6 +691,123 @@ def _backlink_concepts(wiki_dir: Path, doc_name: str, concept_slugs: list[str])
|
|
|
631
691
|
_insert_section_entry(lines, "## Related Documents", f"- {link}")
|
|
632
692
|
path.write_text("\n".join(lines), encoding="utf-8")
|
|
633
693
|
|
|
694
|
+
|
|
695
|
+
def remove_doc_from_concept_pages(
    wiki_dir: Path,
    doc_name: str,
    *,
    keep_empty: bool = False,
) -> dict[str, list[str]]:
    """Scrub a removed document out of every concept page that mentions it.

    Each ``concepts/*.md`` page containing ``summaries/{doc_name}`` is
    processed as follows:

    - ``summaries/{doc_name}.md`` is removed from the frontmatter
      ``sources:`` list.
    - ``- [[summaries/{doc_name}]]`` bullets are removed from the
      ``## Related Documents`` section.
    - Standalone ``See also: [[summaries/{doc_name}]]`` lines are removed.
    - When the ``sources:`` list ends up empty and ``keep_empty`` is False,
      the page file is deleted instead of rewritten.

    Args:
        wiki_dir: Path to the wiki root directory.
        doc_name: Summary slug of the document being removed (e.g.
            ``"attention-is-all-you-need"``).
        keep_empty: When True, a page whose only source was the removed
            document is kept (rewritten with an empty ``sources: []`` list)
            rather than deleted.

    Returns:
        ``{"modified": [slugs...], "deleted": [slugs...]}`` — the stems of
        the concept pages that were rewritten vs. deleted. Both lists are
        empty when the ``concepts`` directory does not exist.
    """
    result: dict[str, list[str]] = {"modified": [], "deleted": []}

    concepts_dir = wiki_dir / "concepts"
    if not concepts_dir.is_dir():
        return result

    bare_source = f"summaries/{doc_name}"
    source_file = f"{bare_source}.md"
    link = f"[[{bare_source}]]"

    escaped = re.escape(bare_source)
    # Paragraph form: blank line above, then newline or end-of-text after.
    # The trailing newline is captured so it can be put back, keeping the
    # spacing of the surrounding paragraphs intact.
    paragraph_see_also = rf"\n\n[ \t]*See also:[ \t]*\[\[{escaped}\]\][ \t]*(\n|\Z)"
    # Fallback form: a "See also:" line without the blank line above it.
    # `[ \t]` keeps the match confined to a single line.
    inline_see_also = rf"^[ \t]*See also:[ \t]*\[\[{escaped}\]\][ \t]*\n?"

    for page in sorted(concepts_dir.glob("*.md")):
        original = page.read_text(encoding="utf-8")
        # Cheap pre-filter: pages that never mention the doc are left alone.
        if not (source_file in original or bare_source in original):
            continue

        updated, now_empty = _remove_source_from_frontmatter(original, source_file)

        if link in updated:
            page_lines = updated.split("\n")
            # Keep removing until no matching bullet is left in the section.
            removed = True
            while removed:
                removed = _remove_section_entry(page_lines, "## Related Documents", link)
            updated = "\n".join(page_lines)

        updated = re.sub(paragraph_see_also, r"\1", updated)
        updated = re.sub(inline_see_also, "", updated, flags=re.MULTILINE)

        if now_empty and not keep_empty:
            page.unlink()
            result["deleted"].append(page.stem)
            continue

        if updated != original:
            page.write_text(updated, encoding="utf-8")
            result["modified"].append(page.stem)

    return result
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
def remove_doc_from_index(wiki_dir: Path, doc_name: str, concept_slugs_deleted: list[str]) -> None:
    """Strip a removed document, and any concepts deleted with it, from ``index.md``.

    The document's ``[[summaries/{doc_name}]]`` bullets are removed from the
    ``## Documents`` section, and ``[[concepts/{slug}]]`` bullets are removed
    from the ``## Concepts`` section for each slug in
    ``concept_slugs_deleted``. Only bullet lines are removed; the section
    headings themselves are left in place.

    Does nothing when ``index.md`` does not exist; otherwise the file is
    rewritten.

    Args:
        wiki_dir: Path to the wiki root directory.
        doc_name: Summary slug of the removed document.
        concept_slugs_deleted: Slugs of concept pages that were deleted.
    """
    index_path = wiki_dir / "index.md"
    if not index_path.exists():
        return

    index_lines = index_path.read_text(encoding="utf-8").split("\n")

    # (section heading, link) pairs to purge: the document first, then concepts.
    targets = [("## Documents", f"[[summaries/{doc_name}]]")]
    targets.extend(
        ("## Concepts", f"[[concepts/{slug}]]") for slug in concept_slugs_deleted
    )

    for heading, link in targets:
        # Repeat so duplicate bullets for the same link are all removed.
        while _remove_section_entry(index_lines, heading, link):
            continue

    index_path.write_text("\n".join(index_lines), encoding="utf-8")
|
|
809
|
+
|
|
810
|
+
|
|
634
811
|
def _update_index(
|
|
635
812
|
wiki_dir: Path, doc_name: str, concept_names: list[str],
|
|
636
813
|
doc_brief: str = "", concept_briefs: dict[str, str] | None = None,
|
|
@@ -8,7 +8,7 @@ from agents import Agent, Runner, function_tool
|
|
|
8
8
|
from openkb.agent.tools import list_wiki_files, read_wiki_file
|
|
9
9
|
|
|
10
10
|
MAX_TURNS = 50
|
|
11
|
-
from openkb.schema import
|
|
11
|
+
from openkb.schema import get_agents_md
|
|
12
12
|
|
|
13
13
|
_LINTER_INSTRUCTIONS_TEMPLATE = """\
|
|
14
14
|
You are OpenKB's semantic lint agent. Your job is to audit the wiki
|
|
@@ -113,7 +113,7 @@ async def run_query(
|
|
|
113
113
|
The agent's final answer as a string.
|
|
114
114
|
"""
|
|
115
115
|
import sys
|
|
116
|
-
from agents import RawResponsesStreamEvent, RunItemStreamEvent
|
|
116
|
+
from agents import RawResponsesStreamEvent, RunItemStreamEvent
|
|
117
117
|
from openai.types.responses import ResponseTextDeltaEvent
|
|
118
118
|
from openkb.config import load_config
|
|
119
119
|
|
|
@@ -6,6 +6,7 @@ tested in isolation without requiring the openai-agents runtime.
|
|
|
6
6
|
"""
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
import contextlib
|
|
9
10
|
import json as _json
|
|
10
11
|
from pathlib import Path
|
|
11
12
|
|
|
@@ -71,7 +72,9 @@ def parse_pages(pages: str) -> list[int]:
|
|
|
71
72
|
segments = part.split("-")
|
|
72
73
|
# Re-join to handle leading negatives: segments[0] may be empty
|
|
73
74
|
# if part starts with "-". We just try to parse start/end.
|
|
74
|
-
|
|
75
|
+
# Silently skip malformed segments — parse_pages is a tolerant
|
|
76
|
+
# parser by design (user-supplied page specs may contain typos).
|
|
77
|
+
with contextlib.suppress(ValueError):
|
|
75
78
|
if len(segments) == 2:
|
|
76
79
|
start, end = int(segments[0]), int(segments[1])
|
|
77
80
|
result.update(range(start, end + 1))
|
|
@@ -79,13 +82,9 @@ def parse_pages(pages: str) -> list[int]:
|
|
|
79
82
|
# e.g. "-1" split gives ['', '1']
|
|
80
83
|
result.add(-int(segments[1]))
|
|
81
84
|
# More complex cases (e.g. negative range) are ignored.
|
|
82
|
-
except ValueError:
|
|
83
|
-
pass
|
|
84
85
|
else:
|
|
85
|
-
|
|
86
|
+
with contextlib.suppress(ValueError):
|
|
86
87
|
result.add(int(part))
|
|
87
|
-
except ValueError:
|
|
88
|
-
pass
|
|
89
88
|
return sorted(n for n in result if n > 0)
|
|
90
89
|
|
|
91
90
|
|