openkb 0.1.2__tar.gz → 0.1.4.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openkb-0.1.4.dev0/.env.example +5 -0
- openkb-0.1.4.dev0/.github/workflows/publish.yml +58 -0
- openkb-0.1.4.dev0/.gitignore +19 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/PKG-INFO +61 -33
- {openkb-0.1.2 → openkb-0.1.4.dev0}/README.md +44 -18
- openkb-0.1.4.dev0/config.yaml.example +3 -0
- openkb-0.1.4.dev0/openkb/agent/_markdown.py +371 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/chat.py +276 -20
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/compiler.py +358 -73
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/query.py +98 -21
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/cli.py +98 -29
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/lint.py +168 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/pyproject.toml +31 -32
- openkb-0.1.4.dev0/tests/conftest.py +80 -0
- openkb-0.1.4.dev0/tests/test_add_command.py +151 -0
- openkb-0.1.4.dev0/tests/test_agent_tools.py +215 -0
- openkb-0.1.4.dev0/tests/test_chat_session.py +76 -0
- openkb-0.1.4.dev0/tests/test_chat_slash_commands.py +256 -0
- openkb-0.1.4.dev0/tests/test_cli.py +160 -0
- openkb-0.1.4.dev0/tests/test_compiler.py +1347 -0
- openkb-0.1.4.dev0/tests/test_config.py +49 -0
- openkb-0.1.4.dev0/tests/test_converter.py +133 -0
- openkb-0.1.4.dev0/tests/test_images.py +169 -0
- openkb-0.1.4.dev0/tests/test_indexer.py +127 -0
- openkb-0.1.4.dev0/tests/test_lint.py +395 -0
- openkb-0.1.4.dev0/tests/test_lint_cli.py +75 -0
- openkb-0.1.4.dev0/tests/test_linter.py +85 -0
- openkb-0.1.4.dev0/tests/test_list_status.py +150 -0
- openkb-0.1.4.dev0/tests/test_markdown_renderer.py +38 -0
- openkb-0.1.4.dev0/tests/test_query.py +139 -0
- openkb-0.1.4.dev0/tests/test_state.py +84 -0
- openkb-0.1.4.dev0/tests/test_tree_renderer.py +41 -0
- openkb-0.1.4.dev0/tests/test_watcher.py +126 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/LICENSE +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/__init__.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/__main__.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/__init__.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/chat_session.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/linter.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/tools.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/config.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/converter.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/images.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/indexer.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/log.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/schema.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/state.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/tree_renderer.py +0 -0
- {openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/watcher.py +0 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
# Release flow:
|
|
4
|
+
# 1. Bump `version` in pyproject.toml on main.
|
|
5
|
+
# 2. `git tag -a vX.Y.Z -m "Release X.Y.Z" && git push origin vX.Y.Z`
|
|
6
|
+
# The tag MUST match pyproject.toml's version exactly (with the `v` prefix).
|
|
7
|
+
# 3. This workflow builds the package, publishes to PyPI via OIDC trusted
|
|
8
|
+
# publishing, and creates a GitHub Release with auto-generated notes.
|
|
9
|
+
#
|
|
10
|
+
# Do not run `python -m build && twine upload` locally — that bypasses the
|
|
11
|
+
# version check and leaves no GitHub Release. PyPI rejects duplicate
|
|
12
|
+
# version uploads, so if the workflow fails after PyPI publish succeeded,
|
|
13
|
+
# manually create the missing GitHub Release with `gh release create vX.Y.Z`.
|
|
14
|
+
|
|
15
|
+
on:
|
|
16
|
+
push:
|
|
17
|
+
tags:
|
|
18
|
+
- "v*"
|
|
19
|
+
|
|
20
|
+
jobs:
|
|
21
|
+
publish:
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
environment: pypi
|
|
24
|
+
permissions:
|
|
25
|
+
id-token: write # OIDC trusted publishing to PyPI
|
|
26
|
+
contents: write # Create GitHub Release
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
|
|
29
|
+
|
|
30
|
+
- uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
|
|
31
|
+
with:
|
|
32
|
+
python-version: "3.12"
|
|
33
|
+
|
|
34
|
+
- name: Verify tag matches pyproject.toml version
|
|
35
|
+
run: |
|
|
36
|
+
tag="${GITHUB_REF_NAME#v}"
|
|
37
|
+
pkg_version=$(python -c "import tomllib,pathlib; print(tomllib.loads(pathlib.Path('pyproject.toml').read_text())['project']['version'])")
|
|
38
|
+
if [ "$tag" != "$pkg_version" ]; then
|
|
39
|
+
echo "::error::Tag v$tag does not match pyproject.toml version $pkg_version"
|
|
40
|
+
exit 1
|
|
41
|
+
fi
|
|
42
|
+
|
|
43
|
+
- name: Install build tools
|
|
44
|
+
run: pip install build
|
|
45
|
+
|
|
46
|
+
- name: Build package
|
|
47
|
+
run: python -m build
|
|
48
|
+
|
|
49
|
+
- name: Publish to PyPI
|
|
50
|
+
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # release/v1.14.0
|
|
51
|
+
|
|
52
|
+
- name: Create GitHub Release
|
|
53
|
+
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0
|
|
54
|
+
with:
|
|
55
|
+
tag_name: ${{ github.ref_name }}
|
|
56
|
+
name: ${{ github.ref_name }}
|
|
57
|
+
generate_release_notes: true
|
|
58
|
+
files: dist/*
|
|
@@ -1,13 +1,14 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: openkb
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.4.dev0
|
|
4
4
|
Summary: OpenKB: Open LLM Knowledge Base, powered by PageIndex
|
|
5
|
+
Project-URL: Repository, https://github.com/VectifyAI/OpenKB
|
|
6
|
+
Project-URL: Homepage, https://github.com/VectifyAI/OpenKB
|
|
7
|
+
Project-URL: Issues, https://github.com/VectifyAI/OpenKB/issues
|
|
8
|
+
Author-email: Kylin <quanqi@pageindex.ai>, Ray <ray@vectify.ai>
|
|
5
9
|
License: Apache-2.0
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
Author-email: ray@vectify.ai
|
|
9
|
-
Maintainer: rejojer
|
|
10
|
-
Requires-Python: >=3.10
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agents,ai,document,knowledge-base,llm,pageindex,rag,retrieval
|
|
11
12
|
Classifier: Development Status :: 3 - Alpha
|
|
12
13
|
Classifier: Intended Audience :: Developers
|
|
13
14
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
@@ -17,19 +18,21 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
17
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
19
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
20
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
-
Requires-
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: click>=8.0
|
|
21
23
|
Requires-Dist: json-repair
|
|
22
24
|
Requires-Dist: litellm
|
|
23
25
|
Requires-Dist: markitdown[all]
|
|
24
26
|
Requires-Dist: openai-agents
|
|
25
|
-
Requires-Dist: pageindex
|
|
26
|
-
Requires-Dist:
|
|
27
|
+
Requires-Dist: pageindex==0.3.0.dev1
|
|
28
|
+
Requires-Dist: prompt-toolkit>=3.0
|
|
27
29
|
Requires-Dist: python-dotenv
|
|
28
30
|
Requires-Dist: pyyaml
|
|
29
|
-
Requires-Dist:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
Requires-Dist: rich>=13.0
|
|
32
|
+
Requires-Dist: watchdog>=3.0
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
33
36
|
Description-Content-Type: text/markdown
|
|
34
37
|
|
|
35
38
|
<div align="center">
|
|
@@ -52,13 +55,13 @@ Description-Content-Type: text/markdown
|
|
|
52
55
|
|
|
53
56
|
The idea is based on a [concept](https://x.com/karpathy/status/2039805659525644595) described by Andrej Karpathy: LLMs generate summaries, concept pages, and cross-references, all maintained automatically. Knowledge compounds over time instead of being re-derived on every query.
|
|
54
57
|
|
|
55
|
-
### Why not
|
|
58
|
+
### Why not traditional RAG?
|
|
56
59
|
|
|
57
60
|
Traditional RAG rediscovers knowledge from scratch on every query. Nothing accumulates. OpenKB compiles knowledge once into a persistent wiki, then keeps it current. Cross-references already exist. Contradictions are flagged. Synthesis reflects everything consumed.
|
|
58
61
|
|
|
59
62
|
### Features
|
|
60
63
|
|
|
61
|
-
- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel,
|
|
64
|
+
- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, text, and more via markitdown
|
|
62
65
|
- **Scale to long documents** — Long and complex documents are handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval
|
|
63
66
|
- **Native multi-modality** — Retrieves and understands figures, tables, and images, not just text
|
|
64
67
|
- **Compiled Wiki** — LLM manages and compiles your documents into summaries, concept pages, and cross-links, all kept in sync
|
|
@@ -76,7 +79,26 @@ Traditional RAG rediscovers knowledge from scratch on every query. Nothing accum
|
|
|
76
79
|
pip install openkb
|
|
77
80
|
```
|
|
78
81
|
|
|
79
|
-
|
|
82
|
+
<details>
|
|
83
|
+
<summary><i>Other install options</i></summary>
|
|
84
|
+
|
|
85
|
+
- **Latest from GitHub:**
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install git+https://github.com/VectifyAI/OpenKB.git
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
- **Install from source** (editable, for development):
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
git clone https://github.com/VectifyAI/OpenKB.git
|
|
95
|
+
cd OpenKB
|
|
96
|
+
pip install -e .
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
</details>
|
|
100
|
+
|
|
101
|
+
### Quick Start
|
|
80
102
|
|
|
81
103
|
```bash
|
|
82
104
|
# 1. Create a directory for your knowledge base
|
|
@@ -87,13 +109,12 @@ openkb init
|
|
|
87
109
|
|
|
88
110
|
# 3. Add documents
|
|
89
111
|
openkb add paper.pdf
|
|
90
|
-
openkb add ~/papers/
|
|
91
|
-
openkb add article.html
|
|
112
|
+
openkb add ~/papers/ # Add a whole directory
|
|
92
113
|
|
|
93
114
|
# 4. Ask a question
|
|
94
115
|
openkb query "What are the main findings?"
|
|
95
116
|
|
|
96
|
-
# 5. Or
|
|
117
|
+
# 5. Or chat interactively
|
|
97
118
|
openkb chat
|
|
98
119
|
```
|
|
99
120
|
|
|
@@ -109,7 +130,7 @@ Create a `.env` file with your LLM API key:
|
|
|
109
130
|
LLM_API_KEY=your_llm_api_key
|
|
110
131
|
```
|
|
111
132
|
|
|
112
|
-
# 🧩 How
|
|
133
|
+
# 🧩 How OpenKB Works
|
|
113
134
|
|
|
114
135
|
### Architecture
|
|
115
136
|
|
|
@@ -135,7 +156,7 @@ wiki/
|
|
|
135
156
|
└── reports/ Lint reports
|
|
136
157
|
```
|
|
137
158
|
|
|
138
|
-
### Short vs.
|
|
159
|
+
### Short vs. Long Document Handling
|
|
139
160
|
|
|
140
161
|
| | Short documents | Long documents (PDF ≥ 20 pages) |
|
|
141
162
|
|---|---|---|
|
|
@@ -146,7 +167,7 @@ wiki/
|
|
|
146
167
|
|
|
147
168
|
Short docs are read in full by the LLM. Long PDFs are indexed by PageIndex into a hierarchical tree with summaries. The LLM reads the tree instead of the full text, enabling better retrieval from long documents.
|
|
148
169
|
|
|
149
|
-
###
|
|
170
|
+
### Knowledge Compilation
|
|
150
171
|
|
|
151
172
|
When you add a document, the LLM:
|
|
152
173
|
|
|
@@ -157,16 +178,15 @@ When you add a document, the LLM:
|
|
|
157
178
|
|
|
158
179
|
A single source might touch 10-15 wiki pages. Knowledge accumulates: each document enriches the existing wiki rather than sitting in isolation.
|
|
159
180
|
|
|
160
|
-
#
|
|
181
|
+
# ⚙️ Usage
|
|
161
182
|
|
|
162
183
|
### Commands
|
|
163
184
|
|
|
164
185
|
| Command | Description |
|
|
165
186
|
|---|---|
|
|
166
187
|
| `openkb init` | Initialize a new knowledge base (interactive) |
|
|
167
|
-
|
|
|
168
|
-
|
|
|
169
|
-
| `openkb query "question" --save` | Ask and save the answer to `wiki/explorations/` |
|
|
188
|
+
| <code>openkb add &lt;file_or_dir&gt;</code> | Add documents and compile to wiki |
|
|
189
|
+
| <code>openkb query "question"</code> | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) |
|
|
170
190
|
| `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) |
|
|
171
191
|
| `openkb watch` | Watch `raw/` and auto-compile new files |
|
|
172
192
|
| `openkb lint` | Run structural + knowledge health checks |
|
|
@@ -175,7 +195,7 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
|
|
|
175
195
|
|
|
176
196
|
<!-- | `openkb lint --fix` | Auto-fix what it can | -->
|
|
177
197
|
|
|
178
|
-
### Interactive
|
|
198
|
+
### Interactive Chat
|
|
179
199
|
|
|
180
200
|
`openkb chat` opens an interactive chat session over your wiki knowledge base. Unlike the one-shot `openkb query`, each turn carries the conversation history, so you can dig into a topic without re-typing context.
|
|
181
201
|
|
|
@@ -187,7 +207,16 @@ openkb chat --list # list all sessions
|
|
|
187
207
|
openkb chat --delete <id> # delete a session
|
|
188
208
|
```
|
|
189
209
|
|
|
190
|
-
|
|
210
|
+
Inside a chat, type `/` to access slash commands (Tab to complete):
|
|
211
|
+
|
|
212
|
+
- `/help` — list available commands
|
|
213
|
+
- `/status` — show knowledge base status
|
|
214
|
+
- `/list` — list all documents
|
|
215
|
+
- `/add <path>` — add a document or directory without leaving the chat
|
|
216
|
+
- `/save [name]` — export the transcript to `wiki/explorations/`
|
|
217
|
+
- `/clear` — start a fresh session (the current one stays on disk)
|
|
218
|
+
- `/lint` — run knowledge base lint
|
|
219
|
+
- `/exit` — exit (Ctrl-D also works)
|
|
191
220
|
|
|
192
221
|
### Configuration
|
|
193
222
|
|
|
@@ -207,7 +236,7 @@ Model names use `provider/model` LiteLLM [format](https://docs.litellm.ai/docs/p
|
|
|
207
236
|
| Anthropic | `anthropic/claude-sonnet-4-6` |
|
|
208
237
|
| Gemini | `gemini/gemini-3.1-pro-preview` |
|
|
209
238
|
|
|
210
|
-
### PageIndex
|
|
239
|
+
### PageIndex Integration
|
|
211
240
|
|
|
212
241
|
Long documents are challenging for LLMs due to context limits, context rot, and summarization loss.
|
|
213
242
|
[PageIndex](https://github.com/VectifyAI/PageIndex) solves this with vectorless, reasoning-based retrieval — building a hierarchical tree index that lets LLMs reason over the index for context-aware retrieval.
|
|
@@ -255,7 +284,7 @@ OpenKB's wiki is a directory of Markdown files with `[[wikilinks]]`. Obsidian re
|
|
|
255
284
|
| Wiki compilation | LLM agent | LLM agent (same) |
|
|
256
285
|
| Q&A | Query over wiki | Wiki + PageIndex retrieval |
|
|
257
286
|
|
|
258
|
-
###
|
|
287
|
+
### The Stack
|
|
259
288
|
|
|
260
289
|
- [PageIndex](https://github.com/VectifyAI/PageIndex) — Vectorless, reasoning-based document indexing and retrieval
|
|
261
290
|
- [markitdown](https://github.com/microsoft/markitdown) — Universal file-to-markdown conversion
|
|
@@ -282,7 +311,7 @@ Apache 2.0. See [LICENSE](LICENSE).
|
|
|
282
311
|
|
|
283
312
|
### Support Us
|
|
284
313
|
|
|
285
|
-
If you find OpenKB useful, give us a star 🌟 — and check out [PageIndex](https://github.com/VectifyAI/PageIndex) too!
|
|
314
|
+
If you find OpenKB useful, please give us a star 🌟 — and check out [PageIndex](https://github.com/VectifyAI/PageIndex) too!
|
|
286
315
|
|
|
287
316
|
<div>
|
|
288
317
|
|
|
@@ -291,4 +320,3 @@ If you find OpenKB useful, give us a star 🌟 — and check out [PageIndex](htt
|
|
|
291
320
|
[](https://ii2abc2jejf.typeform.com/to/tK3AXl8T)
|
|
292
321
|
|
|
293
322
|
</div>
|
|
294
|
-
|
|
@@ -18,13 +18,13 @@
|
|
|
18
18
|
|
|
19
19
|
The idea is based on a [concept](https://x.com/karpathy/status/2039805659525644595) described by Andrej Karpathy: LLMs generate summaries, concept pages, and cross-references, all maintained automatically. Knowledge compounds over time instead of being re-derived on every query.
|
|
20
20
|
|
|
21
|
-
### Why not
|
|
21
|
+
### Why not traditional RAG?
|
|
22
22
|
|
|
23
23
|
Traditional RAG rediscovers knowledge from scratch on every query. Nothing accumulates. OpenKB compiles knowledge once into a persistent wiki, then keeps it current. Cross-references already exist. Contradictions are flagged. Synthesis reflects everything consumed.
|
|
24
24
|
|
|
25
25
|
### Features
|
|
26
26
|
|
|
27
|
-
- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel,
|
|
27
|
+
- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, text, and more via markitdown
|
|
28
28
|
- **Scale to long documents** — Long and complex documents are handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval
|
|
29
29
|
- **Native multi-modality** — Retrieves and understands figures, tables, and images, not just text
|
|
30
30
|
- **Compiled Wiki** — LLM manages and compiles your documents into summaries, concept pages, and cross-links, all kept in sync
|
|
@@ -42,7 +42,26 @@ Traditional RAG rediscovers knowledge from scratch on every query. Nothing accum
|
|
|
42
42
|
pip install openkb
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
<details>
|
|
46
|
+
<summary><i>Other install options</i></summary>
|
|
47
|
+
|
|
48
|
+
- **Latest from GitHub:**
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install git+https://github.com/VectifyAI/OpenKB.git
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
- **Install from source** (editable, for development):
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
git clone https://github.com/VectifyAI/OpenKB.git
|
|
58
|
+
cd OpenKB
|
|
59
|
+
pip install -e .
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
</details>
|
|
63
|
+
|
|
64
|
+
### Quick Start
|
|
46
65
|
|
|
47
66
|
```bash
|
|
48
67
|
# 1. Create a directory for your knowledge base
|
|
@@ -53,13 +72,12 @@ openkb init
|
|
|
53
72
|
|
|
54
73
|
# 3. Add documents
|
|
55
74
|
openkb add paper.pdf
|
|
56
|
-
openkb add ~/papers/
|
|
57
|
-
openkb add article.html
|
|
75
|
+
openkb add ~/papers/ # Add a whole directory
|
|
58
76
|
|
|
59
77
|
# 4. Ask a question
|
|
60
78
|
openkb query "What are the main findings?"
|
|
61
79
|
|
|
62
|
-
# 5. Or
|
|
80
|
+
# 5. Or chat interactively
|
|
63
81
|
openkb chat
|
|
64
82
|
```
|
|
65
83
|
|
|
@@ -75,7 +93,7 @@ Create a `.env` file with your LLM API key:
|
|
|
75
93
|
LLM_API_KEY=your_llm_api_key
|
|
76
94
|
```
|
|
77
95
|
|
|
78
|
-
# 🧩 How
|
|
96
|
+
# 🧩 How OpenKB Works
|
|
79
97
|
|
|
80
98
|
### Architecture
|
|
81
99
|
|
|
@@ -101,7 +119,7 @@ wiki/
|
|
|
101
119
|
└── reports/ Lint reports
|
|
102
120
|
```
|
|
103
121
|
|
|
104
|
-
### Short vs.
|
|
122
|
+
### Short vs. Long Document Handling
|
|
105
123
|
|
|
106
124
|
| | Short documents | Long documents (PDF ≥ 20 pages) |
|
|
107
125
|
|---|---|---|
|
|
@@ -112,7 +130,7 @@ wiki/
|
|
|
112
130
|
|
|
113
131
|
Short docs are read in full by the LLM. Long PDFs are indexed by PageIndex into a hierarchical tree with summaries. The LLM reads the tree instead of the full text, enabling better retrieval from long documents.
|
|
114
132
|
|
|
115
|
-
###
|
|
133
|
+
### Knowledge Compilation
|
|
116
134
|
|
|
117
135
|
When you add a document, the LLM:
|
|
118
136
|
|
|
@@ -123,16 +141,15 @@ When you add a document, the LLM:
|
|
|
123
141
|
|
|
124
142
|
A single source might touch 10-15 wiki pages. Knowledge accumulates: each document enriches the existing wiki rather than sitting in isolation.
|
|
125
143
|
|
|
126
|
-
#
|
|
144
|
+
# ⚙️ Usage
|
|
127
145
|
|
|
128
146
|
### Commands
|
|
129
147
|
|
|
130
148
|
| Command | Description |
|
|
131
149
|
|---|---|
|
|
132
150
|
| `openkb init` | Initialize a new knowledge base (interactive) |
|
|
133
|
-
|
|
|
134
|
-
|
|
|
135
|
-
| `openkb query "question" --save` | Ask and save the answer to `wiki/explorations/` |
|
|
151
|
+
| <code>openkb add &lt;file_or_dir&gt;</code> | Add documents and compile to wiki |
|
|
152
|
+
| <code>openkb query "question"</code> | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) |
|
|
136
153
|
| `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) |
|
|
137
154
|
| `openkb watch` | Watch `raw/` and auto-compile new files |
|
|
138
155
|
| `openkb lint` | Run structural + knowledge health checks |
|
|
@@ -141,7 +158,7 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
|
|
|
141
158
|
|
|
142
159
|
<!-- | `openkb lint --fix` | Auto-fix what it can | -->
|
|
143
160
|
|
|
144
|
-
### Interactive
|
|
161
|
+
### Interactive Chat
|
|
145
162
|
|
|
146
163
|
`openkb chat` opens an interactive chat session over your wiki knowledge base. Unlike the one-shot `openkb query`, each turn carries the conversation history, so you can dig into a topic without re-typing context.
|
|
147
164
|
|
|
@@ -153,7 +170,16 @@ openkb chat --list # list all sessions
|
|
|
153
170
|
openkb chat --delete <id> # delete a session
|
|
154
171
|
```
|
|
155
172
|
|
|
156
|
-
|
|
173
|
+
Inside a chat, type `/` to access slash commands (Tab to complete):
|
|
174
|
+
|
|
175
|
+
- `/help` — list available commands
|
|
176
|
+
- `/status` — show knowledge base status
|
|
177
|
+
- `/list` — list all documents
|
|
178
|
+
- `/add <path>` — add a document or directory without leaving the chat
|
|
179
|
+
- `/save [name]` — export the transcript to `wiki/explorations/`
|
|
180
|
+
- `/clear` — start a fresh session (the current one stays on disk)
|
|
181
|
+
- `/lint` — run knowledge base lint
|
|
182
|
+
- `/exit` — exit (Ctrl-D also works)
|
|
157
183
|
|
|
158
184
|
### Configuration
|
|
159
185
|
|
|
@@ -173,7 +199,7 @@ Model names use `provider/model` LiteLLM [format](https://docs.litellm.ai/docs/p
|
|
|
173
199
|
| Anthropic | `anthropic/claude-sonnet-4-6` |
|
|
174
200
|
| Gemini | `gemini/gemini-3.1-pro-preview` |
|
|
175
201
|
|
|
176
|
-
### PageIndex
|
|
202
|
+
### PageIndex Integration
|
|
177
203
|
|
|
178
204
|
Long documents are challenging for LLMs due to context limits, context rot, and summarization loss.
|
|
179
205
|
[PageIndex](https://github.com/VectifyAI/PageIndex) solves this with vectorless, reasoning-based retrieval — building a hierarchical tree index that lets LLMs reason over the index for context-aware retrieval.
|
|
@@ -221,7 +247,7 @@ OpenKB's wiki is a directory of Markdown files with `[[wikilinks]]`. Obsidian re
|
|
|
221
247
|
| Wiki compilation | LLM agent | LLM agent (same) |
|
|
222
248
|
| Q&A | Query over wiki | Wiki + PageIndex retrieval |
|
|
223
249
|
|
|
224
|
-
###
|
|
250
|
+
### The Stack
|
|
225
251
|
|
|
226
252
|
- [PageIndex](https://github.com/VectifyAI/PageIndex) — Vectorless, reasoning-based document indexing and retrieval
|
|
227
253
|
- [markitdown](https://github.com/microsoft/markitdown) — Universal file-to-markdown conversion
|
|
@@ -248,7 +274,7 @@ Apache 2.0. See [LICENSE](LICENSE).
|
|
|
248
274
|
|
|
249
275
|
### Support Us
|
|
250
276
|
|
|
251
|
-
If you find OpenKB useful, give us a star 🌟 — and check out [PageIndex](https://github.com/VectifyAI/PageIndex) too!
|
|
277
|
+
If you find OpenKB useful, please give us a star 🌟 — and check out [PageIndex](https://github.com/VectifyAI/PageIndex) too!
|
|
252
278
|
|
|
253
279
|
<div>
|
|
254
280
|
|