kbvc 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kbvc-0.1.0/LICENSE +21 -0
- kbvc-0.1.0/PKG-INFO +524 -0
- kbvc-0.1.0/README.md +443 -0
- kbvc-0.1.0/pyproject.toml +80 -0
- kbvc-0.1.0/setup.cfg +4 -0
- kbvc-0.1.0/src/kbvc/__init__.py +2 -0
- kbvc-0.1.0/src/kbvc/adapters/__init__.py +37 -0
- kbvc-0.1.0/src/kbvc/adapters/base.py +36 -0
- kbvc-0.1.0/src/kbvc/adapters/text_file.py +42 -0
- kbvc-0.1.0/src/kbvc/backends/__init__.py +68 -0
- kbvc-0.1.0/src/kbvc/backends/embed/__init__.py +41 -0
- kbvc-0.1.0/src/kbvc/backends/embed/gemini.py +87 -0
- kbvc-0.1.0/src/kbvc/backends/embed/huggingface.py +49 -0
- kbvc-0.1.0/src/kbvc/backends/embed/ollama.py +61 -0
- kbvc-0.1.0/src/kbvc/backends/embed/openai.py +86 -0
- kbvc-0.1.0/src/kbvc/backends/vectordb/__init__.py +199 -0
- kbvc-0.1.0/src/kbvc/backends/vectordb/chroma.py +123 -0
- kbvc-0.1.0/src/kbvc/backends/vectordb/lancedb.py +248 -0
- kbvc-0.1.0/src/kbvc/backends/vectordb/pgvector.py +259 -0
- kbvc-0.1.0/src/kbvc/backends/vectordb/pinecone.py +141 -0
- kbvc-0.1.0/src/kbvc/backends/vectordb/qdrant.py +269 -0
- kbvc-0.1.0/src/kbvc/cli.py +2581 -0
- kbvc-0.1.0/src/kbvc/commands/__init__.py +0 -0
- kbvc-0.1.0/src/kbvc/commands/analyze.py +360 -0
- kbvc-0.1.0/src/kbvc/commands/backend.py +115 -0
- kbvc-0.1.0/src/kbvc/commands/commit.py +350 -0
- kbvc-0.1.0/src/kbvc/commands/contradict.py +165 -0
- kbvc-0.1.0/src/kbvc/commands/explain.py +68 -0
- kbvc-0.1.0/src/kbvc/commands/gc.py +144 -0
- kbvc-0.1.0/src/kbvc/commands/ingest.py +404 -0
- kbvc-0.1.0/src/kbvc/commands/migrate.py +105 -0
- kbvc-0.1.0/src/kbvc/commands/migrate_embeddings.py +175 -0
- kbvc-0.1.0/src/kbvc/commands/migrate_helpers.py +40 -0
- kbvc-0.1.0/src/kbvc/commands/promote.py +108 -0
- kbvc-0.1.0/src/kbvc/commands/push.py +215 -0
- kbvc-0.1.0/src/kbvc/commands/stale.py +137 -0
- kbvc-0.1.0/src/kbvc/commands/stats.py +147 -0
- kbvc-0.1.0/src/kbvc/commands/sync.py +93 -0
- kbvc-0.1.0/src/kbvc/core/__init__.py +0 -0
- kbvc-0.1.0/src/kbvc/core/chunker.py +140 -0
- kbvc-0.1.0/src/kbvc/core/commit.py +193 -0
- kbvc-0.1.0/src/kbvc/core/graph.py +264 -0
- kbvc-0.1.0/src/kbvc/core/index.py +84 -0
- kbvc-0.1.0/src/kbvc/core/ko.py +157 -0
- kbvc-0.1.0/src/kbvc/core/lineage.py +198 -0
- kbvc-0.1.0/src/kbvc/core/prompt_store.py +179 -0
- kbvc-0.1.0/src/kbvc/core/repo.py +318 -0
- kbvc-0.1.0/src/kbvc/core/retrieval_store.py +130 -0
- kbvc-0.1.0/src/kbvc/core/versioner.py +132 -0
- kbvc-0.1.0/src/kbvc/utils/__init__.py +0 -0
- kbvc-0.1.0/src/kbvc/utils/config.py +44 -0
- kbvc-0.1.0/src/kbvc/utils/display.py +63 -0
- kbvc-0.1.0/src/kbvc/utils/lock.py +81 -0
- kbvc-0.1.0/src/kbvc.egg-info/PKG-INFO +524 -0
- kbvc-0.1.0/src/kbvc.egg-info/SOURCES.txt +58 -0
- kbvc-0.1.0/src/kbvc.egg-info/dependency_links.txt +1 -0
- kbvc-0.1.0/src/kbvc.egg-info/entry_points.txt +2 -0
- kbvc-0.1.0/src/kbvc.egg-info/requires.txt +43 -0
- kbvc-0.1.0/src/kbvc.egg-info/top_level.txt +1 -0
- kbvc-0.1.0/tests/test_kbvc.py +2417 -0
kbvc-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 KBVC Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
kbvc-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kbvc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Knowledge Operating System for AI/RAG — version control, semantic graph, full audit trails, and reproducible deployment for knowledge bases
|
|
5
|
+
Author-email: Saiyam Jain <saiyam.sandhir.jain@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 KBVC Contributors
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/Saiyam-Sandhir-Jain/kbvc
|
|
29
|
+
Project-URL: Documentation, https://github.com/Saiyam-Sandhir-Jain/kbvc/blob/main/KBVC_DOCS.md
|
|
30
|
+
Project-URL: Repository, https://github.com/Saiyam-Sandhir-Jain/kbvc
|
|
31
|
+
Project-URL: Issues, https://github.com/Saiyam-Sandhir-Jain/kbvc/issues
|
|
32
|
+
Project-URL: Changelog, https://github.com/Saiyam-Sandhir-Jain/kbvc/blob/main/CHANGELOG.md
|
|
33
|
+
Keywords: rag,knowledge-infrastructure,vector-database,embeddings,knowledge-base,version-control,knowledge-graph,llm,reproducibility,semantic-versioning
|
|
34
|
+
Classifier: Development Status :: 4 - Beta
|
|
35
|
+
Classifier: Intended Audience :: Developers
|
|
36
|
+
Classifier: Intended Audience :: Science/Research
|
|
37
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
38
|
+
Classifier: Programming Language :: Python :: 3
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
42
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
43
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
44
|
+
Classifier: Topic :: Software Development :: Version Control
|
|
45
|
+
Requires-Python: >=3.9
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
License-File: LICENSE
|
|
48
|
+
Requires-Dist: click>=8.0
|
|
49
|
+
Requires-Dist: pyyaml>=6.0
|
|
50
|
+
Requires-Dist: rich>=13.0
|
|
51
|
+
Provides-Extra: openai
|
|
52
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
53
|
+
Provides-Extra: gemini
|
|
54
|
+
Requires-Dist: google-genai>=0.1; extra == "gemini"
|
|
55
|
+
Provides-Extra: ollama
|
|
56
|
+
Requires-Dist: ollama>=0.2; extra == "ollama"
|
|
57
|
+
Provides-Extra: hf
|
|
58
|
+
Requires-Dist: sentence-transformers>=2.0; extra == "hf"
|
|
59
|
+
Provides-Extra: pgvector
|
|
60
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == "pgvector"
|
|
61
|
+
Requires-Dist: pgvector>=0.2; extra == "pgvector"
|
|
62
|
+
Provides-Extra: qdrant
|
|
63
|
+
Requires-Dist: qdrant-client>=1.7; extra == "qdrant"
|
|
64
|
+
Provides-Extra: pinecone
|
|
65
|
+
Requires-Dist: pinecone-client>=3.0; extra == "pinecone"
|
|
66
|
+
Provides-Extra: chroma
|
|
67
|
+
Requires-Dist: chromadb>=0.4; extra == "chroma"
|
|
68
|
+
Provides-Extra: lancedb
|
|
69
|
+
Requires-Dist: lancedb>=0.5; extra == "lancedb"
|
|
70
|
+
Requires-Dist: pyarrow>=14.0; extra == "lancedb"
|
|
71
|
+
Provides-Extra: dev
|
|
72
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
73
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
74
|
+
Provides-Extra: all
|
|
75
|
+
Requires-Dist: openai>=1.0; extra == "all"
|
|
76
|
+
Requires-Dist: qdrant-client>=1.7; extra == "all"
|
|
77
|
+
Requires-Dist: chromadb>=0.4; extra == "all"
|
|
78
|
+
Requires-Dist: lancedb>=0.5; extra == "all"
|
|
79
|
+
Requires-Dist: pyarrow>=14.0; extra == "all"
|
|
80
|
+
Dynamic: license-file
|
|
81
|
+
|
|
82
|
+
<div align="center">
|
|
83
|
+
|
|
84
|
+
<img src="https://img.shields.io/badge/KBVC-Knowledge%20OS-6366f1?style=for-the-badge&logoColor=white" alt="KBVC"/>
|
|
85
|
+
|
|
86
|
+
# KBVC — Knowledge Base Version Control
|
|
87
|
+
|
|
88
|
+
**The Knowledge Operating System for AI/RAG systems.**
|
|
89
|
+
Version control, semantic graph, full audit trails, and reproducible deployment — for your knowledge base.
|
|
90
|
+
|
|
91
|
+
[PyPI package](https://pypi.org/project/kbvc/)
|
|
92
|
+
[](https://pypi.org/project/kbvc/)
|
|
93
|
+
[License: MIT](LICENSE)
|
|
94
|
+
[GitHub Actions](https://github.com/Saiyam-Sandhir-Jain/kbvc/actions)
|
|
95
|
+
[](#)
|
|
96
|
+
|
|
97
|
+
</div>
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## What is KBVC?
|
|
102
|
+
|
|
103
|
+
KBVC is a **Knowledge Operating System** — an infrastructure layer that sits between your documents and your vector database, giving your knowledge base the same discipline that Git gives your code.
|
|
104
|
+
|
|
105
|
+
It is **not** a RAG library. It is **not** a vector database.
|
|
106
|
+
It is the **missing infrastructure layer** that answers questions no existing tool can:
|
|
107
|
+
|
|
108
|
+
| Question | Without KBVC | With KBVC |
|
|
109
|
+
|---|---|---|
|
|
110
|
+
| Which document version produced this embedding? | ❌ Unknown | ✅ `kbvc explain <vector_id>` |
|
|
111
|
+
| What was my knowledge state 3 months ago? | ❌ Gone | ✅ `kbvc checkout <commit>` |
|
|
112
|
+
| How do my documents relate to each other? | ❌ No idea | ✅ `kbvc graph` |
|
|
113
|
+
| Which documents are stale or contradictory? | ❌ Manual audit | ✅ `kbvc stale` / `kbvc contradict list` |
|
|
114
|
+
| Can I reproduce the exact RAG pipeline from last quarter? | ❌ Never | ✅ `kbvc.lock` |
|
|
115
|
+
| If my LLM gives a bad answer, what document caused it? | ❌ Guessing | ✅ `kbvc trace` + `kbvc explain` |
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Architecture Overview
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
Your Documents (Markdown)
|
|
123
|
+
│
|
|
124
|
+
▼
|
|
125
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
126
|
+
│ KBVC │
|
|
127
|
+
│ │
|
|
128
|
+
│ ┌─────────────┐ ┌──────────────┐ ┌───────────────────┐ │
|
|
129
|
+
│ │ Commit DAG │ │ Knowledge │ │ Prompt & │ │
|
|
130
|
+
│ │ (SHA-256) │ │ Graph │ │ Retrieval Store │ │
|
|
131
|
+
│ │ │ │ (Relations) │ │ (per commit) │ │
|
|
132
|
+
│ └─────────────┘ └──────────────┘ └───────────────────┘ │
|
|
133
|
+
│ │
|
|
134
|
+
│ ┌──────────────────────────────────────────────────────┐ │
|
|
135
|
+
│ │ VSAL — Vector Storage Abstraction Layer │ │
|
|
136
|
+
│ │ ChunkRecord ──► Qdrant / pgvector / Chroma / │ │
|
|
137
|
+
│ │ Pinecone / LanceDB │ │
|
|
138
|
+
│ └──────────────────────────────────────────────────────┘ │
|
|
139
|
+
│ │
|
|
140
|
+
│ ┌──────────────────────────────────────────────────────┐ │
|
|
141
|
+
│ │ Embed Backends │ │
|
|
142
|
+
│ │ OpenAI · Gemini · Ollama · HuggingFace │ │
|
|
143
|
+
│ └──────────────────────────────────────────────────────┘ │
|
|
144
|
+
└─────────────────────────────────────────────────────────────┘
|
|
145
|
+
│
|
|
146
|
+
▼
|
|
147
|
+
Your RAG / LLM Application
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Feature Matrix
|
|
153
|
+
|
|
154
|
+
| Phase | Commands | What It Unlocks |
|
|
155
|
+
|---|---|---|
|
|
156
|
+
| **1** | `init` `add` `commit` `log` | Git-style commit loop for knowledge |
|
|
157
|
+
| **2** | `status` `checkout` `diff` | Time travel — restore any past state |
|
|
158
|
+
| **3** | `link` `query` `graph` | Knowledge graph + semantic search |
|
|
159
|
+
| **4** | `branch` `depends` `impact` `annotate` `history` `trace` `doctor` | Audit trails + dependency management |
|
|
160
|
+
| **5** | `ingest` `push` `remote` `analyze` `extract` `stale` `stats` | Multi-source ingestion + intelligence |
|
|
161
|
+
| **6** | `backend` `explain` `promote` | Infrastructure management + provenance |
|
|
162
|
+
| **7** | `gc` `migrate backend` `migrate schema` | Maintenance + zero-downtime ops |
|
|
163
|
+
| **8** | `sync` | Volatility-aware auto-commit |
|
|
164
|
+
| **9** | `contradict list/resolve` | Contradiction detection + resolution |
|
|
165
|
+
| **10** | `ask` | Grounded knowledge Q&A |
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Quick Start
|
|
170
|
+
|
|
171
|
+
### Install
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
# Core (pick your embedding + vector DB extras)
|
|
175
|
+
pip install "kbvc[openai,qdrant]"
|
|
176
|
+
|
|
177
|
+
# Local-first setup (LanceDB runs embedded — no vector DB server needed)
|
|
178
|
+
pip install "kbvc[openai,lancedb]"
|
|
179
|
+
|
|
180
|
+
# Common bundle: OpenAI embeddings + Qdrant, Chroma, and LanceDB (other backends need their own extras)
|
|
181
|
+
pip install "kbvc[all]"
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Initialize a Knowledge Repository
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
mkdir my-knowledge && cd my-knowledge
|
|
188
|
+
kbvc init
|
|
189
|
+
|
|
190
|
+
# Configure backends
|
|
191
|
+
kbvc config set embed.backend openai
|
|
192
|
+
kbvc config set embed.key sk-...
|
|
193
|
+
kbvc config set vectordb.backend lancedb # no server needed
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Create Your First Knowledge Object
|
|
197
|
+
|
|
198
|
+
Every document tracked by KBVC is a **Knowledge Object (KO)** — a Markdown file with a YAML frontmatter header:
|
|
199
|
+
|
|
200
|
+
```markdown
|
|
201
|
+
---
|
|
202
|
+
id: caching-strategy
|
|
203
|
+
type: document
|
|
204
|
+
tags: [architecture, performance]
|
|
205
|
+
volatility: slow
|
|
206
|
+
source_type: file
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## Caching Strategy
|
|
210
|
+
|
|
211
|
+
We use a two-tier cache: Redis for hot paths (TTL 60s) and
|
|
212
|
+
PostgreSQL materialized views for aggregate queries.
|
|
213
|
+
|
|
214
|
+
## Invalidation
|
|
215
|
+
|
|
216
|
+
Cache invalidation is triggered by...
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
kbvc add knowledge/caching-strategy.md
|
|
221
|
+
kbvc commit -m "Add caching strategy document"
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Explore
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
kbvc log # View commit history
|
|
228
|
+
kbvc status # See staged vs. committed state
|
|
229
|
+
kbvc query "how does caching work?" # Semantic search
|
|
230
|
+
kbvc ask "what is the cache TTL?" # Grounded Q&A with citations
|
|
231
|
+
kbvc graph # Visualize knowledge graph
|
|
232
|
+
kbvc stats # Analytics report
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## Core Concepts
|
|
238
|
+
|
|
239
|
+
### Knowledge Objects (KOs)
|
|
240
|
+
|
|
241
|
+
A KO is the fundamental unit of KBVC. Each KO is:
|
|
242
|
+
- A Markdown file with YAML frontmatter
|
|
243
|
+
- Chunked into sections (one vector per section)
|
|
244
|
+
- Versioned independently (v1, v2, v3...)
|
|
245
|
+
- Connected to other KOs via typed relations
|
|
246
|
+
|
|
247
|
+
**Volatility levels** control how KBVC handles automatic commits:
|
|
248
|
+
|
|
249
|
+
| Volatility | Meaning | Auto-commit |
|
|
250
|
+
|---|---|---|
|
|
251
|
+
| `frozen` | Never changes (e.g. archived decisions) | Never re-embedded |
|
|
252
|
+
| `slow` | Changes occasionally (most documents) | On explicit `kbvc sync` |
|
|
253
|
+
| `live` | Changes frequently (e.g. meeting notes) | On every `kbvc sync` |
|
|
254
|
+
|
|
255
|
+
### Commit DAG
|
|
256
|
+
|
|
257
|
+
Every `kbvc commit` creates an immutable `CommitObject`, identified by a SHA-256 hash computed over:
|
|
258
|
+
- Parent commit ID
|
|
259
|
+
- Branch name
|
|
260
|
+
- Commit message
|
|
261
|
+
- All changed KO IDs + their chunk hashes
|
|
262
|
+
- Snapshot filenames for graph, prompt, retrieval config
|
|
263
|
+
|
|
264
|
+
This gives you a fully reproducible, tamper-evident history — just like Git, but for knowledge.
|
|
265
|
+
|
|
266
|
+
### Knowledge Graph
|
|
267
|
+
|
|
268
|
+
KOs can be linked with typed relations:
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
kbvc link caching-strategy informed_by system-architecture
|
|
272
|
+
kbvc link new-api contradicts old-api
|
|
273
|
+
kbvc link auth-module extends base-security
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
**Relation types:** `informed_by` · `contradicts` · `extends` · `derived_from` · `supersedes` · `depends_on` · `related_to`
|
|
277
|
+
|
|
278
|
+
### VSAL — Vector Storage Abstraction Layer
|
|
279
|
+
|
|
280
|
+
KBVC never touches Qdrant points, pgvector rows, or Pinecone vectors directly. All storage goes through `ChunkRecord` — a universal unit that every backend adapter maps to its native representation.
|
|
281
|
+
|
|
282
|
+
This means you can migrate between vector databases **without re-embedding**:
|
|
283
|
+
|
|
284
|
+
```bash
|
|
285
|
+
kbvc migrate backend --from qdrant --to pgvector
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Knowledge Object Frontmatter Reference
|
|
291
|
+
|
|
292
|
+
```yaml
|
|
293
|
+
---
|
|
294
|
+
id: my-document # required; used as ko_id
|
|
295
|
+
type: project # project | education | patent | document | lesson | ...
|
|
296
|
+
tags: [ai, rag, python] # free tags for filtering
|
|
297
|
+
volatility: slow # frozen | slow | live
|
|
298
|
+
source_type: file # file | web | github | pdf | notion | memory
|
|
299
|
+
valid_from: "2026-01-01" # optional temporal validity window
|
|
300
|
+
valid_to: null # null = still valid
|
|
301
|
+
---
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
---
|
|
305
|
+
|
|
306
|
+
## All Commands
|
|
307
|
+
|
|
308
|
+
### Repository Setup
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
kbvc init [--name NAME] [--no-git] # Initialize a KBVC repo
|
|
312
|
+
kbvc clone <url> [directory] # Clone a KBVC repo from Git
|
|
313
|
+
kbvc config set <key> <value> # Set a config value
|
|
314
|
+
kbvc config get <key> # Read a config value
|
|
315
|
+
kbvc config list # List all config
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
### Knowledge Authoring
|
|
319
|
+
|
|
320
|
+
```bash
|
|
321
|
+
kbvc add <path|.> # Stage files for commit
|
|
322
|
+
kbvc add <path> --reason "why" # Stage with an annotation
|
|
323
|
+
kbvc commit -m "message" # Commit staged changes
|
|
324
|
+
kbvc commit -m "message" --dry-run # Preview commit (no write)
|
|
325
|
+
kbvc status # Show staged vs. committed state
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### History & Diff
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
kbvc log # Commit history (newest first)
|
|
332
|
+
kbvc log --oneline # Compact history
|
|
333
|
+
kbvc diff # Diff staged against HEAD
|
|
334
|
+
kbvc diff <commit_id> # Diff against a specific commit
|
|
335
|
+
kbvc history <ko_id> # Version history for one KO
|
|
336
|
+
kbvc checkout <commit_id> # Restore knowledge state
|
|
337
|
+
kbvc checkout <commit_id> --ko <id> # Restore single KO
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
### Knowledge Graph
|
|
341
|
+
|
|
342
|
+
```bash
|
|
343
|
+
kbvc link <from> <type> <to> # Create a relation
|
|
344
|
+
kbvc graph # Print graph summary
|
|
345
|
+
kbvc graph --dot # Export as DOT (Graphviz)
|
|
346
|
+
kbvc depends <ko_id> # Show KO dependencies
|
|
347
|
+
kbvc impact <ko_id> # What depends on this KO?
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
### Search & Q&A
|
|
351
|
+
|
|
352
|
+
```bash
|
|
353
|
+
kbvc query "natural language question" # Vector search
|
|
354
|
+
kbvc query "..." --top-k 10 # Return 10 results
|
|
355
|
+
kbvc ask "natural language question" # Grounded Q&A with citations
|
|
356
|
+
kbvc ask "..." --top-k 5 --show-ids # Show raw vector IDs
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
### Intelligence & Analytics
|
|
360
|
+
|
|
361
|
+
```bash
|
|
362
|
+
kbvc analyze # Suggest relations between KOs
|
|
363
|
+
kbvc extract # Extract entities from KOs
|
|
364
|
+
kbvc stale # Find stale/orphaned KOs
|
|
365
|
+
kbvc contradict list # List contradictions
|
|
366
|
+
kbvc contradict resolve <rel_id> # Resolve a contradiction
|
|
367
|
+
kbvc stats # Analytics report
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### Provenance & Audit
|
|
371
|
+
|
|
372
|
+
```bash
|
|
373
|
+
kbvc trace <ko_id> # Full version trail for a KO
|
|
374
|
+
kbvc explain <vector_id> # Trace vector → source → commit
|
|
375
|
+
kbvc annotate <ko_id> "reason" # Add change reason before commit
|
|
376
|
+
kbvc doctor # Repo health check
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
### Ingestion
|
|
380
|
+
|
|
381
|
+
```bash
|
|
382
|
+
kbvc ingest website <url> # Scrape a website
|
|
383
|
+
kbvc ingest github <repo_url> # Import a GitHub repo
|
|
384
|
+
kbvc ingest pdf <file.pdf> # Convert PDF to KO
|
|
385
|
+
kbvc ingest notion <page_url> # Import a Notion page
|
|
386
|
+
kbvc ingest text <file.txt> # Import a plain text file
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
### Branches & Remotes
|
|
390
|
+
|
|
391
|
+
```bash
|
|
392
|
+
kbvc branch # List branches
|
|
393
|
+
kbvc branch <name> # Create a branch
|
|
394
|
+
kbvc branch checkout <name> # Switch branch
|
|
395
|
+
kbvc remote add <name> <url> # Add a remote
|
|
396
|
+
kbvc push # Push to configured remote
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
### Infrastructure
|
|
400
|
+
|
|
401
|
+
```bash
|
|
402
|
+
kbvc backend init # Create vector DB schema
|
|
403
|
+
kbvc backend info # Show backend status
|
|
404
|
+
kbvc migrate backend --from X --to Y # Migrate vector store
|
|
405
|
+
kbvc migrate embeddings --from M --to M2 # Swap embedding model
|
|
406
|
+
kbvc migrate schema # Update lock file schema
|
|
407
|
+
kbvc gc # Garbage-collect orphan vectors
|
|
408
|
+
kbvc sync # Volatility-aware auto-commit
|
|
409
|
+
kbvc sync --volatility live # Only sync live KOs
|
|
410
|
+
kbvc sync --dry-run # Preview what would be committed
|
|
411
|
+
kbvc promote "<memory text>" # Promote agent memory to KO
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
---
|
|
415
|
+
|
|
416
|
+
## Backends
|
|
417
|
+
|
|
418
|
+
### Embedding Backends
|
|
419
|
+
|
|
420
|
+
| Backend | Install Extra | Models |
|
|
421
|
+
|---|---|---|
|
|
422
|
+
| **OpenAI** | `kbvc[openai]` | `text-embedding-3-small` (default), `text-embedding-3-large`, `ada-002` |
|
|
423
|
+
| **Gemini** | `kbvc[gemini]` | `gemini-embedding-001` |
|
|
424
|
+
| **Ollama** | `kbvc[ollama]` | any locally-served model (`nomic-embed-text`, etc.) |
|
|
425
|
+
| **HuggingFace** | `kbvc[hf]` | any `sentence-transformers` model |
|
|
426
|
+
|
|
427
|
+
```bash
|
|
428
|
+
# OpenAI
|
|
429
|
+
kbvc config set embed.backend openai
|
|
430
|
+
kbvc config set embed.key sk-...
|
|
431
|
+
kbvc config set embed.model text-embedding-3-small
|
|
432
|
+
|
|
433
|
+
# Ollama (local, no API key)
|
|
434
|
+
kbvc config set embed.backend ollama
|
|
435
|
+
kbvc config set embed.model nomic-embed-text
|
|
436
|
+
kbvc config set embed.url http://localhost:11434
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
### Vector DB Backends
|
|
440
|
+
|
|
441
|
+
| Backend | Install Extra | Best For |
|
|
442
|
+
|---|---|---|
|
|
443
|
+
| **LanceDB** | `kbvc[lancedb]` | Local dev, no server, embedded |
|
|
444
|
+
| **Qdrant** | `kbvc[qdrant]` | Production, cloud-native |
|
|
445
|
+
| **pgvector** | `kbvc[pgvector]` | PostgreSQL shops |
|
|
446
|
+
| **Chroma** | `kbvc[chroma]` | Local dev, Python-first |
|
|
447
|
+
| **Pinecone** | `kbvc[pinecone]` | Managed serverless |
|
|
448
|
+
|
|
449
|
+
```bash
|
|
450
|
+
# LanceDB (no server needed — best for local dev)
|
|
451
|
+
kbvc config set vectordb.backend lancedb
|
|
452
|
+
kbvc config set vectordb.url ./kbvc_lance
|
|
453
|
+
|
|
454
|
+
# Qdrant
|
|
455
|
+
kbvc config set vectordb.backend qdrant
|
|
456
|
+
kbvc config set vectordb.url http://localhost:6333
|
|
457
|
+
kbvc config set vectordb.collection kbvc
|
|
458
|
+
|
|
459
|
+
# pgvector
|
|
460
|
+
kbvc config set vectordb.backend pgvector
|
|
461
|
+
kbvc config set vectordb.url "postgresql://user:pass@localhost/mydb"
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
---
|
|
465
|
+
|
|
466
|
+
## The kbvc.lock File
|
|
467
|
+
|
|
468
|
+
Every commit writes a `kbvc.lock` file to your repo root — analogous to `package-lock.json`. It records:
|
|
469
|
+
|
|
470
|
+
```yaml
|
|
471
|
+
kbvc_version: "0.1.0"
|
|
472
|
+
embedding:
|
|
473
|
+
provider: openai
|
|
474
|
+
model: text-embedding-3-small
|
|
475
|
+
dimensions: 1536
|
|
476
|
+
vector_store:
|
|
477
|
+
provider: qdrant
|
|
478
|
+
collection: kbvc
|
|
479
|
+
committed_at: "2026-01-15T10:30:00Z"
|
|
480
|
+
commit_id: "a3f2c1d9..."
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
Commit `kbvc.lock` to Git. Anyone cloning your repo with `kbvc clone` will see exactly what backend configuration was used and can reproduce it.
|
|
484
|
+
|
|
485
|
+
---
|
|
486
|
+
|
|
487
|
+
## Use Cases
|
|
488
|
+
|
|
489
|
+
### AI Product Teams
|
|
490
|
+
Track your RAG knowledge base like code. Every prompt change, every document update, every embedding model swap is versioned and auditable. Reproduce any past retrieval configuration exactly.
|
|
491
|
+
|
|
492
|
+
### Research Labs
|
|
493
|
+
Manage evolving scientific knowledge with contradiction detection and lineage tracking. Know when a newer paper supersedes an older one. Build multi-hop knowledge graphs connecting related findings.
|
|
494
|
+
|
|
495
|
+
### Enterprise Knowledge Management
|
|
496
|
+
Enforce governance over internal knowledge bases. Who changed what, when, and why. Detect when two documents contradict each other. Get alerts when downstream KOs depend on stale upstream content.
|
|
497
|
+
|
|
498
|
+
### LLM Application Developers
|
|
499
|
+
Debug why your LLM gave a specific answer — trace it back to the exact document chunk, its version, and the commit that created its embedding. Swap embedding models without rewriting your pipeline.
|
|
500
|
+
|
|
501
|
+
---
|
|
502
|
+
|
|
503
|
+
## Development Setup
|
|
504
|
+
|
|
505
|
+
```bash
|
|
506
|
+
git clone https://github.com/Saiyam-Sandhir-Jain/kbvc.git
|
|
507
|
+
cd kbvc
|
|
508
|
+
pip install -e ".[dev,openai,qdrant]"
|
|
509
|
+
pytest tests/ -v
|
|
510
|
+
```
|
|
511
|
+
|
|
512
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for the full contributor guide.
|
|
513
|
+
|
|
514
|
+
---
|
|
515
|
+
|
|
516
|
+
## License
|
|
517
|
+
|
|
518
|
+
MIT © [Saiyam Jain](https://github.com/Saiyam-Sandhir-Jain)
|
|
519
|
+
|
|
520
|
+
---
|
|
521
|
+
|
|
522
|
+
<div align="center">
|
|
523
|
+
<sub>Built by Saiyam Jain · VIT Bhopal</sub>
|
|
524
|
+
</div>
|