kbvc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. kbvc-0.1.0/LICENSE +21 -0
  2. kbvc-0.1.0/PKG-INFO +524 -0
  3. kbvc-0.1.0/README.md +443 -0
  4. kbvc-0.1.0/pyproject.toml +80 -0
  5. kbvc-0.1.0/setup.cfg +4 -0
  6. kbvc-0.1.0/src/kbvc/__init__.py +2 -0
  7. kbvc-0.1.0/src/kbvc/adapters/__init__.py +37 -0
  8. kbvc-0.1.0/src/kbvc/adapters/base.py +36 -0
  9. kbvc-0.1.0/src/kbvc/adapters/text_file.py +42 -0
  10. kbvc-0.1.0/src/kbvc/backends/__init__.py +68 -0
  11. kbvc-0.1.0/src/kbvc/backends/embed/__init__.py +41 -0
  12. kbvc-0.1.0/src/kbvc/backends/embed/gemini.py +87 -0
  13. kbvc-0.1.0/src/kbvc/backends/embed/huggingface.py +49 -0
  14. kbvc-0.1.0/src/kbvc/backends/embed/ollama.py +61 -0
  15. kbvc-0.1.0/src/kbvc/backends/embed/openai.py +86 -0
  16. kbvc-0.1.0/src/kbvc/backends/vectordb/__init__.py +199 -0
  17. kbvc-0.1.0/src/kbvc/backends/vectordb/chroma.py +123 -0
  18. kbvc-0.1.0/src/kbvc/backends/vectordb/lancedb.py +248 -0
  19. kbvc-0.1.0/src/kbvc/backends/vectordb/pgvector.py +259 -0
  20. kbvc-0.1.0/src/kbvc/backends/vectordb/pinecone.py +141 -0
  21. kbvc-0.1.0/src/kbvc/backends/vectordb/qdrant.py +269 -0
  22. kbvc-0.1.0/src/kbvc/cli.py +2581 -0
  23. kbvc-0.1.0/src/kbvc/commands/__init__.py +0 -0
  24. kbvc-0.1.0/src/kbvc/commands/analyze.py +360 -0
  25. kbvc-0.1.0/src/kbvc/commands/backend.py +115 -0
  26. kbvc-0.1.0/src/kbvc/commands/commit.py +350 -0
  27. kbvc-0.1.0/src/kbvc/commands/contradict.py +165 -0
  28. kbvc-0.1.0/src/kbvc/commands/explain.py +68 -0
  29. kbvc-0.1.0/src/kbvc/commands/gc.py +144 -0
  30. kbvc-0.1.0/src/kbvc/commands/ingest.py +404 -0
  31. kbvc-0.1.0/src/kbvc/commands/migrate.py +105 -0
  32. kbvc-0.1.0/src/kbvc/commands/migrate_embeddings.py +175 -0
  33. kbvc-0.1.0/src/kbvc/commands/migrate_helpers.py +40 -0
  34. kbvc-0.1.0/src/kbvc/commands/promote.py +108 -0
  35. kbvc-0.1.0/src/kbvc/commands/push.py +215 -0
  36. kbvc-0.1.0/src/kbvc/commands/stale.py +137 -0
  37. kbvc-0.1.0/src/kbvc/commands/stats.py +147 -0
  38. kbvc-0.1.0/src/kbvc/commands/sync.py +93 -0
  39. kbvc-0.1.0/src/kbvc/core/__init__.py +0 -0
  40. kbvc-0.1.0/src/kbvc/core/chunker.py +140 -0
  41. kbvc-0.1.0/src/kbvc/core/commit.py +193 -0
  42. kbvc-0.1.0/src/kbvc/core/graph.py +264 -0
  43. kbvc-0.1.0/src/kbvc/core/index.py +84 -0
  44. kbvc-0.1.0/src/kbvc/core/ko.py +157 -0
  45. kbvc-0.1.0/src/kbvc/core/lineage.py +198 -0
  46. kbvc-0.1.0/src/kbvc/core/prompt_store.py +179 -0
  47. kbvc-0.1.0/src/kbvc/core/repo.py +318 -0
  48. kbvc-0.1.0/src/kbvc/core/retrieval_store.py +130 -0
  49. kbvc-0.1.0/src/kbvc/core/versioner.py +132 -0
  50. kbvc-0.1.0/src/kbvc/utils/__init__.py +0 -0
  51. kbvc-0.1.0/src/kbvc/utils/config.py +44 -0
  52. kbvc-0.1.0/src/kbvc/utils/display.py +63 -0
  53. kbvc-0.1.0/src/kbvc/utils/lock.py +81 -0
  54. kbvc-0.1.0/src/kbvc.egg-info/PKG-INFO +524 -0
  55. kbvc-0.1.0/src/kbvc.egg-info/SOURCES.txt +58 -0
  56. kbvc-0.1.0/src/kbvc.egg-info/dependency_links.txt +1 -0
  57. kbvc-0.1.0/src/kbvc.egg-info/entry_points.txt +2 -0
  58. kbvc-0.1.0/src/kbvc.egg-info/requires.txt +43 -0
  59. kbvc-0.1.0/src/kbvc.egg-info/top_level.txt +1 -0
  60. kbvc-0.1.0/tests/test_kbvc.py +2417 -0
kbvc-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 KBVC Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
kbvc-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,524 @@
1
+ Metadata-Version: 2.4
2
+ Name: kbvc
3
+ Version: 0.1.0
4
+ Summary: Knowledge Operating System for AI/RAG — version control, semantic graph, full audit trails, and reproducible deployment for knowledge bases
5
+ Author-email: Saiyam Jain <saiyam.sandhir.jain@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 KBVC Contributors
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/Saiyam-Sandhir-Jain/kbvc
29
+ Project-URL: Documentation, https://github.com/Saiyam-Sandhir-Jain/kbvc/blob/main/KBVC_DOCS.md
30
+ Project-URL: Repository, https://github.com/Saiyam-Sandhir-Jain/kbvc
31
+ Project-URL: Issues, https://github.com/Saiyam-Sandhir-Jain/kbvc/issues
32
+ Project-URL: Changelog, https://github.com/Saiyam-Sandhir-Jain/kbvc/blob/main/CHANGELOG.md
33
+ Keywords: rag,knowledge-infrastructure,vector-database,embeddings,knowledge-base,version-control,knowledge-graph,llm,reproducibility,semantic-versioning
34
+ Classifier: Development Status :: 4 - Beta
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: Intended Audience :: Science/Research
37
+ Classifier: License :: OSI Approved :: MIT License
38
+ Classifier: Programming Language :: Python :: 3
39
+ Classifier: Programming Language :: Python :: 3.9
40
+ Classifier: Programming Language :: Python :: 3.10
41
+ Classifier: Programming Language :: Python :: 3.11
42
+ Classifier: Programming Language :: Python :: 3.12
43
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
44
+ Classifier: Topic :: Software Development :: Version Control
45
+ Requires-Python: >=3.9
46
+ Description-Content-Type: text/markdown
47
+ License-File: LICENSE
48
+ Requires-Dist: click>=8.0
49
+ Requires-Dist: pyyaml>=6.0
50
+ Requires-Dist: rich>=13.0
51
+ Provides-Extra: openai
52
+ Requires-Dist: openai>=1.0; extra == "openai"
53
+ Provides-Extra: gemini
54
+ Requires-Dist: google-genai>=0.1; extra == "gemini"
55
+ Provides-Extra: ollama
56
+ Requires-Dist: ollama>=0.2; extra == "ollama"
57
+ Provides-Extra: hf
58
+ Requires-Dist: sentence-transformers>=2.0; extra == "hf"
59
+ Provides-Extra: pgvector
60
+ Requires-Dist: psycopg2-binary>=2.9; extra == "pgvector"
61
+ Requires-Dist: pgvector>=0.2; extra == "pgvector"
62
+ Provides-Extra: qdrant
63
+ Requires-Dist: qdrant-client>=1.7; extra == "qdrant"
64
+ Provides-Extra: pinecone
65
+ Requires-Dist: pinecone-client>=3.0; extra == "pinecone"
66
+ Provides-Extra: chroma
67
+ Requires-Dist: chromadb>=0.4; extra == "chroma"
68
+ Provides-Extra: lancedb
69
+ Requires-Dist: lancedb>=0.5; extra == "lancedb"
70
+ Requires-Dist: pyarrow>=14.0; extra == "lancedb"
71
+ Provides-Extra: dev
72
+ Requires-Dist: pytest>=7.0; extra == "dev"
73
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
74
+ Provides-Extra: all
75
+ Requires-Dist: openai>=1.0; extra == "all"
76
+ Requires-Dist: qdrant-client>=1.7; extra == "all"
77
+ Requires-Dist: chromadb>=0.4; extra == "all"
78
+ Requires-Dist: lancedb>=0.5; extra == "all"
79
+ Requires-Dist: pyarrow>=14.0; extra == "all"
80
+ Dynamic: license-file
81
+
82
+ <div align="center">
83
+
84
+ <img src="https://img.shields.io/badge/KBVC-Knowledge%20OS-6366f1?style=for-the-badge&logoColor=white" alt="KBVC"/>
85
+
86
+ # KBVC — Knowledge Base Version Control
87
+
88
+ **The Knowledge Operating System for AI/RAG systems.**
89
+ Version control, semantic graph, full audit trails, and reproducible deployment — for your knowledge base.
90
+
91
+ [![PyPI version](https://img.shields.io/pypi/v/kbvc.svg?style=flat-square&color=6366f1)](https://pypi.org/project/kbvc/)
92
+ [![Python](https://img.shields.io/pypi/pyversions/kbvc?style=flat-square)](https://pypi.org/project/kbvc/)
93
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg?style=flat-square)](https://github.com/Saiyam-Sandhir-Jain/kbvc/blob/main/LICENSE)
94
+ [![CI](https://img.shields.io/github/actions/workflow/status/Saiyam-Sandhir-Jain/kbvc/ci.yml?style=flat-square&label=CI)](https://github.com/Saiyam-Sandhir-Jain/kbvc/actions)
95
+ [![Tests](https://img.shields.io/badge/tests-195%20passed-brightgreen?style=flat-square)](#)
96
+
97
+ </div>
98
+
99
+ ---
100
+
101
+ ## What is KBVC?
102
+
103
+ KBVC is a **Knowledge Operating System** — an infrastructure layer that sits between your documents and your vector database, giving your knowledge base the same discipline that Git gives your code.
104
+
105
+ It is **not** a RAG library. It is **not** a vector database.
106
+ It is the **missing infrastructure layer** that answers questions no existing tool can:
107
+
108
+ | Question | Without KBVC | With KBVC |
109
+ |---|---|---|
110
+ | Which document version produced this embedding? | ❌ Unknown | ✅ `kbvc explain <vector_id>` |
111
+ | What was my knowledge state 3 months ago? | ❌ Gone | ✅ `kbvc checkout <commit>` |
112
+ | How do my documents relate to each other? | ❌ No idea | ✅ `kbvc graph` |
113
+ | Which documents are stale or contradictory? | ❌ Manual audit | ✅ `kbvc stale` / `kbvc contradict list` |
114
+ | Can I reproduce the exact RAG pipeline from last quarter? | ❌ Never | ✅ `kbvc.lock` |
115
+ | If my LLM gives a bad answer, what document caused it? | ❌ Guessing | ✅ `kbvc trace` + `kbvc explain` |
116
+
117
+ ---
118
+
119
+ ## Architecture Overview
120
+
121
+ ```
122
+ Your Documents (Markdown)
123
+         │
124
+         ▼
125
+ ┌─────────────────────────────────────────────────────────────┐
126
+ │ KBVC │
127
+ │ │
128
+ │ ┌─────────────┐ ┌──────────────┐ ┌───────────────────┐ │
129
+ │ │ Commit DAG │ │ Knowledge │ │ Prompt & │ │
130
+ │ │ (SHA-256) │ │ Graph │ │ Retrieval Store │ │
131
+ │ │ │ │ (Relations) │ │ (per commit) │ │
132
+ │ └─────────────┘ └──────────────┘ └───────────────────┘ │
133
+ │ │
134
+ │ ┌──────────────────────────────────────────────────────┐ │
135
+ │ │ VSAL — Vector Storage Abstraction Layer │ │
136
+ │ │ ChunkRecord ──► Qdrant / pgvector / Chroma / │ │
137
+ │ │ Pinecone / LanceDB │ │
138
+ │ └──────────────────────────────────────────────────────┘ │
139
+ │ │
140
+ │ ┌──────────────────────────────────────────────────────┐ │
141
+ │ │ Embed Backends │ │
142
+ │ │ OpenAI · Gemini · Ollama · HuggingFace │ │
143
+ │ └──────────────────────────────────────────────────────┘ │
144
+ └─────────────────────────────────────────────────────────────┘
145
+         │
146
+         ▼
147
+ Your RAG / LLM Application
148
+ ```
149
+
150
+ ---
151
+
152
+ ## Feature Matrix
153
+
154
+ | Phase | Commands | What It Unlocks |
155
+ |---|---|---|
156
+ | **1** | `init` `add` `commit` `log` | Git-style commit loop for knowledge |
157
+ | **2** | `status` `checkout` `diff` | Time travel — restore any past state |
158
+ | **3** | `link` `query` `graph` | Knowledge graph + semantic search |
159
+ | **4** | `branch` `depends` `impact` `annotate` `history` `trace` `doctor` | Audit trails + dependency management |
160
+ | **5** | `ingest` `push` `remote` `analyze` `extract` `stale` `stats` | Multi-source ingestion + intelligence |
161
+ | **6** | `backend` `explain` `promote` | Infrastructure management + provenance |
162
+ | **7** | `gc` `migrate backend` `migrate schema` | Maintenance + zero-downtime ops |
163
+ | **8** | `sync` | Volatility-aware auto-commit |
164
+ | **9** | `contradict list/resolve` | Contradiction detection + resolution |
165
+ | **10** | `ask` | Grounded knowledge Q&A |
166
+
167
+ ---
168
+
169
+ ## Quick Start
170
+
171
+ ### Install
172
+
173
+ ```bash
174
+ # Core (pick your embedding + vector DB extras)
175
+ pip install "kbvc[openai,qdrant]"
176
+
177
+ # Local-first setup (no external server)
178
+ pip install "kbvc[openai,lancedb]"
179
+
180
+ # Common bundle: OpenAI + Qdrant + Chroma + LanceDB (other backends need their own extras)
181
+ pip install "kbvc[all]"
182
+ ```
183
+
184
+ ### Initialize a Knowledge Repository
185
+
186
+ ```bash
187
+ mkdir my-knowledge && cd my-knowledge
188
+ kbvc init
189
+
190
+ # Configure backends
191
+ kbvc config set embed.backend openai
192
+ kbvc config set embed.key sk-...
193
+ kbvc config set vectordb.backend lancedb # no server needed
194
+ ```
195
+
196
+ ### Create Your First Knowledge Object
197
+
198
+ Every document tracked by KBVC is a **Knowledge Object (KO)** — a Markdown file with a YAML frontmatter header:
199
+
200
+ ```markdown
201
+ ---
202
+ id: caching-strategy
203
+ type: document
204
+ tags: [architecture, performance]
205
+ volatility: slow
206
+ source_type: file
207
+ ---
208
+
209
+ ## Caching Strategy
210
+
211
+ We use a two-tier cache: Redis for hot paths (TTL 60s) and
212
+ PostgreSQL materialized views for aggregate queries.
213
+
214
+ ## Invalidation
215
+
216
+ Cache invalidation is triggered by...
217
+ ```
218
+
219
+ ```bash
220
+ kbvc add knowledge/caching-strategy.md
221
+ kbvc commit -m "Add caching strategy document"
222
+ ```
223
+
224
+ ### Explore
225
+
226
+ ```bash
227
+ kbvc log # View commit history
228
+ kbvc status # See staged vs. committed state
229
+ kbvc query "how does caching work?" # Semantic search
230
+ kbvc ask "what is the cache TTL?" # Grounded Q&A with citations
231
+ kbvc graph # Visualize knowledge graph
232
+ kbvc stats # Analytics report
233
+ ```
234
+
235
+ ---
236
+
237
+ ## Core Concepts
238
+
239
+ ### Knowledge Objects (KOs)
240
+
241
+ A KO is the fundamental unit of KBVC. Each KO is:
242
+ - A Markdown file with YAML frontmatter
243
+ - Chunked into sections (one vector per section)
244
+ - Versioned independently (v1, v2, v3...)
245
+ - Connected to other KOs via typed relations
246
+
247
+ **Volatility levels** control how KBVC handles automatic commits:
248
+
249
+ | Volatility | Meaning | Auto-commit |
250
+ |---|---|---|
251
+ | `frozen` | Never changes (e.g. archived decisions) | Never re-embedded |
252
+ | `slow` | Changes occasionally (most documents) | On explicit `kbvc sync` |
253
+ | `live` | Changes frequently (e.g. meeting notes) | On every `kbvc sync` |
254
+
255
+ ### Commit DAG
256
+
257
+ Every `kbvc commit` creates an immutable `CommitObject`, identified by a SHA-256 hash computed over:
258
+ - Parent commit ID
259
+ - Branch name
260
+ - Commit message
261
+ - All changed KO IDs + their chunk hashes
262
+ - Snapshot filenames for graph, prompt, retrieval config
263
+
264
+ This gives you a fully reproducible, tamper-evident history — just like Git, but for knowledge.
265
+
266
+ ### Knowledge Graph
267
+
268
+ KOs can be linked with typed relations:
269
+
270
+ ```bash
271
+ kbvc link caching-strategy informed_by system-architecture
272
+ kbvc link new-api contradicts old-api
273
+ kbvc link auth-module extends base-security
274
+ ```
275
+
276
+ **Relation types:** `informed_by` · `contradicts` · `extends` · `derived_from` · `supersedes` · `depends_on` · `related_to`
277
+
278
+ ### VSAL — Vector Storage Abstraction Layer
279
+
280
+ KBVC never touches Qdrant points, pgvector rows, or Pinecone vectors directly. All storage goes through `ChunkRecord` — a universal unit that every backend adapter maps to its native representation.
281
+
282
+ This means you can migrate between vector databases **without re-embedding**:
283
+
284
+ ```bash
285
+ kbvc migrate backend --from qdrant --to pgvector
286
+ ```
287
+
288
+ ---
289
+
290
+ ## Knowledge Object Frontmatter Reference
291
+
292
+ ```yaml
293
+ ---
294
+ id: my-document # required; used as ko_id
295
+ type: project # project | education | patent | document | lesson | ...
296
+ tags: [ai, rag, python] # free tags for filtering
297
+ volatility: slow # frozen | slow | live
298
+ source_type: file # file | web | github | pdf | notion | memory
299
+ valid_from: "2026-01-01" # optional temporal validity window
300
+ valid_to: null # null = still valid
301
+ ---
302
+ ```
303
+
304
+ ---
305
+
306
+ ## All Commands
307
+
308
+ ### Repository Setup
309
+
310
+ ```bash
311
+ kbvc init [--name NAME] [--no-git] # Initialize a KBVC repo
312
+ kbvc clone <url> [directory] # Clone a KBVC repo from Git
313
+ kbvc config set <key> <value> # Set a config value
314
+ kbvc config get <key> # Read a config value
315
+ kbvc config list # List all config
316
+ ```
317
+
318
+ ### Knowledge Authoring
319
+
320
+ ```bash
321
+ kbvc add <path|.> # Stage files for commit
322
+ kbvc add <path> --reason "why" # Stage with an annotation
323
+ kbvc commit -m "message" # Commit staged changes
324
+ kbvc commit -m "message" --dry-run # Preview commit (no write)
325
+ kbvc status # Show staged vs. committed state
326
+ ```
327
+
328
+ ### History & Diff
329
+
330
+ ```bash
331
+ kbvc log # Commit history (newest first)
332
+ kbvc log --oneline # Compact history
333
+ kbvc diff # Diff staged against HEAD
334
+ kbvc diff <commit_id> # Diff against a specific commit
335
+ kbvc history <ko_id> # Version history for one KO
336
+ kbvc checkout <commit_id> # Restore knowledge state
337
+ kbvc checkout <commit_id> --ko <id> # Restore single KO
338
+ ```
339
+
340
+ ### Knowledge Graph
341
+
342
+ ```bash
343
+ kbvc link <from> <type> <to> # Create a relation
344
+ kbvc graph # Print graph summary
345
+ kbvc graph --dot # Export as DOT (Graphviz)
346
+ kbvc depends <ko_id> # Show KO dependencies
347
+ kbvc impact <ko_id> # What depends on this KO?
348
+ ```
349
+
350
+ ### Search & Q&A
351
+
352
+ ```bash
353
+ kbvc query "natural language question" # Vector search
354
+ kbvc query "..." --top-k 10 # Return 10 results
355
+ kbvc ask "natural language question" # Grounded Q&A with citations
356
+ kbvc ask "..." --top-k 5 --show-ids # Show raw vector IDs
357
+ ```
358
+
359
+ ### Intelligence & Analytics
360
+
361
+ ```bash
362
+ kbvc analyze # Suggest relations between KOs
363
+ kbvc extract # Extract entities from KOs
364
+ kbvc stale # Find stale/orphaned KOs
365
+ kbvc contradict list # List contradictions
366
+ kbvc contradict resolve <rel_id> # Resolve a contradiction
367
+ kbvc stats # Analytics report
368
+ ```
369
+
370
+ ### Provenance & Audit
371
+
372
+ ```bash
373
+ kbvc trace <ko_id> # Full version trail for a KO
374
+ kbvc explain <vector_id> # Trace vector → source → commit
375
+ kbvc annotate <ko_id> "reason" # Add change reason before commit
376
+ kbvc doctor # Repo health check
377
+ ```
378
+
379
+ ### Ingestion
380
+
381
+ ```bash
382
+ kbvc ingest website <url> # Scrape a website
383
+ kbvc ingest github <repo_url> # Import a GitHub repo
384
+ kbvc ingest pdf <file.pdf> # Convert PDF to KO
385
+ kbvc ingest notion <page_url> # Import a Notion page
386
+ kbvc ingest text <file.txt> # Import a plain text file
387
+ ```
388
+
389
+ ### Branches & Remotes
390
+
391
+ ```bash
392
+ kbvc branch # List branches
393
+ kbvc branch <name> # Create a branch
394
+ kbvc branch checkout <name> # Switch branch
395
+ kbvc remote add <name> <url> # Add a remote
396
+ kbvc push # Push to configured remote
397
+ ```
398
+
399
+ ### Infrastructure
400
+
401
+ ```bash
402
+ kbvc backend init # Create vector DB schema
403
+ kbvc backend info # Show backend status
404
+ kbvc migrate backend --from X --to Y # Migrate vector store
405
+ kbvc migrate embeddings --from M1 --to M2 # Swap embedding model
406
+ kbvc migrate schema # Update lock file schema
407
+ kbvc gc # Garbage-collect orphan vectors
408
+ kbvc sync # Volatility-aware auto-commit
409
+ kbvc sync --volatility live # Only sync live KOs
410
+ kbvc sync --dry-run # Preview what would be committed
411
+ kbvc promote "<memory text>" # Promote agent memory to KO
412
+ ```
413
+
414
+ ---
415
+
416
+ ## Backends
417
+
418
+ ### Embedding Backends
419
+
420
+ | Backend | Install Extra | Models |
421
+ |---|---|---|
422
+ | **OpenAI** | `kbvc[openai]` | `text-embedding-3-small` (default), `text-embedding-3-large`, `ada-002` |
423
+ | **Gemini** | `kbvc[gemini]` | `gemini-embedding-001` |
424
+ | **Ollama** | `kbvc[ollama]` | any locally-served model (`nomic-embed-text`, etc.) |
425
+ | **HuggingFace** | `kbvc[hf]` | any `sentence-transformers` model |
426
+
427
+ ```bash
428
+ # OpenAI
429
+ kbvc config set embed.backend openai
430
+ kbvc config set embed.key sk-...
431
+ kbvc config set embed.model text-embedding-3-small
432
+
433
+ # Ollama (local, no API key)
434
+ kbvc config set embed.backend ollama
435
+ kbvc config set embed.model nomic-embed-text
436
+ kbvc config set embed.url http://localhost:11434
437
+ ```
438
+
439
+ ### Vector DB Backends
440
+
441
+ | Backend | Install Extra | Best For |
442
+ |---|---|---|
443
+ | **LanceDB** | `kbvc[lancedb]` | Local dev, no server, embedded |
444
+ | **Qdrant** | `kbvc[qdrant]` | Production, cloud-native |
445
+ | **pgvector** | `kbvc[pgvector]` | PostgreSQL shops |
446
+ | **Chroma** | `kbvc[chroma]` | Local dev, Python-first |
447
+ | **Pinecone** | `kbvc[pinecone]` | Managed serverless |
448
+
449
+ ```bash
450
+ # LanceDB (no server needed — best for local dev)
451
+ kbvc config set vectordb.backend lancedb
452
+ kbvc config set vectordb.url ./kbvc_lance
453
+
454
+ # Qdrant
455
+ kbvc config set vectordb.backend qdrant
456
+ kbvc config set vectordb.url http://localhost:6333
457
+ kbvc config set vectordb.collection kbvc
458
+
459
+ # pgvector
460
+ kbvc config set vectordb.backend pgvector
461
+ kbvc config set vectordb.url "postgresql://user:pass@localhost/mydb"
462
+ ```
463
+
464
+ ---
465
+
466
+ ## The kbvc.lock File
467
+
468
+ Every commit writes a `kbvc.lock` file to your repo root — analogous to `package-lock.json`. It records:
469
+
470
+ ```yaml
471
+ kbvc_version: "0.1.0"
472
+ embedding:
473
+ provider: openai
474
+ model: text-embedding-3-small
475
+ dimensions: 1536
476
+ vector_store:
477
+ provider: qdrant
478
+ collection: kbvc
479
+ committed_at: "2026-01-15T10:30:00Z"
480
+ commit_id: "a3f2c1d9..."
481
+ ```
482
+
483
+ Commit `kbvc.lock` to Git. Anyone cloning your repo with `kbvc clone` will see exactly what backend configuration was used and can reproduce it.
484
+
485
+ ---
486
+
487
+ ## Use Cases
488
+
489
+ ### AI Product Teams
490
+ Track your RAG knowledge base like code. Every prompt change, every document update, every embedding model swap is versioned and auditable. Reproduce any past retrieval configuration exactly.
491
+
492
+ ### Research Labs
493
+ Manage evolving scientific knowledge with contradiction detection and lineage tracking. Know when a newer paper supersedes an older one. Build multi-hop knowledge graphs connecting related findings.
494
+
495
+ ### Enterprise Knowledge Management
496
+ Enforce governance over internal knowledge bases: track who changed what, when, and why. Detect when two documents contradict each other. Get alerts when downstream KOs depend on stale upstream content.
497
+
498
+ ### LLM Application Developers
499
+ Debug why your LLM gave a specific answer — trace it back to the exact document chunk, its version, and the commit that created its embedding. Swap embedding models without rewriting your pipeline.
500
+
501
+ ---
502
+
503
+ ## Development Setup
504
+
505
+ ```bash
506
+ git clone https://github.com/Saiyam-Sandhir-Jain/kbvc.git
507
+ cd kbvc
508
+ pip install -e ".[dev,openai,qdrant]"
509
+ pytest tests/ -v
510
+ ```
511
+
512
+ See [CONTRIBUTING.md](https://github.com/Saiyam-Sandhir-Jain/kbvc/blob/main/CONTRIBUTING.md) for the full contributor guide.
513
+
514
+ ---
515
+
516
+ ## License
517
+
518
+ MIT © [Saiyam Jain](https://github.com/Saiyam-Sandhir-Jain)
519
+
520
+ ---
521
+
522
+ <div align="center">
523
+ <sub>Built by Saiyam Jain · VIT Bhopal</sub>
524
+ </div>