knowledgevault 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. knowledgevault-0.3.0/LICENSE +21 -0
  2. knowledgevault-0.3.0/PKG-INFO +448 -0
  3. knowledgevault-0.3.0/README.md +410 -0
  4. knowledgevault-0.3.0/knowledgevault.egg-info/PKG-INFO +448 -0
  5. knowledgevault-0.3.0/knowledgevault.egg-info/SOURCES.txt +48 -0
  6. knowledgevault-0.3.0/knowledgevault.egg-info/dependency_links.txt +1 -0
  7. knowledgevault-0.3.0/knowledgevault.egg-info/entry_points.txt +3 -0
  8. knowledgevault-0.3.0/knowledgevault.egg-info/requires.txt +20 -0
  9. knowledgevault-0.3.0/knowledgevault.egg-info/top_level.txt +1 -0
  10. knowledgevault-0.3.0/kvault/__init__.py +62 -0
  11. knowledgevault-0.3.0/kvault/cli/__init__.py +0 -0
  12. knowledgevault-0.3.0/kvault/cli/check.py +250 -0
  13. knowledgevault-0.3.0/kvault/cli/main.py +724 -0
  14. knowledgevault-0.3.0/kvault/core/__init__.py +20 -0
  15. knowledgevault-0.3.0/kvault/core/frontmatter.py +94 -0
  16. knowledgevault-0.3.0/kvault/core/index.py +462 -0
  17. knowledgevault-0.3.0/kvault/core/observability.py +479 -0
  18. knowledgevault-0.3.0/kvault/core/research.py +255 -0
  19. knowledgevault-0.3.0/kvault/core/storage.py +322 -0
  20. knowledgevault-0.3.0/kvault/matching/__init__.py +29 -0
  21. knowledgevault-0.3.0/kvault/matching/alias.py +153 -0
  22. knowledgevault-0.3.0/kvault/matching/base.py +160 -0
  23. knowledgevault-0.3.0/kvault/matching/domain.py +118 -0
  24. knowledgevault-0.3.0/kvault/matching/fuzzy.py +127 -0
  25. knowledgevault-0.3.0/kvault/mcp/__init__.py +5 -0
  26. knowledgevault-0.3.0/kvault/mcp/server.py +1538 -0
  27. knowledgevault-0.3.0/kvault/mcp/state.py +239 -0
  28. knowledgevault-0.3.0/kvault/mcp/validation.py +293 -0
  29. knowledgevault-0.3.0/kvault/orchestrator/__init__.py +27 -0
  30. knowledgevault-0.3.0/kvault/orchestrator/context.py +337 -0
  31. knowledgevault-0.3.0/kvault/orchestrator/enforcer.py +509 -0
  32. knowledgevault-0.3.0/kvault/orchestrator/runner.py +1443 -0
  33. knowledgevault-0.3.0/kvault/orchestrator/state_machine.py +453 -0
  34. knowledgevault-0.3.0/kvault/templates/CLAUDE.md +311 -0
  35. knowledgevault-0.3.0/kvault/templates/__init__.py +0 -0
  36. knowledgevault-0.3.0/kvault/templates/category_summary.md +10 -0
  37. knowledgevault-0.3.0/kvault/templates/journal_entry.md +8 -0
  38. knowledgevault-0.3.0/kvault/templates/root_summary.md +40 -0
  39. knowledgevault-0.3.0/pyproject.toml +116 -0
  40. knowledgevault-0.3.0/setup.cfg +4 -0
  41. knowledgevault-0.3.0/tests/test_check.py +202 -0
  42. knowledgevault-0.3.0/tests/test_e2e_cli.py +192 -0
  43. knowledgevault-0.3.0/tests/test_frontmatter.py +158 -0
  44. knowledgevault-0.3.0/tests/test_index.py +190 -0
  45. knowledgevault-0.3.0/tests/test_init.py +137 -0
  46. knowledgevault-0.3.0/tests/test_matching.py +365 -0
  47. knowledgevault-0.3.0/tests/test_observability.py +224 -0
  48. knowledgevault-0.3.0/tests/test_orchestrator.py +872 -0
  49. knowledgevault-0.3.0/tests/test_research.py +198 -0
  50. knowledgevault-0.3.0/tests/test_storage.py +239 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Eddie Landesberg
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,448 @@
1
+ Metadata-Version: 2.4
2
+ Name: knowledgevault
3
+ Version: 0.3.0
4
+ Summary: Config-driven knowledge graph framework for extracting structured knowledge from unstructured data
5
+ Author: Eddie Landesberg
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/cimo-labs/kvault
8
+ Project-URL: Documentation, https://github.com/cimo-labs/kvault#readme
9
+ Project-URL: Repository, https://github.com/cimo-labs/kvault
10
+ Keywords: knowledge-graph,entity-extraction,llm,data-processing,claude-code,mcp,personal-knowledge-base
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: pyyaml>=6.0
23
+ Requires-Dist: click>=8.0
24
+ Requires-Dist: pydantic>=2.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.0; extra == "dev"
27
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
28
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
29
+ Requires-Dist: black>=23.0; extra == "dev"
30
+ Requires-Dist: mypy>=1.0; extra == "dev"
31
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
32
+ Requires-Dist: pre-commit>=3.0; extra == "dev"
33
+ Provides-Extra: sdk
34
+ Requires-Dist: claude-code-sdk>=0.1.0; extra == "sdk"
35
+ Provides-Extra: mcp
36
+ Requires-Dist: mcp>=1.0.0; python_version >= "3.10" and extra == "mcp"
37
+ Dynamic: license-file
38
+
39
+ # kvault
40
+
41
+ Agent-first knowledge graph framework. Build knowledge graphs from unstructured data using intelligent agents.
42
+
43
+ ## Philosophy
44
+
45
+ **The agent IS the pipeline.** Claude (or another LLM) does extraction, research, decisions, and propagation. kvault provides tools, not workflows.
46
+
47
+ ```
48
+ ┌─────────────────────────────────────────────────────────────┐
49
+ │ EntityIndex MatchStrategies ObservabilityLogger │
50
+ │ (fast lookup) (fuzzy, alias) (debug & improve) │
51
+ │ │
52
+ │ SimpleStorage (YAML frontmatter in _summary.md preferred) │
53
+ └─────────────────────────────────────────────────────────────┘
54
+
55
+ Agent (Claude) does:
56
+ - Read input
57
+ - Research (using EntityIndex + MatchStrategies)
58
+ - Decide (using its reasoning)
59
+ - Write (using SimpleStorage)
60
+ - Propagate (update parent summaries)
61
+ - Log (using ObservabilityLogger)
62
+ ```
63
+
64
+ ## Getting Started with Claude Code
65
+
66
+ The fastest way to get a personal knowledge base running with Claude Code:
67
+
68
+ ```bash
69
+ # 1. Install kvault with MCP support
70
+ pip install "knowledgevault[mcp]"
71
+
72
+ # 2. Initialize a new knowledge base
73
+ kvault init my_kb --name "Your Name"
74
+
75
+ # 3. Verify it's clean
76
+ kvault check --kb-root my_kb
77
+ ```
78
+
79
+ Then add the MCP server to `.claude/settings.json`:
80
+
81
+ ```json
82
+ {
83
+ "mcpServers": {
84
+ "kvault": {
85
+ "command": "kvault-mcp",
86
+ "env": {}
87
+ }
88
+ }
89
+ }
90
+ ```
91
+
92
+ And add the integrity hook (catches stale summaries before each prompt):
93
+
94
+ ```json
95
+ {
96
+ "hooks": {
97
+ "UserPromptSubmit": [
98
+ {
99
+ "type": "command",
100
+ "command": "kvault check --kb-root /absolute/path/to/my_kb"
101
+ }
102
+ ]
103
+ }
104
+ }
105
+ ```
106
+
107
+ Customize the generated `CLAUDE.md` with your personal details, then start adding entities.
108
+
109
+ ## Installation
110
+
111
+ ```bash
112
+ pip install knowledgevault
113
+ ```
114
+
115
+ Or install from source:
116
+
117
+ ```bash
118
+ git clone https://github.com/cimo-labs/kvault
119
+ cd kvault
120
+ pip install -e .
121
+ ```
122
+
123
+ ## Quick Start
124
+
125
+ ```python
126
+ from pathlib import Path
127
+ from kvault import (
128
+ EntityIndex,
129
+ SimpleStorage,
130
+ ObservabilityLogger,
131
+ EntityResearcher
132
+ )
133
+
134
+ # Initialize
135
+ kg_root = Path("my_knowledge_base")
136
+ index = EntityIndex(kg_root / ".kvault" / "index.db")
137
+ storage = SimpleStorage(kg_root)
138
+ logger = ObservabilityLogger(kg_root / ".kvault" / "logs.db")
139
+ researcher = EntityResearcher(index)
140
+
141
+ # 1. Research - find existing entities
142
+ matches = researcher.research("Alice Smith", email="alice@anthropic.com")
143
+ action, target, confidence = researcher.suggest_action("Alice Smith")
144
+ logger.log_research("Alice Smith", "alice smith",
145
+ [m.__dict__ for m in matches], action)
146
+
147
+ # 2. Decide - agent determines what to do
148
+ if action == "create":
149
+ entity_path = "people/collaborators/alice_smith"
150
+ logger.log_decide("Alice Smith", "create",
151
+ "No existing match found", confidence)
152
+
153
+ # 3. Write - create/update the entity
154
+ storage.create_entity(entity_path, {
155
+ "created": "2026-01-05",
156
+ "updated": "2026-01-05",
157
+ "source": "email:123",
158
+ "aliases": ["Alice", "alice@anthropic.com"]
159
+ }, summary="# Alice Smith\n\nResearch scientist at Anthropic.")
160
+ logger.log_write(entity_path, "create", "Created new entity")
161
+
162
+ # 4. Update index
163
+ index.add(entity_path, "Alice Smith",
164
+ ["Alice", "alice@anthropic.com"], "people")
165
+
166
+ # 5. Propagate - update parent summaries
167
+ ancestors = storage.get_ancestors(entity_path)
168
+ logger.log_propagate(entity_path, ancestors)
169
+ ```
170
+
171
+ ## Core Components
172
+
173
+ ### EntityIndex
174
+
175
+ SQLite-backed entity index with full-text search for fast lookups.
176
+
177
+ ```python
178
+ from kvault import EntityIndex
179
+
180
+ index = EntityIndex(Path("index.db"))
181
+
182
+ # Add entity
183
+ index.add("people/alice", "Alice Smith",
184
+ aliases=["Alice", "alice@example.com"],
185
+ category="people")
186
+
187
+ # Search
188
+ results = index.search("Alice")
189
+
190
+ # Find by alias
191
+ entry = index.find_by_alias("alice@example.com")
192
+
193
+ # Find by email domain
194
+ entries = index.find_by_email_domain("example.com")
195
+
196
+ # Rebuild from filesystem
197
+ count = index.rebuild(Path("knowledge_graph"))
198
+ ```
199
+
200
+ ### SimpleStorage
201
+
202
+ Filesystem storage with minimal 4-field schema.
203
+
204
+ ```python
205
+ from kvault import SimpleStorage
206
+
207
+ storage = SimpleStorage(Path("knowledge_graph"))
208
+
209
+ # Create entity
210
+ storage.create_entity("people/alice", {
211
+ "created": "2026-01-05",
212
+ "updated": "2026-01-05",
213
+ "source": "manual",
214
+ "aliases": ["Alice"]
215
+ }, summary="# Alice\n\nDescription here.")
216
+
217
+ # Update entity
218
+ storage.update_entity("people/alice",
219
+ meta={"source": "email:123"},
220
+ summary="# Alice\n\nUpdated description.")
221
+
222
+ # Read
223
+ meta = storage.read_meta("people/alice")
224
+ summary = storage.read_summary("people/alice")
225
+
226
+ # Navigate hierarchy
227
+ ancestors = storage.get_ancestors("people/collaborators/alice")
228
+ # Returns: ["people/collaborators", "people"]
229
+ ```
230
+
231
+ ### ObservabilityLogger
232
+
233
+ Phase-based logging for debugging and system improvement.
234
+
235
+ ```python
236
+ from kvault import ObservabilityLogger
237
+
238
+ logger = ObservabilityLogger(Path("logs.db"))
239
+
240
+ # Log phases
241
+ logger.log_input([{"name": "Alice"}], source="email")
242
+ logger.log_research("Alice", "alice", matches, "create")
243
+ logger.log_decide("Alice", "create", "No match found", confidence=0.95)
244
+ logger.log_write("people/alice", "create", "Created entity")
245
+ logger.log_propagate("people/alice", ["people"])
246
+ logger.log_error("validation_failed", entity="Alice",
247
+ details={"field": "email"})
248
+
249
+ # Query logs
250
+ errors = logger.get_errors()
251
+ decisions = logger.get_decisions(action="create")
252
+ low_conf = logger.get_low_confidence(threshold=0.7)
253
+ summary = logger.get_session_summary()
254
+ ```
255
+
256
+ ### EntityResearcher
257
+
258
+ Research existing entities before creating new ones.
259
+
260
+ ```python
261
+ from kvault import EntityResearcher, EntityIndex
262
+
263
+ index = EntityIndex(Path("index.db"))
264
+ researcher = EntityResearcher(index)
265
+
266
+ # Find matches
267
+ matches = researcher.research("Alice Smith", email="alice@example.com")
268
+
269
+ # Get suggestion
270
+ action, path, confidence = researcher.suggest_action("Alice Smith")
271
+ # Returns: ("create", None, 0.95) or ("update", "people/alice", 0.90)
272
+
273
+ # Quick checks
274
+ exists = researcher.exists("Alice Smith", threshold=0.9)
275
+ best = researcher.best_match("Alice Smith")
276
+ ```
277
+
278
+ ### Matching Strategies
279
+
280
+ Pluggable strategies for entity deduplication.
281
+
282
+ ```python
283
+ from kvault import (
284
+ AliasMatchStrategy,
285
+ FuzzyNameMatchStrategy,
286
+ EmailDomainMatchStrategy
287
+ )
288
+
289
+ # Alias matching - exact match (score: 1.0)
290
+ alias_strategy = AliasMatchStrategy()
291
+
292
+ # Fuzzy name matching (score: 0.85-0.99)
293
+ fuzzy_strategy = FuzzyNameMatchStrategy(threshold=0.85)
294
+
295
+ # Email domain matching (score: 0.85-0.95)
296
+ domain_strategy = EmailDomainMatchStrategy()
297
+ ```
298
+
299
+ ## Storage Format
300
+
301
+ ### YAML Frontmatter (Preferred)
302
+
303
+ Entities are stored as a single `_summary.md` file with YAML frontmatter:
304
+
305
+ ```markdown
306
+ ---
307
+ created: 2026-01-05
308
+ updated: 2026-01-05
309
+ source: email:123
310
+ aliases: [Alice, alice@anthropic.com, "+14155551234"]
311
+ phone: "+14155551234"
312
+ email: alice@anthropic.com
313
+ relationship_type: colleague
314
+ context: Met at NeurIPS 2024
315
+ ---
316
+
317
+ # Alice Smith
318
+
319
+ Research scientist at Anthropic working on causal discovery.
320
+
321
+ ## Background
322
+ Collaborator on interpretability project.
323
+
324
+ ## Interactions
325
+ - 2026-01-05: Initial contact logged
326
+
327
+ ## Notes
328
+ - Interested in causal representation learning
329
+ ```
330
+
331
+ **Required fields:** `created`, `updated`, `source`, `aliases`
332
+ **Optional fields:** `phone`, `email`, `relationship_type`, `context`, `related_to`, `last_interaction`, `status`
333
+
334
+ ### Legacy Format (_meta.json)
335
+
336
+ Separate `_meta.json` files are still supported for backward compatibility:
337
+
338
+ ```json
339
+ {
340
+ "created": "2026-01-05",
341
+ "last_updated": "2026-01-05",
342
+ "sources": ["email:123"],
343
+ "aliases": ["Alice", "alice@anthropic.com"]
344
+ }
345
+ ```
346
+
347
+ **Note:** New entities should use YAML frontmatter. The index rebuilder supports both formats.
348
+
349
+ ## Development
350
+
351
+ ```bash
352
+ # Install dev dependencies
353
+ pip install -e ".[dev]"
354
+
355
+ # Run tests
356
+ pytest
357
+
358
+ # Format code
359
+ black kvault/
360
+
361
+ # Type check
362
+ mypy kvault/
363
+ ```
364
+
365
+ ## MCP Server (Claude Code Integration)
366
+
367
+ The kvault MCP server provides direct tool access for Claude Code, enabling the 6-step workflow without subprocess parsing.
368
+
369
+ ### Installation
370
+
371
+ ```bash
372
+ pip install "knowledgevault[mcp]" # Install with MCP support (the mcp dependency requires Python 3.10+)
373
+ ```
374
+
375
+ ### Configuration
376
+
377
+ Add to `.claude/settings.json`:
378
+
379
+ ```json
380
+ {
381
+ "mcpServers": {
382
+ "kvault": {
383
+ "command": "kvault-mcp",
384
+ "env": {}
385
+ }
386
+ }
387
+ }
388
+ ```
389
+
390
+ ### Available Tools
391
+
392
+ | Category | Tools |
393
+ |----------|-------|
394
+ | **Init** | `kvault_init`, `kvault_status` |
395
+ | **Index** | `kvault_search`, `kvault_find_by_alias`, `kvault_find_by_email_domain`, `kvault_rebuild_index` |
396
+ | **Entity** | `kvault_read_entity`, `kvault_write_entity`, `kvault_list_entities`, `kvault_delete_entity`, `kvault_move_entity` |
397
+ | **Summary** | `kvault_read_summary`, `kvault_write_summary`, `kvault_get_parent_summaries` |
398
+ | **Research** | `kvault_research` |
399
+ | **Workflow** | `kvault_log_phase`, `kvault_write_journal`, `kvault_validate_transition` |
400
+
401
+ ### Example Workflow
402
+
403
+ ```
404
+ 1. kvault_init(kg_root="/path/to/kb")
405
+ 2. kvault_research(name="John Doe", phone="+14155551234")
406
+ 3. kvault_write_entity(path="people/contacts/john_doe", meta={...}, content="...", create=true)
407
+ 4. kvault_get_parent_summaries(path="people/contacts/john_doe")
408
+ 5. kvault_write_summary(path="people/contacts", content="...")
409
+ 6. kvault_write_journal(actions=[...], source="manual")
410
+ 7. kvault_rebuild_index()
411
+ ```
412
+
413
+ ### Benefits
414
+
415
+ - **Structured JSON responses** - No regex parsing of CLI output
416
+ - **Direct control** - Each tool call is explicit and debuggable
417
+ - **Session state** - Track workflow progress across calls
418
+ - **No timeouts** - Individual tools complete quickly
419
+
420
+ ---
421
+
422
+ ## CLI Usage
423
+
424
+ ```bash
425
+ pip install -e ".[dev]"
426
+
427
+ # Initialize a new KB
428
+ kvault init my_kb --name "Alice"
429
+
430
+ # Check KB integrity (propagation, journal, index, frontmatter, branching)
431
+ kvault check --kb-root my_kb
432
+ kvault check # Auto-detects KB root from cwd
433
+
434
+ # Process a corpus
435
+ kvault process --corpus /path/to/corpus --kg-root /path/to/kg --dry-run
436
+ kvault process --corpus /path/to/corpus --kg-root /path/to/kg --apply
437
+
438
+ # Rebuild and search the index
439
+ kvault index rebuild --kg-root /path/to/kg
440
+ kvault index search --db /path/to/kg/.kvault/index.db --query "Acme"
441
+
442
+ # Session summary (observability)
443
+ kvault log summary --db /path/to/kg/.kvault/logs.db
444
+ ```
445
+
446
+ ## License
447
+
448
+ MIT