kgmd 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. kgmd-0.1.0/.github/workflows/ci.yml +33 -0
  2. kgmd-0.1.0/.github/workflows/publish.yml +30 -0
  3. kgmd-0.1.0/.gitignore +11 -0
  4. kgmd-0.1.0/LICENSE +21 -0
  5. kgmd-0.1.0/Makefile +21 -0
  6. kgmd-0.1.0/PKG-INFO +202 -0
  7. kgmd-0.1.0/README.md +165 -0
  8. kgmd-0.1.0/kgmd/__init__.py +3 -0
  9. kgmd-0.1.0/kgmd/cli.py +711 -0
  10. kgmd-0.1.0/kgmd/config.py +101 -0
  11. kgmd-0.1.0/kgmd/db.py +97 -0
  12. kgmd-0.1.0/kgmd/embed.py +153 -0
  13. kgmd-0.1.0/kgmd/export.py +193 -0
  14. kgmd-0.1.0/kgmd/extract.py +309 -0
  15. kgmd-0.1.0/kgmd/induce.py +192 -0
  16. kgmd-0.1.0/kgmd/ingest.py +230 -0
  17. kgmd-0.1.0/kgmd/llm.py +153 -0
  18. kgmd-0.1.0/kgmd/mcp_server.py +136 -0
  19. kgmd-0.1.0/kgmd/prompts/extract.txt +51 -0
  20. kgmd-0.1.0/kgmd/prompts/induce.txt +33 -0
  21. kgmd-0.1.0/kgmd/prompts/resolve.txt +28 -0
  22. kgmd-0.1.0/kgmd/query.py +337 -0
  23. kgmd-0.1.0/kgmd/resolve.py +304 -0
  24. kgmd-0.1.0/kgmd/schema.py +198 -0
  25. kgmd-0.1.0/pyproject.toml +64 -0
  26. kgmd-0.1.0/tests/__init__.py +0 -0
  27. kgmd-0.1.0/tests/conftest.py +155 -0
  28. kgmd-0.1.0/tests/fixtures/acme_corp.md +9 -0
  29. kgmd-0.1.0/tests/fixtures/brian_anderson.md +7 -0
  30. kgmd-0.1.0/tests/fixtures/digital_transformation.md +9 -0
  31. kgmd-0.1.0/tests/fixtures/partnerships.md +10 -0
  32. kgmd-0.1.0/tests/fixtures/quarterly_review.md +12 -0
  33. kgmd-0.1.0/tests/fixtures/sarah_chen.md +7 -0
  34. kgmd-0.1.0/tests/fixtures/tech_stack.md +15 -0
  35. kgmd-0.1.0/tests/test_chunk.py +96 -0
  36. kgmd-0.1.0/tests/test_db.py +112 -0
  37. kgmd-0.1.0/tests/test_export.py +76 -0
  38. kgmd-0.1.0/tests/test_extract.py +113 -0
  39. kgmd-0.1.0/tests/test_induce.py +88 -0
  40. kgmd-0.1.0/tests/test_mcp.py +73 -0
  41. kgmd-0.1.0/tests/test_query.py +129 -0
  42. kgmd-0.1.0/tests/test_resolve.py +106 -0
@@ -0,0 +1,33 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install -e ".[dev]"
28
+
29
+ - name: Lint
30
+ run: ruff check .
31
+
32
+ - name: Test
33
+ run: pytest -v
@@ -0,0 +1,30 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment: pypi
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.12"
22
+
23
+ - name: Install build tools
24
+ run: pip install build
25
+
26
+ - name: Build package
27
+ run: python -m build
28
+
29
+ - name: Publish to PyPI
30
+ uses: pypa/gh-action-pypi-publish@release/v1
kgmd-0.1.0/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .venv/
7
+ .env
8
+ *.db
9
+ *.db-shm
10
+ *.db-wal
11
+ .kgmd/
kgmd-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 2Lines Software
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
kgmd-0.1.0/Makefile ADDED
@@ -0,0 +1,21 @@
1
+ .PHONY: install test lint format build clean
2
+
3
+ install:
4
+ pip install -e ".[dev]"
5
+
6
+ test:
7
+ python -m pytest tests/ -v
8
+
9
+ lint:
10
+ ruff check kgmd/ tests/
11
+
12
+ format:
13
+ ruff format kgmd/ tests/
14
+ ruff check --fix kgmd/ tests/
15
+
16
+ build:
17
+ python -m build
18
+
19
+ clean:
20
+ rm -rf dist/ build/ *.egg-info
21
+ find . -type d -name __pycache__ -exec rm -rf {} +
kgmd-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: kgmd
3
+ Version: 0.1.0
4
+ Summary: A CLI that builds a knowledge graph from markdown files and exposes it via MCP
5
+ Project-URL: Homepage, https://github.com/johncarpenter/kgmd
6
+ Project-URL: Repository, https://github.com/johncarpenter/kgmd
7
+ Project-URL: Issues, https://github.com/johncarpenter/kgmd/issues
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: entity-extraction,knowledge-graph,llm,markdown,mcp,rag,sqlite
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: click>=8.1
23
+ Requires-Dist: fastembed>=0.4
24
+ Requires-Dist: litellm>=1.50
25
+ Requires-Dist: mcp>=1.0
26
+ Requires-Dist: networkx>=3.2
27
+ Requires-Dist: platformdirs>=4.0
28
+ Requires-Dist: pydantic>=2.6
29
+ Requires-Dist: pyyaml>=6.0
30
+ Requires-Dist: rich>=13.7
31
+ Requires-Dist: sqlite-vec>=0.1.6
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest-mock>=3.10; extra == 'dev'
34
+ Requires-Dist: pytest>=7.0; extra == 'dev'
35
+ Requires-Dist: ruff>=0.4; extra == 'dev'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # kgmd
39
+
40
+ A CLI that builds a knowledge graph from a directory of markdown files and exposes it via MCP.
41
+
42
+ - Extracts entities and relations using any LLM (via [litellm](https://github.com/BerriAI/litellm))
43
+ - Resolves duplicate entities using local embeddings + LLM verification
44
+ - Induces a typed schema from the extracted data
45
+ - Stores everything in a single SQLite file (powered by [sqlite-vec](https://github.com/asg017/sqlite-vec))
46
+ - Exposes the graph via CLI queries and an [MCP](https://modelcontextprotocol.io/) server
47
+
48
+ ## Install
49
+
50
+ ```bash
51
+ pip install kgmd
52
+ ```
53
+
54
+ Or with [uv](https://github.com/astral-sh/uv):
55
+
56
+ ```bash
57
+ uv tool install kgmd
58
+ ```
59
+
60
+ ### Requirements
61
+
62
+ - Python 3.10+
63
+ - An API key for any LLM provider supported by litellm (OpenRouter, OpenAI, Anthropic, etc.)
64
+ - Embeddings run locally by default via [fastembed](https://github.com/qdrant/fastembed) (no API key needed)
65
+
66
+ ## Quickstart
67
+
68
+ ```bash
69
+ # Initialize a corpus
70
+ cd my-notes/
71
+ kgmd init
72
+
73
+ # Set your LLM API key
74
+ export OPENROUTER_API_KEY="sk-..."
75
+
76
+ # Build the knowledge graph (extract -> resolve -> induce)
77
+ kgmd build
78
+
79
+ # Query
80
+ kgmd entities
81
+ kgmd relations
82
+ kgmd find "machine learning"
83
+ kgmd entity "Brian Anderson"
84
+ kgmd neighbors "Brian Anderson" --depth 2
85
+ kgmd path "Brian Anderson" "Acme Corp"
86
+
87
+ # Export
88
+ kgmd export --format graphml --output graph.graphml
89
+
90
+ # View induced schema
91
+ kgmd schema
92
+
93
+ # Corpus statistics
94
+ kgmd stats
95
+ ```
96
+
97
+ ## How it works
98
+
99
+ `kgmd build` runs three stages:
100
+
101
+ 1. **Extract** -- Each markdown file is chunked and sent to an LLM, which returns structured JSON with entities (people, organizations, projects, etc.) and relations between them.
102
+ 2. **Resolve** -- Entity mentions are embedded locally, clustered by cosine similarity, and duplicate clusters are verified by the LLM before merging.
103
+ 3. **Induce** -- Aggregate statistics about entity types and relation predicates are sent to the LLM, which produces a typed YAML schema with hierarchies.
104
+
105
+ All state lives in `.kgmd/graph.db`, a single SQLite file. Re-running `kgmd build` is incremental -- unchanged files are skipped.
106
+
107
+ ## MCP Server
108
+
109
+ `kgmd mcp` launches an MCP server over stdio, exposing 7 tools:
110
+
111
+ | Tool | Description |
112
+ |---|---|
113
+ | `search` | Semantic search over chunks |
114
+ | `get_entity` | Full entity record with mentions and relations |
115
+ | `list_entities` | List entities, optionally filtered by type |
116
+ | `get_neighbors` | Subgraph traversal around an entity |
117
+ | `find_path` | Shortest path between two entities |
118
+ | `list_relations` | List relations with optional filters |
119
+ | `get_schema` | The current induced schema |
120
+
121
+ ### Claude Desktop setup
122
+
123
+ Add to your Claude Desktop config (on macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
124
+
125
+ ```json
126
+ {
127
+ "mcpServers": {
128
+ "kgmd": {
129
+ "command": "kgmd",
130
+ "args": ["mcp"],
131
+ "cwd": "/path/to/your/corpus"
132
+ }
133
+ }
134
+ }
135
+ ```
136
+
137
+ ## Configuration
138
+
139
+ Per-corpus config lives in `.kgmd/config.yaml`. Global defaults live in `~/.config/kgmd/config.yaml` (or the platform equivalent). Corpus config overrides global.
140
+
141
+ ```yaml
142
+ embedding:
143
+ backend: fastembed # or "litellm" for API embeddings
144
+ model: BAAI/bge-small-en-v1.5
145
+
146
+ llm:
147
+ model: openrouter/anthropic/claude-sonnet-4-5
148
+ temperature: 0.0
149
+ max_tokens: 4096
150
+ timeout_seconds: 120
151
+
152
+ chunking:
153
+ max_chars: 4000
154
+ overlap_chars: 200
155
+ split_on: paragraph # or "heading", "fixed"
156
+
157
+ extraction:
158
+ max_entities_per_chunk: 30
159
+ max_relations_per_chunk: 30
160
+ retry_on_parse_failure: 2
161
+
162
+ resolution:
163
+ similarity_threshold: 0.85
164
+ llm_verify_clusters: true
165
+ max_cluster_size: 10
166
+
167
+ induction:
168
+ include_attribute_summary: true
169
+ hierarchy_depth: 3
170
+ ```
171
+
172
+ ## Export formats
173
+
174
+ ```bash
175
+ kgmd export --format jsonld # JSON-LD with schema.org context
176
+ kgmd export --format cypher # Cypher CREATE statements (Neo4j)
177
+ kgmd export --format graphml # GraphML (Gephi, yEd)
178
+ ```
179
+
180
+ ## Development
181
+
182
+ ```bash
183
+ git clone https://github.com/johncarpenter/kgmd.git
184
+ cd kgmd
185
+ pip install -e ".[dev]"
186
+ make test # run tests
187
+ make lint # ruff check
188
+ make format # ruff format
189
+ ```
190
+
191
+ **Note:** Your Python must be built with SQLite extension loading enabled. If using pyenv on macOS with Homebrew:
192
+
193
+ ```bash
194
+ LDFLAGS="-L$(brew --prefix sqlite)/lib" \
195
+ CPPFLAGS="-I$(brew --prefix sqlite)/include -DSQLITE_ENABLE_LOAD_EXTENSION" \
196
+ PYTHON_CONFIGURE_OPTS="--enable-loadable-sqlite-extensions" \
197
+ pyenv install 3.12
198
+ ```
199
+
200
+ ## License
201
+
202
+ [MIT](LICENSE)
kgmd-0.1.0/README.md ADDED
@@ -0,0 +1,165 @@
1
+ # kgmd
2
+
3
+ A CLI that builds a knowledge graph from a directory of markdown files and exposes it via MCP.
4
+
5
+ - Extracts entities and relations using any LLM (via [litellm](https://github.com/BerriAI/litellm))
6
+ - Resolves duplicate entities using local embeddings + LLM verification
7
+ - Induces a typed schema from the extracted data
8
+ - Stores everything in a single SQLite file (powered by [sqlite-vec](https://github.com/asg017/sqlite-vec))
9
+ - Exposes the graph via CLI queries and an [MCP](https://modelcontextprotocol.io/) server
10
+
11
+ ## Install
12
+
13
+ ```bash
14
+ pip install kgmd
15
+ ```
16
+
17
+ Or with [uv](https://github.com/astral-sh/uv):
18
+
19
+ ```bash
20
+ uv tool install kgmd
21
+ ```
22
+
23
+ ### Requirements
24
+
25
+ - Python 3.10+
26
+ - An API key for any LLM provider supported by litellm (OpenRouter, OpenAI, Anthropic, etc.)
27
+ - Embeddings run locally by default via [fastembed](https://github.com/qdrant/fastembed) (no API key needed)
28
+
29
+ ## Quickstart
30
+
31
+ ```bash
32
+ # Initialize a corpus
33
+ cd my-notes/
34
+ kgmd init
35
+
36
+ # Set your LLM API key
37
+ export OPENROUTER_API_KEY="sk-..."
38
+
39
+ # Build the knowledge graph (extract -> resolve -> induce)
40
+ kgmd build
41
+
42
+ # Query
43
+ kgmd entities
44
+ kgmd relations
45
+ kgmd find "machine learning"
46
+ kgmd entity "Brian Anderson"
47
+ kgmd neighbors "Brian Anderson" --depth 2
48
+ kgmd path "Brian Anderson" "Acme Corp"
49
+
50
+ # Export
51
+ kgmd export --format graphml --output graph.graphml
52
+
53
+ # View induced schema
54
+ kgmd schema
55
+
56
+ # Corpus statistics
57
+ kgmd stats
58
+ ```
59
+
60
+ ## How it works
61
+
62
+ `kgmd build` runs three stages:
63
+
64
+ 1. **Extract** -- Each markdown file is chunked and sent to an LLM, which returns structured JSON with entities (people, organizations, projects, etc.) and relations between them.
65
+ 2. **Resolve** -- Entity mentions are embedded locally, clustered by cosine similarity, and duplicate clusters are verified by the LLM before merging.
66
+ 3. **Induce** -- Aggregate statistics about entity types and relation predicates are sent to the LLM, which produces a typed YAML schema with hierarchies.
67
+
68
+ All state lives in `.kgmd/graph.db`, a single SQLite file. Re-running `kgmd build` is incremental -- unchanged files are skipped.
69
+
70
+ ## MCP Server
71
+
72
+ `kgmd mcp` launches an MCP server over stdio, exposing 7 tools:
73
+
74
+ | Tool | Description |
75
+ |---|---|
76
+ | `search` | Semantic search over chunks |
77
+ | `get_entity` | Full entity record with mentions and relations |
78
+ | `list_entities` | List entities, optionally filtered by type |
79
+ | `get_neighbors` | Subgraph traversal around an entity |
80
+ | `find_path` | Shortest path between two entities |
81
+ | `list_relations` | List relations with optional filters |
82
+ | `get_schema` | The current induced schema |
83
+
84
+ ### Claude Desktop setup
85
+
86
+ Add to your Claude Desktop config (on macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
87
+
88
+ ```json
89
+ {
90
+ "mcpServers": {
91
+ "kgmd": {
92
+ "command": "kgmd",
93
+ "args": ["mcp"],
94
+ "cwd": "/path/to/your/corpus"
95
+ }
96
+ }
97
+ }
98
+ ```
99
+
100
+ ## Configuration
101
+
102
+ Per-corpus config lives in `.kgmd/config.yaml`. Global defaults live in `~/.config/kgmd/config.yaml` (or the platform equivalent). Corpus config overrides global.
103
+
104
+ ```yaml
105
+ embedding:
106
+ backend: fastembed # or "litellm" for API embeddings
107
+ model: BAAI/bge-small-en-v1.5
108
+
109
+ llm:
110
+ model: openrouter/anthropic/claude-sonnet-4-5
111
+ temperature: 0.0
112
+ max_tokens: 4096
113
+ timeout_seconds: 120
114
+
115
+ chunking:
116
+ max_chars: 4000
117
+ overlap_chars: 200
118
+ split_on: paragraph # or "heading", "fixed"
119
+
120
+ extraction:
121
+ max_entities_per_chunk: 30
122
+ max_relations_per_chunk: 30
123
+ retry_on_parse_failure: 2
124
+
125
+ resolution:
126
+ similarity_threshold: 0.85
127
+ llm_verify_clusters: true
128
+ max_cluster_size: 10
129
+
130
+ induction:
131
+ include_attribute_summary: true
132
+ hierarchy_depth: 3
133
+ ```
134
+
135
+ ## Export formats
136
+
137
+ ```bash
138
+ kgmd export --format jsonld # JSON-LD with schema.org context
139
+ kgmd export --format cypher # Cypher CREATE statements (Neo4j)
140
+ kgmd export --format graphml # GraphML (Gephi, yEd)
141
+ ```
142
+
143
+ ## Development
144
+
145
+ ```bash
146
+ git clone https://github.com/johncarpenter/kgmd.git
147
+ cd kgmd
148
+ pip install -e ".[dev]"
149
+ make test # run tests
150
+ make lint # ruff check
151
+ make format # ruff format
152
+ ```
153
+
154
+ **Note:** Your Python must be built with SQLite extension loading enabled. If using pyenv on macOS with Homebrew:
155
+
156
+ ```bash
157
+ LDFLAGS="-L$(brew --prefix sqlite)/lib" \
158
+ CPPFLAGS="-I$(brew --prefix sqlite)/include -DSQLITE_ENABLE_LOAD_EXTENSION" \
159
+ PYTHON_CONFIGURE_OPTS="--enable-loadable-sqlite-extensions" \
160
+ pyenv install 3.12
161
+ ```
162
+
163
+ ## License
164
+
165
+ [MIT](LICENSE)
@@ -0,0 +1,3 @@
1
+ """kgmd — Knowledge graph from markdown files."""
2
+
3
+ __version__ = "0.1.0"