kgmd 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kgmd-0.1.0/.github/workflows/ci.yml +33 -0
- kgmd-0.1.0/.github/workflows/publish.yml +30 -0
- kgmd-0.1.0/.gitignore +11 -0
- kgmd-0.1.0/LICENSE +21 -0
- kgmd-0.1.0/Makefile +21 -0
- kgmd-0.1.0/PKG-INFO +202 -0
- kgmd-0.1.0/README.md +165 -0
- kgmd-0.1.0/kgmd/__init__.py +3 -0
- kgmd-0.1.0/kgmd/cli.py +711 -0
- kgmd-0.1.0/kgmd/config.py +101 -0
- kgmd-0.1.0/kgmd/db.py +97 -0
- kgmd-0.1.0/kgmd/embed.py +153 -0
- kgmd-0.1.0/kgmd/export.py +193 -0
- kgmd-0.1.0/kgmd/extract.py +309 -0
- kgmd-0.1.0/kgmd/induce.py +192 -0
- kgmd-0.1.0/kgmd/ingest.py +230 -0
- kgmd-0.1.0/kgmd/llm.py +153 -0
- kgmd-0.1.0/kgmd/mcp_server.py +136 -0
- kgmd-0.1.0/kgmd/prompts/extract.txt +51 -0
- kgmd-0.1.0/kgmd/prompts/induce.txt +33 -0
- kgmd-0.1.0/kgmd/prompts/resolve.txt +28 -0
- kgmd-0.1.0/kgmd/query.py +337 -0
- kgmd-0.1.0/kgmd/resolve.py +304 -0
- kgmd-0.1.0/kgmd/schema.py +198 -0
- kgmd-0.1.0/pyproject.toml +64 -0
- kgmd-0.1.0/tests/__init__.py +0 -0
- kgmd-0.1.0/tests/conftest.py +155 -0
- kgmd-0.1.0/tests/fixtures/acme_corp.md +9 -0
- kgmd-0.1.0/tests/fixtures/brian_anderson.md +7 -0
- kgmd-0.1.0/tests/fixtures/digital_transformation.md +9 -0
- kgmd-0.1.0/tests/fixtures/partnerships.md +10 -0
- kgmd-0.1.0/tests/fixtures/quarterly_review.md +12 -0
- kgmd-0.1.0/tests/fixtures/sarah_chen.md +7 -0
- kgmd-0.1.0/tests/fixtures/tech_stack.md +15 -0
- kgmd-0.1.0/tests/test_chunk.py +96 -0
- kgmd-0.1.0/tests/test_db.py +112 -0
- kgmd-0.1.0/tests/test_export.py +76 -0
- kgmd-0.1.0/tests/test_extract.py +113 -0
- kgmd-0.1.0/tests/test_induce.py +88 -0
- kgmd-0.1.0/tests/test_mcp.py +73 -0
- kgmd-0.1.0/tests/test_query.py +129 -0
- kgmd-0.1.0/tests/test_resolve.py +106 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install --upgrade pip
|
|
27
|
+
pip install -e ".[dev]"
|
|
28
|
+
|
|
29
|
+
- name: Lint
|
|
30
|
+
run: ruff check .
|
|
31
|
+
|
|
32
|
+
- name: Test
|
|
33
|
+
run: pytest -v
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment: pypi
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
|
|
23
|
+
- name: Install build tools
|
|
24
|
+
run: pip install build
|
|
25
|
+
|
|
26
|
+
- name: Build package
|
|
27
|
+
run: python -m build
|
|
28
|
+
|
|
29
|
+
- name: Publish to PyPI
|
|
30
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
kgmd-0.1.0/.gitignore
ADDED
kgmd-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 2Lines Software
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
kgmd-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
.PHONY: install test lint format build clean
|
|
2
|
+
|
|
3
|
+
install:
|
|
4
|
+
pip install -e ".[dev]"
|
|
5
|
+
|
|
6
|
+
test:
|
|
7
|
+
python -m pytest tests/ -v
|
|
8
|
+
|
|
9
|
+
lint:
|
|
10
|
+
ruff check kgmd/ tests/
|
|
11
|
+
|
|
12
|
+
format:
|
|
13
|
+
ruff format kgmd/ tests/
|
|
14
|
+
ruff check --fix kgmd/ tests/
|
|
15
|
+
|
|
16
|
+
build:
|
|
17
|
+
python -m build
|
|
18
|
+
|
|
19
|
+
clean:
|
|
20
|
+
rm -rf dist/ build/ *.egg-info
|
|
21
|
+
find . -type d -name __pycache__ -exec rm -rf {} +
|
kgmd-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kgmd
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A CLI that builds a knowledge graph from markdown files and exposes it via MCP
|
|
5
|
+
Project-URL: Homepage, https://github.com/johncarpenter/kgmd
|
|
6
|
+
Project-URL: Repository, https://github.com/johncarpenter/kgmd
|
|
7
|
+
Project-URL: Issues, https://github.com/johncarpenter/kgmd/issues
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: entity-extraction,knowledge-graph,llm,markdown,mcp,rag,sqlite
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: click>=8.1
|
|
23
|
+
Requires-Dist: fastembed>=0.4
|
|
24
|
+
Requires-Dist: litellm>=1.50
|
|
25
|
+
Requires-Dist: mcp>=1.0
|
|
26
|
+
Requires-Dist: networkx>=3.2
|
|
27
|
+
Requires-Dist: platformdirs>=4.0
|
|
28
|
+
Requires-Dist: pydantic>=2.6
|
|
29
|
+
Requires-Dist: pyyaml>=6.0
|
|
30
|
+
Requires-Dist: rich>=13.7
|
|
31
|
+
Requires-Dist: sqlite-vec>=0.1.6
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest-mock>=3.10; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# kgmd
|
|
39
|
+
|
|
40
|
+
A CLI that builds a knowledge graph from a directory of markdown files and exposes it via MCP.
|
|
41
|
+
|
|
42
|
+
- Extracts entities and relations using any LLM (via [litellm](https://github.com/BerriAI/litellm))
|
|
43
|
+
- Resolves duplicate entities using local embeddings + LLM verification
|
|
44
|
+
- Induces a typed schema from the extracted data
|
|
45
|
+
- Stores everything in a single SQLite file (powered by [sqlite-vec](https://github.com/asg017/sqlite-vec))
|
|
46
|
+
- Exposes the graph via CLI queries and an [MCP](https://modelcontextprotocol.io/) server
|
|
47
|
+
|
|
48
|
+
## Install
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install kgmd
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Or with [uv](https://github.com/astral-sh/uv):
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
uv tool install kgmd
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Requirements
|
|
61
|
+
|
|
62
|
+
- Python 3.10+
|
|
63
|
+
- An API key for any LLM provider supported by litellm (OpenRouter, OpenAI, Anthropic, etc.)
|
|
64
|
+
- Embeddings run locally by default via [fastembed](https://github.com/qdrant/fastembed) (no API key needed)
|
|
65
|
+
|
|
66
|
+
## Quickstart
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# Initialize a corpus
|
|
70
|
+
cd my-notes/
|
|
71
|
+
kgmd init
|
|
72
|
+
|
|
73
|
+
# Set your LLM API key
|
|
74
|
+
export OPENROUTER_API_KEY="sk-..."
|
|
75
|
+
|
|
76
|
+
# Build the knowledge graph (extract -> resolve -> induce)
|
|
77
|
+
kgmd build
|
|
78
|
+
|
|
79
|
+
# Query
|
|
80
|
+
kgmd entities
|
|
81
|
+
kgmd relations
|
|
82
|
+
kgmd find "machine learning"
|
|
83
|
+
kgmd entity "Brian Anderson"
|
|
84
|
+
kgmd neighbors "Brian Anderson" --depth 2
|
|
85
|
+
kgmd path "Brian Anderson" "Acme Corp"
|
|
86
|
+
|
|
87
|
+
# Export
|
|
88
|
+
kgmd export --format graphml --output graph.graphml
|
|
89
|
+
|
|
90
|
+
# View induced schema
|
|
91
|
+
kgmd schema
|
|
92
|
+
|
|
93
|
+
# Corpus statistics
|
|
94
|
+
kgmd stats
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## How it works
|
|
98
|
+
|
|
99
|
+
`kgmd build` runs three stages:
|
|
100
|
+
|
|
101
|
+
1. **Extract** -- Each markdown file is chunked and sent to an LLM, which returns structured JSON with entities (people, organizations, projects, etc.) and relations between them.
|
|
102
|
+
2. **Resolve** -- Entity mentions are embedded locally, clustered by cosine similarity, and duplicate clusters are verified by the LLM before merging.
|
|
103
|
+
3. **Induce** -- Aggregate statistics about entity types and relation predicates are sent to the LLM, which produces a typed YAML schema with hierarchies.
|
|
104
|
+
|
|
105
|
+
All state lives in `.kgmd/graph.db`, a single SQLite file. Re-running `kgmd build` is incremental -- unchanged files are skipped.
|
|
106
|
+
|
|
107
|
+
## MCP Server
|
|
108
|
+
|
|
109
|
+
`kgmd mcp` launches an MCP server over stdio, exposing 7 tools:
|
|
110
|
+
|
|
111
|
+
| Tool | Description |
|
|
112
|
+
|---|---|
|
|
113
|
+
| `search` | Semantic search over chunks |
|
|
114
|
+
| `get_entity` | Full entity record with mentions and relations |
|
|
115
|
+
| `list_entities` | List entities, optionally filtered by type |
|
|
116
|
+
| `get_neighbors` | Subgraph traversal around an entity |
|
|
117
|
+
| `find_path` | Shortest path between two entities |
|
|
118
|
+
| `list_relations` | List relations with optional filters |
|
|
119
|
+
| `get_schema` | The current induced schema |
|
|
120
|
+
|
|
121
|
+
### Claude Desktop setup
|
|
122
|
+
|
|
123
|
+
Add to your Claude Desktop config (on macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
124
|
+
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"mcpServers": {
|
|
128
|
+
"kgmd": {
|
|
129
|
+
"command": "kgmd",
|
|
130
|
+
"args": ["mcp"],
|
|
131
|
+
"cwd": "/path/to/your/corpus"
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Configuration
|
|
138
|
+
|
|
139
|
+
Per-corpus config lives in `.kgmd/config.yaml`. Global defaults live in `~/.config/kgmd/config.yaml` (or the platform equivalent). Corpus config overrides global.
|
|
140
|
+
|
|
141
|
+
```yaml
|
|
142
|
+
embedding:
|
|
143
|
+
backend: fastembed # or "litellm" for API embeddings
|
|
144
|
+
model: BAAI/bge-small-en-v1.5
|
|
145
|
+
|
|
146
|
+
llm:
|
|
147
|
+
model: openrouter/anthropic/claude-sonnet-4-5
|
|
148
|
+
temperature: 0.0
|
|
149
|
+
max_tokens: 4096
|
|
150
|
+
timeout_seconds: 120
|
|
151
|
+
|
|
152
|
+
chunking:
|
|
153
|
+
max_chars: 4000
|
|
154
|
+
overlap_chars: 200
|
|
155
|
+
split_on: paragraph # or "heading", "fixed"
|
|
156
|
+
|
|
157
|
+
extraction:
|
|
158
|
+
max_entities_per_chunk: 30
|
|
159
|
+
max_relations_per_chunk: 30
|
|
160
|
+
retry_on_parse_failure: 2
|
|
161
|
+
|
|
162
|
+
resolution:
|
|
163
|
+
similarity_threshold: 0.85
|
|
164
|
+
llm_verify_clusters: true
|
|
165
|
+
max_cluster_size: 10
|
|
166
|
+
|
|
167
|
+
induction:
|
|
168
|
+
include_attribute_summary: true
|
|
169
|
+
hierarchy_depth: 3
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Export formats
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
kgmd export --format jsonld # JSON-LD with schema.org context
|
|
176
|
+
kgmd export --format cypher # Cypher CREATE statements (Neo4j)
|
|
177
|
+
kgmd export --format graphml # GraphML (Gephi, yEd)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Development
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
git clone https://github.com/johncarpenter/kgmd.git
|
|
184
|
+
cd kgmd
|
|
185
|
+
pip install -e ".[dev]"
|
|
186
|
+
make test # run tests
|
|
187
|
+
make lint # ruff check
|
|
188
|
+
make format # ruff format + ruff check --fix
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
**Note:** Your Python must be built with SQLite extension loading enabled. If using pyenv on macOS with Homebrew's SQLite:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
LDFLAGS="-L$(brew --prefix sqlite)/lib" \
|
|
195
|
+
CPPFLAGS="-I$(brew --prefix sqlite)/include -DSQLITE_ENABLE_LOAD_EXTENSION" \
|
|
196
|
+
PYTHON_CONFIGURE_OPTS="--enable-loadable-sqlite-extensions" \
|
|
197
|
+
pyenv install 3.12
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## License
|
|
201
|
+
|
|
202
|
+
[MIT](LICENSE)
|
kgmd-0.1.0/README.md
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# kgmd
|
|
2
|
+
|
|
3
|
+
A CLI that builds a knowledge graph from a directory of markdown files and exposes it via MCP.
|
|
4
|
+
|
|
5
|
+
- Extracts entities and relations using any LLM (via [litellm](https://github.com/BerriAI/litellm))
|
|
6
|
+
- Resolves duplicate entities using local embeddings + LLM verification
|
|
7
|
+
- Induces a typed schema from the extracted data
|
|
8
|
+
- Stores everything in a single SQLite file (powered by [sqlite-vec](https://github.com/asg017/sqlite-vec))
|
|
9
|
+
- Exposes the graph via CLI queries and an [MCP](https://modelcontextprotocol.io/) server
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install kgmd
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Or with [uv](https://github.com/astral-sh/uv):
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
uv tool install kgmd
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Requirements
|
|
24
|
+
|
|
25
|
+
- Python 3.10+
|
|
26
|
+
- An API key for any LLM provider supported by litellm (OpenRouter, OpenAI, Anthropic, etc.)
|
|
27
|
+
- Embeddings run locally by default via [fastembed](https://github.com/qdrant/fastembed) (no API key needed)
|
|
28
|
+
|
|
29
|
+
## Quickstart
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Initialize a corpus
|
|
33
|
+
cd my-notes/
|
|
34
|
+
kgmd init
|
|
35
|
+
|
|
36
|
+
# Set your LLM API key
|
|
37
|
+
export OPENROUTER_API_KEY="sk-..."
|
|
38
|
+
|
|
39
|
+
# Build the knowledge graph (extract -> resolve -> induce)
|
|
40
|
+
kgmd build
|
|
41
|
+
|
|
42
|
+
# Query
|
|
43
|
+
kgmd entities
|
|
44
|
+
kgmd relations
|
|
45
|
+
kgmd find "machine learning"
|
|
46
|
+
kgmd entity "Brian Anderson"
|
|
47
|
+
kgmd neighbors "Brian Anderson" --depth 2
|
|
48
|
+
kgmd path "Brian Anderson" "Acme Corp"
|
|
49
|
+
|
|
50
|
+
# Export
|
|
51
|
+
kgmd export --format graphml --output graph.graphml
|
|
52
|
+
|
|
53
|
+
# View induced schema
|
|
54
|
+
kgmd schema
|
|
55
|
+
|
|
56
|
+
# Corpus statistics
|
|
57
|
+
kgmd stats
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## How it works
|
|
61
|
+
|
|
62
|
+
`kgmd build` runs three stages:
|
|
63
|
+
|
|
64
|
+
1. **Extract** -- Each markdown file is chunked and sent to an LLM, which returns structured JSON with entities (people, organizations, projects, etc.) and relations between them.
|
|
65
|
+
2. **Resolve** -- Entity mentions are embedded locally, clustered by cosine similarity, and duplicate clusters are verified by the LLM before merging.
|
|
66
|
+
3. **Induce** -- Aggregate statistics about entity types and relation predicates are sent to the LLM, which produces a typed YAML schema with hierarchies.
|
|
67
|
+
|
|
68
|
+
All state lives in `.kgmd/graph.db`, a single SQLite file. Re-running `kgmd build` is incremental -- unchanged files are skipped.
|
|
69
|
+
|
|
70
|
+
## MCP Server
|
|
71
|
+
|
|
72
|
+
`kgmd mcp` launches an MCP server over stdio, exposing 7 tools:
|
|
73
|
+
|
|
74
|
+
| Tool | Description |
|
|
75
|
+
|---|---|
|
|
76
|
+
| `search` | Semantic search over chunks |
|
|
77
|
+
| `get_entity` | Full entity record with mentions and relations |
|
|
78
|
+
| `list_entities` | List entities, optionally filtered by type |
|
|
79
|
+
| `get_neighbors` | Subgraph traversal around an entity |
|
|
80
|
+
| `find_path` | Shortest path between two entities |
|
|
81
|
+
| `list_relations` | List relations with optional filters |
|
|
82
|
+
| `get_schema` | The current induced schema |
|
|
83
|
+
|
|
84
|
+
### Claude Desktop setup
|
|
85
|
+
|
|
86
|
+
Add to your Claude Desktop config (on macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"mcpServers": {
|
|
91
|
+
"kgmd": {
|
|
92
|
+
"command": "kgmd",
|
|
93
|
+
"args": ["mcp"],
|
|
94
|
+
"cwd": "/path/to/your/corpus"
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Configuration
|
|
101
|
+
|
|
102
|
+
Per-corpus config lives in `.kgmd/config.yaml`. Global defaults live in `~/.config/kgmd/config.yaml` (or the platform equivalent). Corpus config overrides global.
|
|
103
|
+
|
|
104
|
+
```yaml
|
|
105
|
+
embedding:
|
|
106
|
+
backend: fastembed # or "litellm" for API embeddings
|
|
107
|
+
model: BAAI/bge-small-en-v1.5
|
|
108
|
+
|
|
109
|
+
llm:
|
|
110
|
+
model: openrouter/anthropic/claude-sonnet-4-5
|
|
111
|
+
temperature: 0.0
|
|
112
|
+
max_tokens: 4096
|
|
113
|
+
timeout_seconds: 120
|
|
114
|
+
|
|
115
|
+
chunking:
|
|
116
|
+
max_chars: 4000
|
|
117
|
+
overlap_chars: 200
|
|
118
|
+
split_on: paragraph # or "heading", "fixed"
|
|
119
|
+
|
|
120
|
+
extraction:
|
|
121
|
+
max_entities_per_chunk: 30
|
|
122
|
+
max_relations_per_chunk: 30
|
|
123
|
+
retry_on_parse_failure: 2
|
|
124
|
+
|
|
125
|
+
resolution:
|
|
126
|
+
similarity_threshold: 0.85
|
|
127
|
+
llm_verify_clusters: true
|
|
128
|
+
max_cluster_size: 10
|
|
129
|
+
|
|
130
|
+
induction:
|
|
131
|
+
include_attribute_summary: true
|
|
132
|
+
hierarchy_depth: 3
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Export formats
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
kgmd export --format jsonld # JSON-LD with schema.org context
|
|
139
|
+
kgmd export --format cypher # Cypher CREATE statements (Neo4j)
|
|
140
|
+
kgmd export --format graphml # GraphML (Gephi, yEd)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Development
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
git clone https://github.com/johncarpenter/kgmd.git
|
|
147
|
+
cd kgmd
|
|
148
|
+
pip install -e ".[dev]"
|
|
149
|
+
make test # run tests
|
|
150
|
+
make lint # ruff check
|
|
151
|
+
make format # ruff format + ruff check --fix
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Note:** Your Python must be built with SQLite extension loading enabled. If using pyenv on macOS with Homebrew's SQLite:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
LDFLAGS="-L$(brew --prefix sqlite)/lib" \
|
|
158
|
+
CPPFLAGS="-I$(brew --prefix sqlite)/include -DSQLITE_ENABLE_LOAD_EXTENSION" \
|
|
159
|
+
PYTHON_CONFIGURE_OPTS="--enable-loadable-sqlite-extensions" \
|
|
160
|
+
pyenv install 3.12
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## License
|
|
164
|
+
|
|
165
|
+
[MIT](LICENSE)
|