mdbind 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbind-0.1.1/PKG-INFO +9 -0
- mdbind-0.1.1/README.md +212 -0
- mdbind-0.1.1/pyproject.toml +24 -0
- mdbind-0.1.1/setup.cfg +4 -0
- mdbind-0.1.1/src/mdbind/__init__.py +0 -0
- mdbind-0.1.1/src/mdbind/cache.py +170 -0
- mdbind-0.1.1/src/mdbind/cli.py +1181 -0
- mdbind-0.1.1/src/mdbind/composer.py +135 -0
- mdbind-0.1.1/src/mdbind/cycle.py +24 -0
- mdbind-0.1.1/src/mdbind/directives.py +116 -0
- mdbind-0.1.1/src/mdbind/index.py +57 -0
- mdbind-0.1.1/src/mdbind/models.py +86 -0
- mdbind-0.1.1/src/mdbind/parser.py +241 -0
- mdbind-0.1.1/src/mdbind.egg-info/PKG-INFO +9 -0
- mdbind-0.1.1/src/mdbind.egg-info/SOURCES.txt +30 -0
- mdbind-0.1.1/src/mdbind.egg-info/dependency_links.txt +1 -0
- mdbind-0.1.1/src/mdbind.egg-info/entry_points.txt +2 -0
- mdbind-0.1.1/src/mdbind.egg-info/requires.txt +4 -0
- mdbind-0.1.1/src/mdbind.egg-info/top_level.txt +1 -0
- mdbind-0.1.1/tests/test_cache.py +157 -0
- mdbind-0.1.1/tests/test_cli_compose.py +282 -0
- mdbind-0.1.1/tests/test_cli_get.py +105 -0
- mdbind-0.1.1/tests/test_cli_ia_v1.py +197 -0
- mdbind-0.1.1/tests/test_cli_ia_v2.py +315 -0
- mdbind-0.1.1/tests/test_cli_tree.py +156 -0
- mdbind-0.1.1/tests/test_cli_validate.py +149 -0
- mdbind-0.1.1/tests/test_cycle_detection.py +104 -0
- mdbind-0.1.1/tests/test_directives.py +134 -0
- mdbind-0.1.1/tests/test_examples.py +79 -0
- mdbind-0.1.1/tests/test_index.py +85 -0
- mdbind-0.1.1/tests/test_models.py +160 -0
- mdbind-0.1.1/tests/test_parser.py +133 -0
mdbind-0.1.1/PKG-INFO
ADDED
mdbind-0.1.1/README.md
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# MdBind
|
|
4
|
+
|
|
5
|
+
**Structured memory in plain Markdown.**
|
|
6
|
+
|
|
7
|
+
Transform your Markdown files into a navigable knowledge graph —
|
|
8
|
+
without databases, embeddings, or proprietary formats.
|
|
9
|
+
|
|
10
|
+
[Python](https://www.python.org/)
|
|
11
|
+
[Tests](#development)
|
|
12
|
+
[Installation](#quick-start)
|
|
13
|
+
[License](#license)
|
|
14
|
+
|
|
15
|
+
</div>
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## What is MdBind?
|
|
20
|
+
|
|
21
|
+
MdBind turns Markdown files into a **directed knowledge graph** where every section is an addressable node with stable identity, metadata, and explicit relationships.
|
|
22
|
+
|
|
23
|
+
Your files stay **plain text, Git-friendly, and human-readable** — but gain:
|
|
24
|
+
|
|
25
|
+
- Graph traversal and dependency resolution
|
|
26
|
+
- Stable URIs that survive reorganization
|
|
27
|
+
- Structured metadata queries
|
|
28
|
+
- AI-oriented context retrieval with bounded token consumption
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Why not embeddings?
|
|
33
|
+
|
|
34
|
+
| Approach | Inspectable | Versionable | Deterministic | Human-readable |
|
|
35
|
+
|---|:---:|:---:|:---:|:---:|
|
|
36
|
+
| Vector databases | ✗ | ✗ | ✗ | ✗ |
|
|
37
|
+
| Proprietary stores | ✗ | partial | partial | ✗ |
|
|
38
|
+
| **MdBind** | ✓ | ✓ | ✓ | ✓ |
|
|
39
|
+
|
|
40
|
+
Every node, every edge, every relationship is visible in the source file. What an agent reads, a human can audit.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Quick start
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# 1. Clone and install
|
|
48
|
+
git clone <repo-url> && cd mdbind
|
|
49
|
+
python3 -m venv .venv && source .venv/bin/activate
|
|
50
|
+
pip install -e .
|
|
51
|
+
|
|
52
|
+
# 2. Point it at your docs
|
|
53
|
+
mdb validate --root docs/
|
|
54
|
+
|
|
55
|
+
# 3. Query the graph
|
|
56
|
+
mdb get docs/auth.md#auth --json
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## See it in action
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Navigate the dependency tree
|
|
65
|
+
$ mdb tree docs/auth.md#auth --root docs/
|
|
66
|
+
|
|
67
|
+
auth [docs/auth.md]
|
|
68
|
+
├── jwt [include] docs/security.md#jwt
|
|
69
|
+
└── permissions [ref] docs/users.md#permissions
|
|
70
|
+
|
|
71
|
+
# Compose a unified document by expanding all @include directives
|
|
72
|
+
$ mdb compose docs/auth.md#auth --root docs/ --depth 2
|
|
73
|
+
|
|
74
|
+
# Find everything that depends on a node (reverse BFS)
|
|
75
|
+
$ mdb impact docs/auth.md#auth --root docs/ --json
|
|
76
|
+
|
|
77
|
+
# Boolean metadata query
|
|
78
|
+
$ mdb query "tag:api AND NOT status=obsolete" --root docs/ --json
|
|
79
|
+
|
|
80
|
+
# Bounded context for LLM consumption
|
|
81
|
+
$ mdb context-compose docs/auth.md#auth --root docs/ --depth 2 --token-limit 2000 --json
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Syntax
|
|
87
|
+
|
|
88
|
+
### Declaring a section
|
|
89
|
+
|
|
90
|
+
A section is a Markdown heading followed immediately by a YAML block with a `section:` field:
|
|
91
|
+
|
|
92
|
+
````markdown
|
|
93
|
+
## Authentication
|
|
94
|
+
|
|
95
|
+
```yaml
|
|
96
|
+
section: auth
|
|
97
|
+
title: Authentication
|
|
98
|
+
type: domain
|
|
99
|
+
owner: security-team
|
|
100
|
+
tags: [auth, core]
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Authentication is responsible for user identity.
|
|
104
|
+
|
|
105
|
+
[@include: JWT handling](security.md#jwt)
|
|
106
|
+
|
|
107
|
+
See also: [@ref: permissions model](users.md#permissions)
|
|
108
|
+
````
|
|
109
|
+
|
|
110
|
+
- The YAML block must be the **first element** after the heading
|
|
111
|
+
- `section:` is mandatory and must be **unique per repository**
|
|
112
|
+
- Any additional fields are preserved as queryable metadata
|
|
113
|
+
|
|
114
|
+
### Directives (graph edges)
|
|
115
|
+
|
|
116
|
+
```markdown
|
|
117
|
+
[@include: label](path/to/file.md#section-id) <!-- expands inline during compose -->
|
|
118
|
+
[@ref: label](path/to/file.md#section-id) <!-- records dependency, no expansion -->
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Directives are standard Markdown links — they render correctly in any Markdown viewer.
|
|
122
|
+
|
|
123
|
+
```
|
|
124
|
+
auth
|
|
125
|
+
├── jwt [include]
|
|
126
|
+
└── permissions [ref]
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Commands
|
|
132
|
+
|
|
133
|
+
### Quick reference
|
|
134
|
+
|
|
135
|
+
| Command | Description |
|
|
136
|
+
|---|---|
|
|
137
|
+
| `mdb get <URI>` | Extract a section with full documentary fidelity |
|
|
138
|
+
| `mdb tree <URI>` | Visual dependency hierarchy |
|
|
139
|
+
| `mdb compose <URI>` | Materialize a unified document (expands `@include`) |
|
|
140
|
+
| `mdb validate` | Check integrity: broken refs, cycles, duplicate IDs |
|
|
141
|
+
| `mdb context <URI>` | Metadata + immediate 1-hop neighborhood |
|
|
142
|
+
| `mdb backlinks <URI>` | All sections that reference this URI |
|
|
143
|
+
| `mdb search <predicate>` | Search sections by metadata |
|
|
144
|
+
| `mdb impact <URI>` | All nodes that depend on this URI (reverse BFS) |
|
|
145
|
+
| `mdb neighbors <URI>` | All nodes reachable within N hops |
|
|
146
|
+
| `mdb explain <URI_A> <URI_B>` | All directed paths between two nodes |
|
|
147
|
+
| `mdb diff` | Structural graph diff against a git reference |
|
|
148
|
+
| `mdb query <expression>` | Boolean metadata query (`AND`, `OR`, `NOT`) |
|
|
149
|
+
| `mdb context-compose <URI>` | Bounded materialization for LLM consumption |
|
|
150
|
+
|
|
151
|
+
All commands accept `--json` for machine-readable output.
|
|
152
|
+
All outputs are deterministic and JSON-serializable. All URIs are stable across sessions.
|
|
153
|
+
|
|
154
|
+
### Selected examples
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Validate an entire repository
|
|
158
|
+
mdb validate --root docs/ --json
|
|
159
|
+
|
|
160
|
+
# 1-hop neighborhood of a node
|
|
161
|
+
mdb context docs/auth.md#auth --root docs/ --json
|
|
162
|
+
|
|
163
|
+
# Find all sections tagged api that are not obsolete
|
|
164
|
+
mdb query "tag:api AND NOT status=obsolete" --root docs/ --json
|
|
165
|
+
|
|
166
|
+
# Bounded context for LLM — depth 2, max 2000 tokens
|
|
167
|
+
mdb context-compose docs/auth.md#auth --root docs/ --depth 2 --token-limit 2000 --json
|
|
168
|
+
|
|
169
|
+
# What changed structurally since the last commit?
|
|
170
|
+
mdb diff --root docs/ --since HEAD~1 --json
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Philosophy
|
|
176
|
+
|
|
177
|
+
Five principles behind every design decision:
|
|
178
|
+
|
|
179
|
+
1. **Markdown is the source of truth** — no proprietary formats, no hidden state
|
|
180
|
+
2. **Knowledge should be inspectable** — every node, every edge, every relationship is visible in the source
|
|
181
|
+
3. **Relationships should be explicit** — `@include` and `@ref` are first-class graph primitives
|
|
182
|
+
4. **Stable identifiers are better than headings** — `file.md#id` survives reorganization
|
|
183
|
+
5. **AI memory should remain human-readable** — what an agent reads, a human can audit
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## Examples
|
|
188
|
+
|
|
189
|
+
See the [examples/](examples/) directory for a complete working knowledge base demonstrating sections, directives, composition, and graph traversal.
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Development
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
# Install in editable mode
|
|
197
|
+
pip install -e .
|
|
198
|
+
|
|
199
|
+
# Run the full test suite
|
|
200
|
+
python -m pytest
|
|
201
|
+
|
|
202
|
+
# Run a specific module
|
|
203
|
+
python -m pytest tests/test_cli_validate.py -v
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
> 209 tests, 0 failures.
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## License
|
|
211
|
+
|
|
212
|
+
MIT
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "mdbind"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "MdBind — Structured memory in plain Markdown"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"pydantic>=2.0",
|
|
12
|
+
"markdown-it-py>=3.0",
|
|
13
|
+
"typer>=0.12",
|
|
14
|
+
"pyyaml>=6.0",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.scripts]
|
|
18
|
+
mdb = "mdbind.cli:app"
|
|
19
|
+
|
|
20
|
+
[tool.setuptools.packages.find]
|
|
21
|
+
where = ["src"]
|
|
22
|
+
|
|
23
|
+
[tool.pytest.ini_options]
|
|
24
|
+
testpaths = ["tests"]
|
mdbind-0.1.1/setup.cfg
ADDED
|
File without changes
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cache persistente do SectionIndex (spec section 7).
|
|
3
|
+
|
|
4
|
+
Serializa o indice em <root>/.mdgraph/index.json e, em execucoes subsequentes,
|
|
5
|
+
reprocessa apenas os arquivos cujo hash SHA-256 tenha mudado.
|
|
6
|
+
Arquivos removidos tem suas secoes expurgadas automaticamente.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Dict, Optional
|
|
14
|
+
|
|
15
|
+
# Cache schema version; bump it whenever the on-disk format changes in an
# incompatible way so that older cache files are ignored on load.
_CACHE_VERSION = 1
# Name of the directory, created under the root, that holds the cache.
_CACHE_DIR = ".mdgraph"
# Name of the JSON file, inside the cache directory, storing the index.
_CACHE_FILE = "index.json"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Hash de arquivo
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
def file_hash(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file's raw bytes."""
    # The file is read in binary mode, so the digest is independent of any
    # text encoding or newline translation.
    return hashlib.sha256(path.read_bytes()).hexdigest()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Leitura e escrita do cache
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
def _cache_path(root: Path) -> Path:
    """Return the path of the cache file located under ``root``."""
    return root.joinpath(_CACHE_DIR, _CACHE_FILE)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def load_cache(root: Path) -> Optional[dict]:
    """Load the persisted cache for ``root`` from disk.

    Args:
        root: Directory under which the cache file is stored.

    Returns:
        The decoded cache payload (a dict), or ``None`` when the cache file
        is missing, cannot be read, is not valid UTF-8 JSON, is not a JSON
        object, or carries a ``version`` different from the current schema
        version. Callers treat ``None`` as "no usable cache".
    """
    cp = _cache_path(root)
    try:
        # Read directly instead of checking exists() first: this avoids a
        # race with concurrent deletion and also covers unreadable files.
        data = json.loads(cp.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None

    # Valid JSON that is not an object (e.g. a list or a string) has no
    # ``.get`` and must be rejected as an invalid cache rather than crash.
    if not isinstance(data, dict):
        return None
    if data.get("version") != _CACHE_VERSION:
        return None
    return data
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def save_cache(root: Path, file_hashes: Dict[str, str], sections_data: list) -> None:
    """Write the cache payload for ``root`` to disk as UTF-8 JSON.

    Args:
        root: Directory under which the cache directory is created.
        file_hashes: Mapping of file path string to its SHA-256 hex digest.
        sections_data: List of JSON-serializable dicts, one per section.
    """
    target = _cache_path(root)
    # Create the cache directory on first use; a no-op when it exists.
    target.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps(
        {
            "version": _CACHE_VERSION,
            "file_hashes": file_hashes,
            "sections": sections_data,
        },
        ensure_ascii=False,
        indent=2,
    )
    target.write_text(document, encoding="utf-8")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Serializacao / desserializacao de ParsedSection
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
def serialize_section(section) -> dict:
    """Convert a parsed section object into a plain dict for the cache.

    The result holds the section's ``uri``, ``file_path`` and ``metadata``
    as-is, each directive reduced to its ``type`` and ``target_uri``, and
    the six positional fields of ``section.raw``.
    """
    raw_field_names = (
        "heading_level",
        "heading_text",
        "token_start",
        "token_end",
        "source_start_line",
        "source_end_line",
    )
    raw = section.raw

    edges = []
    for directive in section.directives:
        edges.append({"type": directive.type, "target_uri": directive.target_uri})

    return {
        "uri": section.uri,
        "file_path": section.file_path,
        "metadata": section.metadata,
        "directives": edges,
        "raw": {name: getattr(raw, name) for name in raw_field_names},
    }
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def deserialize_section(data: dict):
    """Rebuild a ``ParsedSection`` from a dict stored in the cache.

    ``data`` must provide ``raw``, ``uri``, ``file_path`` and ``metadata``;
    a missing ``directives`` key is treated as an empty list.
    """
    # Imported inside the function, as the original does, rather than at
    # module level.
    from mdbind.models import Directive, ParsedSection, RawSection

    raw_section = RawSection(**data["raw"])

    edges = []
    for entry in data.get("directives", []):
        edges.append(Directive(type=entry["type"], target_uri=entry["target_uri"]))

    return ParsedSection(
        raw=raw_section,
        uri=data["uri"],
        file_path=data["file_path"],
        metadata=data["metadata"],
        directives=edges,
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
# Logica incremental
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
def build_index_with_cache(
    root: Path,
    md_files: list[Path],
    no_cache: bool = False,
) -> tuple[list, Dict[str, str]]:
    """Build the list of parsed sections, reusing cached results when possible.

    Args:
        root: Directory under which the cache file is looked up.
        md_files: Markdown files to index.
        no_cache: When true, ignore any existing cache and parse every file.

    Returns:
        A ``(sections, file_hashes)`` tuple. ``sections`` is the list of
        parsed sections for all files in ``md_files``; ``file_hashes`` maps
        ``str(path)`` to the current SHA-256 digest of each of those files.

    Files whose digest matches the cached one have their sections restored
    from the cache; all other files are parsed again. Cached sections that
    belong to files not present in ``md_files`` are simply never added to
    the result.
    """
    from mdbind.parser import parse_file

    current_hashes: Dict[str, str] = {str(f): file_hash(f) for f in md_files}

    # No cache available, or caching explicitly disabled: parse everything.
    cached = None if no_cache else load_cache(root)
    if cached is None:
        return _parse_all(md_files, parse_file), current_hashes

    # ``or`` guards against a payload whose keys are present but null.
    cached_hashes: Dict[str, str] = cached.get("file_hashes") or {}

    # Group the cached section dicts by the file they came from.
    # NOTE(review): a cache hit looks sections up by str(path); this assumes
    # the parser stores ``file_path`` in that same form. Confirm against
    # parse_file, otherwise unchanged files would restore no sections.
    cached_by_file: Dict[str, list] = {}
    for s_data in cached.get("sections") or []:
        cached_by_file.setdefault(s_data["file_path"], []).append(s_data)

    sections: list = []
    for f in md_files:
        fs = str(f)
        if cached_hashes.get(fs) == current_hashes[fs]:
            # Cache hit: the file is unchanged, restore its sections.
            sections.extend(
                deserialize_section(s_data) for s_data in cached_by_file.get(fs, [])
            )
        else:
            # Cache miss: the file is new or modified, parse it again.
            sections.extend(parse_file(f))

    return sections, current_hashes
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _parse_all(md_files: list[Path], parse_file) -> list:
    """Parse every file with ``parse_file`` and return all results in one flat list.

    Results keep the order of ``md_files`` and, within each file, the order
    in which ``parse_file`` yields them.
    """
    return [section for md_file in md_files for section in parse_file(md_file)]
|