mdbind 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbind/__init__.py +0 -0
- mdbind/cache.py +170 -0
- mdbind/cli.py +1181 -0
- mdbind/composer.py +135 -0
- mdbind/cycle.py +24 -0
- mdbind/directives.py +116 -0
- mdbind/index.py +57 -0
- mdbind/models.py +86 -0
- mdbind/parser.py +241 -0
- mdbind-0.1.0.dist-info/METADATA +9 -0
- mdbind-0.1.0.dist-info/RECORD +14 -0
- mdbind-0.1.0.dist-info/WHEEL +5 -0
- mdbind-0.1.0.dist-info/entry_points.txt +2 -0
- mdbind-0.1.0.dist-info/top_level.txt +1 -0
mdbind/cli.py
ADDED
|
@@ -0,0 +1,1181 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI do mdbind — entrypoint principal.
|
|
3
|
+
|
|
4
|
+
Comandos:
|
|
5
|
+
get <URI> Extrai uma secao com fidelidade documental (linhas brutas)
|
|
6
|
+
tree <URI> Exibe hierarquia visual de dependencias
|
|
7
|
+
compose <URI> Materializa documento unificado (B-007)
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
import typer
|
|
16
|
+
|
|
17
|
+
from mdbind.parser import ParseError, parse_file
|
|
18
|
+
|
|
19
|
+
# Root Typer application: every ``@app.command()`` function in this module
# registers itself as a subcommand of this object.
app = typer.Typer(
    add_completion=False,
    help="MdBind — Structured memory in plain Markdown.",
    name="mdb",
)
|
|
24
|
+
|
|
25
|
+
def _split_uri(uri: str) -> tuple[str, str]:
    """Split ``'file.md#id'`` into ``('file.md', 'id')``.

    Only the first ``#`` separates the two parts, so the fragment may itself
    contain ``#``.  When the separator, the file path, or the section id is
    missing, a message is written to stderr and ``typer.Exit(code=1)`` is
    raised.
    """
    path_part, separator, fragment = uri.partition("#")

    # Checks run in the same order as the messages below: no '#', then an
    # empty path, then an empty fragment.
    if not separator:
        problem = f"Erro: URI deve conter fragmento '#id'. Recebido: '{uri}'"
    elif not path_part:
        problem = f"Erro: URI sem caminho de arquivo: '{uri}'"
    elif not fragment:
        problem = f"Erro: URI sem id de secao: '{uri}'"
    else:
        return path_part, fragment

    typer.echo(problem, err=True)
    raise typer.Exit(code=1)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
# get
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
@app.command()
def get(
    uri: str = typer.Argument(..., help="URI da secao no formato arquivo.md#id"),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Extrai uma secao com 100%% de fidelidade documental (linhas brutas do arquivo fonte).
    """
    # The docstring above is kept verbatim because Typer shows it as the
    # command's help text.
    #
    # Behaviour: find the section whose metadata "id" equals the URI fragment
    # and print the exact source lines it spans, raw or wrapped in a JSON
    # object (--json).  Every failure path reports on stderr and exits with 1.
    file_path_str, section_id = _split_uri(uri)
    file_path = Path(file_path_str).resolve()

    # is_file() rather than exists(): a directory path exists too, but it must
    # be rejected here with a clean message instead of reaching the parser and
    # read_text() below.
    if not file_path.is_file():
        typer.echo(f"Erro: arquivo nao encontrado: '{file_path}'", err=True)
        raise typer.Exit(code=1)

    try:
        sections = parse_file(file_path)
    except ParseError as exc:
        typer.echo(f"Erro de parsing: {exc}", err=True)
        raise typer.Exit(code=1)

    # First section whose metadata id, compared as a string, matches.
    matched = next(
        (s for s in sections if str(s.metadata.get("id", "")) == section_id),
        None,
    )

    if matched is None:
        typer.echo(
            f"Erro: secao '{section_id}' nao encontrada em '{file_path}'",
            err=True,
        )
        raise typer.Exit(code=1)

    # Slice the raw file so the output is the source text itself rather than a
    # re-rendering of the parsed section.
    lines = file_path.read_text(encoding="utf-8").splitlines(keepends=True)
    start = matched.raw.source_start_line - 1  # line number -> 0-based index
    end = matched.raw.source_end_line  # exclusive bound == inclusive last line

    output = "".join(lines[start:end])
    # Guarantee exactly one trailing newline without adding an extra one.
    if output and not output.endswith("\n"):
        output += "\n"

    if json_output:
        import json as json_mod

        typer.echo(json_mod.dumps({
            "uri": uri,
            "file_path": str(file_path),
            "source_start_line": matched.raw.source_start_line,
            "source_end_line": matched.raw.source_end_line,
            "content": output,
        }, ensure_ascii=False))
    else:
        typer.echo(output, nl=False)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# tree
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
@app.command()
def tree(
    uri: str = typer.Argument(..., help="URI da secao no formato arquivo.md#id"),
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Diretorio raiz do repositorio (padrao: diretorio do arquivo).",
    ),
    refs: bool = typer.Option(
        False, "--refs",
        help="Exibir backlinks (quem depende desta secao).",
    ),
    depth: Optional[int] = typer.Option(
        None, "--depth", "-d",
        help="Profundidade maxima da arvore (padrao: ilimitada).",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Exibe a hierarquia visual de dependencias de uma secao.
    """
    # Indexes the repository, then either prints the dependency tree as text
    # or emits it as JSON.  --refs switches from outgoing edges to incoming
    # edges in both output modes.
    from mdbind.index import index_repository

    file_path_str, section_id = _split_uri(uri)
    file_path = Path(file_path_str).resolve()

    # Without --root, the directory holding the file is what gets indexed.
    repo_root = file_path.parent if not root else root.resolve()

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Erro de parsing: {exc}", err=True)
        raise typer.Exit(code=1)

    # The index is looked up by "<absolute file path>#<section id>".
    abs_uri = f"{file_path}#{section_id}"
    if abs_uri not in graph.index.sections:
        typer.echo(f"Erro: URI '{abs_uri}' nao encontrada no indice.", err=True)
        raise typer.Exit(code=1)

    if not json_output:
        printer = _print_tree_incoming if refs else _print_tree_outgoing
        printer(abs_uri, graph, prefix="", visited=set(), depth=depth)
        return

    import json as json_mod

    builder = _build_tree_incoming if refs else _build_tree_outgoing
    tree_data = builder(abs_uri, graph, visited=set(), depth=depth)
    typer.echo(json_mod.dumps({"uri": abs_uri, "tree": tree_data}, ensure_ascii=False))
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _label(uri: str, graph) -> str:
    """Return the text shown for *uri* in the printed tree.

    For an indexed section the label is ``"<title> [<uri>]"``, where the title
    falls back to the metadata ``id`` and then to the URI itself.  A URI that
    is not in the index is returned unchanged.
    """
    section = graph.index.sections.get(uri)
    if not section:
        return uri

    metadata = section.metadata
    title = metadata.get("title", metadata.get("id", uri))
    return f"{title} [{uri}]"
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _print_tree_outgoing(uri: str, graph, prefix: str, visited: set, depth: Optional[int] = None) -> None:
    """Print *uri* and, recursively, the sections it points to.

    Args:
        uri: Node to print.
        graph: Object exposing ``outgoing_edges`` (uri -> set of target uris).
        prefix: Text printed before this node's label, including its connector.
        visited: URIs already on the path from the root; a node found here is
            printed with a ``(ciclo)`` marker and not expanded again.
        depth: Remaining levels to expand; ``None`` means unlimited.
    """
    marker = "(ciclo)" if uri in visited else ""
    # rstrip() drops the trailing space left when there is no marker.
    typer.echo(f"{prefix}{_label(uri, graph)} {marker}".rstrip())
    if uri in visited:
        return
    if depth is not None and depth <= 0:
        return

    # A fresh set per branch: siblings must not see each other's descendants.
    visited = visited | {uri}
    children = sorted(graph.outgoing_edges.get(uri, set()))
    next_depth = None if depth is None else depth - 1

    # Children must not inherit this node's own connector, otherwise nested
    # levels render as "├── └── x".  Turn the inherited connector into the
    # matching continuation: a vertical bar while the parent still has
    # siblings below it, blanks when the parent was the last child.
    if prefix.endswith("├── "):
        child_indent = prefix[:-4] + "│   "
    elif prefix.endswith("└── "):
        child_indent = prefix[:-4] + "    "
    else:
        child_indent = prefix

    last = len(children) - 1
    for position, child in enumerate(children):
        connector = "└── " if position == last else "├── "
        _print_tree_outgoing(child, graph, child_indent + connector, visited, next_depth)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _print_tree_incoming(uri: str, graph, prefix: str, visited: set, depth: Optional[int] = None) -> None:
    """Print *uri* and, recursively, the sections that point to it.

    Args:
        uri: Node to print.
        graph: Object exposing ``incoming_edges`` (uri -> set of source uris).
        prefix: Text printed before this node's label, including its connector.
        visited: URIs already on the path from the root; a node found here is
            printed with a ``(ciclo)`` marker and not expanded again.
        depth: Remaining levels to expand; ``None`` means unlimited.
    """
    marker = "(ciclo)" if uri in visited else ""
    # rstrip() drops the trailing space left when there is no marker.
    typer.echo(f"{prefix}{_label(uri, graph)} {marker}".rstrip())
    if uri in visited:
        return
    if depth is not None and depth <= 0:
        return

    # A fresh set per branch: siblings must not see each other's ancestors.
    visited = visited | {uri}
    parents = sorted(graph.incoming_edges.get(uri, set()))
    next_depth = None if depth is None else depth - 1

    # Nested entries must not inherit this node's own connector, otherwise
    # deeper levels render as "├── └── x".  Turn the inherited connector into
    # the matching continuation: a vertical bar while this node still has
    # siblings below it, blanks when it was the last one.
    if prefix.endswith("├── "):
        nested_indent = prefix[:-4] + "│   "
    elif prefix.endswith("└── "):
        nested_indent = prefix[:-4] + "    "
    else:
        nested_indent = prefix

    last = len(parents) - 1
    for position, parent in enumerate(parents):
        connector = "└── " if position == last else "├── "
        _print_tree_incoming(parent, graph, nested_indent + connector, visited, next_depth)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _build_tree_outgoing(uri: str, graph, visited: set, depth: Optional[int] = None) -> list:
    """Build the JSON-serialisable tree of sections reachable from *uri*.

    Args:
        uri: Node whose outgoing edges are expanded.
        graph: Object exposing ``outgoing_edges`` (uri -> set of target uris).
        visited: URIs already on the current path; a node found here is not
            expanded again, which ends cycles.
        depth: Remaining levels to expand; ``None`` means unlimited.

    Returns:
        One dict per child, sorted by URI, with keys ``uri``, ``type``,
        ``depth`` (levels still available below that child, or ``None``) and
        ``children`` (the same structure, recursively).  An empty list when
        *uri* is already on the path or the depth budget is used up.
    """
    if uri in visited or (depth is not None and depth <= 0):
        return []

    # A fresh set per branch so sibling subtrees are explored independently.
    visited = visited | {uri}
    next_depth = None if depth is None else depth - 1

    return [
        {
            "uri": child,
            # Edges are not typed in the current model, so every outgoing
            # edge is reported with this default.
            "type": "include",
            "depth": next_depth,
            "children": _build_tree_outgoing(child, graph, visited, next_depth),
        }
        for child in sorted(graph.outgoing_edges.get(uri, set()))
    ]
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _build_tree_incoming(uri: str, graph, visited: set, depth: Optional[int] = None) -> list:
    """Build the JSON-serialisable tree of sections that point to *uri*.

    Mirrors the outgoing builder but walks ``graph.incoming_edges``.  Each
    entry has keys ``uri``, ``type`` (always ``"incoming"``), ``depth`` (levels
    still available below that entry, or ``None``) and ``children``.  Returns
    an empty list when *uri* is already on the current path or the depth
    budget is used up.
    """
    depth_exhausted = depth is not None and depth <= 0
    if depth_exhausted or uri in visited:
        return []

    # A fresh set per branch so sibling subtrees are explored independently.
    on_path = visited | {uri}
    remaining = None if depth is None else depth - 1

    return [
        {
            "uri": source,
            "type": "incoming",
            "depth": remaining,
            "children": _build_tree_incoming(source, graph, on_path, remaining),
        }
        for source in sorted(graph.incoming_edges.get(uri, set()))
    ]
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# ---------------------------------------------------------------------------
|
|
231
|
+
# compose
|
|
232
|
+
# ---------------------------------------------------------------------------
|
|
233
|
+
|
|
234
|
+
@app.command()
def compose(
    uri: str = typer.Argument(..., help="URI da secao raiz no formato arquivo.md#id"),
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Diretorio raiz do repositorio (padrao: diretorio do arquivo).",
    ),
    strict: bool = typer.Option(False, "--strict", help="Abortar em URI nao resolvida."),
    deduplicate: bool = typer.Option(False, "--deduplicate", help="Deduplicar nos repetidos."),
    json_output: bool = typer.Option(False, "--json", help="Exportar como JSON estruturado."),
    depth: Optional[int] = typer.Option(
        None, "--depth", "-d",
        help="Profundidade maxima de expansao de @include (padrao: ilimitada).",
    ),
) -> None:
    """
    Materializa um documento Markdown unificado expandindo @include recursivamente.
    """
    # Indexes the repository, hands the root section to the composer, relays
    # the warnings it collected to stderr, and prints the composed document
    # (raw, or wrapped in a JSON object with --json).
    import json as json_mod
    from mdbind.composer import compose as do_compose
    from mdbind.index import index_repository

    file_path_str, section_id = _split_uri(uri)
    file_path = Path(file_path_str).resolve()

    if not file_path.exists():
        typer.echo(f"Erro: arquivo nao encontrado: '{file_path}'", err=True)
        raise typer.Exit(code=1)

    # Without --root, the directory holding the file is what gets indexed.
    repo_root = file_path.parent if not root else root.resolve()

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Erro de parsing: {exc}", err=True)
        raise typer.Exit(code=1)

    # The index is looked up by "<absolute file path>#<section id>".
    abs_uri = f"{file_path}#{section_id}"
    if abs_uri not in graph.index.sections:
        typer.echo(f"Erro: URI '{abs_uri}' nao encontrada no indice.", err=True)
        raise typer.Exit(code=1)

    # The composer appends its warnings to this list instead of printing them.
    collected_warnings: list[str] = []
    compose_options = {
        "strict": strict,
        "deduplicate": deduplicate,
        "warnings": collected_warnings,
        "depth": depth,
    }
    try:
        result = do_compose(abs_uri, graph, **compose_options)
    except ValueError as exc:
        typer.echo(f"Erro: {exc}", err=True)
        raise typer.Exit(code=1)

    for warning_text in collected_warnings:
        typer.echo(f"Aviso: {warning_text}", err=True)

    if not json_output:
        typer.echo(result, nl=False)
        return

    typer.echo(json_mod.dumps({"uri": abs_uri, "content": result}, ensure_ascii=False))
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# ---------------------------------------------------------------------------
|
|
301
|
+
# validate
|
|
302
|
+
# ---------------------------------------------------------------------------
|
|
303
|
+
|
|
304
|
+
@app.command()
def validate(
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Diretorio raiz do repositorio (padrao: diretorio atual).",
    ),
    json_output: bool = typer.Option(False, "--json", help="Exportar resultado como JSON."),
) -> None:
    """
    Verifica a integridade estrutural do repositorio de grafos Markdown.

    Checks: broken refs/includes, duplicate section IDs, include cycles,
    sections without required payload.

    Exit code 0 = clean, 1 = errors found.
    """
    # The docstring above is kept verbatim because Typer shows it as the
    # command's help text.
    # NOTE(review): this function itself only checks broken refs/includes and
    # include cycles, and never appends to `warnings`.  Duplicate IDs and
    # missing payloads are presumably surfaced by index_repository() as a
    # ParseError — confirm.
    import json as json_mod
    from mdbind.index import index_repository

    repo_root = root.resolve() if root else Path.cwd()

    # Declared once, before the try, so both the parse-failure path and the
    # normal path share the same annotated lists.
    errors: list[dict] = []
    warnings: list[dict] = []

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        # Indexing failed: report that single error and stop.
        errors.append({"type": "parse_error", "uri": "", "detail": str(exc)})
        summary = {"total_sections": 0, "total_edges": 0, "errors": 1, "warnings": 0}
        if json_output:
            typer.echo(json_mod.dumps({"errors": errors, "warnings": [], "summary": summary}, ensure_ascii=False))
        else:
            typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    all_uris = set(graph.index.sections.keys())
    total_edges = sum(len(targets) for targets in graph.outgoing_edges.values())

    # 1. Broken refs and includes: every ref/include target must be indexed.
    for src_uri, section in graph.index.sections.items():
        for directive in section.directives:
            if directive.type not in ("ref", "include"):
                continue
            if directive.target_uri in all_uris:
                continue
            error_type = "broken_ref" if directive.type == "ref" else "broken_include"
            errors.append({
                "type": error_type,
                "uri": src_uri,
                "detail": f"target '{directive.target_uri}' not found in index",
            })

    # 2. Include cycles: depth-first search that tracks the current path.
    def _dfs_cycle(uri: str, path: frozenset[str], visited_global: set[str]) -> None:
        # Reaching a node that is already on the current path closes a cycle.
        if uri in path:
            errors.append({
                "type": "cycle",
                "uri": uri,
                "detail": f"include cycle detected involving '{uri}'",
            })
            return
        # Fully explored earlier: nothing new can be found below it.
        if uri in visited_global:
            return
        visited_global.add(uri)
        section = graph.index.sections.get(uri)
        if section is None:
            return
        new_path = path | {uri}
        for directive in section.directives:
            # Only includes form cycles here; broken targets were reported above.
            if directive.type == "include" and directive.target_uri in all_uris:
                _dfs_cycle(directive.target_uri, new_path, visited_global)

    visited_global: set[str] = set()
    # Sorted start order: iterating the set directly makes the node a cycle is
    # reported on (and the order of the errors) depend on string hashing,
    # which changes between interpreter runs.
    for uri in sorted(all_uris):
        _dfs_cycle(uri, frozenset(), visited_global)

    summary = {
        "total_sections": len(all_uris),
        "total_edges": total_edges,
        "errors": len(errors),
        "warnings": len(warnings),
    }

    if json_output:
        typer.echo(json_mod.dumps(
            {"errors": errors, "warnings": warnings, "summary": summary},
            ensure_ascii=False,
            indent=2,
        ))
    else:
        for e in errors:
            typer.echo(f"ERROR [{e['type']}] {e['uri']}: {e['detail']}")
        for w in warnings:
            typer.echo(f"WARNING [{w['type']}] {w['uri']}: {w['detail']}")
        if not errors and not warnings:
            typer.echo(f"OK — {summary['total_sections']} sections, {summary['total_edges']} edges, no issues found.")
        else:
            typer.echo(
                f"\nSummary: {summary['total_sections']} sections, "
                f"{summary['errors']} errors, {summary['warnings']} warnings.",
                err=True,
            )

    if errors:
        raise typer.Exit(code=1)
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
# ---------------------------------------------------------------------------
|
|
412
|
+
# context (B-016)
|
|
413
|
+
# ---------------------------------------------------------------------------
|
|
414
|
+
|
|
415
|
+
@app.command()
def context(
    uri: str = typer.Argument(..., help="Section URI in the format file.md#id"),
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Repository root directory (default: file directory).",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Returns structured context of a section: metadata, outgoing edges, incoming edges.
    """
    # The docstring above is kept verbatim because Typer shows it as the
    # command's help text.
    import json as json_mod
    from mdbind.index import index_repository

    file_path_str, section_id = _split_uri(uri)
    file_path = Path(file_path_str).resolve()
    # Without --root, the directory holding the file is what gets indexed.
    repo_root = root.resolve() if root else file_path.parent

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    # The index is looked up by "<absolute file path>#<section id>".
    abs_uri = str(file_path) + "#" + section_id

    if abs_uri not in graph.index.sections:
        typer.echo(f"Error: URI '{abs_uri}' not found in index.", err=True)
        raise typer.Exit(code=1)

    section = graph.index.sections[abs_uri]

    outgoing = [
        {"uri": t, "type": _edge_type(abs_uri, t, section)}
        for t in sorted(graph.outgoing_edges.get(abs_uri, set()))
    ]
    # The JSON shape of incoming entries is left untouched for consumers.
    incoming = [
        {"uri": s, "type": "incoming"}
        for s in sorted(graph.incoming_edges.get(abs_uri, set()))
    ]

    if json_output:
        typer.echo(json_mod.dumps({
            "uri": abs_uri,
            "metadata": section.metadata,
            "outgoing": outgoing,
            "incoming": incoming,
        }, ensure_ascii=False, indent=2))
    else:
        typer.echo(f"URI: {abs_uri}")
        typer.echo(f"Metadata: {section.metadata}")
        if outgoing:
            typer.echo("Outgoing:")
            for e in outgoing:
                typer.echo(f"  [{e['type']}] {e['uri']}")
        if incoming:
            typer.echo("Incoming:")
            for e in incoming:
                # Label with the directive type the source section actually
                # uses; a fixed "[ref]" mislabels includes.  Sources missing
                # from the index keep the "ref" fallback.
                source_section = graph.index.sections.get(e["uri"])
                if source_section is None:
                    kind = "ref"
                else:
                    kind = _edge_type(e["uri"], abs_uri, source_section)
                typer.echo(f"  [{kind}] {e['uri']}")
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def _edge_type(src_uri: str, target_uri: str, section) -> str:
    """Return the directive type that links *section* to *target_uri*.

    Args:
        src_uri: URI of the source section; accepted for call-site symmetry
            and not used in the lookup.
        target_uri: URI the directive must point to.
        section: Source section object with a ``directives`` sequence, or
            ``None`` when the caller could not find it in the index.

    Returns:
        The ``type`` of the first directive whose ``target_uri`` matches, or
        ``"ref"`` when there is no match or no section.
    """
    # Callers look the section up with dict.get(), so None can arrive here;
    # fall back to the default instead of failing on ``None.directives``.
    if section is None:
        return "ref"
    return next(
        (d.type for d in section.directives if d.target_uri == target_uri),
        "ref",
    )
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
# ---------------------------------------------------------------------------
|
|
485
|
+
# backlinks (B-017)
|
|
486
|
+
# ---------------------------------------------------------------------------
|
|
487
|
+
|
|
488
|
+
@app.command()
def backlinks(
    uri: str = typer.Argument(..., help="Section URI in the format file.md#id"),
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Repository root directory (default: file directory).",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Lists all sections that reference this URI (incoming edges).
    """
    # Each backlink is reported with the directive type its source section
    # uses to point at this URI.
    import json as json_mod
    from mdbind.index import index_repository

    file_path_str, section_id = _split_uri(uri)
    file_path = Path(file_path_str).resolve()
    # Without --root, the directory holding the file is what gets indexed.
    repo_root = file_path.parent if not root else root.resolve()

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    sections = graph.index.sections
    abs_uri = f"{file_path}#{section_id}"
    if abs_uri not in sections:
        typer.echo(f"Error: URI '{abs_uri}' not found in index.", err=True)
        raise typer.Exit(code=1)

    result = []
    for source in sorted(graph.incoming_edges.get(abs_uri, set())):
        kind = _edge_type(source, abs_uri, sections.get(source))
        result.append({"uri": source, "type": kind})

    if json_output:
        typer.echo(json_mod.dumps({"uri": abs_uri, "backlinks": result}, ensure_ascii=False, indent=2))
        return

    if not result:
        typer.echo(f"No backlinks found for '{abs_uri}'.")
        return

    typer.echo(f"Backlinks for '{abs_uri}':")
    for entry in result:
        typer.echo(f"  [{entry['type']}] {entry['uri']}")
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
# ---------------------------------------------------------------------------
|
|
534
|
+
# search (B-018)
|
|
535
|
+
# ---------------------------------------------------------------------------
|
|
536
|
+
|
|
537
|
+
@app.command()
def search(
    predicate: str = typer.Argument(..., help="Predicate: key=value, key~=value, or tag:value"),
    root: Path = typer.Option(
        ..., "--root", "-r",
        help="Repository root directory.",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Searches sections by metadata predicate. Supports key=value, key~=value, tag:value.
    """
    # The docstring above is kept verbatim because Typer shows it as the
    # command's help text.
    import json as json_mod
    import re
    from mdbind.index import index_repository

    # Parse the predicate before indexing so a malformed one is rejected
    # without doing any repository work.
    tag_match = re.match(r"^tag:(.+)$", predicate)
    substring_match = re.match(r"^([^~=]+)~=(.+)$", predicate)
    exact_match = re.match(r"^([^~=]+)=(.+)$", predicate)

    # A predicate in none of the three forms used to match nothing and look
    # like an empty result; report it as a usage error instead.
    if not (tag_match or substring_match or exact_match):
        typer.echo(
            f"Error: invalid predicate '{predicate}'. "
            "Expected key=value, key~=value, or tag:value.",
            err=True,
        )
        raise typer.Exit(code=1)

    try:
        graph = index_repository(root.resolve())
    except ParseError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    def _matches(metadata: dict) -> bool:
        # Precedence: tag form, then substring form, then exact form.
        if tag_match:
            tag_val = tag_match.group(1)
            # "or []" also covers an explicit null/empty "tags" value, which
            # would otherwise make the membership test raise TypeError.
            tags = metadata.get("tags") or []
            if isinstance(tags, str):
                # A comma-separated string is treated as a list of tags.
                tags = [t.strip() for t in tags.split(",")]
            return tag_val in tags
        if substring_match:
            key, val = substring_match.group(1), substring_match.group(2)
            # Case-insensitive containment on the stringified value.
            return val.lower() in str(metadata.get(key, "")).lower()
        key, val = exact_match.group(1), exact_match.group(2)
        return str(metadata.get(key, "")) == val

    results = [
        {"uri": uri, "metadata": section.metadata}
        for uri, section in graph.index.sections.items()
        if _matches(section.metadata)
    ]
    results.sort(key=lambda r: r["uri"])

    if json_output:
        typer.echo(json_mod.dumps({"predicate": predicate, "results": results}, ensure_ascii=False, indent=2))
    else:
        if not results:
            typer.echo(f"No sections found matching '{predicate}'.")
        else:
            typer.echo(f"Found {len(results)} section(s) matching '{predicate}':")
            for r in results:
                typer.echo(f"  {r['uri']}")
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
# ---------------------------------------------------------------------------
|
|
598
|
+
# impact (B-019)
|
|
599
|
+
# ---------------------------------------------------------------------------
|
|
600
|
+
|
|
601
|
+
@app.command()
def impact(
    uri: str = typer.Argument(..., help="Section URI in the format file.md#id"),
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Repository root directory (default: file directory).",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Returns all sections that depend (directly or indirectly) on this URI via reverse BFS.
    """
    # "Direct" dependents are the immediate incoming edges; "indirect" ones
    # are everything else found by following incoming edges further.
    import json as json_mod
    from collections import deque
    from mdbind.index import index_repository

    file_path_str, section_id = _split_uri(uri)
    file_path = Path(file_path_str).resolve()
    # Without --root, the directory holding the file is what gets indexed.
    repo_root = file_path.parent if not root else root.resolve()

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    abs_uri = f"{file_path}#{section_id}"
    if abs_uri not in graph.index.sections:
        typer.echo(f"Error: URI '{abs_uri}' not found in index.", err=True)
        raise typer.Exit(code=1)

    # Breadth-first walk over the reversed graph, seeded with the direct
    # dependents; the start node is pre-marked so cycles never re-add it.
    direct = sorted(graph.incoming_edges.get(abs_uri, set()))
    seen = {abs_uri, *direct}
    pending = deque(direct)
    indirect: list[str] = []

    while pending:
        node = pending.popleft()
        for dependent in graph.incoming_edges.get(node, set()):
            if dependent in seen:
                continue
            seen.add(dependent)
            indirect.append(dependent)
            pending.append(dependent)

    indirect.sort()

    direct_out = [{"uri": u} for u in direct]
    indirect_out = [{"uri": u} for u in indirect]

    if json_output:
        payload = {
            "uri": abs_uri,
            "direct": direct_out,
            "indirect": indirect_out,
        }
        typer.echo(json_mod.dumps(payload, ensure_ascii=False, indent=2))
        return

    if not direct and not indirect:
        typer.echo(f"No sections depend on '{abs_uri}'.")
        return

    if direct:
        typer.echo(f"Direct dependents of '{abs_uri}':")
        for entry in direct_out:
            typer.echo(f"  {entry['uri']}")
    if indirect:
        typer.echo("Indirect dependents:")
        for entry in indirect_out:
            typer.echo(f"  {entry['uri']}")
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
# ---------------------------------------------------------------------------
|
|
673
|
+
# neighbors (B-020)
|
|
674
|
+
# ---------------------------------------------------------------------------
|
|
675
|
+
|
|
676
|
+
@app.command()
def neighbors(
    uri: str = typer.Argument(..., help="Section URI in the format file.md#id"),
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Repository root directory (default: file directory).",
    ),
    depth: int = typer.Option(1, "--depth", "-d", help="Max hops in either direction."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Returns all nodes reachable from URI within --depth hops (bidirectional).
    """
    # The docstring above is kept verbatim because Typer shows it as the
    # command's help text.
    import json as json_mod
    from collections import deque
    from mdbind.index import index_repository

    file_path_str, section_id = _split_uri(uri)
    file_path = Path(file_path_str).resolve()
    # Without --root, the directory holding the file is what gets indexed.
    repo_root = root.resolve() if root else file_path.parent

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    # The index is looked up by "<absolute file path>#<section id>".
    abs_uri = str(file_path) + "#" + section_id

    if abs_uri not in graph.index.sections:
        typer.echo(f"Error: URI '{abs_uri}' not found in index.", err=True)
        raise typer.Exit(code=1)

    # Breadth-first search following edges in both directions.
    # visited maps uri -> (distance, direction of the hop that first reached it).
    visited: dict[str, tuple[int, str]] = {abs_uri: (0, "self")}
    queue: deque[tuple[str, int]] = deque([(abs_uri, 0)])

    while queue:
        current, dist = queue.popleft()
        if dist >= depth:
            continue  # hop budget used up; do not expand further
        # Neighbours are visited in sorted order: the first hop to reach a
        # node fixes its direction label, and plain set iteration would let
        # that label change between runs for nodes reachable both ways at the
        # same distance.
        for nbr in sorted(graph.outgoing_edges.get(current, set())):
            if nbr not in visited:
                visited[nbr] = (dist + 1, "outgoing")
                queue.append((nbr, dist + 1))
        for nbr in sorted(graph.incoming_edges.get(current, set())):
            if nbr not in visited:
                visited[nbr] = (dist + 1, "incoming")
                queue.append((nbr, dist + 1))

    # Everything except the start node, nearest first, then by URI.
    result = sorted(
        [{"uri": u, "distance": d, "direction": dir_}
         for u, (d, dir_) in visited.items() if u != abs_uri],
        key=lambda x: (x["distance"], x["uri"]),
    )

    if json_output:
        typer.echo(json_mod.dumps(
            {"uri": abs_uri, "depth": depth, "neighbors": result},
            ensure_ascii=False, indent=2,
        ))
    else:
        if not result:
            typer.echo(f"No neighbors found within depth {depth}.")
        else:
            for n in result:
                typer.echo(f"  [d={n['distance']} {n['direction']}] {n['uri']}")
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
# ---------------------------------------------------------------------------
|
|
747
|
+
# explain (B-021)
|
|
748
|
+
# ---------------------------------------------------------------------------
|
|
749
|
+
|
|
750
|
+
@app.command()
def explain(
    uri_a: str = typer.Argument(..., help="Source URI file.md#id"),
    uri_b: str = typer.Argument(..., help="Target URI file.md#id"),
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Repository root directory.",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Finds all simple directed paths from URI_A to URI_B.
    """
    # Maintainer notes:
    #   * Both URIs are made absolute by resolving their file part, then looked
    #     up in the index built from --root (default: the directory of URI_A's
    #     file). A missing URI or an index ParseError exits with code 1.
    #   * Only outgoing edges are followed; a node never appears twice in one
    #     path, and a path stops as soon as it reaches the target.
    import json as json_mod
    from mdbind.index import index_repository

    def _resolve(uri: str) -> str:
        # 'file.md#id' -> '<absolute path of file.md>#id'
        file_str, frag = _split_uri(uri)
        return str(Path(file_str).resolve()) + "#" + frag

    file_path_a = Path(_split_uri(uri_a)[0]).resolve()
    repo_root = root.resolve() if root else file_path_a.parent

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    abs_a = _resolve(uri_a)
    abs_b = _resolve(uri_b)

    sections = graph.index.sections
    for u, label in [(abs_a, "source"), (abs_b, "target")]:
        if u not in sections:
            typer.echo(f"Error: {label} URI '{u}' not found in index.", err=True)
            raise typer.Exit(code=1)

    # DFS all simple paths (following outgoing edges only)
    all_paths: list[list[str]] = []

    def _dfs(current: str, path: list[str], on_path: set[str]) -> None:
        if current == abs_b:
            all_paths.append(list(path))
            return
        # Successors are sorted so that the order of the reported paths does
        # not depend on set iteration order.
        for nxt in sorted(graph.outgoing_edges.get(current, ())):
            if nxt in on_path:
                continue
            path.append(nxt)
            on_path.add(nxt)
            _dfs(nxt, path, on_path)
            path.pop()
            on_path.discard(nxt)

    _dfs(abs_a, [abs_a], {abs_a})

    # Each path is rendered as its source node (no incoming edge, hence
    # edge_type None) followed by one entry per hop. The hop's edge type is
    # computed from the *previous* node to the current one.
    # NOTE(review): _edge_type is defined elsewhere in this module; it is
    # called here as (source URI, target URI, source section) - confirm
    # against its signature.
    paths_full = [
        [{"uri": path[0], "edge_type": None}]
        + [
            {"uri": dst, "edge_type": _edge_type(src, dst, sections.get(src))}
            for src, dst in zip(path, path[1:])
        ]
        for path in all_paths
    ]

    if json_output:
        typer.echo(json_mod.dumps(
            {"from": abs_a, "to": abs_b, "paths": paths_full},
            ensure_ascii=False, indent=2,
        ))
        return

    if not all_paths:
        typer.echo(f"No paths found from '{abs_a}' to '{abs_b}'.")
        return

    typer.echo(f"Found {len(all_paths)} path(s):")
    for i, path in enumerate(all_paths, 1):
        typer.echo(f" Path {i}: " + " → ".join(path))
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
# ---------------------------------------------------------------------------
|
|
830
|
+
# diff (B-022)
|
|
831
|
+
# ---------------------------------------------------------------------------
|
|
832
|
+
|
|
833
|
+
@app.command()
def diff(
    root: Path = typer.Option(
        ..., "--root", "-r",
        help="Repository root directory.",
    ),
    since: str = typer.Option(
        "HEAD~1", "--since",
        help="Git ref to compare against (default: HEAD~1).",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Computes structural diff of the graph against a historical git ref.
    """
    # Maintainer notes:
    #   1. Index the working tree under --root (current graph).
    #   2. Rebuild a historical graph in memory from the blobs git stores at
    #      `since` (no checkout, no temporary files).
    #   3. Report sections and edges present on only one side.
    # Exits with code 1 when the current tree fails to parse, when --root is
    # not inside a git work tree / git is missing, or when `since` is unknown.
    import json as json_mod
    import subprocess
    from mdbind.index import index_repository
    from mdbind.parser import parse_text
    from mdbind.models import SectionGraph, SectionIndex

    repo_root = root.resolve()

    # Build current graph
    try:
        current_graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Error (current): {exc}", err=True)
        raise typer.Exit(code=1)

    # Find git root
    try:
        git_root = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"],
            cwd=str(repo_root),
            stderr=subprocess.DEVNULL,
        ).decode().strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        typer.echo("Error: not a git repository or git not available.", err=True)
        raise typer.Exit(code=1)

    # List files tracked by git at the given ref. `-z` makes git emit raw,
    # NUL-terminated names; without it, names containing non-ASCII or special
    # characters are C-quoted ("...") and would never end with ".md".
    try:
        listing = subprocess.check_output(
            ["git", "ls-tree", "-r", "--name-only", "-z", since],
            cwd=git_root,
            stderr=subprocess.DEVNULL,
        ).decode(errors="replace")
    except subprocess.CalledProcessError:
        typer.echo(f"Error: git ref '{since}' not found.", err=True)
        raise typer.Exit(code=1)

    # The listing is relative to the git top-level, while the current graph
    # was indexed from repo_root. Keep only historical files located under
    # repo_root so both sides cover the same subtree; otherwise everything
    # outside --root would be reported as removed.
    # NOTE(review): assumes index_repository only indexes files below the
    # directory it is given - confirm.
    scope_base = Path(git_root).resolve()
    md_files_at_ref = [
        name for name in listing.split("\0")
        if name.endswith(".md") and repo_root in (scope_base / name).parents
    ]

    # Build historical graph in memory from git content
    hist_index = SectionIndex()
    hist_graph = SectionGraph(index=hist_index)

    for rel_path in md_files_at_ref:
        try:
            content = subprocess.check_output(
                ["git", "show", f"{since}:{rel_path}"],
                cwd=git_root,
                stderr=subprocess.DEVNULL,
            ).decode(errors="replace")
        except subprocess.CalledProcessError:
            # Best effort: a blob git cannot show is left out of the snapshot.
            continue

        abs_path = Path(git_root) / rel_path
        try:
            sections = parse_text(content, abs_path)
        except ParseError:
            # Best effort: historical files that do not parse are skipped.
            continue

        for section in sections:
            try:
                hist_index.add(section)
            except ValueError:
                # Sections the index rejects contribute neither a node nor
                # edges to the historical graph.
                continue
            for directive in section.directives:
                if directive.type in ("ref", "include"):
                    hist_graph.add_edge(section.uri, directive.target_uri)

    # Compute diff
    current_uris = set(current_graph.index.sections)
    hist_uris = set(hist_graph.index.sections)

    added_sections = [{"uri": u} for u in sorted(current_uris - hist_uris)]
    removed_sections = [{"uri": u} for u in sorted(hist_uris - current_uris)]

    current_edges: set[tuple[str, str]] = {
        (src, tgt)
        for src, targets in current_graph.outgoing_edges.items()
        for tgt in targets
    }
    hist_edges: set[tuple[str, str]] = {
        (src, tgt)
        for src, targets in hist_graph.outgoing_edges.items()
        for tgt in targets
    }

    added_edges = [
        {"from": s, "to": t, "type": "edge"}
        for s, t in sorted(current_edges - hist_edges)
    ]
    removed_edges = [
        {"from": s, "to": t, "type": "edge"}
        for s, t in sorted(hist_edges - current_edges)
    ]

    result = {
        "since": since,
        "added_sections": added_sections,
        "removed_sections": removed_sections,
        "added_edges": added_edges,
        "removed_edges": removed_edges,
    }

    if json_output:
        typer.echo(json_mod.dumps(result, ensure_ascii=False, indent=2))
        return

    typer.echo(f"Diff against '{since}':")
    typer.echo(f" +{len(added_sections)} sections, -{len(removed_sections)} sections")
    typer.echo(f" +{len(added_edges)} edges, -{len(removed_edges)} edges")
    for s in added_sections:
        typer.echo(f" + [section] {s['uri']}")
    for s in removed_sections:
        typer.echo(f" - [section] {s['uri']}")
    for e in added_edges:
        typer.echo(f" + [edge] {e['from']} → {e['to']}")
    for e in removed_edges:
        typer.echo(f" - [edge] {e['from']} → {e['to']}")
|
|
960
|
+
|
|
961
|
+
|
|
962
|
+
# ---------------------------------------------------------------------------
|
|
963
|
+
# query (B-023)
|
|
964
|
+
# ---------------------------------------------------------------------------
|
|
965
|
+
|
|
966
|
+
@app.command()
def query(
    expression: str = typer.Argument(..., help="Boolean expression: tag:api AND owner=team AND NOT status=obsolete"),
    root: Path = typer.Option(
        ..., "--root", "-r",
        help="Repository root directory.",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Advanced boolean metadata query. Supports AND, OR, NOT, parentheses, and predicates.

    Predicate formats: key=value, key~=value, tag:value
    """
    # Maintainer notes:
    #   Grammar (keywords are case-insensitive; NOT binds tightest, then AND,
    #   then OR):
    #       expr   := term   ("OR"  term)*
    #       term   := factor ("AND" factor)*
    #       factor := "NOT" factor | "(" expr ")" | predicate
    #   An empty expression matches every section. A malformed expression
    #   (dangling operator, unbalanced parenthesis, tokens left over after a
    #   complete expression) is reported and exits with code 1 instead of
    #   being evaluated with part of the input silently ignored.
    import json as json_mod
    import re
    from mdbind.index import index_repository

    try:
        graph = index_repository(root.resolve())
    except ParseError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    # --- Tokenizer ---
    # Tokens are parentheses or maximal runs of non-space, non-parenthesis
    # characters. Keywords are recognised by the parser on whole tokens, so a
    # predicate such as "or=1" or "not~=x" is not split at its keyword prefix.
    _TOKEN_RE = re.compile(r'\(|\)|[^\s()]+')

    def _tokenize(expr: str) -> list[str]:
        return _TOKEN_RE.findall(expr)

    # --- Predicate evaluator ---
    def _eval_predicate(pred: str, metadata: dict) -> bool:
        # tag:value -> membership in metadata["tags"] (a list, or a
        # comma-separated string)
        tag_m = re.match(r"^tag:(.+)$", pred)
        if tag_m:
            # `or []` also covers a "tags" key that is present but None/empty.
            tags = metadata.get("tags") or []
            if isinstance(tags, str):
                tags = [t.strip() for t in tags.split(",")]
            elif not isinstance(tags, (list, tuple, set, frozenset)):
                # A scalar that is not a string cannot be searched with `in`.
                tags = [tags]
            return tag_m.group(1) in tags

        # key~=value -> case-insensitive substring match
        sub_m = re.match(r"^([^~=]+)~=(.+)$", pred)
        if sub_m:
            key, val = sub_m.group(1), sub_m.group(2)
            return val.lower() in str(metadata.get(key, "")).lower()

        # key=value -> exact match on the string form of the value
        exact_m = re.match(r"^([^~=]+)=(.+)$", pred)
        if exact_m:
            key, val = exact_m.group(1), exact_m.group(2)
            return str(metadata.get(key, "")) == val

        # Unrecognised predicate formats never match.
        return False

    # --- Recursive descent parser ---
    # Every parse_* method returns a callable taking a metadata dict and
    # returning a truth value.
    class _Parser:
        def __init__(self, tokens: list[str]) -> None:
            self.tokens = tokens
            self.pos = 0

        def peek(self) -> str | None:
            return self.tokens[self.pos] if self.pos < len(self.tokens) else None

        def consume(self) -> str:
            tok = self.tokens[self.pos]
            self.pos += 1
            return tok

        def _at_keyword(self, keyword: str) -> bool:
            tok = self.peek()
            return tok is not None and tok.upper() == keyword

        def parse_expr(self):
            left = self.parse_term()
            while self._at_keyword("OR"):
                self.consume()
                right = self.parse_term()
                # Defaults bind the current operands (avoids late binding).
                left = lambda meta, l=left, r=right: l(meta) or r(meta)
            return left

        def parse_term(self):
            left = self.parse_factor()
            while self._at_keyword("AND"):
                self.consume()
                right = self.parse_factor()
                left = lambda meta, l=left, r=right: l(meta) and r(meta)
            return left

        def parse_factor(self):
            tok = self.peek()
            if tok is None:
                raise ValueError("unexpected end of expression")
            keyword = tok.upper()
            if keyword == "NOT":
                self.consume()
                inner = self.parse_factor()
                return lambda meta, f=inner: not f(meta)
            if tok == "(":
                self.consume()
                inner = self.parse_expr()
                if self.peek() != ")":
                    raise ValueError("missing closing ')'")
                self.consume()
                return inner
            if tok == ")" or keyword in ("AND", "OR"):
                raise ValueError(f"unexpected token '{tok}'")
            pred = self.consume()
            return lambda meta, p=pred: _eval_predicate(p, meta)

    try:
        tokens = _tokenize(expression)
        if tokens:
            parser = _Parser(tokens)
            matcher = parser.parse_expr()
            leftover = parser.peek()
            if leftover is not None:
                # e.g. "a=1 NOT b=2" or a stray ")": refuse rather than
                # evaluate only the leading part of the expression.
                raise ValueError(f"unexpected token '{leftover}'")
        else:
            matcher = lambda meta: True
    except ValueError as exc:
        typer.echo(f"Error parsing expression: {exc}", err=True)
        raise typer.Exit(code=1)

    results = sorted(
        [{"uri": uri, "metadata": section.metadata}
         for uri, section in graph.index.sections.items()
         if matcher(section.metadata)],
        key=lambda r: r["uri"],
    )

    if json_output:
        typer.echo(json_mod.dumps(
            {"expression": expression, "results": results},
            ensure_ascii=False, indent=2,
        ))
        return

    if not results:
        typer.echo(f"No sections matched '{expression}'.")
        return

    typer.echo(f"Found {len(results)} section(s):")
    for r in results:
        typer.echo(f" {r['uri']}")
|
|
1096
|
+
|
|
1097
|
+
|
|
1098
|
+
# ---------------------------------------------------------------------------
|
|
1099
|
+
# context-compose (B-024)
|
|
1100
|
+
# ---------------------------------------------------------------------------
|
|
1101
|
+
|
|
1102
|
+
@app.command(name="context-compose")
def context_compose(
    uri: str = typer.Argument(..., help="Section URI in the format file.md#id"),
    root: Optional[Path] = typer.Option(
        None, "--root", "-r",
        help="Repository root directory (default: file directory).",
    ),
    depth: Optional[int] = typer.Option(
        None, "--depth", "-d",
        help="Max inclusion depth (default: unlimited).",
    ),
    token_limit: Optional[int] = typer.Option(
        None, "--token-limit", "-t",
        help="Approximate token budget (1 token ≈ 4 chars). Truncates when exceeded.",
    ),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """
    Bounded semantic materialization for LLM consumption.
    Like compose, but respects --depth and --token-limit budgets.
    """
    # Maintainer notes:
    #   * The section is composed non-strictly and without deduplication,
    #     then cut to token_limit * 4 characters when a budget is given.
    #   * Exits with code 1 on: negative --token-limit, missing file, index
    #     ParseError, URI absent from the index, or ValueError from compose.
    import json as json_mod
    from mdbind.composer import compose as do_compose
    from mdbind.index import index_repository

    # A negative budget would turn into a negative slice bound below, which
    # drops characters from the *end* of the content instead of limiting it.
    if token_limit is not None and token_limit < 0:
        typer.echo(f"Error: --token-limit must be >= 0. Received: {token_limit}", err=True)
        raise typer.Exit(code=1)

    file_path_str, section_id = _split_uri(uri)
    file_path = Path(file_path_str).resolve()
    repo_root = root.resolve() if root else file_path.parent

    if not file_path.exists():
        typer.echo(f"Error: file not found: '{file_path}'", err=True)
        raise typer.Exit(code=1)

    try:
        graph = index_repository(repo_root)
    except ParseError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    abs_uri = str(file_path) + "#" + section_id

    if abs_uri not in graph.index.sections:
        typer.echo(f"Error: URI '{abs_uri}' not found in index.", err=True)
        raise typer.Exit(code=1)

    collected_warnings: list[str] = []
    try:
        content = do_compose(
            abs_uri,
            graph,
            strict=False,
            deduplicate=False,
            warnings=collected_warnings,
            depth=depth,
        )
    except ValueError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(code=1)

    # Report whatever the composer recorded instead of discarding it. This
    # goes to stderr so stdout (the content / JSON document) is unchanged.
    # NOTE(review): presumes compose appends its non-strict problems to the
    # list passed as `warnings` - confirm in mdbind.composer.
    for warning in collected_warnings:
        typer.echo(f"Warning: {warning}", err=True)

    truncated = False
    if token_limit is not None:
        char_limit = token_limit * 4
        if len(content) > char_limit:
            content = content[:char_limit]
            truncated = True

    # Estimated on the content actually emitted (i.e. after truncation).
    token_estimate = len(content) // 4

    if json_output:
        typer.echo(json_mod.dumps({
            "uri": abs_uri,
            "depth": depth,
            "token_estimate": token_estimate,
            "truncated": truncated,
            "content": content,
        }, ensure_ascii=False))
        return

    if truncated:
        typer.echo(f"# [truncated at ~{token_limit} tokens]\n", err=True)
    typer.echo(content, nl=False)
|