mdbind 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbind/__init__.py +0 -0
- mdbind/cache.py +170 -0
- mdbind/cli.py +1181 -0
- mdbind/composer.py +135 -0
- mdbind/cycle.py +24 -0
- mdbind/directives.py +116 -0
- mdbind/index.py +57 -0
- mdbind/models.py +86 -0
- mdbind/parser.py +241 -0
- mdbind-0.1.0.dist-info/METADATA +9 -0
- mdbind-0.1.0.dist-info/RECORD +14 -0
- mdbind-0.1.0.dist-info/WHEEL +5 -0
- mdbind-0.1.0.dist-info/entry_points.txt +2 -0
- mdbind-0.1.0.dist-info/top_level.txt +1 -0
mdbind/__init__.py
ADDED
|
File without changes
|
mdbind/cache.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cache persistente do SectionIndex (spec section 7).
|
|
3
|
+
|
|
4
|
+
Serializa o indice em <root>/.mdgraph/index.json e, em execucoes subsequentes,
|
|
5
|
+
reprocessa apenas os arquivos cujo hash SHA-256 tenha mudado.
|
|
6
|
+
Arquivos removidos tem suas secoes expurgadas automaticamente.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Dict, Optional
|
|
14
|
+
|
|
15
|
+
# Versao do esquema do cache; mudar quando o formato mudar de forma incompativel
|
|
16
|
+
_CACHE_VERSION = 1
|
|
17
|
+
_CACHE_DIR = ".mdgraph"
|
|
18
|
+
_CACHE_FILE = "index.json"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Hash de arquivo
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
def file_hash(path: Path) -> str:
    """Return the SHA-256 hex digest of the contents of *path*.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so memory use stays bounded regardless of the file size. The
    resulting digest is identical to hashing the whole content at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # iter(callable, sentinel) keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: stream.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Leitura e escrita do cache
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
def _cache_path(root: Path) -> Path:
    """Return the path of the cache file located under *root*.

    The path is composed from *root*, ``_CACHE_DIR`` and ``_CACHE_FILE``;
    nothing is created or checked on disk.
    """
    return root.joinpath(_CACHE_DIR, _CACHE_FILE)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def load_cache(root: Path) -> Optional[dict]:
    """Load the persisted cache for *root* from disk.

    Returns:
        The decoded cache payload when the cache file can be read, contains
        a JSON object, and its ``"version"`` entry equals ``_CACHE_VERSION``.
        ``None`` in every other case (missing or unreadable file, corrupt
        contents, non-object JSON, version mismatch), so the caller can
        rebuild everything from scratch.
    """
    cp = _cache_path(root)
    try:
        data = json.loads(cp.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: the file is missing or cannot be read.
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError,
        # i.e. the file exists but its contents are corrupt.
        return None
    # Valid JSON that is not an object (a list, a number, ...) has no .get();
    # treat it as an invalid cache instead of raising AttributeError.
    if not isinstance(data, dict) or data.get("version") != _CACHE_VERSION:
        return None
    return data
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def save_cache(root: Path, file_hashes: Dict[str, str], sections_data: list) -> None:
    """Write the cache payload to the cache file under *root*.

    The parent directory of the cache file is created when missing. The
    payload is stored as indented UTF-8 JSON with the keys ``"version"``,
    ``"file_hashes"`` and ``"sections"``.

    Args:
        root: directory under which the cache directory lives.
        file_hashes: mapping of file path string to SHA-256 hex digest
            (the original note describes the keys as ``str(abs_path)``).
        sections_data: list of JSON-serializable dicts, one per section.
    """
    target = _cache_path(root)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Serialize fully before opening the file, so a serialization error
    # never truncates an existing cache file.
    serialized = json.dumps(
        {
            "version": _CACHE_VERSION,
            "file_hashes": file_hashes,
            "sections": sections_data,
        },
        ensure_ascii=False,
        indent=2,
    )
    with target.open("w", encoding="utf-8") as handle:
        handle.write(serialized)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Serializacao / desserializacao de ParsedSection
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
def serialize_section(section) -> dict:
    """Turn a parsed section object into a plain dict for the JSON cache.

    Reads ``uri``, ``file_path``, ``metadata``, ``directives`` (each with
    ``type`` and ``target_uri``) and ``raw`` (heading and token/line span
    attributes) from *section* and copies them into nested dicts/lists.
    Values are copied as-is; no conversion is applied.
    """
    payload = {
        "uri": section.uri,
        "file_path": section.file_path,
        "metadata": section.metadata,
    }

    directive_items = []
    for directive in section.directives:
        directive_items.append(
            {"type": directive.type, "target_uri": directive.target_uri}
        )
    payload["directives"] = directive_items

    raw = section.raw
    raw_fields = (
        "heading_level",
        "heading_text",
        "token_start",
        "token_end",
        "source_start_line",
        "source_end_line",
    )
    payload["raw"] = {field: getattr(raw, field) for field in raw_fields}
    return payload
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def deserialize_section(data: dict):
    """Rebuild a ParsedSection from a dict previously stored in the cache.

    ``data["raw"]`` is expanded as keyword arguments into ``RawSection``;
    each entry of the optional ``"directives"`` list becomes a ``Directive``.
    ``"uri"``, ``"file_path"`` and ``"metadata"`` are passed through as-is.

    Raises:
        KeyError: if a required key is missing from *data* or a directive.
    """
    # Imported locally, as in the original (presumably to avoid an import
    # cycle with mdbind.models -- not verifiable from this file alone).
    from mdbind.models import Directive, ParsedSection, RawSection

    raw_section = RawSection(**data["raw"])

    rebuilt_directives = []
    for item in data.get("directives", []):
        rebuilt_directives.append(
            Directive(type=item["type"], target_uri=item["target_uri"])
        )

    return ParsedSection(
        raw=raw_section,
        uri=data["uri"],
        file_path=data["file_path"],
        metadata=data["metadata"],
        directives=rebuilt_directives,
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
# Logica incremental
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
def build_index_with_cache(
    root: Path,
    md_files: list[Path],
    no_cache: bool = False,
) -> tuple[list, Dict[str, str]]:
    """Build the section list for *md_files*, reusing the on-disk cache.

    Every file is hashed. A file whose current hash equals the cached hash
    has its sections restored from the cache; any other file is parsed with
    ``parse_file``. Sections cached for files that are not in *md_files*
    are never restored, so removed files drop out of the result.

    A cache that is structurally malformed (a section entry without
    ``"file_path"``, or an entry that cannot be deserialized) is not
    trusted: the affected files are reparsed instead of raising.

    Args:
        root: directory that holds the cache directory.
        md_files: files to index.
        no_cache: when true, ignore any existing cache and parse everything.

    Returns:
        ``(sections, file_hashes)`` where ``sections`` is the list of parsed
        sections in *md_files* order and ``file_hashes`` maps ``str(path)``
        to the current SHA-256 hex digest of each file.
    """
    from mdbind.parser import parse_file

    current_hashes: Dict[str, str] = {str(f): file_hash(f) for f in md_files}

    # No cache available, or caching disabled: parse everything.
    cached = None if no_cache else load_cache(root)
    if cached is None:
        return _parse_all(md_files, parse_file), current_hashes

    cached_hashes: Dict[str, str] = cached.get("file_hashes", {})

    # Group the cached section dicts by the file they belong to.
    # NOTE(review): cache hits are looked up by str(f) below; this assumes a
    # section's "file_path" equals str(f) for its file -- confirm in parse_file.
    cached_by_file: Dict[str, list] = {}
    try:
        for entry in cached.get("sections", []):
            cached_by_file.setdefault(entry["file_path"], []).append(entry)
    except (KeyError, TypeError):
        # An entry cannot be attributed to a file, so the grouping would be
        # incomplete; discard the whole cache rather than lose sections.
        return _parse_all(md_files, parse_file), current_hashes

    sections: list = []
    for f in md_files:
        fs = str(f)
        restored = None
        if cached_hashes.get(fs) == current_hashes[fs]:
            # Cache hit: restore this file's sections from the cached dicts.
            try:
                restored = [
                    deserialize_section(entry)
                    for entry in cached_by_file.get(fs, [])
                ]
            except (KeyError, TypeError):
                # Malformed cached entry: fall back to parsing this file.
                restored = None
        # Cache miss (new/modified file) or unusable entry: parse the file.
        sections.extend(parse_file(f) if restored is None else restored)

    return sections, current_hashes
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _parse_all(md_files: list[Path], parse_file) -> list:
    """Parse every file in *md_files* and return all sections as one flat list.

    ``parse_file`` is called once per file, in order, and the items it
    yields are concatenated in that same order.
    """
    return [section for md_file in md_files for section in parse_file(md_file)]
|