mdbind 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbind/__init__.py ADDED
File without changes
mdbind/cache.py ADDED
@@ -0,0 +1,170 @@
1
+ """
2
+ Cache persistente do SectionIndex (spec section 7).
3
+
4
+ Serializa o indice em <root>/.mdgraph/index.json e, em execucoes subsequentes,
5
+ reprocessa apenas os arquivos cujo hash SHA-256 tenha mudado.
6
+ Arquivos removidos tem suas secoes expurgadas automaticamente.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import json
12
+ from pathlib import Path
13
+ from typing import Dict, Optional
14
+
15
+ # Versao do esquema do cache; mudar quando o formato mudar de forma incompativel
16
+ _CACHE_VERSION = 1
17
+ _CACHE_DIR = ".mdgraph"
18
+ _CACHE_FILE = "index.json"
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Hash de arquivo
23
+ # ---------------------------------------------------------------------------
24
+
25
def file_hash(path: Path) -> str:
    """Return the SHA-256 hex digest of the file's contents.

    The file is read in fixed-size chunks so that a large file never has to
    be held in memory all at once; the resulting digest is the same as
    hashing the whole content in one call.

    Raises whatever ``path.open`` raises (e.g. ``FileNotFoundError``) when
    the file cannot be read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        # iter(callable, sentinel) keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: handle.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Leitura e escrita do cache
34
+ # ---------------------------------------------------------------------------
35
+
36
def _cache_path(root: Path) -> Path:
    """Return the path of the cache file located under *root*."""
    cache_dir = root / _CACHE_DIR
    return cache_dir / _CACHE_FILE
38
+
39
+
40
def load_cache(root: Path) -> Optional[dict]:
    """
    Load the persisted cache from disk.

    Returns the decoded cache payload (a dict), or None when the cache file
    is missing or unreadable, is not valid UTF-8 JSON, does not contain a
    JSON object at the top level, or carries a schema version different from
    ``_CACHE_VERSION``.
    """
    cp = _cache_path(root)
    try:
        data = json.loads(cp.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: file absent or unreadable (reading directly instead of
        # checking exists() first also avoids a check-then-read race).
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.
        return None
    # A valid JSON document may still be a list/str/number; only an object
    # with the expected version counts as a usable cache.
    if not isinstance(data, dict) or data.get("version") != _CACHE_VERSION:
        return None
    return data
54
+
55
+
56
def save_cache(root: Path, file_hashes: Dict[str, str], sections_data: list) -> None:
    """
    Write the cache to disk, creating the cache directory if needed.

    file_hashes:   mapping of file path string -> SHA-256 hex digest
    sections_data: list of JSON-serializable dicts, one per parsed section
    """
    target = _cache_path(root)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(
        {
            "version": _CACHE_VERSION,
            "file_hashes": file_hashes,
            "sections": sections_data,
        },
        ensure_ascii=False,
        indent=2,
    )
    target.write_text(serialized, encoding="utf-8")
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Serializacao / desserializacao de ParsedSection
75
+ # ---------------------------------------------------------------------------
76
+
77
def serialize_section(section) -> dict:
    """Turn a parsed section object into a JSON-serializable dict.

    The result has the keys ``uri``, ``file_path``, ``metadata``,
    ``directives`` (a list of ``{"type", "target_uri"}`` dicts) and ``raw``
    (the heading/token/source-line fields copied from ``section.raw``).
    """
    payload = {
        "uri": section.uri,
        "file_path": section.file_path,
        "metadata": section.metadata,
    }

    directive_entries = []
    for directive in section.directives:
        directive_entries.append(
            {"type": directive.type, "target_uri": directive.target_uri}
        )
    payload["directives"] = directive_entries

    raw = section.raw
    raw_fields = (
        "heading_level",
        "heading_text",
        "token_start",
        "token_end",
        "source_start_line",
        "source_end_line",
    )
    payload["raw"] = {name: getattr(raw, name) for name in raw_fields}
    return payload
94
+
95
+
96
def deserialize_section(data: dict):
    """Rebuild a ParsedSection from a dict read back from the cache.

    ``data`` must provide ``raw``, ``uri``, ``file_path`` and ``metadata``;
    ``directives`` is optional and defaults to an empty list.
    """
    from mdbind.models import Directive, ParsedSection, RawSection

    raw_section = RawSection(**data["raw"])

    restored_directives = []
    for entry in data.get("directives", []):
        restored_directives.append(
            Directive(type=entry["type"], target_uri=entry["target_uri"])
        )

    return ParsedSection(
        raw=raw_section,
        uri=data["uri"],
        file_path=data["file_path"],
        metadata=data["metadata"],
        directives=restored_directives,
    )
110
+
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # Logica incremental
114
+ # ---------------------------------------------------------------------------
115
+
116
def build_index_with_cache(
    root: Path,
    md_files: list[Path],
    no_cache: bool = False,
) -> tuple[list, Dict[str, str]]:
    """
    Return (sections_list, file_hashes), reusing cached sections when possible.

    sections_list: parsed sections for every file in ``md_files``, in the
                   same file order
    file_hashes:   current SHA-256 digest of every file in ``md_files``,
                   keyed by ``str(path)``

    A file whose current hash equals the hash stored in the cache has its
    sections restored from the cache; any other file is reparsed. Cached
    sections belonging to files that are not in ``md_files`` are never added
    to the result. When ``no_cache`` is true, no usable cache exists, or the
    cache content is structurally malformed, files are reparsed instead of
    raising.
    """
    from mdbind.parser import parse_file

    current_hashes: Dict[str, str] = {str(f): file_hash(f) for f in md_files}

    # No cache available or caching disabled: reparse everything.
    cached = None if no_cache else load_cache(root)
    if cached is None:
        return _parse_all(md_files, parse_file), current_hashes

    cached_hashes = cached.get("file_hashes", {})
    if not isinstance(cached_hashes, dict):
        # Unusable hash table: every file becomes a cache miss below.
        cached_hashes = {}
    cached_sections_data = cached.get("sections", [])

    # Group cached sections by the file they came from.
    cached_by_file: Dict[str, list] = {}
    try:
        for s_data in cached_sections_data:
            cached_by_file.setdefault(s_data["file_path"], []).append(s_data)
    except (KeyError, TypeError):
        # An entry that cannot be attributed to a file means the cached
        # section list cannot be trusted; fall back to a full reparse.
        return _parse_all(md_files, parse_file), current_hashes

    sections: list = []
    for f in md_files:
        fs = str(f)
        if cached_hashes.get(fs) == current_hashes[fs]:
            # Cache hit: restore this file's sections from the cached data.
            try:
                restored = [
                    deserialize_section(s_data)
                    for s_data in cached_by_file.get(fs, [])
                ]
            except (KeyError, TypeError):
                # Cached entry lacks or mislabels required fields: reparse
                # this file rather than aborting the whole build.
                restored = parse_file(f)
            sections.extend(restored)
        else:
            # Cache miss: the file is new or its content changed.
            sections.extend(parse_file(f))

    return sections, current_hashes
164
+
165
+
166
def _parse_all(md_files: list[Path], parse_file) -> list:
    """Parse every file in order and return all resulting sections as one flat list."""
    return [section for md_file in md_files for section in parse_file(md_file)]