mdbind 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbind/composer.py ADDED
@@ -0,0 +1,135 @@
1
+ """
2
+ Materializacao semantica: motor de composicao documental (spec section 8.3).
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import re
7
+ from pathlib import Path
8
+ from typing import FrozenSet, List, Optional, Set
9
+
10
+ from mdbind.cycle import enter_node, would_cycle
11
+ from mdbind.directives import _resolve_uri
12
+ from mdbind.models import SectionGraph
13
+
14
+ _PLACEHOLDER_TPL = "<!-- mdgraph:unresolved uri=\"{uri}\" -->"
15
+ _INCLUDE_RE = re.compile(r"^\[@include(?::[^\]]*)?\]\(([^)]+)\)\s*$")
16
+
17
+
18
def compose(
    root_uri: str,
    graph: SectionGraph,
    *,
    strict: bool = False,
    deduplicate: bool = False,
    warnings: Optional[List[str]] = None,
    depth: Optional[int] = None,
) -> str:
    """Materialize the section at ``root_uri`` into one Markdown string.

    The root section is shifted so that its heading is rendered at level 1;
    every ``@include`` line found while walking it is expanded recursively by
    ``_compose_node``.

    Args:
        root_uri: URI of the section to start from; must exist in
            ``graph.index``.
        graph: Section graph whose index is used to look sections up.
        strict: Forwarded to ``_compose_node``; when true an include whose
            target is missing raises instead of leaving a placeholder.
        deduplicate: Forwarded to ``_compose_node``; when true a section
            that was already emitted is replaced by an ``@ref(...)`` line.
        warnings: Optional list that receives warning messages. When
            omitted, a private list is used and the messages are discarded.
        depth: Maximum number of include levels to expand, or ``None`` for
            no limit.

    Returns:
        The composed document, lines joined with ``"\\n"``.

    Raises:
        ValueError: If ``root_uri`` is not present in the index.
    """
    root = graph.index.get(root_uri)
    if root is None:
        raise ValueError(f"URI raiz nao encontrada: '{root_uri}'")

    sink = [] if warnings is None else warnings

    composed = _compose_node(
        root_uri,
        graph,
        # The root node is always renormalized to heading level 1.
        heading_offset=1 - root.raw.heading_level,
        execution_path=frozenset(),
        seen=set(),
        strict=strict,
        deduplicate=deduplicate,
        warnings=sink,
        depth=depth,
    )
    return "\n".join(composed)
49
+
50
+
51
def _fence_run(line: str) -> tuple[str, int, str]:
    """Describe the code-fence marker that starts ``line``, if there is one.

    Returns ``(char, length, rest)`` where ``char`` is a backtick or a tilde,
    ``length`` is the size of the marker run (at least 3) and ``rest`` is the
    text following it. Returns ``("", 0, "")`` when the line does not start
    with such a run or is indented by more than three spaces.
    """
    body = line.lstrip(" ")
    if len(line) - len(body) > 3:
        # Four or more leading spaces: not a fence marker.
        return "", 0, ""
    for char in ("`", "~"):
        length = len(body) - len(body.lstrip(char))
        if length >= 3:
            return char, length, body[length:]
    return "", 0, ""


def _compose_node(
    uri: str,
    graph: SectionGraph,
    heading_offset: int,
    execution_path: FrozenSet[str],
    seen: Set[str],
    strict: bool,
    deduplicate: bool,
    warnings: List[str],
    depth: Optional[int] = None,
) -> List[str]:
    """Return the composed lines of the section ``uri``, expanding includes.

    Lines inside fenced code blocks are copied verbatim: their ``#`` lines
    are not treated as headings and ``[@include](...)`` samples inside them
    are not expanded.

    Args:
        uri: URI of the section to emit.
        graph: Section graph whose index is used for lookups.
        heading_offset: Amount added to the level of every heading line of
            this section.
        execution_path: URIs of the sections currently being expanded
            (ancestors of this call); used to break include cycles.
        seen: URIs already emitted anywhere in this composition; mutated.
        strict: Raise on an unknown ``uri`` instead of emitting a
            placeholder comment.
        deduplicate: Emit ``@ref(uri)`` instead of the content when ``uri``
            was already emitted.
        warnings: List that receives warning messages; mutated.
        depth: Remaining include levels that may still be expanded, or
            ``None`` for no limit. At ``0`` include lines are dropped.

    Returns:
        The list of output lines for this section and its expanded includes.

    Raises:
        ValueError: If ``strict`` is true and ``uri`` is not in the index.
    """
    section = graph.index.get(uri)
    if section is None:
        msg = f"URI nao encontrada: '{uri}'"
        if strict:
            raise ValueError(msg)
        warnings.append(msg)
        return [_PLACEHOLDER_TPL.format(uri=uri)]

    if deduplicate and uri in seen:
        return [f"@ref({uri})"]

    seen.add(uri)
    execution_path = enter_node(uri, execution_path)

    result: List[str] = []
    # Marker of the fenced code block currently open ("" when outside one).
    fence_char, fence_len = "", 0

    for line in _raw_lines(section):
        char, length, rest = _fence_run(line)

        if fence_char:
            # Inside a fence: only a bare run of the same character, at least
            # as long as the opener, closes it.
            if char == fence_char and length >= fence_len and not rest.strip():
                fence_char, fence_len = "", 0
            result.append(line)
            continue

        # A backtick fence may not carry backticks in its info string;
        # such a line is inline code, not a fence opener.
        if char and not (char == "`" and "`" in rest):
            fence_char, fence_len = char, length
            result.append(line)
            continue

        adjusted = _adjust_heading(line, heading_offset)
        m = _INCLUDE_RE.match(adjusted.strip())
        if not m:
            result.append(adjusted)
            continue

        # Resolve the target relative to the file the section comes from.
        raw_target = m.group(1).strip()
        resolved_target = _resolve_uri(raw_target, section.file_path)

        if would_cycle(resolved_target, execution_path):
            warnings.append(
                f"Ciclo detectado: '{resolved_target}' ja esta no caminho "
                f"de execucao. Aresta rompida."
            )
            continue  # break the edge; the include line is dropped

        # Depth limit reached: do not expand, drop the include line.
        if depth is not None and depth <= 0:
            continue

        child_offset = heading_offset  # fallback when the child is unknown
        child_section = graph.index.get(resolved_target)
        if child_section is not None:
            # Render the child's heading one level below this section's.
            parent_new_level = section.raw.heading_level + heading_offset
            child_offset = parent_new_level + 1 - child_section.raw.heading_level

        result.extend(
            _compose_node(
                resolved_target,
                graph,
                heading_offset=child_offset,
                execution_path=execution_path,
                seen=seen,
                strict=strict,
                deduplicate=deduplicate,
                warnings=warnings,
                depth=None if depth is None else depth - 1,
            )
        )

    return result
119
+
120
+
121
+ def _raw_lines(section) -> List[str]:
122
+ path = Path(section.file_path)
123
+ all_lines = path.read_text(encoding="utf-8").splitlines()
124
+ start = section.raw.source_start_line - 1
125
+ end = section.raw.source_end_line
126
+ return all_lines[start:end]
127
+
128
+
129
+ def _adjust_heading(line: str, offset: int) -> str:
130
+ if offset == 0 or not line.startswith("#"):
131
+ return line
132
+ original_level = len(line) - len(line.lstrip("#"))
133
+ new_level = max(1, original_level + offset)
134
+ return "#" * new_level + line[original_level:]
135
+
mdbind/cycle.py ADDED
@@ -0,0 +1,24 @@
1
+ """
2
+ Deteccao e resolucao de ciclos durante a materializacao (spec section 5).
3
+
4
+ O digrafo pode conter ciclos nativamente. Durante o compose, o motor rastreia
5
+ o caminho de execucao atual P (stack de URIs). Se uma aresta de inclusao (x, y)
6
+ for avaliada e y ∈ P, o ciclo e detectado e a aresta e rompida silenciosamente.
7
+
8
+ O SectionGraph original NAO e modificado; apenas a materializacao e afetada.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ from typing import FrozenSet
13
+
14
+
15
def would_cycle(uri: str, execution_path: FrozenSet[str]) -> bool:
    """Tell whether following an include to ``uri`` would close a cycle.

    That is the case exactly when ``uri`` is already a member of
    ``execution_path``.
    """
    already_on_path = uri in execution_path
    return already_on_path
20
+
21
+
22
def enter_node(uri: str, execution_path: FrozenSet[str]) -> FrozenSet[str]:
    """Return a new path made of ``execution_path`` plus ``uri``.

    The path passed in is left untouched.
    """
    return execution_path.union((uri,))
mdbind/directives.py ADDED
@@ -0,0 +1,116 @@
1
+ """
2
+ Etapa 4 do pipeline: tokenizacao de diretivas.
3
+
4
+ Varre os tokens de conteudo de uma ParsedSection ja delimitada e converte
5
+ marcacoes semanticas (@ref, @include, @query) em objetos Directive tipados,
6
+ resolvendo URIs relativas ao arquivo de origem.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from pathlib import Path
12
+ from typing import List
13
+
14
+ from mdbind.models import Directive, ParsedSection
15
+
16
+ # Regex que captura [@tipo: label](uri) ou [@tipo](uri)
17
+ # Grupo 1: tipo (ref|include|query)
18
+ # Grupo 2: label opcional (pode ser vazio ou ausente)
19
+ # Grupo 3: uri de destino
20
+ _DIRECTIVE_RE = re.compile(r"\[@(ref|include|query)(?::\s*([^\]]*))?\]\(([^)]+)\)")
21
+
22
+
23
+ def _resolve_uri(target: str, source_file_path: str) -> str:
24
+ """
25
+ Resolve um target URI relativo ao diretorio do arquivo de origem.
26
+
27
+ Se o target nao tiver componente de caminho (ex: apenas "#id"), retorna
28
+ como esta. Se for absoluto ou ja normalizado, retorna normalizado.
29
+ Fragmentos (#id) sao preservados.
30
+ """
31
+ if not target:
32
+ return target
33
+
34
+ # Separar caminho e fragmento
35
+ if "#" in target:
36
+ path_part, fragment = target.split("#", 1)
37
+ fragment = "#" + fragment
38
+ else:
39
+ path_part, fragment = target, ""
40
+
41
+ if not path_part:
42
+ # Referencia ao proprio arquivo: "#id"
43
+ return source_file_path + fragment
44
+
45
+ p = Path(path_part)
46
+ if p.is_absolute():
47
+ return str(p) + fragment
48
+
49
+ # Resolver relativo ao diretorio do arquivo de origem
50
+ source_dir = Path(source_file_path).parent
51
+ resolved = (source_dir / p).resolve()
52
+ return str(resolved) + fragment
53
+
54
+
55
+ # Regex para o texto do link: @tipo ou @tipo: label
56
+ _LINK_TEXT_RE = re.compile(r"^@(ref|include|query)(?::\s*(.*))?$")
57
+
58
+
59
def extract_directives(tokens: list, source_file_path: str) -> List[Directive]:
    """Collect the directives written as Markdown links in ``tokens``.

    The recognized syntax is ``[@type: label](uri)`` or ``[@type](uri)``.
    Only ``inline`` tokens with children are inspected. Within them, every
    ``link_open`` child that is immediately followed by a ``text`` child is
    a candidate: the text must match the directive pattern and the link must
    carry a non-empty ``href`` attribute.

    Args:
        tokens: Block-level tokens to scan.
        source_file_path: File the tokens come from; used to resolve
            relative targets.

    Returns:
        One ``Directive`` per match, in document order, with ``target_uri``
        already resolved.
    """
    found: List[Directive] = []

    for token in tokens:
        if token.type != "inline" or not token.children:
            continue

        nodes = token.children
        for position, node in enumerate(nodes):
            if node.type != "link_open":
                continue

            link_attrs = node.attrs or {}
            href = link_attrs.get("href", "") if isinstance(link_attrs, dict) else ""

            # The link text must be the very next child.
            if position + 1 >= len(nodes):
                continue
            follower = nodes[position + 1]
            if follower.type != "text":
                continue

            match = _LINK_TEXT_RE.match(follower.content.strip())
            if not match or not href:
                continue

            kind, raw_label = match.group(1), match.group(2)
            found.append(
                Directive(
                    type=kind,  # type: ignore[arg-type]
                    target_uri=_resolve_uri(href.strip(), source_file_path),
                    label=raw_label.strip() if raw_label else None,
                )
            )

    return found
99
+
100
+
101
def bind_directives(section: ParsedSection, tokens: list) -> ParsedSection:
    """Return a copy of ``section`` with its ``directives`` field filled in.

    Only the section's own content is scanned: the three heading tokens
    (``heading_open``, ``inline``, ``heading_close``) are skipped and the
    scan stops at the first nested ``heading_open``, so directives that
    belong to sub-sections are not attributed to this one.
    """
    body = tokens[section.raw.token_start + 3: section.raw.token_end + 1]

    # Index of the first nested heading, or the end of the body if none.
    cutoff = next(
        (pos for pos, token in enumerate(body) if token.type == "heading_open"),
        len(body),
    )

    found = extract_directives(body[:cutoff], section.file_path)
    return section.model_copy(update={"directives": found})
mdbind/index.py ADDED
@@ -0,0 +1,57 @@
1
+ """
2
+ Etapa 5 do pipeline: indexacao do repositorio e construcao do SectionGraph.
3
+
4
+ index_repository(root_path) -> SectionGraph
5
+ - Descobre recursivamente todos os .md no diretorio raiz
6
+ - Executa o pipeline parser.parse_file em cada arquivo (com cache incremental)
7
+ - Registra secoes no SectionIndex
8
+ - Constroi arestas bidirecionais no SectionGraph
9
+ """
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+
14
+ from mdbind.cache import build_index_with_cache, save_cache, serialize_section
15
+ from mdbind.models import SectionGraph, SectionIndex
16
+ from mdbind.parser import ParseError
17
+
18
+
19
def index_repository(
    root_path: str | Path,
    *,
    no_cache: bool = False,
    persist_cache: bool = False,
) -> SectionGraph:
    """Index every ``.md`` file under ``root_path`` into a ``SectionGraph``.

    The files are discovered recursively, handed to
    ``build_index_with_cache`` in sorted order, and each returned section is
    registered in the index. Every ``ref`` and ``include`` directive of a
    section becomes an edge from that section to the directive target.

    Args:
        root_path: Directory to scan.
        no_cache: Forwarded to ``build_index_with_cache``.
        persist_cache: When true, the indexed sections and file hashes are
            handed to ``save_cache`` after indexing.

    Returns:
        The populated graph.

    Raises:
        ParseError: If two sections share the same URI.
    """
    root = Path(root_path).resolve()
    markdown_files = sorted(root.rglob("*.md"))

    parsed_sections, hashes = build_index_with_cache(
        root, markdown_files, no_cache=no_cache
    )

    index = SectionIndex()
    graph = SectionGraph(index=index)
    edge_kinds = ("ref", "include")

    for parsed in parsed_sections:
        try:
            index.add(parsed)
        except ValueError as exc:
            raise ParseError(str(exc)) from exc

        targets = [d.target_uri for d in parsed.directives if d.type in edge_kinds]
        for target in targets:
            graph.add_edge(parsed.uri, target)

    if not persist_cache:
        return graph

    serialized = [serialize_section(entry) for entry in index.sections.values()]
    save_cache(root, hashes, serialized)
    return graph
mdbind/models.py ADDED
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict
4
+ from typing import Any, Dict, List, Literal, Set
5
+
6
+ from pydantic import BaseModel, Field, model_validator
7
+
8
+
9
+ # ---------------------------------------------------------------------------
10
+ # Fase 2: Delimitacao Fisica
11
+ # ---------------------------------------------------------------------------
12
+
13
class RawSection(BaseModel):
    """Physical extent of a section in the token stream and the source file.

    Carries no meaning beyond location: where the section starts and ends.
    """

    # Level and text of the heading that opens the section.
    heading_level: int
    heading_text: str

    # Index of the section's heading_open token and of its last token
    # (inclusive) in the flat token list.
    token_start: int
    token_end: int

    # First and last source line of the section, 1-based and inclusive.
    source_start_line: int
    source_end_line: int
22
+
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Fase 4: Semantica e Diretivas
26
+ # ---------------------------------------------------------------------------
27
+
28
class Directive(BaseModel):
    """A semantic link found in a section, as a typed object instead of text."""

    # Kind of directive.
    type: Literal["ref", "include", "query"]

    # Target of the directive, already resolved by the extractor.
    target_uri: str

    # Optional label written after the colon in "[@type: label](uri)".
    label: str | None = None
34
+
35
+
36
class ParsedSection(BaseModel):
    """A section with its meaning attached: location, identity and links."""

    # Where the section lives in the tokens and in the source file.
    raw: RawSection

    # Identifier of the section and path of the file that defines it.
    uri: str
    file_path: str

    # Section metadata; the key 'id' is mandatory (enforced below).
    metadata: Dict[str, Any]

    # Directives found in the section's own content.
    directives: List[Directive] = Field(default_factory=list)

    @model_validator(mode="after")
    def _require_id_in_metadata(self) -> "ParsedSection":
        """Reject a section whose metadata lacks the ``id`` key."""
        if "id" in self.metadata:
            return self
        raise ValueError("secao sem payload obrigatorio: campo 'id' ausente em metadata")
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # Fase 5: Indexacao e Grafo
55
+ # ---------------------------------------------------------------------------
56
+
57
class SectionIndex(BaseModel):
    """Dictionary-backed store of parsed sections, keyed by URI."""

    # URI -> section.
    sections: Dict[str, ParsedSection] = Field(default_factory=dict)

    def add(self, section: ParsedSection) -> None:
        """Register ``section`` under its URI.

        Raises:
            ValueError: If a section with the same URI is already stored.
        """
        key = section.uri
        if key in self.sections:
            raise ValueError(f"URI duplicada no indice: '{section.uri}'")
        self.sections[key] = section

    def get(self, uri: str) -> ParsedSection | None:
        """Return the section stored under ``uri``, or ``None`` if absent."""
        return self.sections.get(uri)
69
+
70
+
71
class SectionGraph(BaseModel):
    """Section index plus the dependency edges between sections.

    Edges are stored in both directions so that backlinks can be looked up
    as cheaply as outgoing links.
    """

    index: SectionIndex = Field(default_factory=SectionIndex)

    # source URI -> set of target URIs.
    outgoing_edges: Dict[str, Set[str]] = Field(
        default_factory=lambda: defaultdict(set)
    )
    # target URI -> set of source URIs (backlinks).
    incoming_edges: Dict[str, Set[str]] = Field(
        default_factory=lambda: defaultdict(set)
    )

    model_config = {"arbitrary_types_allowed": True}

    def add_edge(self, source_uri: str, target_uri: str) -> None:
        """Record a directed edge ``source_uri -> target_uri``.

        ``setdefault`` is used instead of plain indexing so the method also
        works when the edge maps are ordinary dicts (for example when the
        graph was built from explicit values rather than the default
        factories), not only when they are ``defaultdict`` instances.
        """
        self.outgoing_edges.setdefault(source_uri, set()).add(target_uri)
        self.incoming_edges.setdefault(target_uri, set()).add(source_uri)
mdbind/parser.py ADDED
@@ -0,0 +1,241 @@
1
+ """
2
+ Parser Markdown: pipeline Markdown -> AST -> RawSection -> ParsedSection.
3
+
4
+ Etapas cobertas (spec section 2):
5
+ 1. Geracao de AST via markdown-it-py
6
+ 2. Section Discovery -> RawSection
7
+ 3. Metadata Binding -> ParsedSection
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path
12
+ from typing import List
13
+
14
+ import yaml
15
+ from markdown_it import MarkdownIt
16
+
17
+ from mdbind.directives import bind_directives
18
+ from mdbind.models import Directive, ParsedSection, RawSection
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Erros de parsing
23
+ # ---------------------------------------------------------------------------
24
+
25
class ParseError(Exception):
    """Raised when a Markdown file violates the section rules of the parser."""
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Etapa 1: Geracao de AST
31
+ # ---------------------------------------------------------------------------
32
+
33
def _tokenize(text: str) -> list:
    """Parse ``text`` with a default ``MarkdownIt`` instance and return its tokens."""
    return MarkdownIt().parse(text)
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Etapa 2: Section Discovery -> List[RawSection]
40
+ # ---------------------------------------------------------------------------
41
+
42
def _discover_sections(tokens: list) -> List[RawSection]:
    """Delimit one ``RawSection`` per heading in the flat token list.

    A section starts at its ``heading_open`` token and runs until just
    before the next heading whose level is less than or equal to its own,
    or until the end of the document. Deeper headings therefore stay inside
    the enclosing section.

    The heading text is the concatenation of the ``content`` of every child
    of the heading's inline token, so headings that start with or contain
    inline markup (``# *Intro* guide``) keep their full text.

    Args:
        tokens: Flat token list of the whole document.

    Returns:
        The sections in document order.
    """
    # (token index, level, 1-based source line) for every heading.
    # markdown-it source maps are 0-based; 0 marks a heading without a map.
    headings = [
        (position, int(tok.tag[1]), (tok.map[0] + 1) if tok.map else 0)
        for position, tok in enumerate(tokens)
        if tok.type == "heading_open"
    ]

    # Defaults for a section that runs to the end of the document; computed
    # once because they do not depend on the heading.
    last_token = len(tokens) - 1
    document_end = _last_source_line(tokens)

    raws: List[RawSection] = []
    for idx, (token_start, level, source_start_line) in enumerate(headings):
        # The token right after heading_open is the inline heading content.
        inline = tokens[token_start + 1]
        heading_text = "".join(child.content for child in inline.children or [])

        token_end = last_token
        source_end_line = document_end
        for future_start, future_level, future_source in headings[idx + 1:]:
            if future_level <= level:
                # The next heading at the same or a shallower level ends this section.
                token_end = future_start - 1
                source_end_line = future_source - 1
                break

        raws.append(
            RawSection(
                heading_level=level,
                heading_text=heading_text,
                token_start=token_start,
                token_end=token_end,
                source_start_line=source_start_line,
                source_end_line=source_end_line,
            )
        )

    return raws
84
+
85
+
86
+ def _last_source_line(tokens: list) -> int:
87
+ """Retorna a ultima linha fonte referenciada nos tokens (base-1)."""
88
+ last = 1
89
+ for tok in reversed(tokens):
90
+ if tok.map:
91
+ last = tok.map[1] # map[1] ja e o indice exclusivo (base-0), vira base-1
92
+ break
93
+ return last
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Etapa 3: Metadata Binding -> ParsedSection
98
+ # ---------------------------------------------------------------------------
99
+
100
+ def _bind_metadata(
101
+ raw: RawSection,
102
+ tokens: list,
103
+ file_path: str,
104
+ ) -> ParsedSection:
105
+ """
106
+ Analisa os tokens internos da RawSection buscando o bloco 'section' (YAML).
107
+ Aplica as validacoes da spec section 3.
108
+ """
109
+ # +3 pula: heading_open, inline (texto), heading_close
110
+ # O scan termina no primeiro heading interno (qualquer nivel), pois o bloco
111
+ # section so pode estar no conteudo direto da secao, nao em sub-secoes.
112
+ all_inner = tokens[raw.token_start + 3: raw.token_end + 1]
113
+ inner_tokens: list = []
114
+ for tok in all_inner:
115
+ if tok.type == "heading_open":
116
+ break
117
+ inner_tokens.append(tok)
118
+
119
+ section_blocks: list[str] = []
120
+ first_text_seen = False
121
+ section_block_index = -1 # posicao do primeiro bloco section nos inner_tokens
122
+
123
+ i = 0
124
+ while i < len(inner_tokens):
125
+ tok = inner_tokens[i]
126
+
127
+ if tok.type == "fence" and tok.info.strip() == "yaml":
128
+ parsed_yaml = None
129
+ try:
130
+ parsed_yaml = yaml.safe_load(tok.content) or {}
131
+ except yaml.YAMLError:
132
+ parsed_yaml = {}
133
+ if not isinstance(parsed_yaml, dict) or "section" not in parsed_yaml:
134
+ # Bloco yaml sem campo 'section' e ignorado (yaml generico)
135
+ first_text_seen = True
136
+ i += 1
137
+ continue
138
+ if first_text_seen and section_block_index == -1:
139
+ raise ParseError(
140
+ f"payload nao e o primeiro bloco na secao '{raw.heading_text}' "
141
+ f"(linha {raw.source_start_line})"
142
+ )
143
+ section_blocks.append(tok.content)
144
+ if section_block_index == -1:
145
+ section_block_index = i
146
+ elif tok.type in ("paragraph_open", "fence", "bullet_list_open",
147
+ "ordered_list_open", "blockquote_open", "html_block",
148
+ "table_open", "hr"):
149
+ # Qualquer bloco textual que nao seja o bloco section
150
+ if tok.type != "fence": # fence ja tratado acima
151
+ first_text_seen = True
152
+ elif tok.type == "inline" and tok.content.strip():
153
+ first_text_seen = True
154
+
155
+ i += 1
156
+
157
+ if len(section_blocks) > 1:
158
+ raise ParseError(
159
+ f"bloco section duplicado na secao '{raw.heading_text}' "
160
+ f"(linha {raw.source_start_line})"
161
+ )
162
+
163
+ if not section_blocks:
164
+ # Secao sem bloco section: metadata vazio, sem erro
165
+ # Nao podemos construir ParsedSection pois falta 'id' — retornamos None
166
+ # para que o chamador decida se ignora ou errou
167
+ return None # type: ignore[return-value]
168
+
169
+ raw_yaml = section_blocks[0]
170
+ try:
171
+ metadata = yaml.safe_load(raw_yaml) or {}
172
+ except yaml.YAMLError as exc:
173
+ raise ParseError(
174
+ f"YAML invalido no bloco section da secao '{raw.heading_text}': {exc}"
175
+ ) from exc
176
+
177
+ if not isinstance(metadata, dict):
178
+ raise ParseError(
179
+ f"bloco section da secao '{raw.heading_text}' nao e um mapeamento YAML valido"
180
+ )
181
+
182
+ if not metadata.get("section"):
183
+ raise ParseError(
184
+ f"secao sem payload obrigatorio: campo 'section' ausente na secao "
185
+ f"'{raw.heading_text}' (linha {raw.source_start_line})"
186
+ )
187
+
188
+ section_id = str(metadata.pop("section"))
189
+ metadata["id"] = section_id
190
+ uri = f"{file_path}#{section_id}"
191
+
192
+ return ParsedSection(
193
+ raw=raw,
194
+ uri=uri,
195
+ file_path=file_path,
196
+ metadata=metadata,
197
+ directives=[], # populado em B-003
198
+ )
199
+
200
+
201
+ # ---------------------------------------------------------------------------
202
+ # API publica
203
+ # ---------------------------------------------------------------------------
204
+
205
def parse_file(file_path: str | Path) -> List[ParsedSection]:
    """Run the whole pipeline on one ``.md`` file read as UTF-8.

    Only sections that carry a payload block are returned; sections without
    one are skipped without error.
    """
    source = Path(file_path)
    return parse_text(source.read_text(encoding="utf-8"), file_path=str(source))
214
+
215
+
216
def parse_text(text: str, file_path: str = "<string>") -> List[ParsedSection]:
    """Run the whole pipeline on raw Markdown text.

    The text is tokenized, split into sections, each section's metadata is
    bound and its directives are extracted. Sections without a payload are
    skipped.

    Raises:
        ParseError: If two sections of the text declare the same id (or for
            any error raised while binding metadata).
    """
    tokens = _tokenize(text)

    ids_in_file: set[str] = set()
    collected: List[ParsedSection] = []

    for raw in _discover_sections(tokens):
        candidate = _bind_metadata(raw, tokens, file_path)
        if candidate is None:
            continue

        ident = str(candidate.metadata["id"])
        if ident in ids_in_file:
            raise ParseError(
                f"id duplicado '{ident}' no arquivo '{file_path}'"
            )
        ids_in_file.add(ident)

        # Directive extraction happens only for sections that were kept.
        collected.append(bind_directives(candidate, tokens))

    return collected
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: mdbind
3
+ Version: 0.1.0
4
+ Summary: MdBind — Structured memory in plain Markdown
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: pydantic>=2.0
7
+ Requires-Dist: markdown-it-py>=3.0
8
+ Requires-Dist: typer>=0.12
9
+ Requires-Dist: pyyaml>=6.0
@@ -0,0 +1,14 @@
1
+ mdbind/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ mdbind/cache.py,sha256=KrdOyrR1pmkyQ6C-FjNlvGDvVGHs7RBaw1mMBF3EPok,5570
3
+ mdbind/cli.py,sha256=SQoHZV1jHvyQ7U_Ejjfjj8iVkjE37cfu0h30GDBGW3k,40992
4
+ mdbind/composer.py,sha256=mtLCeNdXblAVHf5cEY0-U7s_03c4h_1is52BDOLC5Kw,4175
5
+ mdbind/cycle.py,sha256=nnqWZKrfI0TpFqhpXQYfm4g0FJgaWTXyaJIhuG3V_-s,821
6
+ mdbind/directives.py,sha256=Vj_wRxoCsg2vt6hIwv_hc2R0yyHQtZmCPU6oSI-DQdg,4047
7
+ mdbind/index.py,sha256=M25hkb5QHT0cGsX2ujgdN5qUi5JiKbrqYWvEV8Xadl0,1786
8
+ mdbind/models.py,sha256=W79gzwYi5trhLMmdUhVx48J_FZn8hcVYrpI6gIpPPl0,2817
9
+ mdbind/parser.py,sha256=MsNPgORBq7MpNxXY07Lbactorn6ij8trG9M-v_Kyh_Q,8422
10
+ mdbind-0.1.0.dist-info/METADATA,sha256=SFy2YjelbOKXLTAKHDIORN-J9_586WXjEG82bC6uvko,248
11
+ mdbind-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
12
+ mdbind-0.1.0.dist-info/entry_points.txt,sha256=dAbUX6TuxiskOOMPGEtnMkyEeD3ab6NTpBQmwVxADYw,39
13
+ mdbind-0.1.0.dist-info/top_level.txt,sha256=rmhkm853CHFBfq831bRqCJcDDCUETMCUYBb2ytOutjA,7
14
+ mdbind-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ mdb = mdbind.cli:app