archetype-md 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. archetype_md-0.1.0/LICENSE +21 -0
  2. archetype_md-0.1.0/PKG-INFO +12 -0
  3. archetype_md-0.1.0/pyproject.toml +118 -0
  4. archetype_md-0.1.0/src/archetype/README.md +188 -0
  5. archetype_md-0.1.0/src/archetype/__init__.py +20 -0
  6. archetype_md-0.1.0/src/archetype/markdown/__init__.py +67 -0
  7. archetype_md-0.1.0/src/archetype/markdown/_ast_normalizer.py +175 -0
  8. archetype_md-0.1.0/src/archetype/markdown/_projector.py +288 -0
  9. archetype_md-0.1.0/src/archetype/markdown/_shared.py +58 -0
  10. archetype_md-0.1.0/src/archetype/markdown/annotations.py +63 -0
  11. archetype_md-0.1.0/src/archetype/markdown/elements.py +125 -0
  12. archetype_md-0.1.0/src/archetype/markdown/errors.py +27 -0
  13. archetype_md-0.1.0/src/archetype/markdown/extractor.py +156 -0
  14. archetype_md-0.1.0/src/archetype/markdown/introspection.py +110 -0
  15. archetype_md-0.1.0/src/archetype/markdown/meta_validation.py +264 -0
  16. archetype_md-0.1.0/src/archetype/markdown/parser.py +19 -0
  17. archetype_md-0.1.0/src/archetype/markdown/renderer.py +297 -0
  18. archetype_md-0.1.0/src/archetype/markdown/template_model.py +47 -0
  19. archetype_md-0.1.0/src/archetype/templating/__init__.py +52 -0
  20. archetype_md-0.1.0/src/archetype/templating/environment.py +50 -0
  21. archetype_md-0.1.0/src/archetype/templating/resolve.py +45 -0
  22. archetype_md-0.1.0/tests/archetype/markdown/fixtures/__init__.py +0 -0
  23. archetype_md-0.1.0/tests/archetype/markdown/fixtures/sample_models.py +56 -0
  24. archetype_md-0.1.0/tests/archetype/markdown/test_annotations.py +67 -0
  25. archetype_md-0.1.0/tests/archetype/markdown/test_ast_normalizer.py +135 -0
  26. archetype_md-0.1.0/tests/archetype/markdown/test_elements.py +115 -0
  27. archetype_md-0.1.0/tests/archetype/markdown/test_errors.py +44 -0
  28. archetype_md-0.1.0/tests/archetype/markdown/test_extractor.py +94 -0
  29. archetype_md-0.1.0/tests/archetype/markdown/test_introspection.py +194 -0
  30. archetype_md-0.1.0/tests/archetype/markdown/test_markdown_public_api.py +47 -0
  31. archetype_md-0.1.0/tests/archetype/markdown/test_meta_validation.py +274 -0
  32. archetype_md-0.1.0/tests/archetype/markdown/test_parser.py +67 -0
  33. archetype_md-0.1.0/tests/archetype/markdown/test_projector.py +78 -0
  34. archetype_md-0.1.0/tests/archetype/markdown/test_renderer.py +186 -0
  35. archetype_md-0.1.0/tests/archetype/markdown/test_round_trip.py +122 -0
  36. archetype_md-0.1.0/tests/archetype/markdown/test_template_model.py +69 -0
  37. archetype_md-0.1.0/tests/archetype/templating/test_environment.py +80 -0
  38. archetype_md-0.1.0/tests/archetype/templating/test_integration.py +90 -0
  39. archetype_md-0.1.0/tests/archetype/templating/test_resolve.py +98 -0
  40. archetype_md-0.1.0/tests/archetype/templating/test_templating_public_api.py +19 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mark Norman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.1
2
+ Name: archetype-md
3
+ Version: 0.1.0
4
+ Summary: Pydantic as source of truth for agentic systems
5
+ Author-Email: Mark Norman <mark@marknorman.com>
6
+ License: MIT
7
+ Requires-Python: ==3.14.*
8
+ Requires-Dist: pydantic>=2.12.5
9
+ Requires-Dist: markdown-it-py>=4.0.0
10
+ Requires-Dist: jinja2>=3.1.0
11
+ Requires-Dist: pyyaml>=6.0.3
12
+
@@ -0,0 +1,118 @@
1
+ [build-system]
2
+ requires = [
3
+ "pdm-backend",
4
+ ]
5
+ build-backend = "pdm.backend"
6
+
7
+ [project]
8
+ name = "archetype-md"
9
+ version = "0.1.0"
10
+ description = "Pydantic as source of truth for agentic systems"
11
+ authors = [
12
+ { name = "Mark Norman", email = "mark@marknorman.com" },
13
+ ]
14
+ dependencies = [
15
+ "pydantic>=2.12.5",
16
+ "markdown-it-py>=4.0.0",
17
+ "jinja2>=3.1.0",
18
+ "pyyaml>=6.0.3",
19
+ ]
20
+ requires-python = "==3.14.*"
21
+
22
+ [project.license]
23
+ text = "MIT"
24
+
25
+ [tool.pytest.ini_options]
26
+ testpaths = [
27
+ "tests",
28
+ ]
29
+ pythonpath = [
30
+ "src",
31
+ ".",
32
+ ]
33
+ addopts = "-n 16"
34
+ asyncio_mode = "strict"
35
+
36
+ [tool.pdm]
37
+ distribution = true
38
+
39
+ [tool.pdm.build]
40
+ package-dir = "src"
41
+
42
+ [tool.pdm.scripts.post_install]
43
+ cmd = "pre-commit install"
44
+
45
+ [tool.pdm.scripts.test]
46
+ cmd = "pytest tests/"
47
+
48
+ [tool.pdm.scripts.test.env]
49
+ PYTHONPATH = "src"
50
+
51
+ [tool.pdm.scripts.lint]
52
+ cmd = "ruff check --fix src/ tests/"
53
+
54
+ [tool.pdm.scripts.lint.env]
55
+ PYTHONPATH = "src"
56
+
57
+ [tool.pdm.scripts.format]
58
+ cmd = "ruff format src/ tests/"
59
+
60
+ [tool.pdm.scripts.format.env]
61
+ PYTHONPATH = "src"
62
+
63
+ [tool.pdm.scripts.typecheck]
64
+ cmd = "pyright"
65
+
66
+ [tool.pdm.scripts.typecheck.env]
67
+ PYTHONPATH = "src"
68
+
69
+ [tool.ruff]
70
+ target-version = "py314"
71
+ line-length = 100
72
+ src = [
73
+ "src",
74
+ ]
75
+
76
+ [tool.ruff.lint]
77
+ select = [
78
+ "E",
79
+ "W",
80
+ "F",
81
+ "I",
82
+ "UP",
83
+ "B",
84
+ "SIM",
85
+ "RUF",
86
+ ]
87
+
88
+ [tool.ruff.lint.per-file-ignores]
89
+ "tests/**" = [
90
+ "E501",
91
+ ]
92
+
93
+ [tool.ruff.format]
94
+ quote-style = "double"
95
+
96
+ [tool.pyright]
97
+ pythonVersion = "3.14"
98
+ pythonPlatform = "Linux"
99
+ venvPath = "."
100
+ venv = ".venv"
101
+ extraPaths = [
102
+ "src",
103
+ ]
104
+ include = [
105
+ "src",
106
+ ]
107
+ typeCheckingMode = "basic"
108
+ reportMissingImports = "warning"
109
+ reportMissingTypeStubs = false
110
+
111
+ [dependency-groups]
112
+ dev = [
113
+ "ruff==0.14.14",
114
+ "pyright>=1.1.408",
115
+ "pytest-xdist>=3.8.0",
116
+ "pytest-asyncio>=1.3.0",
117
+ "pre-commit>=4.6.0",
118
+ ]
@@ -0,0 +1,188 @@
1
+ # Archetype
2
+
3
+ Pydantic as the single source of truth for agentic systems.
4
+
5
+ ## 1. Purpose
6
+
7
+ Agentic systems constantly move structured data across a fuzzy typed/textual (deterministic/generative)
8
+ boundary: a prompt describes the expected output shape, the LLM emits
9
+ markdown, code parses that markdown back into typed objects, and a downstream
10
+ agent re-renders it for the next step. When the prompt, the parser, and the
11
+ type definition live in separate places, they drift — silently — and that
12
+ drift is the most common source of bugs in LLM pipelines.
13
+
14
+ Archetype eliminates the drift by making one annotated Pydantic class the
15
+ authoritative declaration. From that single class, the library derives:
16
+
17
+ - the **markdown template** the agent is instructed to fill in,
18
+ - the **renderer** that turns instances back into markdown,
19
+ - the **parser/validator** that turns LLM output back into instances,
20
+ - the **JSON schema** for tool/structured-output integration,
21
+ - the **field introspection** that prompts use to describe their own
22
+ expected sections (`template_fields(Model)`),
23
+ - the **Jinja resolution context** for one-pass instruction templates.
24
+
25
+ A schema change in one place propagates everywhere automatically. Renaming
26
+ a field, adding a section, or changing a heading's structure cannot
27
+ desynchronize the prompt from the parser, because both are projections of
28
+ the same class.
29
+
30
+ ## 2. Usage
31
+
32
+ Archetype has two submodules:
33
+
34
+ - `archetype.markdown` — typed markdown documents driven by annotated
35
+ Pydantic models (template generation, rendering, parsing, validation,
36
+ subtree extraction, heading-field introspection).
37
+ - `archetype.templating` — a preconfigured Jinja environment with
38
+ markdown-aware globals (`template_fields`, `render_template`) and a
39
+ `resolve()` helper for one-pass instruction templating.
40
+
41
+ ### Declaring a document
42
+
43
+ ```python
44
+ from typing import Annotated
45
+ from archetype.markdown import (
46
+ MarkdownDocument, MarkdownHeader,
47
+ AsHeading, AsBulletList, TextTemplate,
48
+ )
49
+
50
+ class Finding(MarkdownHeader):
51
+ title: Annotated[str, TextTemplate("Finding {ordinal} - {value}")]
52
+ description: Annotated[str, AsHeading()]
53
+ evidence: Annotated[list[str], AsBulletList()]
54
+
55
+ class Review(MarkdownDocument):
56
+ title: Annotated[str, TextTemplate("{value}")]
57
+ summary: Annotated[str, AsHeading()]
58
+ findings: list[Finding]
59
+ ```
60
+
61
+ ### Rendering, parsing, introspecting
62
+
63
+ ```python
64
+ from archetype.markdown import (
65
+ render_template, render_instance, validate_markdown, template_fields,
66
+ )
67
+
68
+ # Skeleton markdown to embed in an agent's prompt
69
+ template_md = render_template(Review)
70
+
71
+ # Turn an LLM's markdown reply back into a typed instance
72
+ review: Review = validate_markdown(llm_output, Review)
73
+
74
+ # Re-render an instance to markdown (e.g. as input to a downstream agent)
75
+ markdown = render_instance(review)
76
+
77
+ # Iterate heading metadata for prompt construction
78
+ for field in template_fields(Review):
79
+ print(field.heading, field.description)
80
+ ```
81
+
82
+ ### Instruction templates with Jinja
83
+
84
+ ```python
85
+ from archetype.templating import resolve
86
+
87
+ def designer_instructions_provider(state: DesignerInput) -> str:
88
+ return resolve(
89
+ _load_template(),
90
+ feature=state.feature_definition,
91
+ )
92
+ ```
93
+
94
+ Inside the template:
95
+
96
+ ```jinja
97
+ The feature definition has these sections:
98
+ {% for field in template_fields(FeatureDefinition) %}
99
+ - **{{ field.heading }}** — {{ field.description }}
100
+ {% endfor %}
101
+
102
+ Your output must match this structure:
103
+
104
+ {{ render_template(DesignDocument) }}
105
+ ```
106
+
107
+ Templates use only `{{ path }}`, `{% for x in path %}…{% endfor %}`, and the
108
+ two registered globals — no filters, conditionals, macros, includes, or
109
+ inheritance. The restriction is convention, not runtime-enforced.
110
+
111
+ ## 3. What the Pydantic model drives
112
+
113
+ The annotated model is the hub; every artifact downstream is a derivation
114
+ of it. There is no parallel source of truth for any of these arrows.
115
+
116
+ ```
117
+ ┌──────────────────────────┐
118
+ │ Annotated Pydantic │
119
+ │ model (your class) │
120
+ │ │
121
+ │ • field names + types │
122
+ │ • Annotated[…] markers: │
123
+ │ AsHeading │
124
+ │ AsCodeBlock │
125
+ │ AsTable │
126
+ │ AsBulletList │
127
+ │ AsNumberedList │
128
+ │ TextTemplate │
129
+ │ • nested MarkdownHeader │
130
+ │ subclasses │
131
+ └─────────────┬────────────┘
132
+
133
+ ┌──────────────────┬───────────────┼───────────────┬──────────────────┐
134
+ │ │ │ │ │
135
+ ▼ drives ▼ drives ▼ controls ▼ validates ▼ exposes
136
+ ┌─────────────┐ ┌─────────────────┐ ┌──────────────┐ ┌─────────────┐ ┌────────────────┐
137
+ │ render_ │ │ render_instance │ │ validate_ │ │ Pydantic │ │ template_ │
138
+ │ template() │ │ () │ │ markdown() │ │ field + │ │ fields() → │
139
+ │ │ │ │ │ │ │ structural │ │ FieldInfo for │
140
+ │ skeleton │ │ instance → │ │ markdown → │ │ meta- │ │ each heading │
141
+ │ markdown │ │ markdown │ │ instance │ │ validation │ │ (.heading, │
142
+ │ for prompts │ │ │ │ │ │ at class │ │ .description) │
143
+ │ │ │ │ │ │ │ definition │ │ │
144
+ └─────────────┘ └─────────────────┘ └──────────────┘ └─────────────┘ └────────────────┘
145
+ │ │ │ │
146
+ │ │ │ │
147
+ └──────────┬───────┴─────────────────┴────────────────────────────────┘
148
+
149
+ ▼ all reachable inside Jinja via
150
+ ┌──────────────────────────────────────┐
151
+ │ archetype.templating.resolve(...) │
152
+ │ │
153
+ │ globals: template_fields, │
154
+ │ render_template │
155
+ │ │
156
+ │ one-pass agent-instruction rendering│
157
+ └──────────────────────────────────────┘
158
+
159
+ ▼ also drives
160
+ ┌──────────────────────────────────────┐
161
+ │ Model.model_json_schema() — JSON │
162
+ │ schema for structured-output / tool │
163
+ │ integrations (free from Pydantic) │
164
+ └──────────────────────────────────────┘
165
+
166
+ ▼ supports
167
+ ┌──────────────────────────────────────┐
168
+ │ extract_subtree() — slice a typed │
169
+ │ subtree out of a larger document │
170
+ └──────────────────────────────────────┘
171
+ ```
172
+
173
+ ### Per-arrow summary
174
+
175
+ | Arrow | Reads from the model | Produces |
176
+ | ---------------------- | ----------------------------------------------------- | --------------------------------------- |
177
+ | `render_template` | field names, annotations, nested types | skeleton markdown for prompts |
178
+ | `render_instance` | instance values + annotations | markdown serialization |
179
+ | `validate_markdown` | field types, annotations, structural rules | typed instance (or `MarkdownValidationError`) |
180
+ | Meta-validation hook | class structure at definition time | early `MarkdownError` on malformed templates |
181
+ | `template_fields` | heading-introducing fields and their docstrings | `FieldInfo(heading, description)` stream |
182
+ | `extract_subtree` | nested `MarkdownHeader` types | typed slice of a larger document |
183
+ | `Model.model_json_schema()` | field types (Pydantic-native) | JSON Schema for structured-output APIs |
184
+ | `resolve()` (Jinja) | the model, via `template_fields` / `render_template` | fully-resolved instruction string |
185
+
186
+ The takeaway: edit the annotated Pydantic class, and every artifact above
187
+ follows. No other file needs to change for the prompt, the parser, the
188
+ schema, and the renderer to stay in agreement.
@@ -0,0 +1,20 @@
1
+ """Archetype — Pydantic as source of truth for agentic systems.
2
+
3
+ Core idea: declare a Pydantic data model once, and have one change to that
4
+ model propagate, without any other code edits, to every derived artifact
5
+ the model participates in — markdown templates, renderers, parsers,
6
+ validators, JSON schemas, instruction placeholders, and more.
7
+
8
+ Modules:
9
+
10
+ - ``archetype.markdown`` — typed markdown documents via Pydantic.
11
+ Annotation-driven domain models, rendering, parsing, validation,
12
+ subtree extraction, and heading-field introspection.
13
+
14
+ - ``archetype.templating`` — Jinja-based template resolution. Provides
15
+ a preconfigured Jinja environment with markdown-aware globals
16
+ (``template_fields``, ``render_template``) and a ``resolve()`` helper
17
+ that renders a template string against a context object.
18
+
19
+ See individual submodule docstrings for details.
20
+ """
@@ -0,0 +1,67 @@
1
+ """Declarative markdown-document machinery for archetype.
2
+
3
+ See the architecture ADR and the markdown-machinery-design document for
4
+ context. Quick example:
5
+
6
+ from typing import Annotated
7
+ from archetype.markdown import (
8
+ MarkdownDocument, MarkdownHeader,
9
+ AsHeading, TextTemplate,
10
+ render_template, validate_markdown,
11
+ template_fields,
12
+ )
13
+
14
+ class Finding(MarkdownHeader):
15
+ title: Annotated[str, TextTemplate("Finding {ordinal} - {value}")]
16
+ description: Annotated[str, AsHeading()]
17
+
18
+ class Review(MarkdownDocument):
19
+ title: Annotated[str, TextTemplate("{value}")]
20
+ summary: Annotated[str, AsHeading()]
21
+ findings: list[Finding]
22
+
23
+ template = render_template(Review)
24
+ review = validate_markdown(produced_md, Review)
25
+ fields = template_fields(Review)
26
+ """
27
+
28
+ from archetype.markdown.annotations import (
29
+ AsBulletList,
30
+ AsCodeBlock,
31
+ AsHeading,
32
+ AsNumberedList,
33
+ AsTable,
34
+ TextTemplate,
35
+ )
36
+ from archetype.markdown.errors import (
37
+ MarkdownError,
38
+ MarkdownExtractionError,
39
+ MarkdownTemplateError,
40
+ MarkdownValidationError,
41
+ )
42
+ from archetype.markdown.extractor import extract_subtree
43
+ from archetype.markdown.introspection import FieldInfo, template_fields
44
+ from archetype.markdown.parser import validate_markdown
45
+ from archetype.markdown.renderer import render_instance, render_template
46
+ from archetype.markdown.template_model import MarkdownDocument, MarkdownHeader
47
+
48
+ __all__ = [
49
+ "AsBulletList",
50
+ "AsCodeBlock",
51
+ "AsHeading",
52
+ "AsNumberedList",
53
+ "AsTable",
54
+ "FieldInfo",
55
+ "MarkdownDocument",
56
+ "MarkdownError",
57
+ "MarkdownExtractionError",
58
+ "MarkdownHeader",
59
+ "MarkdownTemplateError",
60
+ "MarkdownValidationError",
61
+ "TextTemplate",
62
+ "extract_subtree",
63
+ "render_instance",
64
+ "render_template",
65
+ "template_fields",
66
+ "validate_markdown",
67
+ ]
@@ -0,0 +1,175 @@
1
+ """Normalize markdown-it-py AST tokens into a tree of typed BlockElement
2
+ instances + an optional MarkdownFrontmatter at the top.
3
+
4
+ Why a separate module: keeps the AST-token → typed-tree concern decoupled from
5
+ the projector (element-tree → domain instance). Tests can drive each layer
6
+ independently.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass
12
+
13
+ import yaml
14
+ from markdown_it import MarkdownIt
15
+ from markdown_it.token import Token
16
+
17
+ from archetype.markdown.elements import (
18
+ BlockElement,
19
+ MarkdownBulletList,
20
+ MarkdownCodeBlock,
21
+ MarkdownFrontmatter,
22
+ MarkdownHeading,
23
+ MarkdownNumberedList,
24
+ MarkdownParagraph,
25
+ MarkdownTable,
26
+ MarkdownTableRow,
27
+ )
28
+ from archetype.markdown.errors import MarkdownValidationError
29
+
30
+
31
@dataclass
class NormalizedDocument:
    """Result of `normalize()`.

    Bundles the document's frontmatter (None when the document has none) with
    the sequence of top-level blocks. Every top-level heading carries its
    scoped body recursively.
    """

    # Frontmatter found at the top of the document, or None.
    frontmatter: MarkdownFrontmatter | None
    # Top-level blocks in document order.
    blocks: list[BlockElement]
38
+
39
+
40
def normalize(markdown: str) -> NormalizedDocument:
    """Turn markdown text into a `NormalizedDocument`.

    The frontmatter is split off first; the remaining body is tokenized with
    markdown-it-py, converted into a flat block list, and finally nested by
    heading level.
    """
    frontmatter, body_text = _split_frontmatter(markdown)

    # Deliberately commonmark + the table plugin rather than
    # MarkdownIt("gfm-like"): that preset turns on the linkify rule, which
    # needs the linkify-it-py package (not one of our dependencies) and
    # crashes at parse time when it is missing.
    parser = MarkdownIt("commonmark").enable("table")

    flat_blocks = _tokens_to_blocks(parser.parse(body_text))
    return NormalizedDocument(
        frontmatter=frontmatter,
        blocks=_nest_headings_by_level(flat_blocks),
    )
52
+
53
+
54
def _split_frontmatter(markdown: str) -> tuple[MarkdownFrontmatter | None, str]:
    """Separate a leading YAML frontmatter block from the markdown body.

    A frontmatter block is recognised only when the text starts with a
    ``---`` line and a later line consisting of ``---`` closes it.

    Returns:
        ``(frontmatter, rest)``. ``frontmatter`` is None (and ``rest`` is the
        unmodified input) when no complete frontmatter block is present.

    Raises:
        MarkdownValidationError: the text between the fences is not valid YAML.
    """
    opener = "---\n"
    closer = "\n---\n"
    if not markdown.startswith(opener):
        return None, markdown

    # Begin the search at the opener's own trailing newline (index 3), not
    # after it: for an empty block ("---\n---\n") that newline is also the
    # first character of the closing fence, and starting at index 4 would
    # miss it and treat the whole document as having no frontmatter.
    end = markdown.find(closer, len(opener) - 1)
    if end == -1:
        return None, markdown

    # Keep the newline that terminates the last YAML line (hence end + 1).
    raw_yaml = markdown[len(opener) : end + 1]
    rest = markdown[end + len(closer) :]
    try:
        # An empty or null YAML document is normalised to an empty mapping.
        parsed = yaml.safe_load(raw_yaml) or {}
    except yaml.YAMLError as exc:
        raise MarkdownValidationError(f"Frontmatter YAML is malformed: {exc}") from exc
    return MarkdownFrontmatter(raw_yaml=raw_yaml, parsed=parsed), rest
67
+
68
+
69
@dataclass
class _FlatBlock:
    """First-pass wrapper pairing a typed element with its AST heading level.

    Internal to the normalizer; instances never leave this module.
    """

    # The typed block element produced from the token stream.
    element: BlockElement
    # Heading level; populated only when `element` is a MarkdownHeading.
    level: int | None
76
+
77
+
78
+ def _tokens_to_blocks(tokens: list[Token]) -> list[_FlatBlock]:
79
+ """First pass: convert flat token stream into a flat list of `_FlatBlock`
80
+ wrappers."""
81
+ out: list[_FlatBlock] = []
82
+ i = 0
83
+ while i < len(tokens):
84
+ t = tokens[i]
85
+ if t.type == "heading_open":
86
+ level = int(t.tag[1]) # 'h2' -> 2
87
+ text = tokens[i + 1].content
88
+ out.append(_FlatBlock(element=MarkdownHeading(text=text, body=[]), level=level))
89
+ i += 3
90
+ elif t.type == "paragraph_open":
91
+ content = tokens[i + 1].content
92
+ out.append(_FlatBlock(element=MarkdownParagraph(content=content), level=None))
93
+ i += 3
94
+ elif t.type == "fence":
95
+ lang = t.info.strip() or None
96
+ out.append(
97
+ _FlatBlock(element=MarkdownCodeBlock(language=lang, content=t.content), level=None)
98
+ )
99
+ i += 1
100
+ elif t.type == "bullet_list_open":
101
+ items, advance = _collect_list_items(tokens, i, "bullet_list_close")
102
+ out.append(_FlatBlock(element=MarkdownBulletList(items=items), level=None))
103
+ i += advance
104
+ elif t.type == "ordered_list_open":
105
+ items, advance = _collect_list_items(tokens, i, "ordered_list_close")
106
+ out.append(_FlatBlock(element=MarkdownNumberedList(items=items), level=None))
107
+ i += advance
108
+ elif t.type == "table_open":
109
+ table, advance = _collect_table(tokens, i)
110
+ out.append(_FlatBlock(element=table, level=None))
111
+ i += advance
112
+ else:
113
+ i += 1
114
+ return out
115
+
116
+
117
def _collect_list_items(tokens: list[Token], start: int, close_type: str) -> tuple[list[str], int]:
    """Collect the text of the direct items of the list opening at `start`.

    Args:
        tokens: the full token stream.
        start: index of the list's ``*_list_open`` token.
        close_type: token type that closes this list
            (``"bullet_list_close"`` or ``"ordered_list_close"``).

    Returns:
        ``(items, advance)`` where ``items`` holds one string per direct list
        item and ``advance`` is the number of tokens spanned by the list,
        from its open token through its matching close token inclusive.

    Nested lists are skipped as a unit. Without depth tracking, the close
    token of a nested list of the same kind would be mistaken for the end of
    the outer list, and the remaining outer items would leak into the caller
    as stray paragraphs.
    """
    items: list[str] = []
    depth = 0  # number of nested lists currently open inside this one
    i = start + 1
    while i < len(tokens):
        kind = tokens[i].type
        if kind == close_type and depth == 0:
            break  # the close token that matches our own open token
        if kind in ("bullet_list_open", "ordered_list_open"):
            depth += 1
        elif kind in ("bullet_list_close", "ordered_list_close"):
            depth -= 1
        elif kind == "list_item_open" and depth == 0:
            # Usual shape: list_item_open, paragraph_open, inline. Only take
            # the lookahead token's text when it really is inline content, so
            # an empty or non-paragraph item yields "" instead of whatever
            # token happens to sit two positions ahead.
            lead = tokens[i + 2] if i + 2 < len(tokens) else None
            items.append(lead.content if lead is not None and lead.type == "inline" else "")
        i += 1
    return items, (i - start) + 1
125
+
126
+
127
def _collect_table(tokens: list[Token], start: int) -> tuple[MarkdownTable, int]:
    """Build a `MarkdownTable` from the table opening at `start`.

    Returns ``(table, advance)`` where ``advance`` is the number of tokens
    from ``table_open`` through ``table_close`` inclusive. A row closed inside
    the header section becomes the column list; rows closed inside the body
    section become table rows.
    """
    header: list[str] = []
    body_rows: list[MarkdownTableRow] = []
    cells: list[str] = []
    inside_head = False
    inside_body = False

    pos = start + 1
    while (kind := tokens[pos].type) != "table_close":
        if kind in ("thead_open", "thead_close"):
            inside_head = kind == "thead_open"
        elif kind in ("tbody_open", "tbody_close"):
            inside_body = kind == "tbody_open"
        elif kind == "tr_open":
            cells = []  # fresh list per row; finished rows keep their own
        elif kind == "tr_close":
            if inside_head:
                header = cells
            elif inside_body:
                body_rows.append(MarkdownTableRow(cells=cells))
        elif kind in ("th_open", "td_open"):
            # The cell's text is taken from the token right after the open.
            cells.append(tokens[pos + 1].content)
        pos += 1

    return MarkdownTable(columns=header, rows=body_rows), (pos - start) + 1
155
+
156
+
157
def _nest_headings_by_level(flat: list[_FlatBlock]) -> list[BlockElement]:
    """Pass two: fold the flat block list into a tree scoped by headings.

    Each block is appended to the body of the innermost heading still open,
    or to the root list when no heading is open. A heading first closes every
    open heading whose level is greater than or equal to its own.
    """
    top_level: list[BlockElement] = []
    open_headings: list[tuple[int, MarkdownHeading]] = []

    for entry in flat:
        element = entry.element

        if not isinstance(element, MarkdownHeading):
            container = open_headings[-1][1].body if open_headings else top_level
            container.append(element)
            continue

        assert entry.level is not None
        depth = entry.level
        # Close sibling and deeper scopes before attaching this heading.
        while open_headings and open_headings[-1][0] >= depth:
            open_headings.pop()
        parent = open_headings[-1][1].body if open_headings else top_level
        parent.append(element)
        open_headings.append((depth, element))

    return top_level