org-parser 0.23.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
org_parser/__init__.py ADDED
@@ -0,0 +1,116 @@
1
+ """org_parser — Python bindings for the tree-sitter org-mode parser.
2
+
3
+ This package provides convenience helpers for loading and dumping Org Mode
4
+ documents as [org_parser.document.Document][] instances.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
+ from org_parser._lang import PARSER
12
+ from org_parser.document import Document
13
+
14
+ __all__ = ["Document", "dump", "dumps", "load", "loads"]
15
+
16
+
17
def load(filename: str) -> Document:
    """Parse the Org Mode file at *filename* into a document.

    Args:
        filename: Path to the Org Mode file.

    Returns:
        Parsed [org_parser.document.Document][] instance.

    Example:
        ```python
        >>> from org_parser import load
        >>> document = load('path/to/file.org')
        >>> document.children[0].title_text
        'Some heading'
        ```
    """
    # Read raw bytes so the tree-sitter parser sees the file exactly as stored.
    raw = Path(filename).read_bytes()
    parsed_tree = PARSER.parse(raw)
    return Document.from_tree(parsed_tree, filename, raw)
38
+
39
+
40
def loads(source: str, filename: str | None = None) -> Document:
    """Parse Org Mode text supplied as a string.

    Args:
        source: Org Mode text to parse.
        filename: Optional filename to assign to the parsed document.

    Returns:
        Parsed [org_parser.document.Document][] instance.

    Example:
        ```python
        >>> from org_parser import loads
        >>> document = loads("* TODO Heading 1")
        >>> document.children[0].todo
        'TODO'
        ```
    """
    # A missing filename becomes the empty string, matching load()'s contract.
    encoded = source.encode()
    parsed_tree = PARSER.parse(encoded)
    return Document.from_tree(parsed_tree, "" if filename is None else filename, encoded)
62
+
63
+
64
def dumps(document: Document) -> str:
    """Serialize a parsed document back to Org Mode text.

    Produces the complete document text including all headings. For clean
    (unmodified) parse-backed documents the original source is returned
    verbatim; for dirty documents every section is reconstructed from its
    semantic fields.

    Args:
        document: Parsed document instance.

    Returns:
        Full Org Mode source text.

    Example:
        ```python
        >>> from org_parser import dumps, loads
        >>> document = loads("* TODO Heading 1")
        >>> dumps(document).startswith("* TODO")
        True
        ```
    """
    return document.render()
87
+
88
+
89
def dump(document: Document, filename: str | None = None) -> None:
    """Write a parsed document to disk.

    The output path is *filename* when provided; otherwise
    [document.filename][org_parser.document.Document.filename].

    Args:
        document: Parsed document instance.
        filename: Optional output path.

    Raises:
        ValueError: If neither *filename* nor ``document.filename`` is set.

    Example:
        ```python
        >>> from pathlib import Path
        >>> from org_parser import dump, loads
        >>> document = loads("* TODO Heading 1")
        >>> dump(document, 'path/to/file.org')
        >>> out = Path('path/to/file.org')
        >>> out.read_text().startswith("* TODO")
        True
        ```
    """
    target = filename if filename is not None else document.filename
    # Treat None and "" alike: the previous `target == ""` check let a None
    # document.filename fall through to Path(None), raising TypeError instead
    # of the documented ValueError.
    if not target:
        raise ValueError("No output filename provided")
    Path(target).write_text(dumps(document))
@@ -0,0 +1,78 @@
1
+ """Shared strict parsing helpers for ``from_source`` constructors.
2
+
3
+ These helpers centralize parse-then-extract flows used by semantic
4
+ ``from_source`` class methods.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING, TypeVar
10
+
11
+ from org_parser._lang import PARSER
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Callable
15
+
16
+ from org_parser.document._document import Document
17
+
18
+ __all__ = ["parse_document_from_source", "parse_source_with_extractor"]
19
+
20
+ _ExtractedT = TypeVar("_ExtractedT")
21
+
22
+
23
def parse_document_from_source(source: str, *, filename: str = "") -> Document:
    """Parse *source* and return a strict parse-backed :class:`Document`.

    Args:
        source: Org source text to parse.
        filename: Optional filename assigned to the parsed document.

    Returns:
        The parsed semantic :class:`Document`.

    Raises:
        ValueError: If the parse tree contains any error or missing nodes.
    """
    encoded = source.encode()
    parsed_tree = PARSER.parse(encoded)

    # Imported locally to avoid a circular import with the document package.
    from org_parser.document._document import Document

    parsed = Document.from_tree(parsed_tree, filename, encoded)
    if parsed.errors:
        raise ValueError("Source contains parse errors")
    return parsed
45
+
46
+
47
def parse_source_with_extractor(
    source: str,
    *,
    extractor: Callable[[Document], _ExtractedT | None],
) -> tuple[_ExtractedT, Document]:
    """Parse *source*, validate syntax, and extract one semantic value.

    Args:
        source: Org source text to parse.
        extractor: Callback that receives ``document`` and returns
            the specific semantic value to return.

    Returns:
        A ``(extracted, document)`` tuple.

    Raises:
        ValueError: If the source cannot be parsed cleanly or no valid value is
            extracted.
    """
    # Delegate the parse + strict error check to parse_document_from_source
    # rather than duplicating the same flow inline (same "" filename, same
    # ValueError on parse errors).
    document = parse_document_from_source(source)

    extracted = extractor(document)
    if extracted is None:
        raise ValueError("Unexpected parse tree structure")
    return extracted, document
org_parser/_lang.py ADDED
@@ -0,0 +1,12 @@
"""Internal: tree-sitter Language and Parser singletons for Org Mode.

The grammar is loaded exactly once at import time; every parsing helper in
the package shares the module-level ``PARSER`` instance.
"""

from tree_sitter import Language, Parser
import tree_sitter_org

__all__ = ["ORG_LANGUAGE", "PARSER"]

#: The Org Mode :class:`~tree_sitter.Language` instance (module-level singleton),
#: built from the compiled grammar exposed by :mod:`tree_sitter_org`.
ORG_LANGUAGE: Language = Language(tree_sitter_org.language())

#: A :class:`~tree_sitter.Parser` pre-configured with :data:`ORG_LANGUAGE`.
PARSER: Parser = Parser(ORG_LANGUAGE)
org_parser/_node.py ADDED
@@ -0,0 +1,81 @@
1
+ """Shared tree-sitter node utilities.
2
+
3
+ These helpers centralise recurring patterns for inspecting and extracting
4
+ decoded source text from tree-sitter nodes:
5
+
6
* :func:`is_error_node` — classify a node as an error or missing token.
* :func:`node_source` — decode the source text a node spans, reaching back
  through its owning :class:`~org_parser.document._document.Document`
  reference (e.g. inside ``__str__`` methods on element objects).
* :func:`report_internal_parse_errors` — record error nodes nested inside a
  semantic object's parse subtree.

:func:`node_source` returns an empty string rather than raising when the node
or document argument is ``None``, so callers do not need separate guard
clauses.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from typing import TYPE_CHECKING
21
+
22
+ if TYPE_CHECKING:
23
+ import tree_sitter
24
+
25
+ from org_parser.document._document import Document
26
+
27
+ __all__ = ["is_error_node", "node_source", "report_internal_parse_errors"]
28
+
29
_ERROR_NODE_TYPE = "ERROR"


def is_error_node(node: tree_sitter.Node) -> bool:
    """Classify *node* as a parse-error or missing token.

    Args:
        node: Any tree-sitter node to inspect.

    Returns:
        ``True`` for ``ERROR``-typed nodes, and for nodes flagged as
        missing by the parser's error-recovery; ``False`` otherwise.
    """
    if node.type == _ERROR_NODE_TYPE:
        return True
    return node.is_missing
43
+
44
+
45
def node_source(node: tree_sitter.Node | None, document: Document | None) -> str:
    """Decode the source text that *node* spans inside *document*.

    Args:
        node: A tree-sitter node, or ``None`` for programmatically constructed
            elements that carry no parse-tree backing.
        document: The owning :class:`~org_parser.document._document.Document`,
            or ``None``.

    Returns:
        The decoded source slice, or an empty string when either argument is
        ``None``.

    Raises:
        ValueError: If the provided document has no backing source bytes.
    """
    if node is not None and document is not None:
        return document.source_for(node).decode()
    return ""
64
+
65
+
66
def report_internal_parse_errors(node: tree_sitter.Node, document: Document) -> None:
    """Record top-level parse-error regions found inside *node*'s subtree.

    Walks the subtree in pre-order and reports each ``ERROR`` or missing
    node to :attr:`Document.errors` unless an ancestor was itself an error
    node, so one malformed segment is reported exactly once rather than
    once per nested error node.
    """
    # Explicit stack keeps the walk iterative; children are pushed in
    # reverse so pop order matches left-to-right pre-order traversal.
    pending: list[tuple[tree_sitter.Node, bool]] = [(node, False)]
    while pending:
        current, inside_error = pending.pop()
        found_error = is_error_node(current)
        if found_error and not inside_error:
            document.report_error(current)
        descend_flag = inside_error or found_error
        for child in reversed(current.children):
            pending.append((child, descend_flag))
org_parser/_nodes.py ADDED
@@ -0,0 +1,231 @@
1
"""Tree-sitter grammar node-type name constants for the Org Mode grammar.

All string constants here correspond to ``node.type`` values produced by the
compiled Org Mode tree-sitter grammar. Centralising them here prevents the
same literal from being scattered across every module that dispatches on node
types.

Constants are grouped by semantic domain and kept in alphabetical order within
each group.
"""

from __future__ import annotations

# Fix: FILETAGS was defined below but missing from __all__; it is now listed
# in its sorted position like every other constant.
__all__ = [
    "ANGLE_LINK",
    "AUTHOR",
    "BABEL_CALL",
    "BLANK_LINE",
    "BOLD",
    "CAPTION_KEYWORD",
    "CATEGORY",
    "CENTER_BLOCK",
    "CITATION",
    "CLOCK",
    "CLOSED",
    "CODE",
    "COMMENT",
    "COMMENT_BLOCK",
    "COMPLETION_COUNTER",
    "DEADLINE",
    "DELAY_MARK",
    "DESCRIPTION",
    "DRAWER",
    "DYNAMIC_BLOCK",
    "ENTITY",
    "EXAMPLE_BLOCK",
    "EXPORT_BLOCK",
    "EXPORT_SNIPPET",
    "FILETAGS",
    "FIXED_WIDTH",
    "FOOTNOTE_REFERENCE",
    "HEADING",
    "HORIZONTAL_RULE",
    "INDENT",
    "INLINE_BABEL_CALL",
    "INLINE_HEADERS",
    "INLINE_SOURCE_BLOCK",
    "ITALIC",
    "LINE_BREAK",
    "LIST",
    "LIST_ITEM",
    "LOGBOOK_DRAWER",
    "MACRO",
    "MACRO_ARGUMENTS",
    "MACRO_NAME",
    "NODE_PROPERTY",
    "ORG_TABLE",
    "PARAGRAPH",
    "PLAIN_LINK",
    "PLAIN_TEXT",
    "PLANNING",
    "PLANNING_KEYWORD",
    "PLOT_KEYWORD",
    "PROPERTY_DRAWER",
    "QUOTE_BLOCK",
    "RADIO_TARGET",
    "REGULAR_LINK",
    "REPEATER_MARK",
    "RESULTS_KEYWORD",
    "SCHEDULED",
    "SPECIAL_BLOCK",
    "SPECIAL_KEYWORD",
    "SRC_BLOCK",
    "STRIKE_THROUGH",
    "SUBSCRIPT",
    "SUPERSCRIPT",
    "TABLEEL_TABLE",
    "TABLE_CELL",
    "TABLE_ROW",
    "TABLE_RULE",
    "TAG",
    "TARGET",
    "TBLFM_LINE",
    "TBLNAME_KEYWORD",
    "TIMESTAMP",
    "TIME_UNIT",
    "TITLE",
    "TODO",
    "TS_DAY",
    "TS_DAYNAME",
    "TS_MONTH",
    "TS_TIME",
    "TS_YEAR",
    "UNDERLINE",
    "VERBATIM",
    "VERSE_BLOCK",
    "ZEROTH_SECTION",
]

# ---------------------------------------------------------------------------
# Document / section structure
# ---------------------------------------------------------------------------

HEADING = "heading"
ZEROTH_SECTION = "zeroth_section"

# ---------------------------------------------------------------------------
# Planning
# ---------------------------------------------------------------------------

PLANNING = "planning"
PLANNING_KEYWORD = "planning_keyword"
TIMESTAMP = "timestamp"

# Planning keyword values — the text content of ``planning_keyword`` nodes
# (e.g. the word ``SCHEDULED`` in the source), not grammar node types.
SCHEDULED = "SCHEDULED"
DEADLINE = "DEADLINE"
CLOSED = "CLOSED"

# ---------------------------------------------------------------------------
# Timestamp sub-nodes
# ---------------------------------------------------------------------------

TS_DAY = "ts_day"
TS_DAYNAME = "ts_dayname"
TS_MONTH = "ts_month"
TS_TIME = "ts_time"
TS_YEAR = "ts_year"
DELAY_MARK = "delay_mark"
REPEATER_MARK = "repeater_mark"
TIME_UNIT = "time_unit"

# ---------------------------------------------------------------------------
# Heading components
# ---------------------------------------------------------------------------

COMPLETION_COUNTER = "completion_counter"
TAG = "tag"

# ---------------------------------------------------------------------------
# Keywords
# ---------------------------------------------------------------------------

SPECIAL_KEYWORD = "special_keyword"

# Special keyword values — the upper-cased key text of ``special_keyword``
# nodes (e.g. the word ``TITLE`` in ``#+TITLE:``), not grammar node types.
TITLE = "TITLE"
AUTHOR = "AUTHOR"
CATEGORY = "CATEGORY"
DESCRIPTION = "DESCRIPTION"
TODO = "TODO"
FILETAGS = "FILETAGS"

# ---------------------------------------------------------------------------
# Drawers
# ---------------------------------------------------------------------------

DRAWER = "drawer"
LOGBOOK_DRAWER = "logbook_drawer"
NODE_PROPERTY = "node_property"
PROPERTY_DRAWER = "property_drawer"

# ---------------------------------------------------------------------------
# Element types
# ---------------------------------------------------------------------------

BABEL_CALL = "babel_call"
BLANK_LINE = "blank_line"
CAPTION_KEYWORD = "caption_keyword"
CENTER_BLOCK = "center_block"
CLOCK = "clock"
COMMENT = "comment"
COMMENT_BLOCK = "comment_block"
DYNAMIC_BLOCK = "dynamic_block"
EXAMPLE_BLOCK = "example_block"
EXPORT_BLOCK = "export_block"
FIXED_WIDTH = "fixed_width"
HORIZONTAL_RULE = "horizontal_rule"
INDENT = "indent"
LIST = "list"
LIST_ITEM = "list_item"
ORG_TABLE = "org_table"
PARAGRAPH = "paragraph"
PLOT_KEYWORD = "plot_keyword"
QUOTE_BLOCK = "quote_block"
RESULTS_KEYWORD = "results_keyword"
SPECIAL_BLOCK = "special_block"
SRC_BLOCK = "src_block"
TABLEEL_TABLE = "tableel_table"
TBLNAME_KEYWORD = "tblname_keyword"
VERSE_BLOCK = "verse_block"

# ---------------------------------------------------------------------------
# Table sub-nodes
# ---------------------------------------------------------------------------

TABLE_CELL = "table_cell"
TABLE_ROW = "table_row"
TABLE_RULE = "table_rule"
TBLFM_LINE = "tblfm_line"

# ---------------------------------------------------------------------------
# Inline object types
# ---------------------------------------------------------------------------

ANGLE_LINK = "angle_link"
BOLD = "bold"
ENTITY = "entity"
CITATION = "citation"
CODE = "code"
EXPORT_SNIPPET = "export_snippet"
FOOTNOTE_REFERENCE = "footnote_reference"
INLINE_BABEL_CALL = "inline_babel_call"
INLINE_HEADERS = "inline_headers"
INLINE_SOURCE_BLOCK = "inline_source_block"
ITALIC = "italic"
LINE_BREAK = "line_break"
MACRO = "macro"
MACRO_ARGUMENTS = "macro_arguments"
MACRO_NAME = "macro_name"
PLAIN_LINK = "plain_link"
PLAIN_TEXT = "plain_text"
RADIO_TARGET = "radio_target"
REGULAR_LINK = "regular_link"
STRIKE_THROUGH = "strike_through"
SUBSCRIPT = "subscript"
SUPERSCRIPT = "superscript"
TARGET = "target"
UNDERLINE = "underline"
VERBATIM = "verbatim"
@@ -0,0 +1,16 @@
1
+ """Document-level parsing, semantic classes, and raw tree access.
2
+
3
+ This subpackage provides:
4
+
5
+ * [org_parser.document.Document][] — the top-level semantic representation of an Org file,
6
+ including keyword properties (``TITLE``, ``AUTHOR``, …), the zeroth-section
7
+ body, and top-level headings.
8
+ * [org_parser.document.Heading][] — a heading / sub-heading with its parsed components
9
+ (level, TODO state, priority, title, tags, body, sub-headings).
10
+ """
11
+
12
+ from org_parser.document._document import Document, ParseError
13
+ from org_parser.document._heading import Heading
14
+ from org_parser.document._loader import load_raw
15
+
16
+ __all__ = ["Document", "Heading", "ParseError", "load_raw"]
@@ -0,0 +1,156 @@
1
+ """Shared body-extraction helpers.
2
+
3
+ Used mostly by [org_parser.document.Document][] and [org_parser.document.Heading][].
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ from org_parser._node import is_error_node, node_source
11
+ from org_parser._nodes import INDENT
12
+ from org_parser.element import Logbook, Properties, Repeat
13
+ from org_parser.element._dispatch import body_element_factories
14
+ from org_parser.element._element import Element, element_from_error_or_unknown
15
+ from org_parser.element._structure import Indent
16
+
17
+ if TYPE_CHECKING:
18
+ from collections.abc import Callable
19
+
20
+ import tree_sitter
21
+
22
+ from org_parser.document._document import Document
23
+ from org_parser.document._heading import Heading
24
+ from org_parser.text._rich_text import RichText
25
+ from org_parser.time import Clock
26
+
27
+ # NOTE: Callable is kept in TYPE_CHECKING for the dispatch dict type annotations.
28
+
29
+ __all__ = [
30
+ "extract_body_element",
31
+ "extract_indent",
32
+ "merge_logbook_drawers",
33
+ "merge_properties_drawers",
34
+ ]
35
+
36
+
37
def merge_properties_drawers(
    drawers: list[Properties],
    *,
    parent: Heading | Document,
) -> Properties | None:
    """Collapse repeated properties drawers into a single drawer.

    Args:
        drawers: All collected [org_parser.element.Properties][] drawers in source order.
        parent: Owner object to assign to the merged drawer.

    Returns:
        A single merged [org_parser.element.Properties][], or ``None`` when *drawers* is
        empty. Later drawers override earlier entries for the same key.
    """
    if not drawers:
        return None
    combined: dict[str, RichText] = {}
    for drawer in drawers:
        for key, value in drawer.items():
            # Remove any earlier entry first so the overriding value also
            # takes the later insertion position in the mapping.
            combined.pop(key, None)
            combined[key] = value
    return Properties(properties=combined, parent=parent)
61
+
62
+
63
def merge_logbook_drawers(
    drawers: list[Logbook],
    *,
    parent: Heading | Document,
) -> Logbook | None:
    """Collapse repeated logbook drawers into a single drawer.

    Args:
        drawers: All collected [org_parser.element.Logbook][] drawers in source order.
        parent: Owner object to assign to the merged drawer.

    Returns:
        A single merged [org_parser.element.Logbook][], or ``None`` when *drawers* is empty.
    """
    if not drawers:
        return None
    # Concatenate each drawer's components in source order.
    combined_body: list[Element] = []
    combined_clocks: list[Clock] = []
    combined_repeats: list[Repeat] = []
    for drawer in drawers:
        combined_body += drawer.body
        combined_clocks += drawer.clock_entries
        combined_repeats += drawer.repeats
    return Logbook(
        body=combined_body,
        clock_entries=combined_clocks,
        repeats=combined_repeats,
        parent=parent,
    )
92
+
93
+
94
def extract_body_element(
    node: tree_sitter.Node,
    *,
    parent: Heading | Document,
    document: Document,
) -> Element:
    """Build one body element instance from a tree-sitter node.

    Error nodes (``ERROR`` type or ``is_missing``) are recovered immediately
    before dispatch so that callers do not need to guard the call site.

    Args:
        node: A tree-sitter child node from a section or zeroth-section.
        parent: Owner heading or document.
        document: The owning [org_parser.document.Document][].

    Returns:
        A semantic [org_parser.element.Element][] subclass matching *node.type*, or a
        recovered [org_parser.element.Paragraph][] for
        error and unrecognised nodes.
    """
    if is_error_node(node):
        return element_from_error_or_unknown(node, document, parent=parent)
    # Dispatch without rebuilding a merged dict on every call: INDENT was the
    # only entry added on top of the shared factory table (and would have
    # overridden it), so an explicit check is equivalent and cheaper.
    if node.type == INDENT:
        return extract_indent(node, document, parent=parent)
    factory = body_element_factories().get(node.type)
    if factory is None:
        return element_from_error_or_unknown(node, document, parent=parent)
    return factory(node, document, parent=parent)
125
+
126
+
127
def extract_indent(
    node: tree_sitter.Node,
    document: Document,
    *,
    parent: Heading | Document,
) -> Indent:
    """Build one [org_parser.element.Indent][] with recursively parsed body nodes.

    Args:
        node: A tree-sitter ``indent`` node.
        document: The owning [org_parser.document.Document][].
        parent: Owner heading or document.

    Returns:
        An [org_parser.element.Indent][] whose body elements are recursively parsed.
    """
    raw_indent = node_source(node.child_by_field_name("indent"), document)
    parsed_children = [
        extract_body_element(child, parent=parent, document=document)
        for child in node.children_by_field_name("body")
        if child.is_named
    ]
    indent_element = Indent(
        body=parsed_children,
        # An empty indent string means "no explicit indent": store None.
        indent=raw_indent or None,
        parent=parent,
    )
    indent_element.attach_source(node, document)
    return indent_element