python-hwpx 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hwpx/oxml/utils.py ADDED
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Optional, Tuple, Union
5
+
6
+ from lxml import etree
7
+
8
+ _TRUE_VALUES = {"1", "true", "True", "TRUE"}  # exact spellings parse_bool() maps to True
9
+ _FALSE_VALUES = {"0", "false", "False", "FALSE"}  # exact spellings parse_bool() maps to False
10
+
11
+
12
def local_name(node: etree._Element) -> str:
    """Return the tag name of *node* with any namespace part removed."""
    qualified = etree.QName(node)
    return qualified.localname
15
+
16
+
17
def parse_int(value: Optional[str], *, allow_none: bool = True) -> Optional[int]:
    """Convert *value* to an ``int``.

    A ``None`` input is handed back as ``None`` while *allow_none* is true
    (the default). ``ValueError`` is raised when ``None`` is not allowed or
    when ``int()`` rejects the value.
    """
    if value is not None:
        try:
            number = int(value)
        except (TypeError, ValueError) as exc:  # pragma: no cover - defensive branch
            raise ValueError(f"Invalid integer value: {value!r}") from exc
        return number

    if not allow_none:
        raise ValueError("Missing integer value")
    return None
32
+
33
+
34
def parse_bool(value: Optional[str], *, default: Optional[bool] = None) -> Optional[bool]:
    """Interpret an attribute string as a boolean.

    ``None`` yields *default*. A value found in the module's true/false
    spelling sets yields the matching boolean; any other value raises
    ``ValueError``.
    """
    if value is None:
        return default

    # Check the true spellings first, then the false ones.
    for accepted, result in ((_TRUE_VALUES, True), (_FALSE_VALUES, False)):
        if value in accepted:
            return result
    raise ValueError(f"Invalid boolean value: {value!r}")
44
+
45
+
46
def text_or_none(node: etree._Element) -> Optional[str]:
    """Return ``node.text`` without surrounding whitespace.

    ``None`` is returned when the node has no text or when the text is
    empty after stripping.
    """
    raw = node.text
    stripped = raw.strip() if raw is not None else ""
    return stripped or None
53
+
54
+
55
XmlSource = Union[str, bytes, Path, etree._Element, etree._ElementTree]


def coerce_xml_source(source: XmlSource) -> Tuple[etree._Element, etree._ElementTree]:
    """Return ``(root, tree)`` for *source*.

    *source* may be an ``lxml`` element, element tree, path-like object or
    raw XML (``str``/``bytes``). The helper normalises the input so that callers
    always receive both the element and the owning tree which is handy for XSD
    validation.

    A ``str`` whose first non-blank character is ``<`` is always treated as
    raw markup. Other ``str``/``Path`` values are parsed from disk when they
    name an existing file and are otherwise parsed as XML text. Parse errors
    raised by ``lxml`` propagate unchanged.
    """

    if isinstance(source, etree._Element):
        return source, source.getroottree()
    if isinstance(source, etree._ElementTree):
        return source.getroot(), source

    if isinstance(source, (str, Path)):
        # Raw markup must never be probed as a filename: Path.exists() raises
        # OSError (ENAMETOOLONG) for long strings instead of returning False.
        looks_like_markup = isinstance(source, str) and source.lstrip().startswith("<")
        if not looks_like_markup:
            path = Path(source)
            try:
                is_file = path.exists()
            except (OSError, ValueError):
                # An unusable filename simply means "not a file on disk".
                is_file = False
            if is_file:
                tree = etree.parse(str(path))
                return tree.getroot(), tree
        # Encode to bytes so an XML declaration with an encoding is accepted.
        xml_bytes = str(source).encode("utf-8")
    else:
        xml_bytes = bytes(source)

    root = etree.fromstring(xml_bytes)
    return root, root.getroottree()
hwpx/package.py ADDED
@@ -0,0 +1,202 @@
1
+ """Utilities for working with the container format used by HWPX files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ from pathlib import Path
7
+ from typing import BinaryIO, Dict, Iterable, Mapping, MutableMapping
8
+ import xml.etree.ElementTree as ET
9
+ from zipfile import ZIP_DEFLATED, ZipFile
10
+
11
_OPF_NS = "http://www.idpf.org/2007/opf/"


def _ensure_bytes(value: bytes | str | ET.Element) -> bytes:
    """Serialise a part payload to the raw bytes stored in the archive.

    ``bytes`` pass through unchanged, ``bytearray``/``memoryview`` are copied
    into ``bytes``, ``str`` is encoded as UTF-8 and an ``ET.Element`` is
    serialised as UTF-8 with an XML declaration.

    Raises:
        TypeError: if *value* is none of the supported payload types.
    """
    if isinstance(value, bytes):
        return value
    if isinstance(value, (bytearray, memoryview)):
        return bytes(value)
    if isinstance(value, str):
        return value.encode("utf-8")
    if isinstance(value, ET.Element):
        return ET.tostring(value, encoding="utf-8", xml_declaration=True)
    raise TypeError(f"unsupported part payload type: {type(value)!r}")


class HwpxPackage:
    """Represents the OPC-style package that stores HWPX parts.

    Every archive member is held in memory as ``bytes`` keyed by its name.
    Values derived from the manifest are cached and dropped again when the
    parts they depend on are written.
    """

    MANIFEST_PATH = "Contents/content.hpf"
    HEADER_PATH = "Contents/header.xml"
    MIMETYPE_PATH = "mimetype"

    def __init__(
        self,
        parts: MutableMapping[str, bytes],
        source_path: Path | None = None,
    ):
        """Create a package from *parts* (copied) and an optional origin path."""
        self._parts: MutableMapping[str, bytes] = dict(parts)
        self._source_path = source_path
        self._manifest_tree: ET.Element | None = None
        self._spine_cache: list[str] | None = None
        self._section_paths_cache: list[str] | None = None
        self._header_paths_cache: list[str] | None = None

    # -- construction ----------------------------------------------------
    @classmethod
    def open(cls, source: str | Path | bytes | BinaryIO) -> "HwpxPackage":
        """Load a package from a filesystem path, raw bytes or a readable stream.

        Only a path source is remembered as the default ``save()`` target.

        Raises:
            TypeError: if *source* is not a path, bytes-like or file-like object.
        """
        if isinstance(source, (str, Path)):
            path = Path(source)
            return cls(cls._read_parts(path), source_path=path)

        if isinstance(source, (bytes, bytearray)):
            return cls(cls._read_parts(io.BytesIO(source)))

        if hasattr(source, "read"):
            return cls(cls._read_parts(io.BytesIO(source.read())))

        raise TypeError("unsupported source type for HwpxPackage")

    @staticmethod
    def _read_parts(archive_source: Path | BinaryIO) -> dict[str, bytes]:
        """Read every member of the ZIP at *archive_source* into a name->bytes dict."""
        with ZipFile(archive_source) as archive:
            return {info.filename: archive.read(info.filename) for info in archive.infolist()}

    # -- accessors -------------------------------------------------------
    def part_names(self) -> Iterable[str]:
        """Return a new list with the names of all parts."""
        return list(self._parts.keys())

    def has_part(self, part_name: str) -> bool:
        """Tell whether *part_name* exists in the package."""
        return part_name in self._parts

    def get_part(self, part_name: str) -> bytes:
        """Return the raw bytes of *part_name*; raise ``KeyError`` if absent."""
        try:
            return self._parts[part_name]
        except KeyError as exc:
            raise KeyError(f"package does not contain part '{part_name}'") from exc

    def set_part(self, part_name: str, payload: bytes | str | ET.Element) -> None:
        """Store *payload* under *part_name*, converting it to bytes first."""
        self._parts[part_name] = _ensure_bytes(payload)
        if part_name == self.MANIFEST_PATH:
            self._manifest_tree = None
            self._spine_cache = None
        # Section/header listings fall back to scanning the part names, so
        # any write (not only a manifest write) can change their result.
        self._section_paths_cache = None
        self._header_paths_cache = None

    def get_xml(self, part_name: str) -> ET.Element:
        """Parse *part_name* and return its root element."""
        return ET.fromstring(self.get_part(part_name))

    def set_xml(self, part_name: str, element: ET.Element) -> None:
        """Serialise *element* and store it as *part_name*."""
        self.set_part(part_name, element)

    def get_text(self, part_name: str, encoding: str = "utf-8") -> str:
        """Return *part_name* decoded with *encoding*."""
        return self.get_part(part_name).decode(encoding)

    # -- manifest helpers ------------------------------------------------
    def manifest_tree(self) -> ET.Element:
        """Return the parsed manifest root, parsing it on first use."""
        if self._manifest_tree is None:
            self._manifest_tree = self.get_xml(self.MANIFEST_PATH)
        return self._manifest_tree

    def _resolve_spine_paths(self) -> list[str]:
        """Return the hrefs of manifest items in spine order (cached)."""
        if self._spine_cache is None:
            manifest = self.manifest_tree()
            ns = {"opf": _OPF_NS}
            # Map manifest item ids to hrefs, skipping incomplete entries.
            manifest_items: Dict[str, str] = {}
            for item in manifest.findall("./opf:manifest/opf:item", ns):
                item_id = item.attrib.get("id")
                href = item.attrib.get("href", "")
                if item_id and href:
                    manifest_items[item_id] = href
            spine_paths: list[str] = []
            for itemref in manifest.findall("./opf:spine/opf:itemref", ns):
                idref = itemref.attrib.get("idref")
                if not idref:
                    continue
                href = manifest_items.get(idref)
                if href:
                    spine_paths.append(href)
            self._spine_cache = spine_paths
        return self._spine_cache

    def section_paths(self) -> list[str]:
        """Return the part paths whose file name starts with ``section``.

        Spine entries are preferred; when the spine lists none, the stored
        part names are scanned instead. A fresh list is returned each call.
        """
        if self._section_paths_cache is None:
            from pathlib import PurePosixPath

            paths = [
                path
                for path in self._resolve_spine_paths()
                if path and PurePosixPath(path).name.startswith("section")
            ]
            if not paths:
                # Fallback: include known section files if they exist.
                paths = [
                    name
                    for name in self._parts
                    if PurePosixPath(name).name.startswith("section")
                ]
            self._section_paths_cache = paths
        return list(self._section_paths_cache)

    def header_paths(self) -> list[str]:
        """Return the part paths whose file name starts with ``header``.

        Spine entries are preferred; when the spine lists none,
        ``HEADER_PATH`` is used if that part exists. A fresh list is
        returned each call.
        """
        if self._header_paths_cache is None:
            from pathlib import PurePosixPath

            paths = [
                path
                for path in self._resolve_spine_paths()
                if path and PurePosixPath(path).name.startswith("header")
            ]
            if not paths and self.has_part(self.HEADER_PATH):
                paths = [self.HEADER_PATH]
            self._header_paths_cache = paths
        return list(self._header_paths_cache)

    # -- saving ----------------------------------------------------------
    def save(
        self,
        path_or_stream: str | Path | BinaryIO | None = None,
        updates: Mapping[str, bytes | str | ET.Element] | None = None,
    ) -> str | Path | BinaryIO | bytes | None:
        """Write the package as a ZIP archive.

        *updates* are applied with ``set_part`` before writing. The target
        is *path_or_stream* when given, otherwise the path the package was
        opened from. With no target at all the archive bytes are returned.
        A path target returns the written ``Path`` and becomes the new
        default target. A stream target is rewound, truncated, written,
        rewound again and returned.
        """
        if updates:
            for part_name, payload in updates.items():
                self.set_part(part_name, payload)

        destination = path_or_stream or self._source_path

        if destination is None:
            buffer = io.BytesIO()
            self._write_to_stream(buffer)
            return buffer.getvalue()

        if isinstance(destination, (str, Path)):
            dest_path = Path(destination)
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            with ZipFile(dest_path, "w", compression=ZIP_DEFLATED) as archive:
                self._write_archive(archive)
            self._source_path = dest_path
            return dest_path

        stream = destination
        # Discard whatever the stream already holds before writing.
        if hasattr(stream, "seek"):
            stream.seek(0)
        if hasattr(stream, "truncate"):
            stream.truncate(0)
        with ZipFile(stream, "w", compression=ZIP_DEFLATED) as archive:
            self._write_archive(archive)
        if hasattr(stream, "seek"):
            stream.seek(0)
        return stream

    # -- internals -------------------------------------------------------
    def _write_to_stream(self, stream: BinaryIO) -> None:
        """Write the archive into *stream* and rewind it to the start."""
        with ZipFile(stream, "w", compression=ZIP_DEFLATED) as archive:
            self._write_archive(archive)
        stream.seek(0)

    def _write_archive(self, archive: ZipFile) -> None:
        """Write every part into *archive* in a deterministic order.

        The ``mimetype`` part, when present, is written first and
        uncompressed, as OCF-style containers expect; the remaining parts
        follow in sorted name order using the archive's default compression.
        """
        from zipfile import ZIP_STORED

        remaining = sorted(self._parts)
        if self.MIMETYPE_PATH in self._parts:
            remaining.remove(self.MIMETYPE_PATH)
            archive.writestr(
                self.MIMETYPE_PATH,
                self._parts[self.MIMETYPE_PATH],
                compress_type=ZIP_STORED,
            )
        for part_name in remaining:
            archive.writestr(part_name, self._parts[part_name])
hwpx/tools/__init__.py ADDED
@@ -0,0 +1,36 @@
1
+ """Tooling helpers for inspecting HWPX archives."""
2
+
3
+ from .object_finder import FoundElement, ObjectFinder
4
+ from .text_extractor import (
5
+ DEFAULT_NAMESPACES,
6
+ ParagraphInfo,
7
+ SectionInfo,
8
+ TextExtractor,
9
+ build_parent_map,
10
+ describe_element_path,
11
+ strip_namespace,
12
+ )
13
+ from .validator import (
14
+ DocumentSchemas,
15
+ ValidationIssue,
16
+ ValidationReport,
17
+ load_default_schemas,
18
+ validate_document,
19
+ )
20
+
21
+ __all__ = [
22
+ "DEFAULT_NAMESPACES",
23
+ "ParagraphInfo",
24
+ "SectionInfo",
25
+ "TextExtractor",
26
+ "build_parent_map",
27
+ "describe_element_path",
28
+ "strip_namespace",
29
+ "FoundElement",
30
+ "ObjectFinder",
31
+ "DocumentSchemas",
32
+ "ValidationIssue",
33
+ "ValidationReport",
34
+ "load_default_schemas",
35
+ "validate_document",
36
+ ]
@@ -0,0 +1,14 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
3
+ targetNamespace="http://www.hancom.co.kr/hwpml/2011/head"
4
+ xmlns:hh="http://www.hancom.co.kr/hwpml/2011/head"
5
+ elementFormDefault="qualified">
6
+ <xs:element name="head" type="hh:HeadType"/>
7
+ <xs:complexType name="HeadType">
8
+ <xs:sequence>
9
+ <xs:any namespace="##any" processContents="lax" minOccurs="0" maxOccurs="unbounded"/>
10
+ </xs:sequence>
11
+ <xs:attribute name="version" type="xs:string" use="required"/>
12
+ <xs:attribute name="secCnt" type="xs:nonNegativeInteger" use="required"/>
13
+ </xs:complexType>
14
+ </xs:schema>
@@ -0,0 +1,12 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
3
+ targetNamespace="http://www.hancom.co.kr/hwpml/2011/section"
4
+ xmlns:hs="http://www.hancom.co.kr/hwpml/2011/section"
5
+ elementFormDefault="qualified">
6
+ <xs:element name="sec" type="hs:SectionType"/>
7
+ <xs:complexType name="SectionType">
8
+ <xs:sequence>
9
+ <xs:any namespace="##any" processContents="lax" minOccurs="0" maxOccurs="unbounded"/>
10
+ </xs:sequence>
11
+ </xs:complexType>
12
+ </xs:schema>
@@ -0,0 +1,347 @@
1
+ """Helper utilities that locate XML objects inside HWPX archives."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import (
8
+ Callable,
9
+ Dict,
10
+ Iterator,
11
+ List,
12
+ Mapping,
13
+ Optional,
14
+ Pattern,
15
+ Sequence,
16
+ Tuple,
17
+ Union,
18
+ )
19
+ from xml.etree import ElementTree as ET
20
+ from zipfile import ZipFile
21
+
22
+ from .text_extractor import (
23
+ DEFAULT_NAMESPACES,
24
+ AnnotationOptions,
25
+ SectionInfo,
26
+ TextExtractor,
27
+ _resolve_control_nested_text,
28
+ _resolve_hyperlink_target,
29
+ _resolve_note_text,
30
+ build_parent_map,
31
+ describe_element_path,
32
+ strip_namespace,
33
+ tag_matches,
34
+ )
35
+
36
+ __all__ = ["AttrMatcher", "AnnotationMatch", "FoundElement", "ObjectFinder"]
37
+
38
+
39
+ AttrMatcher = Union[str, Sequence[str], Pattern[str], Callable[[str], bool]]  # exact value, allowed values, regex (via .search) or predicate
40
+
41
+
42
@dataclass(frozen=True)
class FoundElement:
    """Immutable record of an XML element that satisfied a query.

    Holds the section the element came from, its descriptive path and the
    element itself.
    """

    section: SectionInfo
    path: str
    element: ET.Element

    @property
    def tag(self) -> str:
        """Tag name of the element with its namespace stripped."""

        qualified = self.element.tag
        return strip_namespace(qualified)

    @property
    def hierarchy(self) -> Tuple[str, ...]:
        """The components of :pyattr:`path`, split on ``/``."""

        segments = self.path.split("/")
        return tuple(segments)

    @property
    def text(self) -> Optional[str]:
        """Shortcut for the ``text`` of the wrapped element."""

        node = self.element
        return node.text

    def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
        """Look up attribute *name* on the element, or return *default*."""

        attributes = self.element.attrib
        return attributes.get(name, default)

    def __str__(self) -> str:  # pragma: no cover - debugging helper
        return "{}:{} <{}>".format(self.section.name, self.path, self.tag)
76
+
77
+
78
+ @dataclass(frozen=True)
79
+ class AnnotationMatch:
80
+ """Immutable record for one annotation yielded by ``ObjectFinder.iter_annotations``."""
81
+
82
+ kind: str  # "highlight", "footnote", "endnote", "hyperlink" or "control"
83
+ element: FoundElement  # section, path and XML element of the annotation
84
+ value: Optional[str]  # formatted text produced for the annotation
85
+
86
+
87
class ObjectFinder:
    """Perform element searches across the XML payload in an HWPX document.

    The source is re-opened through ``TextExtractor`` for every query, so a
    finder holds no open archive between calls.
    """

    # Annotation kinds reported when the caller does not restrict them.
    _DEFAULT_ANNOTATION_KINDS = ("highlight", "footnote", "endnote", "hyperlink", "control")

    def __init__(
        self,
        source: Union[str, Path, ZipFile],
        *,
        namespaces: Optional[Mapping[str, str]] = None,
    ) -> None:
        """Remember *source* and merge *namespaces* over the default prefixes."""
        self._source = source
        merged_namespaces = dict(DEFAULT_NAMESPACES)
        if namespaces:
            merged_namespaces.update(namespaces)
        self.namespaces: Dict[str, str] = merged_namespaces

    def iter(
        self,
        *,
        tag: Union[str, Sequence[str], None] = None,
        attrs: Optional[Mapping[str, AttrMatcher]] = None,
        xpath: Optional[str] = None,
        limit: Optional[int] = None,
        section_filter: Optional[Callable[[SectionInfo], bool]] = None,
    ) -> Iterator[FoundElement]:
        """Yield elements that match a combination of criteria.

        Candidates come from *xpath* (evaluated per section) or, without
        it, from every element of the section. They are then narrowed by
        *tag* and *attrs*. At most *limit* elements are produced; a limit
        of zero or less produces nothing. Sections rejected by
        *section_filter* are skipped.
        """

        # Checked up front so a non-positive limit never yields an element.
        if limit is not None and limit <= 0:
            return
        remaining = limit

        with TextExtractor(self._source, namespaces=self.namespaces) as extractor:
            for section in extractor.iter_sections():
                if section_filter is not None and not section_filter(section):
                    continue
                parent_map = build_parent_map(section.element)
                if xpath is not None:
                    candidates = section.element.findall(xpath, namespaces=self.namespaces)
                else:
                    candidates = section.element.iter()
                for element in candidates:
                    if tag is not None and not tag_matches(element.tag, tag, self.namespaces):
                        continue
                    if attrs and not self._match_attributes(element, attrs):
                        continue
                    path = describe_element_path(element, parent_map)
                    yield FoundElement(section=section, path=path, element=element)
                    if remaining is not None:
                        remaining -= 1
                        if remaining <= 0:
                            return

    def find_first(
        self,
        *,
        tag: Union[str, Sequence[str], None] = None,
        attrs: Optional[Mapping[str, AttrMatcher]] = None,
        xpath: Optional[str] = None,
        section_filter: Optional[Callable[[SectionInfo], bool]] = None,
    ) -> Optional[FoundElement]:
        """Return the first element that matches or ``None`` when absent."""

        matches = self.iter(
            tag=tag,
            attrs=attrs,
            xpath=xpath,
            limit=1,
            section_filter=section_filter,
        )
        return next(matches, None)

    def find_all(
        self,
        *,
        tag: Union[str, Sequence[str], None] = None,
        attrs: Optional[Mapping[str, AttrMatcher]] = None,
        xpath: Optional[str] = None,
        section_filter: Optional[Callable[[SectionInfo], bool]] = None,
        limit: Optional[int] = None,
    ) -> List[FoundElement]:
        """Return every matching element eagerly as a list."""

        return list(
            self.iter(
                tag=tag,
                attrs=attrs,
                xpath=xpath,
                limit=limit,
                section_filter=section_filter,
            )
        )

    def iter_annotations(
        self,
        *,
        kinds: Optional[Sequence[str]] = None,
        options: Optional[AnnotationOptions] = None,
        section_filter: Optional[Callable[[SectionInfo], bool]] = None,
        preserve_breaks: bool = True,
    ) -> Iterator[AnnotationMatch]:
        """Yield annotations such as highlights or notes with formatted values.

        *kinds* restricts the annotation kinds (matched case-insensitively).
        ``None`` selects all of them, an empty sequence selects none, and a
        single string is treated as one kind. Within each section the
        results are grouped by kind in the order highlight, footnote,
        endnote, hyperlink, control. *options* controls how each value is
        rendered and defaults to ``AnnotationOptions()``.
        """

        if kinds is None:
            kinds = self._DEFAULT_ANNOTATION_KINDS
        elif isinstance(kinds, str):
            # A bare string would otherwise be iterated character by character.
            kinds = (kinds,)
        requested = {kind.lower() for kind in kinds}
        if not requested:
            return

        render_options = options or AnnotationOptions()

        with TextExtractor(self._source, namespaces=self.namespaces) as extractor:
            for section in extractor.iter_sections():
                if section_filter is not None and not section_filter(section):
                    continue
                parent_map = build_parent_map(section.element)

                if "highlight" in requested:
                    for element in section.element.findall(
                        ".//hp:markpenBegin", namespaces=self.namespaces
                    ):
                        found = self._locate(section, parent_map, element)
                        color = element.get("color") or ""
                        if render_options.highlight == "markers":
                            value = render_options.highlight_start.format(color=color)
                        else:
                            value = render_options.highlight_summary.format(color=color)
                        yield AnnotationMatch("highlight", found, value)

                # Footnotes and endnotes differ only in tag name and kind label.
                for note_kind, note_xpath in (
                    ("footnote", ".//hp:footNote"),
                    ("endnote", ".//hp:endNote"),
                ):
                    if note_kind not in requested:
                        continue
                    for element in section.element.findall(
                        note_xpath, namespaces=self.namespaces
                    ):
                        yield self._format_note_annotation(
                            extractor,
                            section,
                            parent_map,
                            element,
                            kind=note_kind,
                            options=render_options,
                            preserve_breaks=preserve_breaks,
                        )

                if "hyperlink" in requested:
                    for element in section.element.findall(
                        ".//hp:fieldBegin", namespaces=self.namespaces
                    ):
                        field_type = (element.get("type") or "").upper()
                        if field_type != "HYPERLINK":
                            continue
                        found = self._locate(section, parent_map, element)
                        target = _resolve_hyperlink_target(element, self.namespaces) or ""
                        behavior = render_options.hyperlink
                        if behavior == "target":
                            value = render_options.hyperlink_target_format.format(target=target)
                        elif behavior == "placeholder":
                            value = render_options.hyperlink_placeholder.format(target=target)
                        else:
                            value = render_options.hyperlink_summary.format(target=target)
                        yield AnnotationMatch("hyperlink", found, value)

                if "control" in requested:
                    for element in section.element.findall(
                        ".//hp:ctrl", namespaces=self.namespaces
                    ):
                        # Hyperlink fields are reported under "hyperlink", and
                        # a fieldEnd control is skipped entirely.
                        field_begin = element.find("hp:fieldBegin", namespaces=self.namespaces)
                        if (
                            field_begin is not None
                            and (field_begin.get("type") or "").upper() == "HYPERLINK"
                        ):
                            continue
                        if element.find("hp:fieldEnd", namespaces=self.namespaces) is not None:
                            continue
                        found = self._locate(section, parent_map, element)
                        first_child = next(iter(element), None)
                        name = (
                            strip_namespace(first_child.tag)
                            if first_child is not None
                            else "ctrl"
                        )
                        ctrl_type = self._control_type(element)
                        behavior = render_options.control
                        if behavior == "nested":
                            value = _resolve_control_nested_text(
                                extractor,
                                element,
                                render_options,
                                preserve_breaks=preserve_breaks,
                            )
                        elif behavior == "placeholder":
                            value = render_options.control_placeholder.format(
                                name=name, type=ctrl_type
                            )
                        else:
                            value = render_options.control_summary.format(
                                name=name, type=ctrl_type
                            )
                        yield AnnotationMatch("control", found, value)

    def _format_note_annotation(
        self,
        extractor: TextExtractor,
        section: SectionInfo,
        parent_map: Mapping[ET.Element, ET.Element],
        element: ET.Element,
        *,
        kind: str,
        options: AnnotationOptions,
        preserve_breaks: bool,
    ) -> AnnotationMatch:
        """Build the ``AnnotationMatch`` for one footnote or endnote element.

        The rendering mode comes from ``options.footnote`` or
        ``options.endnote`` depending on *kind*: "inline" embeds the note
        text, "placeholder" uses the placeholder template and anything else
        uses the summary template.
        """
        found = self._locate(section, parent_map, element)
        inst_id = element.get("instId") or ""
        behavior = options.footnote if kind == "footnote" else options.endnote
        if behavior == "inline":
            text = _resolve_note_text(
                extractor,
                element,
                options,
                preserve_breaks=preserve_breaks,
            )
            value = options.note_inline_format.format(kind=kind, inst_id=inst_id, text=text)
        elif behavior == "placeholder":
            value = options.note_placeholder.format(kind=kind, inst_id=inst_id)
        else:
            value = options.note_summary.format(kind=kind, inst_id=inst_id)
        return AnnotationMatch(kind, found, value)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _locate(
        section: SectionInfo,
        parent_map: Mapping[ET.Element, ET.Element],
        element: ET.Element,
    ) -> FoundElement:
        """Wrap *element* in a ``FoundElement`` carrying its descriptive path."""
        path = describe_element_path(element, parent_map)
        return FoundElement(section=section, path=path, element=element)

    @staticmethod
    def _control_type(element: ET.Element) -> str:
        """Return the ``type`` attribute describing a control element.

        The first child is consulted when there is one, otherwise the
        control element itself. A missing attribute yields ``""`` so it is
        never rendered as the literal text "None".
        """
        first_child = next(iter(element), None)
        described = first_child if first_child is not None else element
        return described.get("type") or ""

    @staticmethod
    def _match_attributes(
        element: ET.Element,
        expected: Mapping[str, AttrMatcher],
    ) -> bool:
        """Tell whether *element* satisfies every matcher in *expected*.

        An attribute missing from the element never matches. A ``str``
        matcher requires equality, a non-string sequence requires
        membership, an object with ``search`` (such as a compiled regex) is
        searched, and any other callable is used as a predicate.

        Raises:
            TypeError: if a matcher is none of the supported kinds.
        """
        for name, matcher in expected.items():
            value = element.attrib.get(name)
            if value is None:
                return False
            if isinstance(matcher, str):
                if value != matcher:
                    return False
            elif isinstance(matcher, Sequence) and not isinstance(matcher, (str, bytes)):
                if value not in matcher:
                    return False
            elif hasattr(matcher, "search"):
                if not matcher.search(value):  # type: ignore[call-arg]
                    return False
            elif callable(matcher):
                if not matcher(value):
                    return False
            else:
                raise TypeError(
                    "Attribute matchers must be str, Sequence, Pattern or callable",
                )
        return True