@beyondwork/docx-react-component 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/README.md +44 -104
  2. package/package.json +76 -46
  3. package/src/README.md +85 -0
  4. package/src/api/README.md +22 -0
  5. package/src/api/public-types.ts +525 -0
  6. package/src/compare/diff-engine.ts +530 -0
  7. package/src/compare/export-redlines.ts +162 -0
  8. package/src/compare/snapshot.ts +37 -0
  9. package/src/component-inventory.md +99 -0
  10. package/src/core/README.md +10 -0
  11. package/src/core/commands/README.md +3 -0
  12. package/src/core/commands/formatting-commands.ts +161 -0
  13. package/src/core/commands/image-commands.ts +144 -0
  14. package/src/core/commands/index.ts +1013 -0
  15. package/src/core/commands/list-commands.ts +370 -0
  16. package/src/core/commands/review-commands.ts +108 -0
  17. package/src/core/commands/text-commands.ts +119 -0
  18. package/src/core/schema/README.md +3 -0
  19. package/src/core/schema/text-schema.ts +512 -0
  20. package/src/core/selection/README.md +3 -0
  21. package/src/core/selection/mapping.ts +238 -0
  22. package/src/core/selection/review-anchors.ts +94 -0
  23. package/src/core/state/README.md +3 -0
  24. package/src/core/state/editor-state.ts +580 -0
  25. package/src/core/state/text-transaction.ts +276 -0
  26. package/src/formats/xlsx/io/parse-shared-strings.ts +41 -0
  27. package/src/formats/xlsx/io/parse-sheet.ts +289 -0
  28. package/src/formats/xlsx/io/parse-styles.ts +57 -0
  29. package/src/formats/xlsx/io/parse-workbook.ts +75 -0
  30. package/src/formats/xlsx/io/xlsx-session.ts +306 -0
  31. package/src/formats/xlsx/model/cell.ts +189 -0
  32. package/src/formats/xlsx/model/sheet.ts +244 -0
  33. package/src/formats/xlsx/model/styles.ts +118 -0
  34. package/src/formats/xlsx/model/workbook.ts +449 -0
  35. package/src/index.ts +45 -0
  36. package/src/io/README.md +10 -0
  37. package/src/io/docx-session.ts +1763 -0
  38. package/src/io/export/README.md +3 -0
  39. package/src/io/export/export-session.ts +165 -0
  40. package/src/io/export/minimal-docx.ts +115 -0
  41. package/src/io/export/reattach-preserved-parts.ts +54 -0
  42. package/src/io/export/serialize-comments.ts +876 -0
  43. package/src/io/export/serialize-footnotes.ts +217 -0
  44. package/src/io/export/serialize-headers-footers.ts +200 -0
  45. package/src/io/export/serialize-main-document.ts +982 -0
  46. package/src/io/export/serialize-numbering.ts +97 -0
  47. package/src/io/export/serialize-revisions.ts +389 -0
  48. package/src/io/export/serialize-runtime-revisions.ts +265 -0
  49. package/src/io/export/serialize-tables.ts +147 -0
  50. package/src/io/export/split-review-boundaries.ts +194 -0
  51. package/src/io/normalize/README.md +3 -0
  52. package/src/io/normalize/normalize-text.ts +437 -0
  53. package/src/io/ooxml/README.md +3 -0
  54. package/src/io/ooxml/parse-comments.ts +779 -0
  55. package/src/io/ooxml/parse-complex-content.ts +287 -0
  56. package/src/io/ooxml/parse-fields.ts +438 -0
  57. package/src/io/ooxml/parse-footnotes.ts +403 -0
  58. package/src/io/ooxml/parse-headers-footers.ts +483 -0
  59. package/src/io/ooxml/parse-inline-media.ts +431 -0
  60. package/src/io/ooxml/parse-main-document.ts +1846 -0
  61. package/src/io/ooxml/parse-numbering.ts +425 -0
  62. package/src/io/ooxml/parse-revisions.ts +658 -0
  63. package/src/io/ooxml/parse-shapes.ts +271 -0
  64. package/src/io/ooxml/parse-tables.ts +568 -0
  65. package/src/io/ooxml/parse-theme.ts +314 -0
  66. package/src/io/ooxml/part-manifest.ts +136 -0
  67. package/src/io/ooxml/revision-boundaries.ts +351 -0
  68. package/src/io/opc/README.md +3 -0
  69. package/src/io/opc/corrupt-package.ts +166 -0
  70. package/src/io/opc/docx-package.ts +74 -0
  71. package/src/io/opc/package-reader.ts +320 -0
  72. package/src/io/opc/package-writer.ts +273 -0
  73. package/src/legal/bookmarks.ts +196 -0
  74. package/src/legal/cross-references.ts +356 -0
  75. package/src/legal/defined-terms.ts +203 -0
  76. package/src/model/README.md +3 -0
  77. package/src/model/canonical-document.ts +1911 -0
  78. package/src/model/cds-1.0.0.ts +196 -0
  79. package/src/model/snapshot.ts +393 -0
  80. package/src/preservation/README.md +3 -0
  81. package/src/preservation/markup-compatibility.ts +48 -0
  82. package/src/preservation/opaque-fragment-store.ts +89 -0
  83. package/src/preservation/opaque-region.ts +233 -0
  84. package/src/preservation/package-preservation.ts +120 -0
  85. package/src/preservation/preserved-part-manifest.ts +56 -0
  86. package/src/preservation/relationship-retention.ts +57 -0
  87. package/src/preservation/store.ts +185 -0
  88. package/src/review/README.md +16 -0
  89. package/src/review/store/README.md +3 -0
  90. package/src/review/store/comment-anchors.ts +70 -0
  91. package/src/review/store/comment-remapping.ts +154 -0
  92. package/src/review/store/comment-store.ts +331 -0
  93. package/src/review/store/comment-thread.ts +109 -0
  94. package/src/review/store/revision-actions.ts +394 -0
  95. package/src/review/store/revision-store.ts +303 -0
  96. package/src/review/store/revision-types.ts +168 -0
  97. package/src/review/store/runtime-comment-store.ts +43 -0
  98. package/src/runtime/README.md +3 -0
  99. package/src/runtime/ai-action-policy.ts +764 -0
  100. package/src/runtime/document-runtime.ts +967 -0
  101. package/src/runtime/read-only-diagnostics-runtime.ts +232 -0
  102. package/src/runtime/review-runtime.ts +44 -0
  103. package/src/runtime/revision-runtime.ts +107 -0
  104. package/src/runtime/session-capabilities.ts +138 -0
  105. package/src/runtime/surface-projection.ts +570 -0
  106. package/src/runtime/table-commands.ts +87 -0
  107. package/src/runtime/table-schema.ts +140 -0
  108. package/src/runtime/virtualized-rendering.ts +258 -0
  109. package/src/ui/README.md +30 -0
  110. package/src/ui/WordReviewEditor.tsx +1504 -0
  111. package/src/ui/comments/README.md +3 -0
  112. package/src/ui/compatibility/README.md +3 -0
  113. package/src/ui/editor-surface/README.md +3 -0
  114. package/src/ui/headless/comment-decoration-model.ts +124 -0
  115. package/src/ui/headless/revision-decoration-model.ts +128 -0
  116. package/src/ui/headless/selection-helpers.ts +34 -0
  117. package/src/ui/headless/use-editor-keyboard.ts +98 -0
  118. package/src/ui/review/README.md +3 -0
  119. package/src/ui/shared/revision-filters.ts +31 -0
  120. package/src/ui/status/README.md +3 -0
  121. package/src/ui/theme/README.md +3 -0
  122. package/src/ui/toolbar/README.md +3 -0
  123. package/src/ui-tailwind/chrome/tw-alert-banner.tsx +48 -0
  124. package/src/ui-tailwind/chrome/tw-selection-toolbar.tsx +44 -0
  125. package/src/ui-tailwind/chrome/tw-unsaved-modal.tsx +58 -0
  126. package/src/ui-tailwind/chrome/use-before-unload.ts +20 -0
  127. package/src/ui-tailwind/editor-surface/pm-command-bridge.ts +139 -0
  128. package/src/ui-tailwind/editor-surface/pm-decorations.ts +98 -0
  129. package/src/ui-tailwind/editor-surface/pm-position-map.ts +123 -0
  130. package/src/ui-tailwind/editor-surface/pm-schema.ts +452 -0
  131. package/src/ui-tailwind/editor-surface/pm-state-from-snapshot.ts +327 -0
  132. package/src/ui-tailwind/editor-surface/search-plugin.ts +157 -0
  133. package/src/ui-tailwind/editor-surface/tw-caret.tsx +12 -0
  134. package/src/ui-tailwind/editor-surface/tw-editor-surface.tsx +150 -0
  135. package/src/ui-tailwind/editor-surface/tw-inline-token.tsx +118 -0
  136. package/src/ui-tailwind/editor-surface/tw-opaque-block.tsx +52 -0
  137. package/src/ui-tailwind/editor-surface/tw-paragraph-block.tsx +151 -0
  138. package/src/ui-tailwind/editor-surface/tw-prosemirror-surface.tsx +215 -0
  139. package/src/ui-tailwind/editor-surface/tw-segment-view.tsx +111 -0
  140. package/src/ui-tailwind/editor-surface/tw-table-node-view.tsx +122 -0
  141. package/src/ui-tailwind/index.ts +61 -0
  142. package/src/ui-tailwind/review/tw-comment-sidebar.tsx +276 -0
  143. package/src/ui-tailwind/review/tw-health-panel.tsx +120 -0
  144. package/src/ui-tailwind/review/tw-review-rail.tsx +120 -0
  145. package/src/ui-tailwind/review/tw-revision-sidebar.tsx +164 -0
  146. package/src/ui-tailwind/status/tw-status-bar.tsx +58 -0
  147. package/src/ui-tailwind/theme/editor-theme.css +190 -0
  148. package/src/ui-tailwind/toolbar/tw-toolbar-icon-button.tsx +48 -0
  149. package/src/ui-tailwind/toolbar/tw-toolbar.tsx +231 -0
  150. package/src/ui-tailwind/tw-review-workspace.tsx +140 -0
  151. package/src/validation/README.md +3 -0
  152. package/src/validation/compatibility-engine.ts +317 -0
  153. package/src/validation/compatibility-report.ts +160 -0
  154. package/src/validation/diagnostics.ts +203 -0
  155. package/src/validation/import-diagnostics.ts +128 -0
  156. package/src/validation/low-priority-word-surfaces.ts +373 -0
  157. package/dist/chunk-32W6IVQE.js +0 -7725
  158. package/dist/chunk-32W6IVQE.js.map +0 -1
  159. package/dist/index.cjs +0 -23722
  160. package/dist/index.cjs.map +0 -1
  161. package/dist/index.d.cts +0 -7
  162. package/dist/index.d.ts +0 -7
  163. package/dist/index.js +0 -16011
  164. package/dist/index.js.map +0 -1
  165. package/dist/public-types-DqCURAz8.d.cts +0 -1152
  166. package/dist/public-types-DqCURAz8.d.ts +0 -1152
  167. package/dist/tailwind.cjs +0 -8295
  168. package/dist/tailwind.cjs.map +0 -1
  169. package/dist/tailwind.d.cts +0 -323
  170. package/dist/tailwind.d.ts +0 -323
  171. package/dist/tailwind.js +0 -553
  172. package/dist/tailwind.js.map +0 -1
@@ -0,0 +1,438 @@
1
+ /**
2
+ * parse-fields.ts
3
+ *
4
+ * Standalone parsers for OOXML field codes (w:fldSimple, complex fldChar sequences)
5
+ * and bookmarks (w:bookmarkStart / w:bookmarkEnd).
6
+ *
7
+ * These types mirror the canonical FieldNode / BookmarkStartNode / BookmarkEndNode
8
+ * shapes from canonical-document.ts, but carry raw XML for preservation and are
9
+ * pre-normalization (no fragmentId / warningId yet).
10
+ *
11
+ * Usage: call parseFieldsFromParagraphXml() or extractBookmarksFromBodyXml() for
12
+ * independent field/bookmark extraction. The functions are designed to be
13
+ * imported by parse-main-document.ts when full pipeline integration is ready.
14
+ */
15
+
16
+ // ─── Parsed types ────────────────────────────────────────────────────────────
17
+
18
/**
 * A field parsed from a single `<w:fldSimple>` element.
 */
export interface ParsedSimpleFieldNode {
  /** Node discriminant shared by every parsed field. */
  type: "field";
  /** Marks this field as coming from `<w:fldSimple>`. */
  fieldType: "simple";
  /** Trimmed value of the element's instruction attribute. */
  instruction: string;
  /** Raw XML of the whole `<w:fldSimple>` element. */
  rawXml: string;
  /** Raw XML of the child elements found inside `<w:fldSimple>`. */
  contentXml: string;
}

/**
 * A field parsed from a begin / separate / end `fldChar` run sequence.
 */
export interface ParsedComplexFieldNode {
  /** Node discriminant shared by every parsed field. */
  type: "field";
  /** Marks this field as coming from a `fldChar` sequence. */
  fieldType: "complex";
  /** Trimmed concatenation of the instruction text collected for the field. */
  instruction: string;
  /** Start index in the source document XML. */
  start: number;
  /** End index (exclusive) in the source document Xml. */
  end: number;
  /** Raw XML located between the "separate" run and the "end" run. */
  contentXml: string;
}

/** Either flavour of parsed field; discriminate on `fieldType`. */
export type ParsedFieldNode = ParsedSimpleFieldNode | ParsedComplexFieldNode;
40
+
41
/**
 * A parsed `<w:bookmarkStart>` marker.
 */
export interface ParsedBookmarkStartNode {
  /** Node discriminant for a bookmark opening marker. */
  type: "bookmark_start";
  /** Value of the marker's id attribute (always non-empty once parsed). */
  bookmarkId: string;
  /** Value of the marker's name attribute, or "" when absent. */
  name: string;
  /** Raw XML of the marker element. */
  rawXml: string;
}

/**
 * A parsed `<w:bookmarkEnd>` marker.
 */
export interface ParsedBookmarkEndNode {
  /** Node discriminant for a bookmark closing marker. */
  type: "bookmark_end";
  /** Value of the marker's id attribute (always non-empty once parsed). */
  bookmarkId: string;
  /** Raw XML of the marker element. */
  rawXml: string;
}

/** Either bookmark marker; discriminate on `type`. */
export type ParsedBookmarkNode = ParsedBookmarkStartNode | ParsedBookmarkEndNode;
55
+
56
+ // ─── Internal XML types ───────────────────────────────────────────────────────
57
+
58
/** An element produced by the local XML parser. */
interface XmlElementNode {
  type: "element";
  /** Start offset of the element in the parsed source string. */
  start: number;
  /** End offset (exclusive) of the element in the parsed source string. */
  end: number;
  /** Tag name as written in the source, including any namespace prefix. */
  name: string;
  /** Attribute values keyed by the attribute name as written in the source. */
  attributes: Record<string, string>;
  /** Child nodes in source order. */
  children: XmlNode[];
}

/** A text (or CDATA) node produced by the local XML parser. */
interface XmlTextNode {
  type: "text";
  /** Start offset of the text in the parsed source string. */
  start: number;
  /** End offset (exclusive) of the text in the parsed source string. */
  end: number;
  /** Text content of the node. */
  text: string;
}

/** Any node in the parsed tree; discriminate on `type`. */
type XmlNode = XmlElementNode | XmlTextNode;
75
+
76
+ // ─── Public API ───────────────────────────────────────────────────────────────
77
+
78
/**
 * Collects the simple fields and bookmark markers that are direct children of
 * one paragraph.
 *
 * The input is parsed with the local XML parser. When the parsed root has a
 * direct `p` child, that element is scanned; otherwise the root's own children
 * are scanned. Only direct children are inspected — nested elements are not
 * searched. Complex (fldChar-based) fields are not handled here; use
 * extractComplexFieldsFromBodyXml for those.
 *
 * @param paragraphXml Raw XML string of a <w:p> element.
 * @returns The simple fields and the bookmark markers, each in source order.
 * @throws Error when the local XML parser rejects the input as malformed.
 */
export function parseFieldsFromParagraphXml(paragraphXml: string): {
  simpleFields: ParsedSimpleFieldNode[];
  bookmarks: ParsedBookmarkNode[];
} {
  const parsedRoot = parseXml(paragraphXml);
  const container = findFirstChild(parsedRoot, "p") ?? parsedRoot;

  const simpleFields: ParsedSimpleFieldNode[] = [];
  const bookmarks: ParsedBookmarkNode[] = [];

  for (const node of container.children) {
    if (node.type !== "element") continue;

    switch (localName(node.name)) {
      case "fldSimple": {
        const parsedField = parseFldSimple(node, paragraphXml);
        if (parsedField) simpleFields.push(parsedField);
        break;
      }
      case "bookmarkStart": {
        const opening = parseBookmarkStart(node, paragraphXml);
        if (opening) bookmarks.push(opening);
        break;
      }
      case "bookmarkEnd": {
        const closing = parseBookmarkEnd(node, paragraphXml);
        if (closing) bookmarks.push(closing);
        break;
      }
      default:
        break;
    }
  }

  return { simpleFields, bookmarks };
}
114
+
115
/**
 * Extract complex field sequences from a body element XML string.
 * A complex field is a begin / instrText* / separate / content / end sequence
 * spread across multiple <w:r> siblings.
 *
 * The scan target is resolved in this order: a top-level `body` element, the
 * `body` child of a top-level `document` element, the `document` element
 * itself, and finally the parse root. Only paragraphs that are direct children
 * of the scan target are visited, so paragraphs nested deeper (for example
 * inside tables) are not scanned.
 *
 * @param bodyXml Raw XML string of a <w:body> element (or full document).
 * @returns Complex fields in the order their "end" markers are encountered.
 * @throws Error when the local XML parser rejects the input as malformed.
 */
export function extractComplexFieldsFromBodyXml(bodyXml: string): ParsedComplexFieldNode[] {
  const root = parseXml(bodyXml);
  const results: ParsedComplexFieldNode[] = [];

  // In a full document the paragraphs live under document > body, so the body
  // must be looked up inside the document element; scanning the document
  // element's direct children would only ever see <w:body> and find nothing.
  const documentEl = findFirstChild(root, "document");
  const bodyEl =
    findFirstChild(root, "body") ??
    (documentEl ? findFirstChild(documentEl, "body") : undefined);
  const scanTarget = bodyEl ?? documentEl ?? root;

  for (const block of scanTarget.children) {
    if (block.type !== "element") continue;
    if (localName(block.name) !== "p") continue;
    extractComplexFieldsFromParagraph(block, bodyXml, results);
  }

  return results;
}
138
+
139
/**
 * Gathers every bookmark start/end marker found anywhere in the given XML.
 *
 * The input is parsed with the local XML parser and the resulting tree is
 * walked by collectBookmarks.
 *
 * @param bodyXml Raw XML of a body element or a full document.
 * @returns Bookmark markers in the order the tree walk encounters them.
 * @throws Error when the local XML parser rejects the input as malformed.
 */
export function extractBookmarksFromBodyXml(bodyXml: string): ParsedBookmarkNode[] {
  const collected: ParsedBookmarkNode[] = [];
  collectBookmarks(parseXml(bodyXml), bodyXml, collected);
  return collected;
}
148
+
149
+ // ─── Element-level parsers (exported for unit testing) ────────────────────────
150
+
151
/**
 * Builds a simple-field node from an already parsed `<w:fldSimple>` element.
 *
 * @param element Parsed element; `start`/`end` are offsets into `sourceXml`.
 * @param sourceXml The XML string the element offsets refer to.
 * @returns The parsed field. The instruction is the trimmed `w:instr` (or
 *   unprefixed `instr`) attribute, "" when neither is present. `contentXml`
 *   concatenates the raw XML of the element children only; text children are
 *   left out.
 */
export function parseFldSimple(
  element: { attributes: Record<string, string>; children: XmlNode[]; start: number; end: number },
  sourceXml: string,
): ParsedSimpleFieldNode | undefined {
  const rawInstruction = element.attributes["w:instr"] ?? element.attributes.instr ?? "";

  let contentXml = "";
  for (const child of element.children) {
    if (child.type === "element") {
      contentXml += sourceXml.slice(child.start, child.end);
    }
  }

  return {
    type: "field",
    fieldType: "simple",
    instruction: rawInstruction.trim(),
    contentXml,
    rawXml: sourceXml.slice(element.start, element.end),
  };
}
168
+
169
/**
 * Builds a bookmark-start node from an already parsed `<w:bookmarkStart>`.
 *
 * @param element Parsed element; `start`/`end` are offsets into `sourceXml`.
 * @param sourceXml The XML string the element offsets refer to.
 * @returns The parsed marker, or `undefined` when the element has no (or an
 *   empty) `w:id` / `id` attribute. A missing name becomes "".
 */
export function parseBookmarkStart(
  element: { attributes: Record<string, string>; start: number; end: number },
  sourceXml: string,
): ParsedBookmarkStartNode | undefined {
  const { attributes, start, end } = element;

  const bookmarkId = attributes["w:id"] ?? attributes.id ?? "";
  if (bookmarkId === "") return undefined;

  return {
    type: "bookmark_start",
    bookmarkId,
    name: attributes["w:name"] ?? attributes.name ?? "",
    rawXml: sourceXml.slice(start, end),
  };
}
183
+
184
/**
 * Builds a bookmark-end node from an already parsed `<w:bookmarkEnd>`.
 *
 * @param element Parsed element; `start`/`end` are offsets into `sourceXml`.
 * @param sourceXml The XML string the element offsets refer to.
 * @returns The parsed marker, or `undefined` when the element has no (or an
 *   empty) `w:id` / `id` attribute.
 */
export function parseBookmarkEnd(
  element: { attributes: Record<string, string>; start: number; end: number },
  sourceXml: string,
): ParsedBookmarkEndNode | undefined {
  const { attributes, start, end } = element;

  const bookmarkId = attributes["w:id"] ?? attributes.id ?? "";
  if (bookmarkId === "") return undefined;

  return {
    type: "bookmark_end",
    bookmarkId,
    rawXml: sourceXml.slice(start, end),
  };
}
196
+
197
+ // ─── Internal helpers ─────────────────────────────────────────────────────────
198
+
199
/**
 * Scans the direct `<w:r>` children of one paragraph for complex fields and
 * appends every completed field to `results`.
 *
 * Open fields are tracked on a stack so that nested fields (a `begin` seen
 * before the enclosing field's `end`) are reported together with their
 * enclosing field instead of replacing it. Each run's children are processed
 * in source order, so a run that carries several `fldChar` / `instrText`
 * elements is handled fully.
 *
 * Rules:
 * - `begin` opens a new field whose `start` is the start of the current run.
 * - `instrText` is added to the innermost open field while that field has not
 *   yet seen its `separate` marker.
 * - `separate` switches the innermost open field to its content phase; the
 *   content begins right after the current run.
 * - `end` closes the innermost open field. Its content is the source slice
 *   from the end of the `separate` run to the start of the `end` run ("" when
 *   there was no `separate`), and its `end` is the end of the current run.
 * - Markers with no matching open field are ignored, and fields still open
 *   when the paragraph ends are dropped.
 *
 * Nested fields are appended before their enclosing field because they close
 * first.
 *
 * @param paragraph Parsed `<w:p>` element; offsets refer to `sourceXml`.
 * @param sourceXml The XML string the element offsets refer to.
 * @param results Output array that receives the completed fields.
 */
function extractComplexFieldsFromParagraph(
  paragraph: XmlElementNode,
  sourceXml: string,
  results: ParsedComplexFieldNode[],
): void {
  type OpenField = {
    instrParts: string[];
    fieldStart: number;
    contentStart: number;
    inContent: boolean;
  };
  const openFields: OpenField[] = [];

  for (const run of paragraph.children) {
    if (run.type !== "element" || localName(run.name) !== "r") continue;

    for (const node of run.children) {
      if (node.type !== "element") continue;

      const nodeName = localName(node.name);
      const current = openFields[openFields.length - 1];

      if (nodeName === "instrText") {
        // Instruction text only counts before the field's "separate" marker.
        if (current && !current.inContent) {
          const text = node.children
            .filter((c): c is XmlTextNode => c.type === "text")
            .map((c) => c.text)
            .join("");
          current.instrParts.push(text);
        }
        continue;
      }

      if (nodeName !== "fldChar") continue;

      const charType = (
        node.attributes["w:fldCharType"] ??
        node.attributes.fldCharType ??
        ""
      ).toLowerCase();

      if (charType === "begin") {
        openFields.push({
          instrParts: [],
          fieldStart: run.start,
          contentStart: -1,
          inContent: false,
        });
      } else if (charType === "separate") {
        if (current && !current.inContent) {
          current.inContent = true;
          current.contentStart = run.end;
        }
      } else if (charType === "end") {
        const finished = openFields.pop();
        if (!finished) continue; // stray "end" without a matching "begin"

        // Guard against "separate" and "end" sharing a run, where the content
        // range would otherwise be inverted.
        const contentXml =
          finished.inContent && run.start >= finished.contentStart
            ? sourceXml.slice(finished.contentStart, run.start)
            : "";

        results.push({
          type: "field",
          fieldType: "complex",
          instruction: finished.instrParts.join("").trim(),
          contentXml,
          start: finished.fieldStart,
          end: run.end,
        });
      }
    }
  }
}
264
+
265
/**
 * Depth-first walk that appends every bookmark start/end marker below `node`
 * to `results`.
 *
 * Bookmark elements themselves are not descended into; every other element is
 * searched recursively. Markers rejected by their parser (no id) are skipped.
 *
 * @param node Element whose descendants are searched.
 * @param sourceXml The XML string the element offsets refer to.
 * @param results Output array that receives the parsed markers.
 */
function collectBookmarks(
  node: XmlElementNode,
  sourceXml: string,
  results: ParsedBookmarkNode[],
): void {
  for (const child of node.children) {
    if (child.type !== "element") continue;

    const tag = localName(child.name);
    if (tag !== "bookmarkStart" && tag !== "bookmarkEnd") {
      collectBookmarks(child, sourceXml, results);
      continue;
    }

    const marker =
      tag === "bookmarkStart"
        ? parseBookmarkStart(child, sourceXml)
        : parseBookmarkEnd(child, sourceXml);
    if (marker) results.push(marker);
  }
}
284
+
285
/**
 * Returns the first direct element child of `node` whose local (prefix-less)
 * name equals `childLocalName`, or `undefined` when there is none.
 */
function findFirstChild(node: XmlElementNode, childLocalName: string): XmlElementNode | undefined {
  for (const candidate of node.children) {
    if (candidate.type === "element" && localName(candidate.name) === childLocalName) {
      return candidate;
    }
  }
  return undefined;
}
290
+
291
/**
 * Returns the first direct element child of `node` whose local (prefix-less)
 * name equals `childLocalName`, or `undefined` when there is none.
 *
 * Same lookup as findFirstChild; kept as a separate name for existing callers.
 */
function findFirstChildEl(node: XmlElementNode, childLocalName: string): XmlElementNode | undefined {
  return findFirstChild(node, childLocalName);
}
296
+
297
/**
 * Strips the namespace prefix from a qualified XML name.
 *
 * Everything up to and including the first ":" is removed; a name without a
 * colon is returned unchanged.
 */
function localName(name: string): string {
  const colonAt = name.indexOf(":");
  if (colonAt < 0) return name;
  return name.substring(colonAt + 1);
}
301
+
302
+ // ─── Minimal XML parser (same pattern as parse-tables.ts) ────────────────────
303
+
304
/**
 * Parses an XML string into a tree of element and text nodes that remember
 * their offsets in the source.
 *
 * The returned node is a synthetic `__root__` element spanning the whole
 * input; the parsed top-level nodes are its children.
 *
 * Handling per construct:
 * - Comments and processing instructions are skipped. An unterminated one
 *   consumes the rest of the input.
 * - CDATA sections become text nodes with their content taken verbatim.
 * - Character data becomes an entity-decoded text node; empty text is dropped.
 * - A closing tag pops the innermost open element and must match it by local
 *   name (prefixes are not compared).
 * - Any other tag becomes an element; a trailing "/" marks it self-closing.
 *
 * @param xml The XML source to parse.
 * @returns The synthetic root element.
 * @throws Error on a missing ">", a mismatched closing tag, a malformed
 *   attribute, or an element left open at the end of the input.
 */
function parseXml(xml: string): XmlElementNode {
  const length = xml.length;
  const root: XmlElementNode = {
    type: "element",
    name: "__root__",
    attributes: {},
    children: [],
    start: 0,
    end: length,
  };
  const openElements: XmlElementNode[] = [root];
  const trailingSlash = /\/\s*$/;

  // Appends a node to the innermost open element (no-op if none is open).
  const attach = (node: XmlNode): void => {
    openElements[openElements.length - 1]?.children.push(node);
  };

  // Offset just past the next `terminator`, or end of input when absent.
  const positionAfter = (terminator: string, from: number): number => {
    const at = xml.indexOf(terminator, from);
    return at < 0 ? length : at + terminator.length;
  };

  let pos = 0;
  while (pos < length) {
    if (xml.startsWith("<!--", pos)) {
      pos = positionAfter("-->", pos);
    } else if (xml.startsWith("<?", pos)) {
      pos = positionAfter("?>", pos);
    } else if (xml.startsWith("<![CDATA[", pos)) {
      const close = xml.indexOf("]]>", pos);
      const contentEnd = close < 0 ? length : close;
      const sectionEnd = close < 0 ? length : close + 3;
      attach({
        type: "text",
        text: xml.slice(pos + 9, contentEnd),
        start: pos,
        end: sectionEnd,
      });
      pos = sectionEnd;
    } else if (xml[pos] !== "<") {
      const nextLt = xml.indexOf("<", pos);
      const stop = nextLt < 0 ? length : nextLt;
      const decoded = decodeXmlEntities(xml.slice(pos, stop));
      if (decoded.length > 0) {
        attach({ type: "text", text: decoded, start: pos, end: stop });
      }
      pos = stop;
    } else if (xml[pos + 1] === "/") {
      const gt = xml.indexOf(">", pos);
      if (gt < 0) throw new Error("Malformed XML: missing >.");
      const closingName = xml.slice(pos + 2, gt).trim();
      const closed = openElements.pop();
      if (!closed || localName(closed.name) !== localName(closingName)) {
        throw new Error(`Malformed XML: unexpected closing tag </${closingName}>.`);
      }
      // Widen the element's span from its opening tag to the closing tag.
      closed.end = gt + 1;
      pos = gt + 1;
    } else {
      const gt = findTagEnd(xml, pos);
      const rawTag = xml.slice(pos + 1, gt);
      const isSelfClosing = trailingSlash.test(rawTag);
      const { name, attributes } = parseTag(rawTag.replace(trailingSlash, "").trim());
      const element: XmlElementNode = {
        type: "element",
        name,
        attributes,
        children: [],
        start: pos,
        end: gt + 1,
      };
      attach(element);
      if (!isSelfClosing) openElements.push(element);
      pos = gt + 1;
    }
  }

  if (openElements.length !== 1) throw new Error("Malformed XML: unclosed element.");
  return root;
}
381
+
382
/**
 * Splits the inside of an opening tag (without "<", ">" or a trailing "/")
 * into the tag name and its attributes.
 *
 * Attribute values must be quoted with `"` or `'` and are entity-decoded. An
 * attribute without "=" is stored with an empty value. A value whose closing
 * quote is missing runs to the end of `tagBody`.
 *
 * @param tagBody Tag text such as `w:p w:rsidR="00A1"`.
 * @returns The tag name as written and the attribute map.
 * @throws Error when an "=" is not followed by a quoted value.
 */
function parseTag(tagBody: string): { name: string; attributes: Record<string, string> } {
  const size = tagBody.length;
  let pos = 0;

  const isSpaceAt = (index: number): boolean => /\s/.test(tagBody[index] ?? "");
  const skipSpaces = (): void => {
    while (pos < size && isSpaceAt(pos)) pos += 1;
  };

  skipSpaces();
  const nameStart = pos;
  while (pos < size && !isSpaceAt(pos)) pos += 1;
  const name = tagBody.slice(nameStart, pos);

  const attributes: Record<string, string> = {};
  for (;;) {
    skipSpaces();
    if (pos >= size) break;

    const keyStart = pos;
    while (pos < size && !/[\s=]/.test(tagBody[pos] ?? "")) pos += 1;
    const key = tagBody.slice(keyStart, pos);

    skipSpaces();
    if (tagBody[pos] !== "=") {
      // Attribute given without a value.
      attributes[key] = "";
      continue;
    }
    pos += 1;

    skipSpaces();
    const quote = tagBody[pos];
    if (quote !== `"` && quote !== `'`) throw new Error(`Malformed XML attribute ${key}.`);
    pos += 1;

    const closingQuote = tagBody.indexOf(quote, pos);
    const valueEnd = closingQuote < 0 ? size : closingQuote;
    attributes[key] = decodeXmlEntities(tagBody.slice(pos, valueEnd));
    pos = valueEnd + 1;
  }

  return { name, attributes };
}
410
+
411
/**
 * Finds the ">" that terminates the tag opening at `start`, ignoring any ">"
 * that appears inside a single- or double-quoted attribute value.
 *
 * @param xml The XML source.
 * @param start Index of the tag's "<"; scanning begins one character later.
 * @returns Index of the terminating ">".
 * @throws Error when the input ends before an unquoted ">" is found.
 */
function findTagEnd(xml: string, start: number): number {
  let openQuote = "";
  for (let index = start + 1; index < xml.length; index += 1) {
    const ch = xml[index];
    if (openQuote !== "") {
      // Inside a quoted value: only the matching quote character ends it.
      if (ch === openQuote) openQuote = "";
    } else if (ch === `"` || ch === `'`) {
      openQuote = ch;
    } else if (ch === ">") {
      return index;
    }
  }
  throw new Error("Malformed XML: missing >.");
}
423
+
424
/**
 * Decodes the five predefined XML entities and numeric character references
 * (decimal `&#NN;` and hexadecimal `&#xHH;`) in `value`.
 *
 * Anything that does not match one of those forms is left as written. Numeric
 * references whose value is not a valid Unicode code point (greater than
 * U+10FFFF) are also left as written instead of raising a RangeError, so a
 * single bad reference cannot abort parsing of the whole document.
 *
 * @param value Text or attribute value that may contain entity references.
 * @returns The decoded string.
 */
function decodeXmlEntities(value: string): string {
  return value.replace(
    /&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g,
    (match: string, entity: string): string => {
      switch (entity) {
        case "amp": return "&";
        case "lt": return "<";
        case "gt": return ">";
        case "quot": return `"`;
        case "apos": return `'`;
        default: {
          // The pattern guarantees the remaining forms are "#x…" or "#…".
          const codePoint = entity.startsWith("#x")
            ? Number.parseInt(entity.slice(2), 16)
            : Number.parseInt(entity.slice(1), 10);
          // String.fromCodePoint throws for values outside 0..0x10FFFF.
          if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
            return match;
          }
          return String.fromCodePoint(codePoint);
        }
      }
    },
  );
}