odf-kit 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/reader/html-renderer.d.ts +31 -0
- package/dist/reader/html-renderer.d.ts.map +1 -0
- package/dist/reader/html-renderer.js +125 -0
- package/dist/reader/html-renderer.js.map +1 -0
- package/dist/reader/index.d.ts +39 -0
- package/dist/reader/index.d.ts.map +1 -0
- package/dist/reader/index.js +40 -0
- package/dist/reader/index.js.map +1 -0
- package/dist/reader/parser.d.ts +53 -0
- package/dist/reader/parser.d.ts.map +1 -0
- package/dist/reader/parser.js +409 -0
- package/dist/reader/parser.js.map +1 -0
- package/dist/reader/types.d.ts +139 -0
- package/dist/reader/types.d.ts.map +1 -0
- package/dist/reader/types.js +14 -0
- package/dist/reader/types.js.map +1 -0
- package/dist/reader/xml-parser.d.ts +45 -0
- package/dist/reader/xml-parser.d.ts.map +1 -0
- package/dist/reader/xml-parser.js +146 -0
- package/dist/reader/xml-parser.js.map +1 -0
- package/package.json +65 -61
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML renderer for the ODT document model.
|
|
3
|
+
*
|
|
4
|
+
* Converts the structured document model produced by the ODT parser into
|
|
5
|
+
* an HTML string. Output is clean, semantic HTML using standard elements:
|
|
6
|
+
* <p>, <h1>–<h6>, <strong>, <em>, <u>, <s>, <sup>, <sub>, <a>, <br>,
|
|
7
|
+
* <ul>, <ol>, <li>, <table>, <tr>, <td>.
|
|
8
|
+
*
|
|
9
|
+
* Text content is HTML-escaped. Attribute values used in href are also
|
|
10
|
+
* HTML-escaped so the output is safe to embed in any HTML context.
|
|
11
|
+
*
|
|
12
|
+
* By default renderHtml() returns a complete HTML document with a
|
|
13
|
+
* <!DOCTYPE html> declaration. Pass { fragment: true } to receive only
|
|
14
|
+
* the inner body content, suitable for embedding in an existing page.
|
|
15
|
+
*/
|
|
16
|
+
import type { BodyNode, HtmlOptions } from "./types.js";
|
|
17
|
+
/**
|
|
18
|
+
* Convert a document body to an HTML string.
|
|
19
|
+
*
|
|
20
|
+
* @param body - Array of BodyNode objects in document order.
|
|
21
|
+
* @param options - HTML output options.
|
|
22
|
+
* @returns HTML string. Full document by default; inner fragment when
|
|
23
|
+
* options.fragment is true.
|
|
24
|
+
*
|
|
25
|
+
* @example
|
|
26
|
+
* ```typescript
|
|
27
|
+
* const html = renderHtml(doc.body, { fragment: true });
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
export declare function renderHtml(body: BodyNode[], options?: HtmlOptions): string;
|
|
31
|
+
//# sourceMappingURL=html-renderer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-renderer.d.ts","sourceRoot":"","sources":["../../src/reader/html-renderer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAiC,WAAW,EAAE,MAAM,YAAY,CAAC;AA6FvF;;;;;;;;;;;;GAYG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,QAAQ,EAAE,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,MAAM,CAI1E"}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML renderer for the ODT document model.
|
|
3
|
+
*
|
|
4
|
+
* Converts the structured document model produced by the ODT parser into
|
|
5
|
+
* an HTML string. Output is clean, semantic HTML using standard elements:
|
|
6
|
+
* <p>, <h1>–<h6>, <strong>, <em>, <u>, <s>, <sup>, <sub>, <a>, <br>,
|
|
7
|
+
* <ul>, <ol>, <li>, <table>, <tr>, <td>.
|
|
8
|
+
*
|
|
9
|
+
* Text content is HTML-escaped. Attribute values used in href are also
|
|
10
|
+
* HTML-escaped so the output is safe to embed in any HTML context.
|
|
11
|
+
*
|
|
12
|
+
* By default renderHtml() returns a complete HTML document with a
|
|
13
|
+
* <!DOCTYPE html> declaration. Pass { fragment: true } to receive only
|
|
14
|
+
* the inner body content, suitable for embedding in an existing page.
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Escape the five characters that must be encoded in HTML text content
 * and attribute values: & < > " and '.
 *
 * All replacements happen in a single pass, so the "&" that starts an
 * entity produced for one character is never escaped a second time.
 *
 * @param text - Raw text to escape.
 * @returns The text with each special character replaced by its entity.
 */
function escapeHtml(text) {
    const entities = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;",
    };
    return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
28
|
+
/**
 * Turn one TextSpan into its HTML representation.
 *
 * A line-break span yields "<br>" and nothing else. Otherwise the escaped
 * text is wrapped from the inside out: <strong>, <em>, <u>, <s>, <sup>,
 * <sub>, and finally <a> as the outermost element when an href is present.
 *
 * NOTE(review): the href value is HTML-escaped, but its URL scheme is not
 * inspected here — confirm whether callers need to reject schemes such as
 * "javascript:".
 *
 * @param span - The span to render.
 * @returns HTML markup for the span.
 */
function renderSpan(span) {
    if (span.lineBreak) {
        return "<br>";
    }
    let markup = escapeHtml(span.text);
    // Innermost wrapper first; each later entry encloses the earlier ones.
    const wrappers = [
        [span.bold, "strong"],
        [span.italic, "em"],
        [span.underline, "u"],
        [span.strikethrough, "s"],
        [span.superscript, "sup"],
        [span.subscript, "sub"],
    ];
    for (const [enabled, tag] of wrappers) {
        if (enabled) {
            markup = `<${tag}>${markup}</${tag}>`;
        }
    }
    if (span.href !== undefined) {
        markup = `<a href="${escapeHtml(span.href)}">${markup}</a>`;
    }
    return markup;
}
|
|
55
|
+
/** Concatenate the HTML of every TextSpan in the array, in order. */
function renderSpans(spans) {
    return spans.reduce((html, span) => html + renderSpan(span), "");
}
|
|
59
|
+
/**
 * Render a ListNode as an HTML list.
 *
 * Ordered lists become <ol>, all others <ul>. Each item becomes an <li>
 * holding its inline content followed by its nested list, if it has one.
 */
function renderList(list) {
    const tag = list.ordered ? "ol" : "ul";
    let body = "";
    for (const item of list.items) {
        const text = renderSpans(item.spans);
        const sub = item.children === undefined ? "" : renderList(item.children);
        body += `<li>${text}${sub}</li>`;
    }
    return `<${tag}>${body}</${tag}>`;
}
|
|
71
|
+
/**
 * Render a TableNode as an HTML <table>.
 *
 * Every row becomes a <tr> and every cell a <td>. A colspan or rowspan
 * attribute is written only when the corresponding span is greater than 1.
 */
function renderTable(table) {
    // Yields ` name="value"` for spans above 1, otherwise an empty string.
    const spanAttr = (name, value) =>
        value !== undefined && value > 1 ? ` ${name}="${value}"` : "";
    const cellHtml = (cell) => {
        const attrs = spanAttr("colspan", cell.colSpan) + spanAttr("rowspan", cell.rowSpan);
        return `<td${attrs}>${renderSpans(cell.spans)}</td>`;
    };
    const rowHtml = (row) => {
        const cells = row.cells.map((cell) => cellHtml(cell)).join("");
        return `<tr>${cells}</tr>`;
    };
    const rows = table.rows.map((row) => rowHtml(row)).join("");
    return `<table>${rows}</table>`;
}
|
|
93
|
+
/**
 * Render one BodyNode to HTML, dispatching on its `kind`.
 *
 * Paragraphs become <p>, headings <hN> (N taken from node.level), and
 * lists and tables are handed to their dedicated renderers. A node with
 * any other kind yields undefined.
 */
function renderNode(node) {
    const kind = node.kind;
    if (kind === "paragraph") {
        return `<p>${renderSpans(node.spans)}</p>`;
    }
    if (kind === "heading") {
        return `<h${node.level}>${renderSpans(node.spans)}</h${node.level}>`;
    }
    if (kind === "list") {
        return renderList(node);
    }
    if (kind === "table") {
        return renderTable(node);
    }
    return undefined;
}
|
|
106
|
+
/**
 * Render a document body as HTML.
 *
 * Each body node is rendered on its own line. When `options.fragment` is
 * exactly `true`, only that markup is returned; otherwise it is wrapped in
 * a minimal <!DOCTYPE html> / <html> / <body> document.
 *
 * @param body - BodyNode objects in document order.
 * @param options - HTML output options; may be omitted.
 * @returns The HTML string (full document, or fragment when requested).
 *
 * @example
 * ```typescript
 * const html = renderHtml(doc.body, { fragment: true });
 * ```
 */
export function renderHtml(body, options) {
    const rendered = body.map((node) => renderNode(node));
    const inner = rendered.join("\n");
    const wantsFragment = options?.fragment === true;
    if (!wantsFragment) {
        return `<!DOCTYPE html>\n<html>\n<body>\n${inner}\n</body>\n</html>`;
    }
    return inner;
}
|
|
125
|
+
//# sourceMappingURL=html-renderer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-renderer.js","sourceRoot":"","sources":["../../src/reader/html-renderer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAIH;;;GAGG;AACH,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI;SACR,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAC5B,CAAC;AAED;;;;;;GAMG;AACH,SAAS,UAAU,CAAC,IAAc;IAChC,IAAI,IAAI,CAAC,SAAS;QAAE,OAAO,MAAM,CAAC;IAElC,IAAI,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEjC,IAAI,IAAI,CAAC,IAAI;QAAE,IAAI,GAAG,WAAW,IAAI,WAAW,CAAC;IACjD,IAAI,IAAI,CAAC,MAAM;QAAE,IAAI,GAAG,OAAO,IAAI,OAAO,CAAC;IAC3C,IAAI,IAAI,CAAC,SAAS;QAAE,IAAI,GAAG,MAAM,IAAI,MAAM,CAAC;IAC5C,IAAI,IAAI,CAAC,aAAa;QAAE,IAAI,GAAG,MAAM,IAAI,MAAM,CAAC;IAChD,IAAI,IAAI,CAAC,WAAW;QAAE,IAAI,GAAG,QAAQ,IAAI,QAAQ,CAAC;IAClD,IAAI,IAAI,CAAC,SAAS;QAAE,IAAI,GAAG,QAAQ,IAAI,QAAQ,CAAC;IAChD,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS;QAAE,IAAI,GAAG,YAAY,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC;IAErF,OAAO,IAAI,CAAC;AACd,CAAC;AAED,yEAAyE;AACzE,SAAS,WAAW,CAAC,KAAiB;IACpC,OAAO,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACxC,CAAC;AAED,wDAAwD;AACxD,SAAS,UAAU,CAAC,IAAc;IAChC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK;SACrB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACZ,MAAM,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC5E,OAAO,OAAO,OAAO,GAAG,MAAM,OAAO,CAAC;IACxC,CAAC,CAAC;SACD,IAAI,CAAC,EAAE,CAAC,CAAC;IACZ,OAAO,IAAI,GAAG,IAAI,KAAK,KAAK,GAAG,GAAG,CAAC;AACrC,CAAC;AAED,oDAAoD;AACpD,SAAS,WAAW,CAAC,KAAgB;IACnC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI;SACpB,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACX,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK;aACpB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,MAAM,SAAS,GAAa,EAAE,CAAC;YAC/B,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;gBACnD,SAAS,CAAC,IAAI,CAAC,YAAY,IAAI,CA
AC,OAAO,GAAG,CAAC,CAAC;YAC9C,CAAC;YACD,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;gBACnD,SAAS,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,OAAO,GAAG,CAAC,CAAC;YAC9C,CAAC;YACD,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACpE,OAAO,MAAM,KAAK,IAAI,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;QACvD,CAAC,CAAC;aACD,IAAI,CAAC,EAAE,CAAC,CAAC;QACZ,OAAO,OAAO,KAAK,OAAO,CAAC;IAC7B,CAAC,CAAC;SACD,IAAI,CAAC,EAAE,CAAC,CAAC;IACZ,OAAO,UAAU,IAAI,UAAU,CAAC;AAClC,CAAC;AAED,kDAAkD;AAClD,SAAS,UAAU,CAAC,IAAc;IAChC,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;QAClB,KAAK,WAAW;YACd,OAAO,MAAM,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;QAC7C,KAAK,SAAS;YACZ,OAAO,KAAK,IAAI,CAAC,KAAK,IAAI,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,GAAG,CAAC;QACvE,KAAK,MAAM;YACT,OAAO,UAAU,CAAC,IAAI,CAAC,CAAC;QAC1B,KAAK,OAAO;YACV,OAAO,WAAW,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,UAAU,CAAC,IAAgB,EAAE,OAAqB;IAChE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9C,IAAI,OAAO,EAAE,QAAQ,KAAK,IAAI;QAAE,OAAO,KAAK,CAAC;IAC7C,OAAO,oCAAoC,KAAK,oBAAoB,CAAC;AACvE,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public API for the odf-kit ODT reader.
|
|
3
|
+
*
|
|
4
|
+
* Import from "odf-kit/reader" (separate from the main "odf-kit" import
|
|
5
|
+
* so the reader is only bundled when explicitly needed):
|
|
6
|
+
*
|
|
7
|
+
* ```typescript
|
|
8
|
+
* import { readOdt, odtToHtml } from "odf-kit/reader";
|
|
9
|
+
* ```
|
|
10
|
+
*
|
|
11
|
+
* readOdt() returns an OdtDocumentModel with a body array and a toHtml()
|
|
12
|
+
* method. odtToHtml() is a convenience wrapper that calls readOdt().toHtml()
|
|
13
|
+
* in a single step.
|
|
14
|
+
*/
|
|
15
|
+
export { readOdt } from "./parser.js";
|
|
16
|
+
export type { OdtDocumentModel, OdtMetadata, BodyNode, ParagraphNode, HeadingNode, ListNode, ListItemNode, TableNode, TableRowNode, TableCellNode, TextSpan, HtmlOptions, } from "./types.js";
|
|
17
|
+
import type { HtmlOptions } from "./types.js";
|
|
18
|
+
/**
|
|
19
|
+
* Convert an .odt file directly to an HTML string.
|
|
20
|
+
*
|
|
21
|
+
* Convenience wrapper around readOdt().toHtml(). Use readOdt() directly
|
|
22
|
+
* when you need access to the document model or metadata.
|
|
23
|
+
*
|
|
24
|
+
* @param bytes - The raw .odt file as a Uint8Array.
|
|
25
|
+
* @param options - HTML output options.
|
|
26
|
+
* @returns HTML string. Full document by default; inner fragment when
|
|
27
|
+
* options.fragment is true.
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* ```typescript
|
|
31
|
+
* import { odtToHtml } from "odf-kit/reader";
|
|
32
|
+
* import { readFileSync } from "node:fs";
|
|
33
|
+
*
|
|
34
|
+
* const bytes = new Uint8Array(readFileSync("document.odt"));
|
|
35
|
+
* const html = odtToHtml(bytes, { fragment: true });
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
export declare function odtToHtml(bytes: Uint8Array, options?: HtmlOptions): string;
|
|
39
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reader/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,YAAY,EACV,gBAAgB,EAChB,WAAW,EACX,QAAQ,EACR,aAAa,EACb,WAAW,EACX,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,YAAY,EACZ,aAAa,EACb,QAAQ,EACR,WAAW,GACZ,MAAM,YAAY,CAAC;AAGpB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,UAAU,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,MAAM,CAE1E"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public API for the odf-kit ODT reader.
|
|
3
|
+
*
|
|
4
|
+
* Import from "odf-kit/reader" (separate from the main "odf-kit" import
|
|
5
|
+
* so the reader is only bundled when explicitly needed):
|
|
6
|
+
*
|
|
7
|
+
* ```typescript
|
|
8
|
+
* import { readOdt, odtToHtml } from "odf-kit/reader";
|
|
9
|
+
* ```
|
|
10
|
+
*
|
|
11
|
+
* readOdt() returns an OdtDocumentModel with a body array and a toHtml()
|
|
12
|
+
* method. odtToHtml() is a convenience wrapper that calls readOdt().toHtml()
|
|
13
|
+
* in a single step.
|
|
14
|
+
*/
|
|
15
|
+
export { readOdt } from "./parser.js";
|
|
16
|
+
import { readOdt } from "./parser.js";
|
|
17
|
+
/**
 * One-step conversion from .odt bytes to an HTML string.
 *
 * Parses the bytes with readOdt() and calls toHtml() on the resulting
 * model. Call readOdt() yourself when you also need the document model
 * or its metadata.
 *
 * @param bytes - The raw .odt file as a Uint8Array.
 * @param options - HTML output options, forwarded unchanged to toHtml().
 * @returns Whatever the model's toHtml() returns for the given options.
 *
 * @example
 * ```typescript
 * import { odtToHtml } from "odf-kit/reader";
 * import { readFileSync } from "node:fs";
 *
 * const bytes = new Uint8Array(readFileSync("document.odt"));
 * const html = odtToHtml(bytes, { fragment: true });
 * ```
 */
export function odtToHtml(bytes, options) {
    const model = readOdt(bytes);
    return model.toHtml(options);
}
|
|
40
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/reader/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAgBtC,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AAGtC;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,SAAS,CAAC,KAAiB,EAAE,OAAqB;IAChE,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACxC,CAAC"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ODT parser — the core of the odf-kit reader.
|
|
3
|
+
*
|
|
4
|
+
* Unpacks an .odt file (ZIP), parses content.xml and meta.xml, builds
|
|
5
|
+
* the structured OdtDocumentModel, and wires up the toHtml() method.
|
|
6
|
+
*
|
|
7
|
+
* The parsing pipeline:
|
|
8
|
+
* 1. Unzip the .odt bytes with fflate
|
|
9
|
+
* 2. Parse content.xml with parseXml — produces an XmlElementNode tree
|
|
10
|
+
* 3. Build style maps from <office:automatic-styles> so character
|
|
11
|
+
* formatting (bold, italic, etc.) and list types can be resolved
|
|
12
|
+
* 4. Walk <office:body>/<office:text> to produce BodyNode[]
|
|
13
|
+
* 5. Parse meta.xml (if present) for document metadata
|
|
14
|
+
* 6. Return an OdtDocumentModel whose toHtml() delegates to renderHtml()
|
|
15
|
+
*
|
|
16
|
+
* Exported for unit testing: parseMetaXml is tested in isolation.
|
|
17
|
+
* Internal helpers (buildStyleMaps, parseBodyNodes, etc.) are tested
|
|
18
|
+
* indirectly through readOdt round-trip integration tests.
|
|
19
|
+
*/
|
|
20
|
+
import type { OdtDocumentModel, OdtMetadata } from "./types.js";
|
|
21
|
+
/**
|
|
22
|
+
* Parse a meta.xml string and return the document metadata.
|
|
23
|
+
*
|
|
24
|
+
* Exported so it can be tested in isolation with known XML strings
|
|
25
|
+
* without needing a full .odt ZIP file.
|
|
26
|
+
*
|
|
27
|
+
* @param metaXml - Content of meta.xml as a string.
|
|
28
|
+
* @returns Populated OdtMetadata object. Missing fields are undefined.
|
|
29
|
+
*/
|
|
30
|
+
export declare function parseMetaXml(metaXml: string): OdtMetadata;
|
|
31
|
+
/**
|
|
32
|
+
* Parse an .odt file and return a structured document model.
|
|
33
|
+
*
|
|
34
|
+
* Reads content.xml for the document body and automatic styles, and
|
|
35
|
+
* meta.xml for document metadata. Both files are always present in
|
|
36
|
+
* spec-compliant .odt files.
|
|
37
|
+
*
|
|
38
|
+
* @param bytes - The raw .odt file as a Uint8Array.
|
|
39
|
+
* @returns A populated OdtDocumentModel with body, metadata, and toHtml().
|
|
40
|
+
* @throws Error if the input is not a valid ZIP or is missing content.xml.
|
|
41
|
+
*
|
|
42
|
+
* @example
|
|
43
|
+
* ```typescript
|
|
44
|
+
* import { readOdt } from "odf-kit/reader";
|
|
45
|
+
* import { readFileSync } from "node:fs";
|
|
46
|
+
*
|
|
47
|
+
* const bytes = new Uint8Array(readFileSync("document.odt"));
|
|
48
|
+
* const doc = readOdt(bytes);
|
|
49
|
+
* console.log(doc.body.length, "body nodes");
|
|
50
|
+
* ```
|
|
51
|
+
*/
|
|
52
|
+
export declare function readOdt(bytes: Uint8Array): OdtDocumentModel;
|
|
53
|
+
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../../src/reader/parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAMH,OAAO,KAAK,EACV,gBAAgB,EAChB,WAAW,EAWZ,MAAM,YAAY,CAAC;AAgXpB;;;;;;;;GAQG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,WAAW,CAuBzD;AAMD;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,OAAO,CAAC,KAAK,EAAE,UAAU,GAAG,gBAAgB,CAwB3D"}
|
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ODT parser — the core of the odf-kit reader.
|
|
3
|
+
*
|
|
4
|
+
* Unpacks an .odt file (ZIP), parses content.xml and meta.xml, builds
|
|
5
|
+
* the structured OdtDocumentModel, and wires up the toHtml() method.
|
|
6
|
+
*
|
|
7
|
+
* The parsing pipeline:
|
|
8
|
+
* 1. Unzip the .odt bytes with fflate
|
|
9
|
+
* 2. Parse content.xml with parseXml — produces an XmlElementNode tree
|
|
10
|
+
* 3. Build style maps from <office:automatic-styles> so character
|
|
11
|
+
* formatting (bold, italic, etc.) and list types can be resolved
|
|
12
|
+
* 4. Walk <office:body>/<office:text> to produce BodyNode[]
|
|
13
|
+
* 5. Parse meta.xml (if present) for document metadata
|
|
14
|
+
* 6. Return an OdtDocumentModel whose toHtml() delegates to renderHtml()
|
|
15
|
+
*
|
|
16
|
+
* Exported for unit testing: parseMetaXml is tested in isolation.
|
|
17
|
+
* Internal helpers (buildStyleMaps, parseBodyNodes, etc.) are tested
|
|
18
|
+
* indirectly through readOdt round-trip integration tests.
|
|
19
|
+
*/
|
|
20
|
+
import { unzipSync, strFromU8 } from "fflate";
|
|
21
|
+
import { parseXml } from "./xml-parser.js";
|
|
22
|
+
import { renderHtml } from "./html-renderer.js";
|
|
23
|
+
// ============================================================
|
|
24
|
+
// Internal XML navigation helpers
|
|
25
|
+
// ============================================================
|
|
26
|
+
/**
 * Look up a direct child element by tag name.
 *
 * Only immediate children are considered; the first child whose type is
 * "element" and whose tag matches is returned, or undefined if none does.
 */
function findElement(node, tag) {
    return node.children.find((child) => child.type === "element" && child.tag === tag);
}
|
|
34
|
+
/**
 * Join the text of a node's direct text children, in order.
 *
 * Element children (and any text nested inside them) are ignored.
 */
function textContent(node) {
    const pieces = [];
    for (const child of node.children) {
        if (child.type === "text") {
            pieces.push(child.text);
        }
    }
    return pieces.join("");
}
|
|
41
|
+
/**
 * Combine an inherited character style with a more specific one.
 *
 * A formatting flag is set to true in the result when it is truthy in
 * either input; flags that are falsy in both are left out entirely. An
 * override therefore can switch a flag on but cannot switch an inherited
 * flag off.
 *
 * @param base - Formatting inherited from the enclosing container.
 * @param override - Formatting of the inner style.
 * @returns A new style object; neither input is modified.
 */
function mergeStyle(base, override) {
    const flags = ["bold", "italic", "underline", "strikethrough", "superscript", "subscript"];
    const merged = {};
    for (const flag of flags) {
        if (base[flag] || override[flag]) {
            merged[flag] = true;
        }
    }
    return merged;
}
|
|
65
|
+
/**
 * Create a TextSpan for the given text.
 *
 * Only formatting flags that are truthy in `style` are copied onto the
 * span (always as `true`), and `href` is attached only when it is not
 * undefined, so the resulting object carries no falsy properties besides
 * possibly an empty `text`.
 */
function makeSpan(text, style, href) {
    const span = { text };
    const flags = ["bold", "italic", "underline", "strikethrough", "superscript", "subscript"];
    for (const flag of flags) {
        if (style[flag]) {
            span[flag] = true;
        }
    }
    if (href !== undefined) {
        span.href = href;
    }
    return span;
}
|
|
84
|
+
/**
 * Read the style definitions that are direct children of a styles
 * container and record them in the supplied maps (mutated in place).
 *
 * - <style:style> with a name and a <style:text-properties> child adds an
 *   entry to `charStyles` holding the bold / italic / underline /
 *   strikethrough / superscript / subscript flags found there.
 * - <text:list-style> with a name adds an entry to `listOrdered` based on
 *   its level-1 definition: true for a number style, false for a bullet
 *   style.
 *
 * @param container - Element whose children are style definitions.
 * @param charStyles - Map from style name to character formatting flags.
 * @param listOrdered - Map from list style name to "is ordered".
 */
function scanStylesElement(container, charStyles, listOrdered) {
    // A line style counts as active when present and not "none".
    const isActive = (value) => value !== undefined && value !== "none";

    const recordCharStyle = (styleEl) => {
        const name = styleEl.attrs["style:name"];
        if (!name) {
            return;
        }
        const textProps = findElement(styleEl, "style:text-properties");
        if (!textProps) {
            return;
        }
        const props = textProps.attrs;
        const style = {};
        if (props["fo:font-weight"] === "bold") {
            style.bold = true;
        }
        if (props["fo:font-style"] === "italic") {
            style.italic = true;
        }
        if (isActive(props["style:text-underline-style"])) {
            style.underline = true;
        }
        if (isActive(props["style:text-line-through-style"])) {
            style.strikethrough = true;
        }
        const position = props["style:text-position"];
        if (position !== undefined) {
            if (position.startsWith("super")) {
                style.superscript = true;
            }
            if (position.startsWith("sub")) {
                style.subscript = true;
            }
        }
        charStyles.set(name, style);
    };

    const recordListKind = (listStyleEl) => {
        const name = listStyleEl.attrs["style:name"];
        if (!name) {
            return;
        }
        for (const level of listStyleEl.children) {
            // Only the level-1 definition decides ordered vs. unordered.
            if (level.type !== "element" || level.attrs["text:level"] !== "1") {
                continue;
            }
            if (level.tag === "text:list-level-style-number") {
                listOrdered.set(name, true);
            }
            else if (level.tag === "text:list-level-style-bullet") {
                listOrdered.set(name, false);
            }
        }
    };

    for (const entry of container.children) {
        if (entry.type !== "element") {
            continue;
        }
        if (entry.tag === "style:style") {
            recordCharStyle(entry);
        }
        else if (entry.tag === "text:list-style") {
            recordListKind(entry);
        }
    }
}
|
|
140
|
+
/**
 * Collect character and list style information from a parsed content.xml
 * root.
 *
 * <office:automatic-styles> is scanned first and <office:styles> second,
 * both into the same pair of maps, so a name defined in both containers
 * ends up with the <office:styles> definition. Containers that are absent
 * are simply skipped.
 *
 * @param contentRoot - Element whose direct children include the styles
 *   containers.
 * @returns `{ charStyles, listOrdered }` maps keyed by style name.
 */
function buildStyleMaps(contentRoot) {
    const maps = { charStyles: new Map(), listOrdered: new Map() };
    for (const tag of ["office:automatic-styles", "office:styles"]) {
        const container = findElement(contentRoot, tag);
        if (container) {
            scanStylesElement(container, maps.charStyles, maps.listOrdered);
        }
    }
    return maps;
}
|
|
159
|
+
// ============================================================
|
|
160
|
+
// Inline span parsing
|
|
161
|
+
// ============================================================
|
|
162
|
+
/**
 * Parse the inline content of a paragraph, heading, list item, or table
 * cell into an array of TextSpan objects.
 *
 * Recursively handles nested <text:span>, <text:a>, <text:line-break>,
 * <text:tab>, and <text:s> (the ODF element for a run of space
 * characters). Any other element is treated as a transparent wrapper and
 * its children are walked with the current formatting.
 *
 * @param node - The container element whose children are walked.
 * @param charStyles - Character style map, keyed by style name.
 * @param baseStyle - Inherited character formatting from the container.
 * @param href - Inherited hyperlink href, set when inside <text:a>.
 * @returns The spans in document order.
 */
function parseSpans(node, charStyles, baseStyle = {}, href) {
    const spans = [];
    for (const child of node.children) {
        if (child.type === "text") {
            if (child.text.length > 0) {
                spans.push(makeSpan(child.text, baseStyle, href));
            }
            continue;
        }
        switch (child.tag) {
            case "text:line-break":
                spans.push({ text: "", lineBreak: true });
                break;
            case "text:tab":
                spans.push(makeSpan("\t", baseStyle, href));
                break;
            case "text:s": {
                // ODF space element — text:c gives the repeat count (default 1).
                // A malformed count would either produce an empty span (NaN) or
                // make String.prototype.repeat throw (negative), so fall back to
                // a single space in both cases.
                const parsed = parseInt(child.attrs["text:c"] ?? "1", 10);
                const count = Number.isNaN(parsed) || parsed < 0 ? 1 : parsed;
                spans.push(makeSpan(" ".repeat(count), baseStyle, href));
                break;
            }
            case "text:span": {
                // Unknown or missing style names contribute no formatting.
                const styleName = child.attrs["text:style-name"];
                const spanStyle = styleName !== undefined ? (charStyles.get(styleName) ?? {}) : {};
                const merged = mergeStyle(baseStyle, spanStyle);
                spans.push(...parseSpans(child, charStyles, merged, href));
                break;
            }
            case "text:a": {
                // A link without xlink:href keeps the href of an enclosing link.
                const childHref = child.attrs["xlink:href"] ?? href;
                spans.push(...parseSpans(child, charStyles, baseStyle, childHref));
                break;
            }
            default:
                // Unknown inline elements: recurse to pick up any text children
                spans.push(...parseSpans(child, charStyles, baseStyle, href));
                break;
        }
    }
    return spans;
}
|
|
216
|
+
// ============================================================
|
|
217
|
+
// Body node parsers
|
|
218
|
+
// ============================================================
|
|
219
|
+
/**
 * Parse a <text:list> element into a ListNode.
 *
 * Whether the list is ordered is looked up from its text:style-name. When
 * the element has no style name, or the name is not in the style map, the
 * value inherited from the enclosing list is used; ODF nested lists
 * normally omit the attribute and take the style of the surrounding list.
 * Top-level callers omit the third argument, which keeps the previous
 * default of "unordered".
 *
 * For each <text:list-item>, the inline content of all <text:p> and
 * <text:h> children is concatenated into one span array. If an item holds
 * several nested <text:list> elements, only the last one is kept.
 *
 * @param listEl - The <text:list> element.
 * @param styles - Style maps providing `charStyles` and `listOrdered`.
 * @param inheritedOrdered - Ordered flag of the enclosing list, if any.
 * @returns A ListNode with `kind: "list"`.
 */
function parseList(listEl, styles, inheritedOrdered = false) {
    const styleName = listEl.attrs["text:style-name"];
    const declared = styleName !== undefined ? styles.listOrdered.get(styleName) : undefined;
    const ordered = declared ?? inheritedOrdered;
    const items = [];
    for (const child of listEl.children) {
        if (child.type !== "element" || child.tag !== "text:list-item")
            continue;
        let spans = [];
        let nested;
        for (const itemChild of child.children) {
            if (itemChild.type !== "element")
                continue;
            if (itemChild.tag === "text:p" || itemChild.tag === "text:h") {
                spans = spans.concat(parseSpans(itemChild, styles.charStyles));
            }
            else if (itemChild.tag === "text:list") {
                // Pass this list's kind down so unstyled sub-lists inherit it.
                nested = parseList(itemChild, styles, ordered);
            }
        }
        const item = { spans };
        if (nested !== undefined)
            item.children = nested;
        items.push(item);
    }
    return { kind: "list", ordered, items };
}
|
|
246
|
+
/**
 * Parse a <table:table> element into a TableNode.
 *
 * Only <table:table-row> elements that are direct children of the table
 * are read, and within each row only <table:table-cell> elements; covered
 * cells (placeholders under a merged cell) and anything else are skipped.
 * The inline content of all <text:p> children of a cell is concatenated
 * into a single span array, so paragraph breaks inside a cell are not
 * represented. colSpan / rowSpan are stored only when greater than 1.
 */
function parseTable(tableEl, styles) {
    const isElement = (node, tag) => node.type === "element" && node.tag === tag;
    const spanCount = (cellEl, attr) => parseInt(cellEl.attrs[attr] ?? "1", 10);

    const toCell = (cellEl) => {
        const colSpan = spanCount(cellEl, "table:number-columns-spanned");
        const rowSpan = spanCount(cellEl, "table:number-rows-spanned");
        const spans = cellEl.children
            .filter((cellChild) => isElement(cellChild, "text:p"))
            .flatMap((para) => parseSpans(para, styles.charStyles));
        const cell = { spans };
        if (colSpan > 1) {
            cell.colSpan = colSpan;
        }
        if (rowSpan > 1) {
            cell.rowSpan = rowSpan;
        }
        return cell;
    };

    const rows = [];
    for (const rowEl of tableEl.children) {
        if (!isElement(rowEl, "table:table-row")) {
            continue;
        }
        const cells = rowEl.children
            .filter((cellEl) => isElement(cellEl, "table:table-cell"))
            .map((cellEl) => toCell(cellEl));
        rows.push({ cells });
    }
    return { kind: "table", rows };
}
|
|
283
|
+
/**
 * Walk the children of an <office:text> (or <text:section>) element and
 * produce an ordered array of BodyNode objects.
 *
 * Handles paragraphs, headings, lists, tables, and sections (which are
 * transparent containers whose content is spliced in place). All other
 * elements, and all non-element children, are skipped.
 *
 * Heading levels come from text:outline-level, default to 1 when the
 * attribute is missing or not a number, and are clamped to the 1–6 range.
 *
 * @param bodyTextEl - The container element to walk.
 * @param styles - Style maps providing `charStyles` and `listOrdered`.
 * @returns Body nodes in document order.
 */
function parseBodyNodes(bodyTextEl, styles) {
    const nodes = [];
    for (const child of bodyTextEl.children) {
        if (child.type !== "element")
            continue;
        switch (child.tag) {
            case "text:p": {
                const para = {
                    kind: "paragraph",
                    spans: parseSpans(child, styles.charStyles),
                };
                nodes.push(para);
                break;
            }
            case "text:h": {
                const rawLevel = parseInt(child.attrs["text:outline-level"] ?? "1", 10);
                // Math.min/Math.max propagate NaN, which would later render as
                // "<hNaN>", so an unparseable level falls back to 1.
                const level = Number.isNaN(rawLevel) ? 1 : Math.min(Math.max(rawLevel, 1), 6);
                const heading = {
                    kind: "heading",
                    level,
                    spans: parseSpans(child, styles.charStyles),
                };
                nodes.push(heading);
                break;
            }
            case "text:list":
                nodes.push(parseList(child, styles));
                break;
            case "table:table":
                nodes.push(parseTable(child, styles));
                break;
            case "text:section":
                // Sections are transparent containers — recurse into their content
                nodes.push(...parseBodyNodes(child, styles));
                break;
        }
    }
    return nodes;
}
|
|
329
|
+
// ============================================================
|
|
330
|
+
// Metadata parser (exported for unit testing)
|
|
331
|
+
// ============================================================
|
|
332
|
+
/**
 * Extract document metadata from the text of a meta.xml file.
 *
 * The XML is parsed, the <office:meta> child of the root is located, and
 * the text of each known child element is copied into the result. The
 * function is exported so it can be exercised with plain XML strings,
 * without building an .odt archive.
 *
 * @param metaXml - Content of meta.xml as a string.
 * @returns Metadata object; a field is only set when its element exists.
 *   An empty object is returned when there is no <office:meta> element.
 */
export function parseMetaXml(metaXml) {
    const metaEl = findElement(parseXml(metaXml), "office:meta");
    const metadata = {};
    if (!metaEl)
        return metadata;
    // [result field, element tag] pairs, in the order the fields are filled.
    const fieldTags = [
        ["title", "dc:title"],
        ["creator", "dc:creator"],
        ["description", "dc:description"],
        ["creationDate", "meta:creation-date"],
        ["modificationDate", "dc:date"],
    ];
    for (const [field, tag] of fieldTags) {
        const el = findElement(metaEl, tag);
        if (el)
            metadata[field] = textContent(el);
    }
    return metadata;
}
|
|
364
|
+
// ============================================================
|
|
365
|
+
// Public API
|
|
366
|
+
// ============================================================
|
|
367
|
+
/**
 * Read an .odt file and build a structured document model from it.
 *
 * The archive is unpacked with unzipSync(). content.xml supplies the style
 * maps and the document body; meta.xml, when present, supplies the
 * metadata (otherwise the metadata object is empty). The body is empty
 * when content.xml has no <office:body>/<office:text> element.
 *
 * @param bytes - The raw .odt file as a Uint8Array.
 * @returns An object with metadata, body, and a toHtml() method that
 *   renders the body through renderHtml().
 * @throws Error if content.xml is missing from the archive. Errors raised
 *   by unzipSync() or the XML parser are not caught here.
 *
 * @example
 * ```typescript
 * import { readOdt } from "odf-kit/reader";
 * import { readFileSync } from "node:fs";
 *
 * const bytes = new Uint8Array(readFileSync("document.odt"));
 * const doc = readOdt(bytes);
 * console.log(doc.body.length, "body nodes");
 * ```
 */
export function readOdt(bytes) {
    const { "content.xml": contentEntry, "meta.xml": metaEntry } = unzipSync(bytes);
    if (!contentEntry) {
        throw new Error("readOdt: content.xml not found in ODT file");
    }
    const contentXml = strFromU8(contentEntry);
    // Metadata is optional: a missing meta.xml yields an empty object.
    const metadata = metaEntry ? parseMetaXml(strFromU8(metaEntry)) : {};
    const contentRoot = parseXml(contentXml);
    const styles = buildStyleMaps(contentRoot);
    let body = [];
    const officeBody = findElement(contentRoot, "office:body");
    if (officeBody) {
        const officeText = findElement(officeBody, "office:text");
        if (officeText) {
            body = parseBodyNodes(officeText, styles);
        }
    }
    return {
        metadata,
        body,
        toHtml(options) {
            return renderHtml(body, options);
        },
    };
}
|
|
409
|
+
//# sourceMappingURL=parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../../src/reader/parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAE3C,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAgBhD,+DAA+D;AAC/D,kCAAkC;AAClC,+DAA+D;AAE/D,8EAA8E;AAC9E,SAAS,WAAW,CAAC,IAAoB,EAAE,GAAW;IACpD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClC,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,GAAG,KAAK,GAAG;YAAE,OAAO,KAAK,CAAC;IAClE,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,wEAAwE;AACxE,SAAS,WAAW,CAAC,IAAoB;IACvC,OAAO,IAAI,CAAC,QAAQ;SACjB,MAAM,CAAC,CAAC,CAAC,EAA2C,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC;SACzE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SAClB,IAAI,CAAC,EAAE,CAAC,CAAC;AACd,CAAC;AAmBD;;;;;;;GAOG;AACH,SAAS,UAAU,CAAC,IAAe,EAAE,QAAmB;IACtD,MAAM,MAAM,GAAc,EAAE,CAAC;IAC7B,IAAI,IAAI,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI;QAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IACnD,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM;QAAE,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC;IACzD,IAAI,IAAI,CAAC,SAAS,IAAI,QAAQ,CAAC,SAAS;QAAE,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;IAClE,IAAI,IAAI,CAAC,aAAa,IAAI,QAAQ,CAAC,aAAa;QAAE,MAAM,CAAC,aAAa,GAAG,IAAI,CAAC;IAC9E,IAAI,IAAI,CAAC,WAAW,IAAI,QAAQ,CAAC,WAAW;QAAE,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC;IACxE,IAAI,IAAI,CAAC,SAAS,IAAI,QAAQ,CAAC,SAAS;QAAE,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;IAClE,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,qFAAqF;AACrF,SAAS,QAAQ,CAAC,IAAY,EAAE,KAAgB,EAAE,IAAa;IAC7D,MAAM,IAAI,GAAa,EAAE,IAAI,EAAE,CAAC;IAChC,IAAI,KAAK,CAAC,IAAI;QAAE,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACjC,IAAI,KAAK,CAAC,MAAM;QAAE,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrC,IAAI,KAAK,CAAC,SAAS;QAAE,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;IAC3C,IAAI,KAAK,CAAC,aAAa;QAAE,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IACnD,IAAI,KAAK,CAAC,WAAW;QAAE,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC/C,IAAI,KAAK,CAAC,SAAS;QAAE,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;IAC3C,IAAI,IAAI,KAAK,SAAS;QAAE,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACzC,OAAO,IAAI,CAAC;AACd,CAAC;AAiBD;;;GAGG;AACH,SAAS,iBAAiB,CACxB,SAAyB,EACzB,UAAkC,EA
ClC,WAAiC;IAEjC,KAAK,MAAM,KAAK,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;QACvC,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS;YAAE,SAAS;QAEvC,IAAI,KAAK,CAAC,GAAG,KAAK,aAAa,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;YACvC,IAAI,CAAC,IAAI;gBAAE,SAAS;YAEpB,MAAM,WAAW,GAAG,WAAW,CAAC,KAAK,EAAE,uBAAuB,CAAC,CAAC;YAChE,IAAI,CAAC,WAAW;gBAAE,SAAS;YAE3B,MAAM,KAAK,GAAc,EAAE,CAAC;YAC5B,MAAM,CAAC,GAAG,WAAW,CAAC,KAAK,CAAC;YAE5B,IAAI,CAAC,CAAC,gBAAgB,CAAC,KAAK,MAAM;gBAAE,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC;YACtD,IAAI,CAAC,CAAC,eAAe,CAAC,KAAK,QAAQ;gBAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC;YAEzD,MAAM,cAAc,GAAG,CAAC,CAAC,4BAA4B,CAAC,CAAC;YACvD,IAAI,cAAc,KAAK,SAAS,IAAI,cAAc,KAAK,MAAM;gBAAE,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC;YAEtF,MAAM,WAAW,GAAG,CAAC,CAAC,+BAA+B,CAAC,CAAC;YACvD,IAAI,WAAW,KAAK,SAAS,IAAI,WAAW,KAAK,MAAM;gBAAE,KAAK,CAAC,aAAa,GAAG,IAAI,CAAC;YAEpF,MAAM,YAAY,GAAG,CAAC,CAAC,qBAAqB,CAAC,CAAC;YAC9C,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;gBAC/B,IAAI,YAAY,CAAC,UAAU,CAAC,OAAO,CAAC;oBAAE,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC;gBAC/D,IAAI,YAAY,CAAC,UAAU,CAAC,KAAK,CAAC;oBAAE,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC;YAC7D,CAAC;YAED,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,IAAI,KAAK,CAAC,GAAG,KAAK,iBAAiB,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;YACvC,IAAI,CAAC,IAAI;gBAAE,SAAS;YAEpB,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;gBACxC,IAAI,UAAU,CAAC,IAAI,KAAK,SAAS;oBAAE,SAAS;gBAC5C,IAAI,UAAU,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,GAAG;oBAAE,SAAS;gBACrD,IAAI,UAAU,CAAC,GAAG,KAAK,8BAA8B,EAAE,CAAC;oBACtD,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;gBAC9B,CAAC;qBAAM,IAAI,UAAU,CAAC,GAAG,KAAK,8BAA8B,EAAE,CAAC;oBAC7D,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;gBAC/B,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,cAAc,CAAC,WAA2B;IACjD,MAAM,UAAU,GAAG,IAAI,GAAG,EAAqB,CAAC;IAChD,MAAM,WAAW,GAAG,IAAI,GAAG,EAAmB,CAAC;IAE/C,MAAM,YAAY,GAAG,WAAW,CAAC,WAAW,EAAE,yBAAyB,CAAC,CAAC;IACzE,IAAI,YAAY;QAAE,iBAAiB,CAAC,YAAY,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;IAE3E,MAAM,aAAa,GAAG,WAAW,CAAC,WAAW,EAAE,eAAe
,CAAC,CAAC;IAChE,IAAI,aAAa;QAAE,iBAAiB,CAAC,aAAa,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;IAE7E,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,CAAC;AACrC,CAAC;AAED,+DAA+D;AAC/D,sBAAsB;AACtB,+DAA+D;AAE/D;;;;;;;;;;;GAWG;AACH,SAAS,UAAU,CACjB,IAAoB,EACpB,UAAkC,EAClC,YAAuB,EAAE,EACzB,IAAa;IAEb,MAAM,KAAK,GAAe,EAAE,CAAC;IAE7B,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClC,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC1B,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC;YACpD,CAAC;YACD,SAAS;QACX,CAAC;QAED,QAAQ,KAAK,CAAC,GAAG,EAAE,CAAC;YAClB,KAAK,iBAAiB;gBACpB,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC1C,MAAM;YAER,KAAK,UAAU;gBACb,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC5C,MAAM;YAER,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,gEAAgE;gBAChE,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;gBACzD,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC;gBACzD,MAAM;YACR,CAAC;YAED,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;gBACjD,MAAM,SAAS,GAAG,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACnF,MAAM,MAAM,GAAG,UAAU,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;gBAChD,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC3D,MAAM;YACR,CAAC;YAED,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,IAAI,CAAC;gBACpD,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC;gBACnE,MAAM;YACR,CAAC;YAED;gBACE,gEAAgE;gBAChE,KAAK,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC9D,MAAM;QACV,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,+DAA+D;AAC/D,oBAAoB;AACpB,+DAA+D;AAE/D,mDAAmD;AACnD,SAAS,SAAS,CAAC,MAAsB,EAAE,MAAiB;IAC1D,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC;IACxD,M
AAM,OAAO,GAAG,MAAM,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC;IAE3D,MAAM,KAAK,GAAmB,EAAE,CAAC;IAEjC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,GAAG,KAAK,gBAAgB;YAAE,SAAS;QAEzE,IAAI,KAAK,GAAe,EAAE,CAAC;QAC3B,IAAI,MAA4B,CAAC;QAEjC,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACvC,IAAI,SAAS,CAAC,IAAI,KAAK,SAAS;gBAAE,SAAS;YAC3C,IAAI,SAAS,CAAC,GAAG,KAAK,QAAQ,IAAI,SAAS,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;gBAC7D,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;YACjE,CAAC;iBAAM,IAAI,SAAS,CAAC,GAAG,KAAK,WAAW,EAAE,CAAC;gBACzC,MAAM,GAAG,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YACxC,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAiB,EAAE,KAAK,EAAE,CAAC;QACrC,IAAI,MAAM,KAAK,SAAS;YAAE,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC;QACjD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACnB,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AAC1C,CAAC;AAED,sDAAsD;AACtD,SAAS,UAAU,CAAC,OAAuB,EAAE,MAAiB;IAC5D,MAAM,IAAI,GAAmB,EAAE,CAAC;IAEhC,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrC,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,GAAG,KAAK,iBAAiB;YAAE,SAAS;QAE1E,MAAM,KAAK,GAAoB,EAAE,CAAC;QAElC,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACpC,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS;gBAAE,SAAS;YACxC,mEAAmE;YACnE,IAAI,MAAM,CAAC,GAAG,KAAK,0BAA0B;gBAAE,SAAS;YACxD,IAAI,MAAM,CAAC,GAAG,KAAK,kBAAkB;gBAAE,SAAS;YAEhD,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,8BAA8B,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;YAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;YAE/E,kEAAkE;YAClE,kEAAkE;YAClE,oCAAoC;YACpC,IAAI,KAAK,GAAe,EAAE,CAAC;YAC3B,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACxC,IAAI,SAAS,CAAC,IAAI,KAAK,SAAS,IAAI,SAAS,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;oBAC/D,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;YAED,MAAM,IAAI,GAAkB,EAAE,KAAK,EAAE,CAAC;YACtC,IAAI,OAAO,GAAG,CAAC;gBAAE,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;YACxC,IAAI,OAAO,GAAG,CAAC;gBAAE,IAAI,CAAC,OAAO,GAAG,O
AAO,CAAC;YACxC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AACjC,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,UAA0B,EAAE,MAAiB;IACnE,MAAM,KAAK,GAAe,EAAE,CAAC;IAE7B,KAAK,MAAM,KAAK,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;QACxC,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS;YAAE,SAAS;QAEvC,QAAQ,KAAK,CAAC,GAAG,EAAE,CAAC;YAClB,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,IAAI,GAAkB;oBAC1B,IAAI,EAAE,WAAW;oBACjB,KAAK,EAAE,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC;iBAC5C,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACjB,MAAM;YACR,CAAC;YAED,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,oBAAoB,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;gBACxE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,EAAE,CAAC,CAA0B,CAAC;gBAC1E,MAAM,OAAO,GAAgB;oBAC3B,IAAI,EAAE,SAAS;oBACf,KAAK;oBACL,KAAK,EAAE,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC;iBAC5C,CAAC;gBACF,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACpB,MAAM;YACR,CAAC;YAED,KAAK,WAAW;gBACd,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;gBACrC,MAAM;YAER,KAAK,aAAa;gBAChB,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;gBACtC,MAAM;YAER,KAAK,cAAc;gBACjB,mEAAmE;gBACnE,KAAK,CAAC,IAAI,CAAC,GAAG,cAAc,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;gBAC7C,MAAM;QACV,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,+DAA+D;AAC/D,8CAA8C;AAC9C,+DAA+D;AAE/D;;;;;;;;GAQG;AACH,MAAM,UAAU,YAAY,CAAC,OAAe;IAC1C,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IAC/B,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM;QAAE,OAAO,EAAE,CAAC;IAEvB,MAAM,QAAQ,GAAgB,EAAE,CAAC;IAEjC,MAAM,OAAO,GAAG,WAAW,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,QAAQ,CAAC,KAAK,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAEnD,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IACpD,IAAI,SAAS;QAAE,QAAQ,CAAC,OAAO,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IAEzD,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;IACrD,IAAI,MAAM;QAAE,QAAQ,CAAC,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;IA
EvD,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,EAAE,oBAAoB,CAAC,CAAC;IAC7D,IAAI,UAAU;QAAE,QAAQ,CAAC,YAAY,GAAG,WAAW,CAAC,UAAU,CAAC,CAAC;IAEhE,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC7C,IAAI,KAAK;QAAE,QAAQ,CAAC,gBAAgB,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;IAE1D,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,+DAA+D;AAC/D,aAAa;AACb,+DAA+D;AAE/D;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,UAAU,OAAO,CAAC,KAAiB;IACvC,MAAM,GAAG,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;IAE7B,MAAM,eAAe,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC;IAC3C,IAAI,CAAC,eAAe;QAAE,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IACpF,MAAM,UAAU,GAAG,SAAS,CAAC,eAAe,CAAC,CAAC;IAE9C,MAAM,YAAY,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,QAAQ,GAAgB,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAExF,MAAM,WAAW,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,cAAc,CAAC,WAAW,CAAC,CAAC;IAE3C,MAAM,MAAM,GAAG,WAAW,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;IACvD,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAC3E,MAAM,IAAI,GAAe,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAE9E,OAAO;QACL,QAAQ;QACR,IAAI;QACJ,MAAM,CAAC,OAAqB;YAC1B,OAAO,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACnC,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document model types for the odf-kit ODT reader.
|
|
3
|
+
*
|
|
4
|
+
* These interfaces describe the intermediate representation produced by
|
|
5
|
+
* readOdt() — a typed, traversable document model that maps ODF structure
|
|
6
|
+
* to familiar concepts without exposing any ODF XML details.
|
|
7
|
+
*
|
|
8
|
+
* The model is intentionally simple for Tier 1: paragraphs, headings,
|
|
9
|
+
* lists, and tables, each carrying one or more TextSpan objects for
|
|
10
|
+
* inline content. Tier 2 will extend the model with styled output
|
|
11
|
+
* (fonts, colors, margins) and embedded images.
|
|
12
|
+
*/
|
|
13
|
+
/**
 * One run of inline content together with its optional character
 * formatting.
 *
 * Paragraphs and headings hold a sequence of these runs; neighbouring runs
 * whose formatting differs stay separate entries. A hard line break
 * (<text:line-break/>) is modelled as a run with lineBreak set to true and
 * an empty text string.
 */
export interface TextSpan {
    /** Text of the run; the empty string when lineBreak is true. */
    text: string;
    /** Set to true when the run stands for a hard line break (<br> in HTML). */
    lineBreak?: true;
    /** Run is bold. */
    bold?: boolean;
    /** Run is italic. */
    italic?: boolean;
    /** Run is underlined. */
    underline?: boolean;
    /** Run is struck through. */
    strikethrough?: boolean;
    /** Run is superscript. */
    superscript?: boolean;
    /** Run is subscript. */
    subscript?: boolean;
    /** Link target when the run is part of a hyperlink. */
    href?: string;
}
|
|
35
|
+
/** Body node for a paragraph; discriminated by kind "paragraph". */
export interface ParagraphNode {
    kind: "paragraph";
    /** Inline runs that make up the paragraph, in order. */
    spans: TextSpan[];
}
|
|
40
|
+
/** Body node for a heading; discriminated by kind "heading". */
export interface HeadingNode {
    kind: "heading";
    /** Outline level of the heading, restricted to 1 through 6. */
    level: 1 | 2 | 3 | 4 | 5 | 6;
    /** Inline runs that make up the heading text, in order. */
    spans: TextSpan[];
}
|
|
46
|
+
/**
 * One entry of a list. For multi-level lists the nested list hangs off
 * the item through children.
 */
export interface ListItemNode {
    /** Inline runs of the item's own text. */
    spans: TextSpan[];
    /** Nested child list, when the item has one. */
    children?: ListNode;
}
|
|
53
|
+
/** Body node for a list; discriminated by kind "list". */
export interface ListNode {
    kind: "list";
    /** True for an ordered list, false for an unordered one. */
    ordered: boolean;
    /** The list entries, in order. */
    items: ListItemNode[];
}
|
|
59
|
+
/**
 * One cell of a table row. colSpan and rowSpan appear only when the cell
 * covers more than one column or row.
 */
export interface TableCellNode {
    /** Inline runs of the cell content. */
    spans: TextSpan[];
    /** Number of columns covered; present only when greater than one. */
    colSpan?: number;
    /** Number of rows covered; present only when greater than one. */
    rowSpan?: number;
}
|
|
68
|
+
/** One row of a table. */
export interface TableRowNode {
    /** The cells of the row, in order. */
    cells: TableCellNode[];
}
|
|
72
|
+
/** Body node for a table; discriminated by kind "table". */
export interface TableNode {
    kind: "table";
    /** The rows of the table, in order. */
    rows: TableRowNode[];
}
|
|
77
|
+
/**
 * Every node type that may occur in the document body, as a discriminated
 * union. Switch on the kind property to narrow to one member.
 */
export type BodyNode =
    | ParagraphNode
    | HeadingNode
    | ListNode
    | TableNode;
|
|
82
|
+
/**
 * Metadata read from meta.xml. Every field is optional and is left unset
 * when the corresponding element is absent.
 */
export interface OdtMetadata {
    /** Text of dc:title. */
    title?: string;
    /** Text of dc:creator. */
    creator?: string;
    /** Text of dc:description. */
    description?: string;
    /** ISO 8601 date string from meta:creation-date. */
    creationDate?: string;
    /** ISO 8601 date string from dc:date (last modified). */
    modificationDate?: string;
}
|
|
92
|
+
/** Settings accepted by the HTML conversion. */
export interface HtmlOptions {
    /**
     * Set to true to drop the <!DOCTYPE html><html><body> wrapper and get
     * just the inner HTML fragment, e.g. for embedding in an existing page.
     * Default: false.
     */
    fragment?: boolean;
}
|
|
101
|
+
/**
 * Result of readOdt(): the parsed ODT document.
 *
 * Exposes the document body and metadata as typed data and offers a
 * shortcut for converting the body to HTML.
 *
 * @example
 * ```typescript
 * import { readOdt } from "odf-kit/reader";
 * import { readFileSync } from "node:fs";
 *
 * const bytes = new Uint8Array(readFileSync("document.odt"));
 * const doc = readOdt(bytes);
 * console.log(doc.metadata.title);
 * const html = doc.toHtml({ fragment: true });
 * ```
 */
export interface OdtDocumentModel {
    /** Metadata taken from meta.xml. */
    readonly metadata: OdtMetadata;
    /**
     * The body nodes (paragraphs, headings, lists, tables) in the order
     * they occur in the document.
     */
    readonly body: BodyNode[];
    /**
     * Render the document as an HTML string.
     *
     * @param options - HTML output options.
     * @returns The HTML representation of the document.
     *
     * @example
     * ```typescript
     * const html = doc.toHtml({ fragment: true });
     * ```
     */
    toHtml(options?: HtmlOptions): string;
}
|
|
139
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/reader/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH;;;;;;;GAOG;AACH,MAAM,WAAW,QAAQ;IACvB,yEAAyE;IACzE,IAAI,EAAE,MAAM,CAAC;IACb,uEAAuE;IACvE,SAAS,CAAC,EAAE,IAAI,CAAC;IACjB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,2DAA2D;IAC3D,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,wCAAwC;AACxC,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,WAAW,CAAC;IAClB,KAAK,EAAE,QAAQ,EAAE,CAAC;CACnB;AAED,iEAAiE;AACjE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,SAAS,CAAC;IAChB,KAAK,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,KAAK,EAAE,QAAQ,EAAE,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB;AAED,oCAAoC;AACpC,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,YAAY,EAAE,CAAC;CACvB;AAED;;;GAGG;AACH,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,mBAAmB;AACnB,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,aAAa,EAAE,CAAC;CACxB;AAED,eAAe;AACf,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,YAAY,EAAE,CAAC;CACtB;AAED;;;GAGG;AACH,MAAM,MAAM,QAAQ,GAAG,aAAa,GAAG,WAAW,GAAG,QAAQ,GAAG,SAAS,CAAC;AAE1E,iDAAiD;AACjD,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,oDAAoD;IACpD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,yDAAyD;IACzD,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,mCAAmC;AACnC,MAAM,WAAW,WAAW;IAC1B;;;;OAIG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,WAAW,gBAAgB;IAC/B,uCAAuC;IACvC,QAAQ,CAAC,QAAQ,EAAE,WAAW,CAAC;IAC/B;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC1B;;;;;;;;;;OAUG;IACH,MAAM,CAAC,OAAO,CAAC,EAAE,WAAW,GAAG,MAAM,CAAC;CACvC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document model types for the odf-kit ODT reader.
|
|
3
|
+
*
|
|
4
|
+
* These interfaces describe the intermediate representation produced by
|
|
5
|
+
* readOdt() — a typed, traversable document model that maps ODF structure
|
|
6
|
+
* to familiar concepts without exposing any ODF XML details.
|
|
7
|
+
*
|
|
8
|
+
* The model is intentionally simple for Tier 1: paragraphs, headings,
|
|
9
|
+
* lists, and tables, each carrying one or more TextSpan objects for
|
|
10
|
+
* inline content. Tier 2 will extend the model with styled output
|
|
11
|
+
* (fonts, colors, margins) and embedded images.
|
|
12
|
+
*/
|
|
13
|
+
export {};
|
|
14
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/reader/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal XML parser for ODF content.xml and meta.xml.
|
|
3
|
+
*
|
|
4
|
+
* ODF files always contain well-formed XML, which allows a straightforward
|
|
5
|
+
* single-pass parser rather than a full spec-compliant implementation.
|
|
6
|
+
* The parser handles all constructs present in ODF XML: elements, text
|
|
7
|
+
* nodes, attributes, self-closing tags, XML declarations, and comments.
|
|
8
|
+
*
|
|
9
|
+
* The output is a lightweight element tree: each node is either an
|
|
10
|
+
* XmlElementNode (tag, attributes, children) or an XmlTextNode (text
|
|
11
|
+
* content). XML entities in both text nodes and attribute values are
|
|
12
|
+
* decoded to their character equivalents so callers work with plain
|
|
13
|
+
* strings throughout.
|
|
14
|
+
*
|
|
15
|
+
* Exported for unit testing and for use by the ODT parser.
|
|
16
|
+
*/
|
|
17
|
+
/** Tree node for an XML element. */
export interface XmlElementNode {
    type: "element";
    /** Tag name as written, namespace prefix included (e.g. "text:p"). */
    tag: string;
    /** Attributes by name; names keep their namespace prefix (e.g. "text:style-name"). */
    attrs: Record<string, string>;
    /** Child nodes in document order. */
    children: XmlNode[];
}
|
|
26
|
+
/** Tree node for character data; XML entities are already decoded. */
export interface XmlTextNode {
    type: "text";
    /** The decoded text. */
    text: string;
}
|
|
31
|
+
/** Any node of the XML tree: an element or a text node. */
export type XmlNode =
    | XmlElementNode
    | XmlTextNode;
|
|
33
|
+
/**
 * Build an element tree from an ODF XML string and return its root.
 *
 * XML declarations (<?xml ...?>), processing instructions and comments are
 * skipped. The input is assumed to be well-formed, as ODF files written by
 * conformant producers are.
 *
 * @param xml - XML string; a leading UTF-8 BOM is tolerated.
 * @returns The root XmlElementNode.
 * @throws Error if the input contains no root element.
 */
export declare function parseXml(xml: string): XmlElementNode;
|
|
45
|
+
//# sourceMappingURL=xml-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"xml-parser.d.ts","sourceRoot":"","sources":["../../src/reader/xml-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,uCAAuC;AACvC,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,SAAS,CAAC;IAChB,8EAA8E;IAC9E,GAAG,EAAE,MAAM,CAAC;IACZ,4EAA4E;IAC5E,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC9B,QAAQ,EAAE,OAAO,EAAE,CAAC;CACrB;AAED,6DAA6D;AAC7D,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd;AAED,8BAA8B;AAC9B,MAAM,MAAM,OAAO,GAAG,cAAc,GAAG,WAAW,CAAC;AAoCnD;;;;;;;;;;GAUG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,CA6FpD"}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal XML parser for ODF content.xml and meta.xml.
|
|
3
|
+
*
|
|
4
|
+
* ODF files always contain well-formed XML, which allows a straightforward
|
|
5
|
+
* single-pass parser rather than a full spec-compliant implementation.
|
|
6
|
+
* The parser handles all constructs present in ODF XML: elements, text
|
|
7
|
+
* nodes, attributes, self-closing tags, XML declarations, and comments.
|
|
8
|
+
*
|
|
9
|
+
* The output is a lightweight element tree: each node is either an
|
|
10
|
+
* XmlElementNode (tag, attributes, children) or an XmlTextNode (text
|
|
11
|
+
* content). XML entities in both text nodes and attribute values are
|
|
12
|
+
* decoded to their character equivalents so callers work with plain
|
|
13
|
+
* strings throughout.
|
|
14
|
+
*
|
|
15
|
+
* Exported for unit testing and for use by the ODT parser.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Decode XML entity and character references in a string.
 *
 * Handles the five predefined entities (&amp;amp; &amp;lt; &amp;gt;
 * &amp;quot; &amp;apos;) and numeric character references in decimal
 * (&amp;#65;) or hexadecimal (&amp;#x41;) form. Decoding happens in a single
 * pass, so the output of one replacement is never decoded a second time
 * (the text "&amp;amp;lt;" becomes "&amp;lt;", not "<"). Unknown references and
 * numeric references beyond U+10FFFF are left untouched.
 *
 * Applied to both text node content and attribute values so that callers
 * receive plain character strings.
 *
 * @param raw - Text that may contain entity references.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeEntities(raw) {
    const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
    return raw.replace(/&(?:(amp|lt|gt|quot|apos)|#(\d+)|#x([0-9a-fA-F]+));/g, (match, name, dec, hex) => {
        if (name !== undefined)
            return named[name];
        const code = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
        // String.fromCodePoint throws for values above U+10FFFF; keep those as-is.
        return code <= 0x10ffff ? String.fromCodePoint(code) : match;
    });
}
|
|
31
|
+
/**
 * Parse attribute name/value pairs from the inner content of an open tag.
 *
 * Accepts namespace-prefixed names such as text:style-name and xlink:href.
 * Values may be enclosed in double or single quotes, and whitespace around
 * the "=" sign is tolerated, as XML permits. Values are entity-decoded.
 * When a name occurs more than once, the last value wins.
 *
 * @param raw - The portion of the tag string after the tag name.
 * @returns Map of attribute name to decoded value.
 */
function parseAttributes(raw) {
    const attrs = {};
    // Group 1: name, group 2: double-quoted value, group 3: single-quoted value.
    const re = /([a-zA-Z_:][a-zA-Z0-9_:.-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
    let m;
    while ((m = re.exec(raw)) !== null) {
        attrs[m[1]] = decodeEntities(m[2] ?? m[3]);
    }
    return attrs;
}
|
|
49
|
+
/**
 * Parse an ODF XML string into an element tree.
 *
 * Returns the root element of the document. XML declarations and
 * processing instructions (<?...?>), comments and <!...> declarations are
 * skipped; CDATA sections become text nodes without entity decoding. Text
 * outside the root element is discarded. The input is assumed to be
 * well-formed: close tags simply pop the current element without checking
 * the name, and parsing stops at a "<" that has no matching ">".
 *
 * The end of a start tag is located with a quote-aware scan, so a ">"
 * inside a quoted attribute value does not cut the tag short.
 *
 * @param xml - XML string, optionally starting with a UTF-8 BOM.
 * @returns The root XmlElementNode.
 * @throws Error if the input contains no root element.
 */
export function parseXml(xml) {
    // Strip UTF-8 BOM if present
    const src = xml.startsWith("\uFEFF") ? xml.slice(1) : xml;
    const stack = [];
    let root;
    // Append a text node to the element currently being built, if any.
    const pushText = (text) => {
        if (text.length > 0 && stack.length > 0) {
            stack[stack.length - 1].children.push({ type: "text", text });
        }
    };
    // Index of the ">" ending the start tag that begins before `from`,
    // ignoring ">" inside quoted attribute values. A "<" cannot occur inside
    // a well-formed tag, so meeting one (or running out of input) means the
    // quotes are unbalanced; then the first ">" is used instead.
    const findStartTagEnd = (from) => {
        let quote = "";
        for (let k = from; k < src.length; k++) {
            const ch = src[k];
            if (ch === "<")
                break;
            if (quote !== "") {
                if (ch === quote)
                    quote = "";
            }
            else if (ch === '"' || ch === "'") {
                quote = ch;
            }
            else if (ch === ">") {
                return k;
            }
        }
        return src.indexOf(">", from);
    };
    let i = 0;
    while (i < src.length) {
        if (src[i] !== "<") {
            // Text node: everything up to the next "<"
            const next = src.indexOf("<", i);
            const raw = next === -1 ? src.slice(i) : src.slice(i, next);
            i = next === -1 ? src.length : next;
            if (stack.length > 0)
                pushText(decodeEntities(raw));
            continue;
        }
        // XML comment: <!-- ... -->
        if (src.startsWith("<!--", i)) {
            const close = src.indexOf("-->", i);
            i = close === -1 ? src.length : close + 3;
            continue;
        }
        // CDATA section: <![CDATA[ ... ]]> — content is taken verbatim
        if (src.startsWith("<![CDATA[", i)) {
            const close = src.indexOf("]]>", i);
            if (close === -1) {
                i = src.length;
            }
            else {
                pushText(src.slice(i + 9, close));
                i = close + 3;
            }
            continue;
        }
        // Everything else is a tag; only element start tags carry attributes
        // and therefore need the quote-aware scan.
        const marker = src[i + 1];
        const isElementStart = marker !== "?" && marker !== "!" && marker !== "/";
        const end = isElementStart ? findStartTagEnd(i + 1) : src.indexOf(">", i);
        if (end === -1)
            break; // malformed — stop
        const inner = src.slice(i + 1, end);
        i = end + 1;
        if (!isElementStart) {
            // Close tag: </tag>. Declarations (<?...?>, <!...>) are skipped.
            if (marker === "/")
                stack.pop();
            continue;
        }
        // Start tag, either self-closing (<tag attrs/>) or open (<tag attrs>)
        const selfClosing = inner.endsWith("/");
        const body = selfClosing ? inner.slice(0, -1).trimEnd() : inner;
        const space = body.search(/\s/);
        const tag = space === -1 ? body : body.slice(0, space);
        const attrs = space === -1 ? {} : parseAttributes(body.slice(space + 1));
        const node = { type: "element", tag, attrs, children: [] };
        if (stack.length > 0) {
            stack[stack.length - 1].children.push(node);
        }
        else if (!root) {
            // First element seen outside any parent is the document root.
            root = node;
        }
        if (!selfClosing)
            stack.push(node);
    }
    if (!root)
        throw new Error("parseXml: no root element found");
    return root;
}
|
|
146
|
+
//# sourceMappingURL=xml-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"xml-parser.js","sourceRoot":"","sources":["../../src/reader/xml-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAqBH;;;;;GAKG;AACH,SAAS,cAAc,CAAC,GAAW;IACjC,OAAO,GAAG;SACP,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,KAAK,GAA2B,EAAE,CAAC;IACzC,MAAM,EAAE,GAAG,yCAAyC,CAAC;IACrD,IAAI,CAAC,CAAC;IACN,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACrC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,6BAA6B;IAC7B,MAAM,GAAG,GAAG,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAE1D,MAAM,KAAK,GAAqB,EAAE,CAAC;IACnC,IAAI,IAAgC,CAAC;IACrC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;QACtB,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;YACnB,YAAY;YACZ,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YAChC,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;YAC1D,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC;YAClC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACrF,CAAC;YACD,SAAS;QACX,CAAC;QAED,4BAA4B;QAC5B,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YAClC,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;YACtC,SAAS;QACX,CAAC;QAED,mCAAmC;QACnC,IAAI,GAAG,CAAC,UAAU,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,CAAC;YACnC,MAAM
,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;gBACf,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;gBACnC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACxC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;gBAChE,CAAC;gBACD,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;YACd,CAAC;iBAAM,CAAC;gBACN,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC;YACjB,CAAC;YACD,SAAS;QACX,CAAC;QAED,qCAAqC;QACrC,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QAChC,IAAI,GAAG,KAAK,CAAC,CAAC;YAAE,MAAM,CAAC,mBAAmB;QAE1C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;QACpC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;QAEZ,qDAAqD;QACrD,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAEpC,sBAAsB;QACtB,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAEpC,oBAAoB;QACpB,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,KAAK,CAAC,GAAG,EAAE,CAAC;YACZ,SAAS;QACX,CAAC;QAED,iCAAiC;QACjC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;YAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAChC,MAAM,GAAG,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YACvD,MAAM,KAAK,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;YACzE,MAAM,IAAI,GAAmB,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;YAC3E,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC9C,CAAC;iBAAM,IAAI,CAAC,IAAI,EAAE,CAAC;gBACjB,IAAI,GAAG,IAAI,CAAC;YACd,CAAC;YACD,SAAS;QACX,CAAC;QAED,wBAAwB;QACxB,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACjC,MAAM,GAAG,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;QACzD,MAAM,KAAK,GAAG,KAAK,KAAK,CAAC,C
AAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;QAC1E,MAAM,IAAI,GAAmB,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAE3E,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9C,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEjB,IAAI,CAAC,IAAI;YAAE,IAAI,GAAG,IAAI,CAAC;IACzB,CAAC;IAED,IAAI,CAAC,IAAI;QAAE,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;IAC9D,OAAO,IAAI,CAAC;AACd,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,61 +1,65 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "odf-kit",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Create OpenDocument Format files (.odt, .ods, .odp, .odg) in TypeScript/JavaScript. Works in Node.js and browsers.",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"main": "./dist/index.js",
|
|
7
|
-
"types": "./dist/index.d.ts",
|
|
8
|
-
"exports": {
|
|
9
|
-
".": {
|
|
10
|
-
"types": "./dist/index.d.ts",
|
|
11
|
-
"import": "./dist/index.js"
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
"
|
|
49
|
-
"
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
"
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"
|
|
57
|
-
"
|
|
58
|
-
"
|
|
59
|
-
"
|
|
60
|
-
|
|
61
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "odf-kit",
|
|
3
|
+
"version": "0.5.0",
|
|
4
|
+
"description": "Create OpenDocument Format files (.odt, .ods, .odp, .odg) in TypeScript/JavaScript. Works in Node.js and browsers.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
},
|
|
13
|
+
"./reader": {
|
|
14
|
+
"types": "./dist/reader/index.d.ts",
|
|
15
|
+
"import": "./dist/reader/index.js"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"files": [
|
|
19
|
+
"dist",
|
|
20
|
+
"LICENSE",
|
|
21
|
+
"README.md",
|
|
22
|
+
"CHANGELOG.md"
|
|
23
|
+
],
|
|
24
|
+
"scripts": {
|
|
25
|
+
"build": "tsc",
|
|
26
|
+
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js",
|
|
27
|
+
"lint": "eslint src tests",
|
|
28
|
+
"format": "prettier --write src tests",
|
|
29
|
+
"format:check": "prettier --check src tests",
|
|
30
|
+
"clean": "node -e \"const fs=require('fs');fs.rmSync('dist',{recursive:true,force:true})\"",
|
|
31
|
+
"prepublishOnly": "npm run clean && npm run build"
|
|
32
|
+
},
|
|
33
|
+
"keywords": [
|
|
34
|
+
"odf",
|
|
35
|
+
"opendocument",
|
|
36
|
+
"odt",
|
|
37
|
+
"ods",
|
|
38
|
+
"odp",
|
|
39
|
+
"odg",
|
|
40
|
+
"libreoffice",
|
|
41
|
+
"document",
|
|
42
|
+
"spreadsheet",
|
|
43
|
+
"presentation",
|
|
44
|
+
"browser"
|
|
45
|
+
],
|
|
46
|
+
"author": "GitHubNewbie0",
|
|
47
|
+
"license": "Apache-2.0",
|
|
48
|
+
"repository": {
|
|
49
|
+
"type": "git",
|
|
50
|
+
"url": "git+https://github.com/GitHubNewbie0/odf-kit.git"
|
|
51
|
+
},
|
|
52
|
+
"dependencies": {
|
|
53
|
+
"fflate": "^0.8.2"
|
|
54
|
+
},
|
|
55
|
+
"devDependencies": {
|
|
56
|
+
"@eslint/js": "^9.18.0",
|
|
57
|
+
"@types/jest": "^29.5.14",
|
|
58
|
+
"eslint": "^9.18.0",
|
|
59
|
+
"jest": "^29.7.0",
|
|
60
|
+
"prettier": "^3.4.2",
|
|
61
|
+
"ts-jest": "^29.2.5",
|
|
62
|
+
"typescript": "^5.7.3",
|
|
63
|
+
"typescript-eslint": "^8.21.0"
|
|
64
|
+
}
|
|
65
|
+
}
|