@doxi/docx 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +59 -0
- package/dist/blocks.d.ts +30 -0
- package/dist/blocks.d.ts.map +1 -0
- package/dist/blocks.js +69 -0
- package/dist/blocks.js.map +1 -0
- package/dist/export.d.ts +41 -0
- package/dist/export.d.ts.map +1 -0
- package/dist/export.js +70 -0
- package/dist/export.js.map +1 -0
- package/dist/import.d.ts +35 -0
- package/dist/import.d.ts.map +1 -0
- package/dist/import.js +156 -0
- package/dist/import.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -0
- package/dist/inline.d.ts +15 -0
- package/dist/inline.d.ts.map +1 -0
- package/dist/inline.js +48 -0
- package/dist/inline.js.map +1 -0
- package/dist/lists.d.ts +17 -0
- package/dist/lists.d.ts.map +1 -0
- package/dist/lists.js +60 -0
- package/dist/lists.js.map +1 -0
- package/dist/parse-blocks.d.ts +33 -0
- package/dist/parse-blocks.d.ts.map +1 -0
- package/dist/parse-blocks.js +145 -0
- package/dist/parse-blocks.js.map +1 -0
- package/dist/parse-numbering.d.ts +24 -0
- package/dist/parse-numbering.d.ts.map +1 -0
- package/dist/parse-numbering.js +90 -0
- package/dist/parse-numbering.js.map +1 -0
- package/dist/parse-runs.d.ts +28 -0
- package/dist/parse-runs.d.ts.map +1 -0
- package/dist/parse-runs.js +230 -0
- package/dist/parse-runs.js.map +1 -0
- package/dist/parse-tables.d.ts +15 -0
- package/dist/parse-tables.d.ts.map +1 -0
- package/dist/parse-tables.js +200 -0
- package/dist/parse-tables.js.map +1 -0
- package/dist/parse-xml.d.ts +26 -0
- package/dist/parse-xml.d.ts.map +1 -0
- package/dist/parse-xml.js +286 -0
- package/dist/parse-xml.js.map +1 -0
- package/dist/parts.d.ts +18 -0
- package/dist/parts.d.ts.map +1 -0
- package/dist/parts.js +102 -0
- package/dist/parts.js.map +1 -0
- package/dist/runs.d.ts +40 -0
- package/dist/runs.d.ts.map +1 -0
- package/dist/runs.js +72 -0
- package/dist/runs.js.map +1 -0
- package/dist/tables.d.ts +18 -0
- package/dist/tables.d.ts.map +1 -0
- package/dist/tables.js +95 -0
- package/dist/tables.js.map +1 -0
- package/dist/unzip.d.ts +16 -0
- package/dist/unzip.d.ts.map +1 -0
- package/dist/unzip.js +108 -0
- package/dist/unzip.js.map +1 -0
- package/dist/warnings.d.ts +18 -0
- package/dist/warnings.d.ts.map +1 -0
- package/dist/warnings.js +12 -0
- package/dist/warnings.js.map +1 -0
- package/dist/xml.d.ts +23 -0
- package/dist/xml.d.ts.map +1 -0
- package/dist/xml.js +58 -0
- package/dist/xml.js.map +1 -0
- package/dist/zip.d.ts +15 -0
- package/dist/zip.d.ts.map +1 -0
- package/dist/zip.js +165 -0
- package/dist/zip.js.map +1 -0
- package/package.json +42 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lists.d.ts","sourceRoot":"","sources":["../src/lists.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,YAAY,CAAA;AACxC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,UAAU,CAAA;AAE1C,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAA;AAE1C;;;;;;;;;;;GAWG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,GAAG,UAAU,EAAE,CAcnF"}
|
package/dist/lists.js
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { renderParagraph, renderHeading } from './blocks.js';
|
|
2
|
+
/**
 * Flatten a Doxiva `list` node into the `<w:p>` elements of its items.
 *
 * Numbering:
 *   - Every `list` node owns a distinct numId, which is what makes two
 *     back-to-back lists restart their numbering. The id is drawn from
 *     `ctx.numIdCounter` the first time a node is seen (the counter is
 *     expected to begin at 3, because numbering.xml predefines 1 and 2),
 *     cached in `ctx.numIds`, and appended to `ctx.numberingPlan` so the
 *     numbering part can emit a matching `<w:num>` instance.
 *   - A nested list is rendered one `ilvl` deeper and also receives its own
 *     numId; the abstractNum definitions in numbering.xml supply levels 0
 *     and 1.
 *
 * @param list - The `list` node; only `attrs.ordered === true` counts as ordered.
 * @param ctx - Export context providing `numIds`, `numIdCounter` and `numberingPlan`.
 * @param ilvl - Nesting level forwarded to every item of this list.
 * @returns The rendered elements of all items, in document order.
 */
export function renderList(list, ctx, ilvl) {
    const isOrdered = list.attrs.ordered === true;
    let listNumId = ctx.numIds.get(list);
    if (listNumId === undefined) {
        // First sighting of this node: allocate an id and register it so the
        // numbering part knows which <w:num> rows to write.
        listNumId = ctx.numIdCounter.value++;
        ctx.numIds.set(list, listNumId);
        ctx.numberingPlan.push({ numId: listNumId, ordered: isOrdered });
    }
    const rendered = [];
    for (let idx = 0; idx < list.content.childCount; idx += 1) {
        const itemElements = renderListItem(list.content.child(idx), ctx, ilvl, listNumId);
        rendered.push(...itemElements);
    }
    return rendered;
}
|
|
29
|
+
/**
 * Render one `list_item` node into `<w:p>` elements.
 *
 * Only the first paragraph child is rendered with the list marker options
 * (`listNumId` / `listIlvl`); later paragraphs are rendered without them and
 * so act as continuation paragraphs — Word's "multi-paragraph list item"
 * idiom. A nested `list` child recurses through `renderList` at `ilvl + 1`,
 * and a `heading` child is rendered as a plain heading.
 *
 * @param item - The `list_item` node whose children are rendered.
 * @param ctx - Export context, passed through to the block renderers.
 * @param ilvl - Nesting level of the enclosing list.
 * @param numId - Numbering instance id of the enclosing list.
 * @returns The rendered elements, in child order.
 */
function renderListItem(item, ctx, ilvl, numId) {
    const rendered = [];
    let markerPending = true;
    for (let idx = 0; idx < item.content.childCount; idx += 1) {
        const block = item.content.child(idx);
        switch (block.type.name) {
            case 'paragraph':
                if (markerPending) {
                    rendered.push(renderParagraph(block, ctx, { listNumId: numId, listIlvl: ilvl }));
                    markerPending = false;
                }
                else {
                    rendered.push(renderParagraph(block, ctx));
                }
                break;
            case 'list':
                rendered.push(...renderList(block, ctx, ilvl + 1));
                break;
            case 'heading':
                rendered.push(renderHeading(block, ctx));
                break;
            default:
                // Other block types inside list items are not in v0.7 scope.
                break;
        }
    }
    return rendered;
}
|
|
60
|
+
//# sourceMappingURL=lists.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lists.js","sourceRoot":"","sources":["../src/lists.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAG5D;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY,EAAE,GAAc,EAAE,IAAY;IACnE,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,KAAK,IAAI,CAAA;IAC3C,IAAI,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;IAChC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACxB,KAAK,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,CAAA;QAChC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAA;QAC3B,GAAG,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAA;IAC5C,CAAC;IACD,MAAM,GAAG,GAAiB,EAAE,CAAA;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACjD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAW,CAAA;QAC5C,GAAG,CAAC,IAAI,CAAC,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAA;IACrD,CAAC;IACD,OAAO,GAAG,CAAA;AACZ,CAAC;AAED;;;;;GAKG;AACH,SAAS,cAAc,CAAC,IAAY,EAAE,GAAc,EAAE,IAAY,EAAE,KAAa;IAC/E,MAAM,GAAG,GAAiB,EAAE,CAAA;IAC5B,IAAI,cAAc,GAAG,KAAK,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACjD,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAW,CAAA;QAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAA;QAC5B,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;YACzB,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAA;gBAC3E,cAAc,GAAG,IAAI,CAAA;YACvB,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAA;YACvC,CAAC;QACH,CAAC;aAAM,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YAC3B,GAAG,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,GAAG,CAAC,CAAC,CAAC,CAAA;QAC/C,CAAC;aAAM,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YAC9B,GAAG,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAA;QACrC,CAAC;QACD,6DAA6D;IAC/D,CAAC;IACD,OAAO,GAAG,CAAA;AACZ,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse a `<w:p>` element into one Doxiva block node — paragraph, heading,
|
|
3
|
+
* blockquote, hr, or page_break — by reversing what `blocks.ts` emits.
|
|
4
|
+
*
|
|
5
|
+
* List paragraphs (those with `<w:numPr>`) are returned with a discriminated
|
|
6
|
+
* `kind: 'list-paragraph'` so the body dispatcher in `import.ts` can group
|
|
7
|
+
* consecutive entries into a `list` node. We chose a discriminated union over
|
|
8
|
+
* a side channel because the numId/ilvl tuple needs to flow alongside the
|
|
9
|
+
* paragraph node itself and inventing a flag on the schema node would leak
|
|
10
|
+
* importer state into the model.
|
|
11
|
+
*/
|
|
12
|
+
import type { DxNode, Schema } from '@doxi/core';
|
|
13
|
+
import type { ParsedElement } from './parse-xml.js';
|
|
14
|
+
import type { WarningCollector } from './warnings.js';
|
|
15
|
+
/** Context shared by the paragraph and inline parsers during import. */
export interface ParseBlockCtx {
    /** Schema used to create the resulting nodes and marks. */
    readonly schema: Schema;
    /** Collector that receives non-fatal import warnings. */
    readonly warnings: WarningCollector;
    /** Relationship entries, looked up by relationship id (e.g. a hyperlink's `r:id`). */
    readonly rels: ReadonlyMap<string, { target: string; type?: string }>;
}
|
|
23
|
+
/**
 * Result of parsing one `<w:p>`: either a finished block, or a paragraph that
 * carried `<w:numPr>` and must still be grouped into a `list` node by the
 * caller using its `numId` / `ilvl`.
 */
export type ParsedBlock =
    | { readonly kind: 'block'; readonly node: DxNode }
    | {
        readonly kind: 'list-paragraph';
        readonly numId: number;
        readonly ilvl: number;
        readonly node: DxNode;
    };
|
|
32
|
+
/**
 * Parse a single `<w:p>` element into a block node, tagged as a list
 * paragraph when the element carries numbering properties.
 */
export declare function parseParagraph(
    el: ParsedElement,
    ctx: ParseBlockCtx,
): ParsedBlock;
|
|
33
|
+
//# sourceMappingURL=parse-blocks.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-blocks.d.ts","sourceRoot":"","sources":["../src/parse-blocks.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,YAAY,CAAA;AAChD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAA;AAGrD,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAA;IACvB,QAAQ,CAAC,QAAQ,EAAE,gBAAgB,CAAA;IACnC,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC,MAAM,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CACtE;AAED,MAAM,MAAM,WAAW,GACnB;IAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GACjD;IACE,QAAQ,CAAC,IAAI,EAAE,gBAAgB,CAAA;IAC/B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAA;IACtB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;CACtB,CAAA;AAWL,wBAAgB,cAAc,CAAC,EAAE,EAAE,aAAa,EAAE,GAAG,EAAE,aAAa,GAAG,WAAW,CA2CjF"}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse a `<w:p>` element into one Doxiva block node — paragraph, heading,
|
|
3
|
+
* blockquote, hr, or page_break — by reversing what `blocks.ts` emits.
|
|
4
|
+
*
|
|
5
|
+
* List paragraphs (those with `<w:numPr>`) are returned with a discriminated
|
|
6
|
+
* `kind: 'list-paragraph'` so the body dispatcher in `import.ts` can group
|
|
7
|
+
* consecutive entries into a `list` node. We chose a discriminated union over
|
|
8
|
+
* a side channel because the numId/ilvl tuple needs to flow alongside the
|
|
9
|
+
* paragraph node itself and inventing a flag on the schema node would leak
|
|
10
|
+
* importer state into the model.
|
|
11
|
+
*/
|
|
12
|
+
import { parseInline } from './parse-runs.js';
|
|
13
|
+
/**
 * Convert one `<w:p>` element into a parsed block.
 *
 * Shapes are recognised in priority order:
 *   1. `hr` — a bottom paragraph border with no inline content and no page break.
 *   2. `page_break` — the paragraph contains nothing but page-break runs.
 *   3. `heading` — paragraph style `Heading1` .. `Heading6`.
 *   4. `blockquote` — paragraph style `Quote`, wrapping a single paragraph.
 *   5. plain `paragraph`.
 * Shapes 3–5 are passed through `wrapWithList`, which tags them as list
 * paragraphs when the paragraph properties carried a numId.
 *
 * @param el - The parsed `<w:p>` element.
 * @param ctx - Import context (schema, warnings, relationships).
 * @returns The parsed block, discriminated by `kind`.
 */
export function parseParagraph(el, ctx) {
    const props = findChild(el, 'w:pPr');
    const pInfo = props ? readPPr(props) : { hrFlag: false };
    const content = parseInline(el, ctx);

    // 1. hr — pBdr.bottom with no inline content.
    const isRule = pInfo.hrFlag && content.nodes.length === 0 && !content.sawPageBreak;
    if (isRule) {
        return { kind: 'block', node: ctx.schema.node('hr', null) };
    }
    // 2. page_break — only a `<w:br w:type="page"/>` and no other content.
    if (content.onlyPageBreak) {
        return { kind: 'block', node: ctx.schema.node('page_break', null) };
    }

    // Paragraph-shaped attrs shared by heading / quote / paragraph; a key is
    // only present when the corresponding property was actually read.
    const sharedAttrs = {
        ...(pInfo.align !== undefined && { align: pInfo.align }),
        ...(pInfo.lineHeight !== undefined && { lineHeight: pInfo.lineHeight }),
    };

    const level = pInfo.styleId ? parseHeadingLevel(pInfo.styleId) : null;
    let node;
    if (level !== null) {
        // 3. Heading (Heading1..Heading6).
        node = ctx.schema.node('heading', { ...sharedAttrs, level }, content.nodes);
    }
    else if (pInfo.styleId === 'Quote') {
        // 4. Blockquote — Quote style → blockquote(paragraph(inline)).
        const quoted = ctx.schema.node('paragraph', sharedAttrs, content.nodes);
        node = ctx.schema.node('blockquote', null, [quoted]);
    }
    else {
        // 5. Plain paragraph.
        node = ctx.schema.node('paragraph', sharedAttrs, content.nodes);
    }
    return wrapWithList(node, pInfo);
}
|
|
48
|
+
/**
 * Tag a parsed node with its list membership.
 *
 * @param node - The block node built for the paragraph.
 * @param info - Paragraph properties; `numId` marks a list paragraph and
 *   `ilvl` falls back to 0 when it is null or undefined.
 * @returns A `list-paragraph` entry when `info.numId` is defined, otherwise
 *   a plain `block` entry.
 */
function wrapWithList(node, info) {
    if (info.numId === undefined) {
        return { kind: 'block', node };
    }
    const level = info.ilvl ?? 0;
    return { kind: 'list-paragraph', numId: info.numId, ilvl: level, node };
}
|
|
59
|
+
/**
 * Collect the paragraph properties the importer models from a `<w:pPr>`
 * element.
 *
 * Recognised children:
 *   - `w:pStyle`  → `styleId`
 *   - `w:numPr`   → `numId` / `ilvl` (decimal `w:val` of `w:numId` / `w:ilvl`)
 *   - `w:jc`      → `align` (`left` | `center` | `right`; `both` → `justify`)
 *   - `w:spacing` → `lineHeight`, only for the `auto` line rule
 *   - `w:pBdr`    → `hrFlag` when a visible bottom border is present
 * Every other child is ignored.
 *
 * @param pPr - The parsed `<w:pPr>` element.
 * @returns An info object that always has `hrFlag`; the other keys are set
 *   only when the corresponding property was found and valid.
 */
function readPPr(pPr) {
    // Decimal `w:val` of a numbering child, or undefined when absent/non-numeric.
    const readIntVal = (el) => {
        const raw = el.attrs['w:val'];
        if (!raw)
            return undefined;
        const n = parseInt(raw, 10);
        return Number.isFinite(n) ? n : undefined;
    };
    const info = { hrFlag: false };
    for (const child of pPr.children) {
        if (typeof child === 'string')
            continue;
        switch (child.name) {
            case 'w:pStyle': {
                const v = child.attrs['w:val'];
                if (v)
                    info.styleId = v;
                break;
            }
            case 'w:numPr': {
                for (const np of child.children) {
                    if (typeof np === 'string')
                        continue;
                    if (np.name === 'w:numId') {
                        const n = readIntVal(np);
                        if (n !== undefined)
                            info.numId = n;
                    }
                    else if (np.name === 'w:ilvl') {
                        const n = readIntVal(np);
                        if (n !== undefined)
                            info.ilvl = n;
                    }
                }
                break;
            }
            case 'w:jc': {
                const v = child.attrs['w:val'] ?? '';
                if (v === 'left' || v === 'center' || v === 'right')
                    info.align = v;
                else if (v === 'both')
                    info.align = 'justify';
                break;
            }
            case 'w:spacing': {
                const lineRule = child.attrs['w:lineRule'] ?? 'auto';
                const line = child.attrs['w:line'];
                if (lineRule === 'auto' && line) {
                    const n = parseInt(line, 10);
                    if (Number.isFinite(n)) {
                        // 240 → 1; 360 → 1.5; 480 → 2. Round to one decimal.
                        info.lineHeight = Math.round((n / 240) * 10) / 10;
                    }
                }
                break;
            }
            case 'w:pBdr': {
                for (const b of child.children) {
                    if (typeof b === 'string')
                        continue;
                    if (b.name !== 'w:bottom')
                        continue;
                    // `nil` and `none` explicitly mean "no border"; such a
                    // paragraph must not be mistaken for a horizontal rule.
                    // A missing w:val keeps the previous behaviour (border present).
                    const borderStyle = b.attrs?.['w:val'];
                    if (borderStyle !== 'nil' && borderStyle !== 'none')
                        info.hrFlag = true;
                    break;
                }
                break;
            }
            // Other pPr children (outlineLvl, ind, contextualSpacing, etc.) are
            // silently ignored.
        }
    }
    return info;
}
|
|
131
|
+
/**
 * Extract the heading level from a paragraph style id.
 *
 * @param styleId - Paragraph style id, e.g. `Heading3`.
 * @returns 1–6 for exactly `Heading1` .. `Heading6` (the ids the exporter
 *   emits); `null` for any other id.
 */
function parseHeadingLevel(styleId) {
    const match = /^Heading([1-6])$/.exec(styleId);
    if (!match) {
        return null;
    }
    return parseInt(match[1], 10);
}
|
|
136
|
+
/**
 * Find the first direct element child with the given tag name.
 *
 * @param el - Parent element; its `children` may mix strings (text) and elements.
 * @param name - Qualified tag name to look for, e.g. `w:pPr`.
 * @returns The first matching element child, or `null` when there is none.
 */
function findChild(el, name) {
    for (const candidate of el.children) {
        // String children are text, not elements; skip them.
        if (typeof candidate !== 'string' && candidate.name === name) {
            return candidate;
        }
    }
    return null;
}
|
|
145
|
+
//# sourceMappingURL=parse-blocks.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-blocks.js","sourceRoot":"","sources":["../src/parse-blocks.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAKH,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAA;AA0B7C,MAAM,UAAU,cAAc,CAAC,EAAiB,EAAE,GAAkB;IAClE,MAAM,GAAG,GAAG,SAAS,CAAC,EAAE,EAAE,OAAO,CAAC,CAAA;IAClC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAA;IACnD,MAAM,MAAM,GAAG,WAAW,CAAC,EAAE,EAAE,GAAG,CAAC,CAAA;IAEnC,2CAA2C;IAE3C,8CAA8C;IAC9C,IAAI,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC;QACrE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAA;IAC7D,CAAC;IAED,uEAAuE;IACvE,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;QACzB,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,EAAE,CAAA;IACrE,CAAC;IAED,uDAAuD;IACvD,MAAM,UAAU,GAA4B,EAAE,CAAA;IAC9C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS;QAAE,UAAU,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAA;IAC3D,IAAI,IAAI,CAAC,UAAU,KAAK,SAAS;QAAE,UAAU,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAA;IAE1E,mCAAmC;IACnC,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IAC1E,IAAI,YAAY,KAAK,IAAI,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAC1B,SAAS,EACT,EAAE,GAAG,UAAU,EAAE,KAAK,EAAE,YAAY,EAAE,EACtC,MAAM,CAAC,KAAK,CACb,CAAA;QACD,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IACjC,CAAC;IAED,+DAA+D;IAC/D,IAAI,IAAI,CAAC,OAAO,KAAK,OAAO,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE,UAAU,EAAE,MAAM,CAAC,KAAK,CAAC,CAAA;QACpE,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAA;QACzD,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IACjC,CAAC;IAED,sBAAsB;IACtB,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE,UAAU,EAAE,MAAM,CAAC,KAAK,CAAC,CAAA;IACnE,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;AACjC,CAAC;AAED,SAAS,YAAY,CAAC,IAAY,EAAE,IAAa;IAC/C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;QAC7B,OAAO;YACL,IAAI,EAAE,gBAAgB;YACtB,KAAK,EAAE,
IAAI,CAAC,KAAK;YACjB,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,CAAC;YACpB,IAAI;SACL,CAAA;IACH,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAA;AAChC,CAAC;AAED,SAAS,OAAO,CAAC,GAAkB;IACjC,MAAM,IAAI,GAAY,EAAE,MAAM,EAAE,KAAK,EAAE,CAAA;IACvC,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QACjC,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,SAAQ;QACvC,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;gBAC9B,IAAI,CAAC;oBAAE,IAAI,CAAC,OAAO,GAAG,CAAC,CAAA;gBACvB,MAAK;YACP,CAAC;YACD,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;oBAChC,IAAI,OAAO,EAAE,KAAK,QAAQ;wBAAE,SAAQ;oBACpC,IAAI,EAAE,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;wBAC1B,MAAM,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;wBAC3B,IAAI,CAAC,EAAE,CAAC;4BACN,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;4BACzB,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;gCAAE,IAAI,CAAC,KAAK,GAAG,CAAC,CAAA;wBACxC,CAAC;oBACH,CAAC;yBAAM,IAAI,EAAE,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;wBAChC,MAAM,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;wBAC3B,IAAI,CAAC,EAAE,CAAC;4BACN,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;4BACzB,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;gCAAE,IAAI,CAAC,IAAI,GAAG,CAAC,CAAA;wBACvC,CAAC;oBACH,CAAC;gBACH,CAAC;gBACD,MAAK;YACP,CAAC;YACD,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;gBACpC,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,OAAO;oBAAE,IAAI,CAAC,KAAK,GAAG,CAAC,CAAA;qBAC9D,IAAI,CAAC,KAAK,MAAM;oBAAE,IAAI,CAAC,KAAK,GAAG,SAAS,CAAA;gBAC7C,MAAK;YACP,CAAC;YACD,KAAK,WAAW,CAAC,CAAC,CAAC;gBACjB,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,MAAM,CAAA;gBACpD,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAA;gBAClC,IAAI,QAAQ,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC;oBAChC,MAAM,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA;oBAC5B,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;wBACvB,qDAAqD;wBACrD,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,CAAA;oBACnD,CAAC;gBACH,CAAC;gBACD,MAAK;YACP,CAAC
;YACD,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;oBAC/B,IAAI,OAAO,CAAC,KAAK,QAAQ;wBAAE,SAAQ;oBACnC,IAAI,CAAC,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;wBAC1B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAA;wBAClB,MAAK;oBACP,CAAC;gBACH,CAAC;gBACD,MAAK;YACP,CAAC;YACD,oEAAoE;YACpE,oBAAoB;QACtB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,iBAAiB,CAAC,OAAe;IACxC,mDAAmD;IACnD,MAAM,CAAC,GAAG,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC1C,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;AACvC,CAAC;AAED,SAAS,SAAS,CAAC,EAAiB,EAAE,IAAY;IAChD,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,QAAQ,EAAE,CAAC;QAC5B,IAAI,OAAO,CAAC,KAAK,QAAQ;YAAE,SAAQ;QACnC,IAAI,CAAC,CAAC,IAAI,KAAK,IAAI;YAAE,OAAO,CAAC,CAAA;IAC/B,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse `word/numbering.xml` into a numId → { ordered } map.
|
|
3
|
+
*
|
|
4
|
+
* The exporter (`parts.ts`) emits two predefined abstractNums:
|
|
5
|
+
* - abstractNumId 0 → bullet
|
|
6
|
+
* - abstractNumId 1 → decimal
|
|
7
|
+
* and adds one `<w:num>` row per Doxiva list. We only need to know whether
|
|
8
|
+
* each numId resolves to a bullet (ordered=false) or a numeric/lettered list
|
|
9
|
+
* (ordered=true) to reverse the grouping.
|
|
10
|
+
*
|
|
11
|
+
* Foreign-doc tolerance: if numbering.xml is missing or unparseable, we
|
|
12
|
+
* return an empty map. Callers fall back to `ordered: false` so the list
|
|
13
|
+
* still imports.
|
|
14
|
+
*/
|
|
15
|
+
import type { WarningCollector } from './warnings.js';
|
|
16
|
+
/** One `<w:num>` instance resolved to its list kind. */
export interface NumberingInstance {
    /** The instance's `w:numId`. */
    readonly numId: number;
    /** `false` for bullet-style numbering, `true` for numeric/lettered formats. */
    readonly ordered: boolean;
}
|
|
20
|
+
/** Parsed view of `word/numbering.xml`. */
export interface NumberingInfo {
    /** Numbering instances keyed by their numId; empty when the part is missing or unparseable. */
    readonly instances: ReadonlyMap<number, NumberingInstance>;
}
|
|
23
|
+
/**
 * Parse the text of `word/numbering.xml` into a numId → instance map.
 * A missing or malformed part yields an empty map.
 */
export declare function parseNumbering(
    xml: string | undefined,
    warnings: WarningCollector,
): NumberingInfo;
|
|
24
|
+
//# sourceMappingURL=parse-numbering.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-numbering.d.ts","sourceRoot":"","sources":["../src/parse-numbering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAA;AAErD,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAA;IACtB,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAA;CAC1B;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,SAAS,EAAE,WAAW,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAA;CAC3D;AAID,wBAAgB,cAAc,CAC5B,GAAG,EAAE,MAAM,GAAG,SAAS,EACvB,QAAQ,EAAE,gBAAgB,GACzB,aAAa,CA+Df"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse `word/numbering.xml` into a numId → { ordered } map.
|
|
3
|
+
*
|
|
4
|
+
* The exporter (`parts.ts`) emits two predefined abstractNums:
|
|
5
|
+
* - abstractNumId 0 → bullet
|
|
6
|
+
* - abstractNumId 1 → decimal
|
|
7
|
+
* and adds one `<w:num>` row per Doxiva list. We only need to know whether
|
|
8
|
+
* each numId resolves to a bullet (ordered=false) or a numeric/lettered list
|
|
9
|
+
* (ordered=true) to reverse the grouping.
|
|
10
|
+
*
|
|
11
|
+
* Foreign-doc tolerance: if numbering.xml is missing or unparseable, we
|
|
12
|
+
* return an empty map. Callers fall back to `ordered: false` so the list
|
|
13
|
+
* still imports.
|
|
14
|
+
*/
|
|
15
|
+
import { parseXml } from './parse-xml.js';
|
|
16
|
+
// Shared result for "no usable numbering part"; the same object is returned
// every time, so callers must treat it as read-only.
const EMPTY_INFO = { instances: new Map() };
/**
 * Parse the text of `word/numbering.xml` into a numId → `{ numId, ordered }` map.
 *
 * Each `<w:abstractNum>` is classified by the `<w:numFmt>` of its level-0
 * `<w:lvl>`: `bullet`, `none` or an empty/missing value count as unordered,
 * anything else as ordered. Each `<w:num>` is then resolved through its
 * `<w:abstractNumId>` reference; a reference to an unknown abstractNum
 * resolves to unordered.
 *
 * @param xml - Contents of the numbering part, or a falsy value when absent.
 * @param warnings - Collector that receives a `malformed-xml` warning when
 *   parsing throws.
 * @returns The numbering info; the shared empty info when `xml` is falsy or
 *   cannot be parsed.
 */
export function parseNumbering(xml, warnings) {
    if (!xml) {
        return EMPTY_INFO;
    }
    let root;
    try {
        root = parseXml(xml).root;
    }
    catch (err) {
        warnings.add({
            code: 'malformed-xml',
            message: `Failed to parse numbering.xml: ${err.message}`,
            path: 'word/numbering.xml',
        });
        return EMPTY_INFO;
    }

    // Decide bullet vs ordered from the first level-0 <w:lvl>; when it holds
    // several <w:numFmt> children the last one wins.
    const firstLevelIsOrdered = (abstractNum) => {
        for (const lvl of abstractNum.children) {
            if (typeof lvl === 'string' || lvl.name !== 'w:lvl')
                continue;
            if ((lvl.attrs['w:ilvl'] ?? '0') !== '0')
                continue;
            let isOrdered = false;
            for (const prop of lvl.children) {
                if (typeof prop === 'string' || prop.name !== 'w:numFmt')
                    continue;
                const fmt = prop.attrs['w:val'] ?? '';
                isOrdered = !(fmt === 'bullet' || fmt === 'none' || fmt === '');
            }
            return isOrdered;
        }
        return false;
    };

    const orderedByAbstractId = new Map(); // abstractNumId → ordered
    const abstractIdByNumId = new Map(); // numId → abstractNumId
    for (const entry of root.children) {
        if (typeof entry === 'string')
            continue;
        switch (entry.name) {
            case 'w:abstractNum': {
                const rawId = entry.attrs['w:abstractNumId'];
                const abstractId = rawId ? parseInt(rawId, 10) : NaN;
                if (Number.isFinite(abstractId)) {
                    orderedByAbstractId.set(abstractId, firstLevelIsOrdered(entry));
                }
                break;
            }
            case 'w:num': {
                const rawId = entry.attrs['w:numId'];
                const instanceId = rawId ? parseInt(rawId, 10) : NaN;
                if (!Number.isFinite(instanceId))
                    break;
                for (const link of entry.children) {
                    if (typeof link === 'string' || link.name !== 'w:abstractNumId')
                        continue;
                    const rawRef = link.attrs['w:val'];
                    const target = rawRef ? parseInt(rawRef, 10) : NaN;
                    if (Number.isFinite(target)) {
                        abstractIdByNumId.set(instanceId, target);
                    }
                }
                break;
            }
            default:
                break;
        }
    }

    // Resolve each instance only after the whole part has been read, so the
    // relative order of <w:num> and <w:abstractNum> does not matter.
    const instances = new Map();
    abstractIdByNumId.forEach((abstractId, numId) => {
        instances.set(numId, { numId, ordered: orderedByAbstractId.get(abstractId) ?? false });
    });
    return { instances };
}
|
|
90
|
+
//# sourceMappingURL=parse-numbering.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-numbering.js","sourceRoot":"","sources":["../src/parse-numbering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,EAAsB,MAAM,gBAAgB,CAAA;AAY7D,MAAM,UAAU,GAAkB,EAAE,SAAS,EAAE,IAAI,GAAG,EAAE,EAAE,CAAA;AAE1D,MAAM,UAAU,cAAc,CAC5B,GAAuB,EACvB,QAA0B;IAE1B,IAAI,CAAC,GAAG;QAAE,OAAO,UAAU,CAAA;IAC3B,IAAI,IAAmB,CAAA;IACvB,IAAI,CAAC;QACH,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAA;IAC3B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,QAAQ,CAAC,GAAG,CAAC;YACX,IAAI,EAAE,eAAe;YACrB,OAAO,EAAE,kCAAmC,GAAa,CAAC,OAAO,EAAE;YACnE,IAAI,EAAE,oBAAoB;SAC3B,CAAC,CAAA;QACF,OAAO,UAAU,CAAA;IACnB,CAAC;IAED,0BAA0B;IAC1B,MAAM,eAAe,GAAG,IAAI,GAAG,EAAmB,CAAA;IAClD,wBAAwB;IACxB,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAA;IAE/C,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC9B,IAAI,OAAO,CAAC,KAAK,QAAQ;YAAE,SAAQ;QACnC,IAAI,CAAC,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAA;YACxC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAA;YAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAAE,SAAQ;YAClC,qEAAqE;YACrE,IAAI,OAAO,GAAG,KAAK,CAAA;YACnB,KAAK,MAAM,GAAG,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;gBAC7B,IAAI,OAAO,GAAG,KAAK,QAAQ;oBAAE,SAAQ;gBACrC,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO;oBAAE,SAAQ;gBAClC,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAA;gBAC1C,IAAI,OAAO,KAAK,GAAG;oBAAE,SAAQ;gBAC7B,KAAK,MAAM,QAAQ,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;oBACpC,IAAI,OAAO,QAAQ,KAAK,QAAQ;wBAAE,SAAQ;oBAC1C,IAAI,QAAQ,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;wBACjC,MAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;wBACzC,OAAO,GAAG,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,EAAE,CAAA;oBAC5D,CAAC;gBACH,CAAC;gBACD,MAAK;YACP,CAAC;YACD,eAAe,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAA;QAClC,CAAC;aAAM,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC9B,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;YAChC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAA;YAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;gB
AAE,SAAQ;YAClC,KAAK,MAAM,EAAE,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC;gBAC5B,IAAI,OAAO,EAAE,KAAK,QAAQ;oBAAE,SAAQ;gBACpC,IAAI,EAAE,CAAC,IAAI,KAAK,iBAAiB,EAAE,CAAC;oBAClC,MAAM,MAAM,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;oBAChC,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAA;oBAC/C,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC;wBAAE,aAAa,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAA;gBACtD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,EAA6B,CAAA;IACtD,KAAK,MAAM,CAAC,KAAK,EAAE,GAAG,CAAC,IAAI,aAAa,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,KAAK,CAAA;QACjD,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAA;IAC1C,CAAC;IACD,OAAO,EAAE,SAAS,EAAE,CAAA;AACtB,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse the inline content of a `<w:p>` (or `<w:tc>` paragraph child) into
|
|
3
|
+
* the Doxiva inline model: a flat list of text leaves with mark sets.
|
|
4
|
+
*
|
|
5
|
+
* Reverses what `runs.ts`/`inline.ts` emit in the exporter:
|
|
6
|
+
* - `<w:r>` → one or more text leaves carrying the run's rPr marks.
|
|
7
|
+
* - `<w:hyperlink r:id="...">` → resolves the rel target and applies a
|
|
8
|
+
* `link` mark to all inner runs.
|
|
9
|
+
* - `<w:br w:type="page"/>` inside a run → reported back to the caller
|
|
10
|
+
* via a sentinel so `parseParagraph` can convert a "page break only"
|
|
11
|
+
* paragraph into a `page_break` block.
|
|
12
|
+
*
|
|
13
|
+
* Unknown rPr children are silently skipped (DOCX has dozens of rare
|
|
14
|
+
* formatting tags we don't model). Unknown paragraph children emit an
|
|
15
|
+
* `unknown-element` warning.
|
|
16
|
+
*/
|
|
17
|
+
import type { DxNode } from '@doxi/core';
|
|
18
|
+
import type { ParsedElement } from './parse-xml.js';
|
|
19
|
+
import type { ParseBlockCtx } from './parse-blocks.js';
|
|
20
|
+
/** Outcome of parsing the inline content of one paragraph. */
export interface InlineResult {
    /** Inline nodes produced from the paragraph's runs. */
    readonly nodes: DxNode[];
    /** Set when at least one run contained `<w:br w:type="page"/>`. */
    readonly sawPageBreak: boolean;
    /** Set when page-break runs were the paragraph's ONLY content. */
    readonly onlyPageBreak: boolean;
}
|
|
27
|
+
/**
 * Parse the inline content of a paragraph element into inline nodes and
 * report whether page-break runs were seen.
 */
export declare function parseInline(
    paragraph: ParsedElement,
    ctx: ParseBlockCtx,
): InlineResult;
|
|
28
|
+
//# sourceMappingURL=parse-runs.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-runs.d.ts","sourceRoot":"","sources":["../src/parse-runs.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAQ,MAAM,YAAY,CAAA;AAC9C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAA;AAEtD,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,CAAA;IACxB,iEAAiE;IACjE,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAA;IAC9B,qEAAqE;IACrE,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAA;CAChC;AAED,wBAAgB,WAAW,CAAC,SAAS,EAAE,aAAa,EAAE,GAAG,EAAE,aAAa,GAAG,YAAY,CAqEtF"}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse the inline content of a `<w:p>` (or `<w:tc>` paragraph child) into
|
|
3
|
+
* the Doxiva inline model: a flat list of text leaves with mark sets.
|
|
4
|
+
*
|
|
5
|
+
* Reverses what `runs.ts`/`inline.ts` emit in the exporter:
|
|
6
|
+
* - `<w:r>` → one or more text leaves carrying the run's rPr marks.
|
|
7
|
+
* - `<w:hyperlink r:id="...">` → resolves the rel target and applies a
|
|
8
|
+
* `link` mark to all inner runs.
|
|
9
|
+
* - `<w:br w:type="page"/>` inside a run → reported back to the caller
|
|
10
|
+
* via a sentinel so `parseParagraph` can convert a "page break only"
|
|
11
|
+
* paragraph into a `page_break` block.
|
|
12
|
+
*
|
|
13
|
+
* Unknown rPr children are silently skipped (DOCX has dozens of rare
|
|
14
|
+
* formatting tags we don't model). Unknown paragraph children emit an
|
|
15
|
+
* `unknown-element` warning.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Walk the direct children of a paragraph element and collect its inline
 * content.
 *
 * - `w:pPr` is ignored here.
 * - `w:r` runs are parsed with no extra marks.
 * - `w:hyperlink` wrappers have their `w:r` children parsed with the link
 *   mark resolved from the relationship table (or no mark when it cannot be
 *   resolved or is unsafe).
 * - Any other element produces an `unknown-element` warning.
 *
 * @param paragraph - Parsed element whose `children` are walked.
 * @param ctx - Parse context; `schema.marks`, `rels` and `warnings` are used.
 * @returns `{ nodes, sawPageBreak, onlyPageBreak }`, where `onlyPageBreak`
 *   is true when a page break was seen and no run produced any node.
 */
export function parseInline(paragraph, ctx) {
    const nodes = [];
    let sawPageBreak = false;
    let sawNonBreakContent = false;
    // Parse one run and fold its outcome into the paragraph-level state.
    const absorbRun = (runElement, inheritedMarks) => {
        const outcome = parseRun(runElement, ctx, inheritedMarks);
        if (outcome.pageBreak) {
            sawPageBreak = true;
        }
        if (outcome.nodes.length === 0) {
            return;
        }
        sawNonBreakContent = true;
        nodes.push(...outcome.nodes);
    };
    for (const child of paragraph.children) {
        if (typeof child === 'string') {
            continue;
        }
        const tag = child.name;
        if (tag === 'w:pPr') {
            // Paragraph properties are not inline content; nothing to do.
            continue;
        }
        if (tag === 'w:r') {
            absorbRun(child, []);
            continue;
        }
        if (tag === 'w:hyperlink') {
            const linkMarks = resolveHyperlinkMarks(child, ctx);
            for (const inner of child.children) {
                // Only runs inside the hyperlink contribute content.
                if (typeof inner !== 'string' && inner.name === 'w:r') {
                    absorbRun(inner, linkMarks);
                }
            }
            continue;
        }
        ctx.warnings.add({
            code: 'unknown-element',
            message: `Unknown paragraph child: ${tag}`,
        });
    }
    const onlyPageBreak = sawPageBreak && !sawNonBreakContent;
    return { nodes, sawPageBreak, onlyPageBreak };
}
/**
 * Resolve the marks to apply to the runs inside a `w:hyperlink` element.
 *
 * Returns a single-element array holding a link mark when the element has a
 * relationship id, the schema defines a `link` mark, the relationship exists
 * and its target passes `isUnsafeHref`. In every other case an empty array
 * is returned; a missing relationship or an unsafe target also adds a
 * warning to `ctx.warnings`.
 */
function resolveHyperlinkMarks(hyperlink, ctx) {
    const rid = hyperlink.attrs['r:id'] ?? hyperlink.attrs['r:Id'];
    const linkMarkType = ctx.schema.marks.link;
    if (!rid || !linkMarkType) {
        return [];
    }
    const rel = ctx.rels.get(rid);
    if (!rel) {
        ctx.warnings.add({
            code: 'unknown-relationship',
            message: `Hyperlink references missing relationship: ${rid}`,
        });
        return [];
    }
    if (isUnsafeHref(rel.target)) {
        // The link mark is dropped; the inner text still flows through.
        ctx.warnings.add({
            code: 'unsafe-content',
            message: `Dropped unsafe hyperlink: ${rel.target}`,
        });
        return [];
    }
    return [linkMarkType.create({ href: rel.target })];
}
|
|
91
|
+
/**
 * Convert a single `w:r` element into text nodes.
 *
 * The first `w:rPr` child (if any) supplies the run's own marks, which are
 * merged with `extraMarks`. Then each element child is handled:
 * - `w:t` emits its text (when non-empty),
 * - `w:tab` emits a tab character,
 * - `w:br` with `w:type="page"` sets the page-break flag (other break types
 *   are skipped),
 * - everything else is skipped without a warning.
 *
 * @param run - Parsed run element.
 * @param ctx - Parse context; `schema.text` builds the nodes.
 * @param extraMarks - Marks inherited from an enclosing wrapper.
 * @returns `{ nodes, pageBreak }`.
 */
function parseRun(run, ctx, extraMarks) {
    // Single pass: keep the element children and remember the first w:rPr.
    const elements = [];
    let runProps;
    for (const child of run.children) {
        if (typeof child === 'string') {
            continue;
        }
        elements.push(child);
        if (runProps === undefined && child.name === 'w:rPr') {
            runProps = child;
        }
    }
    const ownMarks = runProps === undefined ? [] : parseRunMarks(runProps, ctx);
    const marks = mergeMarks(extraMarks, ownMarks);
    const nodes = [];
    let pageBreak = false;
    for (const el of elements) {
        if (el.name === 'w:t') {
            const text = collectText(el);
            if (text !== '') {
                nodes.push(ctx.schema.text(text, marks));
            }
        }
        else if (el.name === 'w:tab') {
            nodes.push(ctx.schema.text('\t', marks));
        }
        else if (el.name === 'w:br') {
            // Only page breaks are reported; other break kinds are not modeled.
            const breakType = el.attrs['w:type'] ?? '';
            if (breakType === 'page') {
                pageBreak = true;
            }
        }
        // w:rPr was consumed above; any other run child is skipped.
    }
    return { nodes, pageBreak };
}
|
|
136
|
+
/**
 * Translate the children of a `w:rPr` element into a list of marks.
 *
 * Handled properties (each only when the schema defines the mark type):
 * - `w:b` / `w:i` → `bold` / `italic`, unless `w:val` explicitly switches
 *   the toggle off (`"0"`, `"false"` or `"off"`).
 * - `w:u` → `underline`, unless `w:val="none"`.
 * - `w:color` → `color` with a `#`-prefixed value; the special value
 *   `auto` carries no concrete colour and is skipped.
 * - `w:sz` → `font_size`, converting half-points to pixels
 *   (half-points / 2 = pt; pt * 96 / 72 = px, i.e. half-points / 1.5).
 * - `w:rFonts` → `font_family` from `w:ascii`, falling back to `w:hAnsi`.
 *
 * All other children are skipped without a warning.
 *
 * @param rPr - Parsed run-properties element.
 * @param ctx - Parse context; `schema.marks` supplies the mark types.
 * @returns Marks in the order their elements appear in `rPr`.
 */
function parseRunMarks(rPr, ctx) {
    const marks = [];
    const m = ctx.schema.marks;
    // A toggle property is on when present, unless w:val says otherwise.
    const isToggleOn = (el) => {
        const val = el.attrs?.['w:val'];
        return val !== '0' && val !== 'false' && val !== 'off';
    };
    for (const child of rPr.children) {
        if (typeof child === 'string')
            continue;
        switch (child.name) {
            case 'w:b':
                if (m.bold && isToggleOn(child))
                    marks.push(m.bold.create({}));
                break;
            case 'w:i':
                if (m.italic && isToggleOn(child))
                    marks.push(m.italic.create({}));
                break;
            case 'w:u': {
                const val = child.attrs['w:val'] ?? '';
                if (val !== 'none' && m.underline)
                    marks.push(m.underline.create({}));
                break;
            }
            case 'w:color': {
                const val = child.attrs['w:val'];
                // "auto" means "let the consumer decide"; it is not a hex value.
                if (val && val.toLowerCase() !== 'auto' && m.color) {
                    const value = val.startsWith('#') ? val : `#${val}`;
                    marks.push(m.color.create({ value }));
                }
                break;
            }
            case 'w:sz': {
                const val = child.attrs['w:val'];
                if (val && m.font_size) {
                    const halfPts = Number.parseInt(val, 10);
                    if (Number.isFinite(halfPts)) {
                        marks.push(m.font_size.create({ px: Math.round(halfPts / 1.5) }));
                    }
                }
                break;
            }
            case 'w:rFonts': {
                const name = child.attrs['w:ascii'] ?? child.attrs['w:hAnsi'];
                if (name && m.font_family) {
                    marks.push(m.font_family.create({ name }));
                }
                break;
            }
            default:
                // Silently skip unknown rPr children — DOCX has many obscure
                // formatting tags (w:strike, w:vertAlign, w:lang, etc.) that
                // are not modeled here.
                break;
        }
    }
    return marks;
}
|
|
190
|
+
/**
 * Concatenate the direct string children of an element, in order.
 * Element children are ignored (their own text is not descended into).
 *
 * @param el - Parsed element whose `children` mix strings and elements.
 * @returns The joined text, or `''` when there are no string children.
 */
function collectText(el) {
    return Array.from(el.children)
        .filter((piece) => typeof piece === 'string')
        .join('');
}
|
|
198
|
+
/**
 * Combine two mark lists into one new array ordered by ascending
 * `mark.type.rank`.
 *
 * The rank ordering is meant to match what `Schema.text` would produce via
 * `addToSet`, so the resulting text nodes round-trip through
 * `toJSON`/`fromJSON` and compare equal to schema-built fixtures in tests.
 * Neither input array is modified.
 *
 * @param a - First list of marks.
 * @param b - Second list of marks.
 * @returns A fresh array containing every mark from `a` and `b`.
 */
function mergeMarks(a, b) {
    const byRank = (left, right) => left.type.rank - right.type.rank;
    return [...a, ...b].sort(byRank);
}
|
|
211
|
+
/**
 * Decide whether a hyperlink target must be rejected.
 *
 * A foreign DOCX can carry a `<w:hyperlink>` whose relationship target is
 * `javascript:alert(1)`; rendering that as `<a href="javascript:...">`
 * would create an XSS sink once the link is followed. This check rejects
 * `javascript:`, `vbscript:` and non-image `data:` targets.
 *
 * Before the scheme is compared, all whitespace and all C0/C1 control
 * characters are removed and the result is lower-cased, because URL parsers
 * ignore leading control characters and embedded tabs/newlines — so
 * `"\u0001java\tscript:"` is still a `javascript:` URL to a browser.
 * Hyphens are deliberately NOT stripped: `java-script:` is not an
 * executable scheme, and stripping them would let `data:-image/...`
 * masquerade as an image data URL.
 *
 * @param target - Relationship target to inspect.
 * @returns `true` when the target must not be turned into a link. Values
 *   that are not strings are treated as unsafe (fail closed).
 */
function isUnsafeHref(target) {
    if (typeof target !== 'string') {
        return true;
    }
    const stripped = target.replace(/[\u0000-\u0020\u007f-\u009f\s]+/g, '').toLowerCase();
    if (stripped.startsWith('javascript:'))
        return true;
    if (stripped.startsWith('vbscript:'))
        return true;
    if (stripped.startsWith('data:') && !stripped.startsWith('data:image/')) {
        return true;
    }
    return false;
}
|
|
230
|
+
//# sourceMappingURL=parse-runs.js.map
|