@sobree/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/LICENSE +21 -0
  2. package/README.md +106 -0
  3. package/dist/__vite-browser-external-DYxpcVy9.js +5 -0
  4. package/dist/__vite-browser-external-DYxpcVy9.js.map +1 -0
  5. package/dist/blob/cache.d.ts +69 -0
  6. package/dist/blob/fetch.d.ts +18 -0
  7. package/dist/blob/hash.d.ts +13 -0
  8. package/dist/blob/index.d.ts +33 -0
  9. package/dist/blob/memory.d.ts +2 -0
  10. package/dist/blob/types.d.ts +80 -0
  11. package/dist/createSobree.d.ts +132 -0
  12. package/dist/doc/api.d.ts +132 -0
  13. package/dist/doc/builders.d.ts +42 -0
  14. package/dist/doc/pageSetupBridge.d.ts +26 -0
  15. package/dist/doc/parts.d.ts +18 -0
  16. package/dist/doc/runs.d.ts +47 -0
  17. package/dist/doc/styles.d.ts +19 -0
  18. package/dist/doc/types.d.ts +800 -0
  19. package/dist/doc/walk.d.ts +30 -0
  20. package/dist/docx/export/contentTypes.d.ts +35 -0
  21. package/dist/docx/export/context.d.ts +59 -0
  22. package/dist/docx/export/document.d.ts +19 -0
  23. package/dist/docx/export/drawings.d.ts +10 -0
  24. package/dist/docx/export/headers.d.ts +19 -0
  25. package/dist/docx/export/index.d.ts +14 -0
  26. package/dist/docx/export/runs.d.ts +8 -0
  27. package/dist/docx/export/styles.d.ts +8 -0
  28. package/dist/docx/export/zip.d.ts +13 -0
  29. package/dist/docx/import/anchoredFrames.d.ts +34 -0
  30. package/dist/docx/import/comments.d.ts +3 -0
  31. package/dist/docx/import/document.d.ts +57 -0
  32. package/dist/docx/import/flowFrames.d.ts +11 -0
  33. package/dist/docx/import/footnotes.d.ts +3 -0
  34. package/dist/docx/import/headers.d.ts +50 -0
  35. package/dist/docx/import/index.d.ts +12 -0
  36. package/dist/docx/import/inlineFrames.d.ts +62 -0
  37. package/dist/docx/import/numbering.d.ts +2 -0
  38. package/dist/docx/import/paragraph.d.ts +24 -0
  39. package/dist/docx/import/paragraphs.d.ts +27 -0
  40. package/dist/docx/import/rels.d.ts +5 -0
  41. package/dist/docx/import/runs.d.ts +64 -0
  42. package/dist/docx/import/settings.d.ts +48 -0
  43. package/dist/docx/import/styles.d.ts +3 -0
  44. package/dist/docx/import/tables.d.ts +12 -0
  45. package/dist/docx/import/unzip.d.ts +13 -0
  46. package/dist/docx/shared/namespaces.d.ts +31 -0
  47. package/dist/docx/shared/pageSize.d.ts +27 -0
  48. package/dist/docx/shared/shading.d.ts +2 -0
  49. package/dist/docx/shared/units.d.ts +35 -0
  50. package/dist/docx/shared/xml.d.ts +29 -0
  51. package/dist/docx/types.d.ts +98 -0
  52. package/dist/editor/index.d.ts +1078 -0
  53. package/dist/editor/internal/blockRegistry.d.ts +91 -0
  54. package/dist/editor/internal/mutations.d.ts +63 -0
  55. package/dist/editor/internal/positionMap.d.ts +35 -0
  56. package/dist/editor/table.d.ts +96 -0
  57. package/dist/editor/view/docRenderer/anchorLayer.d.ts +26 -0
  58. package/dist/editor/view/docRenderer/block.d.ts +13 -0
  59. package/dist/editor/view/docRenderer/fontFallback.d.ts +28 -0
  60. package/dist/editor/view/docRenderer/index.d.ts +18 -0
  61. package/dist/editor/view/docRenderer/inline.d.ts +15 -0
  62. package/dist/editor/view/docRenderer/inlineFrame.d.ts +4 -0
  63. package/dist/editor/view/docRenderer/lists.d.ts +28 -0
  64. package/dist/editor/view/docRenderer/paragraph.d.ts +2 -0
  65. package/dist/editor/view/docRenderer/properties.d.ts +2 -0
  66. package/dist/editor/view/docRenderer/table.d.ts +15 -0
  67. package/dist/editor/view/docRenderer/units.d.ts +48 -0
  68. package/dist/editor/view/docSerialize/block.d.ts +14 -0
  69. package/dist/editor/view/docSerialize/index.d.ts +8 -0
  70. package/dist/editor/view/docSerialize/inline.d.ts +11 -0
  71. package/dist/editor/view/docSerialize/table.d.ts +12 -0
  72. package/dist/editor/view/imageResize.d.ts +16 -0
  73. package/dist/embed/floatingCorner.d.ts +44 -0
  74. package/dist/embed/viewport.d.ts +133 -0
  75. package/dist/fonts/embedAPI.d.ts +33 -0
  76. package/dist/fonts/emit.d.ts +24 -0
  77. package/dist/fonts/fontFaceRegistry.d.ts +20 -0
  78. package/dist/fonts/fsType.d.ts +36 -0
  79. package/dist/fonts/index.d.ts +19 -0
  80. package/dist/fonts/liveness.d.ts +2 -0
  81. package/dist/fonts/odttf.d.ts +33 -0
  82. package/dist/fonts/parse.d.ts +29 -0
  83. package/dist/fonts/types.d.ts +52 -0
  84. package/dist/headless.d.ts +168 -0
  85. package/dist/history/history.d.ts +100 -0
  86. package/dist/history/index.d.ts +4 -0
  87. package/dist/history/types.d.ts +54 -0
  88. package/dist/index.css +1 -0
  89. package/dist/index.d.ts +52 -0
  90. package/dist/index.js +10561 -0
  91. package/dist/index.js.map +1 -0
  92. package/dist/markdown/parse.d.ts +6 -0
  93. package/dist/pagination/cost.d.ts +32 -0
  94. package/dist/pagination/index.d.ts +2 -0
  95. package/dist/pagination/paginate.d.ts +10 -0
  96. package/dist/pagination/postConditions.d.ts +10 -0
  97. package/dist/pagination/types.d.ts +94 -0
  98. package/dist/paperStack/pageSetup.d.ts +42 -0
  99. package/dist/paperStack/paginationAdapter/buildItems.d.ts +19 -0
  100. package/dist/paperStack/paginationAdapter/distribute.d.ts +23 -0
  101. package/dist/paperStack/paginationAdapter/index.d.ts +18 -0
  102. package/dist/paperStack/paginationAdapter/paragraphLines.d.ts +23 -0
  103. package/dist/paperStack/paginationAdapter/splitList.d.ts +19 -0
  104. package/dist/paperStack/paginationAdapter/splitParagraph.d.ts +21 -0
  105. package/dist/paperStack/paginationAdapter/types.d.ts +30 -0
  106. package/dist/paperStack/paper.d.ts +107 -0
  107. package/dist/paperStack/paperStack.d.ts +245 -0
  108. package/dist/plugin.d.ts +24 -0
  109. package/dist/plugins/marks.d.ts +49 -0
  110. package/dist/plugins/sections.d.ts +15 -0
  111. package/dist/presence/attach.d.ts +48 -0
  112. package/dist/presence/awareness.d.ts +28 -0
  113. package/dist/presence/index.d.ts +19 -0
  114. package/dist/presence/overlay.d.ts +28 -0
  115. package/dist/presence/state.d.ts +36 -0
  116. package/dist/sobree.d.ts +211 -0
  117. package/dist/tokens.css +144 -0
  118. package/dist/util/selection.d.ts +13 -0
  119. package/dist/ydoc/apply.d.ts +68 -0
  120. package/dist/ydoc/index.d.ts +18 -0
  121. package/dist/ydoc/project.d.ts +41 -0
  122. package/dist/ydoc/runs.d.ts +51 -0
  123. package/dist/ydoc/schema.d.ts +123 -0
  124. package/dist/ydoc/seed.d.ts +45 -0
  125. package/dist/ydoc/textDiff.d.ts +59 -0
  126. package/dist/zoneEdit/index.d.ts +22 -0
  127. package/package.json +61 -0
@@ -0,0 +1,30 @@
1
+ import { Block, InlineRun, Paragraph, SobreeDocument, Table } from './types';
2
+ /**
3
+ * Visitor pattern over the document tree.
4
+ *
5
+ * Every visitor key is optional — implement only the nodes you care about.
6
+ * Return `false` from any handler to skip descending into children of that
7
+ * node; return anything else (or omit the return) to continue.
8
+ *
9
+ * Why not exhaustive? Because the AST will gain shapes over time (comments,
10
+ * tracked changes, equations) and existing visitors shouldn't break when we
11
+ * add a new node kind. Skipped nodes log nothing — silent traversal.
12
+ */
13
+ export interface DocVisitor {
14
+ document?: (doc: SobreeDocument) => void | false;
15
+ block?: (block: Block) => void | false;
16
+ paragraph?: (p: Paragraph) => void | false;
17
+ table?: (t: Table) => void | false;
18
+ run?: (r: InlineRun) => void | false;
19
+ }
20
+ export declare function walk(doc: SobreeDocument, v: DocVisitor): void;
21
+ export declare function walkBlock(block: Block, v: DocVisitor): void;
22
+ export declare function walkRun(run: InlineRun, v: DocVisitor): void;
23
+ /**
24
+ * Collect every text run's text into a single flat string. Useful for
25
+ * search, outline extraction, and "give me the plain text" callers.
26
+ */
27
+ export declare function plainText(doc: SobreeDocument): string;
28
+ export declare function runsToText(runs: readonly InlineRun[]): string;
29
+ /** Derive the heading level from a paragraph's styleId, if any. */
30
+ export declare function headingLevelOf(p: Paragraph): number | null;
@@ -0,0 +1,35 @@
1
+ declare const REL_TYPES: {
2
+ readonly header: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header";
3
+ readonly footer: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer";
4
+ readonly image: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
5
+ readonly hyperlink: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
6
+ readonly fontTable: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable";
7
+ readonly font: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/font";
8
+ };
9
+ type RelKind = keyof typeof REL_TYPES;
10
+ /**
11
+ * `[Content_Types].xml` tells Office which content-type handler to use for
12
+ * each part. `overrides` are appended to the baseline; `imageExtensions`
13
+ * become `<Default>` content-type entries so embedded media round-trips.
14
+ */
15
+ export declare function renderContentTypesXml(overrides?: Array<{
16
+ partName: string;
17
+ contentType: string;
18
+ }>, imageExtensions?: readonly string[]): string;
19
+ /**
20
+ * `_rels/.rels` — the package-level relationships, pointing at the main
21
+ * document part.
22
+ */
23
+ export declare function renderRootRelsXml(): string;
24
+ /**
25
+ * `word/_rels/document.xml.rels` — relationships originating from
26
+ * `document.xml`. Always includes the styles relationship (`rId1`);
27
+ * callers pass additional relationships (any `RelKind`: header, footer, image, hyperlink, fontTable, font) to append.
28
+ */
29
+ export declare function renderDocumentRelsXml(extras?: Array<{
30
+ id: string;
31
+ type: RelKind;
32
+ target: string;
33
+ external?: boolean;
34
+ }>): string;
35
+ export {};
@@ -0,0 +1,59 @@
1
+ import { SobreeDocument } from '../../doc/types';
2
+ /**
3
+ * Mutable per-export bookkeeping: tracks which image `partPath`s have
4
+ * been allocated rIds, which new parts to add to the ZIP, and which
5
+ * content-type overrides the package manifest needs to declare.
6
+ *
7
+ * Constructed once in `exportDocx`, threaded through document + runs
8
+ * emission, then folded into the final relationships / manifest.
9
+ */
10
+ export interface ExportContext {
11
+ /** Next rId to hand out via `allocRel`. Mutated as rIds are allocated. */
12
+ nextRid: number;
13
+ /** Rels to append to `word/_rels/document.xml.rels`. */
14
+ relationships: Array<{
15
+ id: string;
16
+ type: "header" | "footer" | "image" | "hyperlink" | "fontTable";
17
+ target: string;
18
+ /** External targets (URLs) need `TargetMode="External"`. */
19
+ external?: boolean;
20
+ }>;
21
+ /** New ZIP parts to include in the output package (e.g. `word/media/image1.png`). */
22
+ parts: Record<string, Uint8Array | string>;
23
+ /** Content-type overrides to declare in `[Content_Types].xml`. */
24
+ contentTypeOverrides: Array<{
25
+ partName: string;
26
+ contentType: string;
27
+ }>;
28
+ /** Media extensions seen (for content-type Default entries). */
29
+ mediaExtensions: Set<string>;
30
+ /** Cached path → rId so repeated DrawingRuns share one relationship. */
31
+ imageRelByPartPath: Map<string, string>;
32
+ /** Cached href → rId so repeated hyperlinks share one relationship. */
33
+ hyperlinkRelByHref: Map<string, string>;
34
+ /** Running docPr id counter — Word wants unique per-drawing ids. */
35
+ nextDocPrId: number;
36
+ /**
37
+ * Running revision id counter — Word requires `w:id="N"` on each
38
+ * `<w:ins>` / `<w:del>` / `<w:rPrChange>` / paragraph-mark revision
39
+ * element, unique within the document. We share one counter across
40
+ * all revision kinds to keep the IDs simple and contiguous.
41
+ */
42
+ nextRevisionId: number;
43
+ }
44
+ export declare function makeExportContext(startRid: number): ExportContext;
45
+ /** Allocate the next w:id for a tracked-revision element. */
46
+ export declare function nextRevisionId(ctx: ExportContext): number;
47
+ /**
48
+ * Ensure an image relationship exists for the given `partPath`. Copies the
49
+ * bytes into `ctx.parts` on first encounter and returns the allocated rId.
50
+ */
51
+ export declare function allocImageRel(ctx: ExportContext, partPath: string, doc: SobreeDocument): string | null;
52
+ /** Next unique `docPr id` for a drawing. */
53
+ export declare function nextDocPr(ctx: ExportContext): number;
54
+ /**
55
+ * Ensure a hyperlink relationship exists for the given external `href`.
56
+ * Hyperlinks are external-target rels (TargetMode="External"), so the
57
+ * URL itself is the rel's `Target` and no part is added to the ZIP.
58
+ */
59
+ export declare function allocHyperlinkRel(ctx: ExportContext, href: string): string;
@@ -0,0 +1,19 @@
1
+ import { ExportContext } from './context';
2
+ import { Block, SobreeDocument } from '../../doc/types';
3
+ /**
4
+ * Render the SobreeDocument body into `word/document.xml` (string form).
5
+ *
6
+ * `sectPrXmls` is the parallel array from `emitHeadersAndFooters` —
7
+ * one per section. Non-final sections' sectPr is spliced into the
8
+ * `<w:pPr>` of the LAST PARAGRAPH of that section's body range (OOXML
9
+ * convention; ECMA-376 §17.6.18). The final section's sectPr lands at
10
+ * body level after the last block. `SectionBreak` blocks themselves
11
+ * produce no output — they're delimiters whose semantics are carried
12
+ * by the spliced sectPr.
13
+ *
14
+ * `ctx` is mutated as drawings are encountered — each image registers
15
+ * a relationship and a ZIP media part.
16
+ */
17
+ export declare function renderDocumentXml(doc: SobreeDocument, sectPrXmls: readonly string[], ctx: ExportContext): string;
18
+ /** Also used for header/footer part bodies. */
19
+ export declare function renderBlocks(blocks: readonly Block[], ctx: ExportContext, doc: SobreeDocument): string[];
@@ -0,0 +1,10 @@
1
+ import { DrawingRun } from '../../doc/types';
2
+ /**
3
+ * Emit a `<w:drawing>` XML fragment for an inline image. Consumes an
4
+ * `rId` allocated elsewhere (via `allocImageRel(ctx, …)` from `./context`) and
5
+ * writes the OOXML shape Word expects for a single inline picture.
6
+ *
7
+ * Anchored / floating drawings are out of scope for Phase 5 — all images
8
+ * render inline.
9
+ */
10
+ export declare function renderDrawing(run: DrawingRun, rId: string, docPrId: number): string;
@@ -0,0 +1,19 @@
1
+ import { ExportContext } from './context';
2
+ import { SectionProperties, SobreeDocument } from '../../doc/types';
3
+ /**
4
+ * Build the OOXML scaffolding for every section in `doc`: header/footer
5
+ * XML parts, relationships, content-type overrides — and a parallel
6
+ * array of `<w:sectPr>` XML strings, one per section.
7
+ *
8
+ * Mutates `ctx`. Each referenced header/footer appends to `ctx.parts`,
9
+ * `ctx.relationships`, and `ctx.contentTypeOverrides`. Returns the
10
+ * sectPr XMLs in section order so the body renderer can splice the
11
+ * non-final ones into the last paragraph of each section's range and
12
+ * place the final one at body level.
13
+ *
14
+ * Header/footer parts are deduped across sections by their `partId`:
15
+ * the same `header1.xml` referenced from two sections only emits one
16
+ * part, with one `rId`. Subsequent references reuse the existing rId.
17
+ */
18
+ export declare function emitHeadersAndFooters(doc: SobreeDocument, ctx: ExportContext): string[];
19
+ export declare function renderSectPr(section: SectionProperties, headerRefs: string[], footerRefs: string[]): string;
@@ -0,0 +1,14 @@
1
+ import { SobreeDocument } from '../../doc/types';
2
+ import { DocxExportResult } from '../types';
3
+ /**
4
+ * Export a SobreeDocument as a .docx Blob + raw bytes.
5
+ *
6
+ * Emits the OOXML package:
7
+ * - `[Content_Types].xml`, `_rels/.rels`,
8
+ * `word/_rels/document.xml.rels`
9
+ * - `word/styles.xml`, `word/document.xml`
10
+ * - `word/header*.xml` / `word/footer*.xml` (per header/footer reference)
11
+ * - `word/media/*` (per referenced image) — copied verbatim from
12
+ * `doc.rawParts` and declared as image relationships.
13
+ */
14
+ export declare function exportDocx(doc: SobreeDocument): DocxExportResult;
@@ -0,0 +1,8 @@
1
+ import { ExportContext } from './context';
2
+ import { InlineRun, SobreeDocument } from '../../doc/types';
3
+ /**
4
+ * Render a list of InlineRuns into concatenated `<w:r>` / `<w:fldSimple>`
5
+ * / `<w:drawing>` XML. Drawings use `ctx` to allocate a relationship id
6
+ * and register the underlying media part.
7
+ */
8
+ export declare function inlinesToRuns(inlines: readonly InlineRun[], ctx: ExportContext, doc: SobreeDocument): string;
@@ -0,0 +1,8 @@
1
+ import { NamedStyle } from '../../doc/types';
2
+ /**
3
+ * Render the document's named styles into `word/styles.xml`. Word needs a
4
+ * style-definition entry for every `w:pStyle` referenced in the body.
5
+ * Missing entries make Word fall back to Normal, stripping the visual
6
+ * hierarchy.
7
+ */
8
+ export declare function renderStylesXml(styles: readonly NamedStyle[]): string;
@@ -0,0 +1,13 @@
1
+ /** Map of part-path → contents (string parts auto-encoded to UTF-8). */
2
+ export type DocxParts = Record<string, string | Uint8Array>;
3
+ export interface DocxPackage {
4
+ blob: Blob;
5
+ bytes: Uint8Array;
6
+ }
7
+ /**
8
+ * Build a `.docx` package from a parts map. fflate's `zipSync` is plenty
9
+ * fast for the sizes we care about. We return both a Blob (for downloads)
10
+ * and the raw bytes (for node/jsdom environments where Blob.arrayBuffer()
11
+ * isn't implemented).
12
+ */
13
+ export declare function packageDocx(parts: DocxParts): DocxPackage;
@@ -0,0 +1,34 @@
1
+ import { AnchoredFrame, Block } from '../../doc/types';
2
+ export interface AnchoredFramesContext {
3
+ /** RelationshipId → part path lookup, e.g. `"rId4" → "media/image1.png"`. */
4
+ rels: Map<string, string>;
5
+ /**
6
+ * Element → paragraph-index lookup over the importer's body-block list AT THE TIME this function runs. Used to
7
+ * resolve `paragraphIndex` for the AnchorOrigin: each frame is
8
+ * attributed to the body paragraph that contained its `<w:drawing>`,
9
+ * so the renderer knows which page receives the frame after
10
+ * pagination. May be empty during early-pass parsing; callers can
11
+ * omit this map (or pass an empty one) and the renderer will treat all frames as section-relative.
12
+ */
13
+ bodyParagraphIndexByElement?: Map<Element, number>;
14
+ /**
15
+ * Recursive body walker for `<w:txbxContent>`, injected by the caller
16
+ * to avoid an `anchoredFrames ↔ document` import cycle. When present,
17
+ * textbox bodies parse through the SAME pipeline as the document body
18
+ * — real run formatting, paragraph spacing, lists, tables — so a
19
+ * frame whose content flows into the body (see `flowFrames`) keeps
20
+ * its true layout. When absent, falls back to flat text (tests).
21
+ */
22
+ parseBlockBody?: (txbxContent: Element) => Block[];
23
+ }
24
+ /**
25
+ * Walk every `<w:drawing>/<wp:anchor>` in the document and return one
26
+ * `AnchoredFrame` per top-level anchored drawing. The returned list is
27
+ * in document order, which matters for z-stacking when frames
28
+ * overlap (later siblings paint on top).
29
+ *
30
+ * The frame's `id` is deterministic: `"anchor-{N}"` where N is its
31
+ * document-order index. Selection / persistence rely on this being
32
+ * stable across re-imports of the same source.
33
+ */
34
+ export declare function parseAnchoredFrames(xmlDoc: Document, ctx: AnchoredFramesContext, claim?: boolean): AnchoredFrame[];
@@ -0,0 +1,3 @@
1
+ import { ConvertContext } from './paragraph';
2
+ import { Comment } from '../../doc/types';
3
+ export declare function parseCommentsXml(xml: string | undefined, ctx: ConvertContext, extendedXml?: string | undefined): Record<number, Comment>;
@@ -0,0 +1,57 @@
1
+ import { ConvertContext } from './paragraph';
2
+ import { Block } from '../../doc/types';
3
+ export { type ConvertContext, convertParagraph } from './paragraph';
4
+ export interface DocumentImport {
5
+ body: Block[];
6
+ warnings: string[];
7
+ /**
8
+ * `<w:sectPr>` elements collected in document order, for the import
9
+ * pipeline to convert into `SectionProperties[]`. Includes both
10
+ * inline (paragraph-pPr) and body-level sectPrs.
11
+ *
12
+ * Length equals the number of sections in the resulting document.
13
+ * Inline sectPrs end non-final sections; the body-level one (always
14
+ * last) is the document-final section.
15
+ */
16
+ sectPrEls: Element[];
17
+ }
18
+ /**
19
+ * Convert a parsed `word/document.xml` into the SobreeDocument body — a
20
+ * flat list of `Block`s (Paragraphs, Tables, SectionBreaks).
21
+ *
22
+ * Multi-section detection: any paragraph whose `<w:pPr>` carries an
23
+ * inline `<w:sectPr>` is the last paragraph of a non-final section.
24
+ * The walker emits a `SectionBreak` block immediately after such a
25
+ * paragraph, and stashes the sectPr Element for the import pipeline
26
+ * to convert into `SectionProperties`. The body-level `<w:sectPr>` is
27
+ * stashed last as the document-final section's properties.
28
+ */
29
+ /**
30
+ * Optional per-paragraph block replacements. When the body walker
31
+ * encounters a `<w:p>` element that's a key in `replaceParagraphs`,
32
+ * it emits the mapped Block *instead of* calling `convertParagraph`
33
+ * on it (and does NOT consume the paragraph's text content as a
34
+ * Paragraph block).
35
+ *
36
+ * Used by the `InlineFrame` import path to swap out section-heading
37
+ * paragraphs for first-class `InlineFrame` blocks at their original
38
+ * document-order position — without resorting to DOM-attribute
39
+ * markers or a post-walk splice. The contract is a typed map; the
40
+ * caller owns key identity.
41
+ */
42
+ export interface ConvertOptions {
43
+ replaceParagraphs?: Map<Element, Block>;
44
+ }
45
+ export declare function convertDocumentXml(xmlDoc: Document, ctx: ConvertContext, opts?: ConvertOptions): DocumentImport;
46
+ /**
47
+ * Walk a container element (`<w:body>` for `document.xml`, `<w:hdr>` /
48
+ * `<w:ftr>` for header/footer parts) and turn its direct paragraph +
49
+ * table children into `Block[]`. Extracted from `convertDocumentXml`
50
+ * so header/footer parts get the same rich-content import — drawings,
51
+ * comment ranges, revisions, formatted runs — instead of being
52
+ * collapsed to flat text by `flattenZone`.
53
+ *
54
+ * Header / footer parts never carry inline `<w:sectPr>` elements, so
55
+ * for those the returned `sectPrEls` is always empty.
56
+ */
57
+ export declare function convertBlocksFromContainer(container: Element, ctx: ConvertContext, opts?: ConvertOptions): DocumentImport;
@@ -0,0 +1,11 @@
1
+ import { AnchoredFrame, Block } from '../../doc/types';
2
+ /**
3
+ * Splice flowable frames' content into `body` at their anchor
4
+ * paragraph and drop them from the overlay set. Returns the rebuilt
5
+ * body and the frames that remain overlays (with `paragraphIndex`
6
+ * remapped to the new body positions). Pure — no mutation of inputs.
7
+ */
8
+ export declare function flowDisplacingTextboxes(body: readonly Block[], frames: readonly AnchoredFrame[]): {
9
+ body: Block[];
10
+ frames: AnchoredFrame[];
11
+ };
@@ -0,0 +1,3 @@
1
+ import { ConvertContext } from './paragraph';
2
+ import { Block } from '../../doc/types';
3
+ export declare function parseFootnotesXml(xml: string | undefined, ctx: ConvertContext): Record<number, Block[]>;
@@ -0,0 +1,50 @@
1
+ import { wVal } from '../shared/xml';
2
+ import { PageZoneText } from '../../paperStack/pageSetup';
3
+ import { SectionProperties } from '../../doc/types';
4
+ /** Zone text extracted from the docx, with `{page}`/`{pages}` placeholders. */
5
+ export interface ImportedZones {
6
+ header: PageZoneText;
7
+ footer: PageZoneText;
8
+ }
9
+ /**
10
+ * Resolve header/footer references in the body's first `<w:sectPr>`, load
11
+ * each referenced part, and flatten to plain text with `{page}` / `{pages}`
12
+ * substituted. Returns Sobree's `PageZoneText` model.
13
+ *
14
+ * Phase 2 ignores "even" references and keeps only "default" and "first".
15
+ * "Different last page" has no native Word equivalent; we leave
16
+ * `differentLast` off.
17
+ */
18
+ export declare function readHeadersAndFooters(bodyXml: Document, relsXml: string | undefined, textParts: Record<string, string>): ImportedZones;
19
+ /**
20
+ * `header*.xml` / `footer*.xml` → plain text with `{page}` / `{pages}`
21
+ * substituted for Word field codes. Flat text only; paragraph breaks
22
+ * become `\n`, inline formatting is dropped. Paired with
23
+ * `templateToBlocks` for the AST round-trip — the bridge converts the
24
+ * `{page}` tokens back into native `FieldRun` nodes.
25
+ */
26
+ export declare function flattenZone(xml: string): string;
27
+ /** Read a twip-valued attribute off `<w:pgSz>` / `<w:pgMar>`. */
28
+ export declare function readTwipsAttr(el: Element | null, name: string): number | null;
29
+ /**
30
+ * Convert a `<w:sectPr>` Element into a fully-populated `SectionProperties`.
31
+ *
32
+ * Reads pgSz / pgMar / vAlign / titlePg / type plus header and footer
33
+ * references (resolved through `rels` to partIds). Falls back to A4
34
+ * portrait + 1" margins when geometry is missing — matches Word's
35
+ * behaviour when an imported sectPr is sparse.
36
+ */
37
+ export declare function readSection(sectPr: Element, rels: Map<string, string>): SectionProperties;
38
+ /** Shared helper: parse `<w:sectPr>` for pgSz/pgMar/vAlign. */
39
+ export declare function readPageGeometry(xmlDoc: Document): {
40
+ widthTwips: number | null;
41
+ heightTwips: number | null;
42
+ margins: {
43
+ top: number | null;
44
+ right: number | null;
45
+ bottom: number | null;
46
+ left: number | null;
47
+ };
48
+ vAlign: "top" | "center" | "bottom" | "both" | null;
49
+ } | null;
50
+ export { wVal };
@@ -0,0 +1,12 @@
1
+ import { templateToBlocks } from '../../doc/pageSetupBridge';
2
+ import { DocxImportResult } from '../types';
3
+ import { emptyDocument } from '../../doc/builders';
4
+ import { Block, SobreeDocument } from '../../doc/types';
5
+ /**
6
+ * Top-level entry point for importing a .docx file. Returns a native
7
+ * `SobreeDocument` plus any warnings surfaced by the conversion.
8
+ */
9
+ export declare function importDocx(src: File | Blob | ArrayBuffer | Uint8Array): Promise<DocxImportResult>;
10
+ export type { Block, SobreeDocument };
11
+ export { emptyDocument };
12
+ export { templateToBlocks };
@@ -0,0 +1,62 @@
1
+ import { Block, InlineFrame } from '../../doc/types';
2
+ export interface InlineFramesContext {
3
+ /** RelationshipId → part path lookup. */
4
+ rels: Map<string, string>;
5
+ /**
6
+ * Recursive body parser supplied by the caller. The textbox content
7
+ * (`<w:txbxContent>`) is a body of `<w:p>` / `<w:tbl>` children
8
+ * that should parse with the same rules as the document body —
9
+ * paragraph properties, runs, tables, even nested inline frames.
10
+ * Phase 1.1: callers can pass a simple text-only stub; Phase 1.2+
11
+ * will pass the full body walker.
12
+ */
13
+ parseBlockBody: (txbxContent: Element) => Block[];
14
+ /**
15
+ * When true, `<w:lastRenderedPageBreak/>` HINTS inside the textbox
16
+ * content cascade up to set `InlineFrame.pageBreakBefore`. These
17
+ * are stale layout hints Word writes during save, not author-
18
+ * declared directives — ECMA-376 says consumers SHOULD ignore
19
+ * them for layout. We respect them in two cases:
20
+ * 1. The body walker already opted in (heavily-decorated CVs
21
+ * where the hints reliably match LO's reference pagination —
22
+ * threshold is `≥10` total LRPB elements in the doc, decided
23
+ * by `convertDocumentXml` and threaded through here).
24
+ * 2. The frame contains an explicit `<w:pageBreakBefore/>` in
25
+ * the outer paragraph's pPr (always honoured).
26
+ * Without this flag, only explicit directives count.
27
+ */
28
+ honorLastRenderedPageBreaks?: boolean;
29
+ }
30
+ /**
31
+ * One InlineFrame plus the source DOM nodes it came from.
32
+ *
33
+ * `drawingEl` is the `<w:drawing>` the importer should TREAT AS
34
+ * REMOVED (legacy lifter will skip it; renderer paints from `frame`).
35
+ *
36
+ * `hostParagraphEl` is the `<w:p>` that contained the drawing — its
37
+ * outer `<w:pPr>` props (pageBreakBefore, keepNext) flowed into the
38
+ * frame. After the new path takes over, this paragraph becomes
39
+ * empty in the source; the importer can treat the InlineFrame as
40
+ * REPLACING it in the body block stream.
41
+ */
42
+ export interface ParsedInlineFrame {
43
+ frame: InlineFrame;
44
+ drawingEl: Element;
45
+ hostParagraphEl: Element;
46
+ }
47
+ /**
48
+ * Walk every `<w:drawing>/<wp:inline>` in the document. For drawings
49
+ * whose payload includes at least one `<wps:txbx>`, emit one
50
+ * `InlineFrame`. Returns the frames in document order.
51
+ *
52
+ * When `claim` is true (the default), each claimed `<w:drawing>` is
53
+ * REMOVED from the input XML so the legacy `liftTextBoxContent` pass
54
+ * downstream can't double-process it. The host paragraph stays in
55
+ * place (now empty) so the body walker still emits a paragraph
56
+ * block at the right position — which the importer then swaps for
57
+ * the corresponding `InlineFrame` via `ConvertOptions.replaceParagraphs`.
58
+ *
59
+ * Set `claim: false` to inspect frames without mutating the XML
60
+ * (used by unit tests).
61
+ */
62
+ export declare function parseInlineFrames(xmlDoc: Document, ctx: InlineFramesContext, claim?: boolean): ParsedInlineFrame[];
@@ -0,0 +1,2 @@
1
+ import { NumberingDefinition } from '../../doc/types';
2
+ export declare function parseNumberingXml(xml: string | undefined): NumberingDefinition[];
@@ -0,0 +1,24 @@
1
+ import { Paragraph } from '../../doc/types';
2
+ /**
3
+ * Shared context for importing a body — rels + media lookup. Lives here
4
+ * (rather than in `document.ts`) so `tables.ts` can pull it without
5
+ * forming a `document.ts` ↔ `tables.ts` import cycle.
6
+ */
7
+ export interface ConvertContext {
8
+ /** Rels map (`rId` → target path). Used for image embed resolution. */
9
+ rels: Map<string, string>;
10
+ /** When true, `<w:lastRenderedPageBreak/>` markers are honoured as
11
+ * forced page breaks (i.e. translated to `pageBreakBefore: true`).
12
+ * Caller sets this after counting hints per document: a meaningful
13
+ * number of hints (≥3 here; `InlineFramesContext` documents ≥10 — TODO confirm which is current) indicates Word's layout produced reliable
14
+ * page boundaries; a stray single hint is usually stale and
15
+ * ignored. */
16
+ honorLastRenderedPageBreaks?: boolean;
17
+ }
18
+ /**
19
+ * Convert a single `<w:p>` element into a Paragraph block. Handles
20
+ * paragraph formatting (heading style, alignment, spacing, numbering),
21
+ * runs (text/hyperlink/drawing), and image embed resolution via the
22
+ * rels map carried in `ctx`.
23
+ */
24
+ export declare function convertParagraph(p: Element, ctx: ConvertContext, activeComments?: Set<number>): Paragraph;
@@ -0,0 +1,27 @@
1
+ import { ImportedRun } from './runs';
2
+ import { ParagraphFormat } from '../types';
3
+ /** Source-order paragraph item: either a flat run or a hyperlink-wrapped group. */
4
+ export type ImportedItem = {
5
+ kind: "run";
6
+ run: ImportedRun;
7
+ } | {
8
+ kind: "hyperlink";
9
+ relId?: string;
10
+ runs: ImportedRun[];
11
+ };
12
+ export interface ImportedParagraph {
13
+ /** Items in document order. Hyperlinks contain inner runs. */
14
+ items: ImportedItem[];
15
+ format: ParagraphFormat;
16
+ }
17
+ /**
18
+ * Read a single `<w:p>` into an `ImportedParagraph`.
19
+ *
20
+ * `activeComments` is an *external* set the caller threads across
21
+ * paragraphs so comment ranges (`<w:commentRangeStart/End>`) that span
22
+ * multiple paragraphs tag the middle paragraphs' runs too. When
23
+ * omitted, a fresh empty set is used — fine for contexts where ranges
24
+ * shouldn't cross the paragraph (footnote bodies, comment bodies,
25
+ * table cells).
26
+ */
27
+ export declare function readParagraph(p: Element, activeComments?: Set<number>): ImportedParagraph;
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Parse a `_rels/*.rels` file into a map of `Id` → `Target`. Per the OPC
3
+ * convention a relative Target is resolved against the directory of the
+ * source part the .rels file belongs to (the parent of `_rels/`), not
+ * against the `_rels/` directory itself.
+ *
+ * NOTE(review): whether targets are returned verbatim or normalised is
+ * not visible from this declaration — confirm before resolving paths.
4
+ */
5
+ export declare function parseRels(xmlSrc: string): Map<string, string>;
@@ -0,0 +1,64 @@
1
+ import { RunFormat } from '../types';
2
+ /** Position of a floating drawing as carried through by the importer.
+ * Offsets are in EMU; `relativeFromH` / `relativeFromV` name the frame
+ * of reference each offset is measured from and are mapped 1:1 to
3
+ * the `DrawingAnchor.relativeFromH` / `relativeFromV` AST values. */
4
+ export interface ImportedAnchor {
5
+ offsetXEmu: number;
6
+ offsetYEmu: number;
7
+ relativeFromH: "page" | "margin" | "column" | "character";
8
+ relativeFromV: "page" | "margin" | "paragraph" | "line";
9
+ behindDoc?: boolean; // NOTE(review): presumably mirrors the `behindDoc` attribute of `<wp:anchor>` — confirm
10
+ }
11
+ /** Drawing info extracted from a `<w:drawing>` inside a run. Every field is optional; sizes are in EMU. */
12
+ export interface ImportedDrawing {
13
+ /** Relationship id of the embedded image (`<a:blip r:embed="rIdN"/>`). */
14
+ embedRelId?: string;
15
+ widthEmu?: number;
16
+ heightEmu?: number;
17
+ altText?: string;
18
+ /** Present when the drawing is a `<wp:anchor>` (floating) rather than
19
+ * `<wp:inline>`; absent for inline drawings. The renderer positions the image absolutely via
20
+ * these coordinates. */
21
+ anchor?: ImportedAnchor;
22
+ }
23
+ /**
24
+ * Result of reading one `<w:r>` element (see `readRun`): the run's
+ * `text` and `format`, plus optional markers for breaks, drawings,
+ * footnote/comment references, revisions and fields. The document
25
+ * converter maps the format flags onto the native `RunProperties` shape.
26
+ */
27
+ export interface ImportedRun {
28
+ text: string;
29
+ format: RunFormat;
30
+ /** True if this run was `<w:br/>`; `text` is empty in that case. */
31
+ isHardBreak: boolean;
32
+ /** Type of break for `isHardBreak` runs — line (Shift-Enter), page
33
+ * (force new page), or column (force next column in a multi-column
34
+ * section). Defaults to "line" when omitted. */
35
+ breakType?: "line" | "page" | "column";
36
+ /** Set when the run wraps a `<w:drawing>` (image), either inline or
+ * anchored — see `ImportedDrawing.anchor`. */
37
+ drawing?: ImportedDrawing;
38
+ /** Set when the run wraps a `<w:footnoteReference w:id="N"/>`. */
39
+ footnoteRefId?: number;
40
+ /** Set when the run wraps a `<w:commentReference w:id="N"/>`. */
41
+ commentRefId?: number;
42
+ /** Set when the run is inside a `<w:ins>` / `<w:del>` wrapper. */
43
+ revision?: {
44
+ type: "ins" | "del";
45
+ author?: string;
46
+ date?: string;
47
+ };
48
+ /** Set when the run is between a `<w:commentRangeStart w:id="N"/>`
49
+ * and matching `<w:commentRangeEnd>`. Multiple ids when nested /
50
+ * overlapping comments cover the run. */
51
+ commentIds?: readonly number[];
52
+ /**
53
+ * Set when the source was a `<w:fldSimple w:instr="...">`. The
54
+ * paragraph converter emits a `FieldRun` from this — used for
55
+ * page-number tokens (`PAGE` / `NUMPAGES`) in headers and footers
56
+ * so the round-trip through `blocksToTemplate` preserves `{page}` /
57
+ * `{pages}`.
58
+ */
59
+ field?: {
60
+ instruction: string;
61
+ cached?: string;
62
+ };
63
+ }
64
+ export declare function readRun(r: Element): ImportedRun; // reads one `<w:r>` element into an `ImportedRun`
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Rendering-relevant subset of `word/settings.xml`, as returned by
+ * `parseSettingsXml`.
3
+ *
4
+ * Besides the default tab stop, the two flags we care about are the ones that decide
5
+ * whether Word applies its implicit "Word 2010+ Normal style" baseline
6
+ * (line ≈ 1.08, after = 8pt) at render time even when styles.xml
7
+ * leaves Normal empty:
8
+ *
9
+ * - `<w:compatibilityMode w:val="14"/>` — Word's rendering era.
10
+ * 12+ = Word 2007+ with the modern Normal defaults.
11
+ * <12 = legacy mode, no implicit spacing.
12
+ * - `<w:doNotUseHTMLParagraphAutoSpacing/>` — when present, Word
13
+ * explicitly opts out of the modern auto-spacing and renders
14
+ * tight regardless of compatibilityMode.
15
+ *
16
+ * This is the missing piece that explains why a Word-authored docx
17
+ * with an empty `<w:style w:styleId="Normal">` renders with visible
18
+ * inter-paragraph breathing in Word (compatibilityMode 14, auto-
19
+ * spacing on), but a programmatically-generated docx with no
20
+ * `<w:compatibilityMode>` renders tight in Word too. Without this
21
+ * gate, Sobree's baseline-injection either over- or under-applies
22
+ * depending on the source.
+ *
+ * NOTE(review): Word-authored files normally store the mode as
+ * `<w:compatSetting w:name="compatibilityMode" w:val="N"/>` inside
+ * `<w:compat>`; confirm which spelling the parser actually reads.
23
+ */
24
+ export interface DocSettings {
25
+ /** Numeric compatibility mode from `<w:compatibilityMode>`. Undefined
26
+ * if the docx omits it (treat as legacy = pre-Word-2007). */
27
+ compatibilityMode?: number;
28
+ /** True when `<w:doNotUseHTMLParagraphAutoSpacing/>` is present. */
29
+ doNotUseHTMLParagraphAutoSpacing: boolean;
30
+ /** `<w:defaultTabStop w:val="N"/>` in twips. Used as the interval
31
+ * for tab advances in paragraphs that don't declare their own
32
+ * `<w:tabs>`. Word's factory default is 720 twips (0.5"). */
33
+ defaultTabStopTwips?: number;
34
+ }
35
+ export declare function parseSettingsXml(xml: string | undefined): DocSettings; // parses `word/settings.xml` text into `DocSettings`; NOTE(review): an undefined `xml` presumably yields defaults — confirm
36
+ /**
37
+ * Should we apply Word's implicit "Normal style" paragraph baseline
38
+ * (line ≈ 1.08, after = 8pt) for paragraphs whose explicit settings
39
+ * leave those fields undefined?
40
+ *
41
+ * Word's rule, distilled: yes when in Word 2007+ rendering mode
42
+ * (compatibilityMode >= 12) AND auto-spacing isn't explicitly turned
43
+ * off. Without this gate, Sobree either over-applies (on a docx-
44
+ * library-style doc that lacks compatibilityMode → renders tight in
45
+ * Word too) or under-applies (on a Word-authored doc whose Normal
46
+ * style is empty → Word fills in defaults, we don't).
+ *
+ * An undefined `compatibilityMode` is documented on `DocSettings` as
+ * legacy, so it presumably yields `false` — NOTE(review): confirm.
47
+ */
48
+ export declare function shouldApplyAutoSpacing(settings: DocSettings): boolean;