npm - @fiduswriter/document - Versions diffs - 0.1.0-alpha.1 - Mend

@fiduswriter/document 0.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

package/LICENSE +661 -0
package/README.md +16 -0
package/jest.config.js +23 -0
package/package.json +59 -0
package/schema.json +1 -0
package/scripts/export-schema.js +16 -0
package/src/bibliography/common.js +92 -0
package/src/bibliography/csl_bib.js +139 -0
package/src/citations/citeproc_sys.js +42 -0
package/src/citations/format.js +194 -0
package/src/common/blob.js +10 -0
package/src/common/file.js +25 -0
package/src/common/index.js +12 -0
package/src/common/network.js +79 -0
package/src/common/text.js +44 -0
package/src/editor/e2ee/encryptor.js +228 -0
package/src/exporter/docx/citations.js +177 -0
package/src/exporter/docx/comments.js +165 -0
package/src/exporter/docx/footnotes.js +240 -0
package/src/exporter/docx/images.js +101 -0
package/src/exporter/docx/index.js +185 -0
package/src/exporter/docx/lists.js +260 -0
package/src/exporter/docx/math.js +46 -0
package/src/exporter/docx/metadata.js +289 -0
package/src/exporter/docx/rels.js +193 -0
package/src/exporter/docx/render.js +941 -0
package/src/exporter/docx/richtext.js +1182 -0
package/src/exporter/docx/tables.js +112 -0
package/src/exporter/docx/tools.js +50 -0
package/src/exporter/epub/index.js +142 -0
package/src/exporter/epub/templates.js +140 -0
package/src/exporter/epub/tools.js +96 -0
package/src/exporter/html/citations.js +121 -0
package/src/exporter/html/convert.js +813 -0
package/src/exporter/html/index.js +192 -0
package/src/exporter/html/templates.js +34 -0
package/src/exporter/html/tools.js +50 -0
package/src/exporter/jats/bibliography.js +183 -0
package/src/exporter/jats/citations.js +109 -0
package/src/exporter/jats/convert.js +871 -0
package/src/exporter/jats/index.js +92 -0
package/src/exporter/jats/templates.js +35 -0
package/src/exporter/jats/text.js +72 -0
package/src/exporter/latex/convert.js +934 -0
package/src/exporter/latex/escape_latex.js +21 -0
package/src/exporter/latex/index.js +74 -0
package/src/exporter/latex/readme.js +22 -0
package/src/exporter/native/shrink.js +132 -0
package/src/exporter/odt/citations.js +101 -0
package/src/exporter/odt/footnotes.js +147 -0
package/src/exporter/odt/images.js +115 -0
package/src/exporter/odt/index.js +156 -0
package/src/exporter/odt/math.js +57 -0
package/src/exporter/odt/metadata.js +251 -0
package/src/exporter/odt/render.js +806 -0
package/src/exporter/odt/richtext.js +865 -0
package/src/exporter/odt/styles.js +387 -0
package/src/exporter/odt/track.js +68 -0
package/src/exporter/pandoc/citations.js +98 -0
package/src/exporter/pandoc/convert.js +1017 -0
package/src/exporter/pandoc/index.js +92 -0
package/src/exporter/pandoc/readme.js +8 -0
package/src/exporter/pandoc/tools.js +51 -0
package/src/exporter/print/index.js +177 -0
package/src/exporter/tools/doc_content.js +144 -0
package/src/exporter/tools/file.js +9 -0
package/src/exporter/tools/json.js +73 -0
package/src/exporter/tools/svg.js +29 -0
package/src/exporter/tools/xml.js +531 -0
package/src/exporter/tools/xml_zip.js +95 -0
package/src/exporter/tools/zip.js +90 -0
package/src/exporter/tools/zotero_csl.js +93 -0
package/src/importer/citations.js +129 -0
package/src/importer/docx/citations.js +123 -0
package/src/importer/docx/convert.js +1427 -0
package/src/importer/docx/helpers.js +9 -0
package/src/importer/docx/omml2mathml.js +1448 -0
package/src/importer/docx/parse.js +735 -0
package/src/importer/native/get_images.js +76 -0
package/src/importer/native/update.js +29 -0
package/src/importer/odt/citations.js +87 -0
package/src/importer/odt/convert.js +1855 -0
package/src/importer/pandoc/convert.js +884 -0
package/src/importer/pandoc/helpers.js +84 -0
package/src/importer/zip_analyzer.js +102 -0
package/src/index.js +1 -0
package/src/mathlive/opf_includes.js +24 -0
package/src/schema/common/annotate.js +76 -0
package/src/schema/common/base.js +118 -0
package/src/schema/common/citation.js +62 -0
package/src/schema/common/equation.js +31 -0
package/src/schema/common/figure.js +190 -0
package/src/schema/common/heading.js +43 -0
package/src/schema/common/index.js +40 -0
package/src/schema/common/list.js +95 -0
package/src/schema/common/reference.js +100 -0
package/src/schema/common/table.js +103 -0
package/src/schema/common/track.js +190 -0
package/src/schema/const.js +58 -0
package/src/schema/convert.js +1272 -0
package/src/schema/document/content.js +187 -0
package/src/schema/document/index.js +117 -0
package/src/schema/document/structure.js +452 -0
package/src/schema/export.js +21 -0
package/src/schema/footnotes.js +126 -0
package/src/schema/footnotes_convert.js +31 -0
package/src/schema/i18n.js +595 -0
package/src/schema/index.js +5 -0
package/src/schema/mini_json.js +61 -0
package/src/schema/text.js +22 -0

package/src/importer/docx/convert.js ADDED Viewed

@@ -0,0 +1,1427 @@
+import {MathMLToLaTeX} from "mathml-to-latex"
+import {xmlDOM} from "../../exporter/tools/xml.js"
+import {
+    randomCommentId,
+    randomFigureId,
+    randomHeadingId
+} from "../../schema/common/index.js"
+import {
+    isDocxBibliographyField,
+    isDocxCitationField,
+    isDocxSdtBibliography,
+    isDocxSdtCitation,
+    parseDocxFieldCitation,
+    parseDocxSdtCitation
+} from "./citations.js"
+import {normalizeText} from "./helpers.js"
+import {omml2mathml} from "./omml2mathml.js"
+import {DocxParser} from "./parse.js"
+export class DocxConvert {
+    constructor(zip, importId, template, bibliography) {
+        this.zip = zip
+        this.importId = importId
+        this.template = template
+        this.bibliography = bibliography
+        this.images = {}
+        this.parser = new DocxParser(zip)
+        this.tracks = {}
+        this.currentTracks = []
+        this.currentFields = []
+        this.currentCommentIds = []
+        this.sourcesXml = null
+    }
+    async init() {
+        await this.parser.init()
+        // Load Word-native bibliography sources if present.
+        // This file is required by DocxCitationsParser for CITATION field codes.
+        this.sourcesXml =
+            (await this.zip.file("customXml/item1.xml")?.async("string")) ??
+            null
+        const body = this.parser.document.query("w:body")
+        if (!body) {
+            return {
+                content: {
+                    type: "doc",
+                    content: []
+                },
+                settings: {
+                    import_id: this.importId,
+                    tracked: false,
+                    language: "en-US"
+                },
+                comments: {}
+            }
+        }
+        // Find all reference targets in the document for cross-references
+        this.referenceTargets = this.findReferenceTargets(this.parser.document)
+        const convertedContent = this.convertDocument(body)
+        // Convert document
+        return {
+            content: convertedContent,
+            settings: {
+                import_id: this.importId,
+                tracked: this.hasTrackedChanges(this.parser.document),
+                language: this.detectLanguage(this.parser.document)
+            },
+            comments: this.parser.comments
+        }
+    }
+    convertDocument(body) {
+        const templateParts = this.template.content.content.slice()
+        templateParts.shift() // Remove first element
+        const document = {
+            type: "doc",
+            attrs: {
+                import_id: this.importId
+            },
+            content: []
+        }
+        // Add title (required first element)
+        const title = this.extractTitle(body)
+        document.content.push({
+            type: "title",
+            content: title.content || [
+                {type: "text", text: gettext("Untitled")}
+            ]
+        })
+        title.containerNodes.forEach(node => {
+            node.parentElement.removeChild(node)
+        })
+        document.attrs.title =
+            title.content.map(node => node.textContent).join("") ||
+            gettext("Untitled")
+        // Extract metadata sections
+        const metadata = this.extractMetadata(body)
+        metadata.forEach(({type, content}) => {
+            const templatePart = templateParts.find(
+                part => part.attrs.metadata === type
+            )
+            const attrs = {}
+            if (templatePart.attrs.hidden) {
+                attrs.hidden = false
+            }
+            if (templatePart) {
+                document.content.push({
+                    type: templatePart.type,
+                    attrs: {
+                        ...templatePart.attrs,
+                        ...attrs
+                    },
+                    content: content.content
+                })
+                // Remove paragraphs from content so they are not added to body
+                content.containerNodes.forEach(node => {
+                    node.parentElement?.removeChild(node)
+                })
+            }
+        })
+        // Extract main content sections
+        const sections = this.groupContentIntoSections(body)
+        // Map sections to template parts
+        sections.forEach(section => {
+            const templatePart = this.findMatchingTemplatePart(
+                section.title,
+                templateParts
+            )
+            if (templatePart) {
+                document.content.push({
+                    type: "richtext_part",
+                    attrs: {
+                        title: templatePart.attrs.title,
+                        id: templatePart.attrs.id,
+                        metadata: templatePart.attrs.metadata || undefined,
+                        marks: templatePart.attrs.marks || [
+                            "strong",
+                            "em",
+                            "link"
+                        ]
+                    },
+                    content: section.content
+                })
+            }
+        })
+        // Add remaining content to body section
+        const unassignedContent = sections
+            .filter(
+                section =>
+                    !this.findMatchingTemplatePart(section.title, templateParts)
+            )
+            .flatMap(section => section.content)
+        if (unassignedContent.length) {
+            const bodyTemplatePart = templateParts.find(
+                part => !part.attrs.metadata && part.type === "richtext_part"
+            )
+            document.content.push({
+                type: "richtext_part",
+                attrs: {
+                    title: bodyTemplatePart
+                        ? bodyTemplatePart.attrs.title
+                        : "Body",
+                    id: bodyTemplatePart ? bodyTemplatePart.attrs.id : "body",
+                    marks: ["strong", "em", "link"]
+                },
+                content: unassignedContent
+            })
+        }
+        return document
+    }
+    extractMetadata(body) {
+        const metadata = []
+        // Try structured contributor data from custom properties first
+        const contributorsByRole = this.extractContributorsFromCustomProps()
+        if (Object.keys(contributorsByRole).length) {
+            Object.entries(contributorsByRole).forEach(
+                ([role, contributors]) => {
+                    metadata.push({
+                        type: role,
+                        content: {content: contributors, containerNodes: []}
+                    })
+                }
+            )
+        } else {
+            // Fall back to legacy author extraction
+            const authors = this.extractAuthors(body)
+            if (authors.content.length) {
+                metadata.push({
+                    type: "authors",
+                    content: authors
+                })
+            }
+        }
+        // Extract abstract if present
+        const abstract = this.extractAbstract(body)
+        if (abstract.content.length) {
+            metadata.push({
+                type: "abstract",
+                content: abstract
+            })
+        }
+        // Extract keywords if present
+        const keywords = this.extractKeywords(body)
+        if (keywords.content.length) {
+            metadata.push({
+                type: "keywords",
+                content: keywords
+            })
+        }
+        return metadata
+    }
+    extractContributorsFromCustomProps() {
+        if (!this.parser.customDoc) {
+            return {}
+        }
+        const properties = this.parser.customDoc.queryAll("property")
+        const contributors = []
+        properties.forEach(prop => {
+            const name = prop.getAttribute("name")
+            if (!name || !name.startsWith("fidus_contributor_")) {
+                return
+            }
+            const match = name.match(/^fidus_contributor_(\d+)_(\w+)$/)
+            if (!match) {
+                return
+            }
+            const num = parseInt(match[1])
+            const field = match[2]
+            const lpwstr = prop.query("vt:lpwstr")
+            const value = lpwstr ? lpwstr.textContent : ""
+            if (!contributors[num - 1]) {
+                contributors[num - 1] = {
+                    type: "contributor",
+                    attrs: {
+                        firstname: "",
+                        lastname: "",
+                        email: "",
+                        institution: "",
+                        id_type: "",
+                        id_value: "",
+                        role: ""
+                    }
+                }
+            }
+            if (field === "role") {
+                contributors[num - 1].attrs.role = value
+            } else if (
+                [
+                    "firstname",
+                    "lastname",
+                    "email",
+                    "institution",
+                    "id_type",
+                    "id_value"
+                ].includes(field)
+            ) {
+                contributors[num - 1].attrs[field] = value
+            }
+        })
+        const byRole = {}
+        contributors.forEach(contributor => {
+            if (!contributor) {
+                return
+            }
+            const role = contributor.attrs.role || "authors"
+            if (!byRole[role]) {
+                byRole[role] = []
+            }
+            byRole[role].push(contributor)
+        })
+        return byRole
+    }
+    extractAuthors(body) {
+        const authors = []
+        // Try to find author information in metadata
+        const authorNodes = body
+            .queryAll("w:pStyle", {"w:val": "Author"})
+            .map(pStyle => pStyle.closest("w:p"))
+            .filter(p => p)
+        authorNodes.forEach(authorNode => {
+            const authorText = this.getTextContent(authorNode)
+            const [firstname = "", lastname = ""] = authorText.split(" ", 2)
+            authors.push({
+                type: "contributor",
+                attrs: {
+                    firstname,
+                    lastname,
+                    email: "",
+                    institution: ""
+                }
+            })
+        })
+        if (authors.length) {
+            return {
+                content: authors,
+                containerNodes: authorNodes
+            }
+        }
+        // Also check Creator in document properties
+        const creator = this.parser.coreDoc.query("dc:creator")?.textContent
+        if (creator) {
+            const [firstname = "", lastname = ""] = creator.split(" ", 2)
+            return {
+                content: [
+                    {
+                        type: "contributor",
+                        attrs: {
+                            firstname,
+                            lastname,
+                            email: "",
+                            institution: ""
+                        }
+                    }
+                ],
+                containerNodes: []
+            }
+        }
+        return {content: [], containerNodes: []}
+    }
+    extractAbstract(body) {
+        // Look for section with Abstract style or heading
+        const abstractNodes = body
+            .queryAll("w:pStyle", {"w:val": "Abstract"})
+            .map(pStyle => pStyle.closest("w:p"))
+            .filter(p => p)
+        if (abstractNodes.length) {
+            return {
+                content: abstractNodes.map(abstractNode =>
+                    this.convertBlock(abstractNode)
+                ),
+                containerNodes: abstractNodes
+            }
+        }
+        const extractedPart = this.extractPartOnTitle(body, ["Abstract"])
+        if (extractedPart.content.length) {
+            return {
+                content: extractedPart.content.map(abstractNode =>
+                    this.convertBlock(abstractNode)
+                ),
+                containerNodes: extractedPart.content.concat([
+                    extractedPart.header
+                ])
+            }
+        }
+        return {content: [], containerNodes: []}
+    }
+    extractKeywords(body) {
+        let extraNodes = []
+        // Look for keywords section or metadata
+        let keywordNodes = body
+            .queryAll("w:pStyle", {"w:val": "Keywords"})
+            .map(pStyle => pStyle.closest("w:p"))
+            .filter(p => p)
+        if (!keywordNodes.length) {
+            // If no keywords section is found, look for a title called "Keywords"
+            const extractedPart = this.extractPartOnTitle(
+                body,
+                ["Keywords", "Keywords:", "Keyword"],
+                1
+            )
+            if (extractedPart.content.length) {
+                keywordNodes = extractedPart.content
+                extraNodes = extractedPart.header ? [extractedPart.header] : []
+            }
+        }
+        if (keywordNodes) {
+            return {
+                content: keywordNodes
+                    .map(keywordsNode => this.getTextContent(keywordsNode))
+                    .flatMap(str => str.split(/[,;|:]+/)) // Split on multiple separators
+                    .map(keyword => keyword.trim()) // Trim whitespace
+                    .filter(keyword => keyword.length > 0)
+                    .map(keyword => ({
+                        type: "tag",
+                        attrs: {
+                            tag: keyword
+                        }
+                    })),
+                containerNodes: keywordNodes.concat(extraNodes)
+            }
+        }
+        return {content: [], containerNodes: []}
+    }
+    extractPartOnTitle(body, titleWords, maxPars = false) {
+        // Fall back to heading starting with TITLEWORD in text
+        if (typeof titleWords === "string") {
+            titleWords = [titleWords]
+        }
+        const headingPars = body
+            .queryAll("w:pStyle", {
+                "w:val": [
+                    "Heading1",
+                    "Heading2",
+                    "Heading3",
+                    "Heading4",
+                    "Heading5",
+                    "Heading6",
+                    "Heading7",
+                    "Heading8",
+                    "Heading9"
+                ]
+            })
+            .map(pStyle => pStyle.closest("w:p"))
+            .filter(p => p)
+        const header = headingPars.find(p =>
+            titleWords.includes(this.getTextContent(p).trim())
+        )
+        const content = []
+        if (header && header.nextSibling) {
+            //const content = []
+            //const containerNodes = [sectionHeader]
+            const headerLevel = this.getParaStyle(header).level
+            let searchPar = header
+            // Add everything to abstract until next heading with the same or lower level
+            while (
+                searchPar.nextSibling &&
+                (!maxPars || content.length < maxPars)
+            ) {
+                searchPar = searchPar.nextSibling
+                const paraStyle = this.getParaStyle(searchPar)
+                if (paraStyle.isHeading && paraStyle.level <= headerLevel) {
+                    break
+                }
+                content.push(searchPar)
+            }
+        }
+        return {header, content}
+    }
+    groupContentIntoSections(body) {
+        const sections = []
+        let currentSection = {
+            title: null,
+            content: []
+        }
+        const skippedBlocks = []
+        body.children.forEach(node => {
+            if (skippedBlocks.includes(node)) {
+                return
+            }
+            if (node.tagName !== "w:p") {
+                return
+            }
+            const style = this.getParaStyle(node)
+            const title = this.getSectionTitle(node, style)
+            if (title && style.isHeading) {
+                if (currentSection.content.length) {
+                    sections.push(currentSection)
+                }
+                currentSection = {
+                    title,
+                    content: []
+                }
+            }
+            const block = this.convertBlock(node, skippedBlocks)
+            if (block) {
+                currentSection.content.push(block)
+            }
+        })
+        if (currentSection.content.length) {
+            sections.push(currentSection)
+        }
+        return sections
+    }
+    getSectionTitle(node, style) {
+        if (!node || !style) {
+            return null
+        }
+        // For headings, use text content as section title
+        if (style.isHeading && style.level <= 4) {
+            return this.getTextContent(node)
+        }
+        // Check style name for section indicators
+        if (style.name) {
+            const name = style.name.toLowerCase()
+            if (name.includes("section") || name.includes("title")) {
+                return this.getTextContent(node)
+            }
+        }
+        return null
+    }
+    findMatchingTemplatePart(sectionTitle, templateParts) {
+        if (!sectionTitle) {
+            return null
+        }
+        // Try exact match first
+        let matchingPart = templateParts.find(
+            part =>
+                part.type === "richtext_part" &&
+                !part.attrs.metadata &&
+                part.attrs.title.toLowerCase() === sectionTitle.toLowerCase()
+        )
+        if (!matchingPart) {
+            // Try fuzzy matching
+            matchingPart = templateParts.find(
+                part =>
+                    part.type === "richtext_part" &&
+                    !part.attrs.metadata &&
+                    this.isSimilarTitle(part.attrs.title, sectionTitle)
+            )
+        }
+        return matchingPart
+    }
+    isSimilarTitle(title1, title2) {
+        const normalized1 = normalizeText(title1)
+        const normalized2 = normalizeText(title2)
+        return (
+            normalized1.includes(normalized2) ||
+            normalized2.includes(normalized1)
+        )
+    }
+    getTextContent(node) {
+        return node
+            .queryAll("w:t")
+            .map(t => t.textContent)
+            .join("")
+    }
+    extractTitle(body) {
+        // First try to find paragraph with Title style
+        const titlePars = body
+            .queryAll("w:pStyle", {"w:val": "Title"})
+            .map(pStyle => pStyle.closest("w:p"))
+            .filter(p => p)
+        if (titlePars.length) {
+            return {
+                content: this.convertInline(titlePars[0]),
+                containerNodes: [titlePars[0]]
+            }
+        }
+        // Fall back to first heading
+        const headingPars = body
+            .queryAll("w:pStyle", {
+                "w:val": [
+                    "Heading1",
+                    "Heading2",
+                    "Heading3",
+                    "Heading4",
+                    "Heading5",
+                    "Heading6",
+                    "Heading7",
+                    "Heading8",
+                    "Heading9"
+                ]
+            })
+            .map(pStyle => pStyle.closest("w:p"))
+            .filter(p => p)
+        if (headingPars.length) {
+            return {
+                content: this.convertInline(headingPars[0]),
+                containerNodes: [headingPars[0]]
+            }
+        }
+        return {
+            content: [
+                {
+                    type: "text",
+                    text: gettext("Untitled")
+                }
+            ],
+            containerNodes: []
+        }
+    }
+    inBibliography(node) {
+        // Check if we currently are in a field.
+        const currentField = this.currentFields[this.currentFields.length - 1]
+        if (
+            currentField &&
+            isDocxBibliographyField(currentField.instructions)
+        ) {
+            return true
+        }
+        // Check every SDT block inside this paragraph.
+        for (const sdt of node.queryAll("w:sdt")) {
+            if (isDocxSdtBibliography(sdt)) {
+                return true
+            }
+        }
+        return false
+    }
+    /**
+     * Convert one body-level node into a block node.
+     *
+     * Only `w:p` elements are handled. Depending on the paragraph style and
+     * content the result is a heading, a list, a figure (optionally merged
+     * with an adjacent caption paragraph) or a plain paragraph.
+     *
+     * @param node Element to convert.
+     * @param {Array} skippedBlocks Shared list of sibling nodes that were
+     *     merged into a figure here; it is mutated so callers can skip them.
+     * @returns The converted block with track information applied, or null
+     *     for non-paragraphs and bibliography paragraphs. May also be null
+     *     when convertFigure() could not produce a figure.
+     */
+    convertBlock(node, skippedBlocks = []) {
+        if (node.tagName !== "w:p") {
+            return null
+        }
+        // Bibliography state as it is before this paragraph is converted.
+        const inBibliography = this.inBibliography(node)
+        let converted
+        const style = this.getParaStyle(node)
+        if (style.isHeading) {
+            converted = this.convertHeading(node, style)
+        } else if (style.numbering) {
+            converted = this.convertListItem(node, style)
+        } else if (
+            style.isCaption &&
+            (node.query("w:drawing") || node.query("w:pict"))
+        ) {
+            // Caption paragraph that itself contains the image.
+            converted = this.convertFigure(node, node)
+        } else if (
+            style.isCaption &&
+            (node.nextSibling?.query("w:drawing") ||
+                node.nextSibling?.query("w:pict")) &&
+            !skippedBlocks.includes(node.nextSibling)
+        ) {
+            // Caption followed by the image paragraph: merge both and mark
+            // the image paragraph as consumed.
+            skippedBlocks.push(node.nextSibling)
+            converted = this.convertFigure(node.nextSibling, node)
+        } else if (node.query("w:drawing") || node.query("w:pict")) {
+            if (
+                node.nextSibling &&
+                this.getParaStyle(node.nextSibling).isCaption
+            ) {
+                // Image followed by its caption paragraph: merge both and
+                // mark the caption paragraph as consumed.
+                skippedBlocks.push(node.nextSibling)
+                converted = this.convertFigure(node, node.nextSibling)
+            } else {
+                converted = this.convertFigure(node)
+            }
+        } else {
+            converted = this.convertParagraph(node)
+        }
+        // The check is repeated after conversion.
+        // NOTE(review): presumably converting the inline content updates
+        // this.currentFields, so a paragraph can enter a bibliography field
+        // while being converted - confirm against convertInline.
+        if (inBibliography || this.inBibliography(node)) {
+            // We skip bibliography paragraphs
+            return null
+        }
+        return this.wrapTrackChanges(node, converted)
+    }
+    wrapTrackChanges(node, content) {
+        if (!content || !node.previousSibling) {
+            return content
+        }
+        const track = this.getTracksFromNode(node.previousSibling)
+        if (!track) {
+            return content
+        }
+        return {
+            ...content,
+            attrs: Object.assign({}, content.attrs || {}, {track})
+        }
+    }
+    getTracksFromNode(node) {
+        const deletion = node.query("w:pPr")?.query("w:del")
+        const insertion = node.query("w:pPr")?.query("w:ins")
+        const tracks = []
+        if (insertion) {
+            const date = new Date(insertion.getAttribute("w:date"))
+            const date10 = Math.floor(date.getTime() / 60000) * 10
+            tracks.push({
+                type: "insertion",
+                user: 0, // Default user ID
+                username: insertion.getAttribute("w:author"),
+                date: date10
+            })
+        }
+        if (deletion) {
+            const date = new Date(deletion.getAttribute("w:date"))
+            const date10 = Math.floor(date.getTime() / 60000) * 10
+            tracks.push({
+                type: "deletion",
+                user: 0, // Default user ID
+                username: deletion.getAttribute("w:author"),
+                date: date10
+            })
+        }
+        if (tracks.length === 0) {
+            return null
+        }
+        return tracks
+    }
+    getParaStyle(node) {
+        const pStyle = node.query("w:pStyle")
+        const styleId = pStyle?.getAttribute("w:val")
+        const style = this.parser.styles[styleId] || {}
+        const numPr = node.query("w:numPr")
+        const numId = numPr?.query("w:numId")?.getAttribute("w:val")
+        const ilvl = parseInt(
+            numPr?.query("w:ilvl")?.getAttribute("w:val") || "0"
+        )
+        return {
+            ...style,
+            numbering: numId
+                ? {
+                      id: numId,
+                      level: ilvl,
+                      definition: this.parser.numbering[numId]
+                  }
+                : null
+        }
+    }
+    convertParagraph(node) {
+        const pStyle = node.query("w:pStyle")
+        const styleId = pStyle?.getAttribute("w:val")
+        // Check if this is a code block (Code style or inherited from one)
+        if (
+            styleId &&
+            (this.parser.isCodeStyle?.(styleId) || styleId === "Code")
+        ) {
+            return {
+                type: "code_block",
+                attrs: {
+                    track: [],
+                    language: "",
+                    category: "",
+                    title: "",
+                    id: ""
+                },
+                content: this.convertInline(node)
+            }
+        }
+        return {
+            type: "paragraph",
+            content: this.convertInline(node)
+        }
+    }
+    convertHeading(node, style) {
+        return {
+            type: `heading${style.level}`,
+            attrs: {
+                id: randomHeadingId(),
+                level: style.level
+            },
+            content: this.convertInline(node)
+        }
+    }
+    convertListItem(node, style) {
+        const numbering = style.numbering
+        const level = numbering.definition?.levels[numbering.level]
+        return {
+            type: level?.format === "bullet" ? "bullet_list" : "ordered_list",
+            attrs: {
+                id: `L${Math.random().toString(36).slice(2)}`,
+                level: numbering.level,
+                start: level?.start || 1
+            },
+            content: [
+                {
+                    type: "list_item",
+                    content: [this.convertParagraph(node)]
+                }
+            ]
+        }
+    }
+    convertFigure(node, captionNode = null) {
+        let captionBlock, captionOrder
+        if (captionNode) {
+            captionBlock = this.convertParagraph(captionNode)
+            captionOrder = node.nextSibling === captionNode ? "after" : "before"
+        }
+        const drawing = node.query("w:drawing")
+        if (!drawing) {
+            return null
+        }
+        const blip = drawing.query("a:blip")
+        if (!blip) {
+            return null
+        }
+        const rId = blip.getAttribute("r:embed")
+        const rel = this.parser.relationships[rId]
+        if (!rel) {
+            return null
+        }
+        const imagePath = rel.target.split("/").pop()
+        const imageBlob = this.parser.images[imagePath]
+        if (!imageBlob) {
+            return null
+        }
+        // <a:ext cx="5753598" cy="4463556" />
+        //
+        const size = drawing.query("a:ext")
+        const width = parseInt(size.getAttribute("cx") || 0) / 9525 // In EMUs
+        const height = parseInt(size.getAttribute("cy") || 0) / 9525 // In EMUs
+        const imageId = Math.floor(Math.random() * 1000000)
+        this.images[imageId] = {
+            id: imageId,
+            title: imagePath,
+            image: imagePath,
+            file: imageBlob,
+            copyright: {
+                holder: false,
+                year: false,
+                freeToRead: true,
+                licenses: []
+            },
+            checksum: 0,
+            width,
+            height
+        }
+        const image = {
+            type: "image",
+            attrs: {
+                image: imageId
+            }
+        }
+        const caption = {
+            type: "figure_caption",
+            content: captionBlock?.content || []
+        }
+        const content =
+            captionOrder === "before" ? [caption, image] : [image, caption]
+        return {
+            type: "figure",
+            attrs: {
+                id: randomFigureId(),
+                aligned: "center",
+                width: 100,
+                caption: !!captionBlock
+            },
+            content
+        }
+    }
+    convertInline(node) {
+        const content = []
+        // We'll process all inline nodes in document order
+        node.children.forEach(child => {
+            let contentReceiver = content
+            const currentField =
+                this.currentFields[this.currentFields.length - 1]
+            if (currentField) {
+                if (currentField.status === "instruction") {
+                    // We're currently inside the instruction part of a fieldChar
+                    const instrText = child.query("w:instrText")
+                    if (instrText) {
+                        currentField.instructions += instrText.textContent
+                    }
+                }
+                if (currentField.status === "display") {
+                    // We're currently inside the display part of a fieldChar
+                    contentReceiver = currentField.display
+                }
+            }
+            if (child.tagName === "w:r") {
+                // A run
+                const fieldChar = child.query("w:fldChar")
+                if (fieldChar) {
+                    let currentField
+                    let rendercurrentField = false
+                    const type = fieldChar.getAttribute("w:fldCharType")
+                    if (type === "begin") {
+                        currentField = {
+                            status: "instruction",
+                            display: [],
+                            instructions: "",
+                            data: null
+                        }
+                        this.currentFields.push(currentField)
+                    } else if (type === "separate") {
+                        currentField =
+                            this.currentFields[this.currentFields.length - 1]
+                        currentField.status = "display"
+                        contentReceiver = currentField.display
+                    } else if (type === "end") {
+                        currentField = this.currentFields.pop()
+                        // If a fieldChar is closed and there was no display part,
+                        // or it is inside another fieldChar, do nothing
+                        if (
+                            currentField &&
+                            currentField.status === "display" &&
+                            this.currentFields.length === 0
+                        ) {
+                            rendercurrentField = true
+                            contentReceiver = content
+                        }
+                    }
+                    // Capture base64-encoded field data (used by EndNote)
+                    const fldDataNode = fieldChar.query("w:fldData")
+                    if (fldDataNode && currentField) {
+                        currentField.data = fldDataNode.textContent || null
+                    }
+                    if (rendercurrentField && currentField) {
+                        this.renderField(currentField).forEach(node =>
+                            contentReceiver.push(node)
+                        )
+                    }
+                    return
+                }
+                // Process footnote references
+                const footnoteRef = child.query("w:footnoteReference")
+                if (footnoteRef) {
+                    const footnoteId = footnoteRef.getAttribute("w:id")
+                    if (this.parser.footnotes[footnoteId]) {
+                        contentReceiver.push(this.convertFootnote(footnoteId))
+                    }
+                    return
+                }
+                // Process endnote references
+                const endnoteRef = child.query("w:endnoteReference")
+                if (endnoteRef) {
+                    const endnoteId = endnoteRef.getAttribute("w:id")
+                    if (this.parser.endnotes[endnoteId]) {
+                        contentReceiver.push(
+                            this.convertFootnote(endnoteId, true)
+                        )
+                    }
+                    return
+                }
+                // Process text with formatting
+                const text =
+                    child.query("w:t")?.textContent ||
+                    child.query("w:delText")?.textContent
+                if (!text) {
+                    // Process line breaks
+                    if (child.query("w:br")) {
+                        contentReceiver.push({type: "hard_break"})
+                    }
+                    return
+                }
+                const rPr = child.query("w:rPr")
+                const formatting = rPr
+                    ? this.parser.extractRunProperties(rPr)
+                    : {}
+                const insertion = child.closest("w:ins")
+                const deletion = child.closest("w:del")
+                contentReceiver.push({
+                    type: "text",
+                    text,
+                    marks: this.getCurrentMarks(formatting, insertion, deletion)
+                })
+            } else if (child.tagName === "w:commentRangeStart") {
+                const commentId = child.getAttribute("w:id")
+                if (commentId && this.parser.comments[commentId]) {
+                    this.currentCommentIds.push(commentId)
+                }
+                return
+            } else if (child.tagName === "w:commentRangeEnd") {
+                const commentId = child.getAttribute("w:id")
+                if (commentId) {
+                    const index = this.currentCommentIds.indexOf(commentId)
+                    if (index !== -1) {
+                        this.currentCommentIds.splice(index, 1)
+                    }
+                }
+                return
+            } else if (
+                child.tagName === "w:r" &&
+                child.query("w:commentReference")
+            ) {
+                // Comment reference - just skip it (we already handle the range)
+                return
+            } else if (child.tagName === "w:hyperlink") {
+                // Process hyperlink
+                const rId = child.getAttribute("r:id")
+                const anchor = child.getAttribute("w:anchor")
+                const relationship = rId ? this.parser.relationships[rId] : null
+                const href =
+                    relationship?.target || (anchor ? `#${anchor}` : null)
+                if (href) {
+                    const runs = child.queryAll("w:r")
+                    const text = runs
+                        .map(run => run.query("w:t")?.textContent || "")
+                        .join("")
+                    if (text) {
+                        // Check if this is an internal link (bookmark reference) that should be a cross-reference
+                        if (anchor && this.referenceTargets[anchor]) {
+                            // If the link text is similar to the target text, treat it as a cross-reference
+                            const target = this.referenceTargets[anchor]
+                            const targetText = target.text || anchor
+                            // Compare normalized versions to check if text matches target
+                            if (
+                                normalizeText(text) ===
+                                    normalizeText(targetText) ||
+                                // Also check for "Figure X: " or "Table X: " style references
+                                text.match(
+                                    /^(Figure|Table|Equation)\s+\d+(\.\d+)*(\:|\.)?\s*$/i
+                                )
+                            ) {
+                                contentReceiver.push(
+                                    this.convertCrossReference(anchor, text)
+                                )
+                                return
+                            }
+                        }
+                        // Otherwise, treat as a normal link
+                        const rPr = runs[0]?.query("w:rPr")
+                        const formatting = rPr
+                            ? this.parser.extractRunProperties(rPr)
+                            : {}
+                        const marks = this.getCurrentMarks(formatting)
+                        marks.push({
+                            type: "link",
+                            attrs: {href, title: text}
+                        })
+                        contentReceiver.push({
+                            type: "text",
+                            text,
+                            marks
+                        })
+                    }
+                }
+            } else if (child.tagName === "m:oMath") {
+                const equationNode = this.convertEquation(child)
+                if (equationNode) {
+                    contentReceiver.push(equationNode)
+                }
+            } else if (child.tagName === "w:sdt") {
+                if (isDocxSdtCitation(child)) {
+                    // Used by Mendeley Cite & Citavi
+                    const citationNode = parseDocxSdtCitation(
+                        child,
+                        this.bibliography
+                    )
+                    if (citationNode) {
+                        contentReceiver.push(citationNode)
+                    }
+                }
+            } else {
+                console.warn("unhandled node", child)
+            }
+        })
+        return content
+    }
+    // Method to help process cross-references in documents
+    findReferenceTargets(document) {
+        const targets = {}
+        // Find bookmarks
+        document.queryAll("w:bookmarkStart").forEach(bookmark => {
+            const id = bookmark.getAttribute("w:id")
+            const name = bookmark.getAttribute("w:name")
+            if (id && name) {
+                targets[name] = {
+                    id: name,
+                    type: "bookmark"
+                }
+            }
+        })
+        // Find headings (with styles like Heading1, Heading2, etc.)
+        document.queryAll("w:pStyle").forEach(pStyle => {
+            const val = pStyle.getAttribute("w:val")
+            if (val && val.match(/^Heading\d+$/)) {
+                const paragraph = pStyle.closest("w:p")
+                if (paragraph) {
+                    const text = this.getTextContent(paragraph)
+                    // Create an ID from the heading text
+                    const id = text
+                        .trim()
+                        .toLowerCase()
+                        .replace(/[^\w\s-]/g, "")
+                        .replace(/\s+/g, "-")
+                    targets[id] = {
+                        id: id,
+                        type: "heading",
+                        text: text
+                    }
+                }
+            }
+        })
+        return targets
+    }
+    convertFootnote(id, isEndnote = false) {
+        const footnoteContent = isEndnote
+            ? this.parser.endnotes[id].content
+            : this.parser.footnotes[id].content
+        // Convert the footnote content to our document model
+        const content = []
+        footnoteContent.forEach(block => {
+            if (block.type === "paragraph") {
+                content.push({
+                    type: "paragraph",
+                    content: block.content.map(node => {
+                        if (node.type === "text") {
+                            return {
+                                type: "text",
+                                text: node.text,
+                                marks: node.marks || []
+                            }
+                        }
+                        return node
+                    })
+                })
+            }
+        })
+        return {
+            type: "footnote",
+            attrs: {
+                footnote: content
+            }
+        }
+    }
+    convertEquation(oMathNode) {
+        // Extract OMML content and convert to MathML
+        const mmlNode = omml2mathml(oMathNode)
+        const latex = MathMLToLaTeX.convert(mmlNode.outerXML)
+        return {
+            type: "equation",
+            attrs: {
+                equation: latex
+            }
+        }
+    }
+    simplifiedOmmlToLatex(omml) {
+        // This is a very basic conversion - in a real implementation you would
+        // use a library like MathML-to-LaTeX or implement a more complete converter
+        // Extract text content as a fallback
+        const textContent = omml
+            .replace(/<[^>]+>/g, " ")
+            .replace(/\s+/g, " ")
+            .trim()
+        // If the OMML contains a fraction
+        if (omml.includes("<m:f>")) {
+            const numMatch = omml.match(/<m:num>(.*?)<\/m:num>/s)
+            const denMatch = omml.match(/<m:den>(.*?)<\/m:den>/s)
+            if (numMatch && denMatch) {
+                const num = numMatch[1].replace(/<[^>]+>/g, "").trim()
+                const den = denMatch[1].replace(/<[^>]+>/g, "").trim()
+                return `\\frac{${num}}{${den}}`
+            }
+        }
+        // If it contains a superscript
+        if (omml.includes("<m:sup>")) {
+            const baseMatch = omml.match(/<m:e>(.*?)<\/m:e>/s)
+            const supMatch = omml.match(/<m:sup>(.*?)<\/m:sup>/s)
+            if (baseMatch && supMatch) {
+                const base = baseMatch[1].replace(/<[^>]+>/g, "").trim()
+                const sup = supMatch[1].replace(/<[^>]+>/g, "").trim()
+                return `${base}^{${sup}}`
+            }
+        }
+        // If it contains a subscript
+        if (omml.includes("<m:sub>")) {
+            const baseMatch = omml.match(/<m:e>(.*?)<\/m:e>/s)
+            const subMatch = omml.match(/<m:sub>(.*?)<\/m:sub>/s)
+            if (baseMatch && subMatch) {
+                const base = baseMatch[1].replace(/<[^>]+>/g, "").trim()
+                const sub = subMatch[1].replace(/<[^>]+>/g, "").trim()
+                return `${base}_{${sub}}`
+            }
+        }
+        // Return a simplified representation with the text content
+        return textContent || "x^2" // Default fallback
+    }
+    renderField(field) {
+        const instr = field.instructions.trim()
+        // Handle REF fields (cross-references)
+        if (instr.startsWith("REF ")) {
+            // Extract the target bookmark/heading ID
+            const parts = instr.substring(4).trim().split(/\s+/)
+            if (parts.length > 0) {
+                const target = parts[0]
+                const text = field.display.reduce(
+                    (accumulator, currentValue) => {
+                        if (currentValue.type === "text") {
+                            return accumulator + currentValue.text
+                        }
+                        return accumulator
+                    },
+                    ""
+                )
+                return [this.convertCrossReference(target, text)]
+            }
+        }
+        // Handle SEQ fields (figure/table/equation number cross-references)
+        else if (instr.startsWith("SEQ ")) {
+            // This is a sequence field that generates numbers for figures/tables/equations.
+            // For cross-references, we look for the text in the display part.
+            const seqMatch = instr.match(/^SEQ\s+(\S+)/)
+            if (seqMatch) {
+                const _seqName = seqMatch[1]
+                const text = field.display.reduce((acc, curr) => {
+                    if (curr.type === "text") {
+                        return acc + curr.text
+                    }
+                    return acc
+                }, "")
+                if (text) {
+                    // Return as a plain text node since we can't resolve SEQ references easily
+                    return [
+                        {
+                            type: "text",
+                            text,
+                            marks: []
+                        }
+                    ]
+                }
+                return []
+            }
+        }
+        // Handle citation fields
+        else if (isDocxCitationField(instr)) {
+            return [
+                parseDocxFieldCitation(
+                    instr,
+                    field.data,
+                    this.sourcesXml,
+                    this.bibliography
+                )
+            ]
+        } else if (isDocxBibliographyField(instr)) {
+            // We don't render the contents of bibliography fields
+            return []
+        } else {
+            // We do not support this field type, so instead we return the display content.
+            return field.display || []
+        }
+    }
+    convertCrossReference(targetId, displayText) {
+        // Look up the target in our reference targets
+        const target = this.referenceTargets[targetId]
+        // If we found the target, use its information
+        if (target) {
+            return {
+                type: "cross_reference",
+                attrs: {
+                    id: targetId,
+                    title: displayText || target.text || targetId
+                }
+            }
+        }
+        // If target not found, create a reference with the display text or target ID
+        return {
+            type: "cross_reference",
+            attrs: {
+                id: targetId,
+                title: displayText || targetId
+            }
+        }
+    }
+    createMarksFromFormatting(formatting, insertion = null, deletion = null) {
+        const marks = []
+        if (formatting.bold) {
+            marks.push({type: "strong"})
+        }
+        if (formatting.italic) {
+            marks.push({type: "em"})
+        }
+        if (formatting.underline) {
+            marks.push({type: "underline"})
+        }
+        // Handle superscript and subscript
+        if (formatting.vertAlign === "superscript") {
+            marks.push({type: "sup"})
+        }
+        if (formatting.vertAlign === "subscript") {
+            marks.push({type: "sub"})
+        }
+        // Handle inline code (monospace fonts)
+        if (formatting.fontFamily) {
+            const monospacePatterns = [
+                /^courier/i,
+                /^consolas/i,
+                /^monaco/i,
+                /^menlo/i,
+                /^lucida console/i,
+                /^liberation mono/i,
+                /^dejavu sans mono/i,
+                /^bitstream vera sans mono/i,
+                /^source code pro/i,
+                /^fira code/i,
+                /^ubuntu mono/i,
+                /^droid sans mono/i
+            ]
+            const isMonospace = monospacePatterns.some(pattern =>
+                pattern.test(formatting.fontFamily)
+            )
+            if (isMonospace) {
+                marks.push({type: "code"})
+            }
+        }
+        if (insertion) {
+            const date = new Date(insertion.getAttribute("w:date"))
+            const date10 = Math.floor(date.getTime() / 600000) * 10
+            marks.push({
+                type: "insertion",
+                attrs: {
+                    user: 0,
+                    username: insertion.getAttribute("w:author"),
+                    date: date10,
+                    approved: false
+                }
+            })
+        }
+        if (deletion) {
+            const date = new Date(deletion.getAttribute("w:date"))
+            const date10 = Math.floor(date.getTime() / 600000) * 10
+            marks.push({
+                type: "deletion",
+                attrs: {
+                    user: 0,
+                    username: deletion.getAttribute("w:author"),
+                    date: date10
+                }
+            })
+        }
+        return marks
+    }
+    getCurrentMarks(formatting, insertion, deletion) {
+        const marks = this.createMarksFromFormatting(
+            formatting,
+            insertion,
+            deletion
+        )
+        // Add comment marks for any active comment IDs
+        this.currentCommentIds.forEach(commentId => {
+            marks.push({
+                type: "comment",
+                attrs: {
+                    id: Number.parseInt(commentId)
+                }
+            })
+        })
+        return marks
+    }
+    hasTrackedChanges(doc) {
+        return Boolean(doc.query("w:ins") || doc.query("w:del"))
+    }
+    detectLanguage(doc) {
+        return doc.query("w:lang")?.getAttribute("w:val") || "en-US"
+    }
+}