@docen/import-docx 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1 +1,1705 @@
1
- import{fromXml as F}from"xast-util-from-xml";import{unzipSync as wt}from"fflate";import{toUint8Array as dt}from"undio";import{imageMeta as ht}from"image-meta";const gt=.6666666666666666,N={CODE_BLOCK:"CodeBlock",CODE_PREFIX:"Code"},mt={docxToTipTap:{left:"left",right:"right",center:"center",both:"justify"}},q=96;function z(n){return Math.round(n*q/1440)}function M(n){return`${z(n)}px`}function bt(n){return Math.round(n/(914400/q))}function A(n){const t=parseInt(n,10);if(!isNaN(t))return bt(t)}function a(n,t){if(!n.children)return null;for(const e of n.children)if(e.type==="element"&&e.name===t)return e;return null}function b(n,t){if(!n.children)return null;for(const e of n.children)if(e.type==="element"){if(e.name===t)return e;const r=b(e,t);if(r)return r}return null}function x(n,t){const e=[];if(!n.children)return e;for(const r of n.children)r.type==="element"&&(r.name===t&&e.push(r),e.push(...x(r,t)));return e}function T(n,t){const e=n[t];if(!e)return;const r=parseInt(e,10);if(!isNaN(r))return e}function B(n){return t=>n.includes(t)}const O="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";function G(n){const t=n.length,e=Math.ceil(t/3)*4,r=Array.from({length:e});let i=0;for(let o=0;o<t;o+=3){const s=n[o],c=o+1<t?n[o+1]:0,l=o+2<t?n[o+2]:0,u=s>>2,p=(s&3)<<4|c>>4,w=(c&15)<<2|l>>6,f=l&63;r[i++]=O[u],r[i++]=O[p],r[i++]=o+1<t?O[w]:"=",r[i++]=o+2<t?O[f]:"="}return r.join("")}function yt(n){const t=atob(n),e=new Uint8Array(t.length);for(let r=0;r<t.length;r++)e[r]=t.charCodeAt(r);return e}const vt=globalThis.process?.release?.name==="node",V=typeof window<"u";async function It(n){const t=await n;return t.default||t}let P,J=class{#t=!1;constructor({enableHWA:t=!1}={}){this.#t=t}create(t,e){const r=this._createCanvas(t,e);return{canvas:r,context:r.getContext("2d",{willReadFrequently:!this.#t})}}reset({canvas:t},e,r){if(!t)throw new Error("Canvas is not specified");t.width=e,t.height=r}destroy(t){if(!t.canvas)throw new Error("Canvas is not 
specified");t.canvas.width=0,t.canvas.height=0,t.canvas=void 0,t.context=void 0}_createCanvas(t,e){throw new Error("Not implemented")}};class xt extends J{_document;constructor({ownerDocument:t=globalThis.document,enableHWA:e=!1}={}){super({enableHWA:e}),this._document=t}_createCanvas(t,e){const r=this._document.createElement("canvas");return r.width=t,r.height=e,r}}class kt extends J{constructor({enableHWA:t=!1}={}){super({enableHWA:t})}_createCanvas(t,e){if(!P)throw new Error("@napi-rs/canvas module is not resolved");return P.createCanvas(t,e)}}async function Ct(n){P??=await It(n())}async function Mt(n){if(V)return xt;if(vt){if(!n)throw new Error("In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas");return await Ct(n),kt}throw new Error("Unsupported environment for canvas operations")}async function Tt(n,t,e={}){if(!t||!t.left&&!t.top&&!t.right&&!t.bottom||e.enabled===!1)return n;try{const r=await Mt(e.canvasImport),i=await Lt(n,r),o=(t.left||0)/1e5*i.width,s=(t.top||0)/1e5*i.height,c=(t.right||0)/1e5*i.width,l=(t.bottom||0)/1e5*i.height,u=Math.round(i.width-o-c),p=Math.round(i.height-s-l);if(u<=0||p<=0)return console.warn("Invalid crop dimensions, returning original image"),n;const w=new r().create(u,p);if(!w.context)throw new Error("Failed to get 2D context from canvas");w.context.drawImage(i,o,s,u,p,0,0,u,p);const f=w.canvas.toDataURL(),d=await(await fetch(f)).arrayBuffer();return new Uint8Array(d)}catch(r){return console.warn("Image cropping failed, returning original image:",r),n}}async function Lt(n,t){if(V){const e=new Blob([n.buffer]),r=URL.createObjectURL(e);try{const i=new Image;return new Promise((o,s)=>{i.onload=()=>{URL.revokeObjectURL(r),o(i)},i.onerror=()=>{URL.revokeObjectURL(r),s(new Error("Failed to load image"))},i.src=r})}catch(i){throw URL.revokeObjectURL(r),i}}else{if(!P)throw new Error("@napi-rs/canvas module is not resolved");return await 
P.loadImage(Buffer.from(n))}}const Rt="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image",Ft=B(["left","right","center","inside","outside"]),Pt=B(["top","bottom","center","inside","outside"]),$t=B(["page","character","column","margin","leftMargin","rightMargin","insideMargin","outsideMargin"]),Nt=B(["page","paragraph","margin","topMargin","bottomMargin","insideMargin","outsideMargin","line"]);function At(n){const t=n.attributes.l,e=n.attributes.t,r=n.attributes.r,i=n.attributes.b;if(!(!t&&!e&&!r&&!i))return{left:t?parseInt(t,10):void 0,top:e?parseInt(e,10):void 0,right:r?parseInt(r,10):void 0,bottom:i?parseInt(i,10):void 0}}function Bt(n){const t=a(n,"wp:align"),e=a(n,"wp:posOffset");let r;if(t?.children[0]?.type==="text"){const o=t.children[0].value;Ft(o)&&(r=o)}const i=e?.children[0]?.type==="text"?parseInt(e.children[0].value,10):void 0;if(!(!r&&i===void 0))return{...r&&{align:r},...i!==void 0&&{offset:i}}}function Ot(n){const t=a(n,"wp:align"),e=a(n,"wp:posOffset");let r;if(t?.children[0]?.type==="text"){const o=t.children[0].value;Pt(o)&&(r=o)}const i=e?.children[0]?.type==="text"?parseInt(e.children[0].value,10):void 0;if(!(!r&&i===void 0))return{...r&&{align:r},...i!==void 0&&{offset:i}}}function Q(n){let t=a(n,"w:drawing");if(t)return t;const e=a(n,"mc:AlternateContent"),r=e&&a(e,"mc:Choice");return r?a(r,"w:drawing"):null}function Y(n,t,e,r){const i=e/r,o=n/t;return Math.abs(i-o)>.1?i>o?{width:n,height:Math.round(n/i)}:{width:Math.round(t*i),height:t}:{width:n,height:t}}async function Dt(n,t){const e=new Map,r=n["word/_rels/document.xml.rels"];if(!r)return e;const i=F(new TextDecoder().decode(r)),o=a(i,"Relationships");if(!o)return e;const s=x(o,"Relationship");for(const c of s)if(c.attributes.Type===Rt&&c.attributes.Id&&c.attributes.Target){const l="word/"+c.attributes.Target,u=n[l];if(!u)continue;let p,w,f="png";try{const h=ht(u);p=h.width,w=h.height,h.type&&(f=h.type)}catch{}let d;if(t)d=(await 
t({id:c.attributes.Id,contentType:`image/${f}`,data:u})).src;else{const h=G(u);d=`data:image/${f};base64,${h}`}e.set(c.attributes.Id,{src:d,width:p,height:w})}return e}async function W(n,t){const{context:e}=t,r=b(n,"a:blip");if(!r?.attributes["r:embed"])return null;const i=r.attributes["r:embed"],o=e.images.get(i);if(!o)return null;let s=o.src;const c=b(n,"a:srcRect");if(c){const g=At(c);if(g&&s.startsWith("data:")){const[m,C]=s.split(",");if(C){const L=yt(C);try{const R=await Tt(L,g,{canvasImport:e.image?.canvasImport,enabled:e.image?.enableImageCrop??!1}),S=G(R);s=`${m},${S}`}catch(R){console.warn("Image cropping failed, using original image:",R)}}}}const l=b(n,"wp:extent");let u,p;if(l){const g=l.attributes.cx,m=l.attributes.cy;typeof g=="string"&&(u=A(g)),typeof m=="string"&&(p=A(m))}const w=b(n,"a:xfrm");let f;if(w?.attributes.rot){const g=parseInt(w.attributes.rot,10);isNaN(g)||(f=g/6e4)}const d=b(n,"wp:docPr")?.attributes.title,h=b(n,"wp:positionH"),y=b(n,"wp:positionV");let I;if(h||y){const g=h?Bt(h):void 0,m=y?Ot(y):void 0,C=h?.attributes.relativeFrom,L=y?.attributes.relativeFrom,R=typeof C=="string"&&$t(C)?C:"page",S=typeof L=="string"&&Nt(L)?L:"page";I={horizontalPosition:{relative:R,...g?.align&&{align:g.align},...g?.offset!==void 0&&{offset:g.offset}},verticalPosition:{relative:S,...m?.align&&{align:m.align},...m?.offset!==void 0&&{offset:m.offset}}}}const v=b(n,"pic:spPr");let k;if(v){const g=b(v,"a:ln"),m=g&&b(g,"a:solidFill"),C=m&&b(m,"a:srgbClr");C?.attributes.val&&(k={type:"solidFill",solidFillType:"rgb",value:C.attributes.val})}return{type:"image",attrs:{src:s,alt:"",...u!==void 0&&{width:u},...p!==void 0&&{height:p},...f!==void 0&&{rotation:f},...d&&{title:d},...I&&{floating:I},...k&&{outline:k}}}}function Et(n,t,e){if(t&&e&&n.width&&n.height){const r=Y(t,e,n.width,n.height);return{type:"image",attrs:{src:n.src,alt:"",width:r.width,height:r.height}}}return{type:"image",attrs:{src:n.src,alt:"",...t!==void 0&&{width:t},...e!==void 
0&&{height:e}}}}async function Z(n,t){const e=[],r=a(n,"wp:inline")||a(n,"wp:anchor");if(!r)return e;const i=a(r,"wp:extent");let o,s;if(i){const p=i.attributes.cx,w=i.attributes.cy;typeof p=="string"&&(o=A(p)),typeof w=="string"&&(s=A(w))}const c=a(r,"a:graphic");if(!c)return e;const l=a(c,"a:graphicData");if(!l)return e;const u=a(l,"wpg:wgp");if(u){const p=a(u,"wpg:grpSp"),w=p?[...x(p,"pic:pic"),...x(p,"pic")]:[...x(u,"pic:pic"),...x(u,"pic")];for(const f of w){const d=a(f,"a:graphic");if(!d){const v=a(f,"pic:blipFill")||b(f,"a:blipFill");if(!v)continue;const k=a(v,"a:blip")||b(v,"a:blip");if(!k?.attributes["r:embed"])continue;const g=k.attributes["r:embed"],m=t.context.images.get(g);if(!m)continue;e.push(Et(m,o,s));continue}const h={children:[d]},y=await W(h,t);if(!y)continue;const I=h.children[0]?.type==="element"?b(h.children[0],"a:blip")?.attributes["r:embed"]:void 0;if(o&&s&&I){const v=t.context.images.get(I);if(v?.width&&v?.height){const k=Y(o,s,v.width,v.height);y.attrs.width=k.width,y.attrs.height=k.height}else y.attrs.width=o,y.attrs.height=s}e.push(y)}}else{const p=await W(n,t);p&&e.push(p)}return e}const St="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";function zt(n){const t=new Map,e=n["word/_rels/document.xml.rels"];if(!e)return t;const r=F(new TextDecoder().decode(e)),i=a(r,"Relationships");if(!i)return t;const o=x(i,"Relationship");for(const s of o)s.attributes.Type===St&&s.attributes.Id&&s.attributes.Target&&t.set(s.attributes.Id,s.attributes.Target);return t}function Wt(n){const t=new Map,e=new Map,r=n["word/numbering.xml"];if(!r)return t;const i=F(new TextDecoder().decode(r)),o=new Map,s=a(i,"w:numbering");if(!s)return t;const c=x(s,"w:abstractNum");for(const u of c){const p=u.attributes["w:abstractNumId"],w=a(u,"w:lvl");if(!w)continue;const f=a(w,"w:numFmt");f?.attributes["w:val"]&&o.set(p,f.attributes["w:val"]);const d=a(w,"w:start");d?.attributes["w:val"]&&e.set(p,parseInt(d.attributes["w:val"],10))}const 
l=x(s,"w:num");for(const u of l){const p=u.attributes["w:numId"],w=a(u,"w:abstractNumId");if(!w?.attributes["w:val"])continue;const f=w.attributes["w:val"],d=o.get(f);if(!d)continue;const h=e.get(f);d==="bullet"?t.set(p,{type:"bullet"}):t.set(p,{type:"ordered",...h!==void 0&&{start:h}})}return t}function _t(n){const t=new Map,e=n["word/styles.xml"];if(!e)return t;const r=F(new TextDecoder().decode(e)),i=a(r,"w:styles");if(!i)return t;const o=x(i,"w:style").filter(s=>s.attributes["w:type"]==="paragraph");for(const s of o){const c=s.attributes["w:styleId"];if(!c)continue;const l={styleId:c},u=a(s,"w:name");u?.attributes["w:val"]&&(l.name=u.attributes["w:val"]);const p=a(s,"w:pPr");if(p){const f=a(p,"w:outlineLvl");f?.attributes["w:val"]!==void 0&&(l.outlineLvl=parseInt(f.attributes["w:val"],10))}const w=a(s,"w:rPr");if(w){const f={},d=a(w,"w:color");if(d?.attributes["w:val"]&&d.attributes["w:val"]!=="auto"){const I=d.attributes["w:val"];f.color=I.startsWith("#")?I:`#${I}`}a(w,"w:b")&&(f.bold=!0),a(w,"w:i")&&(f.italic=!0),a(w,"w:u")&&(f.underline=!0),a(w,"w:strike")&&(f.strike=!0);const h=a(w,"w:sz");if(h?.attributes["w:val"]){const I=h.attributes["w:val"],v=parseInt(I,10);isNaN(v)||(f.fontSize=v)}const y=a(w,"w:rFonts");y?.attributes["w:ascii"]&&(f.fontFamily=y.attributes["w:ascii"]),Object.keys(f).length>0&&(l.charFormat=f)}t.set(c,l)}return t}function tt(n,t){const e=a(n,"w:t");if(!e)return null;const r=e.children.find(o=>o.type==="text");if(!r?.value)return null;const i=_(n,t);return{type:"text",text:r.value,...i.length&&{marks:i}}}async function D(n,t){const{context:e,styleInfo:r}=t,i=[];for(const o of n.children)if(o.type==="element"){if(o.name==="w:hyperlink"){const s=o,c=s.attributes["r:id"],l=e.hyperlinks.get(c);if(!l)continue;for(const u of s.children){if(u.type!=="element"||u.name!=="w:r")continue;const p=u,w=Q(p);if(w){const d=await W(w,{context:e});if(d){i.push(d);continue}const h=await Z(w,{context:e});if(h.length){i.push(...h);continue}}const 
f=tt(p,r);f&&(f.marks=f.marks||[],f.marks.push({type:"link",attrs:{href:l}}),i.push(f))}}else if(o.name==="w:r"){const s=o,c=Q(s);if(c){const u=await Z(c,{context:e});if(u.length){i.push(...u);continue}}if(a(s,"w:br")){const u=_(s,r);i.push({type:"hardBreak",...u.length&&{marks:u}})}const l=tt(s,r);l&&i.push(l)}}return i}function _(n,t){const e=[],r=a(n,"w:rPr");let i={};if(t?.charFormat&&(i={...t.charFormat}),r){const o=a(r,"w:b");o&&(o.attributes["w:val"]==="false"?i.bold=!1:i.bold=!0);const s=a(r,"w:i");s&&(s.attributes["w:val"]==="false"?i.italic=!1:i.italic=!0),a(r,"w:u")&&(i.underline=!0),a(r,"w:strike")&&(i.strike=!0);const c=a(r,"w:color");if(c?.attributes["w:val"]&&c.attributes["w:val"]!=="auto"){const f=c.attributes["w:val"];i.color=f.startsWith("#")?f:`#${f}`}const l=a(r,"w:sz");if(l?.attributes["w:val"]){const f=l.attributes["w:val"],d=parseInt(f,10);isNaN(d)||(i.fontSize=d)}const u=a(r,"w:rFonts");u?.attributes["w:ascii"]&&(i.fontFamily=u.attributes["w:ascii"]);const p=a(r,"w:shd");if(p?.attributes["w:fill"]&&p.attributes["w:fill"]!=="auto"){const f=p.attributes["w:fill"];i.backgroundColor=f.startsWith("#")?f:`#${f}`}a(r,"w:highlight")&&e.push({type:"highlight"});const w=a(r,"w:vertAlign");if(w){const f=w.attributes["w:val"];f==="subscript"?e.push({type:"subscript"}):f==="superscript"&&e.push({type:"superscript"})}}if(i.bold&&e.push({type:"bold"}),i.italic&&e.push({type:"italic"}),i.underline&&e.push({type:"underline"}),i.strike&&e.push({type:"strike"}),i.color||i.backgroundColor||i.fontSize||i.fontFamily){const o={color:i.color||"",backgroundColor:i.backgroundColor||"",fontSize:"",fontFamily:"",lineHeight:""};if(i.fontSize){const s=Math.round(i.fontSize*gt*10)/10;o.fontSize=`${s}px`}i.fontFamily&&(o.fontFamily=i.fontFamily),e.push({type:"textStyle",attrs:o})}return e}function j(n){const t=a(n,"w:pPr");if(!t)return;const e=a(t,"w:jc");if(!e?.attributes["w:val"])return;const r=e.attributes["w:val"],i=mt.docxToTipTap[r];return i?{textAlign:i}:void 
0}function et(n){const t=a(n,"w:pPr");if(!t)return null;const e={},r=a(t,"w:ind");if(r){const o=T(r.attributes,"w:left");if(o){const l=parseInt(o,10);e.indentLeft=M(l)}const s=T(r.attributes,"w:right");if(s){const l=parseInt(s,10);e.indentRight=M(l)}const c=T(r.attributes,"w:firstLine");if(c){const l=parseInt(c,10);e.indentFirstLine=M(l)}else{const l=T(r.attributes,"w:hanging");if(l){const u=o?parseInt(o,10):0,p=parseInt(l,10),w=u-p;e.indentFirstLine=M(w)}}}const i=a(t,"w:spacing");if(i){const o=T(i.attributes,"w:before");if(o){const c=parseInt(o,10);e.spacingBefore=M(c)}const s=T(i.attributes,"w:after");if(s){const c=parseInt(s,10);e.spacingAfter=M(c)}}return Object.keys(e).length?e:null}async function $(n,t){const{context:e,styleInfo:r}=t,i=a(n,"w:pPr"),o=(i&&a(i,"w:pStyle"))?.attributes["w:val"];if(o&&e.styleMap){const u=e.styleMap.get(o);if(u?.outlineLvl!==void 0&&u.outlineLvl>=0&&u.outlineLvl<=5){const w=u.outlineLvl+1;return nt(n,t,u,w)}const p=o.match(/^Heading(\d+)$/);if(p){const w=parseInt(p[1],10);return nt(n,t,u,w)}}const s=o&&e.styleMap?e.styleMap.get(o):void 0,c=await D(n,{context:e,styleInfo:r||s}),l={...j(n),...et(n)};if(jt(n)){const u=c.filter(p=>p.type!=="hardBreak");return[{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:u.length?u:void 0},{type:"horizontalRule"}]}if(c.length===1&&c[0].type==="hardBreak"){const u=a(n,"w:r");if((u&&a(u,"w:br"))?.attributes["w:type"]==="page")return{type:"horizontalRule"}}if(c.length===1&&c[0].type==="image"){const u=c[0];return{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:[u]}}return{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:c}}function jt(n){const t=[],e=r=>{if(r.name==="w:r")t.push(r);else for(const i of r.children)i.type==="element"&&e(i)};return e(n),t.some(r=>a(r,"w:br")?.attributes["w:type"]==="page")}async function nt(n,t,e,r){return{type:"heading",attrs:{level:r,...et(n)},content:await D(n,{context:t.context,styleInfo:e})}}function E(n){if(!n)return null;const 
t=n.attributes["w:val"],e=n.attributes["w:sz"],r=n.attributes["w:color"],i={single:"solid",dashed:"dashed",dotted:"dotted",double:"double",none:"none",nil:"none"},o={};if(r&&r!=="auto"&&(o.color=`#${r}`),e){const s=parseInt(e);isNaN(s)||(o.width=Math.round(s/6))}return t&&i[t]&&(o.style=i[t]),Object.keys(o).length>0?o:null}function Ht(n){const t={marginTop:void 0,marginBottom:void 0,marginLeft:void 0,marginRight:void 0},e=a(n,"w:tblPr");if(!e)return null;const r=a(e,"w:tblCellMar");if(!r)return null;const i=a(r,"w:top");if(i?.attributes["w:w"]){const l=parseInt(i.attributes["w:w"]);isNaN(l)||(t.marginTop=l)}const o=a(r,"w:bottom");if(o?.attributes["w:w"]){const l=parseInt(o.attributes["w:w"]);isNaN(l)||(t.marginBottom=l)}const s=a(r,"w:left");if(s?.attributes["w:w"]){const l=parseInt(s.attributes["w:w"]);isNaN(l)||(t.marginLeft=l)}const c=a(r,"w:right");if(c?.attributes["w:w"]){const l=parseInt(c.attributes["w:w"]);isNaN(l)||(t.marginRight=l)}return t.marginTop===void 0&&t.marginBottom===void 0&&t.marginLeft===void 0&&t.marginRight===void 0?null:t}function Ut(n){const t={rowHeight:null},e=a(n,"w:trPr");if(!e)return t;const r=a(e,"w:trHeight");if(r?.attributes["w:val"]){const i=parseInt(r.attributes["w:val"]),o=z(i);t.rowHeight=`${o}px`}return t}function rt(n){const t={colspan:1,rowspan:1,colwidth:null},e=a(n,"w:tcPr");if(!e)return t;const r=a(e,"w:gridSpan");r?.attributes["w:val"]&&(t.colspan=parseInt(r.attributes["w:val"])),a(e,"w:vMerge")?.attributes["w:val"]==="continue"&&(t.rowspan=0);const i=a(e,"w:tcW");if(i?.attributes["w:w"]){const l=parseInt(i.attributes["w:w"]),u=z(l);t.colwidth=[u]}const o=a(e,"w:shd");o?.attributes["w:fill"]&&(t.backgroundColor=`#${o.attributes["w:fill"]}`);const s=a(e,"w:vAlign");s?.attributes["w:val"]&&(t.verticalAlign=s.attributes["w:val"]);const c=a(e,"w:tcBorders");if(c){const l=E(a(c,"w:top"));l&&(t.borderTop=l);const u=E(a(c,"w:bottom"));u&&(t.borderBottom=u);const p=E(a(c,"w:left"));p&&(t.borderLeft=p);const 
w=E(a(c,"w:right"));w&&(t.borderRight=w)}return t}function Xt(n){return n.name==="w:tbl"}async function it(n,t){const e=[];for(const s of n.children)s.type==="element"&&s.name==="w:tr"&&e.push(s);const r=new Map,i=await Promise.all(e.map((s,c)=>Kt(s,{context:t.context,activeRowspans:r,rows:e,rowIndex:c}))),o=Ht(n);return{type:"table",...o&&{attrs:o},content:i}}async function Kt(n,t){const e=[];let r=0;const i=Ut(n);for(const o of n.children){if(o.type!=="element"||o.name!=="w:tc")continue;const s=t.activeRowspans.get(r);if(s&&s>0){t.activeRowspans.set(r,s-1),r++;continue}let c=rt(o);if(c?.rowspan===1){const u=qt({rows:t.rows,rowIndex:t.rowIndex,colIndex:r});u>1&&(c={...c,rowspan:u})}if(c?.rowspan&&c.rowspan>1&&t.activeRowspans.set(r,c.rowspan-1),c?.rowspan===0){r++;continue}const l=await Gt(o,t);e.push({type:"tableCell",...c&&{attrs:c},content:l}),r+=c?.colspan||1}return{type:"tableRow",...i&&{attrs:i},content:e}}function qt(n){let t=1,e=n.colIndex;for(let r=n.rowIndex+1;r<n.rows.length;r++){const i=n.rows[r];let o=!1;for(const s of i.children){if(s.type!=="element"||s.name!=="w:tc")continue;const c=rt(s),l=c?.colspan||1;if(e>=0&&e<l){if(c?.rowspan===0)t++,o=!0;else return t;break}e-=l}if(!o)break}return t}async function Gt(n,t){const e=[];for(const r of n.children)if(r.type==="element"&&r.name==="w:p"){const i=await $(r,t);Array.isArray(i)?e.push(...i):e.push(i)}return e.length?e:[{type:"paragraph",content:[]}]}const ot="\u2610",H="\u2611";function st(n){const t=a(n,"w:r");if(!t)return null;const e=a(t,"w:t");if(!e)return null;const r=e.children.find(i=>i.type==="text");return r?.value&&r||null}function U(n){const t=st(n);if(!t)return!1;const e=t.value;return e.startsWith(ot)||e.startsWith(H)}function at(n){return st(n)?.value.startsWith(H)||!1}async function ct(n,t){return{type:"taskItem",attrs:{checked:at(n)},content:[await Vt(n,t)]}}async function lt(n,t){const{siblings:e,index:r,processedIndices:i}=t,o=[];let s=r;for(;s<e.length;){const 
c=e[s];if(c.name!=="w:p"||!U(c))break;i.add(s);const l=await ct(c,{context:t.context,styleInfo:t.styleInfo});o.push(l),s++}return{type:"taskList",content:o}}async function Vt(n,t){const{context:e,styleInfo:r}=t,i=await D(n,{context:e,styleInfo:r});if(i.length>0&&i[0].type==="text"){const s=i[0],c=s.text;if(c.startsWith(ot)||c.startsWith(H)){const l=c.substring(2).trimStart();l?s.text=l:i.shift()}}const o=j(n);return{type:"paragraph",...o&&{attrs:o},content:i.length?i:void 0}}function ut(n){const t=a(n,"w:pPr"),e=(t&&a(t,"w:pStyle"))?.attributes["w:val"];return e===N.CODE_BLOCK||e?.startsWith(N.CODE_PREFIX)||!1}function ft(n){const t=a(n,"w:pPr"),e=(t&&a(t,"w:pStyle"))?.attributes["w:val"];return e?.startsWith(N.CODE_BLOCK)&&e.replace(N.CODE_BLOCK,"").toLowerCase()||void 0}function X(n){const t=a(n,"w:pPr");return!!t&&a(t,"w:numPr")!==void 0}function K(n){const t=a(n,"w:pPr"),e=t&&a(t,"w:numPr");if(!e)return null;const r=a(e,"w:ilvl"),i=a(e,"w:numId");return!r||!i?null:{numId:i.attributes["w:val"],level:parseInt(r.attributes["w:val"]||"0",10)}}function pt(n){const t=a(n,"w:r");if(!t)return!1;let e=!1,r=!1;for(const i of t.children)i.type==="element"&&(i.name==="w:br"&&i.attributes["w:type"]==="page"?e=!0:i.name==="w:t"?i.children.find(o=>o.type==="text")?.value?.trim().length&&(r=!0):i.name!=="w:rPr"&&(r=!0));return e&&!r}async function Jt(n,t={}){const e=await dt(n),r=wt(e),i=zt(r),o=await Dt(r,t.image?.handler),s=r["word/document.xml"];if(!s)throw new Error("Invalid DOCX file: missing word/document.xml");const c=F(new TextDecoder().decode(s)),l=Wt(r),u=_t(r),p={...t,hyperlinks:i,images:o,listTypeMap:l,styleMap:u};return await Qt(c,{context:p})}async function Qt(n,t){if(n.type!=="root")return{type:"doc",content:[]};const e=a(n,"w:document");if(!e)return{type:"doc",content:[]};const r=a(e,"w:body");return r?{type:"doc",content:await Yt(r.children.filter(i=>i.type==="element"),t)}:{type:"doc",content:[]}}async function Yt(n,t){const e=[],r=new Set;for(let 
i=0;i<n.length;i++){if(r.has(i))continue;const o=n[i];if(t.context.ignoreEmptyParagraphs&&o.name==="w:p"&&re(o))continue;const s=await Zt(o,n,i,t,r);Array.isArray(s)?e.push(...s):s&&e.push(s)}return e}async function Zt(n,t,e,r,i){switch(n.name){case"w:tbl":return await it(n,r);case"w:p":return ut(n)?await te(n):U(n)?await lt(n,{...r,siblings:t,index:e,processedIndices:i}):X(n)?await ee(n,t,e,r,i):pt(n)?{type:"horizontalRule"}:await $(n,r);default:return null}}async function te(n){const t=ft(n),e=ne(n);return{type:"codeBlock",...t&&{attrs:{language:t}},content:e}}async function ee(n,t,e,r,i){const o=K(n);if(!o)return await $(n,r);const s=r.context.listTypeMap.get(o.numId),c=s?.type||"bullet",l=[];let u=e;for(;u<t.length;){const w=t[u];if(w.name!=="w:p"||!X(w))break;const f=K(w);if(!f||f.numId!==o.numId)break;i.add(u);const d=await $(w,r),h=Array.isArray(d)?d[0]:d;l.push({type:"listItem",content:[h]}),u++}const p={type:c==="bullet"?"bulletList":"orderedList",content:l};return c==="ordered"&&(p.attrs={type:null,...s?.start!==void 0&&{start:s.start}}),p}function ne(n){const t=[],e=x(n,"w:r");for(const r of e){const i=a(r,"w:t");if(!i)continue;const o=i.children.find(s=>s.type==="text");o&&"value"in o&&o.value&&t.push({type:"text",text:o.value})}return t}function re(n){const t=x(n,"w:r");for(const e of t){const r=a(e,"w:t");if(r){const o=r.children.find(s=>s.type==="text");if(o&&"value"in o&&o.value&&o.value.trim().length>0)return!1}if(a(e,"w:drawing")||a(e,"mc:AlternateContent")||a(e,"w:pict"))return!1;const i=a(e,"w:br");if(i&&i.attributes["w:type"]==="page")return!1}return!0}export{$ as convertParagraph,it as convertTable,ct as convertTaskItem,lt as convertTaskList,j as extractAlignment,_ as extractMarks,D as extractRuns,ft as getCodeBlockLanguage,K as getListInfo,at as getTaskItemChecked,ut as isCodeBlock,pt as isHorizontalRule,X as isListItem,Xt as isTable,U as isTaskItem,Jt as parseDOCX};
1
+ import { fromXml } from "xast-util-from-xml";
2
+ import { unzipSync } from "fflate";
3
+ import { toUint8Array } from "undio";
4
+ import { imageMeta } from "image-meta";
5
+ //#region ../utils/dist/index.mjs
6
+ /**
7
+ * Number of TWIPs (twentieths of a point) in one inch.
8
+ * 1 inch = 72 points = 1440 TWIPs
9
+ */
10
+ const TWIPS_PER_INCH = 1440;
11
+ /**
12
+ * Number of EMUs (English Metric Units) in one inch.
13
+ * 1 inch = 914400 EMUs
14
+ */
15
+ const EMUS_PER_INCH = 914400;
16
+ const PIXELS_PER_HALF_POINT = 1 / 1.5; // 2/3 px per half-point (0.5 pt at 96 px per 72 pt); NOTE(review): presumably scales half-point font sizes - usage is not shown here, confirm
17
+ /**
18
+ * DOCX style name constants: one exact name and one shorter name prefix
19
+ */
20
+ const DOCX_STYLE_NAMES = {
21
+ CODE_BLOCK: "CodeBlock", // exact style name; note that it also begins with CODE_PREFIX
22
+ CODE_PREFIX: "Code" // NOTE(review): presumably compared with startsWith against paragraph style ids - confirm at the call sites
23
+ };
24
+ /**
25
+ * Text alignment lookup tables, one per direction; the two tables are exact inverses of each other
26
+ */
27
+ const TEXT_ALIGN_MAP = {
28
+ tiptapToDocx: {
29
+ left: "left",
30
+ right: "right",
31
+ center: "center",
32
+ justify: "both" // the only entry whose name differs between the two vocabularies
33
+ },
34
+ docxToTipTap: {
35
+ left: "left",
36
+ right: "right",
37
+ center: "center",
38
+ both: "justify" // inverse of tiptapToDocx.justify
39
+ }
40
+ };
41
+ /**
42
+ * Unit conversion utilities for DOCX processing
43
+ * Handles conversions between TWIPs, EMUs and pixels; PIXELS_PER_INCH is the pixel density all of them share
44
+ */
45
+ const PIXELS_PER_INCH = 96; // 1440 TWIPs or 914400 EMUs (one inch) both convert to 96 px
46
+ /**
47
+ * Convert TWIPs to CSS pixels (returns number)
48
+ * @param twip - Value in TWIPs (1 inch = 1440 TWIPs)
49
+ * @returns Pixel value rounded to the nearest integer with Math.round (NaN input yields NaN)
50
+ *
51
+ * @example
52
+ * convertTwipToPixels(1440) // returns 96
53
+ */
54
+ function convertTwipToPixels(twip) {
55
+ return Math.round(twip * PIXELS_PER_INCH / TWIPS_PER_INCH); // 15 TWIPs per pixel, so sub-pixel amounts are rounded away
56
+ }
57
+ /**
58
+ * Convert TWIPs to a CSS length string: the rounded result of convertTwipToPixels followed by "px"
59
+ * @param twip - Value in TWIPs
60
+ * @returns CSS value string in whole pixels (e.g., "20px")
61
+ *
62
+ * @example
63
+ * convertTwipToCssString(1440) // returns "96px"
64
+ */
65
+ function convertTwipToCssString(twip) {
66
+ return `${convertTwipToPixels(twip)}px`;
67
+ }
68
+ /**
69
+ * Convert EMUs to pixels
70
+ * EMU = English Metric Unit (1 inch = 914400 EMUs, i.e. 9525 EMUs per pixel at 96 px per inch)
71
+ * @param emu - Value in EMUs
72
+ * @returns Value in pixels, rounded to the nearest integer with Math.round
73
+ *
74
+ * @example
75
+ * convertEmuToPixels(914400) // returns 96
76
+ */
77
+ function convertEmuToPixels(emu) {
78
+ return Math.round(emu / (EMUS_PER_INCH / PIXELS_PER_INCH)); // divisor evaluates to 9525
79
+ }
80
+ /**
81
+ * Convert EMU string to pixels (parsed with parseInt, base 10)
82
+ * @param emuStr - EMU value as string
83
+ * @returns Rounded pixel value, or undefined when parseInt(emuStr, 10) is NaN
84
+ *
85
+ * @example
86
+ * convertEmuStringToPixels("914400") // returns 96
87
+ * convertEmuStringToPixels("invalid") // returns undefined
88
+ */
89
+ function convertEmuStringToPixels(emuStr) {
90
+ const emu = parseInt(emuStr, 10); // parseInt stops at the first non-digit, so "12abc" parses as 12
91
+ if (isNaN(emu)) return void 0;
92
+ return convertEmuToPixels(emu);
93
+ }
94
+ /**
95
+ * Find direct child element with specified name (grandchildren are never inspected)
96
+ * @param node - Parent XML element or root node
97
+ * @param name - Child element name to find (can include namespace prefix, e.g., "w:p"); compared with ===
98
+ * @returns First matching child in child order; null when none matches or node has no children
99
+ *
100
+ * @example
101
+ * const paragraph = findChild(document, "w:p");
102
+ */
103
+ function findChild(node, name) {
104
+ if (!node.children) return null; // nodes without a children array cannot contain a match
105
+ for (const child of node.children) if (child.type === "element" && child.name === name) return child;
106
+ return null;
107
+ }
108
+ /**
109
+ * Find deep descendant element with specified name (recursive, depth-first)
110
+ * Each child is tested before its own subtree, and an earlier sibling's subtree before any later sibling
111
+ * @param node - Root XML element (never returned itself, only its descendants)
112
+ * @param name - Descendant element name to find
113
+ * @returns First matching descendant in that order; null when none matches or node has no children
114
+ *
115
+ * @example
116
+ * const textElement = findDeepChild(run, "w:t");
117
+ */
118
+ function findDeepChild(node, name) {
119
+ if (!node.children) return null;
120
+ for (const child of node.children) if (child.type === "element") { // non-element children are skipped and not descended into
121
+ if (child.name === name) return child;
122
+ const found = findDeepChild(child, name);
123
+ if (found) return found; // stop at the first hit; remaining siblings are not visited
124
+ }
125
+ return null;
126
+ }
127
+ /**
128
+ * Find all deep descendant elements with specified name (recursive, depth-first)
129
+ * @param node - Root XML element (never included itself, only its descendants)
130
+ * @param name - Descendant element name to find
131
+ * @returns New array of matches, parent before its descendants; empty array when nothing matches
132
+ *
133
+ * @example
134
+ * const allTextRuns = findDeepChildren(paragraph, "w:r");
135
+ */
136
+ function findDeepChildren(node, name) {
137
+ const results = [];
138
+ if (!node.children) return results;
139
+ for (const child of node.children) if (child.type === "element") {
140
+ if (child.name === name) results.push(child);
141
+ results.push(...findDeepChildren(child, name)); // always descends, so matches nested inside a match are collected too
142
+ }
143
+ return results;
144
+ }
145
+ /**
146
+ * Validate a TWIP attribute: look it up and check that parseInt (base 10) accepts it
147
+ * TWIP = Twentieth of a Point (1 inch = 1440 TWIPs)
148
+ * @param attributes - Element attributes object
149
+ * @param name - Attribute name to read
150
+ * @returns The original attribute string (not a number); undefined when it is missing, empty, or parses to NaN
151
+ *
152
+ * @example
153
+ * const leftIndent = parseTwipAttr(pPr.attributes, "w:left");
154
+ */
155
+ function parseTwipAttr(attributes, name) {
156
+ const value = attributes[name];
157
+ if (!value) return void 0; // covers undefined, null and the empty string
158
+ const num = parseInt(value, 10); // only used as a validity check
159
+ if (isNaN(num)) return void 0;
160
+ return value; // the unparsed string is returned; callers convert it themselves
161
+ }
162
+ /**
163
+ * Type guard utilities for DOCX processing
164
+ */
165
+ /**
166
+ * Type guard factory function
167
+ * Creates a predicate that checks if a value is one of the valid values (Array.prototype.includes, so the match is exact and case-sensitive)
168
+ *
169
+ * @param validValues - Readonly array of valid string values; captured by reference, not copied
170
+ * @returns Function returning true when validValues.includes(value), false otherwise
171
+ *
172
+ * @example
173
+ * const isValidAlign = createStringValidator(["left", "right", "center"] as const);
174
+ * if (isValidAlign(value)) {
175
+ * // value is typed as "left" | "right" | "center"
176
+ * }
177
+ */
178
+ function createStringValidator(validValues) {
179
+ return (value) => {
180
+ return validValues.includes(value); // in this compiled output the guard is a plain boolean check
181
+ };
182
+ }
183
+ //#endregion
184
+ //#region src/utils/base64.ts
185
+ /**
186
+ * Base64 encoding utilities
187
+ */
188
+ /**
189
+ * Base64 alphabet (A-Z, a-z, 0-9, "+", "/"): the character at index i encodes the 6-bit value i
190
+ */
191
+ const BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
192
+ /**
193
+ * Convert Uint8Array to base64 string using lookup table and bitwise operations
194
+ * Similar to base64-arraybuffer implementation but without external dependencies
195
+ * Performance: O(n) time complexity, no stack overflow risk; each 3-byte group becomes 4 characters
196
+ *
197
+ * @param bytes - Uint8Array to encode
198
+ * @returns Base64 encoded string, padded with "=" to a multiple of 4 characters ("" for empty input)
199
+ */
200
+ function uint8ArrayToBase64(bytes) {
201
+ const len = bytes.length;
202
+ const resultLen = Math.ceil(len / 3) * 4; // 4 output characters per started 3-byte group
203
+ const result = Array.from({ length: resultLen }); // one slot per output character, joined once at the end
204
+ let resultIndex = 0;
205
+ for (let i = 0; i < len; i += 3) {
206
+ const byte1 = bytes[i];
207
+ const byte2 = i + 1 < len ? bytes[i + 1] : 0; // 0 stands in for a missing byte in the final group
208
+ const byte3 = i + 2 < len ? bytes[i + 2] : 0;
209
+ const index0 = byte1 >> 2; // top 6 bits of byte1
210
+ const index1 = (byte1 & 3) << 4 | byte2 >> 4; // low 2 bits of byte1 + top 4 bits of byte2
211
+ const index2 = (byte2 & 15) << 2 | byte3 >> 6; // low 4 bits of byte2 + top 2 bits of byte3
212
+ const index3 = byte3 & 63; // low 6 bits of byte3
213
+ result[resultIndex++] = BASE64_CHARS[index0];
214
+ result[resultIndex++] = BASE64_CHARS[index1];
215
+ result[resultIndex++] = i + 1 < len ? BASE64_CHARS[index2] : "="; // padding when byte2 does not exist
216
+ result[resultIndex++] = i + 2 < len ? BASE64_CHARS[index3] : "="; // padding when byte3 does not exist
217
+ }
218
+ return result.join("");
219
+ }
220
+ /**
221
+ * Convert base64 string to Uint8Array using the global atob
222
+ *
223
+ * @param base64 - Base64 encoded string (payload only; atob rejects input that is not valid base64)
224
+ * @returns Decoded data as Uint8Array, one element per character of the atob result
225
+ */
226
+ function base64ToUint8Array(base64) {
227
+ const binaryString = atob(base64); // binary string: each character's code is one decoded byte
228
+ const bytes = new Uint8Array(binaryString.length);
229
+ for (let i = 0; i < binaryString.length; i++) bytes[i] = binaryString.charCodeAt(i);
230
+ return bytes;
231
+ }
232
+ //#endregion
233
+ //#region src/utils/image.ts
234
+ /**
235
+ * Detect current environment (evaluated once at module load; the two flags are computed independently)
236
+ */
237
+ const isNode = globalThis.process?.release?.name === "node"; // optional chaining keeps this false, not throwing, when process is absent
238
+ const isBrowser = typeof window !== "undefined"; // true whenever a global named window exists
239
/**
 * Await a module (or a promise for one) and unwrap its default export.
 * Falls back to the module object itself when `default` is missing or falsy.
 */
async function interopDefault(m) {
  const mod = await m;
  const preferred = mod.default;
  return preferred ? preferred : mod;
}
246
// Module-level cache for the canvas implementation used outside the browser.
// It stays undefined until resolveCanvasModule() assigns it.
let resolvedCanvasModule;
247
/**
 * Shared canvas lifecycle logic (create / reset / destroy).
 * Subclasses supply the actual canvas by overriding `_createCanvas`.
 */
class BaseCanvasFactory {
  #enableHWA;

  /**
   * @param options - `enableHWA` (default false) is inverted to form the
   *   `willReadFrequently` hint passed to `getContext`.
   */
  constructor(options = {}) {
    const { enableHWA = false } = options;
    this.#enableHWA = enableHWA;
  }

  /**
   * Build a canvas of the given size together with its 2D context.
   * @returns `{ canvas, context }`
   */
  create(width, height) {
    const canvas = this._createCanvas(width, height);
    const context = canvas.getContext("2d", { willReadFrequently: !this.#enableHWA });
    return { canvas, context };
  }

  /**
   * Resize an existing canvas in place.
   * @throws Error when the pair carries no canvas
   */
  reset(canvasAndContext, width, height) {
    const { canvas } = canvasAndContext;
    if (!canvas) throw new Error("Canvas is not specified");
    canvas.width = width;
    canvas.height = height;
  }

  /**
   * Shrink the canvas to 0x0 and clear both references on the pair.
   * @throws Error when the pair carries no canvas
   */
  destroy(context) {
    const target = context.canvas;
    if (!target) throw new Error("Canvas is not specified");
    target.width = 0;
    target.height = 0;
    context.canvas = void 0;
    context.context = void 0;
  }

  /**
   * Platform hook; the base implementation always throws.
   */
  _createCanvas(width, height) {
    throw new Error("Not implemented");
  }
}
278
/**
 * Canvas factory backed by a DOM document's `createElement("canvas")`.
 */
class DOMCanvasFactory extends BaseCanvasFactory {
  _document;

  /**
   * @param options - `ownerDocument` defaults to `globalThis.document`;
   *   `enableHWA` is forwarded to the base class.
   */
  constructor({ ownerDocument = globalThis.document, enableHWA = false } = {}) {
    super({ enableHWA });
    this._document = ownerDocument;
  }

  _createCanvas(width, height) {
    const element = this._document.createElement("canvas");
    return Object.assign(element, { width, height });
  }
}
294
/**
 * Canvas factory that delegates to the module cached in `resolvedCanvasModule`
 * (expected to be @napi-rs/canvas, per the error message below).
 */
class NodeCanvasFactory extends BaseCanvasFactory {
  constructor(options = {}) {
    const { enableHWA = false } = options;
    super({ enableHWA });
  }

  /**
   * @throws Error when the canvas module has not been resolved yet
   */
  _createCanvas(width, height) {
    if (resolvedCanvasModule) return resolvedCanvasModule.createCanvas(width, height);
    throw new Error("@napi-rs/canvas module is not resolved");
  }
}
306
/**
 * Load the canvas module once and cache it in `resolvedCanvasModule`.
 * When a module is already cached, `canvasImport` is not invoked.
 *
 * @param canvasImport - Function returning the module or a promise for it
 */
async function resolveCanvasModule(canvasImport) {
  if (resolvedCanvasModule !== null && resolvedCanvasModule !== void 0) return;
  resolvedCanvasModule = await interopDefault(canvasImport());
}
312
/**
 * Pick the canvas factory class that matches the detected runtime.
 *
 * The browser check runs first. Under Node the canvas module is resolved
 * through `canvasImport` before the Node factory is returned.
 *
 * @param canvasImport - Dynamic import function for @napi-rs/canvas (required in Node.js)
 * @returns The factory class (not an instance)
 * @throws Error when neither runtime is detected, or when `canvasImport` is missing under Node
 */
async function createCanvasFactory(canvasImport) {
  if (isBrowser) return DOMCanvasFactory;
  if (!isNode) throw new Error("Unsupported environment for canvas operations");
  if (!canvasImport) throw new Error("In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas");
  await resolveCanvasModule(canvasImport);
  return NodeCanvasFactory;
}
327
/**
 * Crop an encoded image to the rectangle described by `crop`.
 *
 * Each crop side is divided by 1e5 and multiplied by the loaded image's
 * width/height to obtain the number of pixels trimmed from that side.
 * The result is re-encoded through the canvas' `toDataURL()` default format.
 *
 * This function never rejects: any failure is logged with `console.warn`
 * and the original bytes are returned.
 *
 * @param imageData - Original image data as Uint8Array
 * @param crop - Crop rectangle (DOCX format: 0-100000)
 * @param options - `enabled: false` disables cropping; `canvasImport` is
 *   forwarded to createCanvasFactory
 * @returns Cropped image data, or original if no crop or error occurs
 */
async function cropImageIfNeeded(imageData, crop, options = {}) {
  if (!crop || !crop.left && !crop.top && !crop.right && !crop.bottom) return imageData;
  if (options.enabled === false) return imageData;
  let factory;
  let drawingContext;
  try {
    const CanvasFactory = await createCanvasFactory(options.canvasImport);
    const img = await loadImage(imageData, CanvasFactory);
    const left = (crop.left || 0) / 1e5 * img.width;
    const top = (crop.top || 0) / 1e5 * img.height;
    const right = (crop.right || 0) / 1e5 * img.width;
    const bottom = (crop.bottom || 0) / 1e5 * img.height;
    const croppedWidth = Math.round(img.width - left - right);
    const croppedHeight = Math.round(img.height - top - bottom);
    if (croppedWidth <= 0 || croppedHeight <= 0) {
      console.warn("Invalid crop dimensions, returning original image");
      return imageData;
    }
    factory = new CanvasFactory();
    drawingContext = factory.create(croppedWidth, croppedHeight);
    if (!drawingContext.context) throw new Error("Failed to get 2D context from canvas");
    drawingContext.context.drawImage(img, left, top, croppedWidth, croppedHeight, 0, 0, croppedWidth, croppedHeight);
    const dataUrl = drawingContext.canvas.toDataURL();
    // Decode the payload directly instead of fetch()-ing the data URL:
    // fetch is not available in every runtime and data: requests can be
    // blocked by a Content-Security-Policy connect-src directive.
    const commaIndex = dataUrl.indexOf(",");
    if (commaIndex < 0 || !dataUrl.slice(0, commaIndex).endsWith(";base64")) {
      throw new Error("Canvas did not produce a base64 data URL");
    }
    return base64ToUint8Array(dataUrl.slice(commaIndex + 1));
  } catch (error) {
    console.warn("Image cropping failed, returning original image:", error);
    return imageData;
  } finally {
    // Release the scratch canvas; a cleanup failure must not mask the result.
    if (factory && drawingContext?.canvas) {
      try {
        factory.destroy(drawingContext);
      } catch (cleanupError) {
        console.warn("Failed to release crop canvas:", cleanupError);
      }
    }
  }
}
362
/**
 * Decode image bytes into an object that can be passed to `drawImage`.
 *
 * In the browser the bytes are wrapped in a Blob, exposed through a
 * temporary object URL and loaded with `Image`; the URL is revoked once
 * loading settles. Elsewhere the cached canvas module's `loadImage` is used.
 *
 * @param data - Image data as Uint8Array
 * @param _CanvasFactory - Canvas factory class (unused, for compatibility)
 * @returns Loaded canvas image
 * @throws Error when the image fails to load, or when the canvas module
 *   has not been resolved outside the browser
 */
async function loadImage(data, _CanvasFactory) {
  if (!isBrowser) {
    if (!resolvedCanvasModule) throw new Error("@napi-rs/canvas module is not resolved");
    return resolvedCanvasModule.loadImage(Buffer.from(data));
  }
  // Pass the view itself, not `data.buffer`: a Uint8Array may expose only a
  // slice of a larger ArrayBuffer, and the Blob must contain just that slice.
  const url = URL.createObjectURL(new Blob([data]));
  try {
    return await new Promise((resolve, reject) => {
      const img = new Image();
      img.onload = () => resolve(img);
      img.onerror = () => reject(new Error("Failed to load image"));
      img.src = url;
    });
  } finally {
    URL.revokeObjectURL(url);
  }
}
395
+ //#endregion
396
+ //#region src/parsers/images.ts
397
// Relationship type URI that marks an entry in document.xml.rels as an image.
const IMAGE_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";

/**
 * Predicates for image positioning values, each built by
 * createStringValidator from the list of strings it should accept.
 */
const isValidHorizontalAlign = createStringValidator(["left", "right", "center", "inside", "outside"]);
const isValidVerticalAlign = createStringValidator(["top", "bottom", "center", "inside", "outside"]);
const isValidHorizontalRelative = createStringValidator([
  "page", "character", "column", "margin",
  "leftMargin", "rightMargin", "insideMargin", "outsideMargin"
]);
const isValidVerticalRelative = createStringValidator([
  "page", "paragraph", "margin", "topMargin",
  "bottomMargin", "insideMargin", "outsideMargin", "line"
]);
435
/**
 * Read the l/t/r/b attributes of an a:srcRect element.
 *
 * @param srcRect - Element whose `attributes` may hold "l", "t", "r", "b"
 * @returns `{ left, top, right, bottom }` with each present side parsed as a
 *   base-10 integer and each absent side left undefined; undefined when no
 *   side is set at all
 */
function extractCropRect(srcRect) {
  const { l, t, r, b } = srcRect.attributes;
  if (!(l || t || r || b)) return void 0;
  const toInt = (raw) => (raw ? parseInt(raw, 10) : void 0);
  return {
    left: toInt(l),
    top: toInt(t),
    right: toInt(r),
    bottom: toInt(b)
  };
}
451
/**
 * Apply the crop declared on a picture to an already-extracted image.
 *
 * Nothing happens unless the picture has a pic:spPr child, the image source
 * is a data: URL, and an a:srcRect with at least one non-zero side exists.
 * Otherwise the image bytes are passed through cropImageIfNeeded and the
 * known dimensions are scaled by the visible fraction on each axis.
 *
 * Dimensions that are unknown (missing on `imgInfo`) stay unknown instead of
 * collapsing to 0, so the consumer never receives a zero-sized image.
 *
 * @param pic - Picture element
 * @param imgInfo - `{ src, width, height }` as stored in the image map
 * @param params - Must carry `context.image` options (may be undefined)
 * @returns A new image-info object, or `imgInfo` itself when no crop applies
 *   or cropping fails
 */
async function applyCropToImage(pic, imgInfo, params) {
  if (!findChild(pic, "pic:spPr") || !imgInfo.src.startsWith("data:")) return imgInfo;
  const srcRect = findDeepChild(pic, "a:srcRect");
  if (!srcRect) return imgInfo;
  const crop = extractCropRect(srcRect);
  if (!crop || !crop.left && !crop.top && !crop.right && !crop.bottom) return imgInfo;
  try {
    const [metadata, base64Data] = imgInfo.src.split(",");
    if (!base64Data) return imgInfo;
    const croppedBytes = await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
      canvasImport: params.context.image?.canvasImport,
      enabled: params.context.image?.enableImageCrop ?? false
    });
    const visibleWidthPct = 1 - (crop.left || 0) / 1e5 - (crop.right || 0) / 1e5;
    const visibleHeightPct = 1 - (crop.top || 0) / 1e5 - (crop.bottom || 0) / 1e5;
    // Only scale a dimension that is actually known.
    const scale = (dimension, visiblePct) => (dimension ? Math.round(dimension * visiblePct) : dimension);
    return {
      src: `${metadata},${uint8ArrayToBase64(croppedBytes)}`,
      width: scale(imgInfo.width, visibleWidthPct),
      height: scale(imgInfo.height, visibleHeightPct)
    };
  } catch (error) {
    console.warn("Grouped image cropping failed, using original image:", error);
    return imgInfo;
  }
}
488
/**
 * Shared reader for wp:positionH / wp:positionV content.
 *
 * Looks at the first child of wp:align and wp:posOffset; only text nodes are
 * considered. The alignment is kept when `isValidAlign` accepts it, and the
 * offset is kept when it parses as a base-10 integer (a non-numeric offset
 * is ignored rather than surfacing as NaN).
 *
 * @param positionEl - Position element to inspect
 * @param isValidAlign - Predicate deciding which alignment strings are accepted
 * @returns `{ align?, offset? }`, or undefined when neither value is usable
 */
function readAxisPosition(positionEl, isValidAlign) {
  const firstText = (el) => {
    const node = el?.children[0];
    return node?.type === "text" ? node.value : void 0;
  };
  const alignText = firstText(findChild(positionEl, "wp:align"));
  const align = alignText !== void 0 && isValidAlign(alignText) ? alignText : void 0;
  const offsetText = firstText(findChild(positionEl, "wp:posOffset"));
  const parsedOffset = offsetText === void 0 ? NaN : parseInt(offsetText, 10);
  const offset = Number.isNaN(parsedOffset) ? void 0 : parsedOffset;
  if (!align && offset === void 0) return void 0;
  return {
    ...align && { align },
    ...offset !== void 0 && { offset }
  };
}

/**
 * Extract horizontal position (align/offset) from position element
 */
function extractHorizontalPosition(positionEl) {
  return readAxisPosition(positionEl, isValidHorizontalAlign);
}

/**
 * Extract vertical position (align/offset) from position element
 */
function extractVerticalPosition(positionEl) {
  return readAxisPosition(positionEl, isValidVerticalAlign);
}
524
/**
 * Locate the w:drawing element of a run.
 *
 * A direct child wins; otherwise the lookup follows
 * mc:AlternateContent > mc:Choice > w:drawing.
 *
 * @returns The drawing element, or null when the mc:Choice step is missing
 */
function findDrawingElement(run) {
  const direct = findChild(run, "w:drawing");
  if (direct) return direct;
  const alternate = findChild(run, "mc:AlternateContent");
  const choice = alternate && findChild(alternate, "mc:Choice");
  if (!choice) return null;
  return findChild(choice, "w:drawing");
}
534
/**
 * Size an image inside a group's bounding box.
 *
 * When the image and box aspect ratios differ by more than 0.1, the image is
 * scaled to fit inside the box with its own ratio; otherwise the box size is
 * used unchanged.
 *
 * @returns `{ width, height }`
 */
function fitToGroup(groupWidth, groupHeight, metaWidth, metaHeight) {
  const imageRatio = metaWidth / metaHeight;
  const boxRatio = groupWidth / groupHeight;
  const ratiosDiffer = Math.abs(imageRatio - boxRatio) > .1;
  if (!ratiosDiffer) {
    return { width: groupWidth, height: groupHeight };
  }
  if (imageRatio > boxRatio) {
    // Relatively wider image: width is the limiting side.
    return { width: groupWidth, height: Math.round(groupWidth / imageRatio) };
  }
  // Relatively taller image: height is the limiting side.
  return { width: Math.round(groupHeight * imageRatio), height: groupHeight };
}
553
/**
 * Resolve a relationship Target from word/_rels/document.xml.rels to a key
 * of the unzipped file map. Targets starting with "/" are package-absolute;
 * everything else is resolved against the "word/" folder, collapsing "."
 * and ".." segments.
 */
function resolveDocumentRelTarget(target) {
  if (target.startsWith("/")) return target.slice(1);
  const segments = ["word"];
  for (const segment of target.split("/")) {
    if (segment === "" || segment === ".") continue;
    if (segment === "..") segments.pop();
    else segments.push(segment);
  }
  return segments.join("/");
}

/**
 * Collect every image referenced by the main document's relationships.
 *
 * For each image relationship the bytes are looked up in `files`, the
 * dimensions and type are probed with imageMeta (type defaults to "png"),
 * and the source is either produced by `handler` or built as a base64
 * data URL.
 *
 * @param files - Map of archive path to Uint8Array content
 * @param handler - Optional async callback receiving `{ id, contentType, data }`
 *   and returning an object with a `src` property
 * @returns Map of relationship ID to `{ src, width, height }`
 */
async function extractImages(files, handler) {
  const images = /* @__PURE__ */ new Map();
  const relsXml = files["word/_rels/document.xml.rels"];
  if (!relsXml) return images;
  const relationships = findChild(fromXml(new TextDecoder().decode(relsXml)), "Relationships");
  if (!relationships) return images;
  for (const rel of findDeepChildren(relationships, "Relationship")) {
    const { Type, Id, Target } = rel.attributes;
    if (Type !== IMAGE_REL_TYPE || !Id || !Target) continue;
    // Prefer the properly resolved path; keep the plain "word/" + Target
    // lookup as a fallback so previously working archives behave the same.
    const imageData = files[resolveDocumentRelTarget(Target)] ?? files["word/" + Target];
    if (!imageData) continue;
    let width;
    let height;
    let imageType = "png";
    try {
      const meta = imageMeta(imageData);
      width = meta.width;
      height = meta.height;
      if (meta.type) imageType = meta.type;
    } catch {
      // Best effort: an unrecognised image keeps undefined dimensions and
      // the default type instead of aborting the whole import.
    }
    let src;
    if (handler) {
      const handled = await handler({
        id: Id,
        contentType: `image/${imageType}`,
        data: imageData
      });
      src = handled.src;
    } else {
      src = `data:image/${imageType};base64,${uint8ArrayToBase64(imageData)}`;
    }
    images.set(Id, { src, width, height });
  }
  return images;
}
594
/**
 * Build a TipTap image node from a drawing element.
 *
 * The image is identified through the r:embed attribute of the first a:blip
 * and looked up in `params.context.images`. Optional attributes are only
 * added when the drawing provides them:
 * - width/height from wp:extent (via convertEmuStringToPixels)
 * - rotation from a:xfrm "rot", divided by 60000
 * - title from wp:docPr
 * - floating placement from wp:positionH / wp:positionV
 * - outline colour from pic:spPr > a:ln > a:solidFill > a:srgbClr
 *
 * When an a:srcRect is present and the source is a data: URL, the bytes are
 * passed through cropImageIfNeeded; a failure there is logged and the
 * original source is kept.
 *
 * @returns TipTap image node, or null when no embedded image can be resolved
 */
async function extractImageFromDrawing(drawing, params) {
  const { context } = params;
  const embedId = findDeepChild(drawing, "a:blip")?.attributes["r:embed"];
  if (!embedId) return null;
  const imgInfo = context.images.get(embedId);
  if (!imgInfo) return null;

  // Source, optionally cropped.
  let src = imgInfo.src;
  const srcRect = findDeepChild(drawing, "a:srcRect");
  const crop = srcRect ? extractCropRect(srcRect) : void 0;
  if (crop && src.startsWith("data:")) {
    const [metadata, base64Data] = src.split(",");
    if (base64Data) {
      const bytes = base64ToUint8Array(base64Data);
      try {
        const cropped = await cropImageIfNeeded(bytes, crop, {
          canvasImport: context.image?.canvasImport,
          enabled: context.image?.enableImageCrop ?? false
        });
        src = `${metadata},${uint8ArrayToBase64(cropped)}`;
      } catch (error) {
        console.warn("Image cropping failed, using original image:", error);
      }
    }
  }

  // Attributes are added in a fixed order: src, alt, width, height,
  // rotation, title, floating, outline.
  const attrs = { src, alt: "" };

  const extent = findDeepChild(drawing, "wp:extent");
  if (extent) {
    const { cx, cy } = extent.attributes;
    const width = typeof cx === "string" ? convertEmuStringToPixels(cx) : void 0;
    const height = typeof cy === "string" ? convertEmuStringToPixels(cy) : void 0;
    if (width !== void 0) attrs.width = width;
    if (height !== void 0) attrs.height = height;
  }

  const rawRotation = findDeepChild(drawing, "a:xfrm")?.attributes["rot"];
  if (rawRotation) {
    const rot = parseInt(rawRotation, 10);
    if (!Number.isNaN(rot)) attrs.rotation = rot / 6e4;
  }

  const title = findDeepChild(drawing, "wp:docPr")?.attributes["title"];
  if (title) attrs.title = title;

  const positionH = findDeepChild(drawing, "wp:positionH");
  const positionV = findDeepChild(drawing, "wp:positionV");
  if (positionH || positionV) {
    const hPos = positionH ? extractHorizontalPosition(positionH) : void 0;
    const vPos = positionV ? extractVerticalPosition(positionV) : void 0;
    // An absent or unrecognised relativeFrom falls back to "page".
    const pickRelative = (raw, isValid) => (typeof raw === "string" && isValid(raw) ? raw : "page");
    const describeAxis = (relative, pos) => ({
      relative,
      ...pos?.align && { align: pos.align },
      ...pos?.offset !== void 0 && { offset: pos.offset }
    });
    attrs.floating = {
      horizontalPosition: describeAxis(
        pickRelative(positionH?.attributes["relativeFrom"], isValidHorizontalRelative),
        hPos
      ),
      verticalPosition: describeAxis(
        pickRelative(positionV?.attributes["relativeFrom"], isValidVerticalRelative),
        vPos
      )
    };
  }

  const spPr = findDeepChild(drawing, "pic:spPr");
  const ln = spPr && findDeepChild(spPr, "a:ln");
  const solidFill = ln && findDeepChild(ln, "a:solidFill");
  const srgbClr = solidFill && findDeepChild(solidFill, "a:srgbClr");
  const outlineColor = srgbClr?.attributes["val"];
  if (outlineColor) {
    attrs.outline = {
      type: "solidFill",
      solidFillType: "rgb",
      value: outlineColor
    };
  }

  return { type: "image", attrs };
}
689
/**
 * Collect every image contained in a drawing element.
 *
 * A drawing without a <wpg:wgp> group yields at most one image, produced by
 * extractImageFromDrawing. A grouped drawing is expanded picture by picture:
 * - a picture without its own a:graphic is resolved directly from its blip
 *   and passed through applyCropToImage;
 * - a picture with an a:graphic is wrapped in a synthetic w:drawing, run
 *   through extractImageFromDrawing, and then either cropped (when a usable
 *   a:srcRect exists and the source is a data: URL) or sized to the group
 *   extent.
 *
 * @returns Array of TipTap image nodes (possibly empty)
 */
async function extractImagesFromDrawing(drawing, params) {
  const collected = [];
  const container = findChild(drawing, "wp:inline") || findChild(drawing, "wp:anchor");
  if (!container) return collected;

  // Extent of the whole drawing; used to size grouped pictures.
  let groupWidth;
  let groupHeight;
  const extent = findChild(container, "wp:extent");
  if (extent) {
    const { cx, cy } = extent.attributes;
    if (typeof cx === "string") groupWidth = convertEmuStringToPixels(cx);
    if (typeof cy === "string") groupHeight = convertEmuStringToPixels(cy);
  }

  const graphic = findChild(container, "a:graphic");
  if (!graphic) return collected;
  const graphicData = findChild(graphic, "a:graphicData");
  if (!graphicData) return collected;

  const group = findChild(graphicData, "wpg:wgp");
  if (!group) {
    const single = await extractImageFromDrawing(drawing, params);
    if (single) collected.push(single);
    return collected;
  }

  const searchRoot = findChild(group, "wpg:grpSp") || group;
  const pictures = [...findDeepChildren(searchRoot, "pic:pic"), ...findDeepChildren(searchRoot, "pic")];
  for (const pic of pictures) {
    const picGraphic = findChild(pic, "a:graphic");

    if (!picGraphic) {
      // Direct path: resolve the blip ourselves.
      const blipFill = findChild(pic, "pic:blipFill") || findDeepChild(pic, "a:blipFill");
      if (!blipFill) continue;
      const blip = findChild(blipFill, "a:blip") || findDeepChild(blipFill, "a:blip");
      const directId = blip?.attributes["r:embed"];
      if (!directId) continue;
      const directInfo = params.context.images.get(directId);
      if (!directInfo) continue;
      const { src, width, height } = await applyCropToImage(pic, directInfo, params);
      collected.push({
        type: "image",
        attrs: { src, alt: "", width, height }
      });
      continue;
    }

    // Synthetic path: reuse the single-image extractor on a wrapper drawing.
    const syntheticDrawing = {
      type: "element",
      name: "w:drawing",
      children: [picGraphic],
      attributes: {}
    };
    const image = await extractImageFromDrawing(syntheticDrawing, params);
    if (!image) continue;

    const lookupEmbedId = () => {
      const wrapped = syntheticDrawing.children[0];
      return wrapped?.type === "element" ? findDeepChild(wrapped, "a:blip")?.attributes["r:embed"] : void 0;
    };
    const srcRect = findChild(pic, "pic:spPr") ? findDeepChild(pic, "a:srcRect") : void 0;
    const parsedCrop = srcRect ? extractCropRect(srcRect) : void 0;
    const crop = parsedCrop ? parsedCrop : void 0;
    const shouldCrop = crop && (crop.left || crop.top || crop.right || crop.bottom) && image.attrs?.src?.startsWith("data:");

    if (shouldCrop) {
      try {
        const [metadata, base64Data] = image.attrs.src.split(",");
        if (base64Data) {
          const croppedBytes = await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
            canvasImport: params.context.image?.canvasImport,
            enabled: params.context.image?.enableImageCrop ?? false
          });
          image.attrs.src = `${metadata},${uint8ArrayToBase64(croppedBytes)}`;
          const embedId = lookupEmbedId();
          const info = embedId ? params.context.images.get(embedId) : void 0;
          if (info?.width && info?.height) {
            // Scale the stored dimensions by the fraction left visible.
            const visibleWidthPct = 1 - (crop.left || 0) / 1e5 - (crop.right || 0) / 1e5;
            const visibleHeightPct = 1 - (crop.top || 0) / 1e5 - (crop.bottom || 0) / 1e5;
            image.attrs.width = Math.round(info.width * visibleWidthPct);
            image.attrs.height = Math.round(info.height * visibleHeightPct);
          }
        }
      } catch (error) {
        console.warn("Grouped image cropping failed, using original image:", error);
      }
    } else {
      const embedId = lookupEmbedId();
      if (groupWidth && groupHeight && embedId) {
        const info = params.context.images.get(embedId);
        if (info?.width && info?.height) {
          const adjusted = fitToGroup(groupWidth, groupHeight, info.width, info.height);
          image.attrs.width = adjusted.width;
          image.attrs.height = adjusted.height;
        } else {
          image.attrs.width = groupWidth;
          image.attrs.height = groupHeight;
        }
      }
    }
    collected.push(image);
  }
  return collected;
}
796
+ //#endregion
797
+ //#region src/parsers/hyperlinks.ts
798
// Relationship type URI that marks an entry in document.xml.rels as a hyperlink.
const HYPERLINK_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";

/**
 * Read hyperlink relationships from word/_rels/document.xml.rels.
 *
 * @param files - Map of archive path to Uint8Array content
 * @returns Map of relationship ID to target; empty when the relationships
 *   part or its root element is missing
 */
function extractHyperlinks(files) {
  const hyperlinks = /* @__PURE__ */ new Map();
  const relsXml = files["word/_rels/document.xml.rels"];
  if (!relsXml) return hyperlinks;
  const tree = fromXml(new TextDecoder().decode(relsXml));
  const relationships = findChild(tree, "Relationships");
  if (!relationships) return hyperlinks;
  for (const { attributes } of findDeepChildren(relationships, "Relationship")) {
    const isHyperlink = attributes.Type === HYPERLINK_REL_TYPE;
    if (!isHyperlink || !attributes.Id || !attributes.Target) continue;
    hyperlinks.set(attributes.Id, attributes.Target);
  }
  return hyperlinks;
}
813
+ //#endregion
814
+ //#region src/parsers/numbering.ts
815
/**
 * Build a map from numbering ID (w:numId) to list type from word/numbering.xml.
 *
 * Only the first w:lvl of each w:abstractNum is inspected: its w:numFmt
 * decides between "bullet" and "ordered", and its w:start (when present)
 * becomes the `start` of an ordered list.
 *
 * @param files - Map of archive path to Uint8Array content
 * @returns Map of numId to `{ type: "bullet" }` or `{ type: "ordered", start? }`
 */
function parseNumberingXml(files) {
  const listTypeMap = /* @__PURE__ */ new Map();
  const rawXml = files["word/numbering.xml"];
  if (!rawXml) return listTypeMap;
  const numbering = findChild(fromXml(new TextDecoder().decode(rawXml)), "w:numbering");
  if (!numbering) return listTypeMap;

  // Pass 1: remember format and start value per abstract numbering definition.
  const formatsByAbstractId = /* @__PURE__ */ new Map();
  const startsByAbstractId = /* @__PURE__ */ new Map();
  for (const abstractNum of findDeepChildren(numbering, "w:abstractNum")) {
    const abstractId = abstractNum.attributes["w:abstractNumId"];
    const firstLevel = findChild(abstractNum, "w:lvl");
    if (!firstLevel) continue;
    const format = findChild(firstLevel, "w:numFmt")?.attributes["w:val"];
    if (format) formatsByAbstractId.set(abstractId, format);
    const startValue = findChild(firstLevel, "w:start")?.attributes["w:val"];
    if (startValue) startsByAbstractId.set(abstractId, parseInt(startValue, 10));
  }

  // Pass 2: map each concrete numbering instance to its list type.
  for (const num of findDeepChildren(numbering, "w:num")) {
    const numId = num.attributes["w:numId"];
    const abstractRef = findChild(num, "w:abstractNumId")?.attributes["w:val"];
    if (!abstractRef) continue;
    const format = formatsByAbstractId.get(abstractRef);
    if (!format) continue;
    const start = startsByAbstractId.get(abstractRef);
    if (format === "bullet") {
      listTypeMap.set(numId, { type: "bullet" });
      continue;
    }
    listTypeMap.set(numId, {
      type: "ordered",
      ...start !== void 0 && { start }
    });
  }
  return listTypeMap;
}
854
+ //#endregion
855
+ //#region src/parsers/styles.ts
856
/**
 * Build a map of paragraph styles from word/styles.xml.
 *
 * For every w:style of type "paragraph" that has a w:styleId this records:
 * - `name` from w:name
 * - `outlineLvl` from w:pPr/w:outlineLvl (parsed as a base-10 integer)
 * - `charFormat` from w:rPr: color, bold, italic, underline, strike,
 *   fontSize (the raw w:sz value) and fontFamily (w:rFonts w:ascii)
 *
 * Toggle elements (w:b, w:i, w:strike) count as enabled unless their w:val
 * is "false", "0" or "off"; w:u counts as enabled unless its w:val is "none".
 *
 * @param files - Map of archive path to Uint8Array content
 * @returns Map of styleId to style info
 */
function parseStylesXml(files) {
  const styleMap = /* @__PURE__ */ new Map();
  const stylesXml = files["word/styles.xml"];
  if (!stylesXml) return styleMap;
  const styles = findChild(fromXml(new TextDecoder().decode(stylesXml)), "w:styles");
  if (!styles) return styleMap;

  // A toggle element may be present purely to switch the property off.
  const isToggleOn = (el) => Boolean(el) && !["false", "0", "off"].includes(el.attributes["w:val"]);

  const paragraphStyles = findDeepChildren(styles, "w:style").filter((style) => style.attributes["w:type"] === "paragraph");
  for (const style of paragraphStyles) {
    const styleId = style.attributes["w:styleId"];
    if (!styleId) continue;
    const styleInfo = { styleId };
    const name = findChild(style, "w:name");
    if (name?.attributes["w:val"]) styleInfo.name = name.attributes["w:val"];
    const pPr = findChild(style, "w:pPr");
    if (pPr) {
      const outlineLvl = findChild(pPr, "w:outlineLvl");
      if (outlineLvl?.attributes["w:val"] !== void 0) styleInfo.outlineLvl = parseInt(outlineLvl.attributes["w:val"], 10);
    }
    const rPr = findChild(style, "w:rPr");
    if (rPr) {
      const charFormat = {};
      const color = findChild(rPr, "w:color");
      if (color?.attributes["w:val"] && color.attributes["w:val"] !== "auto") {
        const colorVal = color.attributes["w:val"];
        charFormat.color = colorVal.startsWith("#") ? colorVal : `#${colorVal}`;
      }
      if (isToggleOn(findChild(rPr, "w:b"))) charFormat.bold = true;
      if (isToggleOn(findChild(rPr, "w:i"))) charFormat.italic = true;
      const underline = findChild(rPr, "w:u");
      if (underline && underline.attributes["w:val"] !== "none") charFormat.underline = true;
      if (isToggleOn(findChild(rPr, "w:strike"))) charFormat.strike = true;
      const sz = findChild(rPr, "w:sz");
      if (sz?.attributes["w:val"]) {
        const size = parseInt(sz.attributes["w:val"], 10);
        if (!Number.isNaN(size)) charFormat.fontSize = size;
      }
      const rFonts = findChild(rPr, "w:rFonts");
      if (rFonts?.attributes["w:ascii"]) charFormat.fontFamily = rFonts.attributes["w:ascii"];
      if (Object.keys(charFormat).length > 0) styleInfo.charFormat = charFormat;
    }
    styleMap.set(styleId, styleInfo);
  }
  return styleMap;
}
905
+ //#endregion
906
+ //#region src/converters/text.ts
907
/**
 * Turn a run into a TipTap text node.
 *
 * Uses the first text child of the run's w:t element and attaches the marks
 * computed by extractMarks when there are any.
 *
 * @returns Text node, or null when the run has no w:t or its text is empty
 */
function extractTextFromRun(run, styleInfo) {
  const textChild = findChild(run, "w:t")?.children.find((c) => c.type === "text");
  const value = textChild?.value;
  if (!value) return null;
  const marks = extractMarks(run, styleInfo);
  const node = { type: "text", text: value };
  if (marks.length) node.marks = marks;
  return node;
}
922
/**
 * Convert the runs of a paragraph into TipTap inline nodes.
 *
 * Direct w:r children produce images, hard breaks and text. Runs inside a
 * w:hyperlink produce images or text; text receives a "link" mark when the
 * hyperlink's r:id resolves to a target in `context.hyperlinks`.
 *
 * A hyperlink without a resolvable target (for example one that only
 * carries an internal anchor) still contributes its content, just without
 * the link mark, so no document text is lost.
 *
 * @param paragraph - Paragraph element whose children are scanned
 * @param params - `{ context, styleInfo }`
 * @returns Array of inline nodes in document order
 */
async function extractRuns(paragraph, params) {
  const { context, styleInfo } = params;
  const runs = [];
  for (const child of paragraph.children) {
    if (child.type !== "element") continue;
    if (child.name === "w:hyperlink") {
      const href = context.hyperlinks.get(child.attributes["r:id"]);
      for (const hlChild of child.children) {
        if (hlChild.type !== "element" || hlChild.name !== "w:r") continue;
        const drawing = findDrawingElement(hlChild);
        if (drawing) {
          const image = await extractImageFromDrawing(drawing, { context });
          if (image) {
            runs.push(image);
            continue;
          }
          const imageList = await extractImagesFromDrawing(drawing, { context });
          if (imageList.length) {
            runs.push(...imageList);
            continue;
          }
        }
        const textNode = extractTextFromRun(hlChild, styleInfo);
        if (!textNode) continue;
        if (href) {
          textNode.marks = textNode.marks || [];
          textNode.marks.push({
            type: "link",
            attrs: { href }
          });
        }
        runs.push(textNode);
      }
    } else if (child.name === "w:r") {
      const drawing = findDrawingElement(child);
      if (drawing) {
        const imageList = await extractImagesFromDrawing(drawing, { context });
        if (imageList.length) {
          runs.push(...imageList);
          continue;
        }
      }
      if (findChild(child, "w:br")) {
        const marks = extractMarks(child, styleInfo);
        runs.push({
          type: "hardBreak",
          ...marks.length && { marks }
        });
      }
      const textNode = extractTextFromRun(child, styleInfo);
      if (textNode) runs.push(textNode);
    }
  }
  return runs;
}
984
/**
 * Compute the TipTap marks for a run.
 *
 * The paragraph style's character format is used as the base and the run's
 * own w:rPr overrides it. A run-level toggle can switch a property off as
 * well as on: w:b, w:i and w:strike are off when w:val is "false", "0" or
 * "off"; w:u is off when w:val is "none". A w:highlight with w:val="none"
 * produces no highlight mark.
 *
 * Mark order: highlight, subscript/superscript, bold, italic, underline,
 * strike, textStyle. The textStyle mark is emitted when any of color,
 * backgroundColor, fontSize or fontFamily is set; fontSize is the w:sz value
 * multiplied by PIXELS_PER_HALF_POINT, rounded to one decimal, in px.
 *
 * @param run - Run element
 * @param styleInfo - Optional style info whose `charFormat` seeds the result
 * @returns Array of marks (possibly empty)
 */
function extractMarks(run, styleInfo) {
  const marks = [];
  const rPr = findChild(run, "w:rPr");
  const mergedFormat = styleInfo?.charFormat ? { ...styleInfo.charFormat } : {};
  if (rPr) {
    const isSwitchedOff = (el) => ["false", "0", "off"].includes(el.attributes["w:val"]);
    const boldEl = findChild(rPr, "w:b");
    if (boldEl) mergedFormat.bold = !isSwitchedOff(boldEl);
    const italicEl = findChild(rPr, "w:i");
    if (italicEl) mergedFormat.italic = !isSwitchedOff(italicEl);
    const underlineEl = findChild(rPr, "w:u");
    if (underlineEl) mergedFormat.underline = underlineEl.attributes["w:val"] !== "none";
    const strikeEl = findChild(rPr, "w:strike");
    if (strikeEl) mergedFormat.strike = !isSwitchedOff(strikeEl);
    const colorEl = findChild(rPr, "w:color");
    if (colorEl?.attributes["w:val"] && colorEl.attributes["w:val"] !== "auto") {
      const colorVal = colorEl.attributes["w:val"];
      mergedFormat.color = colorVal.startsWith("#") ? colorVal : `#${colorVal}`;
    }
    const szEl = findChild(rPr, "w:sz");
    if (szEl?.attributes["w:val"]) {
      const size = parseInt(szEl.attributes["w:val"], 10);
      if (!Number.isNaN(size)) mergedFormat.fontSize = size;
    }
    const rFontsEl = findChild(rPr, "w:rFonts");
    if (rFontsEl?.attributes["w:ascii"]) mergedFormat.fontFamily = rFontsEl.attributes["w:ascii"];
    const shdEl = findChild(rPr, "w:shd");
    if (shdEl?.attributes["w:fill"] && shdEl.attributes["w:fill"] !== "auto") {
      const fillColor = shdEl.attributes["w:fill"];
      mergedFormat.backgroundColor = fillColor.startsWith("#") ? fillColor : `#${fillColor}`;
    }
    const highlightEl = findChild(rPr, "w:highlight");
    if (highlightEl && highlightEl.attributes["w:val"] !== "none") marks.push({ type: "highlight" });
    const vertAlign = findChild(rPr, "w:vertAlign");
    if (vertAlign) {
      const val = vertAlign.attributes["w:val"];
      if (val === "subscript") marks.push({ type: "subscript" });
      else if (val === "superscript") marks.push({ type: "superscript" });
    }
  }
  if (mergedFormat.bold) marks.push({ type: "bold" });
  if (mergedFormat.italic) marks.push({ type: "italic" });
  if (mergedFormat.underline) marks.push({ type: "underline" });
  if (mergedFormat.strike) marks.push({ type: "strike" });
  if (mergedFormat.color || mergedFormat.backgroundColor || mergedFormat.fontSize || mergedFormat.fontFamily) {
    const textStyleAttrs = {
      color: mergedFormat.color || "",
      backgroundColor: mergedFormat.backgroundColor || "",
      fontSize: "",
      fontFamily: "",
      lineHeight: ""
    };
    if (mergedFormat.fontSize) textStyleAttrs.fontSize = `${Math.round(mergedFormat.fontSize * PIXELS_PER_HALF_POINT * 10) / 10}px`;
    if (mergedFormat.fontFamily) textStyleAttrs.fontFamily = mergedFormat.fontFamily;
    marks.push({
      type: "textStyle",
      attrs: textStyleAttrs
    });
  }
  return marks;
}
1049
/**
 * Read the paragraph justification (`w:pPr` > `w:jc`) and translate it into a
 * TipTap `textAlign` attribute.
 *
 * @param {object} paragraph - XAST element of the paragraph to inspect.
 * @returns {{ textAlign: string } | undefined} Alignment attrs, or `undefined`
 *   when the paragraph declares no justification or the declared value has no
 *   entry in `TEXT_ALIGN_MAP.docxToTipTap`.
 */
function extractAlignment(paragraph) {
  const pPr = findChild(paragraph, "w:pPr");
  if (!pPr) return undefined;
  const alignment = findChild(pPr, "w:jc")?.attributes["w:val"];
  if (!alignment) return undefined;
  const mapping = TEXT_ALIGN_MAP.docxToTipTap;
  // The value comes straight from the document, so only accept the map's own
  // keys; a plain lookup would also resolve inherited members such as
  // "constructor" and leak a function into the attrs.
  if (!Object.prototype.hasOwnProperty.call(mapping, alignment)) return undefined;
  const textAlign = mapping[alignment];
  return textAlign ? { textAlign } : undefined;
}
1061
+ //#endregion
1062
+ //#region src/converters/paragraph.ts
1063
/**
 * Collect indentation and spacing attributes from a paragraph's `w:pPr`.
 *
 * Reads `w:ind` (`w:left`, `w:right`, `w:firstLine`, `w:hanging`) and
 * `w:spacing` (`w:before`, `w:after`). Each value that `parseTwipAttr` accepts
 * is passed through `convertTwipToCssString`. When `w:firstLine` is absent but
 * `w:hanging` is present, the first-line value is computed as
 * `left - hanging` (with `left` defaulting to 0).
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {object | null} Object with any of `indentLeft`, `indentRight`,
 *   `indentFirstLine`, `spacingBefore`, `spacingAfter`; `null` when there is no
 *   `w:pPr` or nothing was extracted.
 */
function extractParagraphStyles(node) {
  const pPr = findChild(node, "w:pPr");
  if (!pPr) return null;

  const styles = {};
  const toCss = (raw) => convertTwipToCssString(parseInt(raw, 10));

  const ind = findChild(pPr, "w:ind");
  if (ind) {
    const { attributes } = ind;
    const left = parseTwipAttr(attributes, "w:left");
    if (left) styles.indentLeft = toCss(left);

    const right = parseTwipAttr(attributes, "w:right");
    if (right) styles.indentRight = toCss(right);

    const firstLine = parseTwipAttr(attributes, "w:firstLine");
    if (firstLine) {
      styles.indentFirstLine = toCss(firstLine);
    } else {
      // A hanging indent is only considered when no explicit first-line indent exists.
      const hanging = parseTwipAttr(attributes, "w:hanging");
      if (hanging) {
        const base = left ? parseInt(left, 10) : 0;
        styles.indentFirstLine = convertTwipToCssString(base - parseInt(hanging, 10));
      }
    }
  }

  const spacing = findChild(pPr, "w:spacing");
  if (spacing) {
    const { attributes } = spacing;
    const before = parseTwipAttr(attributes, "w:before");
    if (before) styles.spacingBefore = toCss(before);

    const after = parseTwipAttr(attributes, "w:after");
    if (after) styles.spacingAfter = toCss(after);
  }

  return Object.keys(styles).length ? styles : null;
}
1092
/**
 * Convert a DOCX paragraph element into TipTap JSON.
 *
 * Resolution order:
 *  1. If the paragraph style is known to `context.styleMap` with an
 *     `outlineLvl` of 0-5, or the style id matches `Heading<N>`, the paragraph
 *     is delegated to `convertHeading`.
 *  2. If the paragraph contains a page break, the result is a two-element
 *     array: the paragraph (with `hardBreak` runs removed) followed by a
 *     `horizontalRule`.
 *  3. Otherwise a single `paragraph` node is returned.
 *
 * @param {object} node - XAST element of the paragraph.
 * @param {object} params - Conversion parameters.
 * @param {object} params.context - Shared conversion context (`styleMap` is read here).
 * @param {object} [params.styleInfo] - Style info that takes precedence over the
 *   one looked up from the paragraph's own style id.
 * @returns {Promise<object | object[]>} A heading/paragraph node, or
 *   `[paragraph, horizontalRule]` for paragraphs containing a page break.
 */
async function convertParagraph(node, params) {
  const { context, styleInfo: paramStyleInfo } = params;
  const pPr = findChild(node, "w:pPr");
  const styleName = (pPr && findChild(pPr, "w:pStyle"))?.attributes["w:val"];
  const styleInfo = styleName && context.styleMap ? context.styleMap.get(styleName) : undefined;

  if (styleName && context.styleMap) {
    if (styleInfo?.outlineLvl !== undefined && styleInfo.outlineLvl >= 0 && styleInfo.outlineLvl <= 5) {
      // Outline levels are zero-based, heading levels are one-based.
      return convertHeading(node, params, styleInfo, styleInfo.outlineLvl + 1);
    }
    const headingMatch = styleName.match(/^Heading(\d+)$/);
    if (headingMatch) {
      return convertHeading(node, params, styleInfo, parseInt(headingMatch[1], 10));
    }
  }

  const runs = await extractRuns(node, {
    context,
    styleInfo: paramStyleInfo || styleInfo,
  });
  const attrs = {
    ...extractAlignment(node),
    ...extractParagraphStyles(node),
  };
  // `attrs` is only emitted when at least one attribute was extracted.
  const attrsPart = Object.keys(attrs).length ? { attrs } : {};

  if (checkForPageBreak(node)) {
    const filteredRuns = runs.filter((run) => run.type !== "hardBreak");
    return [
      {
        type: "paragraph",
        ...attrsPart,
        content: filteredRuns.length ? filteredRuns : undefined,
      },
      { type: "horizontalRule" },
    ];
  }

  // Earlier revisions had two further special cases here. The "single
  // hardBreak run that is a page break" case could never fire, because any
  // direct run holding a page break already makes checkForPageBreak() true
  // above. The "single image run" case produced exactly the node returned
  // below. Both were removed without changing the output.
  return {
    type: "paragraph",
    ...attrsPart,
    content: runs,
  };
}
1140
/**
 * Report whether any run inside the given element carries a page break.
 *
 * The element tree is walked until `w:r` elements are reached (runs themselves
 * are not descended into). A run counts when its first `w:br` child has
 * `w:type="page"`.
 *
 * @param {object} node - XAST element to search, typically a paragraph.
 * @returns {boolean} `true` if at least one run holds a page break.
 */
function checkForPageBreak(node) {
  const containsPageBreak = (el) => {
    if (el.name === "w:r") {
      return findChild(el, "w:br")?.attributes["w:type"] === "page";
    }
    return el.children.some((child) => child.type === "element" && containsPageBreak(child));
  };
  return containsPageBreak(node);
}
1154
/**
 * Build a TipTap `heading` node from a paragraph element (internal helper).
 *
 * @param {object} node - XAST element of the paragraph.
 * @param {object} params - Conversion parameters; only `params.context` is read.
 * @param {object | undefined} styleInfo - Style info forwarded to `extractRuns`.
 * @param {number} level - Heading level stored in the node's attrs.
 * @returns {Promise<object>} Heading node whose attrs hold `level` plus any
 *   paragraph styles extracted from the element.
 */
async function convertHeading(node, params, styleInfo, level) {
  const attrs = { level, ...extractParagraphStyles(node) };
  const content = await extractRuns(node, { context: params.context, styleInfo });
  return { type: "heading", attrs, content };
}
1170
+ //#endregion
1171
+ //#region src/parsers/table.ts
1172
/**
 * Parse a single border element (e.g. a `w:top` child of `w:tcBorders`).
 *
 * @param {object | null | undefined} borderNode - XAST border element.
 * @returns {{ color?: string, width?: number, style?: string } | null}
 *   The recognised border properties, or `null` when the node is missing or
 *   none of its attributes could be used.
 *   - `color`: `w:color` prefixed with `#`, skipped when it is `auto`.
 *   - `width`: `w:sz` divided by 6 and rounded.
 *   - `style`: CSS keyword mapped from `w:val`; unknown values are ignored.
 */
function parseBorder(borderNode) {
  if (!borderNode) return null;
  const { "w:val": val, "w:sz": size, "w:color": color } = borderNode.attributes;
  const styleMap = {
    single: "solid",
    dashed: "dashed",
    dotted: "dotted",
    double: "double",
    none: "none",
    nil: "none",
  };
  const border = {};
  if (color && color !== "auto") border.color = `#${color}`;
  if (size) {
    const eighthPoints = Number.parseInt(size, 10);
    if (!Number.isNaN(eighthPoints)) border.width = Math.round(eighthPoints / 6);
  }
  // `val` is document-controlled: accept only the table's own keys so values
  // such as "toString" or "constructor" cannot resolve to inherited members.
  if (val && Object.prototype.hasOwnProperty.call(styleMap, val)) border.style = styleMap[val];
  return Object.keys(border).length > 0 ? border : null;
}
1197
/**
 * Read the default cell margins of a table (`w:tblPr` > `w:tblCellMar`).
 *
 * Each side's `w:w` attribute is parsed as a base-10 integer and stored
 * unconverted.
 *
 * @param {object} tableNode - XAST element of the table.
 * @returns {{ marginTop: (number|undefined), marginBottom: (number|undefined),
 *   marginLeft: (number|undefined), marginRight: (number|undefined) } | null}
 *   All four keys are always present on the returned object; `null` is
 *   returned when the table declares no usable margin at all.
 */
function parseTableProperties(tableNode) {
  const props = {
    marginTop: undefined,
    marginBottom: undefined,
    marginLeft: undefined,
    marginRight: undefined,
  };
  const tblPr = findChild(tableNode, "w:tblPr");
  if (!tblPr) return null;
  const tblCellMar = findChild(tblPr, "w:tblCellMar");
  if (!tblCellMar) return null;

  const sides = [
    ["w:top", "marginTop"],
    ["w:bottom", "marginBottom"],
    ["w:left", "marginLeft"],
    ["w:right", "marginRight"],
  ];
  for (const [tag, key] of sides) {
    const raw = findChild(tblCellMar, tag)?.attributes["w:w"];
    if (!raw) continue;
    // Explicit radix: attribute text must never be read as hexadecimal.
    const value = Number.parseInt(raw, 10);
    if (!Number.isNaN(value)) props[key] = value;
  }

  const hasAnyMargin = Object.values(props).some((value) => value !== undefined);
  return hasAnyMargin ? props : null;
}
1234
/**
 * Read the row height of a table row (`w:trPr` > `w:trHeight`).
 *
 * @param {object} rowNode - XAST element of the table row.
 * @returns {{ rowHeight: (string|null) }} `rowHeight` is a `"<n>px"` string
 *   produced via `convertTwipToPixels`, or `null` when the row declares no
 *   height or the declared value is not a number.
 */
function parseRowProperties(rowNode) {
  const props = { rowHeight: null };
  const trPr = findChild(rowNode, "w:trPr");
  if (!trPr) return props;
  const raw = findChild(trPr, "w:trHeight")?.attributes["w:val"];
  if (raw) {
    const height = Number.parseInt(raw, 10);
    // Guard against malformed values so callers never receive "NaNpx".
    if (!Number.isNaN(height)) props.rowHeight = `${convertTwipToPixels(height)}px`;
  }
  return props;
}
1245
/**
 * Read the properties of a table cell from its `w:tcPr`.
 *
 * @param {object} cellNode - XAST element of the table cell.
 * @returns {object} Always contains `colspan`, `rowspan` and `colwidth`;
 *   `backgroundColor`, `verticalAlign` and `borderTop/Bottom/Left/Right` are
 *   added only when present in the document.
 *   - `colspan`: positive `w:gridSpan` value, otherwise 1.
 *   - `rowspan`: 0 marks a vertical-merge continuation cell; otherwise 1
 *     (the real span is computed by the caller).
 *   - `colwidth`: one-element array with the `w:tcW` width run through
 *     `convertTwipToPixels`, or `null`.
 */
function parseCellProperties(cellNode) {
  const props = {
    colspan: 1,
    rowspan: 1,
    colwidth: null,
  };
  const tcPr = findChild(cellNode, "w:tcPr");
  if (!tcPr) return props;

  const gridSpan = findChild(tcPr, "w:gridSpan")?.attributes["w:val"];
  if (gridSpan) {
    const span = Number.parseInt(gridSpan, 10);
    // Ignore malformed or non-positive spans instead of storing NaN/0.
    if (span > 0) props.colspan = span;
  }

  // In WordprocessingML a `w:vMerge` element without `w:val` means
  // "continue", so a bare <w:vMerge/> is a continuation cell as well.
  const vMerge = findChild(tcPr, "w:vMerge");
  if (vMerge && (vMerge.attributes["w:val"] ?? "continue") === "continue") props.rowspan = 0;

  const width = findChild(tcPr, "w:tcW")?.attributes["w:w"];
  if (width) {
    const twips = Number.parseInt(width, 10);
    if (!Number.isNaN(twips)) props.colwidth = [convertTwipToPixels(twips)];
  }

  // "auto" is not a colour value; skip it, as the border colour parsing does.
  const fill = findChild(tcPr, "w:shd")?.attributes["w:fill"];
  if (fill && fill !== "auto") props.backgroundColor = `#${fill}`;

  const vAlign = findChild(tcPr, "w:vAlign")?.attributes["w:val"];
  if (vAlign) props.verticalAlign = vAlign;

  const tcBorders = findChild(tcPr, "w:tcBorders");
  if (tcBorders) {
    const sides = [
      ["w:top", "borderTop"],
      ["w:bottom", "borderBottom"],
      ["w:left", "borderLeft"],
      ["w:right", "borderRight"],
    ];
    for (const [tag, key] of sides) {
      const border = parseBorder(findChild(tcBorders, tag));
      if (border) props[key] = border;
    }
  }
  return props;
}
1278
+ //#endregion
1279
+ //#region src/converters/table.ts
1280
/**
 * Tell whether an element is a DOCX table.
 *
 * @param {object} node - XAST element to test.
 * @returns {boolean} `true` when the element's name is `w:tbl`.
 */
function isTable(node) {
  const { name } = node;
  return name === "w:tbl";
}
1286
/**
 * Convert a `w:tbl` element to a TipTap `table` node.
 *
 * Rows are converted one after another, in document order. `convertTableRow`
 * reads and updates the shared `activeRowspans` map while it awaits cell
 * content, so running rows concurrently would let a later row observe the
 * map before an earlier row has finished registering its vertical merges.
 *
 * @param {object} node - XAST element of the table.
 * @param {object} params - Conversion parameters; only `params.context` is forwarded.
 * @returns {Promise<object>} Table node; `attrs` is present only when
 *   `parseTableProperties` yields a value.
 */
async function convertTable(node, params) {
  const rows = node.children.filter((child) => child.type === "element" && child.name === "w:tr");
  // Remaining vertical-merge counters per grid column, shared by all rows of this table.
  const activeRowspans = new Map();
  const content = [];
  for (const [rowIndex, row] of rows.entries()) {
    content.push(
      await convertTableRow(row, {
        context: params.context,
        activeRowspans,
        rows,
        rowIndex,
      }),
    );
  }
  const tableProps = parseTableProperties(node);
  return {
    type: "table",
    ...(tableProps && { attrs: tableProps }),
    content,
  };
}
1306
/**
 * Convert a `w:tr` element to a TipTap `tableRow` node.
 *
 * Vertical merges are tracked through `params.activeRowspans`, a map from grid
 * column to the number of following rows still covered by a cell above. Cells
 * covered by such a merge, and cells flagged as merge continuations
 * (`rowspan === 0`), are skipped.
 *
 * @param {object} rowNode - XAST element of the row.
 * @param {object} params - Row conversion parameters.
 * @param {object} params.context - Shared conversion context.
 * @param {Map<number, number>} params.activeRowspans - Shared merge counters (mutated).
 * @param {object[]} params.rows - All row elements of the table.
 * @param {number} params.rowIndex - Index of `rowNode` within `params.rows`.
 * @returns {Promise<object>} Row node with `attrs` from `parseRowProperties`
 *   and one `tableCell` per emitted cell.
 */
async function convertTableRow(rowNode, params) {
  const cells = [];
  let colIndex = 0;
  const rowProps = parseRowProperties(rowNode);
  for (const child of rowNode.children) {
    if (child.type !== "element" || child.name !== "w:tc") continue;

    let cellProps = parseCellProperties(child);
    // Number of grid columns this cell element occupies. Skipped cells must
    // advance the column cursor by the same amount as emitted ones, otherwise
    // every following cell in the row is attributed to the wrong grid column.
    const gridWidth = cellProps?.colspan || 1;

    const mergedBy = params.activeRowspans.get(colIndex);
    if (mergedBy && mergedBy > 0) {
      params.activeRowspans.set(colIndex, mergedBy - 1);
      colIndex += gridWidth;
      continue;
    }

    if (cellProps?.rowspan === 1) {
      const actualRowSpan = calculateRowspan({
        rows: params.rows,
        rowIndex: params.rowIndex,
        colIndex,
      });
      if (actualRowSpan > 1) cellProps = { ...cellProps, rowspan: actualRowSpan };
    }
    if (cellProps?.rowspan && cellProps.rowspan > 1) {
      params.activeRowspans.set(colIndex, cellProps.rowspan - 1);
    }
    if (cellProps?.rowspan === 0) {
      // Continuation cell that was not covered by a tracked merge.
      colIndex += gridWidth;
      continue;
    }

    const paragraphs = await convertCellContent(child, params);
    cells.push({
      type: "tableCell",
      ...(cellProps && { attrs: cellProps }),
      content: paragraphs,
    });
    colIndex += gridWidth;
  }
  return {
    type: "tableRow",
    ...(rowProps && { attrs: rowProps }),
    content: cells,
  };
}
1352
/**
 * Count how many rows a cell spans by scanning the rows below it for
 * vertical-merge continuation cells in the same grid column.
 *
 * @param {object} params - Lookup parameters.
 * @param {object[]} params.rows - All row elements of the table.
 * @param {number} params.rowIndex - Index of the row holding the cell.
 * @param {number} params.colIndex - Grid column at which the cell starts.
 * @returns {number} The span, at least 1. Counting stops at the first row
 *   whose cell in that column is not a continuation (`rowspan !== 0`) or that
 *   has no cell covering the column.
 */
function calculateRowspan(params) {
  let rowspan = 1;
  for (let rowIndex = params.rowIndex + 1; rowIndex < params.rows.length; rowIndex++) {
    // Every row is walked from its first cell, so the offset must restart at
    // the target column each time. Carrying the leftover offset of the
    // previous row over would compare against the wrong cell from the second
    // scanned row onwards.
    let offset = params.colIndex;
    let continuesMerge = false;
    for (const child of params.rows[rowIndex].children) {
      if (child.type !== "element" || child.name !== "w:tc") continue;
      const cellProps = parseCellProperties(child);
      const colSpan = cellProps?.colspan || 1;
      if (offset >= 0 && offset < colSpan) {
        // This is the cell covering the target column in this row.
        continuesMerge = cellProps?.rowspan === 0;
        break;
      }
      offset -= colSpan;
    }
    if (!continuesMerge) break;
    rowspan++;
  }
  return rowspan;
}
1378
/**
 * Convert the paragraphs directly inside a table cell.
 *
 * Only `w:p` children are converted. A paragraph conversion that yields
 * several nodes contributes all of them.
 *
 * @param {object} cellNode - XAST element of the cell.
 * @param {object} params - Parameters forwarded to `convertParagraph`.
 * @returns {Promise<object[]>} Converted nodes, or a single empty paragraph
 *   when the cell produced nothing.
 */
async function convertCellContent(cellNode, params) {
  const blocks = [];
  for (const child of cellNode.children) {
    if (child.type !== "element" || child.name !== "w:p") continue;
    const converted = await convertParagraph(child, params);
    const nodes = Array.isArray(converted) ? converted : [converted];
    for (const block of nodes) blocks.push(block);
  }
  if (blocks.length) return blocks;
  return [{ type: "paragraph", content: [] }];
}
1393
+ //#endregion
1394
+ //#region src/converters/task-list.ts
1395
/** Leading character that marks an unchecked task item (U+2610). */
const CHECKBOX_UNCHECKED = "☐";
/** Leading character that marks a checked task item (U+2611). */
const CHECKBOX_CHECKED = "☑";
1397
/**
 * Locate the first text node of a paragraph's first run.
 *
 * Only the first `w:r` child, its first `w:t` child, and that element's first
 * text child are considered.
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {object | null} The text node when it has a non-empty value,
 *   otherwise `null`.
 */
function getFirstTextNode(node) {
  const run = findChild(node, "w:r");
  if (!run) return null;
  const textElement = findChild(run, "w:t");
  if (!textElement) return null;
  for (const child of textElement.children) {
    if (child.type !== "text") continue;
    // Only the first text child matters; an empty one counts as "no text".
    return child.value ? child : null;
  }
  return null;
}
1408
/**
 * Tell whether a paragraph represents a task item, i.e. whether its first
 * text starts with one of the checkbox characters.
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {boolean} `true` when the paragraph starts with a checkbox symbol.
 */
function isTaskItem(node) {
  const first = getFirstTextNode(node);
  if (!first) return false;
  return [CHECKBOX_UNCHECKED, CHECKBOX_CHECKED].some((symbol) => first.value.startsWith(symbol));
}
1417
/**
 * Read the checked state of a task item paragraph.
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {boolean} `true` when the paragraph's first text starts with the
 *   checked checkbox character; `false` otherwise, including when there is
 *   no text.
 */
function getTaskItemChecked(node) {
  const first = getFirstTextNode(node);
  return first ? first.value.startsWith(CHECKBOX_CHECKED) : false;
}
1423
/**
 * Convert a task item paragraph to a TipTap `taskItem` node.
 *
 * @param {object} node - XAST element of the paragraph.
 * @param {object} params - Parameters forwarded to `convertTaskItemParagraph`.
 * @returns {Promise<object>} Task item holding its checked state and a single
 *   paragraph as content.
 */
async function convertTaskItem(node, params) {
  const checked = getTaskItemChecked(node);
  const paragraph = await convertTaskItemParagraph(node, params);
  return {
    type: "taskItem",
    attrs: { checked },
    content: [paragraph],
  };
}
1433
/**
 * Convert a run of consecutive task item paragraphs into one `taskList` node.
 *
 * Starting at `params.index`, siblings are consumed while they are `w:p`
 * elements recognised by `isTaskItem`. Every consumed index is recorded in
 * `params.processedIndices`.
 *
 * @param {object} _node - Unused; the start element is read from `params.siblings`.
 * @param {object} params - Conversion parameters.
 * @param {object[]} params.siblings - Sibling elements being converted.
 * @param {number} params.index - Index of the first task item.
 * @param {Set<number>} params.processedIndices - Set of consumed indices (mutated).
 * @param {object} params.context - Shared conversion context.
 * @param {object} [params.styleInfo] - Style info forwarded to each item.
 * @returns {Promise<object>} The task list node.
 */
async function convertTaskList(_node, params) {
  const { siblings, index, processedIndices } = params;
  const items = [];
  for (let cursor = index; cursor < siblings.length; cursor++) {
    const candidate = siblings[cursor];
    const belongsToList = candidate.name === "w:p" && isTaskItem(candidate);
    if (!belongsToList) break;
    processedIndices.add(cursor);
    items.push(
      await convertTaskItem(candidate, {
        context: params.context,
        styleInfo: params.styleInfo,
      }),
    );
  }
  return { type: "taskList", content: items };
}
1456
/**
 * Convert a task item paragraph and strip its leading checkbox symbol.
 *
 * When the first run is text starting with a checkbox character, that
 * character and any whitespace after it are removed from the run (the run
 * object returned by `extractRuns` is updated in place). A run left without
 * text is dropped.
 *
 * @param {object} node - XAST element of the paragraph.
 * @param {object} params - Conversion parameters.
 * @param {object} params.context - Shared conversion context.
 * @param {object} [params.styleInfo] - Style info forwarded to `extractRuns`.
 * @returns {Promise<object>} Paragraph node; `content` is `undefined` when no
 *   runs remain, and `attrs` is present only when an alignment was found.
 */
async function convertTaskItemParagraph(node, params) {
  const { context, styleInfo } = params;
  const runs = await extractRuns(node, { context, styleInfo });

  const firstRun = runs[0];
  if (firstRun?.type === "text") {
    const marker = [CHECKBOX_UNCHECKED, CHECKBOX_CHECKED].find((symbol) =>
      firstRun.text.startsWith(symbol),
    );
    if (marker !== undefined) {
      // Cut exactly the marker, then drop following whitespace. Cutting a
      // fixed two characters would also remove the first letter of the label
      // whenever the checkbox is not followed by a space.
      const remainingText = firstRun.text.slice(marker.length).trimStart();
      if (remainingText) firstRun.text = remainingText;
      else runs.shift();
    }
  }

  const attrs = extractAlignment(node);
  return {
    type: "paragraph",
    ...(attrs && { attrs }),
    content: runs.length ? runs : undefined,
  };
}
1481
+ //#endregion
1482
+ //#region src/converters/code-block.ts
1483
/**
 * Tell whether a paragraph uses a code block style.
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {boolean} `true` when the paragraph style id equals
 *   `DOCX_STYLE_NAMES.CODE_BLOCK` or starts with `DOCX_STYLE_NAMES.CODE_PREFIX`.
 */
function isCodeBlock(node) {
  const pPr = findChild(node, "w:pPr");
  const pStyle = pPr && findChild(pPr, "w:pStyle");
  const style = pStyle?.attributes["w:val"];
  if (!style) return false;
  const { CODE_BLOCK, CODE_PREFIX } = DOCX_STYLE_NAMES;
  return style === CODE_BLOCK || style.startsWith(CODE_PREFIX);
}
1491
/**
 * Derive the code block language from the paragraph style id.
 *
 * The language is whatever follows the `DOCX_STYLE_NAMES.CODE_BLOCK` prefix
 * of the style id, lower-cased.
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {string | undefined} The language, or `undefined` when the style
 *   does not start with the prefix or nothing follows it.
 */
function getCodeBlockLanguage(node) {
  const pPr = findChild(node, "w:pPr");
  const pStyle = pPr && findChild(pPr, "w:pStyle");
  const style = pStyle?.attributes["w:val"];
  const prefix = DOCX_STYLE_NAMES.CODE_BLOCK;
  if (!style?.startsWith(prefix)) return undefined;
  const language = style.slice(prefix.length).toLowerCase();
  return language || undefined;
}
1500
+ //#endregion
1501
+ //#region src/converters/list.ts
1502
/**
 * Tell whether a paragraph is a list item, i.e. whether its `w:pPr` contains
 * a `w:numPr` element.
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {boolean} `true` only when numbering properties are present.
 */
function isListItem(node) {
  const pPr = findChild(node, "w:pPr");
  if (!pPr) return false;
  // Test for truthiness rather than `!== undefined`: a lookup miss reported
  // as `null` would otherwise make every paragraph that merely has a
  // `w:pPr` look like a list item.
  return Boolean(findChild(pPr, "w:numPr"));
}
1509
/**
 * Read the numbering reference of a list paragraph (`w:pPr` > `w:numPr`).
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {{ numId: string, level: number } | null} The `w:numId` value and
 *   the `w:ilvl` value parsed as an integer (a missing value counts as 0);
 *   `null` when `w:numPr`, `w:ilvl` or `w:numId` is absent.
 */
function getListInfo(node) {
  const pPr = findChild(node, "w:pPr");
  const numPr = pPr && findChild(pPr, "w:numPr");
  if (!numPr) return null;

  const [levelEl, idEl] = ["w:ilvl", "w:numId"].map((tag) => findChild(numPr, tag));
  if (!levelEl || !idEl) return null;

  const rawLevel = levelEl.attributes["w:val"] || "0";
  return {
    numId: idEl.attributes["w:val"],
    level: parseInt(rawLevel, 10),
  };
}
1524
+ //#endregion
1525
+ //#region src/converters/horizontal-rule.ts
1526
/**
 * Tell whether a paragraph consists of nothing but a page break, which is
 * rendered as a horizontal rule.
 *
 * Only the paragraph's first run is inspected. Inside it, `w:rPr` and
 * blank `w:t` elements are ignored; any other element besides a
 * `w:br w:type="page"` counts as content.
 *
 * @param {object} node - XAST element of the paragraph.
 * @returns {boolean} `true` when the first run has a page break and no other content.
 */
function isHorizontalRule(node) {
  const run = findChild(node, "w:r");
  if (!run) return false;

  let pageBreaks = 0;
  let otherContent = 0;
  for (const part of run.children) {
    if (part.type !== "element") continue;

    const isPageBreak = part.name === "w:br" && part.attributes["w:type"] === "page";
    if (isPageBreak) {
      pageBreaks += 1;
      continue;
    }
    if (part.name === "w:t") {
      // Whitespace-only text does not count as content.
      const text = part.children.find((c) => c.type === "text")?.value;
      if (text?.trim().length) otherContent += 1;
      continue;
    }
    if (part.name !== "w:rPr") otherContent += 1;
  }
  return pageBreaks > 0 && otherContent === 0;
}
1543
+ //#endregion
1544
+ //#region src/parser.ts
1545
/**
 * Main entry point: parse a DOCX file and convert it to TipTap JSON.
 *
 * The input is turned into bytes, unzipped, and `word/document.xml` is parsed
 * into an XAST tree. Hyperlinks, images, numbering and style information are
 * gathered from the archive and handed to the converters through the context.
 *
 * @param {*} input - DOCX data in any form accepted by `toUint8Array`.
 * @param {object} [options] - Options; spread into the conversion context.
 *   `options.image.handler`, when given, is passed to `extractImages`.
 * @returns {Promise<object>} The TipTap document.
 * @throws {Error} When the archive has no `word/document.xml`.
 */
async function parseDOCX(input, options = {}) {
  const bytes = await toUint8Array(input);
  const files = unzipSync(bytes);

  const hyperlinks = extractHyperlinks(files);
  const images = await extractImages(files, options.image?.handler);

  const documentXml = files["word/document.xml"];
  if (!documentXml) throw new Error("Invalid DOCX file: missing word/document.xml");

  const xmlText = new TextDecoder().decode(documentXml);
  const documentXast = fromXml(xmlText);
  const listTypeMap = parseNumberingXml(files);
  const styleMap = parseStylesXml(files);

  const context = { ...options, hyperlinks, images, listTypeMap, styleMap };
  const doc = await convertDocument(documentXast, { context });
  return doc;
}
1565
/**
 * Convert the XAST of `word/document.xml` to a TipTap `doc` node.
 *
 * @param {object} node - XAST root node.
 * @param {object} params - Parameters forwarded to `convertElements`.
 * @returns {Promise<object>} The document node. Its content is empty when
 *   `node` is not a root or lacks a `w:document` > `w:body` element.
 */
async function convertDocument(node, params) {
  const emptyDoc = () => ({ type: "doc", content: [] });
  if (node.type !== "root") return emptyDoc();

  const documentEl = findChild(node, "w:document");
  if (!documentEl) return emptyDoc();

  const body = findChild(documentEl, "w:body");
  if (!body) return emptyDoc();

  const elements = body.children.filter((child) => child.type === "element");
  const content = await convertElements(elements, params);
  return { type: "doc", content };
}
1588
/**
 * Convert a list of sibling XML elements to TipTap nodes (main conversion loop).
 *
 * Elements are handled in order. Converters that consume several siblings at
 * once record the consumed indices in a shared set, and those are skipped
 * here. Empty paragraphs are skipped when `context.ignoreEmptyParagraphs`
 * is set.
 *
 * @param {object[]} elements - Sibling XAST elements.
 * @param {object} params - Conversion parameters (`params.context` is read).
 * @returns {Promise<object[]>} Flat list of converted nodes.
 */
async function convertElements(elements, params) {
  const output = [];
  const processedIndices = new Set();
  for (const [position, element] of elements.entries()) {
    if (processedIndices.has(position)) continue;

    const skipAsEmpty =
      params.context.ignoreEmptyParagraphs && element.name === "w:p" && isEmptyParagraph(element);
    if (skipAsEmpty) continue;

    const converted = await convertElement(element, elements, position, params, processedIndices);
    if (Array.isArray(converted)) {
      output.push(...converted);
    } else if (converted) {
      output.push(converted);
    }
  }
  return output;
}
1604
/**
 * Route a single XML element to the matching converter.
 *
 * Tables go to `convertTable`. Paragraphs are tried, in this order, as code
 * block, task list, list, horizontal rule and finally plain paragraph. Any
 * other element is dropped.
 *
 * @param {object} element - XAST element to convert.
 * @param {object[]} siblings - All siblings of `element`.
 * @param {number} index - Position of `element` within `siblings`.
 * @param {object} params - Conversion parameters.
 * @param {Set<number>} processedIndices - Indices consumed by multi-element converters.
 * @returns {Promise<object | object[] | null>} Converted node(s), or `null`
 *   for unsupported elements.
 */
async function convertElement(element, siblings, index, params, processedIndices) {
  const { name } = element;
  if (name === "w:tbl") return await convertTable(element, params);
  if (name !== "w:p") return null;

  if (isCodeBlock(element)) return await convertCodeBlock(element);
  if (isTaskItem(element)) {
    const taskParams = { ...params, siblings, index, processedIndices };
    return await convertTaskList(element, taskParams);
  }
  if (isListItem(element)) {
    return await convertList(element, siblings, index, params, processedIndices);
  }
  if (isHorizontalRule(element)) return { type: "horizontalRule" };
  return await convertParagraph(element, params);
}
1624
/**
 * Convert a code block paragraph to a TipTap `codeBlock` node.
 *
 * @param {object} element - XAST element of the paragraph.
 * @returns {Promise<object>} Code block node; `attrs.language` is present
 *   only when a language could be derived from the paragraph style.
 */
async function convertCodeBlock(element) {
  const language = getCodeBlockLanguage(element);
  const codeBlock = { type: "codeBlock" };
  if (language) codeBlock.attrs = { language };
  codeBlock.content = extractTextFromParagraph(element);
  return codeBlock;
}
1636
/**
 * Convert a run of consecutive list paragraphs into one list node.
 *
 * Starting at `startIndex`, siblings are consumed while they are list
 * paragraphs sharing the start element's `numId`. Every consumed index is
 * recorded in `processedIndices`. The list kind comes from
 * `context.listTypeMap` and falls back to a bullet list.
 *
 * @param {object} startElement - First list paragraph.
 * @param {object[]} siblings - All siblings of `startElement`.
 * @param {number} startIndex - Position of `startElement` within `siblings`.
 * @param {object} params - Conversion parameters (`params.context.listTypeMap` is read).
 * @param {Set<number>} processedIndices - Set of consumed indices (mutated).
 * @returns {Promise<object | object[]>} A `bulletList`/`orderedList` node, or
 *   the plain paragraph conversion when the start element has no list info.
 */
async function convertList(startElement, siblings, startIndex, params, processedIndices) {
  const listInfo = getListInfo(startElement);
  if (!listInfo) return await convertParagraph(startElement, params);

  const listTypeInfo = params.context.listTypeMap.get(listInfo.numId);
  const listType = listTypeInfo?.type || "bullet";

  const items = [];
  for (let cursor = startIndex; cursor < siblings.length; cursor++) {
    const candidate = siblings[cursor];
    if (candidate.name !== "w:p" || !isListItem(candidate)) break;
    const info = getListInfo(candidate);
    if (!info || info.numId !== listInfo.numId) break;

    processedIndices.add(cursor);
    const paragraph = await convertParagraph(candidate, params);
    // When the conversion yields several nodes, only the first becomes the item body.
    const body = Array.isArray(paragraph) ? paragraph[0] : paragraph;
    items.push({ type: "listItem", content: [body] });
  }

  const listNode = {
    type: listType === "bullet" ? "bulletList" : "orderedList",
    content: items,
  };
  if (listType === "ordered") {
    listNode.attrs = { type: null };
    if (listTypeInfo?.start !== undefined) listNode.attrs.start = listTypeInfo.start;
  }
  return listNode;
}
1670
/**
 * Collect the plain text of a paragraph as TipTap text nodes (used for code
 * blocks).
 *
 * For every `w:r` found by `findDeepChildren`, the first text child of the
 * run's first `w:t` element is taken; runs without non-empty text are skipped.
 *
 * @param {object} element - XAST element of the paragraph.
 * @returns {object[]} One `{ type: "text", text }` node per contributing run.
 */
function extractTextFromParagraph(element) {
  return findDeepChildren(element, "w:r").flatMap((run) => {
    const textElement = findChild(run, "w:t");
    if (!textElement) return [];
    const textNode = textElement.children.find((c) => c.type === "text");
    const hasText = textNode && "value" in textNode && textNode.value;
    return hasText ? [{ type: "text", text: textNode.value }] : [];
  });
}
1687
/**
 * Tell whether a paragraph has no visible content.
 *
 * A paragraph is non-empty as soon as one of its runs has non-blank text, a
 * `w:drawing`, `mc:AlternateContent` or `w:pict` child, or a page break.
 *
 * @param {object} element - XAST element of the paragraph.
 * @returns {boolean} `true` when no run carries content.
 */
function isEmptyParagraph(element) {
  const GRAPHIC_TAGS = ["w:drawing", "mc:AlternateContent", "w:pict"];

  const runHasContent = (run) => {
    const textElement = findChild(run, "w:t");
    if (textElement) {
      const textNode = textElement.children.find((c) => c.type === "text");
      if (textNode && "value" in textNode && textNode.value && textNode.value.trim().length > 0) {
        return true;
      }
    }
    if (GRAPHIC_TAGS.some((tag) => findChild(run, tag))) return true;
    const br = findChild(run, "w:br");
    return Boolean(br) && br.attributes["w:type"] === "page";
  };

  return !findDeepChildren(element, "w:r").some(runHasContent);
}
1704
+ //#endregion
1705
+ export { convertParagraph, convertTable, convertTaskItem, convertTaskList, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem, parseDOCX };