@docen/import-docx 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +290 -290
- package/dist/index.d.mts +6133 -5764
- package/dist/index.mjs +1705 -1
- package/package.json +11 -10
- package/dist/index.cjs +0 -1
- package/dist/index.d.cts +0 -6757
- package/dist/index.d.ts +0 -6757
package/dist/index.mjs
CHANGED
|
@@ -1 +1,1705 @@
|
|
|
1
|
-
import{fromXml as F}from"xast-util-from-xml";import{unzipSync as wt}from"fflate";import{toUint8Array as dt}from"undio";import{imageMeta as ht}from"image-meta";const gt=.6666666666666666,N={CODE_BLOCK:"CodeBlock",CODE_PREFIX:"Code"},mt={docxToTipTap:{left:"left",right:"right",center:"center",both:"justify"}},q=96;function z(n){return Math.round(n*q/1440)}function M(n){return`${z(n)}px`}function bt(n){return Math.round(n/(914400/q))}function A(n){const t=parseInt(n,10);if(!isNaN(t))return bt(t)}function a(n,t){if(!n.children)return null;for(const e of n.children)if(e.type==="element"&&e.name===t)return e;return null}function b(n,t){if(!n.children)return null;for(const e of n.children)if(e.type==="element"){if(e.name===t)return e;const r=b(e,t);if(r)return r}return null}function x(n,t){const e=[];if(!n.children)return e;for(const r of n.children)r.type==="element"&&(r.name===t&&e.push(r),e.push(...x(r,t)));return e}function T(n,t){const e=n[t];if(!e)return;const r=parseInt(e,10);if(!isNaN(r))return e}function B(n){return t=>n.includes(t)}const O="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";function G(n){const t=n.length,e=Math.ceil(t/3)*4,r=Array.from({length:e});let i=0;for(let o=0;o<t;o+=3){const s=n[o],c=o+1<t?n[o+1]:0,l=o+2<t?n[o+2]:0,u=s>>2,p=(s&3)<<4|c>>4,w=(c&15)<<2|l>>6,f=l&63;r[i++]=O[u],r[i++]=O[p],r[i++]=o+1<t?O[w]:"=",r[i++]=o+2<t?O[f]:"="}return r.join("")}function yt(n){const t=atob(n),e=new Uint8Array(t.length);for(let r=0;r<t.length;r++)e[r]=t.charCodeAt(r);return e}const vt=globalThis.process?.release?.name==="node",V=typeof window<"u";async function It(n){const t=await n;return t.default||t}let P,J=class{#t=!1;constructor({enableHWA:t=!1}={}){this.#t=t}create(t,e){const r=this._createCanvas(t,e);return{canvas:r,context:r.getContext("2d",{willReadFrequently:!this.#t})}}reset({canvas:t},e,r){if(!t)throw new Error("Canvas is not specified");t.width=e,t.height=r}destroy(t){if(!t.canvas)throw new Error("Canvas is not 
specified");t.canvas.width=0,t.canvas.height=0,t.canvas=void 0,t.context=void 0}_createCanvas(t,e){throw new Error("Not implemented")}};class xt extends J{_document;constructor({ownerDocument:t=globalThis.document,enableHWA:e=!1}={}){super({enableHWA:e}),this._document=t}_createCanvas(t,e){const r=this._document.createElement("canvas");return r.width=t,r.height=e,r}}class kt extends J{constructor({enableHWA:t=!1}={}){super({enableHWA:t})}_createCanvas(t,e){if(!P)throw new Error("@napi-rs/canvas module is not resolved");return P.createCanvas(t,e)}}async function Ct(n){P??=await It(n())}async function Mt(n){if(V)return xt;if(vt){if(!n)throw new Error("In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas");return await Ct(n),kt}throw new Error("Unsupported environment for canvas operations")}async function Tt(n,t,e={}){if(!t||!t.left&&!t.top&&!t.right&&!t.bottom||e.enabled===!1)return n;try{const r=await Mt(e.canvasImport),i=await Lt(n,r),o=(t.left||0)/1e5*i.width,s=(t.top||0)/1e5*i.height,c=(t.right||0)/1e5*i.width,l=(t.bottom||0)/1e5*i.height,u=Math.round(i.width-o-c),p=Math.round(i.height-s-l);if(u<=0||p<=0)return console.warn("Invalid crop dimensions, returning original image"),n;const w=new r().create(u,p);if(!w.context)throw new Error("Failed to get 2D context from canvas");w.context.drawImage(i,o,s,u,p,0,0,u,p);const f=w.canvas.toDataURL(),d=await(await fetch(f)).arrayBuffer();return new Uint8Array(d)}catch(r){return console.warn("Image cropping failed, returning original image:",r),n}}async function Lt(n,t){if(V){const e=new Blob([n.buffer]),r=URL.createObjectURL(e);try{const i=new Image;return new Promise((o,s)=>{i.onload=()=>{URL.revokeObjectURL(r),o(i)},i.onerror=()=>{URL.revokeObjectURL(r),s(new Error("Failed to load image"))},i.src=r})}catch(i){throw URL.revokeObjectURL(r),i}}else{if(!P)throw new Error("@napi-rs/canvas module is not resolved");return await 
P.loadImage(Buffer.from(n))}}const Rt="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image",Ft=B(["left","right","center","inside","outside"]),Pt=B(["top","bottom","center","inside","outside"]),$t=B(["page","character","column","margin","leftMargin","rightMargin","insideMargin","outsideMargin"]),Nt=B(["page","paragraph","margin","topMargin","bottomMargin","insideMargin","outsideMargin","line"]);function At(n){const t=n.attributes.l,e=n.attributes.t,r=n.attributes.r,i=n.attributes.b;if(!(!t&&!e&&!r&&!i))return{left:t?parseInt(t,10):void 0,top:e?parseInt(e,10):void 0,right:r?parseInt(r,10):void 0,bottom:i?parseInt(i,10):void 0}}function Bt(n){const t=a(n,"wp:align"),e=a(n,"wp:posOffset");let r;if(t?.children[0]?.type==="text"){const o=t.children[0].value;Ft(o)&&(r=o)}const i=e?.children[0]?.type==="text"?parseInt(e.children[0].value,10):void 0;if(!(!r&&i===void 0))return{...r&&{align:r},...i!==void 0&&{offset:i}}}function Ot(n){const t=a(n,"wp:align"),e=a(n,"wp:posOffset");let r;if(t?.children[0]?.type==="text"){const o=t.children[0].value;Pt(o)&&(r=o)}const i=e?.children[0]?.type==="text"?parseInt(e.children[0].value,10):void 0;if(!(!r&&i===void 0))return{...r&&{align:r},...i!==void 0&&{offset:i}}}function Q(n){let t=a(n,"w:drawing");if(t)return t;const e=a(n,"mc:AlternateContent"),r=e&&a(e,"mc:Choice");return r?a(r,"w:drawing"):null}function Y(n,t,e,r){const i=e/r,o=n/t;return Math.abs(i-o)>.1?i>o?{width:n,height:Math.round(n/i)}:{width:Math.round(t*i),height:t}:{width:n,height:t}}async function Dt(n,t){const e=new Map,r=n["word/_rels/document.xml.rels"];if(!r)return e;const i=F(new TextDecoder().decode(r)),o=a(i,"Relationships");if(!o)return e;const s=x(o,"Relationship");for(const c of s)if(c.attributes.Type===Rt&&c.attributes.Id&&c.attributes.Target){const l="word/"+c.attributes.Target,u=n[l];if(!u)continue;let p,w,f="png";try{const h=ht(u);p=h.width,w=h.height,h.type&&(f=h.type)}catch{}let d;if(t)d=(await 
t({id:c.attributes.Id,contentType:`image/${f}`,data:u})).src;else{const h=G(u);d=`data:image/${f};base64,${h}`}e.set(c.attributes.Id,{src:d,width:p,height:w})}return e}async function W(n,t){const{context:e}=t,r=b(n,"a:blip");if(!r?.attributes["r:embed"])return null;const i=r.attributes["r:embed"],o=e.images.get(i);if(!o)return null;let s=o.src;const c=b(n,"a:srcRect");if(c){const g=At(c);if(g&&s.startsWith("data:")){const[m,C]=s.split(",");if(C){const L=yt(C);try{const R=await Tt(L,g,{canvasImport:e.image?.canvasImport,enabled:e.image?.enableImageCrop??!1}),S=G(R);s=`${m},${S}`}catch(R){console.warn("Image cropping failed, using original image:",R)}}}}const l=b(n,"wp:extent");let u,p;if(l){const g=l.attributes.cx,m=l.attributes.cy;typeof g=="string"&&(u=A(g)),typeof m=="string"&&(p=A(m))}const w=b(n,"a:xfrm");let f;if(w?.attributes.rot){const g=parseInt(w.attributes.rot,10);isNaN(g)||(f=g/6e4)}const d=b(n,"wp:docPr")?.attributes.title,h=b(n,"wp:positionH"),y=b(n,"wp:positionV");let I;if(h||y){const g=h?Bt(h):void 0,m=y?Ot(y):void 0,C=h?.attributes.relativeFrom,L=y?.attributes.relativeFrom,R=typeof C=="string"&&$t(C)?C:"page",S=typeof L=="string"&&Nt(L)?L:"page";I={horizontalPosition:{relative:R,...g?.align&&{align:g.align},...g?.offset!==void 0&&{offset:g.offset}},verticalPosition:{relative:S,...m?.align&&{align:m.align},...m?.offset!==void 0&&{offset:m.offset}}}}const v=b(n,"pic:spPr");let k;if(v){const g=b(v,"a:ln"),m=g&&b(g,"a:solidFill"),C=m&&b(m,"a:srgbClr");C?.attributes.val&&(k={type:"solidFill",solidFillType:"rgb",value:C.attributes.val})}return{type:"image",attrs:{src:s,alt:"",...u!==void 0&&{width:u},...p!==void 0&&{height:p},...f!==void 0&&{rotation:f},...d&&{title:d},...I&&{floating:I},...k&&{outline:k}}}}function Et(n,t,e){if(t&&e&&n.width&&n.height){const r=Y(t,e,n.width,n.height);return{type:"image",attrs:{src:n.src,alt:"",width:r.width,height:r.height}}}return{type:"image",attrs:{src:n.src,alt:"",...t!==void 0&&{width:t},...e!==void 
0&&{height:e}}}}async function Z(n,t){const e=[],r=a(n,"wp:inline")||a(n,"wp:anchor");if(!r)return e;const i=a(r,"wp:extent");let o,s;if(i){const p=i.attributes.cx,w=i.attributes.cy;typeof p=="string"&&(o=A(p)),typeof w=="string"&&(s=A(w))}const c=a(r,"a:graphic");if(!c)return e;const l=a(c,"a:graphicData");if(!l)return e;const u=a(l,"wpg:wgp");if(u){const p=a(u,"wpg:grpSp"),w=p?[...x(p,"pic:pic"),...x(p,"pic")]:[...x(u,"pic:pic"),...x(u,"pic")];for(const f of w){const d=a(f,"a:graphic");if(!d){const v=a(f,"pic:blipFill")||b(f,"a:blipFill");if(!v)continue;const k=a(v,"a:blip")||b(v,"a:blip");if(!k?.attributes["r:embed"])continue;const g=k.attributes["r:embed"],m=t.context.images.get(g);if(!m)continue;e.push(Et(m,o,s));continue}const h={children:[d]},y=await W(h,t);if(!y)continue;const I=h.children[0]?.type==="element"?b(h.children[0],"a:blip")?.attributes["r:embed"]:void 0;if(o&&s&&I){const v=t.context.images.get(I);if(v?.width&&v?.height){const k=Y(o,s,v.width,v.height);y.attrs.width=k.width,y.attrs.height=k.height}else y.attrs.width=o,y.attrs.height=s}e.push(y)}}else{const p=await W(n,t);p&&e.push(p)}return e}const St="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";function zt(n){const t=new Map,e=n["word/_rels/document.xml.rels"];if(!e)return t;const r=F(new TextDecoder().decode(e)),i=a(r,"Relationships");if(!i)return t;const o=x(i,"Relationship");for(const s of o)s.attributes.Type===St&&s.attributes.Id&&s.attributes.Target&&t.set(s.attributes.Id,s.attributes.Target);return t}function Wt(n){const t=new Map,e=new Map,r=n["word/numbering.xml"];if(!r)return t;const i=F(new TextDecoder().decode(r)),o=new Map,s=a(i,"w:numbering");if(!s)return t;const c=x(s,"w:abstractNum");for(const u of c){const p=u.attributes["w:abstractNumId"],w=a(u,"w:lvl");if(!w)continue;const f=a(w,"w:numFmt");f?.attributes["w:val"]&&o.set(p,f.attributes["w:val"]);const d=a(w,"w:start");d?.attributes["w:val"]&&e.set(p,parseInt(d.attributes["w:val"],10))}const 
l=x(s,"w:num");for(const u of l){const p=u.attributes["w:numId"],w=a(u,"w:abstractNumId");if(!w?.attributes["w:val"])continue;const f=w.attributes["w:val"],d=o.get(f);if(!d)continue;const h=e.get(f);d==="bullet"?t.set(p,{type:"bullet"}):t.set(p,{type:"ordered",...h!==void 0&&{start:h}})}return t}function _t(n){const t=new Map,e=n["word/styles.xml"];if(!e)return t;const r=F(new TextDecoder().decode(e)),i=a(r,"w:styles");if(!i)return t;const o=x(i,"w:style").filter(s=>s.attributes["w:type"]==="paragraph");for(const s of o){const c=s.attributes["w:styleId"];if(!c)continue;const l={styleId:c},u=a(s,"w:name");u?.attributes["w:val"]&&(l.name=u.attributes["w:val"]);const p=a(s,"w:pPr");if(p){const f=a(p,"w:outlineLvl");f?.attributes["w:val"]!==void 0&&(l.outlineLvl=parseInt(f.attributes["w:val"],10))}const w=a(s,"w:rPr");if(w){const f={},d=a(w,"w:color");if(d?.attributes["w:val"]&&d.attributes["w:val"]!=="auto"){const I=d.attributes["w:val"];f.color=I.startsWith("#")?I:`#${I}`}a(w,"w:b")&&(f.bold=!0),a(w,"w:i")&&(f.italic=!0),a(w,"w:u")&&(f.underline=!0),a(w,"w:strike")&&(f.strike=!0);const h=a(w,"w:sz");if(h?.attributes["w:val"]){const I=h.attributes["w:val"],v=parseInt(I,10);isNaN(v)||(f.fontSize=v)}const y=a(w,"w:rFonts");y?.attributes["w:ascii"]&&(f.fontFamily=y.attributes["w:ascii"]),Object.keys(f).length>0&&(l.charFormat=f)}t.set(c,l)}return t}function tt(n,t){const e=a(n,"w:t");if(!e)return null;const r=e.children.find(o=>o.type==="text");if(!r?.value)return null;const i=_(n,t);return{type:"text",text:r.value,...i.length&&{marks:i}}}async function D(n,t){const{context:e,styleInfo:r}=t,i=[];for(const o of n.children)if(o.type==="element"){if(o.name==="w:hyperlink"){const s=o,c=s.attributes["r:id"],l=e.hyperlinks.get(c);if(!l)continue;for(const u of s.children){if(u.type!=="element"||u.name!=="w:r")continue;const p=u,w=Q(p);if(w){const d=await W(w,{context:e});if(d){i.push(d);continue}const h=await Z(w,{context:e});if(h.length){i.push(...h);continue}}const 
f=tt(p,r);f&&(f.marks=f.marks||[],f.marks.push({type:"link",attrs:{href:l}}),i.push(f))}}else if(o.name==="w:r"){const s=o,c=Q(s);if(c){const u=await Z(c,{context:e});if(u.length){i.push(...u);continue}}if(a(s,"w:br")){const u=_(s,r);i.push({type:"hardBreak",...u.length&&{marks:u}})}const l=tt(s,r);l&&i.push(l)}}return i}function _(n,t){const e=[],r=a(n,"w:rPr");let i={};if(t?.charFormat&&(i={...t.charFormat}),r){const o=a(r,"w:b");o&&(o.attributes["w:val"]==="false"?i.bold=!1:i.bold=!0);const s=a(r,"w:i");s&&(s.attributes["w:val"]==="false"?i.italic=!1:i.italic=!0),a(r,"w:u")&&(i.underline=!0),a(r,"w:strike")&&(i.strike=!0);const c=a(r,"w:color");if(c?.attributes["w:val"]&&c.attributes["w:val"]!=="auto"){const f=c.attributes["w:val"];i.color=f.startsWith("#")?f:`#${f}`}const l=a(r,"w:sz");if(l?.attributes["w:val"]){const f=l.attributes["w:val"],d=parseInt(f,10);isNaN(d)||(i.fontSize=d)}const u=a(r,"w:rFonts");u?.attributes["w:ascii"]&&(i.fontFamily=u.attributes["w:ascii"]);const p=a(r,"w:shd");if(p?.attributes["w:fill"]&&p.attributes["w:fill"]!=="auto"){const f=p.attributes["w:fill"];i.backgroundColor=f.startsWith("#")?f:`#${f}`}a(r,"w:highlight")&&e.push({type:"highlight"});const w=a(r,"w:vertAlign");if(w){const f=w.attributes["w:val"];f==="subscript"?e.push({type:"subscript"}):f==="superscript"&&e.push({type:"superscript"})}}if(i.bold&&e.push({type:"bold"}),i.italic&&e.push({type:"italic"}),i.underline&&e.push({type:"underline"}),i.strike&&e.push({type:"strike"}),i.color||i.backgroundColor||i.fontSize||i.fontFamily){const o={color:i.color||"",backgroundColor:i.backgroundColor||"",fontSize:"",fontFamily:"",lineHeight:""};if(i.fontSize){const s=Math.round(i.fontSize*gt*10)/10;o.fontSize=`${s}px`}i.fontFamily&&(o.fontFamily=i.fontFamily),e.push({type:"textStyle",attrs:o})}return e}function j(n){const t=a(n,"w:pPr");if(!t)return;const e=a(t,"w:jc");if(!e?.attributes["w:val"])return;const r=e.attributes["w:val"],i=mt.docxToTipTap[r];return i?{textAlign:i}:void 
0}function et(n){const t=a(n,"w:pPr");if(!t)return null;const e={},r=a(t,"w:ind");if(r){const o=T(r.attributes,"w:left");if(o){const l=parseInt(o,10);e.indentLeft=M(l)}const s=T(r.attributes,"w:right");if(s){const l=parseInt(s,10);e.indentRight=M(l)}const c=T(r.attributes,"w:firstLine");if(c){const l=parseInt(c,10);e.indentFirstLine=M(l)}else{const l=T(r.attributes,"w:hanging");if(l){const u=o?parseInt(o,10):0,p=parseInt(l,10),w=u-p;e.indentFirstLine=M(w)}}}const i=a(t,"w:spacing");if(i){const o=T(i.attributes,"w:before");if(o){const c=parseInt(o,10);e.spacingBefore=M(c)}const s=T(i.attributes,"w:after");if(s){const c=parseInt(s,10);e.spacingAfter=M(c)}}return Object.keys(e).length?e:null}async function $(n,t){const{context:e,styleInfo:r}=t,i=a(n,"w:pPr"),o=(i&&a(i,"w:pStyle"))?.attributes["w:val"];if(o&&e.styleMap){const u=e.styleMap.get(o);if(u?.outlineLvl!==void 0&&u.outlineLvl>=0&&u.outlineLvl<=5){const w=u.outlineLvl+1;return nt(n,t,u,w)}const p=o.match(/^Heading(\d+)$/);if(p){const w=parseInt(p[1],10);return nt(n,t,u,w)}}const s=o&&e.styleMap?e.styleMap.get(o):void 0,c=await D(n,{context:e,styleInfo:r||s}),l={...j(n),...et(n)};if(jt(n)){const u=c.filter(p=>p.type!=="hardBreak");return[{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:u.length?u:void 0},{type:"horizontalRule"}]}if(c.length===1&&c[0].type==="hardBreak"){const u=a(n,"w:r");if((u&&a(u,"w:br"))?.attributes["w:type"]==="page")return{type:"horizontalRule"}}if(c.length===1&&c[0].type==="image"){const u=c[0];return{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:[u]}}return{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:c}}function jt(n){const t=[],e=r=>{if(r.name==="w:r")t.push(r);else for(const i of r.children)i.type==="element"&&e(i)};return e(n),t.some(r=>a(r,"w:br")?.attributes["w:type"]==="page")}async function nt(n,t,e,r){return{type:"heading",attrs:{level:r,...et(n)},content:await D(n,{context:t.context,styleInfo:e})}}function E(n){if(!n)return null;const 
t=n.attributes["w:val"],e=n.attributes["w:sz"],r=n.attributes["w:color"],i={single:"solid",dashed:"dashed",dotted:"dotted",double:"double",none:"none",nil:"none"},o={};if(r&&r!=="auto"&&(o.color=`#${r}`),e){const s=parseInt(e);isNaN(s)||(o.width=Math.round(s/6))}return t&&i[t]&&(o.style=i[t]),Object.keys(o).length>0?o:null}function Ht(n){const t={marginTop:void 0,marginBottom:void 0,marginLeft:void 0,marginRight:void 0},e=a(n,"w:tblPr");if(!e)return null;const r=a(e,"w:tblCellMar");if(!r)return null;const i=a(r,"w:top");if(i?.attributes["w:w"]){const l=parseInt(i.attributes["w:w"]);isNaN(l)||(t.marginTop=l)}const o=a(r,"w:bottom");if(o?.attributes["w:w"]){const l=parseInt(o.attributes["w:w"]);isNaN(l)||(t.marginBottom=l)}const s=a(r,"w:left");if(s?.attributes["w:w"]){const l=parseInt(s.attributes["w:w"]);isNaN(l)||(t.marginLeft=l)}const c=a(r,"w:right");if(c?.attributes["w:w"]){const l=parseInt(c.attributes["w:w"]);isNaN(l)||(t.marginRight=l)}return t.marginTop===void 0&&t.marginBottom===void 0&&t.marginLeft===void 0&&t.marginRight===void 0?null:t}function Ut(n){const t={rowHeight:null},e=a(n,"w:trPr");if(!e)return t;const r=a(e,"w:trHeight");if(r?.attributes["w:val"]){const i=parseInt(r.attributes["w:val"]),o=z(i);t.rowHeight=`${o}px`}return t}function rt(n){const t={colspan:1,rowspan:1,colwidth:null},e=a(n,"w:tcPr");if(!e)return t;const r=a(e,"w:gridSpan");r?.attributes["w:val"]&&(t.colspan=parseInt(r.attributes["w:val"])),a(e,"w:vMerge")?.attributes["w:val"]==="continue"&&(t.rowspan=0);const i=a(e,"w:tcW");if(i?.attributes["w:w"]){const l=parseInt(i.attributes["w:w"]),u=z(l);t.colwidth=[u]}const o=a(e,"w:shd");o?.attributes["w:fill"]&&(t.backgroundColor=`#${o.attributes["w:fill"]}`);const s=a(e,"w:vAlign");s?.attributes["w:val"]&&(t.verticalAlign=s.attributes["w:val"]);const c=a(e,"w:tcBorders");if(c){const l=E(a(c,"w:top"));l&&(t.borderTop=l);const u=E(a(c,"w:bottom"));u&&(t.borderBottom=u);const p=E(a(c,"w:left"));p&&(t.borderLeft=p);const 
w=E(a(c,"w:right"));w&&(t.borderRight=w)}return t}function Xt(n){return n.name==="w:tbl"}async function it(n,t){const e=[];for(const s of n.children)s.type==="element"&&s.name==="w:tr"&&e.push(s);const r=new Map,i=await Promise.all(e.map((s,c)=>Kt(s,{context:t.context,activeRowspans:r,rows:e,rowIndex:c}))),o=Ht(n);return{type:"table",...o&&{attrs:o},content:i}}async function Kt(n,t){const e=[];let r=0;const i=Ut(n);for(const o of n.children){if(o.type!=="element"||o.name!=="w:tc")continue;const s=t.activeRowspans.get(r);if(s&&s>0){t.activeRowspans.set(r,s-1),r++;continue}let c=rt(o);if(c?.rowspan===1){const u=qt({rows:t.rows,rowIndex:t.rowIndex,colIndex:r});u>1&&(c={...c,rowspan:u})}if(c?.rowspan&&c.rowspan>1&&t.activeRowspans.set(r,c.rowspan-1),c?.rowspan===0){r++;continue}const l=await Gt(o,t);e.push({type:"tableCell",...c&&{attrs:c},content:l}),r+=c?.colspan||1}return{type:"tableRow",...i&&{attrs:i},content:e}}function qt(n){let t=1,e=n.colIndex;for(let r=n.rowIndex+1;r<n.rows.length;r++){const i=n.rows[r];let o=!1;for(const s of i.children){if(s.type!=="element"||s.name!=="w:tc")continue;const c=rt(s),l=c?.colspan||1;if(e>=0&&e<l){if(c?.rowspan===0)t++,o=!0;else return t;break}e-=l}if(!o)break}return t}async function Gt(n,t){const e=[];for(const r of n.children)if(r.type==="element"&&r.name==="w:p"){const i=await $(r,t);Array.isArray(i)?e.push(...i):e.push(i)}return e.length?e:[{type:"paragraph",content:[]}]}const ot="\u2610",H="\u2611";function st(n){const t=a(n,"w:r");if(!t)return null;const e=a(t,"w:t");if(!e)return null;const r=e.children.find(i=>i.type==="text");return r?.value&&r||null}function U(n){const t=st(n);if(!t)return!1;const e=t.value;return e.startsWith(ot)||e.startsWith(H)}function at(n){return st(n)?.value.startsWith(H)||!1}async function ct(n,t){return{type:"taskItem",attrs:{checked:at(n)},content:[await Vt(n,t)]}}async function lt(n,t){const{siblings:e,index:r,processedIndices:i}=t,o=[];let s=r;for(;s<e.length;){const 
c=e[s];if(c.name!=="w:p"||!U(c))break;i.add(s);const l=await ct(c,{context:t.context,styleInfo:t.styleInfo});o.push(l),s++}return{type:"taskList",content:o}}async function Vt(n,t){const{context:e,styleInfo:r}=t,i=await D(n,{context:e,styleInfo:r});if(i.length>0&&i[0].type==="text"){const s=i[0],c=s.text;if(c.startsWith(ot)||c.startsWith(H)){const l=c.substring(2).trimStart();l?s.text=l:i.shift()}}const o=j(n);return{type:"paragraph",...o&&{attrs:o},content:i.length?i:void 0}}function ut(n){const t=a(n,"w:pPr"),e=(t&&a(t,"w:pStyle"))?.attributes["w:val"];return e===N.CODE_BLOCK||e?.startsWith(N.CODE_PREFIX)||!1}function ft(n){const t=a(n,"w:pPr"),e=(t&&a(t,"w:pStyle"))?.attributes["w:val"];return e?.startsWith(N.CODE_BLOCK)&&e.replace(N.CODE_BLOCK,"").toLowerCase()||void 0}function X(n){const t=a(n,"w:pPr");return!!t&&a(t,"w:numPr")!==void 0}function K(n){const t=a(n,"w:pPr"),e=t&&a(t,"w:numPr");if(!e)return null;const r=a(e,"w:ilvl"),i=a(e,"w:numId");return!r||!i?null:{numId:i.attributes["w:val"],level:parseInt(r.attributes["w:val"]||"0",10)}}function pt(n){const t=a(n,"w:r");if(!t)return!1;let e=!1,r=!1;for(const i of t.children)i.type==="element"&&(i.name==="w:br"&&i.attributes["w:type"]==="page"?e=!0:i.name==="w:t"?i.children.find(o=>o.type==="text")?.value?.trim().length&&(r=!0):i.name!=="w:rPr"&&(r=!0));return e&&!r}async function Jt(n,t={}){const e=await dt(n),r=wt(e),i=zt(r),o=await Dt(r,t.image?.handler),s=r["word/document.xml"];if(!s)throw new Error("Invalid DOCX file: missing word/document.xml");const c=F(new TextDecoder().decode(s)),l=Wt(r),u=_t(r),p={...t,hyperlinks:i,images:o,listTypeMap:l,styleMap:u};return await Qt(c,{context:p})}async function Qt(n,t){if(n.type!=="root")return{type:"doc",content:[]};const e=a(n,"w:document");if(!e)return{type:"doc",content:[]};const r=a(e,"w:body");return r?{type:"doc",content:await Yt(r.children.filter(i=>i.type==="element"),t)}:{type:"doc",content:[]}}async function Yt(n,t){const e=[],r=new Set;for(let 
i=0;i<n.length;i++){if(r.has(i))continue;const o=n[i];if(t.context.ignoreEmptyParagraphs&&o.name==="w:p"&&re(o))continue;const s=await Zt(o,n,i,t,r);Array.isArray(s)?e.push(...s):s&&e.push(s)}return e}async function Zt(n,t,e,r,i){switch(n.name){case"w:tbl":return await it(n,r);case"w:p":return ut(n)?await te(n):U(n)?await lt(n,{...r,siblings:t,index:e,processedIndices:i}):X(n)?await ee(n,t,e,r,i):pt(n)?{type:"horizontalRule"}:await $(n,r);default:return null}}async function te(n){const t=ft(n),e=ne(n);return{type:"codeBlock",...t&&{attrs:{language:t}},content:e}}async function ee(n,t,e,r,i){const o=K(n);if(!o)return await $(n,r);const s=r.context.listTypeMap.get(o.numId),c=s?.type||"bullet",l=[];let u=e;for(;u<t.length;){const w=t[u];if(w.name!=="w:p"||!X(w))break;const f=K(w);if(!f||f.numId!==o.numId)break;i.add(u);const d=await $(w,r),h=Array.isArray(d)?d[0]:d;l.push({type:"listItem",content:[h]}),u++}const p={type:c==="bullet"?"bulletList":"orderedList",content:l};return c==="ordered"&&(p.attrs={type:null,...s?.start!==void 0&&{start:s.start}}),p}function ne(n){const t=[],e=x(n,"w:r");for(const r of e){const i=a(r,"w:t");if(!i)continue;const o=i.children.find(s=>s.type==="text");o&&"value"in o&&o.value&&t.push({type:"text",text:o.value})}return t}function re(n){const t=x(n,"w:r");for(const e of t){const r=a(e,"w:t");if(r){const o=r.children.find(s=>s.type==="text");if(o&&"value"in o&&o.value&&o.value.trim().length>0)return!1}if(a(e,"w:drawing")||a(e,"mc:AlternateContent")||a(e,"w:pict"))return!1;const i=a(e,"w:br");if(i&&i.attributes["w:type"]==="page")return!1}return!0}export{$ as convertParagraph,it as convertTable,ct as convertTaskItem,lt as convertTaskList,j as extractAlignment,_ as extractMarks,D as extractRuns,ft as getCodeBlockLanguage,K as getListInfo,at as getTaskItemChecked,ut as isCodeBlock,pt as isHorizontalRule,X as isListItem,Xt as isTable,U as isTaskItem,Jt as parseDOCX};
|
|
1
|
+
import { fromXml } from "xast-util-from-xml";
|
|
2
|
+
import { unzipSync } from "fflate";
|
|
3
|
+
import { toUint8Array } from "undio";
|
|
4
|
+
import { imageMeta } from "image-meta";
|
|
5
|
+
//#region ../utils/dist/index.mjs
|
|
6
|
+
/**
|
|
7
|
+
* TWIP (Twentieth of a Point) conversion constants
|
|
8
|
+
* 1 inch = 1440 TWIPs
|
|
9
|
+
*/
|
|
10
|
+
const TWIPS_PER_INCH = 1440;
|
|
11
|
+
/**
|
|
12
|
+
* EMU (English Metric Unit) conversion constants
|
|
13
|
+
* 1 inch = 914400 EMUs
|
|
14
|
+
*/
|
|
15
|
+
const EMUS_PER_INCH = 914400;
|
|
16
|
+
/**
 * Pixels per half-point: 1 / 1.5 ≈ 0.667.
 * This equals 96 pixels per inch divided by 144 half-points per inch.
 * NOTE(review): presumably applied to DOCX `w:sz` font sizes, which are
 * expressed in half-points — confirm at the call site.
 */
const PIXELS_PER_HALF_POINT = 1 / 1.5;
|
|
17
|
+
/**
|
|
18
|
+
* DOCX paragraph style names (`w:pStyle` values) used to recognise code blocks
|
|
19
|
+
*/
|
|
20
|
+
const DOCX_STYLE_NAMES = {
|
|
21
|
+
CODE_BLOCK: "CodeBlock",
|
|
22
|
+
CODE_PREFIX: "Code"
|
|
23
|
+
};
|
|
24
|
+
/**
|
|
25
|
+
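* Text alignment mappings between TipTap alignment values and DOCX `w:jc` values, in both directions (TipTap "justify" corresponds to DOCX "both")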
|
|
26
|
+
*/
|
|
27
|
+
const TEXT_ALIGN_MAP = {
|
|
28
|
+
tiptapToDocx: {
|
|
29
|
+
left: "left",
|
|
30
|
+
right: "right",
|
|
31
|
+
center: "center",
|
|
32
|
+
justify: "both"
|
|
33
|
+
},
|
|
34
|
+
docxToTipTap: {
|
|
35
|
+
left: "left",
|
|
36
|
+
right: "right",
|
|
37
|
+
center: "center",
|
|
38
|
+
both: "justify"
|
|
39
|
+
}
|
|
40
|
+
};
|
|
41
|
+
/**
|
|
42
|
+
* Unit conversion utilities for DOCX processing
|
|
43
|
+
* Handles conversions between TWIPs, EMUs, pixels, and other units
|
|
44
|
+
*/
|
|
45
|
+
/** Pixel density used by the pixel conversions in this module: 96 pixels per inch. */
const PIXELS_PER_INCH = 96;
|
|
46
|
+
/**
 * Turn a length measured in TWIPs into a whole number of pixels.
 * The value is scaled by PIXELS_PER_INCH / TWIPS_PER_INCH and rounded with
 * Math.round.
 *
 * @param twip - Length in TWIPs
 * @returns Rounded length in pixels
 *
 * @example
 * convertTwipToPixels(1440) // returns 96
 */
function convertTwipToPixels(twip) {
  // Multiply before dividing so the arithmetic (and rounding) is unchanged.
  const scaled = twip * PIXELS_PER_INCH;
  const pixels = scaled / TWIPS_PER_INCH;
  return Math.round(pixels);
}
|
|
57
|
+
/**
 * Format a TWIP length as a CSS pixel length string.
 * Delegates the numeric conversion to convertTwipToPixels and appends "px".
 *
 * @param twip - Length in TWIPs
 * @returns CSS length string such as "20px"
 *
 * @example
 * convertTwipToCssString(1440) // returns "96px"
 */
function convertTwipToCssString(twip) {
  const pixels = convertTwipToPixels(twip);
  return `${pixels}px`;
}
|
|
68
|
+
/**
 * Turn a length measured in EMUs into a whole number of pixels.
 * The EMU value is divided by the number of EMUs per pixel
 * (EMUS_PER_INCH / PIXELS_PER_INCH) and rounded with Math.round.
 *
 * @param emu - Length in EMUs
 * @returns Rounded length in pixels
 *
 * @example
 * convertEmuToPixels(914400) // returns 96
 */
function convertEmuToPixels(emu) {
  const emusPerPixel = EMUS_PER_INCH / PIXELS_PER_INCH;
  return Math.round(emu / emusPerPixel);
}
|
|
80
|
+
/**
 * Parse a base-10 EMU string and convert it to pixels.
 * Parsing follows parseInt semantics (leading integer portion is used).
 *
 * @param emuStr - EMU value as a string
 * @returns Pixel value, or undefined when the string does not start with an integer
 *
 * @example
 * convertEmuStringToPixels("914400") // returns 96
 * convertEmuStringToPixels("invalid") // returns undefined
 */
function convertEmuStringToPixels(emuStr) {
  const parsed = Number.parseInt(emuStr, 10);
  return Number.isNaN(parsed) ? undefined : convertEmuToPixels(parsed);
}
|
|
94
|
+
/**
 * Look up the first direct child element that has the given name.
 * Only nodes whose `type` is "element" are considered; descendants deeper
 * than one level are not searched.
 *
 * @param node - Parent XML node (element or root); may lack `children`
 * @param name - Element name to match, including any prefix (e.g. "w:p")
 * @returns The first matching child element, or null when there is none
 *
 * @example
 * const paragraph = findChild(document, "w:p");
 */
function findChild(node, name) {
  const { children } = node;
  if (!children) return null;
  const match = children.find(
    (candidate) => candidate.type === "element" && candidate.name === name,
  );
  return match ?? null;
}
|
|
108
|
+
/**
 * Locate the first descendant element with the given name.
 * Elements are visited depth-first in document order (an element is tested
 * before its own descendants, and before its later siblings). Non-element
 * nodes are skipped and not descended into. The starting node itself is
 * never tested.
 *
 * @param node - XML node to search beneath; may lack `children`
 * @param name - Element name to match
 * @returns The first matching descendant element, or null when there is none
 *
 * @example
 * const textElement = findDeepChild(run, "w:t");
 */
function findDeepChild(node, name) {
  const pending = [];
  // Push element children in reverse so the first child is popped first.
  const enqueueChildren = (parent) => {
    const kids = parent.children;
    if (!kids) return;
    for (let i = kids.length - 1; i >= 0; i--) {
      if (kids[i].type === "element") pending.push(kids[i]);
    }
  };

  enqueueChildren(node);
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.name === name) return current;
    enqueueChildren(current);
  }
  return null;
}
|
|
127
|
+
/**
 * Collect every descendant element with the given name, in document order.
 * An element is listed before any matching elements nested inside it.
 * Non-element nodes are skipped and not descended into, and the starting
 * node itself is never included.
 *
 * @param node - XML node to search beneath; may lack `children`
 * @param name - Element name to match
 * @returns Array of matching descendant elements (empty when none match)
 *
 * @example
 * const allTextRuns = findDeepChildren(paragraph, "w:r");
 */
function findDeepChildren(node, name) {
  const results = [];
  // Append into one shared array instead of spreading each recursive result
  // into push(): spreading passes every element as a call argument, which
  // throws RangeError for very large match sets and re-copies the matches
  // at every nesting level.
  const collect = (parent) => {
    if (!parent.children) return;
    for (const child of parent.children) {
      if (child.type !== "element") continue;
      if (child.name === name) results.push(child);
      collect(child);
    }
  };
  collect(node);
  return results;
}
|
|
145
|
+
/**
 * Read a TWIP-valued attribute and validate that it is numeric.
 * The attribute is accepted when it is non-empty and parseInt (base 10) can
 * read an integer from it; the original string is returned unchanged, not
 * the parsed number.
 *
 * @param attributes - Element attributes object
 * @param name - Attribute name to read
 * @returns The attribute's string value, or undefined when it is missing,
 *          empty, or not numeric
 *
 * @example
 * const leftIndent = parseTwipAttr(pPr.attributes, "w:left");
 */
function parseTwipAttr(attributes, name) {
  const raw = attributes[name];
  const isNumeric = Boolean(raw) && !Number.isNaN(Number.parseInt(raw, 10));
  return isNumeric ? raw : undefined;
}
|
|
162
|
+
/**
|
|
163
|
+
* Type guard utilities for DOCX processing
|
|
164
|
+
*/
|
|
165
|
+
/**
 * Build a membership predicate for a fixed list of allowed values.
 * The returned function reports whether its argument is contained in
 * `validValues` (checked with Array.prototype.includes).
 *
 * @param validValues - Array of allowed string values
 * @returns Predicate returning true when the given value is in `validValues`
 *
 * @example
 * const isValidAlign = createStringValidator(["left", "right", "center"]);
 * if (isValidAlign(value)) {
 *   // value is one of "left", "right", "center"
 * }
 */
function createStringValidator(validValues) {
  return (candidate) => validValues.includes(candidate);
}
|
|
183
|
+
//#endregion
|
|
184
|
+
//#region src/utils/base64.ts
|
|
185
|
+
/**
|
|
186
|
+
* Base64 encoding utilities
|
|
187
|
+
*/
|
|
188
|
+
/**
 * Base64 alphabet; the character at index i encodes the 6-bit value i.
 */
const BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/**
 * Encode bytes as a padded base64 string.
 * Input is consumed three bytes at a time; each group becomes four output
 * characters looked up in BASE64_CHARS. A trailing group of one or two
 * bytes is zero-filled for the bit arithmetic and padded with "=".
 * No external dependencies are used.
 *
 * @param bytes - Uint8Array to encode
 * @returns Base64 encoded string
 */
function uint8ArrayToBase64(bytes) {
  const total = bytes.length;
  const encoded = [];
  for (let offset = 0; offset < total; offset += 3) {
    const hasSecond = offset + 1 < total;
    const hasThird = offset + 2 < total;
    const first = bytes[offset];
    const second = hasSecond ? bytes[offset + 1] : 0;
    const third = hasThird ? bytes[offset + 2] : 0;
    encoded.push(
      BASE64_CHARS[first >> 2],
      BASE64_CHARS[((first & 3) << 4) | (second >> 4)],
      // Positions with no source byte are emitted as padding.
      hasSecond ? BASE64_CHARS[((second & 15) << 2) | (third >> 6)] : "=",
      hasThird ? BASE64_CHARS[third & 63] : "=",
    );
  }
  return encoded.join("");
}
|
|
220
|
+
/**
 * Decode a base64 string into raw bytes.
 *
 * @param base64 - Base64 encoded string
 * @returns Decoded data as Uint8Array
 */
function base64ToUint8Array(base64) {
  // atob yields one character per byte; map each character to its code.
  return Uint8Array.from(atob(base64), (ch) => ch.charCodeAt(0));
}
|
|
232
|
+
//#endregion
|
|
233
|
+
//#region src/utils/image.ts
|
|
234
|
+
/**
 * Runtime detection flags.
 * isNode: true when process.release.name reports "node".
 * isBrowser: true when a global `window` binding exists.
 */
const isNode = globalThis.process?.release?.name === "node";
const isBrowser = typeof window !== "undefined";
|
|
239
|
+
/**
 * Await a module (or a promise for one) and unwrap its default export.
 * Falls back to the module object itself when `default` is missing or falsy.
 */
async function interopDefault(m) {
  const loaded = await m;
  const preferred = loaded.default;
  return preferred ? preferred : loaded;
}
|
|
246
|
+
// Module-level cache for the canvas module: assigned by resolveCanvasModule,
// read by NodeCanvasFactory._createCanvas and by the non-browser branch of loadImage.
let resolvedCanvasModule;
|
|
247
|
+
/**
 * Abstract canvas factory.
 * Subclasses supply `_createCanvas`; this class manages the
 * `{ canvas, context }` pair built on top of it.
 */
class BaseCanvasFactory {
  #enableHWA = false;

  constructor({ enableHWA = false } = {}) {
    this.#enableHWA = enableHWA;
  }

  /**
   * Create a canvas of the given size together with its 2D context.
   * `willReadFrequently` is requested whenever hardware acceleration is off.
   */
  create(width, height) {
    const canvas = this._createCanvas(width, height);
    const context = canvas.getContext("2d", { willReadFrequently: !this.#enableHWA });
    return { canvas, context };
  }

  /** Resize an existing canvas; throws when the pair has no canvas. */
  reset({ canvas }, width, height) {
    if (!canvas) {
      throw new Error("Canvas is not specified");
    }
    canvas.width = width;
    canvas.height = height;
  }

  /** Zero the canvas size and clear both references on the pair. */
  destroy(context) {
    const { canvas } = context;
    if (!canvas) {
      throw new Error("Canvas is not specified");
    }
    canvas.width = 0;
    canvas.height = 0;
    context.canvas = undefined;
    context.context = undefined;
  }

  /** Platform hook; the base implementation always throws. */
  _createCanvas(width, height) {
    throw new Error("Not implemented");
  }
}
|
|
278
|
+
/**
 * Canvas factory that creates <canvas> elements through a document
 * (defaults to globalThis.document).
 */
class DOMCanvasFactory extends BaseCanvasFactory {
  _document;

  constructor({ ownerDocument = globalThis.document, enableHWA = false } = {}) {
    super({ enableHWA });
    this._document = ownerDocument;
  }

  _createCanvas(width, height) {
    const element = this._document.createElement("canvas");
    return Object.assign(element, { width, height });
  }
}
|
|
294
|
+
/**
 * Canvas factory backed by the resolved @napi-rs/canvas module.
 * resolveCanvasModule must have completed before a canvas is created.
 */
class NodeCanvasFactory extends BaseCanvasFactory {
  constructor({ enableHWA = false } = {}) {
    super({ enableHWA });
  }

  _createCanvas(width, height) {
    const canvasModule = resolvedCanvasModule;
    if (!canvasModule) {
      throw new Error("@napi-rs/canvas module is not resolved");
    }
    return canvasModule.createCanvas(width, height);
  }
}
|
|
306
|
+
/**
 * Load the canvas module once and cache it in resolvedCanvasModule.
 * The import callback is not invoked when a module is already cached.
 */
async function resolveCanvasModule(canvasImport) {
  if (resolvedCanvasModule !== undefined && resolvedCanvasModule !== null) {
    return;
  }
  resolvedCanvasModule = await interopDefault(canvasImport());
}
|
|
312
|
+
/**
 * Pick the canvas factory class for the current runtime.
 *
 * @param canvasImport - Dynamic import function for @napi-rs/canvas (required in Node.js)
 * @returns DOMCanvasFactory in a browser, NodeCanvasFactory in Node.js
 * @throws Error when running in Node.js without canvasImport, or in an unrecognised runtime
 */
async function createCanvasFactory(canvasImport) {
  if (isBrowser) {
    return DOMCanvasFactory;
  }
  if (!isNode) {
    throw new Error("Unsupported environment for canvas operations");
  }
  if (!canvasImport) {
    throw new Error("In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas");
  }
  await resolveCanvasModule(canvasImport);
  return NodeCanvasFactory;
}
|
|
327
|
+
/**
 * Crop an image according to a DOCX crop rectangle.
 *
 * @param imageData - Original image data as Uint8Array
 * @param crop - Crop rectangle; each side is expressed on a 0-100000 scale
 * @param options - `enabled: false` disables cropping; `canvasImport` is forwarded to createCanvasFactory
 * @returns Cropped image bytes, or the original bytes when there is nothing to crop,
 *          cropping is disabled, the crop leaves no area, or any step fails
 */
async function cropImageIfNeeded(imageData, crop, options = {}) {
  const nothingToCrop = !crop || !(crop.left || crop.top || crop.right || crop.bottom);
  if (nothingToCrop || options.enabled === false) {
    return imageData;
  }
  try {
    const CanvasFactory = await createCanvasFactory(options.canvasImport);
    const img = await loadImage(imageData, CanvasFactory);

    // Convert each side from the 0-100000 scale to source pixels.
    const left = (crop.left || 0) / 1e5 * img.width;
    const top = (crop.top || 0) / 1e5 * img.height;
    const right = (crop.right || 0) / 1e5 * img.width;
    const bottom = (crop.bottom || 0) / 1e5 * img.height;
    const croppedWidth = Math.round(img.width - left - right);
    const croppedHeight = Math.round(img.height - top - bottom);
    if (croppedWidth <= 0 || croppedHeight <= 0) {
      console.warn("Invalid crop dimensions, returning original image");
      return imageData;
    }

    const { canvas, context } = new CanvasFactory().create(croppedWidth, croppedHeight);
    if (!context) {
      throw new Error("Failed to get 2D context from canvas");
    }
    context.drawImage(img, left, top, croppedWidth, croppedHeight, 0, 0, croppedWidth, croppedHeight);

    // Round-trip through a data URL to obtain the encoded bytes.
    const response = await fetch(canvas.toDataURL());
    return new Uint8Array(await response.arrayBuffer());
  } catch (error) {
    console.warn("Image cropping failed, returning original image:", error);
    return imageData;
  }
}
|
|
362
|
+
/**
 * Decode image bytes into a drawable image for the current runtime.
 *
 * In a browser the bytes are wrapped in a Blob and loaded through an
 * HTMLImageElement; otherwise they are handed to the resolved canvas
 * module's loadImage.
 *
 * @param data - Image data as Uint8Array
 * @param _CanvasFactory - Canvas factory class (unused, kept for call compatibility)
 * @returns Loaded image
 * @throws Error when the browser fails to load the image, or when the canvas
 *         module has not been resolved outside a browser
 */
async function loadImage(data, _CanvasFactory) {
  if (!isBrowser) {
    if (!resolvedCanvasModule) {
      throw new Error("@napi-rs/canvas module is not resolved");
    }
    return await resolvedCanvasModule.loadImage(Buffer.from(data));
  }

  // Pass the typed-array view itself: `data.buffer` would include every byte of
  // the backing ArrayBuffer, which is wrong when `data` is a subarray/offset view.
  const url = URL.createObjectURL(new Blob([data]));
  try {
    return await new Promise((resolve, reject) => {
      const img = new Image();
      img.onload = () => resolve(img);
      img.onerror = () => reject(new Error("Failed to load image"));
      img.src = url;
    });
  } finally {
    // Single cleanup point for success, load failure and synchronous errors.
    URL.revokeObjectURL(url);
  }
}
|
|
395
|
+
//#endregion
|
|
396
|
+
//#region src/parsers/images.ts
|
|
397
|
+
/** Relationship type URI compared against each Relationship's Type attribute to find images. */
const IMAGE_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
/**
 * Membership predicates for the align and relativeFrom strings accepted on
 * floating image positions.
 */
const isValidHorizontalAlign = createStringValidator(["left", "right", "center", "inside", "outside"]);
const isValidVerticalAlign = createStringValidator(["top", "bottom", "center", "inside", "outside"]);
const isValidHorizontalRelative = createStringValidator([
  "page", "character", "column", "margin",
  "leftMargin", "rightMargin", "insideMargin", "outsideMargin"
]);
const isValidVerticalRelative = createStringValidator([
  "page", "paragraph", "margin", "topMargin",
  "bottomMargin", "insideMargin", "outsideMargin", "line"
]);
|
|
435
|
+
/**
 * Read the l/t/r/b attributes of an a:srcRect element.
 *
 * @returns Object with left/top/right/bottom (integer, or undefined for an
 *          absent side), or undefined when none of the four attributes is set
 */
function extractCropRect(srcRect) {
  const { l, t, r, b } = srcRect.attributes;
  if (!(l || t || r || b)) {
    return undefined;
  }
  const toInt = (raw) => (raw ? parseInt(raw, 10) : undefined);
  return {
    left: toInt(l),
    top: toInt(t),
    right: toInt(r),
    bottom: toInt(b)
  };
}
|
|
451
|
+
/**
 * Apply a picture's a:srcRect crop to a data-URL image and scale its
 * recorded dimensions by the visible fraction.
 *
 * Returns imgInfo unchanged when the picture has no pic:spPr child, the source
 * is not a data URL, there is no effective crop, the URL has no payload, or
 * cropping throws.
 */
async function applyCropToImage(pic, imgInfo, params) {
  const hasShapeProps = Boolean(findChild(pic, "pic:spPr"));
  if (!hasShapeProps || !imgInfo.src.startsWith("data:")) {
    return imgInfo;
  }
  const srcRect = findDeepChild(pic, "a:srcRect");
  if (!srcRect) {
    return imgInfo;
  }
  const crop = extractCropRect(srcRect);
  if (!crop || !(crop.left || crop.top || crop.right || crop.bottom)) {
    return imgInfo;
  }
  try {
    const [metadata, base64Data] = imgInfo.src.split(",");
    if (!base64Data) {
      return imgInfo;
    }
    const sourceBytes = base64ToUint8Array(base64Data);
    const imageOptions = params.context.image;
    const croppedBytes = await cropImageIfNeeded(sourceBytes, crop, {
      canvasImport: imageOptions?.canvasImport,
      enabled: imageOptions?.enableImageCrop ?? false
    });
    // Fraction of the original that remains visible on each axis.
    const visibleWidthPct = 1 - (crop.left || 0) / 1e5 - (crop.right || 0) / 1e5;
    const visibleHeightPct = 1 - (crop.top || 0) / 1e5 - (crop.bottom || 0) / 1e5;
    return {
      src: `${metadata},${uint8ArrayToBase64(croppedBytes)}`,
      width: Math.round((imgInfo.width || 0) * visibleWidthPct),
      height: Math.round((imgInfo.height || 0) * visibleHeightPct)
    };
  } catch (error) {
    console.warn("Grouped image cropping failed, using original image:", error);
    return imgInfo;
  }
}
|
|
488
|
+
/**
 * Read wp:align / wp:posOffset from a horizontal position element.
 *
 * @returns Object holding `align` (only when it is a valid horizontal value)
 *          and/or `offset` (parsed integer), or undefined when neither is present
 */
function extractHorizontalPosition(positionEl) {
  const alignText = findChild(positionEl, "wp:align")?.children[0];
  const offsetText = findChild(positionEl, "wp:posOffset")?.children[0];

  const align = alignText?.type === "text" && isValidHorizontalAlign(alignText.value)
    ? alignText.value
    : undefined;
  const offset = offsetText?.type === "text" ? parseInt(offsetText.value, 10) : undefined;

  if (!align && offset === undefined) {
    return undefined;
  }
  const position = {};
  if (align) position.align = align;
  if (offset !== undefined) position.offset = offset;
  return position;
}
|
|
506
|
+
/**
 * Read wp:align / wp:posOffset from a vertical position element.
 *
 * @returns Object holding `align` (only when it is a valid vertical value)
 *          and/or `offset` (parsed integer), or undefined when neither is present
 */
function extractVerticalPosition(positionEl) {
  const alignText = findChild(positionEl, "wp:align")?.children[0];
  const offsetText = findChild(positionEl, "wp:posOffset")?.children[0];

  const align = alignText?.type === "text" && isValidVerticalAlign(alignText.value)
    ? alignText.value
    : undefined;
  const offset = offsetText?.type === "text" ? parseInt(offsetText.value, 10) : undefined;

  if (!align && offset === undefined) {
    return undefined;
  }
  const position = {};
  if (align) position.align = align;
  if (offset !== undefined) position.offset = offset;
  return position;
}
|
|
524
|
+
/**
 * Locate the w:drawing of a run, either as a direct child or nested in
 * mc:AlternateContent > mc:Choice.
 *
 * @returns The drawing element, or null when neither location has one
 */
function findDrawingElement(run) {
  const direct = findChild(run, "w:drawing");
  if (direct) {
    return direct;
  }
  const alternate = findChild(run, "mc:AlternateContent");
  if (!alternate) {
    return null;
  }
  const choice = findChild(alternate, "mc:Choice");
  if (!choice) {
    return null;
  }
  return findChild(choice, "w:drawing");
}
|
|
534
|
+
/**
 * Fit an image inside a group box while keeping the image's aspect ratio.
 * When the two aspect ratios differ by no more than 0.1 the group size is
 * used as-is; otherwise the image is scaled along its limiting axis.
 */
function fitToGroup(groupWidth, groupHeight, metaWidth, metaHeight) {
  const imageRatio = metaWidth / metaHeight;
  const boxRatio = groupWidth / groupHeight;

  const ratiosDiffer = Math.abs(imageRatio - boxRatio) > 0.1;
  if (!ratiosDiffer) {
    return { width: groupWidth, height: groupHeight };
  }
  if (imageRatio > boxRatio) {
    // Image is relatively wider: width is the limiting axis.
    return { width: groupWidth, height: Math.round(groupWidth / imageRatio) };
  }
  // Image is relatively taller: height is the limiting axis.
  return { width: Math.round(groupHeight * imageRatio), height: groupHeight };
}
|
|
553
|
+
/**
 * Collect the images referenced by word/_rels/document.xml.rels.
 *
 * For every image relationship the part bytes are looked up in `files`,
 * dimensions and type are probed with imageMeta, and the source is either
 * produced by `handler` or encoded as a base64 data URL.
 *
 * @param files - Map of archive paths to file bytes
 * @param handler - Optional async callback receiving { id, contentType, data };
 *                  its resolved `src` is used instead of a data URL
 * @returns Map of relationship ID to { src, width, height }
 */
async function extractImages(files, handler) {
  const images = new Map();
  const relsXml = files["word/_rels/document.xml.rels"];
  if (!relsXml) return images;
  const relationships = findChild(fromXml(new TextDecoder().decode(relsXml)), "Relationships");
  if (!relationships) return images;

  for (const rel of findDeepChildren(relationships, "Relationship")) {
    const { Type: type, Id: id, Target: target } = rel.attributes;
    if (type !== IMAGE_REL_TYPE || !id || !target) continue;

    // A target beginning with "/" is resolved from the package root; any other
    // target is taken as relative to the word/ folder that holds document.xml.
    const partName = target.startsWith("/") ? target.slice(1) : `word/${target}`;
    const imageData = files[partName];
    if (!imageData) continue;

    let width;
    let height;
    let imageType = "png";
    try {
      const meta = imageMeta(imageData);
      width = meta.width;
      height = meta.height;
      if (meta.type) imageType = meta.type;
    } catch {
      // Best effort: unrecognised image data keeps undefined dimensions and the "png" default.
    }

    let src;
    if (handler) {
      const handled = await handler({
        id,
        contentType: `image/${imageType}`,
        data: imageData
      });
      src = handled.src;
    } else {
      src = `data:image/${imageType};base64,${uint8ArrayToBase64(imageData)}`;
    }
    images.set(id, { src, width, height });
  }
  return images;
}
|
|
594
|
+
/**
 * Build a TipTap image node from a drawing element.
 *
 * Resolves the a:blip relationship through context.images, optionally crops
 * data-URL sources via a:srcRect, and copies size (wp:extent), rotation
 * (a:xfrm rot), title (wp:docPr), floating position (wp:positionH/V) and an
 * RGB outline colour (pic:spPr > a:ln > a:solidFill > a:srgbClr) when present.
 *
 * @returns Image node, or null when the drawing has no resolvable embedded image
 */
async function extractImageFromDrawing(drawing, params) {
  const { context } = params;

  const embedId = findDeepChild(drawing, "a:blip")?.attributes["r:embed"];
  if (!embedId) return null;
  const imgInfo = context.images.get(embedId);
  if (!imgInfo) return null;

  // Source, cropped when a crop rectangle exists and the source is a data URL.
  let src = imgInfo.src;
  const srcRect = findDeepChild(drawing, "a:srcRect");
  const crop = srcRect ? extractCropRect(srcRect) : undefined;
  if (crop && src.startsWith("data:")) {
    const [metadata, base64Data] = src.split(",");
    if (base64Data) {
      const bytes = base64ToUint8Array(base64Data);
      try {
        const cropped = await cropImageIfNeeded(bytes, crop, {
          canvasImport: context.image?.canvasImport,
          enabled: context.image?.enableImageCrop ?? false
        });
        src = `${metadata},${uint8ArrayToBase64(cropped)}`;
      } catch (error) {
        console.warn("Image cropping failed, using original image:", error);
      }
    }
  }

  // Display size from wp:extent, converted by convertEmuStringToPixels.
  let width;
  let height;
  const extent = findDeepChild(drawing, "wp:extent");
  if (extent) {
    const { cx, cy } = extent.attributes;
    if (typeof cx === "string") width = convertEmuStringToPixels(cx);
    if (typeof cy === "string") height = convertEmuStringToPixels(cy);
  }

  // Rotation: the rot attribute divided by 60000 (presumably 60000ths of a degree).
  let rotation;
  const rawRotation = findDeepChild(drawing, "a:xfrm")?.attributes["rot"];
  if (rawRotation) {
    const parsed = parseInt(rawRotation, 10);
    if (!Number.isNaN(parsed)) rotation = parsed / 6e4;
  }

  const title = findDeepChild(drawing, "wp:docPr")?.attributes["title"];

  // Floating placement; an unknown or missing relativeFrom falls back to "page".
  const positionH = findDeepChild(drawing, "wp:positionH");
  const positionV = findDeepChild(drawing, "wp:positionV");
  let floating;
  if (positionH || positionV) {
    const describeAxis = (positionEl, extract, isValidRelative) => {
      const position = positionEl ? extract(positionEl) : undefined;
      const relativeFrom = positionEl?.attributes["relativeFrom"];
      const axis = {
        relative: typeof relativeFrom === "string" && isValidRelative(relativeFrom) ? relativeFrom : "page"
      };
      if (position?.align) axis.align = position.align;
      if (position?.offset !== undefined) axis.offset = position.offset;
      return axis;
    };
    floating = {
      horizontalPosition: describeAxis(positionH, extractHorizontalPosition, isValidHorizontalRelative),
      verticalPosition: describeAxis(positionV, extractVerticalPosition, isValidVerticalRelative)
    };
  }

  // Outline colour, walking the nested elements only as far as they exist.
  const spPr = findDeepChild(drawing, "pic:spPr");
  const ln = spPr && findDeepChild(spPr, "a:ln");
  const solidFill = ln && findDeepChild(ln, "a:solidFill");
  const srgbClr = solidFill && findDeepChild(solidFill, "a:srgbClr");
  const outlineColor = srgbClr?.attributes["val"];

  const attrs = { src, alt: "" };
  if (width !== undefined) attrs.width = width;
  if (height !== undefined) attrs.height = height;
  if (rotation !== undefined) attrs.rotation = rotation;
  if (title) attrs.title = title;
  if (floating) attrs.floating = floating;
  if (outlineColor) {
    attrs.outline = {
      type: "solidFill",
      solidFillType: "rgb",
      value: outlineColor
    };
  }
  return { type: "image", attrs };
}
|
|
689
|
+
/**
 * Extract every image node contained in a drawing element.
 *
 * A drawing without a wpg:wgp group is handled by extractImageFromDrawing.
 * For a group, each picture is processed in one of two ways:
 *  - no nested a:graphic: the blip is resolved directly and cropped through
 *    applyCropToImage;
 *  - nested a:graphic: it is wrapped in a synthetic w:drawing, extracted with
 *    extractImageFromDrawing, then either cropped (sizes scaled by the visible
 *    fraction) or fitted to the group extent.
 *
 * @returns Array of image nodes (empty when nothing could be extracted)
 */
async function extractImagesFromDrawing(drawing, params) {
  const container = findChild(drawing, "wp:inline") || findChild(drawing, "wp:anchor");
  if (!container) return [];

  // Size of the whole drawing; used later to fit grouped pictures.
  let groupWidth;
  let groupHeight;
  const extent = findChild(container, "wp:extent");
  if (extent) {
    const { cx, cy } = extent.attributes;
    if (typeof cx === "string") groupWidth = convertEmuStringToPixels(cx);
    if (typeof cy === "string") groupHeight = convertEmuStringToPixels(cy);
  }

  const graphic = findChild(container, "a:graphic");
  const graphicData = graphic && findChild(graphic, "a:graphicData");
  if (!graphicData) return [];

  const group = findChild(graphicData, "wpg:wgp");
  if (!group) {
    const single = await extractImageFromDrawing(drawing, params);
    return single ? [single] : [];
  }

  const collected = [];
  const searchRoot = findChild(group, "wpg:grpSp") || group;
  const pictures = [
    ...findDeepChildren(searchRoot, "pic:pic"),
    ...findDeepChildren(searchRoot, "pic")
  ];
  const embedIdOf = (node) =>
    node?.type === "element" ? findDeepChild(node, "a:blip")?.attributes["r:embed"] : undefined;

  for (const pic of pictures) {
    const picGraphic = findChild(pic, "a:graphic");

    if (!picGraphic) {
      // Direct path: resolve the blip on the picture itself.
      const blipFill = findChild(pic, "pic:blipFill") || findDeepChild(pic, "a:blipFill");
      if (!blipFill) continue;
      const blip = findChild(blipFill, "a:blip") || findDeepChild(blipFill, "a:blip");
      const embedId = blip?.attributes["r:embed"];
      if (!embedId) continue;
      const imgInfo = params.context.images.get(embedId);
      if (!imgInfo) continue;
      const processed = await applyCropToImage(pic, imgInfo, params);
      collected.push({
        type: "image",
        attrs: {
          src: processed.src,
          alt: "",
          width: processed.width,
          height: processed.height
        }
      });
      continue;
    }

    // Synthetic path: reuse the single-image extractor on the nested graphic.
    const syntheticDrawing = {
      type: "element",
      name: "w:drawing",
      children: [picGraphic],
      attributes: {}
    };
    const image = await extractImageFromDrawing(syntheticDrawing, params);
    if (!image) continue;

    const srcRect = findChild(pic, "pic:spPr") ? findDeepChild(pic, "a:srcRect") : undefined;
    const crop = (srcRect && extractCropRect(srcRect)) || undefined;
    const shouldCrop = crop
      && (crop.left || crop.top || crop.right || crop.bottom)
      && image.attrs?.src?.startsWith("data:");

    if (shouldCrop) {
      try {
        const [metadata, base64Data] = image.attrs.src.split(",");
        if (base64Data) {
          const croppedBytes = await cropImageIfNeeded(base64ToUint8Array(base64Data), crop, {
            canvasImport: params.context.image?.canvasImport,
            enabled: params.context.image?.enableImageCrop ?? false
          });
          image.attrs.src = `${metadata},${uint8ArrayToBase64(croppedBytes)}`;

          const embedId = embedIdOf(syntheticDrawing.children[0]);
          const imgInfo = embedId ? params.context.images.get(embedId) : undefined;
          if (imgInfo?.width && imgInfo?.height) {
            // Scale the intrinsic size by the fraction left visible on each axis.
            const visibleWidthPct = 1 - (crop.left || 0) / 1e5 - (crop.right || 0) / 1e5;
            const visibleHeightPct = 1 - (crop.top || 0) / 1e5 - (crop.bottom || 0) / 1e5;
            image.attrs.width = Math.round(imgInfo.width * visibleWidthPct);
            image.attrs.height = Math.round(imgInfo.height * visibleHeightPct);
          }
        }
      } catch (error) {
        console.warn("Grouped image cropping failed, using original image:", error);
      }
    } else {
      const embedId = embedIdOf(syntheticDrawing.children[0]);
      if (groupWidth && groupHeight && embedId) {
        const imgInfo = params.context.images.get(embedId);
        if (imgInfo?.width && imgInfo?.height) {
          const fitted = fitToGroup(groupWidth, groupHeight, imgInfo.width, imgInfo.height);
          image.attrs.width = fitted.width;
          image.attrs.height = fitted.height;
        } else {
          image.attrs.width = groupWidth;
          image.attrs.height = groupHeight;
        }
      }
    }
    collected.push(image);
  }
  return collected;
}
|
|
796
|
+
//#endregion
|
|
797
|
+
//#region src/parsers/hyperlinks.ts
|
|
798
|
+
/** Relationship type URI compared against each Relationship's Type attribute to find hyperlinks. */
const HYPERLINK_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
/**
 * Collect hyperlink targets from word/_rels/document.xml.rels.
 *
 * @returns Map of relationship ID to target; empty when the rels part or its
 *          Relationships root is missing
 */
function extractHyperlinks(files) {
  const hyperlinks = new Map();
  const relsXml = files["word/_rels/document.xml.rels"];
  if (!relsXml) {
    return hyperlinks;
  }
  const root = fromXml(new TextDecoder().decode(relsXml));
  const relationships = findChild(root, "Relationships");
  if (!relationships) {
    return hyperlinks;
  }
  for (const rel of findDeepChildren(relationships, "Relationship")) {
    const { Type: type, Id: id, Target: target } = rel.attributes;
    if (type === HYPERLINK_REL_TYPE && id && target) {
      hyperlinks.set(id, target);
    }
  }
  return hyperlinks;
}
|
|
813
|
+
//#endregion
|
|
814
|
+
//#region src/parsers/numbering.ts
|
|
815
|
+
/**
 * Parse word/numbering.xml into a map of numId to list description.
 *
 * Only the first w:lvl of each w:abstractNum is inspected. A "bullet" format
 * yields { type: "bullet" }; any other format yields { type: "ordered" } with
 * `start` added when the level declares w:start.
 *
 * @returns Map of numId to list info; empty when the part or its root is missing
 */
function parseNumberingXml(files) {
  const listTypeMap = new Map();
  const numberingXml = files["word/numbering.xml"];
  if (!numberingXml) {
    return listTypeMap;
  }
  const numbering = findChild(fromXml(new TextDecoder().decode(numberingXml)), "w:numbering");
  if (!numbering) {
    return listTypeMap;
  }

  // Pass 1: record format and start value per abstract numbering id.
  const formatsByAbstractId = new Map();
  const startsByAbstractId = new Map();
  for (const abstractNum of findDeepChildren(numbering, "w:abstractNum")) {
    const abstractId = abstractNum.attributes["w:abstractNumId"];
    const firstLevel = findChild(abstractNum, "w:lvl");
    if (!firstLevel) continue;
    const format = findChild(firstLevel, "w:numFmt")?.attributes["w:val"];
    if (format) formatsByAbstractId.set(abstractId, format);
    const startValue = findChild(firstLevel, "w:start")?.attributes["w:val"];
    if (startValue) startsByAbstractId.set(abstractId, parseInt(startValue, 10));
  }

  // Pass 2: map each concrete num to the list type of its abstract definition.
  for (const num of findDeepChildren(numbering, "w:num")) {
    const numId = num.attributes["w:numId"];
    const abstractRef = findChild(num, "w:abstractNumId")?.attributes["w:val"];
    if (!abstractRef) continue;
    const format = formatsByAbstractId.get(abstractRef);
    if (!format) continue;
    if (format === "bullet") {
      listTypeMap.set(numId, { type: "bullet" });
      continue;
    }
    const start = startsByAbstractId.get(abstractRef);
    listTypeMap.set(numId, start !== undefined ? { type: "ordered", start } : { type: "ordered" });
  }
  return listTypeMap;
}
|
|
854
|
+
//#endregion
|
|
855
|
+
//#region src/parsers/styles.ts
|
|
856
|
+
/**
 * Parse word/styles.xml into a map of paragraph styles keyed by styleId.
 *
 * Each entry carries the styleId, the style name, the outline level (from
 * w:pPr/w:outlineLvl) and, when any is set, a charFormat built from w:rPr:
 * color, bold, italic, underline, strike, fontSize (the raw w:sz integer)
 * and fontFamily (w:rFonts w:ascii).
 *
 * Toggle elements (w:b, w:i, w:strike) count as enabled only when their w:val
 * is absent or is not one of "0" / "false" / "off"; w:u counts as underlined
 * unless its w:val is "none".
 *
 * @returns Map of styleId to style info; empty when the part or its root is missing
 */
function parseStylesXml(files) {
  const styleMap = new Map();
  const stylesXml = files["word/styles.xml"];
  if (!stylesXml) return styleMap;
  const styles = findChild(fromXml(new TextDecoder().decode(stylesXml)), "w:styles");
  if (!styles) return styleMap;

  // An on/off element explicitly switched off must not be reported as enabled.
  const OFF_VALUES = ["0", "false", "off"];
  const isToggleOn = (parent, elementName) => {
    const element = findChild(parent, elementName);
    if (!element) return false;
    const val = element.attributes?.["w:val"];
    return val === undefined || !OFF_VALUES.includes(val);
  };

  const paragraphStyles = findDeepChildren(styles, "w:style").filter((style) => style.attributes["w:type"] === "paragraph");
  for (const style of paragraphStyles) {
    const styleId = style.attributes["w:styleId"];
    if (!styleId) continue;
    const styleInfo = { styleId };

    const name = findChild(style, "w:name");
    if (name?.attributes["w:val"]) styleInfo.name = name.attributes["w:val"];

    const pPr = findChild(style, "w:pPr");
    if (pPr) {
      const outlineLvl = findChild(pPr, "w:outlineLvl");
      if (outlineLvl?.attributes["w:val"] !== undefined) styleInfo.outlineLvl = parseInt(outlineLvl.attributes["w:val"], 10);
    }

    const rPr = findChild(style, "w:rPr");
    if (rPr) {
      const charFormat = {};

      const color = findChild(rPr, "w:color");
      if (color?.attributes["w:val"] && color.attributes["w:val"] !== "auto") {
        const colorVal = color.attributes["w:val"];
        charFormat.color = colorVal.startsWith("#") ? colorVal : `#${colorVal}`;
      }

      if (isToggleOn(rPr, "w:b")) charFormat.bold = true;
      if (isToggleOn(rPr, "w:i")) charFormat.italic = true;
      const underline = findChild(rPr, "w:u");
      if (underline && underline.attributes?.["w:val"] !== "none") charFormat.underline = true;
      if (isToggleOn(rPr, "w:strike")) charFormat.strike = true;

      const sz = findChild(rPr, "w:sz");
      if (sz?.attributes["w:val"]) {
        const size = parseInt(sz.attributes["w:val"], 10);
        if (!Number.isNaN(size)) charFormat.fontSize = size;
      }

      const rFonts = findChild(rPr, "w:rFonts");
      if (rFonts?.attributes["w:ascii"]) charFormat.fontFamily = rFonts.attributes["w:ascii"];

      if (Object.keys(charFormat).length > 0) styleInfo.charFormat = charFormat;
    }
    styleMap.set(styleId, styleInfo);
  }
  return styleMap;
}
|
|
905
|
+
//#endregion
|
|
906
|
+
//#region src/converters/text.ts
|
|
907
|
+
/**
 * Build a TipTap text node from a DOCX run (`w:r`).
 *
 * Reads the first text child of the run's first `w:t` element and attaches
 * the marks computed by `extractMarks`.
 *
 * @param {object} run - XAST element for the run.
 * @param {object} [styleInfo] - Paragraph style info forwarded to `extractMarks`.
 * @returns {{type: "text", text: string, marks?: object[]} | null}
 *   `null` when the run has no `w:t` or its text is empty.
 */
function extractTextFromRun(run, styleInfo) {
  const textElement = findChild(run, "w:t");
  const textValue = textElement?.children.find((child) => child.type === "text")?.value;
  if (!textValue) return null;

  const node = { type: "text", text: textValue };
  const marks = extractMarks(run, styleInfo);
  // Only attach `marks` when there is at least one.
  if (marks.length) node.marks = marks;
  return node;
}
|
|
922
|
+
/**
 * Extract all inline nodes (text, hard breaks, images) from a paragraph.
 *
 * Direct `w:r` children become text / hardBreak / image nodes. Runs nested in
 * a `w:hyperlink` additionally receive a `link` mark when the hyperlink's
 * `r:id` resolves through `context.hyperlinks`. When it does not resolve
 * (for example an internal `w:anchor` link), the runs are still emitted,
 * just without a link mark, so the visible text is never dropped.
 *
 * @param {object} paragraph - XAST element whose children are scanned.
 * @param {{context: object, styleInfo?: object}} params
 * @returns {Promise<object[]>} Inline TipTap nodes in document order.
 */
async function extractRuns(paragraph, params) {
  const { context, styleInfo } = params;
  const runs = [];
  for (const child of paragraph.children) {
    if (child.type !== "element") continue;
    if (child.name === "w:hyperlink") {
      const rId = child.attributes["r:id"];
      // May be undefined: anchor-only links carry no relationship id.
      const href = rId ? context.hyperlinks.get(rId) : void 0;
      for (const hlChild of child.children) {
        if (hlChild.type !== "element" || hlChild.name !== "w:r") continue;
        const run = hlChild;
        const drawing = findDrawingElement(run);
        if (drawing) {
          const image = await extractImageFromDrawing(drawing, { context });
          if (image) {
            runs.push(image);
            continue;
          }
          const imageList = await extractImagesFromDrawing(drawing, { context });
          if (imageList.length) {
            runs.push(...imageList);
            continue;
          }
        }
        const textNode = extractTextFromRun(run, styleInfo);
        if (!textNode) continue;
        if (href) {
          textNode.marks = textNode.marks || [];
          textNode.marks.push({
            type: "link",
            attrs: { href },
          });
        }
        runs.push(textNode);
      }
    } else if (child.name === "w:r") {
      const run = child;
      const drawing = findDrawingElement(run);
      if (drawing) {
        const imageList = await extractImagesFromDrawing(drawing, { context });
        if (imageList.length) {
          runs.push(...imageList);
          continue;
        }
      }
      if (findChild(run, "w:br")) {
        const marks = extractMarks(run, styleInfo);
        runs.push({
          type: "hardBreak",
          ...(marks.length && { marks }),
        });
      }
      const textNode = extractTextFromRun(run, styleInfo);
      if (textNode) runs.push(textNode);
    }
  }
  return runs;
}
|
|
984
|
+
/**
 * Extract formatting marks for a run.
 *
 * Starts from the paragraph style's character format and overlays the run's
 * own `w:rPr`; run-level settings take precedence, including explicit "off"
 * values that cancel a style-level bold/italic/strike/underline.
 *
 * @param {object} run - XAST `w:r` element.
 * @param {{charFormat?: object}} [styleInfo] - Style info with inherited formatting.
 * @returns {object[]} TipTap marks (highlight/sub/superscript first, then
 *   bold, italic, underline, strike and a combined `textStyle` mark).
 */
function extractMarks(run, styleInfo) {
  const marks = [];
  const rPr = findChild(run, "w:rPr");
  const mergedFormat = styleInfo?.charFormat ? { ...styleInfo.charFormat } : {};
  const withHash = (value) => (value.startsWith("#") ? value : `#${value}`);

  if (rPr) {
    // On/off elements are "on" when present unless w:val is "false", "0" or "off".
    const isSwitchedOff = (el) => ["false", "0", "off"].includes(el.attributes["w:val"]);
    for (const [tag, key] of [["w:b", "bold"], ["w:i", "italic"], ["w:strike", "strike"]]) {
      const toggleEl = findChild(rPr, tag);
      if (toggleEl) mergedFormat[key] = !isSwitchedOff(toggleEl);
    }
    // w:u holds a line-style enum; "none" explicitly removes underlining.
    const underlineEl = findChild(rPr, "w:u");
    if (underlineEl) mergedFormat.underline = underlineEl.attributes["w:val"] !== "none";

    const colorVal = findChild(rPr, "w:color")?.attributes["w:val"];
    if (colorVal && colorVal !== "auto") mergedFormat.color = withHash(colorVal);

    const sizeVal = findChild(rPr, "w:sz")?.attributes["w:val"];
    if (sizeVal) {
      const size = Number.parseInt(sizeVal, 10);
      if (!Number.isNaN(size)) mergedFormat.fontSize = size;
    }

    const rFontsEl = findChild(rPr, "w:rFonts");
    if (rFontsEl?.attributes["w:ascii"]) mergedFormat.fontFamily = rFontsEl.attributes["w:ascii"];

    const fillColor = findChild(rPr, "w:shd")?.attributes["w:fill"];
    if (fillColor && fillColor !== "auto") mergedFormat.backgroundColor = withHash(fillColor);

    // w:highlight w:val="none" removes highlighting rather than applying it.
    const highlightEl = findChild(rPr, "w:highlight");
    if (highlightEl && highlightEl.attributes["w:val"] !== "none") marks.push({ type: "highlight" });

    const vertAlign = findChild(rPr, "w:vertAlign");
    if (vertAlign) {
      const val = vertAlign.attributes["w:val"];
      if (val === "subscript") marks.push({ type: "subscript" });
      else if (val === "superscript") marks.push({ type: "superscript" });
    }
  }

  if (mergedFormat.bold) marks.push({ type: "bold" });
  if (mergedFormat.italic) marks.push({ type: "italic" });
  if (mergedFormat.underline) marks.push({ type: "underline" });
  if (mergedFormat.strike) marks.push({ type: "strike" });

  if (mergedFormat.color || mergedFormat.backgroundColor || mergedFormat.fontSize || mergedFormat.fontFamily) {
    const textStyleAttrs = {
      color: mergedFormat.color || "",
      backgroundColor: mergedFormat.backgroundColor || "",
      fontSize: "",
      fontFamily: "",
      lineHeight: "",
    };
    // fontSize is stored in half-points; convert to px, rounded to one decimal.
    if (mergedFormat.fontSize) {
      textStyleAttrs.fontSize = `${Math.round(mergedFormat.fontSize * PIXELS_PER_HALF_POINT * 10) / 10}px`;
    }
    if (mergedFormat.fontFamily) textStyleAttrs.fontFamily = mergedFormat.fontFamily;
    marks.push({
      type: "textStyle",
      attrs: textStyleAttrs,
    });
  }
  return marks;
}
|
|
1049
|
+
/**
 * Read a paragraph's justification (`w:pPr/w:jc`) and map it to a TipTap
 * `textAlign` attribute via `TEXT_ALIGN_MAP.docxToTipTap`.
 *
 * @param {object} paragraph - XAST `w:p` element.
 * @returns {{textAlign: string} | undefined} `undefined` when no mappable
 *   alignment is present.
 */
function extractAlignment(paragraph) {
  const pPr = findChild(paragraph, "w:pPr");
  if (!pPr) return void 0;

  const docxValue = findChild(pPr, "w:jc")?.attributes["w:val"];
  if (!docxValue) return void 0;

  const textAlign = TEXT_ALIGN_MAP.docxToTipTap[docxValue];
  if (!textAlign) return void 0;
  return { textAlign };
}
|
|
1061
|
+
//#endregion
|
|
1062
|
+
//#region src/converters/paragraph.ts
|
|
1063
|
+
/**
 * Extract indentation and spacing attributes from a paragraph's `w:pPr`.
 *
 * Values are read with `parseTwipAttr` and rendered through
 * `convertTwipToCssString`.
 *
 * @param {object} node - XAST `w:p` element.
 * @returns {object | null} Object with any of `indentLeft`, `indentRight`,
 *   `indentFirstLine`, `spacingBefore`, `spacingAfter`; `null` when none apply.
 */
function extractParagraphStyles(node) {
  const pPr = findChild(node, "w:pPr");
  if (!pPr) return null;

  // Integer twip value of an attribute, or undefined when absent/rejected.
  const readTwips = (attributes, attrName) => {
    const raw = parseTwipAttr(attributes, attrName);
    return raw ? parseInt(raw, 10) : void 0;
  };

  const styles = {};
  const indent = findChild(pPr, "w:ind");
  if (indent) {
    const { attributes } = indent;
    const leftTwips = readTwips(attributes, "w:left");
    if (leftTwips !== void 0) styles.indentLeft = convertTwipToCssString(leftTwips);

    const rightTwips = readTwips(attributes, "w:right");
    if (rightTwips !== void 0) styles.indentRight = convertTwipToCssString(rightTwips);

    const firstLineTwips = readTwips(attributes, "w:firstLine");
    if (firstLineTwips !== void 0) {
      styles.indentFirstLine = convertTwipToCssString(firstLineTwips);
    } else {
      // Hanging indent is only consulted when no first-line indent is given.
      // NOTE(review): the result is left − hanging, whereas the firstLine
      // branch stores a value relative to the left indent — confirm consumers.
      const hangingTwips = readTwips(attributes, "w:hanging");
      if (hangingTwips !== void 0) {
        styles.indentFirstLine = convertTwipToCssString((leftTwips ?? 0) - hangingTwips);
      }
    }
  }

  const spacing = findChild(pPr, "w:spacing");
  if (spacing) {
    const beforeTwips = readTwips(spacing.attributes, "w:before");
    if (beforeTwips !== void 0) styles.spacingBefore = convertTwipToCssString(beforeTwips);

    const afterTwips = readTwips(spacing.attributes, "w:after");
    if (afterTwips !== void 0) styles.spacingAfter = convertTwipToCssString(afterTwips);
  }

  return Object.keys(styles).length ? styles : null;
}
|
|
1092
|
+
/**
 * Convert a DOCX paragraph node to TipTap JSON.
 *
 * A paragraph whose style resolves to heading level 1–6 is delegated to
 * `convertHeading`. The level comes from the style's `outlineLvl` (0–5), or
 * failing that from a `HeadingN` style id. Out-of-range `HeadingN` ids
 * (e.g. `Heading7`) are treated as ordinary paragraphs, matching the bound
 * already applied to `outlineLvl`.
 *
 * @param {object} node - XAST `w:p` element.
 * @param {{context: object, styleInfo?: object}} params
 * @returns {Promise<object | object[]>} A heading or paragraph node, or
 *   `[paragraph, horizontalRule]` when the paragraph contains a page break.
 */
async function convertParagraph(node, params) {
  const MAX_HEADING_LEVEL = 6;
  const { context, styleInfo: paramStyleInfo } = params;
  const pPr = findChild(node, "w:pPr");
  const styleName = (pPr && findChild(pPr, "w:pStyle"))?.attributes["w:val"];
  const styleInfo = styleName && context.styleMap ? context.styleMap.get(styleName) : void 0;

  if (styleName && context.styleMap) {
    let level;
    const outlineLvl = styleInfo?.outlineLvl;
    if (outlineLvl !== void 0 && outlineLvl >= 0 && outlineLvl < MAX_HEADING_LEVEL) {
      level = outlineLvl + 1;
    } else {
      const headingMatch = styleName.match(/^Heading(\d+)$/);
      const named = headingMatch ? Number.parseInt(headingMatch[1], 10) : Number.NaN;
      if (named >= 1 && named <= MAX_HEADING_LEVEL) level = named;
    }
    if (level !== void 0) return convertHeading(node, params, styleInfo, level);
  }

  const runs = await extractRuns(node, {
    context,
    // Style info passed in by the caller wins over the paragraph's own style.
    styleInfo: paramStyleInfo || styleInfo,
  });
  const attrs = {
    ...extractAlignment(node),
    ...extractParagraphStyles(node),
  };

  if (checkForPageBreak(node)) {
    // The break is represented by the trailing horizontalRule, so hardBreak
    // nodes are removed from the inline content.
    const filteredRuns = runs.filter((run) => run.type !== "hardBreak");
    return [
      {
        type: "paragraph",
        ...(Object.keys(attrs).length && { attrs }),
        content: filteredRuns.length ? filteredRuns : void 0,
      },
      { type: "horizontalRule" },
    ];
  }

  // The former "single page-break hardBreak" branch was unreachable (the
  // check above already covers it) and the single-image branch produced
  // exactly this object, so both were folded into the common return.
  return {
    type: "paragraph",
    ...(Object.keys(attrs).length && { attrs }),
    content: runs,
  };
}
|
|
1140
|
+
/**
 * Check whether a paragraph contains a page break.
 *
 * Walks the element tree down to each `w:r` (runs are not descended into
 * further) and reports `true` when any run has a `w:br` child whose `w:type`
 * is `"page"`. Every `w:br` in a run is examined, not just the first, so a
 * line break preceding the page break in the same run does not hide it.
 *
 * @param {object} node - XAST element, normally a `w:p`.
 * @returns {boolean}
 */
function checkForPageBreak(node) {
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    const isRun = current.name === "w:r";
    for (const child of current.children ?? []) {
      if (child.type !== "element") continue;
      if (!isRun) {
        pending.push(child);
      } else if (child.name === "w:br" && child.attributes?.["w:type"] === "page") {
        return true;
      }
    }
  }
  return false;
}
|
|
1154
|
+
/**
 * Convert a paragraph to a TipTap heading node (internal helper).
 *
 * @param {object} node - XAST `w:p` element.
 * @param {{context: object}} params - Only `params.context` is used.
 * @param {object} [styleInfo] - Style info forwarded to `extractRuns`.
 * @param {number} level - Heading level stored in `attrs.level`.
 * @returns {Promise<object>} `{ type: "heading", attrs, content }`.
 */
async function convertHeading(node, params, styleInfo, level) {
  const paragraphStyles = extractParagraphStyles(node);
  const attrs = { level, ...paragraphStyles };
  const content = await extractRuns(node, { context: params.context, styleInfo });
  return { type: "heading", attrs, content };
}
|
|
1170
|
+
//#endregion
|
|
1171
|
+
//#region src/parsers/table.ts
|
|
1172
|
+
/**
 * Parse a single border element (e.g. `w:top` inside `w:tcBorders`).
 *
 * @param {object | null | undefined} borderNode - XAST border element.
 * @returns {{color?: string, width?: number, style?: string} | null}
 *   - `color`: `#`-prefixed value of `w:color` (omitted for `"auto"`).
 *   - `width`: `w:sz` divided by 6 and rounded (eighths of a point → px at 96 dpi).
 *   - `style`: CSS border style mapped from `w:val`.
 *   `null` when the node is missing or yields no property.
 */
function parseBorder(borderNode) {
  if (!borderNode) return null;
  const { "w:val": val, "w:sz": size, "w:color": color } = borderNode.attributes ?? {};
  const cssStyleByDocxVal = {
    single: "solid",
    dashed: "dashed",
    dotted: "dotted",
    double: "double",
    none: "none",
    nil: "none",
  };

  const border = {};
  if (color && color !== "auto") border.color = color.startsWith("#") ? color : `#${color}`;
  if (size) {
    const eighthPoints = Number.parseInt(size, 10);
    if (!Number.isNaN(eighthPoints)) border.width = Math.round(eighthPoints / 6);
  }
  // Own-property check keeps inherited keys such as "constructor" or
  // "toString" from being mistaken for a border style.
  if (val && Object.hasOwn(cssStyleByDocxVal, val)) border.style = cssStyleByDocxVal[val];
  return Object.keys(border).length > 0 ? border : null;
}
|
|
1197
|
+
/**
 * Get table-level default cell margins from `w:tblPr/w:tblCellMar`.
 *
 * The `w:w` value of each side is stored as parsed (no unit conversion).
 *
 * @param {object} tableNode - XAST `w:tbl` element.
 * @returns {{marginTop?: number, marginBottom?: number, marginLeft?: number,
 *   marginRight?: number} | null} `null` when no margin could be read.
 */
function parseTableProperties(tableNode) {
  const tblPr = findChild(tableNode, "w:tblPr");
  const tblCellMar = tblPr && findChild(tblPr, "w:tblCellMar");
  if (!tblCellMar) return null;

  const props = {
    marginTop: void 0,
    marginBottom: void 0,
    marginLeft: void 0,
    marginRight: void 0,
  };
  const sides = [
    ["w:top", "marginTop"],
    ["w:bottom", "marginBottom"],
    ["w:left", "marginLeft"],
    ["w:right", "marginRight"],
  ];
  let hasMargin = false;
  for (const [tag, key] of sides) {
    const raw = findChild(tblCellMar, tag)?.attributes["w:w"];
    if (!raw) continue;
    // Explicit radix: the value is always a decimal integer.
    const width = Number.parseInt(raw, 10);
    if (Number.isNaN(width)) continue;
    props[key] = width;
    hasMargin = true;
  }
  return hasMargin ? props : null;
}
|
|
1234
|
+
/**
 * Get row properties from `w:trPr`.
 *
 * @param {object} rowNode - XAST `w:tr` element.
 * @returns {{rowHeight: string | null}} `rowHeight` is a `"<n>px"` string
 *   derived from `w:trHeight/@w:val` via `convertTwipToPixels`, or `null`
 *   when the height is absent or not a number.
 */
function parseRowProperties(rowNode) {
  const props = { rowHeight: null };
  const trPr = findChild(rowNode, "w:trPr");
  if (!trPr) return props;

  const rawHeight = findChild(trPr, "w:trHeight")?.attributes["w:val"];
  if (!rawHeight) return props;

  const twips = Number.parseInt(rawHeight, 10);
  // A non-numeric value would otherwise surface as the string "NaNpx".
  if (!Number.isNaN(twips)) props.rowHeight = `${convertTwipToPixels(twips)}px`;
  return props;
}
|
|
1245
|
+
/**
 * Get cell properties from `w:tcPr`.
 *
 * @param {object} cellNode - XAST `w:tc` element.
 * @returns {object} Always contains `colspan`, `rowspan` and `colwidth`;
 *   optionally `backgroundColor`, `verticalAlign` and `borderTop/Bottom/Left/Right`.
 *   - `colspan`: `w:gridSpan` (defaults to 1; invalid values are ignored).
 *   - `rowspan`: `0` marks a vertically merged continuation cell, `1` otherwise.
 *   - `colwidth`: `[px]` from `w:tcW` when it is an absolute (dxa) width, else `null`.
 */
function parseCellProperties(cellNode) {
  const props = {
    colspan: 1,
    rowspan: 1,
    colwidth: null,
  };
  const tcPr = findChild(cellNode, "w:tcPr");
  if (!tcPr) return props;

  const span = Number.parseInt(findChild(tcPr, "w:gridSpan")?.attributes["w:val"], 10);
  // NaN fails the comparison, so garbage never overwrites the default.
  if (span >= 1) props.colspan = span;

  // w:vMerge defaults to "continue" when w:val is omitted, which is how
  // continuation cells are normally written; only "restart" begins a new
  // merged region.
  const vMerge = findChild(tcPr, "w:vMerge");
  if (vMerge && vMerge.attributes["w:val"] !== "restart") props.rowspan = 0;

  const tcW = findChild(tcPr, "w:tcW");
  if (tcW?.attributes["w:w"]) {
    const widthType = tcW.attributes["w:type"];
    const width = Number.parseInt(tcW.attributes["w:w"], 10);
    // Only absolute widths are twips; "pct", "auto" and "nil" are not lengths.
    const isAbsolute = widthType === void 0 || widthType === "dxa";
    if (isAbsolute && !Number.isNaN(width)) props.colwidth = [convertTwipToPixels(width)];
  }

  const fill = findChild(tcPr, "w:shd")?.attributes["w:fill"];
  // "auto" is a keyword, not a hex colour.
  if (fill && fill !== "auto") props.backgroundColor = `#${fill}`;

  const vAlign = findChild(tcPr, "w:vAlign");
  if (vAlign?.attributes["w:val"]) props.verticalAlign = vAlign.attributes["w:val"];

  const tcBorders = findChild(tcPr, "w:tcBorders");
  if (tcBorders) {
    const sides = [
      ["w:top", "borderTop"],
      ["w:bottom", "borderBottom"],
      ["w:left", "borderLeft"],
      ["w:right", "borderRight"],
    ];
    for (const [tag, key] of sides) {
      const border = parseBorder(findChild(tcBorders, tag));
      if (border) props[key] = border;
    }
  }
  return props;
}
|
|
1278
|
+
//#endregion
|
|
1279
|
+
//#region src/converters/table.ts
|
|
1280
|
+
/**
 * Tell whether an XAST element is a DOCX table.
 *
 * @param {{name: string}} node - Element to test.
 * @returns {boolean} `true` when the element name is `w:tbl`.
 */
function isTable(node) {
  const { name } = node;
  return name === "w:tbl";
}
|
|
1286
|
+
/**
 * Convert a `w:tbl` element to a TipTap table node.
 *
 * Rows are converted strictly one after another: `convertTableRow` reads and
 * updates the shared `activeRowspans` map, and it awaits while converting
 * cell content. Running rows concurrently would let a later row observe the
 * map before an earlier row had registered all of its vertical merges.
 *
 * @param {object} node - XAST `w:tbl` element.
 * @param {{context: object}} params
 * @returns {Promise<object>} `{ type: "table", attrs?, content }`.
 */
async function convertTable(node, params) {
  const rows = node.children.filter((child) => child.type === "element" && child.name === "w:tr");
  const activeRowspans = /* @__PURE__ */ new Map();
  const content = [];
  for (const [rowIndex, row] of rows.entries()) {
    content.push(
      await convertTableRow(row, {
        context: params.context,
        activeRowspans,
        rows,
        rowIndex,
      }),
    );
  }
  const tableProps = parseTableProperties(node);
  return {
    type: "table",
    ...(tableProps && { attrs: tableProps }),
    content,
  };
}
|
|
1306
|
+
/**
 * Convert a `w:tr` element to a TipTap tableRow node.
 *
 * `colIndex` tracks the grid column of the current cell. It is advanced by
 * each cell's `colspan`, including cells skipped because they continue a
 * vertical merge, so columns stay aligned for `calculateRowspan` and for the
 * `activeRowspans` bookkeeping.
 *
 * @param {object} rowNode - XAST `w:tr` element.
 * @param {{context: object, activeRowspans: Map<number, number>,
 *   rows: object[], rowIndex: number}} params
 * @returns {Promise<object>} `{ type: "tableRow", attrs, content }`.
 */
async function convertTableRow(rowNode, params) {
  const cells = [];
  let colIndex = 0;
  const rowProps = parseRowProperties(rowNode);
  for (const child of rowNode.children) {
    if (child.type !== "element" || child.name !== "w:tc") continue;
    let cellProps = parseCellProperties(child);
    const colspan = cellProps?.colspan || 1;

    // Column still covered by a cell from an earlier row: consume one row of
    // that span and drop this cell.
    const mergedBy = params.activeRowspans.get(colIndex);
    if (mergedBy && mergedBy > 0) {
      params.activeRowspans.set(colIndex, mergedBy - 1);
      colIndex += colspan;
      continue;
    }
    // Continuation cell of a vertical merge: nothing to emit.
    if (cellProps?.rowspan === 0) {
      colIndex += colspan;
      continue;
    }

    if (cellProps?.rowspan === 1) {
      const actualRowSpan = calculateRowspan({
        rows: params.rows,
        rowIndex: params.rowIndex,
        colIndex,
      });
      if (actualRowSpan > 1) cellProps = { ...cellProps, rowspan: actualRowSpan };
    }
    if (cellProps?.rowspan && cellProps.rowspan > 1) {
      params.activeRowspans.set(colIndex, cellProps.rowspan - 1);
    }

    const paragraphs = await convertCellContent(child, params);
    cells.push({
      type: "tableCell",
      ...(cellProps && { attrs: cellProps }),
      content: paragraphs,
    });
    colIndex += colspan;
  }
  return {
    type: "tableRow",
    ...(rowProps && { attrs: rowProps }),
    content: cells,
  };
}
|
|
1352
|
+
/**
 * Calculate the actual rowspan of the cell at a given grid column.
 *
 * Looks at each following row, finds the cell that covers `params.colIndex`
 * and counts it while that cell is a merge continuation (`rowspan === 0` as
 * reported by `parseCellProperties`). The column walk restarts from
 * `params.colIndex` for every row; carrying the remainder over from the
 * previous row would make later rows inspect the wrong column.
 *
 * @param {{rows: object[], rowIndex: number, colIndex: number}} params
 * @returns {number} Number of rows spanned, at least 1.
 */
function calculateRowspan(params) {
  let rowspan = 1;
  for (let rowIndex = params.rowIndex + 1; rowIndex < params.rows.length; rowIndex++) {
    let remaining = params.colIndex;
    let continued = false;
    for (const child of params.rows[rowIndex].children) {
      if (child.type !== "element" || child.name !== "w:tc") continue;
      const cellProps = parseCellProperties(child);
      const colSpan = cellProps?.colspan || 1;
      if (remaining >= 0 && remaining < colSpan) {
        // This cell covers the target column.
        continued = cellProps?.rowspan === 0;
        break;
      }
      remaining -= colSpan;
    }
    // Stop at the first row that does not continue the merge (or has no
    // cell at that column).
    if (!continued) break;
    rowspan++;
  }
  return rowspan;
}
|
|
1378
|
+
/**
 * Convert the `w:p` children of a table cell.
 *
 * @param {object} cellNode - XAST `w:tc` element.
 * @param {object} params - Forwarded unchanged to `convertParagraph`.
 * @returns {Promise<object[]>} Converted nodes, or a single empty paragraph
 *   when the cell holds no `w:p` child.
 */
async function convertCellContent(cellNode, params) {
  const blocks = [];
  for (const child of cellNode.children) {
    const isParagraph = child.type === "element" && child.name === "w:p";
    if (!isParagraph) continue;
    const converted = await convertParagraph(child, params);
    // convertParagraph may yield one node or a list of nodes.
    if (Array.isArray(converted)) {
      for (const block of converted) blocks.push(block);
    } else {
      blocks.push(converted);
    }
  }
  if (blocks.length === 0) {
    return [{ type: "paragraph", content: [] }];
  }
  return blocks;
}
|
|
1393
|
+
//#endregion
|
|
1394
|
+
//#region src/converters/task-list.ts
|
|
1395
|
+
// Glyphs that prefix task-list paragraphs.
const CHECKBOX_UNCHECKED = "☐"; // U+2610 BALLOT BOX
const CHECKBOX_CHECKED = "☑"; // U+2611 BALLOT BOX WITH CHECK
|
|
1397
|
+
/**
 * Get the first text node of a paragraph's first run.
 *
 * Only the first `w:r` child, its first `w:t`, and that element's first
 * text child are considered.
 *
 * @param {object} node - XAST element, normally a `w:p`.
 * @returns {object | null} The text node when it has a non-empty value,
 *   otherwise `null`.
 */
function getFirstTextNode(node) {
  const firstRun = findChild(node, "w:r");
  if (!firstRun) return null;
  const textElement = findChild(firstRun, "w:t");
  if (!textElement) return null;
  for (const child of textElement.children) {
    if (child.type !== "text") continue;
    // Only the first text child counts, whether or not it is empty.
    return child.value ? child : null;
  }
  return null;
}
|
|
1408
|
+
/**
 * Check whether a paragraph is a task item, i.e. its first text starts with
 * one of the checkbox glyphs.
 *
 * @param {object} node - XAST `w:p` element.
 * @returns {boolean}
 */
function isTaskItem(node) {
  const textNode = getFirstTextNode(node);
  if (!textNode) return false;
  const { value } = textNode;
  return [CHECKBOX_UNCHECKED, CHECKBOX_CHECKED].some((glyph) => value.startsWith(glyph));
}
|
|
1417
|
+
/**
 * Get the checked state of a task item.
 *
 * @param {object} node - XAST `w:p` element.
 * @returns {boolean} `true` only when the first text starts with the
 *   checked glyph; `false` otherwise, including when there is no text.
 */
function getTaskItemChecked(node) {
  const textNode = getFirstTextNode(node);
  if (!textNode) return false;
  return textNode.value.startsWith(CHECKBOX_CHECKED);
}
|
|
1423
|
+
/**
 * Convert a task-item paragraph to a TipTap taskItem node.
 *
 * @param {object} node - XAST `w:p` element.
 * @param {object} params - Forwarded to `convertTaskItemParagraph`.
 * @returns {Promise<object>} `{ type: "taskItem", attrs: { checked }, content: [paragraph] }`.
 */
async function convertTaskItem(node, params) {
  const checked = getTaskItemChecked(node);
  const paragraph = await convertTaskItemParagraph(node, params);
  return {
    type: "taskItem",
    attrs: { checked },
    content: [paragraph],
  };
}
|
|
1433
|
+
/**
 * Convert a run of consecutive task-item paragraphs into one taskList node.
 *
 * Starting at `params.index`, siblings are consumed while they are `w:p`
 * task items; each consumed index is recorded in `params.processedIndices`
 * so the caller can skip it.
 *
 * @param {object} _node - Unused; the walk starts from `params.siblings[params.index]`.
 * @param {{siblings: object[], index: number, processedIndices: Set<number>,
 *   context: object, styleInfo?: object}} params
 * @returns {Promise<object>} `{ type: "taskList", content }`.
 */
async function convertTaskList(_node, params) {
  const { siblings, index, processedIndices } = params;
  const items = [];
  for (let cursor = index; cursor < siblings.length; cursor++) {
    const candidate = siblings[cursor];
    const belongsToList = candidate.name === "w:p" && isTaskItem(candidate);
    if (!belongsToList) break;
    processedIndices.add(cursor);
    items.push(
      await convertTaskItem(candidate, {
        context: params.context,
        styleInfo: params.styleInfo,
      }),
    );
  }
  return { type: "taskList", content: items };
}
|
|
1456
|
+
/**
 * Convert a task-item paragraph, removing the leading checkbox glyph.
 *
 * Exactly the glyph itself is stripped, followed by any whitespace that
 * separates it from the label, so `"☐Task"` and `"☐ Task"` both become
 * `"Task"`. A first run that held only the glyph is removed altogether.
 *
 * @param {object} node - XAST `w:p` element.
 * @param {{context: object, styleInfo?: object}} params
 * @returns {Promise<object>} `{ type: "paragraph", attrs?, content }` where
 *   `content` is `undefined` when no inline nodes remain.
 */
async function convertTaskItemParagraph(node, params) {
  const { context, styleInfo } = params;
  const runs = await extractRuns(node, { context, styleInfo });

  const firstRun = runs[0];
  if (firstRun?.type === "text") {
    const glyph = [CHECKBOX_UNCHECKED, CHECKBOX_CHECKED].find((mark) => firstRun.text.startsWith(mark));
    if (glyph !== void 0) {
      const remainingText = firstRun.text.slice(glyph.length).trimStart();
      if (remainingText) firstRun.text = remainingText;
      else runs.shift();
    }
  }

  const attrs = extractAlignment(node);
  return {
    type: "paragraph",
    ...(attrs && { attrs }),
    content: runs.length ? runs : void 0,
  };
}
|
|
1481
|
+
//#endregion
|
|
1482
|
+
//#region src/converters/code-block.ts
|
|
1483
|
+
/**
 * Check whether a paragraph uses a code-block style: its `w:pStyle` value
 * equals `DOCX_STYLE_NAMES.CODE_BLOCK` or starts with
 * `DOCX_STYLE_NAMES.CODE_PREFIX`.
 *
 * @param {object} node - XAST `w:p` element.
 * @returns {boolean}
 */
function isCodeBlock(node) {
  const pPr = findChild(node, "w:pPr");
  const pStyle = pPr && findChild(pPr, "w:pStyle");
  const styleId = pStyle?.attributes["w:val"];
  if (styleId === DOCX_STYLE_NAMES.CODE_BLOCK) return true;
  return styleId?.startsWith(DOCX_STYLE_NAMES.CODE_PREFIX) || false;
}
|
|
1491
|
+
/**
 * Get the language of a code-block paragraph from its style id: whatever
 * follows the `DOCX_STYLE_NAMES.CODE_BLOCK` prefix, lower-cased.
 *
 * @param {object} node - XAST `w:p` element.
 * @returns {string | undefined} `undefined` when the style lacks the prefix
 *   or carries no suffix.
 */
function getCodeBlockLanguage(node) {
  const pPr = findChild(node, "w:pPr");
  const pStyle = pPr && findChild(pPr, "w:pStyle");
  const styleId = pStyle?.attributes["w:val"];
  const prefix = DOCX_STYLE_NAMES.CODE_BLOCK;
  if (!styleId?.startsWith(prefix)) return void 0;
  const language = styleId.slice(prefix.length).toLowerCase();
  return language || void 0;
}
|
|
1500
|
+
//#endregion
|
|
1501
|
+
//#region src/converters/list.ts
|
|
1502
|
+
/**
 * Check whether a paragraph is a list item, i.e. its `w:pPr` contains a
 * `w:numPr` element.
 *
 * The lookup result is tested for truthiness rather than compared with
 * `undefined`, matching how every other `findChild` call in this file is
 * checked. A strict `!== undefined` test reports `true` whenever "not found"
 * is signalled with `null` (NOTE(review): `findChild` is defined outside
 * this section — confirm its not-found value).
 *
 * @param {object} node - XAST `w:p` element.
 * @returns {boolean}
 */
function isListItem(node) {
  const pPr = findChild(node, "w:pPr");
  if (!pPr) return false;
  return Boolean(findChild(pPr, "w:numPr"));
}
|
|
1509
|
+
/**
 * Get list numbering info from a paragraph's `w:pPr/w:numPr`.
 *
 * `w:ilvl` is optional and defaults to level 0. A missing `w:numId`, or the
 * reserved value `"0"` (which removes numbering rather than referencing a
 * definition), yields `null` so the paragraph is not treated as a list item.
 *
 * @param {object} node - XAST `w:p` element.
 * @returns {{numId: string, level: number} | null}
 */
function getListInfo(node) {
  const pPr = findChild(node, "w:pPr");
  const numPr = pPr && findChild(pPr, "w:numPr");
  if (!numPr) return null;

  const numId = findChild(numPr, "w:numId")?.attributes["w:val"];
  if (!numId || numId === "0") return null;

  const rawLevel = findChild(numPr, "w:ilvl")?.attributes["w:val"];
  const level = Number.parseInt(rawLevel || "0", 10);
  return {
    numId,
    // Fall back to the top level when the value is not a number.
    level: Number.isNaN(level) ? 0 : level,
  };
}
|
|
1524
|
+
//#endregion
|
|
1525
|
+
//#region src/converters/horizontal-rule.ts
|
|
1526
|
+
/**
 * Check whether a paragraph is a bare page break, which is rendered as a
 * horizontal rule.
 *
 * Every direct `w:r` child is inspected, not only the first: the paragraph
 * qualifies when at least one run holds `<w:br w:type="page"/>` and no run
 * holds anything else apart from `w:rPr` and whitespace-only `w:t`. A
 * `w:hyperlink` child also counts as content. Checking all runs prevents
 * text in a later run from being discarded along with the page break.
 *
 * @param {object} node - XAST `w:p` element.
 * @returns {boolean}
 */
function isHorizontalRule(node) {
  let hasPageBreak = false;
  for (const child of node.children ?? []) {
    if (child.type !== "element") continue;
    if (child.name === "w:hyperlink") return false;
    if (child.name !== "w:r") continue;
    for (const runChild of child.children ?? []) {
      if (runChild.type !== "element" || runChild.name === "w:rPr") continue;
      if (runChild.name === "w:br" && runChild.attributes?.["w:type"] === "page") {
        hasPageBreak = true;
        continue;
      }
      if (runChild.name === "w:t") {
        const text = runChild.children?.find((c) => c.type === "text")?.value;
        if (text?.trim().length) return false;
        continue;
      }
      // Any other run content (line break, drawing, tab, …) is real content.
      return false;
    }
  }
  return hasPageBreak;
}
|
|
1543
|
+
//#endregion
|
|
1544
|
+
//#region src/parser.ts
|
|
1545
|
+
/**
 * Main entry point: parse a DOCX file and convert it to TipTap JSON.
 *
 * @param {*} input - DOCX data; anything accepted by `toUint8Array`.
 * @param {object} [options] - Conversion options. They are spread into the
 *   conversion context; `options.image?.handler` is passed to `extractImages`.
 * @returns {Promise<object>} The TipTap document produced by `convertDocument`.
 * @throws {Error} When the archive lacks `word/document.xml`.
 */
async function parseDOCX(input, options = {}) {
  const bytes = await toUint8Array(input);
  const files = unzipSync(bytes);
  const hyperlinks = extractHyperlinks(files);
  const images = await extractImages(files, options.image?.handler);

  const documentXml = files["word/document.xml"];
  if (!documentXml) throw new Error("Invalid DOCX file: missing word/document.xml");

  const xmlText = new TextDecoder().decode(documentXml);
  const documentXast = fromXml(xmlText);
  const listTypeMap = parseNumberingXml(files);
  const styleMap = parseStylesXml(files);

  const context = { ...options, hyperlinks, images, listTypeMap, styleMap };
  return await convertDocument(documentXast, { context });
}
|
|
1565
|
+
/**
 * Convert the document XAST to a TipTap `doc` node.
 *
 * @param {object} node - XAST root of `word/document.xml`.
 * @param {object} params - Forwarded to `convertElements`.
 * @returns {Promise<{type: "doc", content: object[]}>} An empty doc when
 *   `node` is not a root or lacks `w:document` / `w:body`.
 */
async function convertDocument(node, params) {
  const emptyDoc = () => ({ type: "doc", content: [] });
  if (node.type !== "root") return emptyDoc();

  const documentEl = findChild(node, "w:document");
  if (!documentEl) return emptyDoc();

  const body = findChild(documentEl, "w:body");
  if (!body) return emptyDoc();

  const elements = body.children.filter((child) => child.type === "element");
  const content = await convertElements(elements, params);
  return { type: "doc", content };
}
|
|
1588
|
+
/**
 * Convert a list of XML elements to TipTap nodes (main conversion loop).
 *
 * Indices that a converter adds to the shared "processed" set (e.g. list
 * items folded into an earlier list) are skipped. Empty paragraphs are
 * skipped when `params.context.ignoreEmptyParagraphs` is set.
 *
 * @param {object[]} elements - Element children of the document body.
 * @param {{context: object}} params
 * @returns {Promise<object[]>} Flat list of TipTap nodes.
 */
async function convertElements(elements, params) {
  const output = [];
  const processedIndices = /* @__PURE__ */ new Set();
  for (const [position, element] of elements.entries()) {
    if (processedIndices.has(position)) continue;
    const skipEmpty =
      params.context.ignoreEmptyParagraphs && element.name === "w:p" && isEmptyParagraph(element);
    if (skipEmpty) continue;

    const converted = await convertElement(element, elements, position, params, processedIndices);
    if (Array.isArray(converted)) {
      output.push(...converted);
    } else if (converted) {
      output.push(converted);
    }
  }
  return output;
}
|
|
1604
|
+
/**
 * Convert a single XML element to TipTap node(s) (routing function).
 *
 * Tables go to `convertTable`. Paragraphs are tried in order as code block,
 * task list, list, horizontal rule and finally plain paragraph. Any other
 * element yields `null`.
 *
 * @param {object} element - Element to convert.
 * @param {object[]} siblings - All elements at the same level.
 * @param {number} index - Position of `element` within `siblings`.
 * @param {object} params - Conversion parameters (`context`, …).
 * @param {Set<number>} processedIndices - Updated by list converters with
 *   the sibling indices they consume.
 * @returns {Promise<object | object[] | null>}
 */
async function convertElement(element, siblings, index, params, processedIndices) {
  if (element.name === "w:tbl") return await convertTable(element, params);
  if (element.name !== "w:p") return null;

  if (isCodeBlock(element)) return await convertCodeBlock(element);
  if (isTaskItem(element)) {
    const taskParams = { ...params, siblings, index, processedIndices };
    return await convertTaskList(element, taskParams);
  }
  if (isListItem(element)) {
    return await convertList(element, siblings, index, params, processedIndices);
  }
  if (isHorizontalRule(element)) return { type: "horizontalRule" };
  return await convertParagraph(element, params);
}
|
|
1624
|
+
/**
 * Convert a code-block paragraph to a TipTap codeBlock node.
 *
 * @param {object} element - XAST `w:p` element with a code-block style.
 * @returns {Promise<object>} `{ type: "codeBlock", attrs?, content }`;
 *   `attrs.language` is present only when a language could be derived.
 */
async function convertCodeBlock(element) {
  const language = getCodeBlockLanguage(element);
  const node = { type: "codeBlock" };
  if (language) node.attrs = { language };
  node.content = extractTextFromParagraph(element);
  return node;
}
|
|
1636
|
+
/**
 * Convert a run of consecutive list-item paragraphs into one list node.
 *
 * Starting at `startIndex`, siblings are consumed while they are `w:p` list items
 * whose `numId` equals that of `startElement`. Each consumed index is recorded in
 * `processedIndices` so the caller's loop does not convert it again.
 *
 * The list kind is looked up in `params.context.listTypeMap` by `numId`; a missing
 * entry (or a falsy `type`) is treated as a bullet list. If `startElement` has no
 * list info at all, it is converted as a plain paragraph instead.
 *
 * @param {object} startElement - First list-item paragraph of the run.
 * @param {Array<object>} siblings - All elements at the same level.
 * @param {number} startIndex - Position of `startElement` within `siblings`.
 * @param {object} params - Conversion parameters (provides `context.listTypeMap`).
 * @param {Set<number>} processedIndices - Receives the indices consumed here.
 * @returns {Promise<object>} A `bulletList` / `orderedList` node, or the result of
 *   `convertParagraph` when no list info is available.
 */
async function convertList(startElement, siblings, startIndex, params, processedIndices) {
  const listInfo = getListInfo(startElement);
  if (!listInfo) {
    return await convertParagraph(startElement, params);
  }

  const listTypeInfo = params.context.listTypeMap.get(listInfo.numId);
  const listType = listTypeInfo?.type || "bullet";

  const items = [];
  for (let cursor = startIndex; cursor < siblings.length; cursor += 1) {
    const candidate = siblings[cursor];

    const isListParagraph = candidate.name === "w:p" && isListItem(candidate);
    if (!isListParagraph) {
      break;
    }

    const candidateInfo = getListInfo(candidate);
    const belongsToSameList = Boolean(candidateInfo) && candidateInfo.numId === listInfo.numId;
    if (!belongsToSameList) {
      break;
    }

    processedIndices.add(cursor);

    const converted = await convertParagraph(candidate, params);
    // When the paragraph converter yields several nodes, only the first is used.
    const itemBody = Array.isArray(converted) ? converted[0] : converted;
    items.push({ type: "listItem", content: [itemBody] });
  }

  const listNode = { type: "orderedList", content: items };
  if (listType === "bullet") {
    listNode.type = "bulletList";
  }

  if (listType === "ordered") {
    const attrs = { type: null };
    if (listTypeInfo?.start !== undefined) {
      attrs.start = listTypeInfo.start;
    }
    listNode.attrs = attrs;
  }

  return listNode;
}
|
|
1670
|
+
/**
 * Extract the plain text content of a paragraph (used for code blocks).
 *
 * Every `w:r` run found anywhere below `element` is walked in document order and
 * each direct child of the run contributes at most one text node:
 *   - `w:t`  -> its text (all text children joined),
 *   - `w:tab` -> a tab character,
 *   - `w:br` without a type, or with type `textWrapping` -> a newline.
 * Page and column breaks are not text and are ignored.
 *
 * A run may legally hold several `w:t` elements separated by breaks or tabs, so
 * all children are inspected rather than only the first `w:t`; otherwise the text
 * after a soft line break inside a code block would be lost.
 *
 * @param {object} element - The paragraph element to read.
 * @returns {Array<{type: "text", text: string}>} Text nodes in document order;
 *   empty strings are never emitted.
 */
function extractTextFromParagraph(element) {
  const content = [];

  for (const run of findDeepChildren(element, "w:r")) {
    for (const child of run.children ?? []) {
      if (child.type !== "element") {
        continue;
      }

      let text = "";
      if (child.name === "w:t") {
        text = (child.children ?? [])
          .filter((node) => node.type === "text" && typeof node.value === "string")
          .map((node) => node.value)
          .join("");
      } else if (child.name === "w:tab") {
        text = "\t";
      } else if (child.name === "w:br") {
        const breakType = child.attributes?.["w:type"];
        // Only in-line breaks become newlines; page/column breaks carry no text.
        if (breakType == null || breakType === "textWrapping") {
          text = "\n";
        }
      }

      if (text) {
        content.push({ type: "text", text });
      }
    }
  }

  return content;
}
|
|
1687
|
+
/**
 * Check whether a paragraph has no visible content.
 *
 * A paragraph counts as non-empty as soon as any `w:r` run below it directly
 * contains one of:
 *   - a `w:t` with non-whitespace text,
 *   - a `w:drawing`, `mc:AlternateContent` or `w:pict` element,
 *   - a `w:br` whose `w:type` is `page`.
 *
 * All children of every run are inspected, not just the first `w:t` / `w:br`,
 * so a run such as `<w:t> </w:t><w:br/><w:t>text</w:t>` is not misreported as
 * empty (which would let it be dropped when empty paragraphs are ignored).
 *
 * @param {object} element - The paragraph element to inspect.
 * @returns {boolean} `true` when nothing visible was found, otherwise `false`.
 */
function isEmptyParagraph(element) {
  const visualElementNames = ["w:drawing", "mc:AlternateContent", "w:pict"];

  for (const run of findDeepChildren(element, "w:r")) {
    for (const child of run.children ?? []) {
      if (child.type !== "element") {
        continue;
      }

      if (visualElementNames.includes(child.name)) {
        return false;
      }

      if (child.name === "w:br" && child.attributes?.["w:type"] === "page") {
        return false;
      }

      if (child.name === "w:t") {
        const hasVisibleText = (child.children ?? []).some(
          (node) =>
            node.type === "text" &&
            typeof node.value === "string" &&
            node.value.trim().length > 0,
        );
        if (hasVisibleText) {
          return false;
        }
      }
    }
  }

  return true;
}
|
|
1704
|
+
//#endregion
|
|
1705
|
+
export { convertParagraph, convertTable, convertTaskItem, convertTaskList, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem, parseDOCX };
|