@docen/import-docx 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/index.cjs +1 -1
- package/dist/index.mjs +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -68,7 +68,9 @@ interface DocxImportOptions {
|
|
|
68
68
|
/** Custom image converter (default: embed as base64) */
|
|
69
69
|
convertImage?: (image: DocxImageInfo) => Promise<DocxImageResult>;
|
|
70
70
|
|
|
71
|
-
/** Whether to ignore empty paragraphs (default: false)
|
|
71
|
+
/** Whether to ignore empty paragraphs (default: false).
|
|
72
|
+
* Empty paragraphs are those without text content or images.
|
|
73
|
+
* Paragraphs containing only whitespace or images are not considered empty. */
|
|
72
74
|
ignoreEmptyParagraphs?: boolean;
|
|
73
75
|
}
|
|
74
76
|
```
|
package/dist/index.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
/**
 * CommonJS bundle of the DOCX importer.
 *
 * parseDOCX() unzips a .docx archive, parses word/document.xml (plus the
 * relationship and numbering parts) with xast-util-from-xml and converts the
 * body into a `{type:"doc",content:[...]}` node tree.
 *
 * Behavior notes for this revision of the bundle:
 *  - extractRuns keeps the text of hyperlinks whose r:id cannot be resolved
 *    (it is emitted without a link mark instead of being discarded).
 *  - u$1 treats every w:vMerge that is not w:val="restart" as a continuation
 *    cell (a missing w:val means "continue").
 *  - g restarts its column offset on every row it inspects.
 *  - f strips exactly the one-character checkbox glyph plus leading whitespace.
 *  - L (empty-paragraph test) treats drawings and hyperlink text as content.
 */
"use strict";
const xastUtilFromXml=require("xast-util-from-xml"),fflate=require("fflate"),undio=require("undio"),imageMeta=require("image-meta");
/**
 * Converts the runs of element `n` into inline nodes (text, hardBreak, image).
 * `c` is a Map from relationship id to hyperlink target; `t` is a Map from
 * relationship id to image src. Direct w:r children and w:r children of
 * w:hyperlink are processed. Hyperlink text gets a `link` mark only when the
 * hyperlink's r:id resolves in `c`; otherwise the text is still emitted.
 */
function extractRuns(c,n,t){const r=[];for(const s of n.children)if(s.type==="element"){if(s.name==="w:hyperlink"){const l=s,a=l.attributes["r:id"],m=c.get(a);for(const h of l.children)if(h.type==="element"&&h.name==="w:r"){const b=h;let k=i(b,"w:drawing");if(!k){const A=i(b,"mc:AlternateContent");if(A){const N=i(A,"mc:Choice");N&&(k=i(N,"w:drawing"))}}if(k){const A=P$1(k,t);if(A){r.push(A);continue}const N=j$1(k,t);if(r.push(...N),N.length>0)continue}const x=i(b,"w:t");if(!x)continue;const I=x.children.find(A=>A.type==="text");if(!I||!I.value)continue;const T=extractMarks(b);m&&T.push({type:"link",attrs:{href:m}});const C={type:"text",text:I.value};T.length>0&&(C.marks=T),r.push(C)}continue}if(s.name==="w:r"){const l=s;let a=i(l,"w:drawing");if(!a){const x=i(l,"mc:AlternateContent");if(x){const I=i(x,"mc:Choice");I&&(a=i(I,"w:drawing"))}}if(a){const x=j$1(a,t);if(r.push(...x),x.length>0)continue}if(i(l,"w:br")){const x=extractMarks(l),I={type:"hardBreak"};x.length>0&&(I.marks=x),r.push(I)}const m=i(l,"w:t");if(!m)continue;const h=m.children.find(x=>x.type==="text");if(!h||!h.value)continue;const b=extractMarks(l),k={type:"text",text:h.value};b.length>0&&(k.marks=b),r.push(k)}}return r}
/**
 * Builds the mark list for run `c` from its w:rPr child: bold, italic,
 * underline, strike, highlight, sub/superscript, and a `textStyle` mark when
 * any of w:color, w:shd, w:sz or w:rFonts is present. Colors get a leading
 * "#", "auto" is skipped, and the w:sz value is divided by 1.5 and emitted as
 * px rounded to one decimal. Returns [] when there is no w:rPr.
 */
function extractMarks(c){const n=[],t=i(c,"w:rPr");if(!t)return n;i(t,"w:b")&&n.push({type:"bold"}),i(t,"w:i")&&n.push({type:"italic"}),i(t,"w:u")&&n.push({type:"underline"}),i(t,"w:strike")&&n.push({type:"strike"}),i(t,"w:highlight")&&n.push({type:"highlight"});const r=i(t,"w:vertAlign");if(r){const h=r.attributes["w:val"];h==="subscript"?n.push({type:"subscript"}):h==="superscript"&&n.push({type:"superscript"})}const s=i(t,"w:color"),l=i(t,"w:shd"),a=i(t,"w:sz"),m=i(t,"w:rFonts");if(s||l||a||m){const h={color:"",backgroundColor:"",fontSize:"",fontFamily:"",lineHeight:""};if(s&&s.attributes["w:val"]){const b=s.attributes["w:val"];if(b!=="auto"){const k=b.startsWith("#")?b:`#${b}`;h.color=k}}if(l&&l.attributes["w:fill"]){const b=l.attributes["w:fill"];if(b!=="auto"){const k=b.startsWith("#")?b:`#${b}`;h.backgroundColor=k}}if(a&&a.attributes["w:val"]){const b=a.attributes["w:val"],k=parseFloat(b);if(!isNaN(k)){const x=Math.round(k/1.5*10)/10;h.fontSize=`${x}px`}}m&&m.attributes["w:ascii"]&&(h.fontFamily=m.attributes["w:ascii"]),n.push({type:"textStyle",attrs:h})}return n}
/** Maps the paragraph's w:pPr/w:jc value (left, right, center, both) to `{textAlign}`; returns undefined for anything else. */
function extractAlignment(c){const n=i(c,"w:pPr");if(!n)return;const t=i(n,"w:jc");if(!t?.attributes["w:val"])return;const r=t.attributes["w:val"],s={left:"left",right:"right",center:"center",both:"justify"}[r];return s?{textAlign:s}:void 0}
/**
 * Builds one image node from the first a:blip found anywhere under `c` whose
 * r:embed id resolves in the image Map `n`; returns null otherwise.
 * Width/height come from wp:extent cx/cy divided by 9525 (EMU per pixel at
 * 96 dpi); the title comes from wp:docPr's `title` attribute when non-empty.
 */
function P$1(c,n){const t=M(c,"a:blip");if(!t?.attributes["r:embed"])return null;const r=t.attributes["r:embed"],s=n.get(r);if(!s)return null;const l=M(c,"wp:extent");let a,m;if(l){const k=l.attributes.cx,x=l.attributes.cy;if(typeof k=="string"){const I=parseInt(k,10);isNaN(I)||(a=Math.round(I/9525))}if(typeof x=="string"){const I=parseInt(x,10);isNaN(I)||(m=Math.round(I/9525))}}const h=M(c,"wp:docPr");let b;if(h){const k=h.attributes.title;typeof k=="string"&&k&&(b=k)}return{type:"image",attrs:{src:s,alt:"",...a!==void 0&&{width:a},...m!==void 0&&{height:m},...b!==void 0&&{title:b}}}}
/**
 * Extracts every image node from drawing `c` (wp:inline or wp:anchor).
 * For a wpg:wgp group each pic:pic / pic descendant becomes one image; when
 * the src is a data: URI the intrinsic size is read with image-meta and, if
 * its aspect ratio differs from the drawing extent by more than 0.1, the
 * width/height are refitted inside the extent. Non-group drawings fall back
 * to P$1. Returns [] when the expected structure is missing.
 */
function j$1(c,n){const t=[],r=i(c,"wp:inline")||i(c,"wp:anchor");if(!r)return t;const s=i(r,"wp:extent");let l,a;if(s){const k=s.attributes.cx,x=s.attributes.cy;if(typeof k=="string"){const I=parseInt(k,10);isNaN(I)||(l=Math.round(I/9525))}if(typeof x=="string"){const I=parseInt(x,10);isNaN(I)||(a=Math.round(I/9525))}}const m=i(r,"a:graphic");if(!m)return t;const h=i(m,"a:graphicData");if(!h)return t;const b=i(h,"wpg:wgp");if(b){const k=i(b,"wpg:grpSp");let x=[];if(k){const I=R(k,"pic:pic"),T=R(k,"pic");x=[...I,...T]}else{const I=R(b,"pic:pic"),T=R(b,"pic");x=[...I,...T]}for(const I of x){const T=i(I,"a:graphic");if(!T){const A=i(I,"pic:blipFill")||M(I,"a:blipFill");if(A){const N=i(A,"a:blip")||M(A,"a:blip");if(N&&N.attributes["r:embed"]){const q=N.attributes["r:embed"],W=n.get(q);if(W){let S=l,O=a;if(W&&l&&a)try{let D,E;if(W.startsWith("data:")){const G=W.split(",")[1];if(G){const K=atob(G),V=new Uint8Array(K.length);for(let Q=0;Q<K.length;Q++)V[Q]=K.charCodeAt(Q);const Y=imageMeta.imageMeta(V);D=Y.width,E=Y.height}}if(D&&E){const G=D/E,K=l/a;Math.abs(G-K)>.1&&(G>K?(S=l,O=Math.round(l/G)):(O=a,S=Math.round(a*G)))}}catch(D){console.warn("Failed to extract image metadata for aspect ratio:",D)}t.push({type:"image",attrs:{src:W,alt:"",...S!==void 0&&{width:S},...O!==void 0&&{height:O}}})}}}continue}const C=P$1({children:[T]},n);if(C){if(l!==void 0&&a!==void 0&&C.attrs.src)try{const A=C.attrs.src;let N,q;if(A.startsWith("data:")){const W=A.split(",")[1];if(W){const S=atob(W),O=new Uint8Array(S.length);for(let E=0;E<S.length;E++)O[E]=S.charCodeAt(E);const D=imageMeta.imageMeta(O);N=D.width,q=D.height}}if(N&&q){const W=N/q,S=l/a;Math.abs(W-S)>.1?W>S?(C.attrs.width=l,C.attrs.height=Math.round(l/W)):(C.attrs.height=a,C.attrs.width=Math.round(a*W)):(C.attrs.width=l,C.attrs.height=a)}else C.attrs.width=l,C.attrs.height=a}catch{C.attrs.width=l,C.attrs.height=a}t.push(C)}}}else{const k=P$1(c,n);k&&t.push(k)}return t}
/** Returns the first direct child element of `c` named `n`, or undefined. */
function i(c,n){for(const t of c.children)if(t.type==="element"&&t.name===n)return t}
/** Returns the first element named `n` under `c`: direct children are checked first, then each child subtree recursively. */
function M(c,n){for(const t of c.children)if(t.type==="element"&&t.name===n)return t;for(const t of c.children)if(t.type==="element"){const r=M(t,n);if(r)return r}}
/** Collects every descendant element of `c` named `n` (direct children first, then each child subtree). */
function R(c,n){const t=[];for(const r of c.children)r.type==="element"&&r.name===n&&t.push(r);for(const r of c.children)r.type==="element"&&t.push(...R(r,n));return t}
/**
 * Converts paragraph element `c` into a node. `n` is the hyperlink Map and
 * `t` the image Map handed on to extractRuns.
 *  - w:pStyle matching /^Heading(\d)$/ -> heading node with that level;
 *  - a single hardBreak produced by a page-type w:br -> horizontalRule;
 *  - a single image -> the image node itself (not wrapped in a paragraph);
 *  - otherwise a paragraph, with `attrs.textAlign` when w:jc is recognised.
 */
function convertParagraph(c,n,t){let r;for(const a of c.children)if(a.type==="element"&&a.name==="w:pPr"){const m=a;for(const h of m.children)if(h.type==="element"&&h.name==="w:pStyle"){r=h.attributes["w:val"];break}break}if(r){const a=r.match(/^Heading(\d)$/);if(a){const m=parseInt(a[1]);return f$1(c,n,m,t)}}const s=extractRuns(n,c,t);if(s.length===1&&s[0].type==="hardBreak"){for(const a of c.children)if(a.type==="element"&&a.name==="w:r"){for(const m of a.children)if(m.type==="element"&&m.name==="w:br"&&m.attributes["w:type"]==="page")return{type:"horizontalRule"}}}if(s.length===1&&s[0].type==="image")return s[0];const l=extractAlignment(c);return{type:"paragraph",...l&&{attrs:l},content:s}}
/** Builds a heading node of level `t` from paragraph `c` (`n` = hyperlink Map, `r` = image Map). */
function f$1(c,n,t,r){return{type:"heading",attrs:{level:t},content:extractRuns(n,c,r)}}
/** True when the paragraph has a w:pPr containing w:numPr. */
function isListItem(c){const n=e(c,"w:pPr");return n?!!e(n,"w:numPr"):!1}
/** Returns `{numId, level}` from w:pPr/w:numPr, or null when w:ilvl or w:numId is missing. */
function getListInfo(c){const n=e(c,"w:pPr");if(!n)return null;const t=e(n,"w:numPr");if(!t)return null;const r=e(t,"w:ilvl"),s=e(t,"w:numId");return!r||!s?null:{numId:s.attributes["w:val"],level:parseInt(r.attributes["w:val"]||"0")}}
/** First direct child element named `n` (list-module copy of the same lookup as `i`). */
function e(c,n){for(const t of c.children)if(t.type==="element"&&t.name===n)return t}
/** Truthy when the paragraph style (w:pStyle w:val) is "CodeBlock" or starts with "Code". */
function isCodeBlock(c){const n=o(c,"w:pPr");if(!n)return!1;const t=o(n,"w:pStyle");if(!t)return!1;const r=t.attributes["w:val"];return r==="CodeBlock"||r?.startsWith("Code")}
/** Returns the lower-cased suffix after "CodeBlock" in the paragraph style name, or undefined when there is none. */
function getCodeBlockLanguage(c){const n=o(c,"w:pPr");if(!n)return;const t=o(n,"w:pStyle");if(!t)return;const r=t.attributes["w:val"];if(r?.startsWith("CodeBlock"))return r.replace("CodeBlock","").toLowerCase()||void 0}
/** First direct child element named `n` (code-block-module copy of the same lookup as `i`). */
function o(c,n){for(const t of c.children)if(t.type==="element"&&t.name===n)return t}
/** True when the element is a w:tbl. */
function isTable(c){return c.name==="w:tbl"}
/** Converts table element `c` into a table node, one tableRow per w:tr (`n` = hyperlink Map, `t` = image Map). */
function convertTable(c,n,t){const r=[],s=[];for(const a of c.children)a.type==="element"&&a.name==="w:tr"&&s.push(a);const l=new Map;return s.forEach((a,m)=>{r.push(d(a,m===0,n,t,l,s,m))}),{type:"table",content:r}}
/**
 * Converts row `c` into a tableRow. `s` is a Map shared across rows that
 * holds, per column index, how many following rows are still covered by a
 * vertical merge; cells at such a column are skipped. `l` is the list of all
 * rows and `a` this row's index (used to compute row spans). `n` is passed
 * by the caller but not read here.
 */
function d(c,n,t,r,s,l,a){const m=[];let h=0;for(const b of c.children)if(b.type==="element"&&b.name==="w:tc"){const k=s.get(h);if(k&&k>0){s.set(h,k-1),h++;continue}let x=u$1(b);if(x&&x.rowspan===1){const C=g(l,a,h);C>1&&(x={...x,rowspan:C})}if(x&&x.rowspan>1&&s.set(h,x.rowspan-1),x&&x.rowspan===0){h++;continue}const I="tableCell",T=y(b,t,r);m.push({type:I,...x&&{attrs:x},content:[T]}),h+=x?.colspan||1}return{type:"tableRow",content:m}}
/**
 * Reads cell properties from w:tcPr: colspan from w:gridSpan, colwidth from
 * w:tcW, and rowspan 0 as the marker for a vertically-merged continuation
 * cell. Any w:vMerge other than w:val="restart" is a continuation, because
 * an omitted w:val means "continue".
 */
function u$1(c){const n={colspan:1,rowspan:1,colwidth:null};let t;for(const r of c.children)if(r.type==="element"&&r.name==="w:tcPr"){t=r;break}if(!t)return n;for(const r of t.children)if(r.type==="element"&&r.name==="w:gridSpan"){const s=r.attributes["w:val"];s&&(n.colspan=parseInt(s));break}for(const r of t.children)if(r.type==="element"&&r.name==="w:vMerge"){r.attributes["w:val"]!=="restart"&&(n.rowspan=0);break}for(const r of t.children)if(r.type==="element"&&r.name==="w:tcW"){const s=r.attributes["w:w"];s&&(n.colwidth=parseInt(s));break}return n}
/**
 * Counts the row span of the cell at column `t` in row `n` by walking the
 * following rows in `c` while the cell covering that column is a merge
 * continuation. The column offset `s` is re-initialised for every row so
 * that each row is scanned from its first cell.
 */
function g(c,n,t){let r=1;for(let l=n+1;l<c.length;l++){const a=c[l];let m=!1,s=t;for(const h of a.children)if(h.type==="element"&&h.name==="w:tc"){const b=u$1(h),k=b?.colspan||1;if(s>=0&&s<k){if(b?.rowspan===0)r++,m=!0;else return r;break}s-=k}if(!m)break}return r}
/**
 * Converts the content of cell `c`. Every w:p child is converted, but only
 * the first result is returned (an empty paragraph when there is none).
 * NOTE(review): later paragraphs in the same cell are not part of the output.
 */
function y(c,n,t){const r=[];for(const s of c.children)if(s.type==="element"&&s.name==="w:p"){const l=convertParagraph(s,n,t);r.push(l)}return r[0]||{type:"paragraph",content:[]}}
/** True when the first text found in the paragraph's first run starts with U+2610 or U+2611. */
function isTaskItem(c){for(const n of c.children)if(n.type==="element"&&n.name==="w:r"){for(const t of n.children)if(t.type==="element"&&t.name==="w:t"){const r=t.children.find(s=>s.type==="text");if(r&&"value"in r){const s=r.value;return s.startsWith("\u2610")||s.startsWith("\u2611")}}break}return!1}
/** True when the first text found in the paragraph's first run starts with U+2611 (checked box). */
function getTaskItemChecked(c){for(const n of c.children)if(n.type==="element"&&n.name==="w:r"){for(const t of n.children)if(t.type==="element"&&t.name==="w:t"){const r=t.children.find(s=>s.type==="text");if(r&&"value"in r)return r.value.startsWith("\u2611")}break}return!1}
/** Converts a task paragraph into a taskItem node with `attrs.checked` and one paragraph of content. */
function convertTaskItem(c){const n=getTaskItemChecked(c),t=f(c);return{type:"taskItem",attrs:{checked:n},content:[t]}}
/**
 * Builds the paragraph inside a task item. The first text that starts with a
 * checkbox glyph has that single glyph and any following whitespace removed
 * (so a label written without a separating space keeps its first character);
 * all other runs are copied with their basic marks.
 */
function f(c){const n=[];let t=!1;for(const s of c.children)if(s.type==="element"&&s.name==="w:r"){let l=!1;if(!t){for(const a of s.children)if(a.type==="element"&&a.name==="w:t"){const m=a.children.find(h=>h.type==="text");if(m&&"value"in m){const h=m.value;if(h.startsWith("\u2610")||h.startsWith("\u2611")){l=!0,t=!0;const b=h.substring(1).trimStart();b.length>0&&n.push({type:"text",text:b})}}}}if(!l){const a=p(s);for(const m of s.children)if(m.type==="element"&&m.name==="w:t"){const h=m.children.find(b=>b.type==="text");if(h&&"value"in h){const b={type:"text",text:h.value};a.length>0&&(b.marks=a),n.push(b)}}}}const r=u(c);return{type:"paragraph",...r&&{attrs:r},content:n.length>0?n:void 0}}
/** Basic marks (bold, italic, underline, strike) from the run's first w:rPr child. */
function p(c){const n=[];for(const t of c.children)if(t.type==="element"&&t.name==="w:rPr"){const r=t;for(const s of r.children)if(s.type==="element"&&s.name==="w:b"){n.push({type:"bold"});break}for(const s of r.children)if(s.type==="element"&&s.name==="w:i"){n.push({type:"italic"});break}for(const s of r.children)if(s.type==="element"&&s.name==="w:u"){n.push({type:"underline"});break}for(const s of r.children)if(s.type==="element"&&s.name==="w:strike"){n.push({type:"strike"});break}break}return n}
/** Maps w:pPr/w:jc (both, center, right, left) to `{textAlign}` for task paragraphs; undefined otherwise. */
function u(c){for(const n of c.children)if(n.type==="element"&&n.name==="w:pPr"){const t=n;for(const r of t.children)if(r.type==="element"&&r.name==="w:jc"){const s=r.attributes["w:val"];if(s==="both")return{textAlign:"justify"};if(s==="center")return{textAlign:"center"};if(s==="right")return{textAlign:"right"};if(s==="left")return{textAlign:"left"}}}}
/** True when some run holds a page-type w:br and nothing else besides w:rPr and whitespace-only text. */
function isHorizontalRule(c){for(const n of c.children)if(n.type==="element"&&n.name==="w:r"){const t=n;let r=!1,s=!1;for(const l of t.children)if(l.type==="element")if(l.name==="w:br")l.attributes["w:type"]==="page"&&(r=!0);else if(l.name==="w:t"){const a=l.children.find(m=>m.type==="text");a&&"value"in a&&a.value&&a.value.trim().length>0&&(s=!0)}else l.name!=="w:rPr"&&(s=!0);if(r&&!s)return!0}return!1}
/** Base64 alphabet used by `v`. */
const w="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/** Base64-encodes the byte sequence `c` (three input bytes per four output characters, "=" padded). */
function v(c){const n=c.length,t=Math.ceil(n/3)*4,r=Array.from({length:t});let s=0;for(let l=0;l<n;l+=3){const a=c[l],m=l+1<n?c[l+1]:0,h=l+2<n?c[l+2]:0,b=a>>2,k=(a&3)<<4|m>>4,x=(m&15)<<2|h>>6,I=h&63;r[s++]=w[b],r[s++]=w[k],r[s++]=l+1<n?w[x]:"=",r[s++]=l+2<n?w[I]:"="}return r.join("")}
/** Default image converter: embeds the image bytes as a base64 data: URI using the supplied content type. */
const defaultImageConverter=async c=>{const n=v(c.data);return{src:`data:${c.contentType};base64,${n}`}};
/**
 * Parses DOCX input `c` into a document node tree.
 * Options `n`: `convertImage` (defaults to defaultImageConverter) and
 * `ignoreEmptyParagraphs` (defaults to false). Each image is converted via
 * `convertImage`; if that throws, a warning is logged and the image is
 * embedded as base64 instead. Throws an Error when word/document.xml is
 * missing from the archive.
 */
async function parseDOCX(c,n={}){const{convertImage:t=defaultImageConverter,ignoreEmptyParagraphs:r=!1}=n,s=await undio.toUint8Array(c),l=fflate.unzipSync(s),a=B(l),m=X(l),h=new Map;for(const[I,T]of m.entries())try{let C;try{C=`image/${imageMeta.imageMeta(T).type}`}catch{C="image/png"}const A=await t({id:I,contentType:C,data:T});h.set(I,A.src)}catch(C){console.warn(`Failed to convert image ${I}:`,C);let A="image/png";try{A=`image/${imageMeta.imageMeta(T).type}`}catch{}const N=v(T),q=`data:${A};base64,${N}`;h.set(I,q)}const b=l["word/document.xml"];if(!b)throw new Error("Invalid DOCX file: missing word/document.xml");const k=xastUtilFromXml.fromXml(new TextDecoder().decode(b)),x=J(l);return U(k,h,a,x,r)}
/**
 * Reads word/numbering.xml and returns a Map from w:numId to
 * `{type:"bullet"}` or `{type:"ordered", start?}`. Only the first w:lvl of
 * each w:abstractNum is consulted for the number format and start value.
 */
function J(c){const n=new Map,t=new Map,r=c["word/numbering.xml"];if(!r)return n;const s=xastUtilFromXml.fromXml(new TextDecoder().decode(r)),l=new Map;if(s.type==="root"){for(const a of s.children)if(a.type==="element"&&a.name==="w:numbering"){const m=a;for(const h of m.children)if(h.type==="element"&&h.name==="w:abstractNum"){const b=h,k=b.attributes["w:abstractNumId"];for(const x of b.children)if(x.type==="element"&&x.name==="w:lvl"){for(const I of x.children)if(I.type==="element"&&I.name==="w:numFmt"){const T=I.attributes["w:val"];if(T){l.set(k,T);break}}for(const I of x.children)if(I.type==="element"&&I.name==="w:start"){const T=I.attributes["w:val"];T&&t.set(k,parseInt(T,10));break}break}}for(const h of m.children)if(h.type==="element"&&h.name==="w:num"){const b=h,k=b.attributes["w:numId"];for(const x of b.children)if(x.type==="element"&&x.name==="w:abstractNumId"){const I=x.attributes["w:val"],T=l.get(I);if(T){const C=t.get(I);T==="bullet"?n.set(k,{type:"bullet"}):n.set(k,{type:"ordered",...C!==void 0&&{start:C}})}break}}break}}return n}
/** Returns a Map from relationship Id to image bytes for every image relationship in word/_rels/document.xml.rels whose "word/"+Target entry exists in the archive. */
function X(c){const n=new Map,t=c["word/_rels/document.xml.rels"];if(!t)return n;const r=xastUtilFromXml.fromXml(new TextDecoder().decode(t));if(r.type==="root"){for(const s of r.children)if(s.type==="element"&&s.name==="Relationships"){const l=s;for(const a of l.children)if(a.type==="element"&&a.name==="Relationship"){const m=a,h=m.attributes.Type;if(h&&h==="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"){const b=m.attributes.Id,k=m.attributes.Target;if(b&&k){const x="word/"+k,I=c[x];I&&n.set(b,I)}}}break}}return n}
/** Returns a Map from relationship Id to Target for every hyperlink relationship in word/_rels/document.xml.rels. */
function B(c){const n=new Map,t=c["word/_rels/document.xml.rels"];if(!t)return n;const r=xastUtilFromXml.fromXml(new TextDecoder().decode(t));if(r.type==="root"){for(const s of r.children)if(s.type==="element"&&s.name==="Relationships"){const l=s;for(const a of l.children)if(a.type==="element"&&a.name==="Relationship"){const m=a,h=m.attributes.Type;if(h&&h==="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"){const b=m.attributes.Id,k=m.attributes.Target;b&&k&&n.set(b,k)}}break}}return n}
/** Locates w:document/w:body in parsed tree `c` and converts its element children; returns an empty doc when the structure is missing. */
function U(c,n,t,r,s){if(c.type!=="root")return{type:"doc",content:[]};for(const l of c.children)if(l.type==="element"&&l.name==="w:document"){const a=l;for(const m of a.children)if(m.type==="element"&&m.name==="w:body")return{type:"doc",content:F(m.children.filter(h=>h.type==="element"),n,t,r,s)};break}return{type:"doc",content:[]}}
/**
 * Converts the body elements `c` into block nodes. `n` = image Map,
 * `t` = hyperlink Map, `r` = list-type Map, `s` = ignoreEmptyParagraphs.
 * An empty paragraph directly after a table is always skipped; other empty
 * paragraphs are skipped only when `s` is set. Consecutive code-block, task
 * and list paragraphs are grouped before falling back to convertParagraph.
 */
function F(c,n,t,r,s){const l=[];let a=0;for(;a<c.length;){const m=c[a];if(m.name==="w:tbl"){l.push(convertTable(m,t,n)),a++,a<c.length&&c[a].name==="w:p"&&L(c[a])&&a++;continue}if(m.name==="w:p"){if(s&&L(m)){a++;continue}if(isCodeBlock(m)){const h=P(c,a);l.push(...h),a+=h.length;continue}if(isTaskItem(m)){const h=H(c,a);l.push(...h),a+=_(c,a);continue}if(isListItem(m)){const h=$(c,a,n,t,r);l.push(...h),a+=z(c,a);continue}if(isHorizontalRule(m)){l.push({type:"horizontalRule"}),a++;continue}l.push(convertParagraph(m,t,n)),a++;continue}a++}return l}
/** Converts the run of consecutive code-block paragraphs starting at index `n` into one codeBlock node each. */
function P(c,n){const t=[];let r=n;for(;r<c.length;){const s=c[r];if(s.name!=="w:p"||!isCodeBlock(s))break;const l=getCodeBlockLanguage(s),a={type:"codeBlock",...l&&{attrs:{language:l}},content:j(s)};t.push(a),r++}return t}
/**
 * Groups consecutive list paragraphs starting at index `n` into
 * bulletList / orderedList nodes, starting a new list whenever the numId
 * changes. `t` = image Map, `r` = hyperlink Map, `s` = list-type Map
 * (an unknown numId is treated as a bullet list).
 */
function $(c,n,t,r,s){const l=[];let a=n;for(;a<c.length;){const m=c[a];if(m.name!=="w:p"||!isListItem(m))break;const h=getListInfo(m);if(!h)break;const b=s.get(h.numId),k=b?.type||"bullet",x=[];for(;a<c.length;){const T=c[a];if(T.name!=="w:p"||!isListItem(T))break;const C=getListInfo(T);if(!C||C.numId!==h.numId)break;const A={type:"listItem",content:[convertParagraph(T,r,t)]};x.push(A),a++}const I={type:k==="bullet"?"bulletList":"orderedList",content:x};k==="ordered"&&(I.attrs={type:null,...b?.start!==void 0&&{start:b.start}}),l.push(I)}return l}
/** Counts the consecutive list paragraphs starting at index `n`. */
function z(c,n){let t=0,r=n;for(;r<c.length;){const s=c[r];if(s.name!=="w:p"||!isListItem(s))break;t++,r++}return t}
/** Wraps the consecutive task paragraphs starting at index `n` in a single taskList node. */
function H(c,n){const t=[];let r=n;for(;r<c.length;){const s=c[r];if(s.name!=="w:p"||!isTaskItem(s))break;const l=convertTaskItem(s);t.push(l),r++}return[{type:"taskList",content:t}]}
/** Counts the consecutive task paragraphs starting at index `n`. */
function _(c,n){let t=0,r=n;for(;r<c.length;){const s=c[r];if(s.name!=="w:p"||!isTaskItem(s))break;t++,r++}return t}
/** Returns one plain text node per w:t found in the paragraph's direct runs (no marks). */
function j(c){const n=[];for(const t of c.children){if(t.type!=="element"||t.name!=="w:r")continue;const r=t;for(const s of r.children)if(s.type==="element"&&s.name==="w:t"){const l=s.children.find(a=>a.type==="text");l&&"value"in l&&n.push({type:"text",text:l.value})}}return n}
/**
 * Returns true when paragraph `c` has no content worth keeping.
 * A paragraph is NOT empty when it contains a w:drawing anywhere below it,
 * or non-whitespace text in a direct run or in a run nested in a
 * w:hyperlink; those are the places extractRuns takes text from.
 */
function L(c) {
  // A drawing anywhere under the paragraph (direct run, hyperlink run or
  // mc:AlternateContent) means the paragraph carries an image.
  if (M(c, "w:drawing")) return !1;
  for (const n of c.children) {
    if (n.type !== "element") continue;
    let runs;
    if (n.name === "w:r") runs = [n];
    else if (n.name === "w:hyperlink") runs = n.children.filter(t => t.type === "element" && t.name === "w:r");
    else continue;
    for (const t of runs)
      for (const r of t.children)
        if (r.type === "element" && r.name === "w:t") {
          const s = r.children.find(l => l.type === "text");
          if (s && "value" in s && s.value.trim().length > 0) return !1;
        }
  }
  return !0;
}
exports.convertParagraph=convertParagraph,exports.convertTable=convertTable,exports.convertTaskItem=convertTaskItem,exports.defaultImageConverter=defaultImageConverter,exports.extractAlignment=extractAlignment,exports.extractMarks=extractMarks,exports.extractRuns=extractRuns,exports.getCodeBlockLanguage=getCodeBlockLanguage,exports.getListInfo=getListInfo,exports.getTaskItemChecked=getTaskItemChecked,exports.isCodeBlock=isCodeBlock,exports.isHorizontalRule=isHorizontalRule,exports.isListItem=isListItem,exports.isTable=isTable,exports.isTaskItem=isTaskItem,exports.parseDOCX=parseDOCX;
|
|
1
|
+
"use strict";const xastUtilFromXml=require("xast-util-from-xml"),fflate=require("fflate"),undio=require("undio"),imageMeta=require("image-meta");function extractRuns(c,r,n){const t=[];for(const s of r.children)if(s.type==="element"){if(s.name==="w:hyperlink"){const l=s,a=l.attributes["r:id"],m=c.get(a);if(m){for(const h of l.children)if(h.type==="element"&&h.name==="w:r"){const b=h;let k=i(b,"w:drawing");if(!k){const A=i(b,"mc:AlternateContent");if(A){const N=i(A,"mc:Choice");N&&(k=i(N,"w:drawing"))}}if(k){const A=P$1(k,n);if(A){t.push(A);continue}const N=j$1(k,n);if(t.push(...N),N.length>0)continue}const x=i(b,"w:t");if(!x)continue;const I=x.children.find(A=>A.type==="text");if(!I||!I.value)continue;const T=extractMarks(b);T.push({type:"link",attrs:{href:m}});const C={type:"text",text:I.value};T.length>0&&(C.marks=T),t.push(C)}}continue}if(s.name==="w:r"){const l=s;let a=i(l,"w:drawing");if(!a){const x=i(l,"mc:AlternateContent");if(x){const I=i(x,"mc:Choice");I&&(a=i(I,"w:drawing"))}}if(a){const x=j$1(a,n);if(t.push(...x),x.length>0)continue}if(i(l,"w:br")){const x=extractMarks(l),I={type:"hardBreak"};x.length>0&&(I.marks=x),t.push(I)}const m=i(l,"w:t");if(!m)continue;const h=m.children.find(x=>x.type==="text");if(!h||!h.value)continue;const b=extractMarks(l),k={type:"text",text:h.value};b.length>0&&(k.marks=b),t.push(k)}}return t}function extractMarks(c){const r=[],n=i(c,"w:rPr");if(!n)return r;i(n,"w:b")&&r.push({type:"bold"}),i(n,"w:i")&&r.push({type:"italic"}),i(n,"w:u")&&r.push({type:"underline"}),i(n,"w:strike")&&r.push({type:"strike"}),i(n,"w:highlight")&&r.push({type:"highlight"});const t=i(n,"w:vertAlign");if(t){const h=t.attributes["w:val"];h==="subscript"?r.push({type:"subscript"}):h==="superscript"&&r.push({type:"superscript"})}const s=i(n,"w:color"),l=i(n,"w:shd"),a=i(n,"w:sz"),m=i(n,"w:rFonts");if(s||l||a||m){const h={color:"",backgroundColor:"",fontSize:"",fontFamily:"",lineHeight:""};if(s&&s.attributes["w:val"]){const 
b=s.attributes["w:val"];if(b!=="auto"){const k=b.startsWith("#")?b:`#${b}`;h.color=k}}if(l&&l.attributes["w:fill"]){const b=l.attributes["w:fill"];if(b!=="auto"){const k=b.startsWith("#")?b:`#${b}`;h.backgroundColor=k}}if(a&&a.attributes["w:val"]){const b=a.attributes["w:val"],k=parseFloat(b);if(!isNaN(k)){const x=Math.round(k/1.5*10)/10;h.fontSize=`${x}px`}}m&&m.attributes["w:ascii"]&&(h.fontFamily=m.attributes["w:ascii"]),r.push({type:"textStyle",attrs:h})}return r}function extractAlignment(c){const r=i(c,"w:pPr");if(!r)return;const n=i(r,"w:jc");if(!n?.attributes["w:val"])return;const t=n.attributes["w:val"],s={left:"left",right:"right",center:"center",both:"justify"}[t];return s?{textAlign:s}:void 0}function P$1(c,r){const n=M(c,"a:blip");if(!n?.attributes["r:embed"])return null;const t=n.attributes["r:embed"],s=r.get(t);if(!s)return null;const l=M(c,"wp:extent");let a,m;if(l){const k=l.attributes.cx,x=l.attributes.cy;if(typeof k=="string"){const I=parseInt(k,10);isNaN(I)||(a=Math.round(I/9525))}if(typeof x=="string"){const I=parseInt(x,10);isNaN(I)||(m=Math.round(I/9525))}}const h=M(c,"wp:docPr");let b;if(h){const k=h.attributes.title;typeof k=="string"&&k&&(b=k)}return{type:"image",attrs:{src:s,alt:"",...a!==void 0&&{width:a},...m!==void 0&&{height:m},...b!==void 0&&{title:b}}}}function j$1(c,r){const n=[],t=i(c,"wp:inline")||i(c,"wp:anchor");if(!t)return n;const s=i(t,"wp:extent");let l,a;if(s){const k=s.attributes.cx,x=s.attributes.cy;if(typeof k=="string"){const I=parseInt(k,10);isNaN(I)||(l=Math.round(I/9525))}if(typeof x=="string"){const I=parseInt(x,10);isNaN(I)||(a=Math.round(I/9525))}}const m=i(t,"a:graphic");if(!m)return n;const h=i(m,"a:graphicData");if(!h)return n;const b=i(h,"wpg:wgp");if(b){const k=i(b,"wpg:grpSp");let x=[];if(k){const I=R(k,"pic:pic"),T=R(k,"pic");x=[...I,...T]}else{const I=R(b,"pic:pic"),T=R(b,"pic");x=[...I,...T]}for(const I of x){const T=i(I,"a:graphic");if(!T){const A=i(I,"pic:blipFill")||M(I,"a:blipFill");if(A){const 
N=i(A,"a:blip")||M(A,"a:blip");if(N&&N.attributes["r:embed"]){const q=N.attributes["r:embed"],W=r.get(q);if(W){let S=l,O=a;if(W&&l&&a)try{let D,E;if(W.startsWith("data:")){const G=W.split(",")[1];if(G){const K=atob(G),V=new Uint8Array(K.length);for(let Q=0;Q<K.length;Q++)V[Q]=K.charCodeAt(Q);const Y=imageMeta.imageMeta(V);D=Y.width,E=Y.height}}if(D&&E){const G=D/E,K=l/a;Math.abs(G-K)>.1&&(G>K?(S=l,O=Math.round(l/G)):(O=a,S=Math.round(a*G)))}}catch(D){console.warn("Failed to extract image metadata for aspect ratio:",D)}n.push({type:"image",attrs:{src:W,alt:"",...S!==void 0&&{width:S},...O!==void 0&&{height:O}}})}}}continue}const C=P$1({children:[T]},r);if(C){if(l!==void 0&&a!==void 0&&C.attrs.src)try{const A=C.attrs.src;let N,q;if(A.startsWith("data:")){const W=A.split(",")[1];if(W){const S=atob(W),O=new Uint8Array(S.length);for(let E=0;E<S.length;E++)O[E]=S.charCodeAt(E);const D=imageMeta.imageMeta(O);N=D.width,q=D.height}}if(N&&q){const W=N/q,S=l/a;Math.abs(W-S)>.1?W>S?(C.attrs.width=l,C.attrs.height=Math.round(l/W)):(C.attrs.height=a,C.attrs.width=Math.round(a*W)):(C.attrs.width=l,C.attrs.height=a)}else C.attrs.width=l,C.attrs.height=a}catch{C.attrs.width=l,C.attrs.height=a}n.push(C)}}}else{const k=P$1(c,r);k&&n.push(k)}return n}function i(c,r){for(const n of c.children)if(n.type==="element"&&n.name===r)return n}function M(c,r){for(const n of c.children)if(n.type==="element"&&n.name===r)return n;for(const n of c.children)if(n.type==="element"){const t=M(n,r);if(t)return t}}function R(c,r){const n=[];for(const t of c.children)t.type==="element"&&t.name===r&&n.push(t);for(const t of c.children)t.type==="element"&&n.push(...R(t,r));return n}function convertParagraph(c,r,n){let t;for(const a of c.children)if(a.type==="element"&&a.name==="w:pPr"){const m=a;for(const h of m.children)if(h.type==="element"&&h.name==="w:pStyle"){t=h.attributes["w:val"];break}break}if(t){const a=t.match(/^Heading(\d)$/);if(a){const m=parseInt(a[1]);return f$1(c,r,m,n)}}const 
s=extractRuns(r,c,n);if(s.length===1&&s[0].type==="hardBreak"){for(const a of c.children)if(a.type==="element"&&a.name==="w:r"){for(const m of a.children)if(m.type==="element"&&m.name==="w:br"&&m.attributes["w:type"]==="page")return{type:"horizontalRule"}}}if(s.length===1&&s[0].type==="image")return s[0];const l=extractAlignment(c);return{type:"paragraph",...l&&{attrs:l},content:s}}function f$1(c,r,n,t){return{type:"heading",attrs:{level:n},content:extractRuns(r,c,t)}}function isListItem(c){const r=e(c,"w:pPr");return r?!!e(r,"w:numPr"):!1}function getListInfo(c){const r=e(c,"w:pPr");if(!r)return null;const n=e(r,"w:numPr");if(!n)return null;const t=e(n,"w:ilvl"),s=e(n,"w:numId");return!t||!s?null:{numId:s.attributes["w:val"],level:parseInt(t.attributes["w:val"]||"0")}}function e(c,r){for(const n of c.children)if(n.type==="element"&&n.name===r)return n}function isCodeBlock(c){const r=o(c,"w:pPr");if(!r)return!1;const n=o(r,"w:pStyle");if(!n)return!1;const t=n.attributes["w:val"];return t==="CodeBlock"||t?.startsWith("Code")}function getCodeBlockLanguage(c){const r=o(c,"w:pPr");if(!r)return;const n=o(r,"w:pStyle");if(!n)return;const t=n.attributes["w:val"];if(t?.startsWith("CodeBlock"))return t.replace("CodeBlock","").toLowerCase()||void 0}function o(c,r){for(const n of c.children)if(n.type==="element"&&n.name===r)return n}function isTable(c){return c.name==="w:tbl"}function convertTable(c,r,n){const t=[],s=[];for(const a of c.children)a.type==="element"&&a.name==="w:tr"&&s.push(a);const l=new Map;return s.forEach((a,m)=>{t.push(d(a,m===0,r,n,l,s,m))}),{type:"table",content:t}}function d(c,r,n,t,s,l,a){const m=[];let h=0;for(const b of c.children)if(b.type==="element"&&b.name==="w:tc"){const k=s.get(h);if(k&&k>0){s.set(h,k-1),h++;continue}let x=u$1(b);if(x&&x.rowspan===1){const C=g(l,a,h);C>1&&(x={...x,rowspan:C})}if(x&&x.rowspan>1&&s.set(h,x.rowspan-1),x&&x.rowspan===0){h++;continue}const 
I="tableCell",T=y(b,n,t);m.push({type:I,...x&&{attrs:x},content:[T]}),h+=x?.colspan||1}return{type:"tableRow",content:m}}function u$1(c){const r={colspan:1,rowspan:1,colwidth:null};let n;for(const t of c.children)if(t.type==="element"&&t.name==="w:tcPr"){n=t;break}if(!n)return r;for(const t of n.children)if(t.type==="element"&&t.name==="w:gridSpan"){const s=t.attributes["w:val"];s&&(r.colspan=parseInt(s));break}for(const t of n.children)if(t.type==="element"&&t.name==="w:vMerge"){t.attributes["w:val"]==="continue"&&(r.rowspan=0);break}for(const t of n.children)if(t.type==="element"&&t.name==="w:tcW"){const s=t.attributes["w:w"];s&&(r.colwidth=parseInt(s));break}return r}function g(c,r,n){let t=1,s=n;for(let l=r+1;l<c.length;l++){const a=c[l];let m=!1;for(const h of a.children)if(h.type==="element"&&h.name==="w:tc"){const b=u$1(h),k=b?.colspan||1;if(s>=0&&s<k){if(b?.rowspan===0)t++,m=!0;else return t;break}s-=k}if(!m)break}return t}function y(c,r,n){const t=[];for(const s of c.children)if(s.type==="element"&&s.name==="w:p"){const l=convertParagraph(s,r,n);t.push(l)}return t[0]||{type:"paragraph",content:[]}}function isTaskItem(c){for(const r of c.children)if(r.type==="element"&&r.name==="w:r"){for(const n of r.children)if(n.type==="element"&&n.name==="w:t"){const t=n.children.find(s=>s.type==="text");if(t&&"value"in t){const s=t.value;return s.startsWith("\u2610")||s.startsWith("\u2611")}}break}return!1}function getTaskItemChecked(c){for(const r of c.children)if(r.type==="element"&&r.name==="w:r"){for(const n of r.children)if(n.type==="element"&&n.name==="w:t"){const t=n.children.find(s=>s.type==="text");if(t&&"value"in t)return t.value.startsWith("\u2611")}break}return!1}function convertTaskItem(c){const r=getTaskItemChecked(c),n=f(c);return{type:"taskItem",attrs:{checked:r},content:[n]}}function f(c){const r=[];let n=!1;for(const s of c.children)if(s.type==="element"&&s.name==="w:r"){let l=!1;if(!n){for(const a of 
s.children)if(a.type==="element"&&a.name==="w:t"){const m=a.children.find(h=>h.type==="text");if(m&&"value"in m){const h=m.value;if(h.startsWith("\u2610")||h.startsWith("\u2611")){l=!0,n=!0;const b=h.substring(2).trimStart();b.length>0&&r.push({type:"text",text:b})}}}}if(!l){const a=p(s);for(const m of s.children)if(m.type==="element"&&m.name==="w:t"){const h=m.children.find(b=>b.type==="text");if(h&&"value"in h){const b={type:"text",text:h.value};a.length>0&&(b.marks=a),r.push(b)}}}}const t=u(c);return{type:"paragraph",...t&&{attrs:t},content:r.length>0?r:void 0}}function p(c){const r=[];for(const n of c.children)if(n.type==="element"&&n.name==="w:rPr"){const t=n;for(const s of t.children)if(s.type==="element"&&s.name==="w:b"){r.push({type:"bold"});break}for(const s of t.children)if(s.type==="element"&&s.name==="w:i"){r.push({type:"italic"});break}for(const s of t.children)if(s.type==="element"&&s.name==="w:u"){r.push({type:"underline"});break}for(const s of t.children)if(s.type==="element"&&s.name==="w:strike"){r.push({type:"strike"});break}break}return r}function u(c){for(const r of c.children)if(r.type==="element"&&r.name==="w:pPr"){const n=r;for(const t of n.children)if(t.type==="element"&&t.name==="w:jc"){const s=t.attributes["w:val"];if(s==="both")return{textAlign:"justify"};if(s==="center")return{textAlign:"center"};if(s==="right")return{textAlign:"right"};if(s==="left")return{textAlign:"left"}}}}function isHorizontalRule(c){for(const r of c.children)if(r.type==="element"&&r.name==="w:r"){const n=r;let t=!1,s=!1;for(const l of n.children)if(l.type==="element")if(l.name==="w:br")l.attributes["w:type"]==="page"&&(t=!0);else if(l.name==="w:t"){const a=l.children.find(m=>m.type==="text");a&&"value"in a&&a.value&&a.value.trim().length>0&&(s=!0)}else l.name!=="w:rPr"&&(s=!0);if(t&&!s)return!0}return!1}const w="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";function v(c){const r=c.length,n=Math.ceil(r/3)*4,t=Array.from({length:n});let 
s=0;for(let l=0;l<r;l+=3){const a=c[l],m=l+1<r?c[l+1]:0,h=l+2<r?c[l+2]:0,b=a>>2,k=(a&3)<<4|m>>4,x=(m&15)<<2|h>>6,I=h&63;t[s++]=w[b],t[s++]=w[k],t[s++]=l+1<r?w[x]:"=",t[s++]=l+2<r?w[I]:"="}return t.join("")}const defaultImageConverter=async c=>{const r=v(c.data);return{src:`data:${c.contentType};base64,${r}`}};async function parseDOCX(c,r={}){const{convertImage:n=defaultImageConverter,ignoreEmptyParagraphs:t=!1}=r,s=await undio.toUint8Array(c),l=fflate.unzipSync(s),a=B(l),m=X(l),h=new Map;for(const[I,T]of m.entries())try{let C;try{C=`image/${imageMeta.imageMeta(T).type}`}catch{C="image/png"}const A=await n({id:I,contentType:C,data:T});h.set(I,A.src)}catch(C){console.warn(`Failed to convert image ${I}:`,C);let A="image/png";try{A=`image/${imageMeta.imageMeta(T).type}`}catch{}const N=v(T),q=`data:${A};base64,${N}`;h.set(I,q)}const b=l["word/document.xml"];if(!b)throw new Error("Invalid DOCX file: missing word/document.xml");const k=xastUtilFromXml.fromXml(new TextDecoder().decode(b)),x=J(l);return U(k,h,a,x,t)}function J(c){const r=new Map,n=new Map,t=c["word/numbering.xml"];if(!t)return r;const s=xastUtilFromXml.fromXml(new TextDecoder().decode(t)),l=new Map;if(s.type==="root"){for(const a of s.children)if(a.type==="element"&&a.name==="w:numbering"){const m=a;for(const h of m.children)if(h.type==="element"&&h.name==="w:abstractNum"){const b=h,k=b.attributes["w:abstractNumId"];for(const x of b.children)if(x.type==="element"&&x.name==="w:lvl"){for(const I of x.children)if(I.type==="element"&&I.name==="w:numFmt"){const T=I.attributes["w:val"];if(T){l.set(k,T);break}}for(const I of x.children)if(I.type==="element"&&I.name==="w:start"){const T=I.attributes["w:val"];T&&n.set(k,parseInt(T,10));break}break}}for(const h of m.children)if(h.type==="element"&&h.name==="w:num"){const b=h,k=b.attributes["w:numId"];for(const x of b.children)if(x.type==="element"&&x.name==="w:abstractNumId"){const I=x.attributes["w:val"],T=l.get(I);if(T){const 
C=n.get(I);T==="bullet"?r.set(k,{type:"bullet"}):r.set(k,{type:"ordered",...C!==void 0&&{start:C}})}break}}break}}return r}function X(c){const r=new Map,n=c["word/_rels/document.xml.rels"];if(!n)return r;const t=xastUtilFromXml.fromXml(new TextDecoder().decode(n));if(t.type==="root"){for(const s of t.children)if(s.type==="element"&&s.name==="Relationships"){const l=s;for(const a of l.children)if(a.type==="element"&&a.name==="Relationship"){const m=a,h=m.attributes.Type;if(h&&h==="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"){const b=m.attributes.Id,k=m.attributes.Target;if(b&&k){const x="word/"+k,I=c[x];I&&r.set(b,I)}}}break}}return r}function B(c){const r=new Map,n=c["word/_rels/document.xml.rels"];if(!n)return r;const t=xastUtilFromXml.fromXml(new TextDecoder().decode(n));if(t.type==="root"){for(const s of t.children)if(s.type==="element"&&s.name==="Relationships"){const l=s;for(const a of l.children)if(a.type==="element"&&a.name==="Relationship"){const m=a,h=m.attributes.Type;if(h&&h==="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"){const b=m.attributes.Id,k=m.attributes.Target;b&&k&&r.set(b,k)}}break}}return r}function U(c,r,n,t,s){if(c.type!=="root")return{type:"doc",content:[]};for(const l of c.children)if(l.type==="element"&&l.name==="w:document"){const a=l;for(const m of a.children)if(m.type==="element"&&m.name==="w:body")return{type:"doc",content:F(m.children.filter(h=>h.type==="element"),r,n,t,s)};break}return{type:"doc",content:[]}}function F(c,r,n,t,s){const l=[];let a=0;for(;a<c.length;){const m=c[a];if(m.name==="w:tbl"){l.push(convertTable(m,n,r)),a++,a<c.length&&c[a].name==="w:p"&&L(c[a])&&a++;continue}if(m.name==="w:p"){if(s&&L(m)){a++;continue}if(isCodeBlock(m)){const h=P(c,a);l.push(...h),a+=h.length;continue}if(isTaskItem(m)){const h=H(c,a);l.push(...h),a+=_(c,a);continue}if(isListItem(m)){const 
h=$(c,a,r,n,t);l.push(...h),a+=z(c,a);continue}if(isHorizontalRule(m)){l.push({type:"horizontalRule"}),a++;continue}l.push(convertParagraph(m,n,r)),a++;continue}a++}return l}function P(c,r){const n=[];let t=r;for(;t<c.length;){const s=c[t];if(s.name!=="w:p"||!isCodeBlock(s))break;const l=getCodeBlockLanguage(s),a={type:"codeBlock",...l&&{attrs:{language:l}},content:j(s)};n.push(a),t++}return n}function $(c,r,n,t,s){const l=[];let a=r;for(;a<c.length;){const m=c[a];if(m.name!=="w:p"||!isListItem(m))break;const h=getListInfo(m);if(!h)break;const b=s.get(h.numId),k=b?.type||"bullet",x=[];for(;a<c.length;){const T=c[a];if(T.name!=="w:p"||!isListItem(T))break;const C=getListInfo(T);if(!C||C.numId!==h.numId)break;const A={type:"listItem",content:[convertParagraph(T,t,n)]};x.push(A),a++}const I={type:k==="bullet"?"bulletList":"orderedList",content:x};k==="ordered"&&(I.attrs={type:null,...b?.start!==void 0&&{start:b.start}}),l.push(I)}return l}function z(c,r){let n=0,t=r;for(;t<c.length;){const s=c[t];if(s.name!=="w:p"||!isListItem(s))break;n++,t++}return n}function H(c,r){const n=[];let t=r;for(;t<c.length;){const s=c[t];if(s.name!=="w:p"||!isTaskItem(s))break;const l=convertTaskItem(s);n.push(l),t++}return[{type:"taskList",content:n}]}function _(c,r){let n=0,t=r;for(;t<c.length;){const s=c[t];if(s.name!=="w:p"||!isTaskItem(s))break;n++,t++}return n}function j(c){const r=[];for(const n of c.children){if(n.type!=="element"||n.name!=="w:r")continue;const t=n;for(const s of t.children)if(s.type==="element"&&s.name==="w:t"){const l=s.children.find(a=>a.type==="text");l&&"value"in l&&r.push({type:"text",text:l.value})}}return r}function L(c){for(const r of c.children){if(r.type!=="element"||r.name!=="w:r")continue;const n=r;for(const t of n.children){if(t.type==="element"&&t.name==="w:t"){const s=t.children.find(l=>l.type==="text");if(s&&"value"in 
s&&s.value.trim().length>0)return!1}if(t.type==="element"&&(t.name==="w:drawing"||t.name==="mc:AlternateContent"||t.name==="w:pict"))return!1}}return!0}exports.convertParagraph=convertParagraph,exports.convertTable=convertTable,exports.convertTaskItem=convertTaskItem,exports.defaultImageConverter=defaultImageConverter,exports.extractAlignment=extractAlignment,exports.extractMarks=extractMarks,exports.extractRuns=extractRuns,exports.getCodeBlockLanguage=getCodeBlockLanguage,exports.getListInfo=getListInfo,exports.getTaskItemChecked=getTaskItemChecked,exports.isCodeBlock=isCodeBlock,exports.isHorizontalRule=isHorizontalRule,exports.isListItem=isListItem,exports.isTable=isTable,exports.isTaskItem=isTaskItem,exports.parseDOCX=parseDOCX;
|
package/dist/index.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import { fromXml } from "xast-util-from-xml";
import { unzipSync } from "fflate";
import { toUint8Array } from "undio";
import { imageMeta } from "image-meta";

/**
 * DOCX -> document-JSON importer.
 *
 * The archive is unzipped, `word/document.xml` is parsed into a xast tree and
 * every body-level `w:p` / `w:tbl` element is mapped onto a node of the shape
 * `{ type, attrs?, content?, marks? }` (doc, paragraph, heading, table,
 * bulletList, orderedList, taskList, codeBlock, horizontalRule, image, text,
 * hardBreak).
 *
 * Changes relative to the previous build of this module:
 *  - a paragraph that only holds an image (or only hyperlink text) is no
 *    longer classified as empty, so it is not dropped after a table or when
 *    `ignoreEmptyParagraphs` is enabled;
 *  - row-span counting restarts its column cursor on every row, so vertical
 *    merges outside the first column are counted correctly;
 *  - `<w:vMerge/>` without `w:val` is treated as a continuation cell;
 *  - the task-list checkbox glyph is stripped without eating the first
 *    character of the label when no space follows the glyph;
 *  - `isCodeBlock` always returns a boolean;
 *  - alignment lookup no longer resolves inherited object keys;
 *  - every `parseInt` call passes an explicit radix.
 */

/** Number of EMUs (the unit used by `wp:extent`) per pixel. */
const EMU_PER_PIXEL = 9525;

const CHECKBOX_UNCHECKED = "\u2610";
const CHECKBOX_CHECKED = "\u2611";

const RELATIONSHIPS_PATH = "word/_rels/document.xml.rels";
const IMAGE_RELATIONSHIP =
  "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
const HYPERLINK_RELATIONSHIP =
  "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";

/** `w:jc` values mapped to the `textAlign` attribute they produce. */
const ALIGNMENTS = new Map([
  ["left", "left"],
  ["right", "right"],
  ["center", "center"],
  ["both", "justify"],
]);

/** Run children that make a paragraph non-empty even without any text. */
const VISUAL_RUN_CONTENT = new Set(["w:drawing", "mc:AlternateContent", "w:pict"]);

const BASE64_ALPHABET =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* -------------------------------------------------------------------------- */
/* xast helpers                                                               */
/* -------------------------------------------------------------------------- */

/** True when `node` is an element, optionally also requiring a tag name. */
function isElement(node, name) {
  return node.type === "element" && (name === undefined || node.name === name);
}

/** First direct child element called `name`, or `undefined`. */
function findChild(parent, name) {
  for (const child of parent.children) {
    if (isElement(child, name)) return child;
  }
  return undefined;
}

/** First element called `name`: direct children are tried before recursing. */
function findDescendant(parent, name) {
  const direct = findChild(parent, name);
  if (direct) return direct;
  for (const child of parent.children) {
    if (!isElement(child)) continue;
    const nested = findDescendant(child, name);
    if (nested) return nested;
  }
  return undefined;
}

/** Every element called `name` below `parent` (direct matches come first). */
function findAllDescendants(parent, name) {
  const found = parent.children.filter((child) => isElement(child, name));
  for (const child of parent.children) {
    if (isElement(child)) found.push(...findAllDescendants(child, name));
  }
  return found;
}

/** The first text node directly inside an element such as `w:t`. */
function findTextNode(element) {
  return element.children.find((child) => child.type === "text");
}

/** Locates `w:drawing` in a run, looking into `mc:AlternateContent/mc:Choice` too. */
function findDrawing(run) {
  const direct = findChild(run, "w:drawing");
  if (direct) return direct;
  const alternate = findChild(run, "mc:AlternateContent");
  const choice = alternate && findChild(alternate, "mc:Choice");
  return choice ? findChild(choice, "w:drawing") : undefined;
}

/* -------------------------------------------------------------------------- */
/* Images                                                                     */
/* -------------------------------------------------------------------------- */

/** Converts an EMU attribute value to whole pixels; `undefined` if unusable. */
function emuToPixels(value) {
  if (typeof value !== "string") return undefined;
  const emu = parseInt(value, 10);
  return isNaN(emu) ? undefined : Math.round(emu / EMU_PER_PIXEL);
}

/** Reads `cx` / `cy` of a `wp:extent` element (which may be missing). */
function readExtent(extent) {
  if (!extent) return { width: undefined, height: undefined };
  return {
    width: emuToPixels(extent.attributes.cx),
    height: emuToPixels(extent.attributes.cy),
  };
}

/**
 * Natural pixel size of a base64 `data:` URI as reported by `imageMeta`.
 * Returns `undefined` for any other kind of source or when either dimension
 * is missing. Decoding errors propagate to the caller.
 */
function measureDataUri(src) {
  if (!src.startsWith("data:")) return undefined;
  const payload = src.split(",")[1];
  if (!payload) return undefined;
  const binary = atob(payload);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
  const meta = imageMeta(bytes);
  return meta.width && meta.height
    ? { width: meta.width, height: meta.height }
    : undefined;
}

/**
 * Fits an image with the given natural size into a box. The box is used
 * unchanged unless the aspect ratios differ by more than 0.1, in which case
 * one side is shrunk so the natural ratio is kept.
 */
function fitIntoBox(boxWidth, boxHeight, natural) {
  const ratio = natural.width / natural.height;
  const boxRatio = boxWidth / boxHeight;
  if (Math.abs(ratio - boxRatio) <= 0.1) {
    return { width: boxWidth, height: boxHeight };
  }
  return ratio > boxRatio
    ? { width: boxWidth, height: Math.round(boxWidth / ratio) }
    : { width: Math.round(boxHeight * ratio), height: boxHeight };
}

/**
 * Builds an image node from the first `a:blip` found anywhere below `node`.
 *
 * @param node   element to search (usually `w:drawing`)
 * @param images Map from relationship id to image `src`
 * @returns image node, or `null` when there is no blip or its id is unknown
 */
function extractImage(node, images) {
  const blip = findDescendant(node, "a:blip");
  if (!blip?.attributes["r:embed"]) return null;
  const src = images.get(blip.attributes["r:embed"]);
  if (!src) return null;

  const { width, height } = readExtent(findDescendant(node, "wp:extent"));

  let title;
  const docPr = findDescendant(node, "wp:docPr");
  if (docPr) {
    const candidate = docPr.attributes.title;
    if (typeof candidate === "string" && candidate) title = candidate;
  }

  return {
    type: "image",
    attrs: {
      src,
      alt: "",
      ...(width !== undefined && { width }),
      ...(height !== undefined && { height }),
      ...(title !== undefined && { title }),
    },
  };
}

/** Image node for a grouped picture that carries its blip directly (no `a:graphic`). */
function convertBareGroupPicture(picture, images, boxWidth, boxHeight) {
  const blipFill =
    findChild(picture, "pic:blipFill") || findDescendant(picture, "a:blipFill");
  if (!blipFill) return undefined;
  const blip = findChild(blipFill, "a:blip") || findDescendant(blipFill, "a:blip");
  if (!blip || !blip.attributes["r:embed"]) return undefined;
  const src = images.get(blip.attributes["r:embed"]);
  if (!src) return undefined;

  let width = boxWidth;
  let height = boxHeight;
  if (boxWidth && boxHeight) {
    try {
      const natural = measureDataUri(src);
      if (natural) ({ width, height } = fitIntoBox(boxWidth, boxHeight, natural));
    } catch (error) {
      console.warn("Failed to extract image metadata for aspect ratio:", error);
    }
  }

  return {
    type: "image",
    attrs: {
      src,
      alt: "",
      ...(width !== undefined && { width }),
      ...(height !== undefined && { height }),
    },
  };
}

/** Image node for a grouped picture that wraps its content in `a:graphic`. */
function convertWrappedGroupPicture(graphic, images, boxWidth, boxHeight) {
  const image = extractImage({ children: [graphic] }, images);
  if (!image) return undefined;

  if (boxWidth !== undefined && boxHeight !== undefined && image.attrs.src) {
    let size = { width: boxWidth, height: boxHeight };
    try {
      const natural = measureDataUri(image.attrs.src);
      if (natural) size = fitIntoBox(boxWidth, boxHeight, natural);
    } catch {
      // Metadata is best-effort only: keep the drawing's own extent.
    }
    image.attrs.width = size.width;
    image.attrs.height = size.height;
  }
  return image;
}

/**
 * Collects the image nodes of a `w:drawing`. A `wpg:wgp` group yields one
 * node per contained picture; any other drawing yields at most one node.
 */
function extractImagesFromDrawing(drawing, images) {
  const result = [];
  const container = findChild(drawing, "wp:inline") || findChild(drawing, "wp:anchor");
  if (!container) return result;

  const box = readExtent(findChild(container, "wp:extent"));

  const graphic = findChild(container, "a:graphic");
  if (!graphic) return result;
  const graphicData = findChild(graphic, "a:graphicData");
  if (!graphicData) return result;

  const group = findChild(graphicData, "wpg:wgp");
  if (!group) {
    const single = extractImage(drawing, images);
    if (single) result.push(single);
    return result;
  }

  const scope = findChild(group, "wpg:grpSp") || group;
  const pictures = [
    ...findAllDescendants(scope, "pic:pic"),
    ...findAllDescendants(scope, "pic"),
  ];
  for (const picture of pictures) {
    const inner = findChild(picture, "a:graphic");
    const image = inner
      ? convertWrappedGroupPicture(inner, images, box.width, box.height)
      : convertBareGroupPicture(picture, images, box.width, box.height);
    if (image) result.push(image);
  }
  return result;
}

/* -------------------------------------------------------------------------- */
/* Runs and marks                                                             */
/* -------------------------------------------------------------------------- */

/** Prefixes a colour attribute value with `#` unless it already has one. */
function toCssColor(value) {
  return value.startsWith("#") ? value : `#${value}`;
}

/**
 * Derives the marks of a run from its `w:rPr`: bold, italic, underline,
 * strike, highlight, sub/superscript and, when colour, shading, size or font
 * is present, a `textStyle` mark.
 */
function extractMarks(run) {
  const marks = [];
  const props = findChild(run, "w:rPr");
  if (!props) return marks;

  if (findChild(props, "w:b")) marks.push({ type: "bold" });
  if (findChild(props, "w:i")) marks.push({ type: "italic" });
  if (findChild(props, "w:u")) marks.push({ type: "underline" });
  if (findChild(props, "w:strike")) marks.push({ type: "strike" });
  if (findChild(props, "w:highlight")) marks.push({ type: "highlight" });

  const vertAlign = findChild(props, "w:vertAlign");
  if (vertAlign) {
    const position = vertAlign.attributes["w:val"];
    if (position === "subscript") marks.push({ type: "subscript" });
    else if (position === "superscript") marks.push({ type: "superscript" });
  }

  const color = findChild(props, "w:color");
  const shading = findChild(props, "w:shd");
  const size = findChild(props, "w:sz");
  const fonts = findChild(props, "w:rFonts");
  if (color || shading || size || fonts) {
    const style = {
      color: "",
      backgroundColor: "",
      fontSize: "",
      fontFamily: "",
      lineHeight: "",
    };

    const colorValue = color && color.attributes["w:val"];
    if (colorValue && colorValue !== "auto") style.color = toCssColor(colorValue);

    const fillValue = shading && shading.attributes["w:fill"];
    if (fillValue && fillValue !== "auto") style.backgroundColor = toCssColor(fillValue);

    const sizeValue = size && size.attributes["w:val"];
    if (sizeValue) {
      const parsed = parseFloat(sizeValue);
      // The stored size is divided by 1.5 and rounded to one decimal place.
      if (!isNaN(parsed)) style.fontSize = `${Math.round((parsed / 1.5) * 10) / 10}px`;
    }

    if (fonts && fonts.attributes["w:ascii"]) style.fontFamily = fonts.attributes["w:ascii"];

    marks.push({ type: "textStyle", attrs: style });
  }
  return marks;
}

/** Text node for the `w:t` of a run, or `undefined` when the run has no text. */
function buildTextNode(run, extraMark) {
  const textElement = findChild(run, "w:t");
  if (!textElement) return undefined;
  const textNode = findTextNode(textElement);
  if (!textNode || !textNode.value) return undefined;

  const marks = extractMarks(run);
  if (extraMark) marks.push(extraMark);
  return marks.length > 0
    ? { type: "text", text: textNode.value, marks }
    : { type: "text", text: textNode.value };
}

/** Inline nodes of a `w:hyperlink`; empty when its relationship id is unknown. */
function convertHyperlink(link, hyperlinks, images) {
  const nodes = [];
  const href = hyperlinks.get(link.attributes["r:id"]);
  if (!href) return nodes;

  for (const run of link.children) {
    if (!isElement(run, "w:r")) continue;

    const drawing = findDrawing(run);
    if (drawing) {
      const single = extractImage(drawing, images);
      if (single) {
        nodes.push(single);
        continue;
      }
      const grouped = extractImagesFromDrawing(drawing, images);
      nodes.push(...grouped);
      if (grouped.length > 0) continue;
    }

    const text = buildTextNode(run, { type: "link", attrs: { href } });
    if (text) nodes.push(text);
  }
  return nodes;
}

/** Inline nodes of a plain `w:r`: images, or an optional hard break plus text. */
function convertRun(run, images) {
  const nodes = [];

  const drawing = findDrawing(run);
  if (drawing) {
    const pictures = extractImagesFromDrawing(drawing, images);
    nodes.push(...pictures);
    if (pictures.length > 0) return nodes;
  }

  if (findChild(run, "w:br")) {
    const marks = extractMarks(run);
    nodes.push(marks.length > 0 ? { type: "hardBreak", marks } : { type: "hardBreak" });
  }

  const text = buildTextNode(run);
  if (text) nodes.push(text);
  return nodes;
}

/**
 * Converts the runs and hyperlinks directly inside `node` into inline nodes.
 *
 * @param hyperlinks Map from relationship id to link target
 * @param node       paragraph-like element whose children are converted
 * @param images     Map from relationship id to image `src`
 */
function extractRuns(hyperlinks, node, images) {
  const nodes = [];
  for (const child of node.children) {
    if (child.type !== "element") continue;
    if (child.name === "w:hyperlink") {
      nodes.push(...convertHyperlink(child, hyperlinks, images));
    } else if (child.name === "w:r") {
      nodes.push(...convertRun(child, images));
    }
  }
  return nodes;
}

/* -------------------------------------------------------------------------- */
/* Paragraphs                                                                 */
/* -------------------------------------------------------------------------- */

/** `{ textAlign }` taken from `w:pPr/w:jc`, or `undefined` when absent/unknown. */
function extractAlignment(paragraph) {
  const props = findChild(paragraph, "w:pPr");
  if (!props) return undefined;
  const justification = findChild(props, "w:jc");
  if (!justification?.attributes["w:val"]) return undefined;
  // A Map is used so that values such as "constructor" cannot match.
  const textAlign = ALIGNMENTS.get(justification.attributes["w:val"]);
  return textAlign ? { textAlign } : undefined;
}

/** Value of `w:pPr/w:pStyle@w:val`, or `undefined`. */
function readParagraphStyle(paragraph) {
  const props = findChild(paragraph, "w:pPr");
  const style = props && findChild(props, "w:pStyle");
  return style ? style.attributes["w:val"] : undefined;
}

/** True when a direct run of the paragraph holds `<w:br w:type="page"/>`. */
function hasPageBreakRun(paragraph) {
  for (const run of paragraph.children) {
    if (!isElement(run, "w:r")) continue;
    for (const child of run.children) {
      if (isElement(child, "w:br") && child.attributes["w:type"] === "page") return true;
    }
  }
  return false;
}

/** Heading node of the given level built from the paragraph's runs. */
function convertHeading(paragraph, hyperlinks, level, images) {
  return {
    type: "heading",
    attrs: { level },
    content: extractRuns(hyperlinks, paragraph, images),
  };
}

/**
 * Converts a `w:p` element. `HeadingN` styles become headings, a lone page
 * break becomes a horizontal rule, a lone image is returned as the image node
 * itself and everything else becomes a paragraph.
 */
function convertParagraph(paragraph, hyperlinks, images) {
  const style = readParagraphStyle(paragraph);
  if (style) {
    const heading = style.match(/^Heading(\d)$/);
    if (heading) {
      return convertHeading(paragraph, hyperlinks, parseInt(heading[1], 10), images);
    }
  }

  const content = extractRuns(hyperlinks, paragraph, images);
  if (content.length === 1) {
    if (content[0].type === "hardBreak" && hasPageBreakRun(paragraph)) {
      return { type: "horizontalRule" };
    }
    if (content[0].type === "image") return content[0];
  }

  const attrs = extractAlignment(paragraph);
  return { type: "paragraph", ...(attrs && { attrs }), content };
}

/* -------------------------------------------------------------------------- */
/* Lists and code blocks                                                      */
/* -------------------------------------------------------------------------- */

/** True when the paragraph has `w:pPr/w:numPr`. */
function isListItem(paragraph) {
  const props = findChild(paragraph, "w:pPr");
  return props ? !!findChild(props, "w:numPr") : false;
}

/** `{ numId, level }` from `w:numPr`; `null` unless both `w:ilvl` and `w:numId` exist. */
function getListInfo(paragraph) {
  const props = findChild(paragraph, "w:pPr");
  if (!props) return null;
  const numbering = findChild(props, "w:numPr");
  if (!numbering) return null;
  const level = findChild(numbering, "w:ilvl");
  const numId = findChild(numbering, "w:numId");
  if (!level || !numId) return null;
  return {
    numId: numId.attributes["w:val"],
    level: parseInt(level.attributes["w:val"] || "0", 10),
  };
}

/** True when the paragraph style name starts with `Code`. Always a boolean. */
function isCodeBlock(paragraph) {
  const style = readParagraphStyle(paragraph);
  return typeof style === "string" && style.startsWith("Code");
}

/** Lower-cased suffix of a `CodeBlock<Lang>` style, or `undefined` when there is none. */
function getCodeBlockLanguage(paragraph) {
  const style = readParagraphStyle(paragraph);
  if (style?.startsWith("CodeBlock")) {
    return style.replace("CodeBlock", "").toLowerCase() || undefined;
  }
  return undefined;
}

/* -------------------------------------------------------------------------- */
/* Tables                                                                     */
/* -------------------------------------------------------------------------- */

/** True for a `w:tbl` element. */
function isTable(node) {
  return node.name === "w:tbl";
}

/**
 * Reads `colspan` (`w:gridSpan`), `rowspan` and `colwidth` (`w:tcW`) from a
 * cell's `w:tcPr`. `rowspan` is `0` for a vertically-merged continuation cell
 * and `1` otherwise.
 */
function readCellAttrs(cell) {
  const attrs = { colspan: 1, rowspan: 1, colwidth: null };
  const props = findChild(cell, "w:tcPr");
  if (!props) return attrs;

  const gridSpan = findChild(props, "w:gridSpan");
  if (gridSpan && gridSpan.attributes["w:val"]) {
    attrs.colspan = parseInt(gridSpan.attributes["w:val"], 10);
  }

  const vMerge = findChild(props, "w:vMerge");
  if (vMerge) {
    const mode = vMerge.attributes["w:val"];
    // An omitted w:val means "continue" per the WordprocessingML schema.
    if (mode === undefined || mode === null || mode === "continue") attrs.rowspan = 0;
  }

  const cellWidth = findChild(props, "w:tcW");
  if (cellWidth && cellWidth.attributes["w:w"]) {
    attrs.colwidth = parseInt(cellWidth.attributes["w:w"], 10);
  }
  return attrs;
}

/**
 * Counts how many rows the cell at (`rowIndex`, `columnIndex`) spans by
 * walking the following rows while the cell covering that column is a merge
 * continuation.
 */
function countRowspan(rows, rowIndex, columnIndex) {
  let span = 1;
  for (let next = rowIndex + 1; next < rows.length; next++) {
    // The cursor restarts on every row; carrying it over from the previous
    // row would make later rows look at the wrong column.
    let cursor = columnIndex;
    let continued = false;
    for (const cell of rows[next].children) {
      if (!isElement(cell, "w:tc")) continue;
      const attrs = readCellAttrs(cell);
      const width = attrs.colspan || 1;
      if (cursor < width) {
        if (attrs.rowspan !== 0) return span;
        span++;
        continued = true;
        break;
      }
      cursor -= width;
    }
    if (!continued) break;
  }
  return span;
}

/** Content of a cell: its first paragraph, or an empty paragraph if it has none. */
function convertCellContent(cell, hyperlinks, images) {
  for (const child of cell.children) {
    if (isElement(child, "w:p")) return convertParagraph(child, hyperlinks, images);
  }
  return { type: "paragraph", content: [] };
}

/**
 * Converts one `w:tr`. `pendingRowspans` maps a column index to the number of
 * upcoming rows in which that column is still covered by a cell from above.
 * `isFirstRow` is accepted for signature compatibility and is not used.
 */
function convertTableRow(row, isFirstRow, hyperlinks, images, pendingRowspans, rows, rowIndex) {
  const cells = [];
  let column = 0;
  for (const cell of row.children) {
    if (!isElement(cell, "w:tc")) continue;

    const pending = pendingRowspans.get(column);
    if (pending && pending > 0) {
      pendingRowspans.set(column, pending - 1);
      column++;
      continue;
    }

    let attrs = readCellAttrs(cell);
    if (attrs.rowspan === 1) {
      const span = countRowspan(rows, rowIndex, column);
      if (span > 1) attrs = { ...attrs, rowspan: span };
    }
    if (attrs.rowspan > 1) pendingRowspans.set(column, attrs.rowspan - 1);
    if (attrs.rowspan === 0) {
      column++;
      continue;
    }

    cells.push({
      type: "tableCell",
      attrs,
      content: [convertCellContent(cell, hyperlinks, images)],
    });
    column += attrs.colspan || 1;
  }
  return { type: "tableRow", content: cells };
}

/** Converts a `w:tbl` element into a `table` node. */
function convertTable(table, hyperlinks, images) {
  const rows = table.children.filter((child) => isElement(child, "w:tr"));
  const pendingRowspans = new Map();
  const content = rows.map((row, index) =>
    convertTableRow(row, index === 0, hyperlinks, images, pendingRowspans, rows, index),
  );
  return { type: "table", content };
}

/* -------------------------------------------------------------------------- */
/* Task lists                                                                 */
/* -------------------------------------------------------------------------- */

/** True when `value` begins with the unchecked or checked checkbox glyph. */
function startsWithCheckbox(value) {
  return value.startsWith(CHECKBOX_UNCHECKED) || value.startsWith(CHECKBOX_CHECKED);
}

/** Value of the first text-bearing `w:t` in the paragraph's first run, if any. */
function readLeadingText(paragraph) {
  const firstRun = findChild(paragraph, "w:r");
  if (!firstRun) return undefined;
  for (const child of firstRun.children) {
    if (!isElement(child, "w:t")) continue;
    const textNode = findTextNode(child);
    if (textNode && "value" in textNode) return textNode.value;
  }
  return undefined;
}

/** True when the paragraph's first run starts with a checkbox glyph. */
function isTaskItem(paragraph) {
  const text = readLeadingText(paragraph);
  return text === undefined ? false : startsWithCheckbox(text);
}

/** True when the paragraph's first run starts with the checked glyph. */
function getTaskItemChecked(paragraph) {
  const text = readLeadingText(paragraph);
  return text === undefined ? false : text.startsWith(CHECKBOX_CHECKED);
}

/** Bold / italic / underline / strike marks read from a run's first `w:rPr`. */
function extractBasicMarks(run) {
  const marks = [];
  const props = findChild(run, "w:rPr");
  if (!props) return marks;
  if (findChild(props, "w:b")) marks.push({ type: "bold" });
  if (findChild(props, "w:i")) marks.push({ type: "italic" });
  if (findChild(props, "w:u")) marks.push({ type: "underline" });
  if (findChild(props, "w:strike")) marks.push({ type: "strike" });
  return marks;
}

/** First recognised `w:jc` value found in any `w:pPr` of the paragraph. */
function extractTaskAlignment(paragraph) {
  for (const props of paragraph.children) {
    if (!isElement(props, "w:pPr")) continue;
    for (const child of props.children) {
      if (!isElement(child, "w:jc")) continue;
      const textAlign = ALIGNMENTS.get(child.attributes["w:val"]);
      if (textAlign) return { textAlign };
    }
  }
  return undefined;
}

/** Paragraph of a task item with the leading checkbox glyph removed. */
function convertTaskParagraph(paragraph) {
  const content = [];
  let markerSeen = false;

  for (const run of paragraph.children) {
    if (!isElement(run, "w:r")) continue;

    let runHoldsMarker = false;
    if (!markerSeen) {
      for (const child of run.children) {
        if (!isElement(child, "w:t")) continue;
        const textNode = findTextNode(child);
        if (!textNode || !("value" in textNode)) continue;
        if (startsWithCheckbox(textNode.value)) {
          runHoldsMarker = true;
          markerSeen = true;
          // Drop only the glyph, then any whitespace, so "☐Task" keeps its "T".
          const label = textNode.value.substring(1).trimStart();
          if (label.length > 0) content.push({ type: "text", text: label });
        }
      }
    }
    if (runHoldsMarker) continue;

    const marks = extractBasicMarks(run);
    for (const child of run.children) {
      if (!isElement(child, "w:t")) continue;
      const textNode = findTextNode(child);
      if (!textNode || !("value" in textNode)) continue;
      content.push(
        marks.length > 0
          ? { type: "text", text: textNode.value, marks }
          : { type: "text", text: textNode.value },
      );
    }
  }

  const attrs = extractTaskAlignment(paragraph);
  return {
    type: "paragraph",
    ...(attrs && { attrs }),
    content: content.length > 0 ? content : undefined,
  };
}

/** Converts a checkbox paragraph into a `taskItem` node. */
function convertTaskItem(paragraph) {
  return {
    type: "taskItem",
    attrs: { checked: getTaskItemChecked(paragraph) },
    content: [convertTaskParagraph(paragraph)],
  };
}

/* -------------------------------------------------------------------------- */
/* Horizontal rule                                                            */
/* -------------------------------------------------------------------------- */

/**
 * True when some direct run contains a page break and nothing else apart from
 * `w:rPr` and whitespace-only text.
 */
function isHorizontalRule(paragraph) {
  for (const run of paragraph.children) {
    if (!isElement(run, "w:r")) continue;
    let hasPageBreak = false;
    let hasOtherContent = false;
    for (const child of run.children) {
      if (child.type !== "element") continue;
      if (child.name === "w:br") {
        if (child.attributes["w:type"] === "page") hasPageBreak = true;
      } else if (child.name === "w:t") {
        const textNode = findTextNode(child);
        if (textNode && "value" in textNode && textNode.value && textNode.value.trim().length > 0) {
          hasOtherContent = true;
        }
      } else if (child.name !== "w:rPr") {
        hasOtherContent = true;
      }
    }
    if (hasPageBreak && !hasOtherContent) return true;
  }
  return false;
}

/* -------------------------------------------------------------------------- */
/* Image conversion                                                           */
/* -------------------------------------------------------------------------- */

/** Base64-encodes a byte array without relying on `btoa` or `Buffer`. */
function encodeBase64(bytes) {
  const length = bytes.length;
  const output = new Array(Math.ceil(length / 3) * 4);
  let cursor = 0;
  for (let i = 0; i < length; i += 3) {
    const first = bytes[i];
    const second = i + 1 < length ? bytes[i + 1] : 0;
    const third = i + 2 < length ? bytes[i + 2] : 0;
    output[cursor++] = BASE64_ALPHABET[first >> 2];
    output[cursor++] = BASE64_ALPHABET[((first & 3) << 4) | (second >> 4)];
    output[cursor++] = i + 1 < length ? BASE64_ALPHABET[((second & 15) << 2) | (third >> 6)] : "=";
    output[cursor++] = i + 2 < length ? BASE64_ALPHABET[third & 63] : "=";
  }
  return output.join("");
}

/** Default `convertImage`: embeds the image bytes as a base64 `data:` URI. */
const defaultImageConverter = async (image) => {
  const encoded = encodeBase64(image.data);
  return { src: `data:${image.contentType};base64,${encoded}` };
};

/** MIME type guessed from the image bytes; `image/png` when detection throws. */
function sniffContentType(bytes) {
  try {
    return `image/${imageMeta(bytes).type}`;
  } catch {
    return "image/png";
  }
}

/* -------------------------------------------------------------------------- */
/* Package parts                                                              */
/* -------------------------------------------------------------------------- */

/** Parses one archive entry as XML; `undefined` when the entry is missing. */
function readXmlPart(files, path) {
  const bytes = files[path];
  return bytes ? fromXml(new TextDecoder().decode(bytes)) : undefined;
}

/** `Relationship` elements of the given type from the document's rels part. */
function readRelationships(files, type) {
  const root = readXmlPart(files, RELATIONSHIPS_PATH);
  if (!root || root.type !== "root") return [];
  const relationships = findChild(root, "Relationships");
  if (!relationships) return [];
  return relationships.children.filter(
    (child) => isElement(child, "Relationship") && child.attributes.Type === type,
  );
}

/** Map from relationship id to the raw bytes of each image found in the archive. */
function collectImageFiles(files) {
  const images = new Map();
  for (const relationship of readRelationships(files, IMAGE_RELATIONSHIP)) {
    const { Id: id, Target: target } = relationship.attributes;
    if (!id || !target) continue;
    const bytes = files["word/" + target];
    if (bytes) images.set(id, bytes);
  }
  return images;
}

/** Map from relationship id to hyperlink target. */
function collectHyperlinks(files) {
  const links = new Map();
  for (const relationship of readRelationships(files, HYPERLINK_RELATIONSHIP)) {
    const { Id: id, Target: target } = relationship.attributes;
    if (id && target) links.set(id, target);
  }
  return links;
}

/**
 * Reads `word/numbering.xml` and returns a Map from `w:numId` to
 * `{ type: "bullet" }` or `{ type: "ordered", start? }`. Only the first
 * `w:lvl` of every abstract numbering definition is inspected.
 */
function parseNumbering(files) {
  const lists = new Map();
  const root = readXmlPart(files, "word/numbering.xml");
  if (!root || root.type !== "root") return lists;
  const numbering = findChild(root, "w:numbering");
  if (!numbering) return lists;

  const formats = new Map();
  const starts = new Map();
  for (const abstractNum of numbering.children) {
    if (!isElement(abstractNum, "w:abstractNum")) continue;
    const abstractId = abstractNum.attributes["w:abstractNumId"];
    const level = findChild(abstractNum, "w:lvl");
    if (!level) continue;

    // The first w:numFmt that actually carries a value wins.
    for (const child of level.children) {
      if (!isElement(child, "w:numFmt")) continue;
      const format = child.attributes["w:val"];
      if (format) {
        formats.set(abstractId, format);
        break;
      }
    }

    const start = findChild(level, "w:start");
    if (start && start.attributes["w:val"]) {
      starts.set(abstractId, parseInt(start.attributes["w:val"], 10));
    }
  }

  for (const num of numbering.children) {
    if (!isElement(num, "w:num")) continue;
    const reference = findChild(num, "w:abstractNumId");
    if (!reference) continue;
    const abstractId = reference.attributes["w:val"];
    const format = formats.get(abstractId);
    if (!format) continue;

    if (format === "bullet") {
      lists.set(num.attributes["w:numId"], { type: "bullet" });
    } else {
      const start = starts.get(abstractId);
      lists.set(num.attributes["w:numId"], {
        type: "ordered",
        ...(start !== undefined && { start }),
      });
    }
  }
  return lists;
}

/* -------------------------------------------------------------------------- */
/* Body conversion                                                            */
/* -------------------------------------------------------------------------- */

/** True when a run holds non-whitespace text, a drawing or a legacy picture. */
function runHasVisibleContent(run) {
  for (const child of run.children) {
    if (child.type !== "element") continue;
    if (child.name === "w:t") {
      const textNode = findTextNode(child);
      if (textNode && "value" in textNode && textNode.value.trim().length > 0) return true;
    } else if (VISUAL_RUN_CONTENT.has(child.name)) {
      return true;
    }
  }
  return false;
}

/**
 * True when the paragraph has neither non-whitespace text nor an image, in
 * its direct runs or in runs nested inside a `w:hyperlink`.
 */
function isEmptyParagraph(paragraph) {
  for (const child of paragraph.children) {
    if (child.type !== "element") continue;
    if (child.name === "w:r" && runHasVisibleContent(child)) return false;
    if (
      child.name === "w:hyperlink" &&
      child.children.some((run) => isElement(run, "w:r") && runHasVisibleContent(run))
    ) {
      return false;
    }
  }
  return true;
}

/** Number of consecutive `w:p` elements from `start` that satisfy `predicate`. */
function countParagraphRun(elements, start, predicate) {
  let index = start;
  while (index < elements.length && elements[index].name === "w:p" && predicate(elements[index])) {
    index++;
  }
  return index - start;
}

/** Text nodes of a code paragraph (marks are not applied). */
function extractCodeText(paragraph) {
  const content = [];
  for (const run of paragraph.children) {
    if (!isElement(run, "w:r")) continue;
    for (const child of run.children) {
      if (!isElement(child, "w:t")) continue;
      const textNode = findTextNode(child);
      if (textNode && "value" in textNode) content.push({ type: "text", text: textNode.value });
    }
  }
  return content;
}

/** One `codeBlock` node per consecutive code paragraph starting at `start`. */
function convertCodeBlocks(elements, start) {
  const count = countParagraphRun(elements, start, isCodeBlock);
  return elements.slice(start, start + count).map((paragraph) => {
    const language = getCodeBlockLanguage(paragraph);
    return {
      type: "codeBlock",
      ...(language && { attrs: { language } }),
      content: extractCodeText(paragraph),
    };
  });
}

/** A single `taskList` node wrapping the consecutive task paragraphs at `start`. */
function convertTaskList(elements, start) {
  const count = countParagraphRun(elements, start, isTaskItem);
  const items = elements.slice(start, start + count).map(convertTaskItem);
  return [{ type: "taskList", content: items }];
}

/**
 * Converts consecutive list paragraphs starting at `start` into list nodes,
 * opening a new list whenever `numId` changes. Conversion stops at the first
 * paragraph for which `getListInfo` returns `null`.
 */
function convertLists(elements, start, images, hyperlinks, lists) {
  const result = [];
  let index = start;
  while (index < elements.length) {
    const first = elements[index];
    if (first.name !== "w:p" || !isListItem(first)) break;
    const info = getListInfo(first);
    if (!info) break;

    const definition = lists.get(info.numId);
    const kind = definition?.type || "bullet";

    const items = [];
    while (index < elements.length) {
      const candidate = elements[index];
      if (candidate.name !== "w:p" || !isListItem(candidate)) break;
      const candidateInfo = getListInfo(candidate);
      if (!candidateInfo || candidateInfo.numId !== info.numId) break;
      items.push({
        type: "listItem",
        content: [convertParagraph(candidate, hyperlinks, images)],
      });
      index++;
    }

    const list = { type: kind === "bullet" ? "bulletList" : "orderedList", content: items };
    if (kind === "ordered") {
      list.attrs = {
        type: null,
        ...(definition?.start !== undefined && { start: definition.start }),
      };
    }
    result.push(list);
  }
  return result;
}

/** Converts the element children of `w:body` into block nodes. */
function convertBody(elements, images, hyperlinks, lists, ignoreEmptyParagraphs) {
  const blocks = [];
  let index = 0;
  while (index < elements.length) {
    const element = elements[index];

    if (element.name === "w:tbl") {
      blocks.push(convertTable(element, hyperlinks, images));
      index++;
      // An empty paragraph directly after a table is skipped.
      if (
        index < elements.length &&
        elements[index].name === "w:p" &&
        isEmptyParagraph(elements[index])
      ) {
        index++;
      }
      continue;
    }

    if (element.name !== "w:p") {
      index++;
      continue;
    }

    if (ignoreEmptyParagraphs && isEmptyParagraph(element)) {
      index++;
    } else if (isCodeBlock(element)) {
      const codeBlocks = convertCodeBlocks(elements, index);
      blocks.push(...codeBlocks);
      index += codeBlocks.length;
    } else if (isTaskItem(element)) {
      blocks.push(...convertTaskList(elements, index));
      index += countParagraphRun(elements, index, isTaskItem);
    } else if (isListItem(element)) {
      blocks.push(...convertLists(elements, index, images, hyperlinks, lists));
      index += countParagraphRun(elements, index, isListItem);
    } else if (isHorizontalRule(element)) {
      blocks.push({ type: "horizontalRule" });
      index++;
    } else {
      blocks.push(convertParagraph(element, hyperlinks, images));
      index++;
    }
  }
  return blocks;
}

/** Converts the parsed `word/document.xml` tree into a `doc` node. */
function convertDocument(root, images, hyperlinks, lists, ignoreEmptyParagraphs) {
  if (root.type === "root") {
    const documentElement = findChild(root, "w:document");
    const body = documentElement && findChild(documentElement, "w:body");
    if (body) {
      const elements = body.children.filter((child) => child.type === "element");
      return {
        type: "doc",
        content: convertBody(elements, images, hyperlinks, lists, ignoreEmptyParagraphs),
      };
    }
  }
  return { type: "doc", content: [] };
}

/**
 * Parses a DOCX file into a `doc` node.
 *
 * @param input   DOCX data; converted to bytes with undio's `toUint8Array`
 * @param options `convertImage` (defaults to base64 embedding) and
 *                `ignoreEmptyParagraphs` (defaults to `false`)
 * @returns the converted document
 * @throws Error when the archive has no `word/document.xml`
 */
async function parseDOCX(input, options = {}) {
  const { convertImage = defaultImageConverter, ignoreEmptyParagraphs = false } = options;

  const bytes = await toUint8Array(input);
  const files = unzipSync(bytes);
  const hyperlinks = collectHyperlinks(files);
  const imageFiles = collectImageFiles(files);

  const images = new Map();
  for (const [id, data] of imageFiles.entries()) {
    try {
      const converted = await convertImage({ id, contentType: sniffContentType(data), data });
      images.set(id, converted.src);
    } catch (error) {
      // A failing custom converter must not lose the image: embed it instead.
      console.warn(`Failed to convert image ${id}:`, error);
      images.set(id, `data:${sniffContentType(data)};base64,${encodeBase64(data)}`);
    }
  }

  const root = readXmlPart(files, "word/document.xml");
  if (!root) throw new Error("Invalid DOCX file: missing word/document.xml");

  const lists = parseNumbering(files);
  return convertDocument(root, images, hyperlinks, lists, ignoreEmptyParagraphs);
}

export {
  convertParagraph,
  convertTable,
  convertTaskItem,
  defaultImageConverter,
  extractAlignment,
  extractMarks,
  extractRuns,
  getCodeBlockLanguage,
  getListInfo,
  getTaskItemChecked,
  isCodeBlock,
  isHorizontalRule,
  isListItem,
  isTable,
  isTaskItem,
  parseDOCX,
};
|
|
1
|
+
import{fromXml as W}from"xast-util-from-xml";import{unzipSync as et}from"fflate";import{toUint8Array as nt}from"undio";import{imageMeta as F}from"image-meta";function L(o,n,e){const t=[];for(const r of n.children)if(r.type==="element"){if(r.name==="w:hyperlink"){const s=r,i=s.attributes["r:id"],a=o.get(i);if(a){for(const c of s.children)if(c.type==="element"&&c.name==="w:r"){const f=c;let l=h(f,"w:drawing");if(!l){const w=h(f,"mc:AlternateContent");if(w){const y=h(w,"mc:Choice");y&&(l=h(y,"w:drawing"))}}if(l){const w=j(l,e);if(w){t.push(w);continue}const y=_(l,e);if(t.push(...y),y.length>0)continue}const u=h(f,"w:t");if(!u)continue;const p=u.children.find(w=>w.type==="text");if(!p||!p.value)continue;const d=S(f);d.push({type:"link",attrs:{href:a}});const m={type:"text",text:p.value};d.length>0&&(m.marks=d),t.push(m)}}continue}if(r.name==="w:r"){const s=r;let i=h(s,"w:drawing");if(!i){const u=h(s,"mc:AlternateContent");if(u){const p=h(u,"mc:Choice");p&&(i=h(p,"w:drawing"))}}if(i){const u=_(i,e);if(t.push(...u),u.length>0)continue}if(h(s,"w:br")){const u=S(s),p={type:"hardBreak"};u.length>0&&(p.marks=u),t.push(p)}const a=h(s,"w:t");if(!a)continue;const c=a.children.find(u=>u.type==="text");if(!c||!c.value)continue;const f=S(s),l={type:"text",text:c.value};f.length>0&&(l.marks=f),t.push(l)}}return t}function S(o){const n=[],e=h(o,"w:rPr");if(!e)return n;h(e,"w:b")&&n.push({type:"bold"}),h(e,"w:i")&&n.push({type:"italic"}),h(e,"w:u")&&n.push({type:"underline"}),h(e,"w:strike")&&n.push({type:"strike"}),h(e,"w:highlight")&&n.push({type:"highlight"});const t=h(e,"w:vertAlign");if(t){const c=t.attributes["w:val"];c==="subscript"?n.push({type:"subscript"}):c==="superscript"&&n.push({type:"superscript"})}const r=h(e,"w:color"),s=h(e,"w:shd"),i=h(e,"w:sz"),a=h(e,"w:rFonts");if(r||s||i||a){const c={color:"",backgroundColor:"",fontSize:"",fontFamily:"",lineHeight:""};if(r&&r.attributes["w:val"]){const f=r.attributes["w:val"];if(f!=="auto"){const 
l=f.startsWith("#")?f:`#${f}`;c.color=l}}if(s&&s.attributes["w:fill"]){const f=s.attributes["w:fill"];if(f!=="auto"){const l=f.startsWith("#")?f:`#${f}`;c.backgroundColor=l}}if(i&&i.attributes["w:val"]){const f=i.attributes["w:val"],l=parseFloat(f);if(!isNaN(l)){const u=Math.round(l/1.5*10)/10;c.fontSize=`${u}px`}}a&&a.attributes["w:ascii"]&&(c.fontFamily=a.attributes["w:ascii"]),n.push({type:"textStyle",attrs:c})}return n}function O(o){const n=h(o,"w:pPr");if(!n)return;const e=h(n,"w:jc");if(!e?.attributes["w:val"])return;const t=e.attributes["w:val"],r={left:"left",right:"right",center:"center",both:"justify"}[t];return r?{textAlign:r}:void 0}function j(o,n){const e=A(o,"a:blip");if(!e?.attributes["r:embed"])return null;const t=e.attributes["r:embed"],r=n.get(t);if(!r)return null;const s=A(o,"wp:extent");let i,a;if(s){const l=s.attributes.cx,u=s.attributes.cy;if(typeof l=="string"){const p=parseInt(l,10);isNaN(p)||(i=Math.round(p/9525))}if(typeof u=="string"){const p=parseInt(u,10);isNaN(p)||(a=Math.round(p/9525))}}const c=A(o,"wp:docPr");let f;if(c){const l=c.attributes.title;typeof l=="string"&&l&&(f=l)}return{type:"image",attrs:{src:r,alt:"",...i!==void 0&&{width:i},...a!==void 0&&{height:a},...f!==void 0&&{title:f}}}}function _(o,n){const e=[],t=h(o,"wp:inline")||h(o,"wp:anchor");if(!t)return e;const r=h(t,"wp:extent");let s,i;if(r){const l=r.attributes.cx,u=r.attributes.cy;if(typeof l=="string"){const p=parseInt(l,10);isNaN(p)||(s=Math.round(p/9525))}if(typeof u=="string"){const p=parseInt(u,10);isNaN(p)||(i=Math.round(p/9525))}}const a=h(t,"a:graphic");if(!a)return e;const c=h(a,"a:graphicData");if(!c)return e;const f=h(c,"wpg:wgp");if(f){const l=h(f,"wpg:grpSp");let u=[];if(l){const p=T(l,"pic:pic"),d=T(l,"pic");u=[...p,...d]}else{const p=T(f,"pic:pic"),d=T(f,"pic");u=[...p,...d]}for(const p of u){const d=h(p,"a:graphic");if(!d){const w=h(p,"pic:blipFill")||A(p,"a:blipFill");if(w){const y=h(w,"a:blip")||A(w,"a:blip");if(y&&y.attributes["r:embed"]){const 
x=y.attributes["r:embed"],g=n.get(x);if(g){let b=s,I=i;if(g&&s&&i)try{let v,k;if(g.startsWith("data:")){const C=g.split(",")[1];if(C){const M=atob(C),U=new Uint8Array(M.length);for(let N=0;N<M.length;N++)U[N]=M.charCodeAt(N);const X=F(U);v=X.width,k=X.height}}if(v&&k){const C=v/k,M=s/i;Math.abs(C-M)>.1&&(C>M?(b=s,I=Math.round(s/C)):(I=i,b=Math.round(i*C)))}}catch(v){console.warn("Failed to extract image metadata for aspect ratio:",v)}e.push({type:"image",attrs:{src:g,alt:"",...b!==void 0&&{width:b},...I!==void 0&&{height:I}}})}}}continue}const m=j({children:[d]},n);if(m){if(s!==void 0&&i!==void 0&&m.attrs.src)try{const w=m.attrs.src;let y,x;if(w.startsWith("data:")){const g=w.split(",")[1];if(g){const b=atob(g),I=new Uint8Array(b.length);for(let k=0;k<b.length;k++)I[k]=b.charCodeAt(k);const v=F(I);y=v.width,x=v.height}}if(y&&x){const g=y/x,b=s/i;Math.abs(g-b)>.1?g>b?(m.attrs.width=s,m.attrs.height=Math.round(s/g)):(m.attrs.height=i,m.attrs.width=Math.round(i*g)):(m.attrs.width=s,m.attrs.height=i)}else m.attrs.width=s,m.attrs.height=i}catch{m.attrs.width=s,m.attrs.height=i}e.push(m)}}}else{const l=j(o,n);l&&e.push(l)}return e}function h(o,n){for(const e of o.children)if(e.type==="element"&&e.name===n)return e}function A(o,n){for(const e of o.children)if(e.type==="element"&&e.name===n)return e;for(const e of o.children)if(e.type==="element"){const t=A(e,n);if(t)return t}}function T(o,n){const e=[];for(const t of o.children)t.type==="element"&&t.name===n&&e.push(t);for(const t of o.children)t.type==="element"&&e.push(...T(t,n));return e}function R(o,n,e){let t;for(const i of o.children)if(i.type==="element"&&i.name==="w:pPr"){const a=i;for(const c of a.children)if(c.type==="element"&&c.name==="w:pStyle"){t=c.attributes["w:val"];break}break}if(t){const i=t.match(/^Heading(\d)$/);if(i){const a=parseInt(i[1]);return rt(o,n,a,e)}}const r=L(n,o,e);if(r.length===1&&r[0].type==="hardBreak"){for(const i of o.children)if(i.type==="element"&&i.name==="w:r"){for(const a of 
i.children)if(a.type==="element"&&a.name==="w:br"&&a.attributes["w:type"]==="page")return{type:"horizontalRule"}}}if(r.length===1&&r[0].type==="image")return r[0];const s=O(o);return{type:"paragraph",...s&&{attrs:s},content:r}}function rt(o,n,e,t){return{type:"heading",attrs:{level:e},content:L(n,o,t)}}function $(o){const n=P(o,"w:pPr");return n?!!P(n,"w:numPr"):!1}function E(o){const n=P(o,"w:pPr");if(!n)return null;const e=P(n,"w:numPr");if(!e)return null;const t=P(e,"w:ilvl"),r=P(e,"w:numId");return!t||!r?null:{numId:r.attributes["w:val"],level:parseInt(t.attributes["w:val"]||"0")}}function P(o,n){for(const e of o.children)if(e.type==="element"&&e.name===n)return e}function H(o){const n=B(o,"w:pPr");if(!n)return!1;const e=B(n,"w:pStyle");if(!e)return!1;const t=e.attributes["w:val"];return t==="CodeBlock"||t?.startsWith("Code")}function J(o){const n=B(o,"w:pPr");if(!n)return;const e=B(n,"w:pStyle");if(!e)return;const t=e.attributes["w:val"];if(t?.startsWith("CodeBlock"))return t.replace("CodeBlock","").toLowerCase()||void 0}function B(o,n){for(const e of o.children)if(e.type==="element"&&e.name===n)return e}function ot(o){return o.name==="w:tbl"}function q(o,n,e){const t=[],r=[];for(const i of o.children)i.type==="element"&&i.name==="w:tr"&&r.push(i);const s=new Map;return r.forEach((i,a)=>{t.push(it(i,a===0,n,e,s,r,a))}),{type:"table",content:t}}function it(o,n,e,t,r,s,i){const a=[];let c=0;for(const f of o.children)if(f.type==="element"&&f.name==="w:tc"){const l=r.get(c);if(l&&l>0){r.set(c,l-1),c++;continue}let u=G(f);if(u&&u.rowspan===1){const m=st(s,i,c);m>1&&(u={...u,rowspan:m})}if(u&&u.rowspan>1&&r.set(c,u.rowspan-1),u&&u.rowspan===0){c++;continue}const p="tableCell",d=ct(f,e,t);a.push({type:p,...u&&{attrs:u},content:[d]}),c+=u?.colspan||1}return{type:"tableRow",content:a}}function G(o){const n={colspan:1,rowspan:1,colwidth:null};let e;for(const t of o.children)if(t.type==="element"&&t.name==="w:tcPr"){e=t;break}if(!e)return n;for(const t of 
e.children)if(t.type==="element"&&t.name==="w:gridSpan"){const r=t.attributes["w:val"];r&&(n.colspan=parseInt(r));break}for(const t of e.children)if(t.type==="element"&&t.name==="w:vMerge"){t.attributes["w:val"]==="continue"&&(n.rowspan=0);break}for(const t of e.children)if(t.type==="element"&&t.name==="w:tcW"){const r=t.attributes["w:w"];r&&(n.colwidth=parseInt(r));break}return n}/* st(o,n,e): returns a row count that starts at 1 and is incremented for each following row (index n+1 onward in o) whose w:tc at the tracked column offset has rowspan===0 in the result of G (presumably the vMerge continue marker - verify against G); stops at the first row that does not continue. NOTE(review): the offset r is initialised from e once and is not reset at the start of each row - confirm this is intended. */function st(o,n,e){let t=1,r=e;for(let s=n+1;s<o.length;s++){const i=o[s];let a=!1;for(const c of i.children)if(c.type==="element"&&c.name==="w:tc"){const f=G(c),l=f?.colspan||1;if(r>=0&&r<l){if(f?.rowspan===0)t++,a=!0;else return t;break}r-=l}if(!a)break}return t}/* ct(o,n,e): converts every w:p child of o with R(r,n,e) but returns only the first result, or an empty paragraph node when there is none. */function ct(o,n,e){const t=[];for(const r of o.children)if(r.type==="element"&&r.name==="w:p"){const s=R(r,n,e);t.push(s)}return t[0]||{type:"paragraph",content:[]}}/* D (exported as isTaskItem): inspects only the first w:r child of o; returns true when the first w:t text found in that run starts with U+2610 or U+2611, otherwise false. */function D(o){for(const n of o.children)if(n.type==="element"&&n.name==="w:r"){for(const e of n.children)if(e.type==="element"&&e.name==="w:t"){const t=e.children.find(r=>r.type==="text");if(t&&"value"in t){const r=t.value;return r.startsWith("\u2610")||r.startsWith("\u2611")}}break}return!1}/* K (exported as getTaskItemChecked): inspects only the first w:r child of o; returns true when the first w:t text found in that run starts with U+2611, otherwise false. */function K(o){for(const n of o.children)if(n.type==="element"&&n.name==="w:r"){for(const e of n.children)if(e.type==="element"&&e.name==="w:t"){const t=e.children.find(r=>r.type==="text");if(t&&"value"in t)return t.value.startsWith("\u2611")}break}return!1}/* Q (exported as convertTaskItem): builds a taskItem node with attrs.checked taken from K(o) and the paragraph from at(o) as its only content. */function Q(o){const n=K(o),e=at(o);return{type:"taskItem",attrs:{checked:n},content:[e]}}/* at(o): builds the paragraph node for a task item. Until a checkbox prefix (U+2610 or U+2611) has been seen, the w:t texts of each run are checked for it; the matching text has its first two UTF-16 code units removed with substring(2), is trimStart-ed, and is pushed without marks when non-empty. Runs with no checkbox match push one text node per w:t, carrying the marks from ft(r). Alignment attrs come from lt(o); content is undefined when no text was collected. NOTE(review): substring(2) assumes exactly one character follows the checkbox glyph - confirm. */function at(o){const n=[];let e=!1;for(const r of o.children)if(r.type==="element"&&r.name==="w:r"){let s=!1;if(!e){for(const i of r.children)if(i.type==="element"&&i.name==="w:t"){const a=i.children.find(c=>c.type==="text");if(a&&"value"in a){const c=a.value;if(c.startsWith("\u2610")||c.startsWith("\u2611")){s=!0,e=!0;const f=c.substring(2).trimStart();f.length>0&&n.push({type:"text",text:f})}}}}if(!s){const i=ft(r);for(const a of r.children)if(a.type==="element"&&a.name==="w:t"){const c=a.children.find(f=>f.type==="text");if(c&&"value"in 
c){const f={type:"text",text:c.value};i.length>0&&(f.marks=i),n.push(f)}}}}const t=lt(o);return{type:"paragraph",...t&&{attrs:t},content:n.length>0?n:void 0}}/* ft(o): reads the first w:rPr child of run o and returns marks for w:b (bold), w:i (italic), w:u (underline) and w:strike (strike), in that order; returns an empty array when there is no w:rPr. */function ft(o){const n=[];for(const e of o.children)if(e.type==="element"&&e.name==="w:rPr"){const t=e;for(const r of t.children)if(r.type==="element"&&r.name==="w:b"){n.push({type:"bold"});break}for(const r of t.children)if(r.type==="element"&&r.name==="w:i"){n.push({type:"italic"});break}for(const r of t.children)if(r.type==="element"&&r.name==="w:u"){n.push({type:"underline"});break}for(const r of t.children)if(r.type==="element"&&r.name==="w:strike"){n.push({type:"strike"});break}break}return n}/* lt(o): scans the w:pPr children of o for w:jc and maps its w:val of both, center, right or left to a textAlign of justify, center, right or left; returns undefined for anything else. */function lt(o){for(const n of o.children)if(n.type==="element"&&n.name==="w:pPr"){const e=n;for(const t of e.children)if(t.type==="element"&&t.name==="w:jc"){const r=t.attributes["w:val"];if(r==="both")return{textAlign:"justify"};if(r==="center")return{textAlign:"center"};if(r==="right")return{textAlign:"right"};if(r==="left")return{textAlign:"left"}}}}/* V (exported as isHorizontalRule): true when some w:r of o contains a w:br whose w:type is page and no other content, where content means a w:t with non-whitespace text or any element other than w:br, w:t and w:rPr. */function V(o){for(const n of o.children)if(n.type==="element"&&n.name==="w:r"){const e=n;let t=!1,r=!1;for(const s of e.children)if(s.type==="element")if(s.name==="w:br")s.attributes["w:type"]==="page"&&(t=!0);else if(s.name==="w:t"){const i=s.children.find(a=>a.type==="text");i&&"value"in i&&i.value&&i.value.trim().length>0&&(r=!0)}else s.name!=="w:rPr"&&(r=!0);if(t&&!r)return!0}return!1}/* z: base64 alphabet indexed by Y. */const z="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";/* Y(o): encodes the indexable byte sequence o as a base64 string, three input bytes per four output characters, padding the final group with = signs. */function Y(o){const n=o.length,e=Math.ceil(n/3)*4,t=Array.from({length:e});let r=0;for(let s=0;s<n;s+=3){const i=o[s],a=s+1<n?o[s+1]:0,c=s+2<n?o[s+2]:0,f=i>>2,l=(i&3)<<4|a>>4,u=(a&15)<<2|c>>6,p=c&63;t[r++]=z[f],t[r++]=z[l],t[r++]=s+1<n?z[u]:"=",t[r++]=s+2<n?z[p]:"="}return t.join("")}/* Z (exported as defaultImageConverter): resolves to an object whose src is a data URI built from o.contentType and the base64 form of o.data. */const Z=async o=>{const n=Y(o.data);return{src:`data:${o.contentType};base64,${n}`}};/* ut (exported as parseDOCX): n may supply convertImage (default Z) and ignoreEmptyParagraphs (default false). s=et(await nt(o)) is indexed by archive path; hyperlink targets come from mt(s) and image data from ht(s). Each image is passed to convertImage with a content type of image/ plus F(d).type (image/png when F throws) and the resulting src is stored by relationship id; when convertImage throws, a warning is logged and a base64 data URI is stored instead. Throws Error when word/document.xml is missing; otherwise parses it with W and returns dt(...) using the numbering info from pt(s). */async function ut(o,n={}){const{convertImage:e=Z,ignoreEmptyParagraphs:t=!1}=n,r=await nt(o),s=et(r),i=mt(s),a=ht(s),c=new Map;for(const[p,d]of 
a.entries())try{let m;try{m=`image/${F(d).type}`}catch{m="image/png"}const w=await e({id:p,contentType:m,data:d});c.set(p,w.src)}catch(m){console.warn(`Failed to convert image ${p}:`,m);let w="image/png";try{w=`image/${F(d).type}`}catch{}const y=Y(d),x=`data:${w};base64,${y}`;c.set(p,x)}const f=s["word/document.xml"];if(!f)throw new Error("Invalid DOCX file: missing word/document.xml");const l=W(new TextDecoder().decode(f)),u=pt(s);return dt(l,c,i,u,t)}/* pt(o): parses word/numbering.xml into a Map from w:numId to a bullet descriptor or an ordered descriptor with an optional start. Only the first w:lvl of each w:abstractNum is read for w:numFmt and w:start; any numFmt other than bullet is treated as ordered; a w:num whose abstract definition has no recorded numFmt is omitted. Returns an empty Map when the file is absent. */function pt(o){const n=new Map,e=new Map,t=o["word/numbering.xml"];if(!t)return n;const r=W(new TextDecoder().decode(t)),s=new Map;if(r.type==="root"){for(const i of r.children)if(i.type==="element"&&i.name==="w:numbering"){const a=i;for(const c of a.children)if(c.type==="element"&&c.name==="w:abstractNum"){const f=c,l=f.attributes["w:abstractNumId"];for(const u of f.children)if(u.type==="element"&&u.name==="w:lvl"){for(const p of u.children)if(p.type==="element"&&p.name==="w:numFmt"){const d=p.attributes["w:val"];if(d){s.set(l,d);break}}for(const p of u.children)if(p.type==="element"&&p.name==="w:start"){const d=p.attributes["w:val"];d&&e.set(l,parseInt(d,10));break}break}}for(const c of a.children)if(c.type==="element"&&c.name==="w:num"){const f=c,l=f.attributes["w:numId"];for(const u of f.children)if(u.type==="element"&&u.name==="w:abstractNumId"){const p=u.attributes["w:val"],d=s.get(p);if(d){const m=e.get(p);d==="bullet"?n.set(l,{type:"bullet"}):n.set(l,{type:"ordered",...m!==void 0&&{start:m}})}break}}break}}return n}/* ht(o): reads word/_rels/document.xml.rels and, for each Relationship whose Type is the image relationship URI, maps its Id to the archive entry stored under word/ followed by its Target; targets that are not present in o are skipped. Returns an empty Map when the rels file is absent. */function ht(o){const n=new Map,e=o["word/_rels/document.xml.rels"];if(!e)return n;const t=W(new TextDecoder().decode(e));if(t.type==="root"){for(const r of t.children)if(r.type==="element"&&r.name==="Relationships"){const s=r;for(const i of s.children)if(i.type==="element"&&i.name==="Relationship"){const a=i,c=a.attributes.Type;if(c&&c==="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"){const f=a.attributes.Id,l=a.attributes.Target;if(f&&l){const 
u="word/"+l,p=o[u];p&&n.set(f,p)}}}break}}return n}/* mt(o): reads word/_rels/document.xml.rels and maps the Id of each hyperlink Relationship to its Target string. Returns an empty Map when the rels file is absent. */function mt(o){const n=new Map,e=o["word/_rels/document.xml.rels"];if(!e)return n;const t=W(new TextDecoder().decode(e));if(t.type==="root"){for(const r of t.children)if(r.type==="element"&&r.name==="Relationships"){const s=r;for(const i of s.children)if(i.type==="element"&&i.name==="Relationship"){const a=i,c=a.attributes.Type;if(c&&c==="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"){const f=a.attributes.Id,l=a.attributes.Target;f&&l&&n.set(f,l)}}break}}return n}/* dt(o,n,e,t,r): locates w:document and then w:body under the parsed root o and returns a doc node whose content is wt() over the element children of the body; returns an empty doc when o is not a root or either element is missing. */function dt(o,n,e,t,r){if(o.type!=="root")return{type:"doc",content:[]};for(const s of o.children)if(s.type==="element"&&s.name==="w:document"){const i=s;for(const a of i.children)if(a.type==="element"&&a.name==="w:body")return{type:"doc",content:wt(a.children.filter(c=>c.type==="element"),n,e,t,r)};break}return{type:"doc",content:[]}}/* wt(o,n,e,t,r): walks the body elements in order. A w:tbl is converted with q and a directly following empty paragraph (per tt) is skipped. A w:p is skipped when r is truthy and tt(a) holds; otherwise it is dispatched in priority order: H to yt (code blocks), D to vt (task list), $ to gt (lists), V to a horizontalRule node, else R. Other element names are ignored. NOTE(review): list handling advances by bt(), which counts every consecutive list paragraph, while gt() can stop early when E() returns nothing - confirm the two always agree. */function wt(o,n,e,t,r){const s=[];let i=0;for(;i<o.length;){const a=o[i];if(a.name==="w:tbl"){s.push(q(a,e,n)),i++,i<o.length&&o[i].name==="w:p"&&tt(o[i])&&i++;continue}if(a.name==="w:p"){if(r&&tt(a)){i++;continue}if(H(a)){const c=yt(o,i);s.push(...c),i+=c.length;continue}if(D(a)){const c=vt(o,i);s.push(...c),i+=kt(o,i);continue}if($(a)){const c=gt(o,i,n,e,t);s.push(...c),i+=bt(o,i);continue}if(V(a)){s.push({type:"horizontalRule"}),i++;continue}s.push(R(a,e,n)),i++;continue}i++}return s}/* yt(o,n): starting at index n, emits one codeBlock node per consecutive w:p for which H is true, with attrs.language from J when it returns a value and content from xt. */function yt(o,n){const e=[];let t=n;for(;t<o.length;){const r=o[t];if(r.name!=="w:p"||!H(r))break;const s=J(r),i={type:"codeBlock",...s&&{attrs:{language:s}},content:xt(r)};e.push(i),t++}return e}/* gt(o,n,e,t,r): starting at index n, groups consecutive list paragraphs (per $) that share the same numId (from E) into bulletList or orderedList nodes, one listItem per paragraph built with R(d,t,e). The list type is looked up in r by numId and defaults to bullet; ordered lists receive attrs with type null and the start value when one is recorded. */function gt(o,n,e,t,r){const s=[];let i=n;for(;i<o.length;){const a=o[i];if(a.name!=="w:p"||!$(a))break;const c=E(a);if(!c)break;const f=r.get(c.numId),l=f?.type||"bullet",u=[];for(;i<o.length;){const d=o[i];if(d.name!=="w:p"||!$(d))break;const m=E(d);if(!m||m.numId!==c.numId)break;const w={type:"listItem",content:[R(d,t,e)]};u.push(w),i++}const 
p={type:l==="bullet"?"bulletList":"orderedList",content:u};l==="ordered"&&(p.attrs={type:null,...f?.start!==void 0&&{start:f.start}}),s.push(p)}return s}/* bt(o,n): number of consecutive w:p elements from index n for which $ is true. */function bt(o,n){let e=0,t=n;for(;t<o.length;){const r=o[t];if(r.name!=="w:p"||!$(r))break;e++,t++}return e}/* vt(o,n): converts each consecutive task-item paragraph (per D) from index n with Q and wraps them all in a single taskList node, returned as a one-element array. */function vt(o,n){const e=[];let t=n;for(;t<o.length;){const r=o[t];if(r.name!=="w:p"||!D(r))break;const s=Q(r);e.push(s),t++}return[{type:"taskList",content:e}]}/* kt(o,n): number of consecutive w:p elements from index n for which D is true. */function kt(o,n){let e=0,t=n;for(;t<o.length;){const r=o[t];if(r.name!=="w:p"||!D(r))break;e++,t++}return e}/* xt(o): returns one unmarked text node for every w:t that has a text child, across all w:r children of o. */function xt(o){const n=[];for(const e of o.children){if(e.type!=="element"||e.name!=="w:r")continue;const t=e;for(const r of t.children)if(r.type==="element"&&r.name==="w:t"){const s=r.children.find(i=>i.type==="text");s&&"value"in s&&n.push({type:"text",text:s.value})}}return n}/* tt(o): true when the paragraph is empty, meaning no w:r holds a w:t with non-whitespace text or a w:drawing, mc:AlternateContent or w:pict element. NOTE(review): whitespace-only text counts as empty here because of trim(), whereas the README text for ignoreEmptyParagraphs says whitespace-only paragraphs are not considered empty - confirm which is intended. */function tt(o){for(const n of o.children){if(n.type!=="element"||n.name!=="w:r")continue;const e=n;for(const t of e.children){if(t.type==="element"&&t.name==="w:t"){const r=t.children.find(s=>s.type==="text");if(r&&"value"in r&&r.value.trim().length>0)return!1}if(t.type==="element"&&(t.name==="w:drawing"||t.name==="mc:AlternateContent"||t.name==="w:pict"))return!1}}return!0}/* Public API: the minified identifiers are re-exported under their descriptive names. */export{R as convertParagraph,q as convertTable,Q as convertTaskItem,Z as defaultImageConverter,O as extractAlignment,S as extractMarks,L as extractRuns,J as getCodeBlockLanguage,E as getListInfo,K as getTaskItemChecked,H as isCodeBlock,V as isHorizontalRule,$ as isListItem,ot as isTable,D as isTaskItem,ut as parseDOCX};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@docen/import-docx",
|
|
3
|
-
"version": "0.0.1",
|
|
3
|
+
"version": "0.0.2",
|
|
4
4
|
"description": "A powerful TipTap/ProseMirror extension that imports Microsoft Word DOCX files to editor content",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"converter",
|
|
@@ -52,7 +52,7 @@
|
|
|
52
52
|
"devDependencies": {
|
|
53
53
|
"@tiptap/core": "3.7.2",
|
|
54
54
|
"@types/xast": "2.0.4",
|
|
55
|
-
"@docen/tiptap-extensions": "0.0.
|
|
55
|
+
"@docen/tiptap-extensions": "0.0.2"
|
|
56
56
|
},
|
|
57
57
|
"scripts": {
|
|
58
58
|
"dev": "unbuild --stub",
|