@docen/import-docx 0.0.8 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -9
- package/dist/index.cjs +1 -1
- package/dist/index.d.cts +60 -61
- package/dist/index.d.mts +60 -61
- package/dist/index.d.ts +60 -61
- package/dist/index.mjs +1 -1
- package/package.json +5 -4
- package/dist/chunks/index.cjs +0 -1
- package/dist/chunks/index.mjs +0 -1
package/README.md
CHANGED
|
@@ -89,10 +89,10 @@ interface DocxImportOptions {
|
|
|
89
89
|
|
|
90
90
|
/**
|
|
91
91
|
* Enable or disable image cropping during import
|
|
92
|
-
* When true
|
|
93
|
-
* When false, crop information is ignored and full image is used
|
|
92
|
+
* When true, images with crop information in DOCX will be cropped
|
|
93
|
+
* When false (default), crop information is ignored and full image is used
|
|
94
94
|
*
|
|
95
|
-
* @default true
|
|
95
|
+
* @default false
|
|
96
96
|
*/
|
|
97
97
|
enableImageCrop?: boolean;
|
|
98
98
|
}
|
|
@@ -220,7 +220,7 @@ async function importDocx(file: File) {
|
|
|
220
220
|
|
|
221
221
|
### Node.js Environment with Image Cropping
|
|
222
222
|
|
|
223
|
-
|
|
223
|
+
To enable image cropping in Node.js environment, you need to provide `@napi-rs/canvas`:
|
|
224
224
|
|
|
225
225
|
```typescript
|
|
226
226
|
import { parseDOCX } from "@docen/import-docx";
|
|
@@ -231,13 +231,15 @@ const buffer = readFileSync("document.docx");
|
|
|
231
231
|
|
|
232
232
|
const content = await parseDOCX(buffer, {
|
|
233
233
|
canvasImport: () => import("@napi-rs/canvas"),
|
|
234
|
-
enableImageCrop: true, // default is true
|
|
234
|
+
enableImageCrop: true, // Enable cropping (default is false)
|
|
235
235
|
});
|
|
236
236
|
```
|
|
237
237
|
|
|
238
|
+
**Note:** By default, image cropping is disabled. Images are imported in full size, ignoring crop information in DOCX.
|
|
239
|
+
|
|
238
240
|
### Disable Image Cropping
|
|
239
241
|
|
|
240
|
-
If you want to ignore crop information in DOCX and use full images:
|
|
242
|
+
If you want to explicitly ignore crop information in DOCX and use full images (this is the default behavior):
|
|
241
243
|
|
|
242
244
|
```typescript
|
|
243
245
|
const content = await parseDOCX(buffer, {
|
|
@@ -268,10 +270,11 @@ All colors are imported as hex values (e.g., "#FF0000", "#008000"). Color names
|
|
|
268
270
|
|
|
269
271
|
- Only embedded images are supported (external image links are not fetched)
|
|
270
272
|
- Image dimensions and title are extracted from DOCX metadata
|
|
271
|
-
- **Image Cropping in
|
|
273
|
+
- **Image Cropping**: By default, images are imported in full size (crop information is ignored)
|
|
274
|
+
- To enable cropping, set `enableImageCrop: true` in options
|
|
272
275
|
- In browser environments, cropping works natively with Canvas API
|
|
273
|
-
- In Node.js, you must provide `canvasImport` option with dynamic import of `@napi-rs/canvas`
|
|
274
|
-
- If `@napi-rs/canvas` is not available, images will be imported without cropping (graceful degradation)
|
|
276
|
+
- In Node.js, you must also provide `canvasImport` option with dynamic import of `@napi-rs/canvas`
|
|
277
|
+
- If `@napi-rs/canvas` is not available in Node.js, images will be imported without cropping (graceful degradation)
|
|
275
278
|
- Some DOCX image features (like advanced positioning or text wrapping) have limited support
|
|
276
279
|
|
|
277
280
|
### Table Cell Types
|
package/dist/index.cjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
/**
 * Minified CommonJS bundle of @docen/import-docx (the earlier of the two builds in this diff).
 *
 * Purpose: turn a DOCX archive into a `{ type: "doc", content: [...] }` node tree.
 * Runtime dependencies: `xast-util-from-xml`, `fflate`, `undio`, `image-meta`.
 *
 * Entry point `parseDOCX(input, options = {})`:
 *   1. converts the input with `undio.toUint8Array` and unzips it with `fflate.unzipSync`;
 *   2. reads hyperlink and image relationships from `word/_rels/document.xml.rels`
 *      (images are stored as `data:image/<type>;base64,...` strings, type defaulting to "png");
 *   3. parses `word/document.xml` (throws "Invalid DOCX file: missing word/document.xml" when the
 *      entry is absent), `word/numbering.xml` and `word/styles.xml`;
 *   4. walks the element children of `w:body`; `w:tbl` goes to the table handler and `w:p` goes to
 *      the code-block, task-list, list, horizontal-rule or plain-paragraph handler, checked in that
 *      order. Any other element is skipped. When `options.ignoreEmptyParagraphs` is set, `w:p`
 *      elements with no non-blank text, drawing, picture or page break are skipped as well.
 *
 * Image cropping: `cropImageIfNeeded` returns the input bytes unchanged when `enabled === false`,
 * when no crop edge is set, when the computed size is not positive, or when anything in the canvas
 * path throws (a warning is logged in the last two cases). `extractImageFromDrawing` passes
 * `enabled: options?.enableImageCrop !== false`, so in this build cropping is attempted unless it
 * is explicitly turned off. Outside the browser the canvas module is obtained from
 * `options.canvasImport`; `createCanvasFactory` throws when that option is missing under Node.
 *
 * Numeric conversions visible below: `wp:extent` cx/cy are divided by 9525; indent, spacing,
 * row-height and cell-width values are divided by 15; `a:xfrm` `rot` is divided by 60000;
 * `a:srcRect` edges are divided by 100000 and multiplied by the image width/height.
 *
 * NOTE(review): the task-list handler obtains `convertTaskItem` through
 * `await import("./chunks/index.cjs")` on every loop iteration even though a function of the same
 * name is defined and exported by this bundle — confirm whether the extra chunk is still needed.
 */
"use strict";const xastUtilFromXml=require("xast-util-from-xml"),fflate=require("fflate"),undio=require("undio"),imageMeta=require("image-meta");function findChild(n,t){for(const e of n.children)if(e.type==="element"&&e.name===t)return e}function findDeepChild(n,t){for(const e of n.children){if(e.type==="element"&&e.name===t)return e;if(e.type==="element"){const r=findDeepChild(e,t);if(r)return r}}}function findDeepChildren(n,t){const e=[];for(const r of n.children)r.type==="element"&&r.name===t&&e.push(r),r.type==="element"&&e.push(...findDeepChildren(r,t));return e}const s="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";function uint8ArrayToBase64(n){const t=n.length,e=Math.ceil(t/3)*4,r=Array.from({length:e});let i=0;for(let c=0;c<t;c+=3){const l=n[c],f=c+1<t?n[c+1]:0,b=c+2<t?n[c+2]:0,I=l>>2,k=(l&3)<<4|f>>4,R=(f&15)<<2|b>>6,P=b&63;r[i++]=s[I],r[i++]=s[k],r[i++]=c+1<t?s[R]:"=",r[i++]=c+2<t?s[P]:"="}return r.join("")}function base64ToUint8Array(n){const t=atob(n),e=new Uint8Array(t.length);for(let r=0;r<t.length;r++)e[r]=t.charCodeAt(r);return e}const isNode=globalThis.process?.release?.name==="node",isBrowser=typeof window<"u";async function h$2(n){const t=await n;return t.default||t}let o,u$1=class{#t=!1;constructor({enableHWA:t=!1}={}){this.#t=t}create(t,e){const r=this._createCanvas(t,e);return{canvas:r,context:r.getContext("2d",{willReadFrequently:!this.#t})}}reset({canvas:t},e,r){if(!t)throw new Error("Canvas is not specified");t.width=e,t.height=r}destroy(t){if(!t.canvas)throw new Error("Canvas is not specified");t.canvas.width=0,t.canvas.height=0,t.canvas=void 0,t.context=void 0}_createCanvas(t,e){throw new Error("Not implemented")}};class DOMCanvasFactory extends u$1{_document;constructor({ownerDocument:t=globalThis.document,enableHWA:e=!1}={}){super({enableHWA:e}),this._document=t}_createCanvas(t,e){const r=this._document.createElement("canvas");return r.width=t,r.height=e,r}}class NodeCanvasFactory extends 
u$1{constructor({enableHWA:t=!1}={}){super({enableHWA:t})}_createCanvas(t,e){if(!o)throw new Error("@napi-rs/canvas module is not resolved");return o.createCanvas(t,e)}}async function resolveCanvasModule(n){o??=await h$2(n())}async function createCanvasFactory(n){if(isBrowser)return DOMCanvasFactory;if(isNode){if(!n)throw new Error("In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas");return await resolveCanvasModule(n),NodeCanvasFactory}throw new Error("Unsupported environment for canvas operations")}async function cropImageIfNeeded(n,t,e={}){if(!t||!t.left&&!t.top&&!t.right&&!t.bottom||e.enabled===!1)return n;try{const r=await createCanvasFactory(e.canvasImport),i=await w$1(n,r),c=(t.left||0)/1e5*i.width,l=(t.top||0)/1e5*i.height,f=(t.right||0)/1e5*i.width,b=(t.bottom||0)/1e5*i.height,I=Math.round(i.width-c-f),k=Math.round(i.height-l-b);if(I<=0||k<=0)return console.warn("Invalid crop dimensions, returning original image"),n;const R=new r().create(I,k);if(!R.context)throw new Error("Failed to get 2D context from canvas");R.context.drawImage(i,c,l,I,k,0,0,I,k);const P=R.canvas.toDataURL(),F=await(await fetch(P)).arrayBuffer();return new Uint8Array(F)}catch(r){return console.warn("Image cropping failed, returning original image:",r),n}}async function w$1(n,t){if(isBrowser){const e=new Blob([n.buffer]),r=URL.createObjectURL(e);try{const i=new Image;return new Promise((c,l)=>{i.onload=()=>{URL.revokeObjectURL(r),c(i)},i.onerror=()=>{URL.revokeObjectURL(r),l(new Error("Failed to load image"))},i.src=r})}catch(i){throw URL.revokeObjectURL(r),i}}else{if(!o)throw new Error("@napi-rs/canvas module is not resolved");return await o.loadImage(Buffer.from(n))}}const j="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";function C$1(n){const t=parseInt(n,10);if(!isNaN(t))return Math.round(t/9525)}function B(n){const 
t=n.attributes.l,e=n.attributes.t,r=n.attributes.r,i=n.attributes.b;if(!(!t&&!e&&!r&&!i))return{left:t?parseInt(t,10):void 0,top:e?parseInt(e,10):void 0,right:r?parseInt(r,10):void 0,bottom:i?parseInt(i,10):void 0}}function N(n){const t=findChild(n,"wp:align"),e=findChild(n,"wp:posOffset"),r=t?.children[0]?.type==="text"?t.children[0].value:void 0,i=e?.children[0]?.type==="text"?parseInt(e.children[0].value,10):void 0;if(!(!r&&i===void 0))return{...r&&{align:r},...i!==void 0&&{offset:i}}}function findDrawingElement(n){let t=findChild(n,"w:drawing");if(t)return t;const e=findChild(n,"mc:AlternateContent"),r=e&&findChild(e,"mc:Choice");return r&&findChild(r,"w:drawing")}function O$1(n,t,e,r){const i=e/r,c=n/t;return Math.abs(i-c)>.1?i>c?{width:n,height:Math.round(n/i)}:{width:Math.round(t*i),height:t}:{width:n,height:t}}function extractImages(n){const t=new Map,e=n["word/_rels/document.xml.rels"];if(!e)return t;const r=xastUtilFromXml.fromXml(new TextDecoder().decode(e)),i=findChild(r,"Relationships");if(!i)return t;const c=findDeepChildren(i,"Relationship");for(const l of c)if(l.attributes.Type===j&&l.attributes.Id&&l.attributes.Target){const f="word/"+l.attributes.Target,b=n[f];if(!b)continue;let I,k,R="png";try{const $=imageMeta.imageMeta(b);I=$.width,k=$.height,$.type&&(R=$.type)}catch{}const P=uint8ArrayToBase64(b),F=`data:image/${R};base64,${P}`;t.set(l.attributes.Id,{src:F,width:I,height:k})}return t}async function extractImageFromDrawing(n,t){const{images:e,options:r}=t,i=findDeepChild(n,"a:blip");if(!i?.attributes["r:embed"])return null;const c=i.attributes["r:embed"],l=e.get(c);if(!l)return null;let f=l.src;const b=findDeepChild(n,"a:srcRect");if(b){const D=B(b);if(D&&f.startsWith("data:")){const[U,_]=f.split(",");if(_){const V=base64ToUint8Array(_);try{const q=await cropImageIfNeeded(V,D,{canvasImport:r?.canvasImport,enabled:r?.enableImageCrop!==!1}),G=uint8ArrayToBase64(q);f=`${U},${G}`}catch(q){console.warn("Image cropping failed, using original 
image:",q)}}}}const I=findDeepChild(n,"wp:extent");let k,R;if(I){const D=I.attributes.cx,U=I.attributes.cy;typeof D=="string"&&(k=C$1(D)),typeof U=="string"&&(R=C$1(U))}const P=findDeepChild(n,"a:xfrm");let F;if(P?.attributes.rot){const D=parseInt(P.attributes.rot,10);isNaN(D)||(F=D/6e4)}const $=findDeepChild(n,"wp:docPr")?.attributes.title,W=findDeepChild(n,"wp:positionH"),z=findDeepChild(n,"wp:positionV");let E;if(W||z){const D=W?N(W):void 0,U=z?N(z):void 0;E={horizontalPosition:{relative:W?.attributes.relativeFrom||"page",...D?.align&&{align:D.align},...D?.offset!==void 0&&{offset:D.offset}},verticalPosition:{relative:z?.attributes.relativeFrom||"page",...U?.align&&{align:U.align},...U?.offset!==void 0&&{offset:U.offset}}}}const H=findDeepChild(n,"pic:spPr");let X;if(H){const D=findDeepChild(H,"a:ln"),U=D&&findDeepChild(D,"a:solidFill"),_=U&&findDeepChild(U,"a:srgbClr");_?.attributes.val&&(X={type:"solidFill",solidFillType:"rgb",value:_.attributes.val})}return{type:"image",attrs:{src:f,alt:"",...k!==void 0&&{width:k},...R!==void 0&&{height:R},...F!==void 0&&{rotation:F},...$&&{title:$},...E&&{floating:E},...X&&{outline:X}}}}function S$2(n,t,e){if(t&&e&&n.width&&n.height){const r=O$1(t,e,n.width,n.height);return{type:"image",attrs:{src:n.src,alt:"",width:r.width,height:r.height}}}return{type:"image",attrs:{src:n.src,alt:"",...t!==void 0&&{width:t},...e!==void 0&&{height:e}}}}async function extractImagesFromDrawing(n,t){const e=[],r=findChild(n,"wp:inline")||findChild(n,"wp:anchor");if(!r)return e;const i=findChild(r,"wp:extent");let c,l;if(i){const k=i.attributes.cx,R=i.attributes.cy;typeof k=="string"&&(c=C$1(k)),typeof R=="string"&&(l=C$1(R))}const f=findChild(r,"a:graphic");if(!f)return e;const b=findChild(f,"a:graphicData");if(!b)return e;const I=findChild(b,"wpg:wgp");if(I){const k=findChild(I,"wpg:grpSp"),R=k?[...findDeepChildren(k,"pic:pic"),...findDeepChildren(k,"pic")]:[...findDeepChildren(I,"pic:pic"),...findDeepChildren(I,"pic")];for(const P of 
R){const F=findChild(P,"a:graphic");if(!F){const E=findChild(P,"pic:blipFill")||findDeepChild(P,"a:blipFill");if(!E)continue;const H=findChild(E,"a:blip")||findDeepChild(E,"a:blip");if(!H?.attributes["r:embed"])continue;const X=H.attributes["r:embed"],D=t.images.get(X);if(!D)continue;e.push(S$2(D,c,l));continue}const $={children:[F]},W=await extractImageFromDrawing($,t);if(!W)continue;const z=$.children[0]?.type==="element"?findDeepChild($.children[0],"a:blip")?.attributes["r:embed"]:void 0;if(c&&l&&z){const E=t.images.get(z);if(E?.width&&E?.height){const H=O$1(c,l,E.width,E.height);W.attrs.width=H.width,W.attrs.height=H.height}else W.attrs.width=c,W.attrs.height=l}e.push(W)}}else{const k=await extractImageFromDrawing(n,t);k&&e.push(k)}return e}const p="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";function extractHyperlinks(n){const t=new Map,e=n["word/_rels/document.xml.rels"];if(!e)return t;const r=xastUtilFromXml.fromXml(new TextDecoder().decode(e)),i=findChild(r,"Relationships");if(!i)return t;const c=findDeepChildren(i,"Relationship");for(const l of c)l.attributes.Type===p&&l.attributes.Id&&l.attributes.Target&&t.set(l.attributes.Id,l.attributes.Target);return t}function parseNumberingXml(n){const t=new Map,e=new Map,r=n["word/numbering.xml"];if(!r)return t;const i=xastUtilFromXml.fromXml(new TextDecoder().decode(r)),c=new Map,l=findChild(i,"w:numbering");if(!l)return t;const f=findDeepChildren(l,"w:abstractNum");for(const I of f){const k=I.attributes["w:abstractNumId"],R=findChild(I,"w:lvl");if(!R)continue;const P=findChild(R,"w:numFmt");P?.attributes["w:val"]&&c.set(k,P.attributes["w:val"]);const F=findChild(R,"w:start");F?.attributes["w:val"]&&e.set(k,parseInt(F.attributes["w:val"],10))}const b=findDeepChildren(l,"w:num");for(const I of b){const k=I.attributes["w:numId"],R=findChild(I,"w:abstractNumId");if(!R?.attributes["w:val"])continue;const P=R.attributes["w:val"],F=c.get(P);if(!F)continue;const 
$=e.get(P);F==="bullet"?t.set(k,{type:"bullet"}):t.set(k,{type:"ordered",...$!==void 0&&{start:$}})}return t}function parseStylesXml(n){const t=new Map,e=n["word/styles.xml"];if(!e)return t;const r=xastUtilFromXml.fromXml(new TextDecoder().decode(e)),i=findChild(r,"w:styles");if(!i)return t;const c=findDeepChildren(i,"w:style").filter(l=>l.attributes["w:type"]==="paragraph");for(const l of c){const f=l.attributes["w:styleId"];if(!f)continue;const b={styleId:f},I=findChild(l,"w:name");I?.attributes["w:val"]&&(b.name=I.attributes["w:val"]);const k=findChild(l,"w:pPr");if(k){const P=findChild(k,"w:outlineLvl");P?.attributes["w:val"]!==void 0&&(b.outlineLvl=parseInt(P.attributes["w:val"],10))}const R=findChild(l,"w:rPr");if(R){const P={},F=findChild(R,"w:color");if(F?.attributes["w:val"]&&F.attributes["w:val"]!=="auto"){const z=F.attributes["w:val"];P.color=z.startsWith("#")?z:`#${z}`}findChild(R,"w:b")&&(P.bold=!0),findChild(R,"w:i")&&(P.italic=!0),findChild(R,"w:u")&&(P.underline=!0),findChild(R,"w:strike")&&(P.strike=!0);const $=findChild(R,"w:sz");if($?.attributes["w:val"]){const z=$.attributes["w:val"],E=parseInt(z,10);isNaN(E)||(P.fontSize=E)}const W=findChild(R,"w:rFonts");W?.attributes["w:ascii"]&&(P.fontFamily=W.attributes["w:ascii"]),Object.keys(P).length>0&&(b.charFormat=P)}t.set(f,b)}return t}function d(n,t){const e=findChild(n,"w:t");if(!e)return null;const r=e.children.find(c=>c.type==="text");if(!r?.value)return null;const i=extractMarks(n,t);return{type:"text",text:r.value,...i.length&&{marks:i}}}async function extractRuns(n,t){const e=[];for(const r of n.children)if(r.type==="element"){if(r.name==="w:hyperlink"){const i=r,c=i.attributes["r:id"],l=t.hyperlinks.get(c);if(!l)continue;for(const f of i.children){if(f.type!=="element"||f.name!=="w:r")continue;const b=f,I=findDrawingElement(b);if(I){const R=await extractImageFromDrawing(I,t);if(R){e.push(R);continue}const P=await extractImagesFromDrawing(I,t);if(P.length){e.push(...P);continue}}const 
k=d(b,t.styleInfo);k&&(k.marks=k.marks||[],k.marks.push({type:"link",attrs:{href:l}}),e.push(k))}}else if(r.name==="w:r"){const i=r,c=findDrawingElement(i);if(c){const f=await extractImagesFromDrawing(c,t);if(f.length){e.push(...f);continue}}if(findChild(i,"w:br")){const f=extractMarks(i,t.styleInfo);e.push({type:"hardBreak",...f.length&&{marks:f}})}const l=d(i,t.styleInfo);l&&e.push(l)}}return e}function extractMarks(n,t){const e=[],r=findChild(n,"w:rPr");let i={};if(t?.charFormat&&(i={...t.charFormat}),r){const c=findChild(r,"w:b");c&&(c.attributes["w:val"]==="false"?i.bold=!1:i.bold=!0);const l=findChild(r,"w:i");l&&(l.attributes["w:val"]==="false"?i.italic=!1:i.italic=!0),findChild(r,"w:u")&&(i.underline=!0),findChild(r,"w:strike")&&(i.strike=!0);const f=findChild(r,"w:color");if(f?.attributes["w:val"]&&f.attributes["w:val"]!=="auto"){const P=f.attributes["w:val"];i.color=P.startsWith("#")?P:`#${P}`}const b=findChild(r,"w:sz");if(b?.attributes["w:val"]){const P=b.attributes["w:val"],F=parseInt(P,10);isNaN(F)||(i.fontSize=F)}const I=findChild(r,"w:rFonts");I?.attributes["w:ascii"]&&(i.fontFamily=I.attributes["w:ascii"]);const k=findChild(r,"w:shd");if(k?.attributes["w:fill"]&&k.attributes["w:fill"]!=="auto"){const P=k.attributes["w:fill"];i.backgroundColor=P.startsWith("#")?P:`#${P}`}findChild(r,"w:highlight")&&e.push({type:"highlight"});const R=findChild(r,"w:vertAlign");if(R){const P=R.attributes["w:val"];P==="subscript"?e.push({type:"subscript"}):P==="superscript"&&e.push({type:"superscript"})}}if(i.bold&&e.push({type:"bold"}),i.italic&&e.push({type:"italic"}),i.underline&&e.push({type:"underline"}),i.strike&&e.push({type:"strike"}),i.color||i.backgroundColor||i.fontSize||i.fontFamily){const c={color:i.color||"",backgroundColor:i.backgroundColor||"",fontSize:"",fontFamily:"",lineHeight:""};if(i.fontSize){const l=Math.round(i.fontSize/1.5*10)/10;c.fontSize=`${l}px`}i.fontFamily&&(c.fontFamily=i.fontFamily),e.push({type:"textStyle",attrs:c})}return e}function 
extractAlignment(n){const t=findChild(n,"w:pPr");if(!t)return;const e=findChild(t,"w:jc");if(!e?.attributes["w:val"])return;const r=e.attributes["w:val"],i={left:"left",right:"right",center:"center",both:"justify"}[r];return i?{textAlign:i}:void 0}function y$2(n){return`${Math.round(n/15)}px`}function m$1(n){const t=findChild(n,"w:pPr");if(!t)return null;const e={},r=findChild(t,"w:ind");if(r){const c=I=>{const k=r.attributes[I];if(typeof k!="string")return null;const R=parseInt(k,10);return isNaN(R)?null:y$2(R)},l=c("w:left");l&&(e.indentLeft=l);const f=c("w:right");f&&(e.indentRight=f);const b=c("w:firstLine");if(b)e.indentFirstLine=b;else{const I=c("w:hanging");I&&(e.indentFirstLine=`-${I}`)}}const i=findChild(t,"w:spacing");if(i){const c=b=>{const I=i.attributes[b];if(typeof I!="string")return null;const k=parseInt(I,10);return isNaN(k)?null:y$2(k)},l=c("w:before");l&&(e.spacingBefore=l);const f=c("w:after");f&&(e.spacingAfter=f)}return Object.keys(e).length?e:null}async function convertParagraph(n,t){const e=findChild(n,"w:pPr"),r=(e&&findChild(e,"w:pStyle"))?.attributes["w:val"];if(r&&t.styleMap){const f=t.styleMap.get(r);if(f?.outlineLvl!==void 0&&f.outlineLvl>=0&&f.outlineLvl<=5){const I=f.outlineLvl+1;return h$1(n,t,f,I)}const b=r.match(/^Heading(\d+)$/);if(b){const I=parseInt(b[1],10);return h$1(n,t,f,I)}}const i=r&&t.styleMap?t.styleMap.get(r):void 0,c=await extractRuns(n,{...t,styleInfo:i}),l={...extractAlignment(n),...m$1(n)};if(w(n)){const f=c.filter(b=>b.type!=="hardBreak");return[{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:f.length?f:void 0},{type:"horizontalRule"}]}if(c.length===1&&c[0].type==="hardBreak"){const f=findChild(n,"w:r");if((f&&findChild(f,"w:br"))?.attributes["w:type"]==="page")return{type:"horizontalRule"}}return c.length===1&&c[0].type==="image"?c[0]:{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:c}}function w(n){const t=[],e=r=>{if(r.name==="w:r")t.push(r);else for(const i of 
r.children)i.type==="element"&&e(i)};return e(n),t.some(r=>findChild(r,"w:br")?.attributes["w:type"]==="page")}async function h$1(n,t,e,r){return{type:"heading",attrs:{level:r,...m$1(n)},content:await extractRuns(n,{...t,styleInfo:e})}}function parseBorder(n){if(!n)return null;const t=n.attributes["w:val"],e=n.attributes["w:sz"],r=n.attributes["w:color"],i={single:"solid",dashed:"dashed",dotted:"dotted",double:"double",none:"none",nil:"none"},c={};if(r&&r!=="auto"&&(c.color=`#${r}`),e){const l=parseInt(e);isNaN(l)||(c.width=Math.round(l/6))}return t&&i[t]&&(c.style=i[t]),Object.keys(c).length>0?c:null}function parseTableProperties(n){const t={marginTop:void 0,marginBottom:void 0,marginLeft:void 0,marginRight:void 0},e=findChild(n,"w:tblPr");if(!e)return null;const r=findChild(e,"w:tblCellMar");if(!r)return null;const i=findChild(r,"w:top");if(i?.attributes["w:w"]){const b=parseInt(i.attributes["w:w"]);isNaN(b)||(t.marginTop=b)}const c=findChild(r,"w:bottom");if(c?.attributes["w:w"]){const b=parseInt(c.attributes["w:w"]);isNaN(b)||(t.marginBottom=b)}const l=findChild(r,"w:left");if(l?.attributes["w:w"]){const b=parseInt(l.attributes["w:w"]);isNaN(b)||(t.marginLeft=b)}const f=findChild(r,"w:right");if(f?.attributes["w:w"]){const b=parseInt(f.attributes["w:w"]);isNaN(b)||(t.marginRight=b)}return t.marginTop===void 0&&t.marginBottom===void 0&&t.marginLeft===void 0&&t.marginRight===void 0?null:t}function parseRowProperties(n){const t={rowHeight:null},e=findChild(n,"w:trPr");if(!e)return t;const r=findChild(e,"w:trHeight");if(r?.attributes["w:val"]){const i=parseInt(r.attributes["w:val"]),c=Math.round(i/15);t.rowHeight=`${c}px`}return t}function parseCellProperties(n){const t={colSpan:1,rowSpan:1,colWidth:null},e=findChild(n,"w:tcPr");if(!e)return t;const r=findChild(e,"w:gridSpan");r?.attributes["w:val"]&&(t.colSpan=parseInt(r.attributes["w:val"])),findChild(e,"w:vMerge")?.attributes["w:val"]==="continue"&&(t.rowSpan=0);const 
i=findChild(e,"w:tcW");if(i?.attributes["w:w"]){const b=parseInt(i.attributes["w:w"]);t.colWidth=Math.round(b/15)}const c=findChild(e,"w:shd");c?.attributes["w:fill"]&&(t.backgroundColor=`#${c.attributes["w:fill"]}`);const l=findChild(e,"w:vAlign");l?.attributes["w:val"]&&(t.verticalAlign=l.attributes["w:val"]);const f=findChild(e,"w:tcBorders");if(f){const b=parseBorder(findChild(f,"w:top"));b&&(t.borderTop=b);const I=parseBorder(findChild(f,"w:bottom"));I&&(t.borderBottom=I);const k=parseBorder(findChild(f,"w:left"));k&&(t.borderLeft=k);const R=parseBorder(findChild(f,"w:right"));R&&(t.borderRight=R)}return t}function isTable(n){return n.name==="w:tbl"}async function convertTable(n,t){const e=[];for(const l of n.children)l.type==="element"&&l.name==="w:tr"&&e.push(l);const r=new Map,i=await Promise.all(e.map((l,f)=>u(l,{...t,activeRowspans:r,rows:e,rowIndex:f}))),c=parseTableProperties(n);return{type:"table",...c&&{attrs:c},content:i}}async function u(n,t){const e=[];let r=0;const i=parseRowProperties(n);for(const c of n.children){if(c.type!=="element"||c.name!=="w:tc")continue;const l=t.activeRowspans.get(r);if(l&&l>0){t.activeRowspans.set(r,l-1),r++;continue}let f=parseCellProperties(c);if(f?.rowSpan===1){const I=y$1({...t,colIndex:r});I>1&&(f={...f,rowSpan:I})}if(f?.rowSpan&&f.rowSpan>1&&t.activeRowspans.set(r,f.rowSpan-1),f?.rowSpan===0){r++;continue}const b=await g(c,t);e.push({type:"tableCell",...f&&{attrs:f},content:b}),r+=f?.colSpan||1}return{type:"tableRow",...i&&{attrs:i},content:e}}function y$1(n){let t=1,e=n.colIndex;for(let r=n.rowIndex+1;r<n.rows.length;r++){const i=n.rows[r];let c=!1;for(const l of i.children){if(l.type!=="element"||l.name!=="w:tc")continue;const f=parseCellProperties(l),b=f?.colSpan||1;if(e>=0&&e<b){if(f?.rowSpan===0)t++,c=!0;else return t;break}e-=b}if(!c)break}return t}async function g(n,t){const e=[];for(const r of n.children)if(r.type==="element"&&r.name==="w:p"){const i=await 
convertParagraph(r,t);Array.isArray(i)?e.push(...i):e.push(i)}return e.length?e:[{type:"paragraph",content:[]}]}function isListItem(n){const t=findChild(n,"w:pPr");return!!t&&findChild(t,"w:numPr")!==void 0}function getListInfo(n){const t=findChild(n,"w:pPr"),e=t&&findChild(t,"w:numPr");if(!e)return null;const r=findChild(e,"w:ilvl"),i=findChild(e,"w:numId");return!r||!i?null:{numId:i.attributes["w:val"],level:parseInt(r.attributes["w:val"]||"0",10)}}function isCodeBlock(n){const t=findChild(n,"w:pPr"),e=(t&&findChild(t,"w:pStyle"))?.attributes["w:val"];return e==="CodeBlock"||e?.startsWith("Code")||!1}function getCodeBlockLanguage(n){const t=findChild(n,"w:pPr"),e=(t&&findChild(t,"w:pStyle"))?.attributes["w:val"];return e?.startsWith("CodeBlock")&&e.replace("CodeBlock","").toLowerCase()||void 0}const x="\u2610",a="\u2611";function m(n){const t=findChild(n,"w:r");if(!t)return null;const e=findChild(t,"w:t");if(!e)return null;const r=e.children.find(i=>i.type==="text");return r?.value&&r||null}function isTaskItem(n){const t=m(n);if(!t)return!1;const e=t.value;return e.startsWith(x)||e.startsWith(a)}function getTaskItemChecked(n){return m(n)?.value.startsWith(a)||!1}function convertTaskItem(n){return{type:"taskItem",attrs:{checked:getTaskItemChecked(n)},content:[h(n)]}}function h(n){const t=[];let e=!1;for(const i of n.children){if(i.type!=="element"||i.name!=="w:r")continue;if(!e){const f=findChild(i,"w:t")?.children.find(b=>b.type==="text");if(f?.value){const b=f.value;if(b.startsWith(x)||b.startsWith(a)){e=!0;const I=b.substring(2).trimStart();I&&t.push({type:"text",text:I});continue}}}const c=y(i),l=findChild(i,"w:t")?.children.find(f=>f.type==="text");if(l?.value){const f={type:"text",text:l.value};c.length&&(f.marks=c),t.push(f)}}const r=extractAlignment(n);return{type:"paragraph",...r&&{attrs:r},content:t.length?t:void 0}}function y(n){const t=[],e=findChild(n,"w:rPr");return 
e&&(findChild(e,"w:b")&&t.push({type:"bold"}),findChild(e,"w:i")&&t.push({type:"italic"}),findChild(e,"w:u")&&t.push({type:"underline"}),findChild(e,"w:strike")&&t.push({type:"strike"})),t}function isHorizontalRule(n){const t=findChild(n,"w:r");if(!t)return!1;let e=!1,r=!1;for(const i of t.children)i.type==="element"&&(i.name==="w:br"&&i.attributes["w:type"]==="page"?e=!0:i.name==="w:t"?i.children.find(c=>c.type==="text")?.value?.trim().length&&(r=!0):i.name!=="w:rPr"&&(r=!0));return e&&!r}const O=n=>{const t=[],e=findDeepChildren(n,"w:r");for(const r of e){const i=findChild(r,"w:t");if(!i)continue;const c=i.children.find(l=>l.type==="text");c&&"value"in c&&c.value&&t.push({type:"text",text:c.value})}return t},S$1=async(n,t,e)=>{const r=await convertTable(n[t],{hyperlinks:e.hyperlinks,images:e.images,options:e.options,styleMap:e.styleMap});let i=1;return t+1<n.length&&n[t+1].name==="w:p"&&C(n[t+1])&&i++,{nodes:[r],consumed:i}},J=async(n,t)=>{const e=[];let r=t;for(;r<n.length;){const i=n[r];if(i.name!=="w:p"||!isCodeBlock(i))break;const c=getCodeBlockLanguage(i),l={type:"codeBlock",...c&&{attrs:{language:c}},content:O(i)};e.push(l),r++}return{nodes:e,consumed:r-t}},M=async(n,t,e)=>{const{listTypeMap:r}=e,i=[];let c=t;for(;c<n.length;){const l=n[c];if(l.name!=="w:p"||!isListItem(l))break;const f=getListInfo(l);if(!f)break;const b=r.get(f.numId),I=b?.type||"bullet",k=[];for(;c<n.length;){const P=n[c];if(P.name!=="w:p"||!isListItem(P))break;const F=getListInfo(P);if(!F||F.numId!==f.numId)break;const $=await convertParagraph(P,e),W=Array.isArray($)?$[0]:$;k.push({type:"listItem",content:[W]}),c++}const R={type:I==="bullet"?"bulletList":"orderedList",content:k};I==="ordered"&&(R.attrs={type:null,...b?.start!==void 0&&{start:b.start}}),i.push(R)}return{nodes:i,consumed:c-t}},T=async(n,t)=>{const e=[];let r=t;for(;r<n.length;){const i=n[r];if(i.name!=="w:p"||!isTaskItem(i))break;const{convertTaskItem:c}=await 
import("./chunks/index.cjs"),l=c(i);e.push(l),r++}return{nodes:[{type:"taskList",content:e}],consumed:r-t}},v=async()=>({nodes:[{type:"horizontalRule"}],consumed:1}),L=async(n,t,e)=>{const r=await convertParagraph(n[t],e);return Array.isArray(r)?{nodes:r,consumed:1}:{nodes:[r],consumed:1}},C=n=>{const t=findDeepChildren(n,"w:r");for(const e of t){const r=findChild(e,"w:t");if(r){const c=r.children.find(l=>l.type==="text");if(c&&"value"in c&&c.value&&c.value.trim().length>0)return!1}if(findChild(e,"w:drawing")||findChild(e,"mc:AlternateContent")||findChild(e,"w:pict"))return!1;const i=findChild(e,"w:br");if(i&&i.attributes["w:type"]==="page")return!1}return!0},A=n=>n.name==="w:tbl"?S$1:n.name==="w:p"?isCodeBlock(n)?J:isTaskItem(n)?T:isListItem(n)?M:isHorizontalRule(n)?v:L:null,processElements=async(n,t)=>{const e=[];let r=0;for(;r<n.length;){const i=n[r],c=A(i);if(!c){r++;continue}if(i.name==="w:p"&&t.ignoreEmptyParagraphs&&C(i)){r++;continue}const{nodes:l,consumed:f}=await c(n,r,t);e.push(...l),r+=f}return e};async function parseDOCX(n,t={}){const{ignoreEmptyParagraphs:e=!1}=t,r=await undio.toUint8Array(n),i=fflate.unzipSync(r),c=extractHyperlinks(i),l=extractImages(i),f=i["word/document.xml"];if(!f)throw new Error("Invalid DOCX file: missing word/document.xml");const b=xastUtilFromXml.fromXml(new TextDecoder().decode(f)),I=parseNumberingXml(i),k=parseStylesXml(i);return await S(b,l,c,I,k,e,t)}async function S(n,t,e,r,i,c,l){if(n.type!=="root")return{type:"doc",content:[]};const f=findChild(n,"w:document");if(!f)return{type:"doc",content:[]};const b=findChild(f,"w:body");if(!b)return{type:"doc",content:[]};const I={hyperlinks:e,images:t,listTypeMap:r,styleMap:i,ignoreEmptyParagraphs:c,options:l};return{type:"doc",content:await 
processElements(b.children.filter(k=>k.type==="element"),I)}}exports.convertParagraph=convertParagraph,exports.convertTable=convertTable,exports.convertTaskItem=convertTaskItem,exports.extractAlignment=extractAlignment,exports.extractMarks=extractMarks,exports.extractRuns=extractRuns,exports.getCodeBlockLanguage=getCodeBlockLanguage,exports.getListInfo=getListInfo,exports.getTaskItemChecked=getTaskItemChecked,exports.isCodeBlock=isCodeBlock,exports.isHorizontalRule=isHorizontalRule,exports.isListItem=isListItem,exports.isTable=isTable,exports.isTaskItem=isTaskItem,exports.parseDOCX=parseDOCX;
|
|
1
|
+
"use strict";

/*
 * DOCX -> TipTap JSON importer (CommonJS build).
 *
 * Pipeline: unzip the DOCX archive, read relationships (hyperlinks, images),
 * numbering and styles, then walk word/document.xml and convert each body
 * element (paragraph, table, list, task list, code block) to a JSON node.
 */

const xastUtilFromXml = require("xast-util-from-xml");
const fflate = require("fflate");
const undio = require("undio");
const imageMeta = require("image-meta");

/* ------------------------------------------------------------------ */
/* Constants and unit conversion                                       */
/* ------------------------------------------------------------------ */

/** Factor applied to `w:sz` values to obtain CSS pixels (see extractMarks). */
const FONT_SIZE_TO_PX = 0.6666666666666666;

/** Paragraph style ids that mark code blocks. */
const CODE_STYLES = { CODE_BLOCK: "CodeBlock", CODE_PREFIX: "Code" };

/** Maps `w:jc` values to TipTap `textAlign` values. */
const ALIGNMENT = {
  docxToTipTap: { left: "left", right: "right", center: "center", both: "justify" },
};

/** Pixels per inch used for every length conversion in this module. */
const PIXELS_PER_INCH = 96;

/** Converts a length in 1/1440 inch units to whole pixels. */
function twipsToPixels(twips) {
  return Math.round((twips * PIXELS_PER_INCH) / 1440);
}

/** Converts a length in 1/1440 inch units to a CSS `"<n>px"` string. */
function twipsToCssPixels(twips) {
  return `${twipsToPixels(twips)}px`;
}

/** Converts a length in 1/914400 inch units (EMU) to whole pixels. */
function emuToPixels(emu) {
  return Math.round(emu / (914400 / PIXELS_PER_INCH));
}

/** Parses an EMU attribute string; returns pixels, or undefined if not numeric. */
function parseEmuToPixels(raw) {
  const emu = parseInt(raw, 10);
  if (!isNaN(emu)) return emuToPixels(emu);
  return undefined;
}

/* ------------------------------------------------------------------ */
/* XML tree helpers                                                    */
/* ------------------------------------------------------------------ */

/** Returns the first DIRECT child element named `name`, or null. */
function findChild(node, name) {
  if (!node.children) return null;
  for (const child of node.children) {
    if (child.type === "element" && child.name === name) return child;
  }
  return null;
}

/** Returns the first descendant element named `name` (depth-first), or null. */
function findDeepChild(node, name) {
  if (!node.children) return null;
  for (const child of node.children) {
    if (child.type !== "element") continue;
    if (child.name === name) return child;
    const nested = findDeepChild(child, name);
    if (nested) return nested;
  }
  return null;
}

/** Returns every descendant element named `name`, in document order. */
function findDeepChildren(node, name) {
  const found = [];
  if (!node.children) return found;
  for (const child of node.children) {
    if (child.type !== "element") continue;
    if (child.name === name) found.push(child);
    found.push(...findDeepChildren(child, name));
  }
  return found;
}

/**
 * Returns the raw attribute string when it is present and parses as an
 * integer; otherwise undefined.
 */
function getIntegerAttribute(attributes, key) {
  const raw = attributes[key];
  if (!raw) return undefined;
  return isNaN(parseInt(raw, 10)) ? undefined : raw;
}

/** Builds a predicate that tests membership in `allowed`. */
function oneOf(allowed) {
  return (value) => allowed.includes(value);
}

/* ------------------------------------------------------------------ */
/* Base64                                                              */
/* ------------------------------------------------------------------ */

const BASE64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/** Encodes bytes as a padded base64 string. */
function uint8ArrayToBase64(bytes) {
  const length = bytes.length;
  const out = Array.from({ length: Math.ceil(length / 3) * 4 });
  let pos = 0;
  for (let i = 0; i < length; i += 3) {
    const b0 = bytes[i];
    const b1 = i + 1 < length ? bytes[i + 1] : 0;
    const b2 = i + 2 < length ? bytes[i + 2] : 0;
    out[pos++] = BASE64_ALPHABET[b0 >> 2];
    out[pos++] = BASE64_ALPHABET[((b0 & 3) << 4) | (b1 >> 4)];
    out[pos++] = i + 1 < length ? BASE64_ALPHABET[((b1 & 15) << 2) | (b2 >> 6)] : "=";
    out[pos++] = i + 2 < length ? BASE64_ALPHABET[b2 & 63] : "=";
  }
  return out.join("");
}

/** Decodes a base64 string into bytes. Throws if `atob` rejects the input. */
function base64ToUint8Array(base64) {
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
  return bytes;
}

/* ------------------------------------------------------------------ */
/* Canvas factories and image cropping                                 */
/* ------------------------------------------------------------------ */

const isNode = globalThis.process?.release?.name === "node";
const isBrowser = typeof window < "u";

/** Awaits a dynamic import and unwraps its `default` export when present. */
async function interopDefault(modulePromise) {
  const mod = await modulePromise;
  return mod.default || mod;
}

/** Lazily resolved canvas module used outside the browser. */
let canvasModule;

/** Abstract canvas factory; subclasses implement `_createCanvas`. */
let BaseCanvasFactory = class {
  #enableHWA = false;

  constructor({ enableHWA = false } = {}) {
    this.#enableHWA = enableHWA;
  }

  /** Creates a canvas of the given size together with its 2D context. */
  create(width, height) {
    const canvas = this._createCanvas(width, height);
    return {
      canvas,
      context: canvas.getContext("2d", { willReadFrequently: !this.#enableHWA }),
    };
  }

  /** Resizes an existing canvas. */
  reset({ canvas }, width, height) {
    if (!canvas) throw new Error("Canvas is not specified");
    canvas.width = width;
    canvas.height = height;
  }

  /** Zeroes the canvas size and drops the canvas/context references. */
  destroy(canvasAndContext) {
    if (!canvasAndContext.canvas) throw new Error("Canvas is not specified");
    canvasAndContext.canvas.width = 0;
    canvasAndContext.canvas.height = 0;
    canvasAndContext.canvas = undefined;
    canvasAndContext.context = undefined;
  }

  _createCanvas(width, height) {
    throw new Error("Not implemented");
  }
};

/** Canvas factory backed by `document.createElement("canvas")`. */
class DOMCanvasFactory extends BaseCanvasFactory {
  _document;

  constructor({ ownerDocument = globalThis.document, enableHWA = false } = {}) {
    super({ enableHWA });
    this._document = ownerDocument;
  }

  _createCanvas(width, height) {
    const canvas = this._document.createElement("canvas");
    canvas.width = width;
    canvas.height = height;
    return canvas;
  }
}

/** Canvas factory backed by the resolved canvas module's `createCanvas`. */
class NodeCanvasFactory extends BaseCanvasFactory {
  constructor({ enableHWA = false } = {}) {
    super({ enableHWA });
  }

  _createCanvas(width, height) {
    if (!canvasModule) throw new Error("@napi-rs/canvas module is not resolved");
    return canvasModule.createCanvas(width, height);
  }
}

/** Resolves the canvas module once; later calls reuse the cached module. */
async function resolveCanvasModule(canvasImport) {
  canvasModule ??= await interopDefault(canvasImport());
}

/**
 * Picks the canvas factory class for the current environment.
 * @throws when running in Node without `canvasImport`, or in an environment
 *         that is neither browser nor Node.
 */
async function createCanvasFactory(canvasImport) {
  if (isBrowser) return DOMCanvasFactory;
  if (isNode) {
    if (!canvasImport) {
      throw new Error(
        "In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas",
      );
    }
    await resolveCanvasModule(canvasImport);
    return NodeCanvasFactory;
  }
  throw new Error("Unsupported environment for canvas operations");
}

/**
 * Crops `data` according to `crop` (each side expressed in 1/100000 of the
 * corresponding image dimension). Best effort: any failure logs a warning and
 * returns the original bytes.
 *
 * @param data    Encoded image bytes.
 * @param crop    `{ left, top, right, bottom }`, any of which may be missing.
 * @param options `{ canvasImport, enabled }`; `enabled === false` disables cropping.
 */
async function cropImageIfNeeded(data, crop, options = {}) {
  const hasCrop = crop && (crop.left || crop.top || crop.right || crop.bottom);
  if (!hasCrop || options.enabled === false) return data;

  try {
    const Factory = await createCanvasFactory(options.canvasImport);
    const image = await loadImage(data, Factory);
    const left = ((crop.left || 0) / 1e5) * image.width;
    const top = ((crop.top || 0) / 1e5) * image.height;
    const right = ((crop.right || 0) / 1e5) * image.width;
    const bottom = ((crop.bottom || 0) / 1e5) * image.height;
    const width = Math.round(image.width - left - right);
    const height = Math.round(image.height - top - bottom);
    if (width <= 0 || height <= 0) {
      console.warn("Invalid crop dimensions, returning original image");
      return data;
    }
    const target = new Factory().create(width, height);
    if (!target.context) throw new Error("Failed to get 2D context from canvas");
    target.context.drawImage(image, left, top, width, height, 0, 0, width, height);
    // Round-trip through a data URL to obtain the encoded bytes of the canvas.
    const dataUrl = target.canvas.toDataURL();
    const buffer = await (await fetch(dataUrl)).arrayBuffer();
    return new Uint8Array(buffer);
  } catch (error) {
    console.warn("Image cropping failed, returning original image:", error);
    return data;
  }
}

/** Decodes image bytes into something `drawImage` accepts. */
async function loadImage(data, _factory) {
  if (isBrowser) {
    // Pass the view itself, not `data.buffer`: a Uint8Array may be a window
    // onto a larger ArrayBuffer, and the Blob must contain only its bytes.
    const blob = new Blob([data]);
    const url = URL.createObjectURL(blob);
    try {
      const image = new Image();
      return new Promise((resolve, reject) => {
        image.onload = () => {
          URL.revokeObjectURL(url);
          resolve(image);
        };
        image.onerror = () => {
          URL.revokeObjectURL(url);
          reject(new Error("Failed to load image"));
        };
        image.src = url;
      });
    } catch (error) {
      URL.revokeObjectURL(url);
      throw error;
    }
  }
  if (!canvasModule) throw new Error("@napi-rs/canvas module is not resolved");
  return await canvasModule.loadImage(Buffer.from(data));
}

/* ------------------------------------------------------------------ */
/* Images                                                              */
/* ------------------------------------------------------------------ */

const IMAGE_RELATIONSHIP_TYPE =
  "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";

const isHorizontalAlign = oneOf(["left", "right", "center", "inside", "outside"]);
const isVerticalAlign = oneOf(["top", "bottom", "center", "inside", "outside"]);
const isHorizontalRelative = oneOf([
  "page", "character", "column", "margin",
  "leftMargin", "rightMargin", "insideMargin", "outsideMargin",
]);
const isVerticalRelative = oneOf([
  "page", "paragraph", "margin", "topMargin",
  "bottomMargin", "insideMargin", "outsideMargin", "line",
]);

/** Reads the `l/t/r/b` attributes of `a:srcRect`; undefined when all are absent. */
function extractCropRect(srcRect) {
  const { l, t, r, b } = srcRect.attributes;
  if (!l && !t && !r && !b) return undefined;
  return {
    left: l ? parseInt(l, 10) : undefined,
    top: t ? parseInt(t, 10) : undefined,
    right: r ? parseInt(r, 10) : undefined,
    bottom: b ? parseInt(b, 10) : undefined,
  };
}

/**
 * Reads `wp:align` / `wp:posOffset` from a position element.
 * Returns `{ align?, offset? }`, or undefined when neither is usable.
 */
function parsePosition(positionEl, isValidAlign) {
  const alignEl = findChild(positionEl, "wp:align");
  const offsetEl = findChild(positionEl, "wp:posOffset");
  let align;
  if (alignEl?.children[0]?.type === "text") {
    const value = alignEl.children[0].value;
    if (isValidAlign(value)) align = value;
  }
  const offset =
    offsetEl?.children[0]?.type === "text" ? parseInt(offsetEl.children[0].value, 10) : undefined;
  if (!align && offset === undefined) return undefined;
  return { ...(align && { align }), ...(offset !== undefined && { offset }) };
}

/** Parses a `wp:positionH` element. */
function parseHorizontalPosition(positionEl) {
  return parsePosition(positionEl, isHorizontalAlign);
}

/** Parses a `wp:positionV` element. */
function parseVerticalPosition(positionEl) {
  return parsePosition(positionEl, isVerticalAlign);
}

/** Finds `w:drawing` directly in a run, or inside `mc:AlternateContent/mc:Choice`. */
function findDrawingElement(run) {
  const direct = findChild(run, "w:drawing");
  if (direct) return direct;
  const alternate = findChild(run, "mc:AlternateContent");
  const choice = alternate && findChild(alternate, "mc:Choice");
  return choice ? findChild(choice, "w:drawing") : null;
}

/**
 * Fits an image with intrinsic size `imageWidth x imageHeight` into the box
 * `boxWidth x boxHeight`. When the aspect ratios differ by more than 0.1 the
 * image ratio is kept; otherwise the box size is returned as-is.
 */
function fitToBox(boxWidth, boxHeight, imageWidth, imageHeight) {
  const imageRatio = imageWidth / imageHeight;
  const boxRatio = boxWidth / boxHeight;
  if (Math.abs(imageRatio - boxRatio) <= 0.1) return { width: boxWidth, height: boxHeight };
  return imageRatio > boxRatio
    ? { width: boxWidth, height: Math.round(boxWidth / imageRatio) }
    : { width: Math.round(boxHeight * imageRatio), height: boxHeight };
}

/**
 * Builds the relationship-id -> `{ src, width, height }` map for every image
 * relationship of the main document. `handler`, when given, decides the `src`;
 * otherwise the image is inlined as a base64 data URL.
 */
async function extractImages(files, handler) {
  const images = new Map();
  const relsFile = files["word/_rels/document.xml.rels"];
  if (!relsFile) return images;

  const tree = xastUtilFromXml.fromXml(new TextDecoder().decode(relsFile));
  const relationships = findChild(tree, "Relationships");
  if (!relationships) return images;

  for (const rel of findDeepChildren(relationships, "Relationship")) {
    const { Type, Id, Target } = rel.attributes;
    if (Type !== IMAGE_RELATIONSHIP_TYPE || !Id || !Target) continue;

    const data = files["word/" + Target];
    if (!data) continue;

    let width;
    let height;
    let type = "png";
    try {
      const meta = imageMeta.imageMeta(data);
      width = meta.width;
      height = meta.height;
      if (meta.type) type = meta.type;
    } catch {
      // Unknown image format: keep the "png" default and leave the size unset.
    }

    let src;
    if (handler) {
      src = (await handler({ id: Id, contentType: `image/${type}`, data })).src;
    } else {
      src = `data:image/${type};base64,${uint8ArrayToBase64(data)}`;
    }
    images.set(Id, { src, width, height });
  }
  return images;
}

/**
 * Converts the first `a:blip` found under `drawing` into an image node with
 * size, rotation, title, floating position and outline when present.
 * Returns null when there is no embedded image known to the context.
 */
async function extractImageFromDrawing(drawing, params) {
  const { context } = params;
  const blip = findDeepChild(drawing, "a:blip");
  if (!blip?.attributes["r:embed"]) return null;
  const info = context.images.get(blip.attributes["r:embed"]);
  if (!info) return null;

  let src = info.src;
  const srcRect = findDeepChild(drawing, "a:srcRect");
  const cropEnabled = context.image?.enableImageCrop ?? false;
  // Skip the base64 decode/encode round-trip entirely when cropping is off;
  // it could not change the image and `atob` may throw on non-base64 data URLs.
  if (srcRect && cropEnabled !== false) {
    const crop = extractCropRect(srcRect);
    if (crop && src.startsWith("data:")) {
      const [prefix, payload] = src.split(",");
      if (payload) {
        try {
          const cropped = await cropImageIfNeeded(base64ToUint8Array(payload), crop, {
            canvasImport: context.image?.canvasImport,
            enabled: cropEnabled,
          });
          src = `${prefix},${uint8ArrayToBase64(cropped)}`;
        } catch (error) {
          console.warn("Image cropping failed, using original image:", error);
        }
      }
    }
  }

  let width;
  let height;
  const extent = findDeepChild(drawing, "wp:extent");
  if (extent) {
    const { cx, cy } = extent.attributes;
    if (typeof cx === "string") width = parseEmuToPixels(cx);
    if (typeof cy === "string") height = parseEmuToPixels(cy);
  }

  let rotation;
  const xfrm = findDeepChild(drawing, "a:xfrm");
  if (xfrm?.attributes.rot) {
    const rot = parseInt(xfrm.attributes.rot, 10);
    if (!isNaN(rot)) rotation = rot / 6e4; // `rot` is in 1/60000 of a degree
  }

  const title = findDeepChild(drawing, "wp:docPr")?.attributes.title;

  let floating;
  const positionH = findDeepChild(drawing, "wp:positionH");
  const positionV = findDeepChild(drawing, "wp:positionV");
  if (positionH || positionV) {
    const horizontal = positionH ? parseHorizontalPosition(positionH) : undefined;
    const vertical = positionV ? parseVerticalPosition(positionV) : undefined;
    const relH = positionH?.attributes.relativeFrom;
    const relV = positionV?.attributes.relativeFrom;
    floating = {
      horizontalPosition: {
        relative: typeof relH === "string" && isHorizontalRelative(relH) ? relH : "page",
        ...(horizontal?.align && { align: horizontal.align }),
        ...(horizontal?.offset !== undefined && { offset: horizontal.offset }),
      },
      verticalPosition: {
        relative: typeof relV === "string" && isVerticalRelative(relV) ? relV : "page",
        ...(vertical?.align && { align: vertical.align }),
        ...(vertical?.offset !== undefined && { offset: vertical.offset }),
      },
    };
  }

  let outline;
  const shapeProps = findDeepChild(drawing, "pic:spPr");
  if (shapeProps) {
    const line = findDeepChild(shapeProps, "a:ln");
    const fill = line && findDeepChild(line, "a:solidFill");
    const color = fill && findDeepChild(fill, "a:srgbClr");
    if (color?.attributes.val) {
      outline = { type: "solidFill", solidFillType: "rgb", value: color.attributes.val };
    }
  }

  return {
    type: "image",
    attrs: {
      src,
      alt: "",
      ...(width !== undefined && { width }),
      ...(height !== undefined && { height }),
      ...(rotation !== undefined && { rotation }),
      ...(title && { title }),
      ...(floating && { floating }),
      ...(outline && { outline }),
    },
  };
}

/** Builds a plain image node, fitting it into the group box when sizes are known. */
function createImageNode(info, boxWidth, boxHeight) {
  if (boxWidth && boxHeight && info.width && info.height) {
    const size = fitToBox(boxWidth, boxHeight, info.width, info.height);
    return { type: "image", attrs: { src: info.src, alt: "", width: size.width, height: size.height } };
  }
  return {
    type: "image",
    attrs: {
      src: info.src,
      alt: "",
      ...(boxWidth !== undefined && { width: boxWidth }),
      ...(boxHeight !== undefined && { height: boxHeight }),
    },
  };
}

/**
 * Extracts every image of a `w:drawing`: all pictures of a `wpg:wgp` group,
 * or the single picture of a non-group drawing.
 */
async function extractImagesFromDrawing(drawing, params) {
  const nodes = [];
  const container = findChild(drawing, "wp:inline") || findChild(drawing, "wp:anchor");
  if (!container) return nodes;

  let boxWidth;
  let boxHeight;
  const extent = findChild(container, "wp:extent");
  if (extent) {
    const { cx, cy } = extent.attributes;
    if (typeof cx === "string") boxWidth = parseEmuToPixels(cx);
    if (typeof cy === "string") boxHeight = parseEmuToPixels(cy);
  }

  const graphic = findChild(container, "a:graphic");
  if (!graphic) return nodes;
  const graphicData = findChild(graphic, "a:graphicData");
  if (!graphicData) return nodes;

  const group = findChild(graphicData, "wpg:wgp");
  if (!group) {
    const single = await extractImageFromDrawing(drawing, params);
    if (single) nodes.push(single);
    return nodes;
  }

  const scope = findChild(group, "wpg:grpSp") || group;
  const pictures = [...findDeepChildren(scope, "pic:pic"), ...findDeepChildren(scope, "pic")];
  for (const picture of pictures) {
    const innerGraphic = findChild(picture, "a:graphic");
    if (!innerGraphic) {
      // Picture references its blip directly through a blipFill.
      const blipFill = findChild(picture, "pic:blipFill") || findDeepChild(picture, "a:blipFill");
      if (!blipFill) continue;
      const blip = findChild(blipFill, "a:blip") || findDeepChild(blipFill, "a:blip");
      if (!blip?.attributes["r:embed"]) continue;
      const info = params.context.images.get(blip.attributes["r:embed"]);
      if (!info) continue;
      nodes.push(createImageNode(info, boxWidth, boxHeight));
      continue;
    }

    const wrapper = { children: [innerGraphic] };
    const node = await extractImageFromDrawing(wrapper, params);
    if (!node) continue;

    const embedId =
      wrapper.children[0]?.type === "element"
        ? findDeepChild(wrapper.children[0], "a:blip")?.attributes["r:embed"]
        : undefined;
    if (boxWidth && boxHeight && embedId) {
      const info = params.context.images.get(embedId);
      if (info?.width && info?.height) {
        const size = fitToBox(boxWidth, boxHeight, info.width, info.height);
        node.attrs.width = size.width;
        node.attrs.height = size.height;
      } else {
        node.attrs.width = boxWidth;
        node.attrs.height = boxHeight;
      }
    }
    nodes.push(node);
  }
  return nodes;
}

/* ------------------------------------------------------------------ */
/* Relationships, numbering, styles                                    */
/* ------------------------------------------------------------------ */

const HYPERLINK_RELATIONSHIP_TYPE =
  "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";

/** Builds the relationship-id -> target URL map for hyperlink relationships. */
function extractHyperlinks(files) {
  const links = new Map();
  const relsFile = files["word/_rels/document.xml.rels"];
  if (!relsFile) return links;
  const tree = xastUtilFromXml.fromXml(new TextDecoder().decode(relsFile));
  const relationships = findChild(tree, "Relationships");
  if (!relationships) return links;
  for (const rel of findDeepChildren(relationships, "Relationship")) {
    const { Type, Id, Target } = rel.attributes;
    if (Type === HYPERLINK_RELATIONSHIP_TYPE && Id && Target) links.set(Id, Target);
  }
  return links;
}

/**
 * Builds the numId -> `{ type: "bullet" | "ordered", start? }` map from
 * word/numbering.xml. Only the first `w:lvl` of each abstract numbering is read.
 */
function parseNumberingXml(files) {
  const lists = new Map();
  const file = files["word/numbering.xml"];
  if (!file) return lists;
  const tree = xastUtilFromXml.fromXml(new TextDecoder().decode(file));
  const numbering = findChild(tree, "w:numbering");
  if (!numbering) return lists;

  const formats = new Map();
  const starts = new Map();
  for (const abstractNum of findDeepChildren(numbering, "w:abstractNum")) {
    const abstractId = abstractNum.attributes["w:abstractNumId"];
    const level = findChild(abstractNum, "w:lvl");
    if (!level) continue;
    const format = findChild(level, "w:numFmt");
    if (format?.attributes["w:val"]) formats.set(abstractId, format.attributes["w:val"]);
    const start = findChild(level, "w:start");
    if (start?.attributes["w:val"]) starts.set(abstractId, parseInt(start.attributes["w:val"], 10));
  }

  for (const num of findDeepChildren(numbering, "w:num")) {
    const numId = num.attributes["w:numId"];
    const ref = findChild(num, "w:abstractNumId");
    if (!ref?.attributes["w:val"]) continue;
    const abstractId = ref.attributes["w:val"];
    const format = formats.get(abstractId);
    if (!format) continue;
    if (format === "bullet") {
      lists.set(numId, { type: "bullet" });
    } else {
      const start = starts.get(abstractId);
      lists.set(numId, { type: "ordered", ...(start !== undefined && { start }) });
    }
  }
  return lists;
}

/** Prefixes a colour value with "#" unless it already has one. */
function toHexColor(value) {
  return value.startsWith("#") ? value : `#${value}`;
}

/**
 * Builds the styleId -> `{ styleId, name?, outlineLvl?, charFormat? }` map for
 * paragraph styles of word/styles.xml.
 */
function parseStylesXml(files) {
  const styles = new Map();
  const file = files["word/styles.xml"];
  if (!file) return styles;
  const tree = xastUtilFromXml.fromXml(new TextDecoder().decode(file));
  const root = findChild(tree, "w:styles");
  if (!root) return styles;

  const paragraphStyles = findDeepChildren(root, "w:style").filter(
    (style) => style.attributes["w:type"] === "paragraph",
  );
  for (const style of paragraphStyles) {
    const styleId = style.attributes["w:styleId"];
    if (!styleId) continue;
    const info = { styleId };

    const name = findChild(style, "w:name");
    if (name?.attributes["w:val"]) info.name = name.attributes["w:val"];

    const pPr = findChild(style, "w:pPr");
    if (pPr) {
      const outline = findChild(pPr, "w:outlineLvl");
      if (outline?.attributes["w:val"] !== undefined) {
        info.outlineLvl = parseInt(outline.attributes["w:val"], 10);
      }
    }

    const rPr = findChild(style, "w:rPr");
    if (rPr) {
      const format = {};
      const color = findChild(rPr, "w:color");
      if (color?.attributes["w:val"] && color.attributes["w:val"] !== "auto") {
        format.color = toHexColor(color.attributes["w:val"]);
      }
      if (findChild(rPr, "w:b")) format.bold = true;
      if (findChild(rPr, "w:i")) format.italic = true;
      if (findChild(rPr, "w:u")) format.underline = true;
      if (findChild(rPr, "w:strike")) format.strike = true;
      const size = findChild(rPr, "w:sz");
      if (size?.attributes["w:val"]) {
        const parsed = parseInt(size.attributes["w:val"], 10);
        if (!isNaN(parsed)) format.fontSize = parsed;
      }
      const fonts = findChild(rPr, "w:rFonts");
      if (fonts?.attributes["w:ascii"]) format.fontFamily = fonts.attributes["w:ascii"];
      if (Object.keys(format).length > 0) info.charFormat = format;
    }

    styles.set(styleId, info);
  }
  return styles;
}

/* ------------------------------------------------------------------ */
/* Runs and marks                                                      */
/* ------------------------------------------------------------------ */

/** Converts the `w:t` of a run into a text node, or null if it has no text. */
function extractTextNode(run, styleInfo) {
  const textEl = findChild(run, "w:t");
  if (!textEl) return null;
  const text = textEl.children.find((child) => child.type === "text");
  if (!text?.value) return null;
  const marks = extractMarks(run, styleInfo);
  return { type: "text", text: text.value, ...(marks.length && { marks }) };
}

/**
 * Converts the runs of a paragraph into inline nodes (text, hardBreak, image).
 * Runs inside `w:hyperlink` get an additional `link` mark.
 */
async function extractRuns(paragraph, params) {
  const { context, styleInfo } = params;
  const nodes = [];

  for (const child of paragraph.children) {
    if (child.type !== "element") continue;

    if (child.name === "w:hyperlink") {
      const href = context.hyperlinks.get(child.attributes["r:id"]);
      // NOTE(review): hyperlinks without a resolvable r:id (e.g. internal
      // anchors) are skipped together with their text — confirm this is intended.
      if (!href) continue;
      for (const run of child.children) {
        if (run.type !== "element" || run.name !== "w:r") continue;
        const drawing = findDrawingElement(run);
        if (drawing) {
          const image = await extractImageFromDrawing(drawing, { context });
          if (image) {
            nodes.push(image);
            continue;
          }
          const images = await extractImagesFromDrawing(drawing, { context });
          if (images.length) {
            nodes.push(...images);
            continue;
          }
        }
        const textNode = extractTextNode(run, styleInfo);
        if (textNode) {
          textNode.marks = textNode.marks || [];
          textNode.marks.push({ type: "link", attrs: { href } });
          nodes.push(textNode);
        }
      }
    } else if (child.name === "w:r") {
      const drawing = findDrawingElement(child);
      if (drawing) {
        const images = await extractImagesFromDrawing(drawing, { context });
        if (images.length) {
          nodes.push(...images);
          continue;
        }
      }
      if (findChild(child, "w:br")) {
        const marks = extractMarks(child, styleInfo);
        nodes.push({ type: "hardBreak", ...(marks.length && { marks }) });
      }
      const textNode = extractTextNode(child, styleInfo);
      if (textNode) nodes.push(textNode);
    }
  }
  return nodes;
}

/** True when an on/off toggle element explicitly carries a "false" value. */
function isToggleOff(element) {
  // ST_OnOff allows "false", "0" and "off" as the negative spellings.
  const value = element.attributes["w:val"];
  return value === "false" || value === "0" || value === "off";
}

/**
 * Derives TipTap marks from a run's `w:rPr`, layered on top of the character
 * format of `styleInfo` when given.
 */
function extractMarks(run, styleInfo) {
  const marks = [];
  const rPr = findChild(run, "w:rPr");
  let format = {};
  if (styleInfo?.charFormat) format = { ...styleInfo.charFormat };

  if (rPr) {
    const bold = findChild(rPr, "w:b");
    if (bold) format.bold = !isToggleOff(bold);
    const italic = findChild(rPr, "w:i");
    if (italic) format.italic = !isToggleOff(italic);
    if (findChild(rPr, "w:u")) format.underline = true;
    if (findChild(rPr, "w:strike")) format.strike = true;

    const color = findChild(rPr, "w:color");
    if (color?.attributes["w:val"] && color.attributes["w:val"] !== "auto") {
      format.color = toHexColor(color.attributes["w:val"]);
    }
    const size = findChild(rPr, "w:sz");
    if (size?.attributes["w:val"]) {
      const parsed = parseInt(size.attributes["w:val"], 10);
      if (!isNaN(parsed)) format.fontSize = parsed;
    }
    const fonts = findChild(rPr, "w:rFonts");
    if (fonts?.attributes["w:ascii"]) format.fontFamily = fonts.attributes["w:ascii"];

    const shading = findChild(rPr, "w:shd");
    if (shading?.attributes["w:fill"] && shading.attributes["w:fill"] !== "auto") {
      format.backgroundColor = toHexColor(shading.attributes["w:fill"]);
    }

    if (findChild(rPr, "w:highlight")) marks.push({ type: "highlight" });

    const vertAlign = findChild(rPr, "w:vertAlign");
    if (vertAlign) {
      const value = vertAlign.attributes["w:val"];
      if (value === "subscript") marks.push({ type: "subscript" });
      else if (value === "superscript") marks.push({ type: "superscript" });
    }
  }

  if (format.bold) marks.push({ type: "bold" });
  if (format.italic) marks.push({ type: "italic" });
  if (format.underline) marks.push({ type: "underline" });
  if (format.strike) marks.push({ type: "strike" });

  if (format.color || format.backgroundColor || format.fontSize || format.fontFamily) {
    const attrs = {
      color: format.color || "",
      backgroundColor: format.backgroundColor || "",
      fontSize: "",
      fontFamily: "",
      lineHeight: "",
    };
    if (format.fontSize) {
      // One decimal place of precision.
      attrs.fontSize = `${Math.round(format.fontSize * FONT_SIZE_TO_PX * 10) / 10}px`;
    }
    if (format.fontFamily) attrs.fontFamily = format.fontFamily;
    marks.push({ type: "textStyle", attrs });
  }
  return marks;
}

/* ------------------------------------------------------------------ */
/* Paragraphs                                                          */
/* ------------------------------------------------------------------ */

/** Returns `{ textAlign }` for a paragraph's `w:jc`, or undefined. */
function extractAlignment(paragraph) {
  const pPr = findChild(paragraph, "w:pPr");
  if (!pPr) return undefined;
  const jc = findChild(pPr, "w:jc");
  if (!jc?.attributes["w:val"]) return undefined;
  const textAlign = ALIGNMENT.docxToTipTap[jc.attributes["w:val"]];
  return textAlign ? { textAlign } : undefined;
}

/**
 * Reads indentation (`w:ind`) and spacing (`w:spacing`) of a paragraph as CSS
 * pixel strings. Returns null when nothing is set.
 */
function extractParagraphFormatting(paragraph) {
  const pPr = findChild(paragraph, "w:pPr");
  if (!pPr) return null;
  const attrs = {};

  const indent = findChild(pPr, "w:ind");
  if (indent) {
    const left = getIntegerAttribute(indent.attributes, "w:left");
    if (left) attrs.indentLeft = twipsToCssPixels(parseInt(left, 10));
    const right = getIntegerAttribute(indent.attributes, "w:right");
    if (right) attrs.indentRight = twipsToCssPixels(parseInt(right, 10));
    const firstLine = getIntegerAttribute(indent.attributes, "w:firstLine");
    if (firstLine) {
      attrs.indentFirstLine = twipsToCssPixels(parseInt(firstLine, 10));
    } else {
      const hanging = getIntegerAttribute(indent.attributes, "w:hanging");
      if (hanging) {
        const leftTwips = left ? parseInt(left, 10) : 0;
        attrs.indentFirstLine = twipsToCssPixels(leftTwips - parseInt(hanging, 10));
      }
    }
  }

  const spacing = findChild(pPr, "w:spacing");
  if (spacing) {
    const before = getIntegerAttribute(spacing.attributes, "w:before");
    if (before) attrs.spacingBefore = twipsToCssPixels(parseInt(before, 10));
    const after = getIntegerAttribute(spacing.attributes, "w:after");
    if (after) attrs.spacingAfter = twipsToCssPixels(parseInt(after, 10));
  }

  return Object.keys(attrs).length ? attrs : null;
}

/**
 * Converts a `w:p` element. Returns a heading when the paragraph style has an
 * outline level 0-5 or is named `Heading<N>`, a `[paragraph, horizontalRule]`
 * pair when the paragraph contains a page break, otherwise a paragraph node.
 */
async function convertParagraph(node, params) {
  const { context, styleInfo } = params;
  const pPr = findChild(node, "w:pPr");
  const styleId = (pPr && findChild(pPr, "w:pStyle"))?.attributes["w:val"];

  if (styleId && context.styleMap) {
    const style = context.styleMap.get(styleId);
    if (style?.outlineLvl !== undefined && style.outlineLvl >= 0 && style.outlineLvl <= 5) {
      return convertHeading(node, params, style, style.outlineLvl + 1);
    }
    const match = styleId.match(/^Heading(\d+)$/);
    if (match) return convertHeading(node, params, style, parseInt(match[1], 10));
  }

  const paragraphStyle = styleId && context.styleMap ? context.styleMap.get(styleId) : undefined;
  const content = await extractRuns(node, { context, styleInfo: styleInfo || paragraphStyle });
  const attrs = { ...extractAlignment(node), ...extractParagraphFormatting(node) };
  const attrsPart = Object.keys(attrs).length && { attrs };

  if (containsPageBreak(node)) {
    const withoutBreaks = content.filter((item) => item.type !== "hardBreak");
    return [
      { type: "paragraph", ...attrsPart, content: withoutBreaks.length ? withoutBreaks : undefined },
      { type: "horizontalRule" },
    ];
  }

  if (content.length === 1 && content[0].type === "hardBreak") {
    const firstRun = findChild(node, "w:r");
    if ((firstRun && findChild(firstRun, "w:br"))?.attributes["w:type"] === "page") {
      return { type: "horizontalRule" };
    }
  }

  return { type: "paragraph", ...attrsPart, content };
}

/** True when any run under `node` has a first `w:br` of type "page". */
function containsPageBreak(node) {
  const runs = [];
  const collect = (element) => {
    if (element.name === "w:r") {
      runs.push(element);
      return;
    }
    for (const child of element.children) {
      if (child.type === "element") collect(child);
    }
  };
  collect(node);
  return runs.some((run) => findChild(run, "w:br")?.attributes["w:type"] === "page");
}

/** Builds a heading node of the given level from a paragraph. */
async function convertHeading(node, params, style, level) {
  return {
    type: "heading",
    attrs: { level, ...extractParagraphFormatting(node) },
    content: await extractRuns(node, { context: params.context, styleInfo: style }),
  };
}

/* ------------------------------------------------------------------ */
/* Tables                                                              */
/* ------------------------------------------------------------------ */

const BORDER_STYLES = {
  single: "solid",
  dashed: "dashed",
  dotted: "dotted",
  double: "double",
  none: "none",
  nil: "none",
};

/** Parses a border element into `{ color?, width?, style? }`, or null if empty. */
function parseBorder(border) {
  if (!border) return null;
  const { "w:val": val, "w:sz": sz, "w:color": color } = border.attributes;
  const result = {};
  if (color && color !== "auto") result.color = `#${color}`;
  if (sz) {
    const size = parseInt(sz, 10);
    if (!isNaN(size)) result.width = Math.round(size / 6);
  }
  if (val && BORDER_STYLES[val]) result.style = BORDER_STYLES[val];
  return Object.keys(result).length > 0 ? result : null;
}

/** Reads the `w:w` attribute of a named child as an integer, or undefined. */
function readMargin(parent, name) {
  const raw = findChild(parent, name)?.attributes["w:w"];
  if (!raw) return undefined;
  const value = parseInt(raw, 10);
  return isNaN(value) ? undefined : value;
}

/** Reads default cell margins (`w:tblCellMar`); null when none are set. */
function parseTableProperties(table) {
  const tblPr = findChild(table, "w:tblPr");
  if (!tblPr) return null;
  const cellMargins = findChild(tblPr, "w:tblCellMar");
  if (!cellMargins) return null;
  const margins = {
    marginTop: readMargin(cellMargins, "w:top"),
    marginBottom: readMargin(cellMargins, "w:bottom"),
    marginLeft: readMargin(cellMargins, "w:left"),
    marginRight: readMargin(cellMargins, "w:right"),
  };
  const allUnset = Object.values(margins).every((value) => value === undefined);
  return allUnset ? null : margins;
}

/** Reads the row height (`w:trHeight`) as a CSS pixel string. */
function parseRowProperties(row) {
  const props = { rowHeight: null };
  const trPr = findChild(row, "w:trPr");
  if (!trPr) return props;
  const height = findChild(trPr, "w:trHeight");
  if (height?.attributes["w:val"]) {
    const twips = parseInt(height.attributes["w:val"], 10);
    // Guard against producing "NaNpx" from a malformed attribute.
    if (!isNaN(twips)) props.rowHeight = `${twipsToPixels(twips)}px`;
  }
  return props;
}

/**
 * Reads cell properties. `rowspan` is 0 for a cell that continues a vertical
 * merge started in a previous row, and 1 otherwise (the real span is computed
 * later by scanning the following rows).
 */
function parseCellProperties(cell) {
  const props = { colspan: 1, rowspan: 1, colwidth: null };
  const tcPr = findChild(cell, "w:tcPr");
  if (!tcPr) return props;

  const gridSpan = findChild(tcPr, "w:gridSpan");
  if (gridSpan?.attributes["w:val"]) {
    const span = parseInt(gridSpan.attributes["w:val"], 10);
    if (!isNaN(span)) props.colspan = span;
  }

  // A w:vMerge without w:val means "continue"; only "restart" begins a merge.
  const vMerge = findChild(tcPr, "w:vMerge");
  if (vMerge) {
    const mode = vMerge.attributes["w:val"];
    if (mode === undefined || mode === "continue") props.rowspan = 0;
  }

  const width = findChild(tcPr, "w:tcW");
  if (width?.attributes["w:w"]) {
    const twips = parseInt(width.attributes["w:w"], 10);
    if (!isNaN(twips)) props.colwidth = [twipsToPixels(twips)];
  }

  const shading = findChild(tcPr, "w:shd");
  const fill = shading?.attributes["w:fill"];
  // "auto" is not a colour value; emitting "#auto" would be invalid CSS.
  if (fill && fill !== "auto") props.backgroundColor = `#${fill}`;

  const vAlign = findChild(tcPr, "w:vAlign");
  if (vAlign?.attributes["w:val"]) props.verticalAlign = vAlign.attributes["w:val"];

  const borders = findChild(tcPr, "w:tcBorders");
  if (borders) {
    const top = parseBorder(findChild(borders, "w:top"));
    if (top) props.borderTop = top;
    const bottom = parseBorder(findChild(borders, "w:bottom"));
    if (bottom) props.borderBottom = bottom;
    const left = parseBorder(findChild(borders, "w:left"));
    if (left) props.borderLeft = left;
    const right = parseBorder(findChild(borders, "w:right"));
    if (right) props.borderRight = right;
  }
  return props;
}

/** True for `w:tbl` elements. */
function isTable(node) {
  return node.name === "w:tbl";
}

/** Converts a `w:tbl` element into a table node. */
async function convertTable(node, params) {
  const rows = [];
  for (const child of node.children) {
    if (child.type === "element" && child.name === "w:tr") rows.push(child);
  }

  // Rows share the mutable `activeRowspans` counters, so they must be
  // converted strictly in order; running them concurrently lets a later row
  // read a counter before an earlier row has written it.
  const activeRowspans = new Map();
  const content = [];
  for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
    content.push(
      await convertTableRow(rows[rowIndex], {
        context: params.context,
        activeRowspans,
        rows,
        rowIndex,
      }),
    );
  }

  const attrs = parseTableProperties(node);
  return { type: "table", ...(attrs && { attrs }), content };
}

/** Converts a `w:tr` element, skipping cells covered by a vertical merge. */
async function convertTableRow(row, params) {
  const cells = [];
  let colIndex = 0;
  const rowAttrs = parseRowProperties(row);

  for (const cell of row.children) {
    if (cell.type !== "element" || cell.name !== "w:tc") continue;

    const pending = params.activeRowspans.get(colIndex);
    if (pending && pending > 0) {
      params.activeRowspans.set(colIndex, pending - 1);
      colIndex++;
      continue;
    }

    let props = parseCellProperties(cell);
    if (props?.rowspan === 1) {
      const span = computeRowspan({ rows: params.rows, rowIndex: params.rowIndex, colIndex });
      if (span > 1) props = { ...props, rowspan: span };
    }
    if (props?.rowspan && props.rowspan > 1) {
      params.activeRowspans.set(colIndex, props.rowspan - 1);
    }
    if (props?.rowspan === 0) {
      colIndex++;
      continue;
    }

    const content = await convertCellContent(cell, params);
    cells.push({ type: "tableCell", ...(props && { attrs: props }), content });
    colIndex += props?.colspan || 1;
  }
  return { type: "tableRow", ...(rowAttrs && { attrs: rowAttrs }), content: cells };
}

/**
 * Counts how many rows the cell at (`rowIndex`, `colIndex`) spans by walking
 * the following rows while the cell covering the same column continues the
 * vertical merge.
 */
function computeRowspan({ rows, rowIndex, colIndex }) {
  let span = 1;
  for (let r = rowIndex + 1; r < rows.length; r++) {
    // Restart the column offset for every row; carrying the exhausted offset
    // over would make later rows inspect their first cell instead.
    let remaining = colIndex;
    let continued = false;
    for (const cell of rows[r].children) {
      if (cell.type !== "element" || cell.name !== "w:tc") continue;
      const props = parseCellProperties(cell);
      const colspan = props?.colspan || 1;
      if (remaining >= 0 && remaining < colspan) {
        if (props?.rowspan !== 0) return span;
        span++;
        continued = true;
        break;
      }
      remaining -= colspan;
    }
    if (!continued) break;
  }
  return span;
}

/** Converts the paragraphs of a cell; an empty cell gets one empty paragraph. */
async function convertCellContent(cell, params) {
  const content = [];
  for (const child of cell.children) {
    if (child.type !== "element" || child.name !== "w:p") continue;
    const converted = await convertParagraph(child, params);
    if (Array.isArray(converted)) content.push(...converted);
    else content.push(converted);
  }
  return content.length ? content : [{ type: "paragraph", content: [] }];
}

/* ------------------------------------------------------------------ */
/* Task lists                                                          */
/* ------------------------------------------------------------------ */

const UNCHECKED_BOX = "\u2610";
const CHECKED_BOX = "\u2611";

/** Returns the text node of the first run's `w:t` when it is non-empty, else null. */
function getLeadingTextNode(paragraph) {
  const run = findChild(paragraph, "w:r");
  if (!run) return null;
  const textEl = findChild(run, "w:t");
  if (!textEl) return null;
  const text = textEl.children.find((child) => child.type === "text");
  return (text?.value && text) || null;
}

/** True when the paragraph's first run starts with a checkbox character. */
function isTaskItem(node) {
  const text = getLeadingTextNode(node);
  if (!text) return false;
  return text.value.startsWith(UNCHECKED_BOX) || text.value.startsWith(CHECKED_BOX);
}

/** True when the paragraph's first run starts with the checked checkbox. */
function getTaskItemChecked(node) {
  return getLeadingTextNode(node)?.value.startsWith(CHECKED_BOX) || false;
}

/** Converts a task paragraph into a `taskItem` node. */
async function convertTaskItem(node, params) {
  return {
    type: "taskItem",
    attrs: { checked: getTaskItemChecked(node) },
    content: [await convertTaskItemParagraph(node, params)],
  };
}

/**
 * Converts the run of consecutive task paragraphs starting at `params.index`
 * into one `taskList`, recording consumed sibling indices in
 * `params.processedIndices`.
 */
async function convertTaskList(_node, params) {
  const { siblings, index, processedIndices } = params;
  const items = [];
  for (let i = index; i < siblings.length; i++) {
    const sibling = siblings[i];
    if (sibling.name !== "w:p" || !isTaskItem(sibling)) break;
    processedIndices.add(i);
    items.push(
      await convertTaskItem(sibling, { context: params.context, styleInfo: params.styleInfo }),
    );
  }
  return { type: "taskList", content: items };
}

/** Builds the paragraph of a task item with the checkbox prefix removed. */
async function convertTaskItemParagraph(node, params) {
  const { context, styleInfo } = params;
  const content = await extractRuns(node, { context, styleInfo });
  if (content.length > 0 && content[0].type === "text") {
    const first = content[0];
    if (first.text.startsWith(UNCHECKED_BOX) || first.text.startsWith(CHECKED_BOX)) {
      // Drop exactly the one-character checkbox, then any whitespace after it,
      // so text that follows without a separating space is not truncated.
      const rest = first.text.substring(1).trimStart();
      if (rest) first.text = rest;
      else content.shift();
    }
  }
  const attrs = extractAlignment(node);
  return { type: "paragraph", ...(attrs && { attrs }), content: content.length ? content : undefined };
}

/* ------------------------------------------------------------------ */
/* Block classification                                                */
/* ------------------------------------------------------------------ */

/** Returns the paragraph style id (`w:pStyle`), or undefined. */
function getParagraphStyleId(node) {
  const pPr = findChild(node, "w:pPr");
  return (pPr && findChild(pPr, "w:pStyle"))?.attributes["w:val"];
}

/** True when the paragraph style id is "CodeBlock" or starts with "Code". */
function isCodeBlock(node) {
  const styleId = getParagraphStyleId(node);
  return styleId === CODE_STYLES.CODE_BLOCK || styleId?.startsWith(CODE_STYLES.CODE_PREFIX) || false;
}

/** Returns the lower-cased suffix of a "CodeBlock<Lang>" style id, or undefined. */
function getCodeBlockLanguage(node) {
  const styleId = getParagraphStyleId(node);
  return (
    (styleId?.startsWith(CODE_STYLES.CODE_BLOCK) &&
      styleId.replace(CODE_STYLES.CODE_BLOCK, "").toLowerCase()) ||
    undefined
  );
}

/** True when the paragraph carries numbering properties (`w:numPr`). */
function isListItem(node) {
  const pPr = findChild(node, "w:pPr");
  // findChild reports "not found" as null, so test for null rather than undefined.
  return !!pPr && findChild(pPr, "w:numPr") !== null;
}

/** Returns `{ numId, level }` of a list paragraph, or null when incomplete. */
function getListInfo(node) {
  const pPr = findChild(node, "w:pPr");
  const numPr = pPr && findChild(pPr, "w:numPr");
  if (!numPr) return null;
  const level = findChild(numPr, "w:ilvl");
  const numId = findChild(numPr, "w:numId");
  if (!level || !numId) return null;
  return {
    numId: numId.attributes["w:val"],
    level: parseInt(level.attributes["w:val"] || "0", 10),
  };
}

/**
 * True when the paragraph's first run holds a page break and nothing else
 * apart from run properties and whitespace-only text.
 */
function isHorizontalRule(node) {
  const run = findChild(node, "w:r");
  if (!run) return false;
  let hasPageBreak = false;
  let hasOtherContent = false;
  for (const child of run.children) {
    if (child.type !== "element") continue;
    if (child.name === "w:br" && child.attributes["w:type"] === "page") {
      hasPageBreak = true;
    } else if (child.name === "w:t") {
      if (child.children.find((c) => c.type === "text")?.value?.trim().length) {
        hasOtherContent = true;
      }
    } else if (child.name !== "w:rPr") {
      hasOtherContent = true;
    }
  }
  return hasPageBreak && !hasOtherContent;
}

/* ------------------------------------------------------------------ */
/* Document                                                            */
/* ------------------------------------------------------------------ */

/**
 * Main entry point: parses a DOCX file and converts it to TipTap JSON.
 *
 * @param input   DOCX data in any form accepted by `undio.toUint8Array`.
 * @param options Import options (`image`, `ignoreEmptyParagraphs`).
 * @throws Error when the archive has no word/document.xml.
 */
async function parseDOCX(input, options = {}) {
  const bytes = await undio.toUint8Array(input);
  const files = fflate.unzipSync(bytes);
  const hyperlinks = extractHyperlinks(files);
  const images = await extractImages(files, options.image?.handler);
  const documentFile = files["word/document.xml"];
  if (!documentFile) throw new Error("Invalid DOCX file: missing word/document.xml");
  const tree = xastUtilFromXml.fromXml(new TextDecoder().decode(documentFile));
  const listTypeMap = parseNumberingXml(files);
  const styleMap = parseStylesXml(files);
  const context = { ...options, hyperlinks, images, listTypeMap, styleMap };
  return await convertDocument(tree, { context });
}

/** Converts the parsed document tree into a `doc` node. */
async function convertDocument(tree, params) {
  const empty = { type: "doc", content: [] };
  if (tree.type !== "root") return empty;
  const documentEl = findChild(tree, "w:document");
  if (!documentEl) return empty;
  const body = findChild(documentEl, "w:body");
  if (!body) return empty;
  const elements = body.children.filter((child) => child.type === "element");
  return { type: "doc", content: await convertElements(elements, params) };
}

/** Converts sibling body elements, skipping those consumed by a list/task list. */
async function convertElements(elements, params) {
  const content = [];
  const processed = new Set();
  for (let i = 0; i < elements.length; i++) {
    if (processed.has(i)) continue;
    const element = elements[i];
    if (params.context.ignoreEmptyParagraphs && element.name === "w:p" && isEmptyParagraph(element)) {
      continue;
    }
    const converted = await convertElement(element, elements, i, params, processed);
    if (Array.isArray(converted)) content.push(...converted);
    else if (converted) content.push(converted);
  }
  return content;
}

/** Dispatches one body element to the matching converter; null if unsupported. */
async function convertElement(element, siblings, index, params, processed) {
  switch (element.name) {
    case "w:tbl":
      return await convertTable(element, params);
    case "w:p":
      if (isCodeBlock(element)) return await convertCodeBlock(element);
      if (isTaskItem(element)) {
        return await convertTaskList(element, {
          ...params,
          siblings,
          index,
          processedIndices: processed,
        });
      }
      if (isListItem(element)) return await convertList(element, siblings, index, params, processed);
      if (isHorizontalRule(element)) return { type: "horizontalRule" };
      return await convertParagraph(element, params);
    default:
      return null;
  }
}

/** Converts a code-styled paragraph into a `codeBlock` node. */
async function convertCodeBlock(node) {
  const language = getCodeBlockLanguage(node);
  return {
    type: "codeBlock",
    ...(language && { attrs: { language } }),
    content: extractCodeBlockContent(node),
  };
}

/**
 * Converts the run of consecutive list paragraphs sharing the same numId,
 * starting at `index`, into one bulletList/orderedList node.
 */
async function convertList(node, siblings, index, params, processed) {
  const listInfo = getListInfo(node);
  if (!listInfo) return await convertParagraph(node, params);

  const listType = params.context.listTypeMap.get(listInfo.numId);
  const kind = listType?.type || "bullet";
  const items = [];
  for (let i = index; i < siblings.length; i++) {
    const sibling = siblings[i];
    if (sibling.name !== "w:p" || !isListItem(sibling)) break;
    const info = getListInfo(sibling);
    if (!info || info.numId !== listInfo.numId) break;
    processed.add(i);
    const converted = await convertParagraph(sibling, params);
    items.push({ type: "listItem", content: [Array.isArray(converted) ? converted[0] : converted] });
  }

  const list = { type: kind === "bullet" ? "bulletList" : "orderedList", content: items };
  if (kind === "ordered") {
    list.attrs = { type: null, ...(listType?.start !== undefined && { start: listType.start }) };
  }
  return list;
}

/** Collects the non-empty text of every run as plain text nodes. */
function extractCodeBlockContent(node) {
  const content = [];
  for (const run of findDeepChildren(node, "w:r")) {
    const textEl = findChild(run, "w:t");
    if (!textEl) continue;
    const text = textEl.children.find((child) => child.type === "text");
    if (text && "value" in text && text.value) content.push({ type: "text", text: text.value });
  }
  return content;
}

/** True when no run holds visible text, a drawing/picture, or a page break. */
function isEmptyParagraph(node) {
  for (const run of findDeepChildren(node, "w:r")) {
    const textEl = findChild(run, "w:t");
    if (textEl) {
      const text = textEl.children.find((child) => child.type === "text");
      if (text && "value" in text && text.value && text.value.trim().length > 0) return false;
    }
    if (
      findChild(run, "w:drawing") ||
      findChild(run, "mc:AlternateContent") ||
      findChild(run, "w:pict")
    ) {
      return false;
    }
    const lineBreak = findChild(run, "w:br");
    if (lineBreak && lineBreak.attributes["w:type"] === "page") return false;
  }
  return true;
}

exports.convertParagraph = convertParagraph;
exports.convertTable = convertTable;
exports.convertTaskItem = convertTaskItem;
exports.convertTaskList = convertTaskList;
exports.extractAlignment = extractAlignment;
exports.extractMarks = extractMarks;
exports.extractRuns = extractRuns;
exports.getCodeBlockLanguage = getCodeBlockLanguage;
exports.getListInfo = getListInfo;
exports.getTaskItemChecked = getTaskItemChecked;
exports.isCodeBlock = isCodeBlock;
exports.isHorizontalRule = isHorizontalRule;
exports.isListItem = isListItem;
exports.isTable = isTable;
exports.isTaskItem = isTaskItem;
exports.parseDOCX = parseDOCX;
|
package/dist/index.d.cts
CHANGED
|
@@ -2029,6 +2029,16 @@ declare class Transform {
|
|
|
2029
2029
|
*/
|
|
2030
2030
|
get docChanged(): boolean;
|
|
2031
2031
|
/**
|
|
2032
|
+
Return a single range, in post-transform document positions,
|
|
2033
|
+
that covers all content changed by this transform. Returns null
|
|
2034
|
+
if no replacements are made. Note that this will ignore changes
|
|
2035
|
+
that add/remove marks without replacing the underlying content.
|
|
2036
|
+
*/
|
|
2037
|
+
changedRange(): {
|
|
2038
|
+
from: number;
|
|
2039
|
+
to: number;
|
|
2040
|
+
} | null;
|
|
2041
|
+
/**
|
|
2032
2042
|
Replace the part of the document between `from` and `to` with the
|
|
2033
2043
|
given `slice`.
|
|
2034
2044
|
*/
|
|
@@ -6567,58 +6577,43 @@ interface Commands<ReturnType = any> {
|
|
|
6567
6577
|
interface Storage {
|
|
6568
6578
|
}
|
|
6569
6579
|
|
|
6570
|
-
/**
|
|
6571
|
-
* DOCX image information for custom converter
|
|
6572
|
-
*/
|
|
6573
6580
|
interface DocxImageInfo {
|
|
6574
|
-
/** Image ID (relationship ID in DOCX) */
|
|
6575
6581
|
id: string;
|
|
6576
|
-
/** Content type (e.g., "image/png", "image/jpeg") */
|
|
6577
6582
|
contentType: string;
|
|
6578
|
-
/** Raw image data */
|
|
6579
6583
|
data: Uint8Array;
|
|
6580
6584
|
}
|
|
6581
|
-
/**
|
|
6582
|
-
* Result of image conversion
|
|
6583
|
-
*/
|
|
6584
6585
|
interface DocxImageResult {
|
|
6585
|
-
/** Image src attribute value (URL or data URL) */
|
|
6586
6586
|
src: string;
|
|
6587
|
-
/** Optional alt text */
|
|
6588
6587
|
alt?: string;
|
|
6589
6588
|
}
|
|
6589
|
+
type DocxImageImportHandler = (info: DocxImageInfo) => Promise<DocxImageResult>;
|
|
6590
6590
|
/**
|
|
6591
|
-
*
|
|
6591
|
+
* List information extracted from numbering.xml
|
|
6592
6592
|
*/
|
|
6593
|
-
|
|
6594
|
-
|
|
6593
|
+
interface ListInfo {
|
|
6594
|
+
type: "bullet" | "ordered";
|
|
6595
|
+
start?: number;
|
|
6596
|
+
}
|
|
6597
|
+
/**
|
|
6598
|
+
* Map of numbering ID to list information
|
|
6599
|
+
*/
|
|
6600
|
+
type ListTypeMap = Map<string, ListInfo>;
|
|
6595
6601
|
/**
|
|
6596
|
-
*
|
|
6602
|
+
* Image information with dimensions (for round-trip conversion)
|
|
6597
6603
|
*/
|
|
6604
|
+
interface ImageInfo {
|
|
6605
|
+
src: string;
|
|
6606
|
+
width?: number;
|
|
6607
|
+
height?: number;
|
|
6608
|
+
}
|
|
6609
|
+
|
|
6598
6610
|
interface DocxImportOptions {
|
|
6599
|
-
|
|
6600
|
-
|
|
6601
|
-
|
|
6611
|
+
image?: {
|
|
6612
|
+
handler?: DocxImageImportHandler;
|
|
6613
|
+
canvasImport?: () => Promise<typeof _napi_rs_canvas>;
|
|
6614
|
+
enableImageCrop?: boolean;
|
|
6615
|
+
};
|
|
6602
6616
|
ignoreEmptyParagraphs?: boolean;
|
|
6603
|
-
/**
|
|
6604
|
-
* Dynamic import function for @napi-rs/canvas
|
|
6605
|
-
* Required for image cropping in Node.js environment, ignored in browser
|
|
6606
|
-
*
|
|
6607
|
-
* @example
|
|
6608
|
-
* import { parseDOCX } from '@docen/import-docx';
|
|
6609
|
-
* const content = await parseDOCX(buffer, {
|
|
6610
|
-
* canvasImport: () => import('@napi-rs/canvas')
|
|
6611
|
-
* });
|
|
6612
|
-
*/
|
|
6613
|
-
canvasImport?: () => Promise<typeof _napi_rs_canvas>;
|
|
6614
|
-
/**
|
|
6615
|
-
* Enable or disable image cropping during import
|
|
6616
|
-
* When true (default), images with crop information in DOCX will be cropped
|
|
6617
|
-
* When false, crop information is ignored and full image is used
|
|
6618
|
-
*
|
|
6619
|
-
* @default true
|
|
6620
|
-
*/
|
|
6621
|
-
enableImageCrop?: boolean;
|
|
6622
6617
|
}
|
|
6623
6618
|
|
|
6624
6619
|
/**
|
|
@@ -6644,17 +6639,15 @@ interface StyleInfo {
|
|
|
6644
6639
|
}
|
|
6645
6640
|
type StyleMap = Map<string, StyleInfo>;
|
|
6646
6641
|
|
|
6647
|
-
|
|
6648
|
-
|
|
6649
|
-
|
|
6650
|
-
|
|
6651
|
-
|
|
6652
|
-
|
|
6653
|
-
|
|
6654
|
-
|
|
6655
|
-
height?: number;
|
|
6642
|
+
/**
|
|
6643
|
+
* Parsing context containing all global resources from DOCX file
|
|
6644
|
+
*/
|
|
6645
|
+
interface ParseContext extends DocxImportOptions {
|
|
6646
|
+
hyperlinks: Map<string, string>;
|
|
6647
|
+
images: Map<string, ImageInfo>;
|
|
6648
|
+
listTypeMap: ListTypeMap;
|
|
6649
|
+
styleMap: StyleMap;
|
|
6656
6650
|
}
|
|
6657
|
-
|
|
6658
6651
|
/**
|
|
6659
6652
|
* Main entry point: Parse DOCX file and convert to TipTap JSON
|
|
6660
6653
|
*/
|
|
@@ -6664,19 +6657,15 @@ declare function parseDOCX(input: DataType, options?: DocxImportOptions): Promis
|
|
|
6664
6657
|
* Convert DOCX paragraph node to TipTap paragraph
|
|
6665
6658
|
*/
|
|
6666
6659
|
declare function convertParagraph(node: Element$1, params: {
|
|
6667
|
-
|
|
6668
|
-
|
|
6669
|
-
options?: DocxImportOptions;
|
|
6670
|
-
styleMap?: StyleMap;
|
|
6660
|
+
context: ParseContext;
|
|
6661
|
+
styleInfo?: StyleInfo;
|
|
6671
6662
|
}): Promise<JSONContent>;
|
|
6672
6663
|
|
|
6673
6664
|
/**
|
|
6674
6665
|
* Extract all text runs from paragraph
|
|
6675
6666
|
*/
|
|
6676
6667
|
declare function extractRuns(paragraph: Element$1, params: {
|
|
6677
|
-
|
|
6678
|
-
images: Map<string, ImageInfo>;
|
|
6679
|
-
options?: DocxImportOptions;
|
|
6668
|
+
context: ParseContext;
|
|
6680
6669
|
styleInfo?: StyleInfo;
|
|
6681
6670
|
}): Promise<Array<{
|
|
6682
6671
|
type: string;
|
|
@@ -6730,10 +6719,7 @@ declare function isTable(node: Element$1): boolean;
|
|
|
6730
6719
|
* Convert a table element to TipTap JSON
|
|
6731
6720
|
*/
|
|
6732
6721
|
declare function convertTable(node: Element$1, params: {
|
|
6733
|
-
|
|
6734
|
-
images: Map<string, ImageInfo>;
|
|
6735
|
-
options?: DocxImportOptions;
|
|
6736
|
-
styleMap?: StyleMap;
|
|
6722
|
+
context: ParseContext;
|
|
6737
6723
|
}): Promise<JSONContent>;
|
|
6738
6724
|
|
|
6739
6725
|
/**
|
|
@@ -6747,12 +6733,25 @@ declare function getTaskItemChecked(node: Element$1): boolean;
|
|
|
6747
6733
|
/**
|
|
6748
6734
|
* Convert a task item to TipTap JSON
|
|
6749
6735
|
*/
|
|
6750
|
-
declare function convertTaskItem(node: Element$1
|
|
6736
|
+
declare function convertTaskItem(node: Element$1, params: {
|
|
6737
|
+
context: ParseContext;
|
|
6738
|
+
styleInfo?: StyleInfo;
|
|
6739
|
+
}): Promise<JSONContent>;
|
|
6740
|
+
/**
|
|
6741
|
+
* Convert task list (handles consecutive task items)
|
|
6742
|
+
*/
|
|
6743
|
+
declare function convertTaskList(_node: Element$1, params: {
|
|
6744
|
+
context: ParseContext;
|
|
6745
|
+
styleInfo?: StyleInfo;
|
|
6746
|
+
siblings: Element$1[];
|
|
6747
|
+
index: number;
|
|
6748
|
+
processedIndices: Set<number>;
|
|
6749
|
+
}): Promise<JSONContent>;
|
|
6751
6750
|
|
|
6752
6751
|
/**
|
|
6753
6752
|
* Check if a paragraph is a horizontal rule (page break)
|
|
6754
6753
|
*/
|
|
6755
6754
|
declare function isHorizontalRule(node: Element$1): boolean;
|
|
6756
6755
|
|
|
6757
|
-
export { convertParagraph, convertTable, convertTaskItem, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem, parseDOCX };
|
|
6758
|
-
export type {
|
|
6756
|
+
export { convertParagraph, convertTable, convertTaskItem, convertTaskList, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem, parseDOCX };
|
|
6757
|
+
export type { DocxImageImportHandler, DocxImageInfo, DocxImageResult, DocxImportOptions, ImageInfo, ListInfo, ListTypeMap, ParseContext };
|
package/dist/index.d.mts
CHANGED
|
@@ -2029,6 +2029,16 @@ declare class Transform {
|
|
|
2029
2029
|
*/
|
|
2030
2030
|
get docChanged(): boolean;
|
|
2031
2031
|
/**
|
|
2032
|
+
Return a single range, in post-transform document positions,
|
|
2033
|
+
that covers all content changed by this transform. Returns null
|
|
2034
|
+
if no replacements are made. Note that this will ignore changes
|
|
2035
|
+
that add/remove marks without replacing the underlying content.
|
|
2036
|
+
*/
|
|
2037
|
+
changedRange(): {
|
|
2038
|
+
from: number;
|
|
2039
|
+
to: number;
|
|
2040
|
+
} | null;
|
|
2041
|
+
/**
|
|
2032
2042
|
Replace the part of the document between `from` and `to` with the
|
|
2033
2043
|
given `slice`.
|
|
2034
2044
|
*/
|
|
@@ -6567,58 +6577,43 @@ interface Commands<ReturnType = any> {
|
|
|
6567
6577
|
interface Storage {
|
|
6568
6578
|
}
|
|
6569
6579
|
|
|
6570
|
-
/**
|
|
6571
|
-
* DOCX image information for custom converter
|
|
6572
|
-
*/
|
|
6573
6580
|
interface DocxImageInfo {
|
|
6574
|
-
/** Image ID (relationship ID in DOCX) */
|
|
6575
6581
|
id: string;
|
|
6576
|
-
/** Content type (e.g., "image/png", "image/jpeg") */
|
|
6577
6582
|
contentType: string;
|
|
6578
|
-
/** Raw image data */
|
|
6579
6583
|
data: Uint8Array;
|
|
6580
6584
|
}
|
|
6581
|
-
/**
|
|
6582
|
-
* Result of image conversion
|
|
6583
|
-
*/
|
|
6584
6585
|
interface DocxImageResult {
|
|
6585
|
-
/** Image src attribute value (URL or data URL) */
|
|
6586
6586
|
src: string;
|
|
6587
|
-
/** Optional alt text */
|
|
6588
6587
|
alt?: string;
|
|
6589
6588
|
}
|
|
6589
|
+
type DocxImageImportHandler = (info: DocxImageInfo) => Promise<DocxImageResult>;
|
|
6590
6590
|
/**
|
|
6591
|
-
*
|
|
6591
|
+
* List information extracted from numbering.xml
|
|
6592
6592
|
*/
|
|
6593
|
-
|
|
6594
|
-
|
|
6593
|
+
interface ListInfo {
|
|
6594
|
+
type: "bullet" | "ordered";
|
|
6595
|
+
start?: number;
|
|
6596
|
+
}
|
|
6597
|
+
/**
|
|
6598
|
+
* Map of numbering ID to list information
|
|
6599
|
+
*/
|
|
6600
|
+
type ListTypeMap = Map<string, ListInfo>;
|
|
6595
6601
|
/**
|
|
6596
|
-
*
|
|
6602
|
+
* Image information with dimensions (for round-trip conversion)
|
|
6597
6603
|
*/
|
|
6604
|
+
interface ImageInfo {
|
|
6605
|
+
src: string;
|
|
6606
|
+
width?: number;
|
|
6607
|
+
height?: number;
|
|
6608
|
+
}
|
|
6609
|
+
|
|
6598
6610
|
interface DocxImportOptions {
|
|
6599
|
-
|
|
6600
|
-
|
|
6601
|
-
|
|
6611
|
+
image?: {
|
|
6612
|
+
handler?: DocxImageImportHandler;
|
|
6613
|
+
canvasImport?: () => Promise<typeof _napi_rs_canvas>;
|
|
6614
|
+
enableImageCrop?: boolean;
|
|
6615
|
+
};
|
|
6602
6616
|
ignoreEmptyParagraphs?: boolean;
|
|
6603
|
-
/**
|
|
6604
|
-
* Dynamic import function for @napi-rs/canvas
|
|
6605
|
-
* Required for image cropping in Node.js environment, ignored in browser
|
|
6606
|
-
*
|
|
6607
|
-
* @example
|
|
6608
|
-
* import { parseDOCX } from '@docen/import-docx';
|
|
6609
|
-
* const content = await parseDOCX(buffer, {
|
|
6610
|
-
* canvasImport: () => import('@napi-rs/canvas')
|
|
6611
|
-
* });
|
|
6612
|
-
*/
|
|
6613
|
-
canvasImport?: () => Promise<typeof _napi_rs_canvas>;
|
|
6614
|
-
/**
|
|
6615
|
-
* Enable or disable image cropping during import
|
|
6616
|
-
* When true (default), images with crop information in DOCX will be cropped
|
|
6617
|
-
* When false, crop information is ignored and full image is used
|
|
6618
|
-
*
|
|
6619
|
-
* @default true
|
|
6620
|
-
*/
|
|
6621
|
-
enableImageCrop?: boolean;
|
|
6622
6617
|
}
|
|
6623
6618
|
|
|
6624
6619
|
/**
|
|
@@ -6644,17 +6639,15 @@ interface StyleInfo {
|
|
|
6644
6639
|
}
|
|
6645
6640
|
type StyleMap = Map<string, StyleInfo>;
|
|
6646
6641
|
|
|
6647
|
-
|
|
6648
|
-
|
|
6649
|
-
|
|
6650
|
-
|
|
6651
|
-
|
|
6652
|
-
|
|
6653
|
-
|
|
6654
|
-
|
|
6655
|
-
height?: number;
|
|
6642
|
+
/**
|
|
6643
|
+
* Parsing context containing all global resources from DOCX file
|
|
6644
|
+
*/
|
|
6645
|
+
interface ParseContext extends DocxImportOptions {
|
|
6646
|
+
hyperlinks: Map<string, string>;
|
|
6647
|
+
images: Map<string, ImageInfo>;
|
|
6648
|
+
listTypeMap: ListTypeMap;
|
|
6649
|
+
styleMap: StyleMap;
|
|
6656
6650
|
}
|
|
6657
|
-
|
|
6658
6651
|
/**
|
|
6659
6652
|
* Main entry point: Parse DOCX file and convert to TipTap JSON
|
|
6660
6653
|
*/
|
|
@@ -6664,19 +6657,15 @@ declare function parseDOCX(input: DataType, options?: DocxImportOptions): Promis
|
|
|
6664
6657
|
* Convert DOCX paragraph node to TipTap paragraph
|
|
6665
6658
|
*/
|
|
6666
6659
|
declare function convertParagraph(node: Element$1, params: {
|
|
6667
|
-
|
|
6668
|
-
|
|
6669
|
-
options?: DocxImportOptions;
|
|
6670
|
-
styleMap?: StyleMap;
|
|
6660
|
+
context: ParseContext;
|
|
6661
|
+
styleInfo?: StyleInfo;
|
|
6671
6662
|
}): Promise<JSONContent>;
|
|
6672
6663
|
|
|
6673
6664
|
/**
|
|
6674
6665
|
* Extract all text runs from paragraph
|
|
6675
6666
|
*/
|
|
6676
6667
|
declare function extractRuns(paragraph: Element$1, params: {
|
|
6677
|
-
|
|
6678
|
-
images: Map<string, ImageInfo>;
|
|
6679
|
-
options?: DocxImportOptions;
|
|
6668
|
+
context: ParseContext;
|
|
6680
6669
|
styleInfo?: StyleInfo;
|
|
6681
6670
|
}): Promise<Array<{
|
|
6682
6671
|
type: string;
|
|
@@ -6730,10 +6719,7 @@ declare function isTable(node: Element$1): boolean;
|
|
|
6730
6719
|
* Convert a table element to TipTap JSON
|
|
6731
6720
|
*/
|
|
6732
6721
|
declare function convertTable(node: Element$1, params: {
|
|
6733
|
-
|
|
6734
|
-
images: Map<string, ImageInfo>;
|
|
6735
|
-
options?: DocxImportOptions;
|
|
6736
|
-
styleMap?: StyleMap;
|
|
6722
|
+
context: ParseContext;
|
|
6737
6723
|
}): Promise<JSONContent>;
|
|
6738
6724
|
|
|
6739
6725
|
/**
|
|
@@ -6747,12 +6733,25 @@ declare function getTaskItemChecked(node: Element$1): boolean;
|
|
|
6747
6733
|
/**
|
|
6748
6734
|
* Convert a task item to TipTap JSON
|
|
6749
6735
|
*/
|
|
6750
|
-
declare function convertTaskItem(node: Element$1
|
|
6736
|
+
declare function convertTaskItem(node: Element$1, params: {
|
|
6737
|
+
context: ParseContext;
|
|
6738
|
+
styleInfo?: StyleInfo;
|
|
6739
|
+
}): Promise<JSONContent>;
|
|
6740
|
+
/**
|
|
6741
|
+
* Convert task list (handles consecutive task items)
|
|
6742
|
+
*/
|
|
6743
|
+
declare function convertTaskList(_node: Element$1, params: {
|
|
6744
|
+
context: ParseContext;
|
|
6745
|
+
styleInfo?: StyleInfo;
|
|
6746
|
+
siblings: Element$1[];
|
|
6747
|
+
index: number;
|
|
6748
|
+
processedIndices: Set<number>;
|
|
6749
|
+
}): Promise<JSONContent>;
|
|
6751
6750
|
|
|
6752
6751
|
/**
|
|
6753
6752
|
* Check if a paragraph is a horizontal rule (page break)
|
|
6754
6753
|
*/
|
|
6755
6754
|
declare function isHorizontalRule(node: Element$1): boolean;
|
|
6756
6755
|
|
|
6757
|
-
export { convertParagraph, convertTable, convertTaskItem, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem, parseDOCX };
|
|
6758
|
-
export type {
|
|
6756
|
+
export { convertParagraph, convertTable, convertTaskItem, convertTaskList, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem, parseDOCX };
|
|
6757
|
+
export type { DocxImageImportHandler, DocxImageInfo, DocxImageResult, DocxImportOptions, ImageInfo, ListInfo, ListTypeMap, ParseContext };
|
package/dist/index.d.ts
CHANGED
|
@@ -2029,6 +2029,16 @@ declare class Transform {
|
|
|
2029
2029
|
*/
|
|
2030
2030
|
get docChanged(): boolean;
|
|
2031
2031
|
/**
|
|
2032
|
+
Return a single range, in post-transform document positions,
|
|
2033
|
+
that covers all content changed by this transform. Returns null
|
|
2034
|
+
if no replacements are made. Note that this will ignore changes
|
|
2035
|
+
that add/remove marks without replacing the underlying content.
|
|
2036
|
+
*/
|
|
2037
|
+
changedRange(): {
|
|
2038
|
+
from: number;
|
|
2039
|
+
to: number;
|
|
2040
|
+
} | null;
|
|
2041
|
+
/**
|
|
2032
2042
|
Replace the part of the document between `from` and `to` with the
|
|
2033
2043
|
given `slice`.
|
|
2034
2044
|
*/
|
|
@@ -6567,58 +6577,43 @@ interface Commands<ReturnType = any> {
|
|
|
6567
6577
|
interface Storage {
|
|
6568
6578
|
}
|
|
6569
6579
|
|
|
6570
|
-
/**
|
|
6571
|
-
* DOCX image information for custom converter
|
|
6572
|
-
*/
|
|
6573
6580
|
interface DocxImageInfo {
|
|
6574
|
-
/** Image ID (relationship ID in DOCX) */
|
|
6575
6581
|
id: string;
|
|
6576
|
-
/** Content type (e.g., "image/png", "image/jpeg") */
|
|
6577
6582
|
contentType: string;
|
|
6578
|
-
/** Raw image data */
|
|
6579
6583
|
data: Uint8Array;
|
|
6580
6584
|
}
|
|
6581
|
-
/**
|
|
6582
|
-
* Result of image conversion
|
|
6583
|
-
*/
|
|
6584
6585
|
interface DocxImageResult {
|
|
6585
|
-
/** Image src attribute value (URL or data URL) */
|
|
6586
6586
|
src: string;
|
|
6587
|
-
/** Optional alt text */
|
|
6588
6587
|
alt?: string;
|
|
6589
6588
|
}
|
|
6589
|
+
type DocxImageImportHandler = (info: DocxImageInfo) => Promise<DocxImageResult>;
|
|
6590
6590
|
/**
|
|
6591
|
-
*
|
|
6591
|
+
* List information extracted from numbering.xml
|
|
6592
6592
|
*/
|
|
6593
|
-
|
|
6594
|
-
|
|
6593
|
+
interface ListInfo {
|
|
6594
|
+
type: "bullet" | "ordered";
|
|
6595
|
+
start?: number;
|
|
6596
|
+
}
|
|
6597
|
+
/**
|
|
6598
|
+
* Map of numbering ID to list information
|
|
6599
|
+
*/
|
|
6600
|
+
type ListTypeMap = Map<string, ListInfo>;
|
|
6595
6601
|
/**
|
|
6596
|
-
*
|
|
6602
|
+
* Image information with dimensions (for round-trip conversion)
|
|
6597
6603
|
*/
|
|
6604
|
+
interface ImageInfo {
|
|
6605
|
+
src: string;
|
|
6606
|
+
width?: number;
|
|
6607
|
+
height?: number;
|
|
6608
|
+
}
|
|
6609
|
+
|
|
6598
6610
|
interface DocxImportOptions {
|
|
6599
|
-
|
|
6600
|
-
|
|
6601
|
-
|
|
6611
|
+
image?: {
|
|
6612
|
+
handler?: DocxImageImportHandler;
|
|
6613
|
+
canvasImport?: () => Promise<typeof _napi_rs_canvas>;
|
|
6614
|
+
enableImageCrop?: boolean;
|
|
6615
|
+
};
|
|
6602
6616
|
ignoreEmptyParagraphs?: boolean;
|
|
6603
|
-
/**
|
|
6604
|
-
* Dynamic import function for @napi-rs/canvas
|
|
6605
|
-
* Required for image cropping in Node.js environment, ignored in browser
|
|
6606
|
-
*
|
|
6607
|
-
* @example
|
|
6608
|
-
* import { parseDOCX } from '@docen/import-docx';
|
|
6609
|
-
* const content = await parseDOCX(buffer, {
|
|
6610
|
-
* canvasImport: () => import('@napi-rs/canvas')
|
|
6611
|
-
* });
|
|
6612
|
-
*/
|
|
6613
|
-
canvasImport?: () => Promise<typeof _napi_rs_canvas>;
|
|
6614
|
-
/**
|
|
6615
|
-
* Enable or disable image cropping during import
|
|
6616
|
-
* When true (default), images with crop information in DOCX will be cropped
|
|
6617
|
-
* When false, crop information is ignored and full image is used
|
|
6618
|
-
*
|
|
6619
|
-
* @default true
|
|
6620
|
-
*/
|
|
6621
|
-
enableImageCrop?: boolean;
|
|
6622
6617
|
}
|
|
6623
6618
|
|
|
6624
6619
|
/**
|
|
@@ -6644,17 +6639,15 @@ interface StyleInfo {
|
|
|
6644
6639
|
}
|
|
6645
6640
|
type StyleMap = Map<string, StyleInfo>;
|
|
6646
6641
|
|
|
6647
|
-
|
|
6648
|
-
|
|
6649
|
-
|
|
6650
|
-
|
|
6651
|
-
|
|
6652
|
-
|
|
6653
|
-
|
|
6654
|
-
|
|
6655
|
-
height?: number;
|
|
6642
|
+
/**
|
|
6643
|
+
* Parsing context containing all global resources from DOCX file
|
|
6644
|
+
*/
|
|
6645
|
+
interface ParseContext extends DocxImportOptions {
|
|
6646
|
+
hyperlinks: Map<string, string>;
|
|
6647
|
+
images: Map<string, ImageInfo>;
|
|
6648
|
+
listTypeMap: ListTypeMap;
|
|
6649
|
+
styleMap: StyleMap;
|
|
6656
6650
|
}
|
|
6657
|
-
|
|
6658
6651
|
/**
|
|
6659
6652
|
* Main entry point: Parse DOCX file and convert to TipTap JSON
|
|
6660
6653
|
*/
|
|
@@ -6664,19 +6657,15 @@ declare function parseDOCX(input: DataType, options?: DocxImportOptions): Promis
|
|
|
6664
6657
|
* Convert DOCX paragraph node to TipTap paragraph
|
|
6665
6658
|
*/
|
|
6666
6659
|
declare function convertParagraph(node: Element$1, params: {
|
|
6667
|
-
|
|
6668
|
-
|
|
6669
|
-
options?: DocxImportOptions;
|
|
6670
|
-
styleMap?: StyleMap;
|
|
6660
|
+
context: ParseContext;
|
|
6661
|
+
styleInfo?: StyleInfo;
|
|
6671
6662
|
}): Promise<JSONContent>;
|
|
6672
6663
|
|
|
6673
6664
|
/**
|
|
6674
6665
|
* Extract all text runs from paragraph
|
|
6675
6666
|
*/
|
|
6676
6667
|
declare function extractRuns(paragraph: Element$1, params: {
|
|
6677
|
-
|
|
6678
|
-
images: Map<string, ImageInfo>;
|
|
6679
|
-
options?: DocxImportOptions;
|
|
6668
|
+
context: ParseContext;
|
|
6680
6669
|
styleInfo?: StyleInfo;
|
|
6681
6670
|
}): Promise<Array<{
|
|
6682
6671
|
type: string;
|
|
@@ -6730,10 +6719,7 @@ declare function isTable(node: Element$1): boolean;
|
|
|
6730
6719
|
* Convert a table element to TipTap JSON
|
|
6731
6720
|
*/
|
|
6732
6721
|
declare function convertTable(node: Element$1, params: {
|
|
6733
|
-
|
|
6734
|
-
images: Map<string, ImageInfo>;
|
|
6735
|
-
options?: DocxImportOptions;
|
|
6736
|
-
styleMap?: StyleMap;
|
|
6722
|
+
context: ParseContext;
|
|
6737
6723
|
}): Promise<JSONContent>;
|
|
6738
6724
|
|
|
6739
6725
|
/**
|
|
@@ -6747,12 +6733,25 @@ declare function getTaskItemChecked(node: Element$1): boolean;
|
|
|
6747
6733
|
/**
|
|
6748
6734
|
* Convert a task item to TipTap JSON
|
|
6749
6735
|
*/
|
|
6750
|
-
declare function convertTaskItem(node: Element$1
|
|
6736
|
+
declare function convertTaskItem(node: Element$1, params: {
|
|
6737
|
+
context: ParseContext;
|
|
6738
|
+
styleInfo?: StyleInfo;
|
|
6739
|
+
}): Promise<JSONContent>;
|
|
6740
|
+
/**
|
|
6741
|
+
* Convert task list (handles consecutive task items)
|
|
6742
|
+
*/
|
|
6743
|
+
declare function convertTaskList(_node: Element$1, params: {
|
|
6744
|
+
context: ParseContext;
|
|
6745
|
+
styleInfo?: StyleInfo;
|
|
6746
|
+
siblings: Element$1[];
|
|
6747
|
+
index: number;
|
|
6748
|
+
processedIndices: Set<number>;
|
|
6749
|
+
}): Promise<JSONContent>;
|
|
6751
6750
|
|
|
6752
6751
|
/**
|
|
6753
6752
|
* Check if a paragraph is a horizontal rule (page break)
|
|
6754
6753
|
*/
|
|
6755
6754
|
declare function isHorizontalRule(node: Element$1): boolean;
|
|
6756
6755
|
|
|
6757
|
-
export { convertParagraph, convertTable, convertTaskItem, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem, parseDOCX };
|
|
6758
|
-
export type {
|
|
6756
|
+
export { convertParagraph, convertTable, convertTaskItem, convertTaskList, extractAlignment, extractMarks, extractRuns, getCodeBlockLanguage, getListInfo, getTaskItemChecked, isCodeBlock, isHorizontalRule, isListItem, isTable, isTaskItem, parseDOCX };
|
|
6757
|
+
export type { DocxImageImportHandler, DocxImageInfo, DocxImageResult, DocxImportOptions, ImageInfo, ListInfo, ListTypeMap, ParseContext };
|
package/dist/index.mjs
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{fromXml as P}from"xast-util-from-xml";import{unzipSync as lt}from"fflate";import{toUint8Array as ut}from"undio";import{imageMeta as ft}from"image-meta";function a(n,t){for(const e of n.children)if(e.type==="element"&&e.name===t)return e}function y(n,t){for(const e of n.children){if(e.type==="element"&&e.name===t)return e;if(e.type==="element"){const r=y(e,t);if(r)return r}}}function x(n,t){const e=[];for(const r of n.children)r.type==="element"&&r.name===t&&e.push(r),r.type==="element"&&e.push(...x(r,t));return e}const N="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";function U(n){const t=n.length,e=Math.ceil(t/3)*4,r=Array.from({length:e});let o=0;for(let i=0;i<t;i+=3){const s=n[i],c=i+1<t?n[i+1]:0,l=i+2<t?n[i+2]:0,u=s>>2,f=(s&3)<<4|c>>4,w=(c&15)<<2|l>>6,p=l&63;r[o++]=N[u],r[o++]=N[f],r[o++]=i+1<t?N[w]:"=",r[o++]=i+2<t?N[p]:"="}return r.join("")}function pt(n){const t=atob(n),e=new Uint8Array(t.length);for(let r=0;r<t.length;r++)e[r]=t.charCodeAt(r);return e}const wt=globalThis.process?.release?.name==="node",H=typeof window<"u";async function dt(n){const t=await n;return t.default||t}let F,_=class{#t=!1;constructor({enableHWA:t=!1}={}){this.#t=t}create(t,e){const r=this._createCanvas(t,e);return{canvas:r,context:r.getContext("2d",{willReadFrequently:!this.#t})}}reset({canvas:t},e,r){if(!t)throw new Error("Canvas is not specified");t.width=e,t.height=r}destroy(t){if(!t.canvas)throw new Error("Canvas is not specified");t.canvas.width=0,t.canvas.height=0,t.canvas=void 0,t.context=void 0}_createCanvas(t,e){throw new Error("Not implemented")}};class ht extends _{_document;constructor({ownerDocument:t=globalThis.document,enableHWA:e=!1}={}){super({enableHWA:e}),this._document=t}_createCanvas(t,e){const r=this._document.createElement("canvas");return r.width=t,r.height=e,r}}class gt extends _{constructor({enableHWA:t=!1}={}){super({enableHWA:t})}_createCanvas(t,e){if(!F)throw new Error("@napi-rs/canvas module is not resolved");return 
F.createCanvas(t,e)}}async function mt(n){F??=await dt(n())}async function bt(n){if(H)return ht;if(wt){if(!n)throw new Error("In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas");return await mt(n),gt}throw new Error("Unsupported environment for canvas operations")}async function yt(n,t,e={}){if(!t||!t.left&&!t.top&&!t.right&&!t.bottom||e.enabled===!1)return n;try{const r=await bt(e.canvasImport),o=await vt(n,r),i=(t.left||0)/1e5*o.width,s=(t.top||0)/1e5*o.height,c=(t.right||0)/1e5*o.width,l=(t.bottom||0)/1e5*o.height,u=Math.round(o.width-i-c),f=Math.round(o.height-s-l);if(u<=0||f<=0)return console.warn("Invalid crop dimensions, returning original image"),n;const w=new r().create(u,f);if(!w.context)throw new Error("Failed to get 2D context from canvas");w.context.drawImage(o,i,s,u,f,0,0,u,f);const p=w.canvas.toDataURL(),d=await(await fetch(p)).arrayBuffer();return new Uint8Array(d)}catch(r){return console.warn("Image cropping failed, returning original image:",r),n}}async function vt(n,t){if(H){const e=new Blob([n.buffer]),r=URL.createObjectURL(e);try{const o=new Image;return new Promise((i,s)=>{o.onload=()=>{URL.revokeObjectURL(r),i(o)},o.onerror=()=>{URL.revokeObjectURL(r),s(new Error("Failed to load image"))},o.src=r})}catch(o){throw URL.revokeObjectURL(r),o}}else{if(!F)throw new Error("@napi-rs/canvas module is not resolved");return await F.loadImage(Buffer.from(n))}}const It="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";function R(n){const t=parseInt(n,10);if(!isNaN(t))return Math.round(t/9525)}function xt(n){const t=n.attributes.l,e=n.attributes.t,r=n.attributes.r,o=n.attributes.b;if(!(!t&&!e&&!r&&!o))return{left:t?parseInt(t,10):void 0,top:e?parseInt(e,10):void 0,right:r?parseInt(r,10):void 0,bottom:o?parseInt(o,10):void 0}}function X(n){const 
t=a(n,"wp:align"),e=a(n,"wp:posOffset"),r=t?.children[0]?.type==="text"?t.children[0].value:void 0,o=e?.children[0]?.type==="text"?parseInt(e.children[0].value,10):void 0;if(!(!r&&o===void 0))return{...r&&{align:r},...o!==void 0&&{offset:o}}}function q(n){let t=a(n,"w:drawing");if(t)return t;const e=a(n,"mc:AlternateContent"),r=e&&a(e,"mc:Choice");return r&&a(r,"w:drawing")}function J(n,t,e,r){const o=e/r,i=n/t;return Math.abs(o-i)>.1?o>i?{width:n,height:Math.round(n/o)}:{width:Math.round(t*o),height:t}:{width:n,height:t}}function kt(n){const t=new Map,e=n["word/_rels/document.xml.rels"];if(!e)return t;const r=P(new TextDecoder().decode(e)),o=a(r,"Relationships");if(!o)return t;const i=x(o,"Relationship");for(const s of i)if(s.attributes.Type===It&&s.attributes.Id&&s.attributes.Target){const c="word/"+s.attributes.Target,l=n[c];if(!l)continue;let u,f,w="png";try{const h=ft(l);u=h.width,f=h.height,h.type&&(w=h.type)}catch{}const p=U(l),d=`data:image/${w};base64,${p}`;t.set(s.attributes.Id,{src:d,width:u,height:f})}return t}async function $(n,t){const{images:e,options:r}=t,o=y(n,"a:blip");if(!o?.attributes["r:embed"])return null;const i=o.attributes["r:embed"],s=e.get(i);if(!s)return null;let c=s.src;const l=y(n,"a:srcRect");if(l){const g=xt(l);if(g&&c.startsWith("data:")){const[I,M]=c.split(",");if(M){const at=pt(M);try{const A=await yt(at,g,{canvasImport:r?.canvasImport,enabled:r?.enableImageCrop!==!1}),ct=U(A);c=`${I},${ct}`}catch(A){console.warn("Image cropping failed, using original image:",A)}}}}const u=y(n,"wp:extent");let f,w;if(u){const g=u.attributes.cx,I=u.attributes.cy;typeof g=="string"&&(f=R(g)),typeof I=="string"&&(w=R(I))}const p=y(n,"a:xfrm");let d;if(p?.attributes.rot){const g=parseInt(p.attributes.rot,10);isNaN(g)||(d=g/6e4)}const h=y(n,"wp:docPr")?.attributes.title,m=y(n,"wp:positionH"),v=y(n,"wp:positionV");let b;if(m||v){const g=m?X(m):void 0,I=v?X(v):void 
0;b={horizontalPosition:{relative:m?.attributes.relativeFrom||"page",...g?.align&&{align:g.align},...g?.offset!==void 0&&{offset:g.offset}},verticalPosition:{relative:v?.attributes.relativeFrom||"page",...I?.align&&{align:I.align},...I?.offset!==void 0&&{offset:I.offset}}}}const k=y(n,"pic:spPr");let C;if(k){const g=y(k,"a:ln"),I=g&&y(g,"a:solidFill"),M=I&&y(I,"a:srgbClr");M?.attributes.val&&(C={type:"solidFill",solidFillType:"rgb",value:M.attributes.val})}return{type:"image",attrs:{src:c,alt:"",...f!==void 0&&{width:f},...w!==void 0&&{height:w},...d!==void 0&&{rotation:d},...h&&{title:h},...b&&{floating:b},...C&&{outline:C}}}}function Ct(n,t,e){if(t&&e&&n.width&&n.height){const r=J(t,e,n.width,n.height);return{type:"image",attrs:{src:n.src,alt:"",width:r.width,height:r.height}}}return{type:"image",attrs:{src:n.src,alt:"",...t!==void 0&&{width:t},...e!==void 0&&{height:e}}}}async function V(n,t){const e=[],r=a(n,"wp:inline")||a(n,"wp:anchor");if(!r)return e;const o=a(r,"wp:extent");let i,s;if(o){const f=o.attributes.cx,w=o.attributes.cy;typeof f=="string"&&(i=R(f)),typeof w=="string"&&(s=R(w))}const c=a(r,"a:graphic");if(!c)return e;const l=a(c,"a:graphicData");if(!l)return e;const u=a(l,"wpg:wgp");if(u){const f=a(u,"wpg:grpSp"),w=f?[...x(f,"pic:pic"),...x(f,"pic")]:[...x(u,"pic:pic"),...x(u,"pic")];for(const p of w){const d=a(p,"a:graphic");if(!d){const b=a(p,"pic:blipFill")||y(p,"a:blipFill");if(!b)continue;const k=a(b,"a:blip")||y(b,"a:blip");if(!k?.attributes["r:embed"])continue;const C=k.attributes["r:embed"],g=t.images.get(C);if(!g)continue;e.push(Ct(g,i,s));continue}const h={children:[d]},m=await $(h,t);if(!m)continue;const v=h.children[0]?.type==="element"?y(h.children[0],"a:blip")?.attributes["r:embed"]:void 0;if(i&&s&&v){const b=t.images.get(v);if(b?.width&&b?.height){const k=J(i,s,b.width,b.height);m.attrs.width=k.width,m.attrs.height=k.height}else m.attrs.width=i,m.attrs.height=s}e.push(m)}}else{const f=await $(n,t);f&&e.push(f)}return e}const 
Mt="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";function Pt(n){const t=new Map,e=n["word/_rels/document.xml.rels"];if(!e)return t;const r=P(new TextDecoder().decode(e)),o=a(r,"Relationships");if(!o)return t;const i=x(o,"Relationship");for(const s of i)s.attributes.Type===Mt&&s.attributes.Id&&s.attributes.Target&&t.set(s.attributes.Id,s.attributes.Target);return t}function Ft(n){const t=new Map,e=new Map,r=n["word/numbering.xml"];if(!r)return t;const o=P(new TextDecoder().decode(r)),i=new Map,s=a(o,"w:numbering");if(!s)return t;const c=x(s,"w:abstractNum");for(const u of c){const f=u.attributes["w:abstractNumId"],w=a(u,"w:lvl");if(!w)continue;const p=a(w,"w:numFmt");p?.attributes["w:val"]&&i.set(f,p.attributes["w:val"]);const d=a(w,"w:start");d?.attributes["w:val"]&&e.set(f,parseInt(d.attributes["w:val"],10))}const l=x(s,"w:num");for(const u of l){const f=u.attributes["w:numId"],w=a(u,"w:abstractNumId");if(!w?.attributes["w:val"])continue;const p=w.attributes["w:val"],d=i.get(p);if(!d)continue;const h=e.get(p);d==="bullet"?t.set(f,{type:"bullet"}):t.set(f,{type:"ordered",...h!==void 0&&{start:h}})}return t}function Nt(n){const t=new Map,e=n["word/styles.xml"];if(!e)return t;const r=P(new TextDecoder().decode(e)),o=a(r,"w:styles");if(!o)return t;const i=x(o,"w:style").filter(s=>s.attributes["w:type"]==="paragraph");for(const s of i){const c=s.attributes["w:styleId"];if(!c)continue;const l={styleId:c},u=a(s,"w:name");u?.attributes["w:val"]&&(l.name=u.attributes["w:val"]);const f=a(s,"w:pPr");if(f){const p=a(f,"w:outlineLvl");p?.attributes["w:val"]!==void 0&&(l.outlineLvl=parseInt(p.attributes["w:val"],10))}const w=a(s,"w:rPr");if(w){const p={},d=a(w,"w:color");if(d?.attributes["w:val"]&&d.attributes["w:val"]!=="auto"){const v=d.attributes["w:val"];p.color=v.startsWith("#")?v:`#${v}`}a(w,"w:b")&&(p.bold=!0),a(w,"w:i")&&(p.italic=!0),a(w,"w:u")&&(p.underline=!0),a(w,"w:strike")&&(p.strike=!0);const 
h=a(w,"w:sz");if(h?.attributes["w:val"]){const v=h.attributes["w:val"],b=parseInt(v,10);isNaN(b)||(p.fontSize=b)}const m=a(w,"w:rFonts");m?.attributes["w:ascii"]&&(p.fontFamily=m.attributes["w:ascii"]),Object.keys(p).length>0&&(l.charFormat=p)}t.set(c,l)}return t}function G(n,t){const e=a(n,"w:t");if(!e)return null;const r=e.children.find(i=>i.type==="text");if(!r?.value)return null;const o=D(n,t);return{type:"text",text:r.value,...o.length&&{marks:o}}}async function B(n,t){const e=[];for(const r of n.children)if(r.type==="element"){if(r.name==="w:hyperlink"){const o=r,i=o.attributes["r:id"],s=t.hyperlinks.get(i);if(!s)continue;for(const c of o.children){if(c.type!=="element"||c.name!=="w:r")continue;const l=c,u=q(l);if(u){const w=await $(u,t);if(w){e.push(w);continue}const p=await V(u,t);if(p.length){e.push(...p);continue}}const f=G(l,t.styleInfo);f&&(f.marks=f.marks||[],f.marks.push({type:"link",attrs:{href:s}}),e.push(f))}}else if(r.name==="w:r"){const o=r,i=q(o);if(i){const c=await V(i,t);if(c.length){e.push(...c);continue}}if(a(o,"w:br")){const c=D(o,t.styleInfo);e.push({type:"hardBreak",...c.length&&{marks:c}})}const s=G(o,t.styleInfo);s&&e.push(s)}}return e}function D(n,t){const e=[],r=a(n,"w:rPr");let o={};if(t?.charFormat&&(o={...t.charFormat}),r){const i=a(r,"w:b");i&&(i.attributes["w:val"]==="false"?o.bold=!1:o.bold=!0);const s=a(r,"w:i");s&&(s.attributes["w:val"]==="false"?o.italic=!1:o.italic=!0),a(r,"w:u")&&(o.underline=!0),a(r,"w:strike")&&(o.strike=!0);const c=a(r,"w:color");if(c?.attributes["w:val"]&&c.attributes["w:val"]!=="auto"){const p=c.attributes["w:val"];o.color=p.startsWith("#")?p:`#${p}`}const l=a(r,"w:sz");if(l?.attributes["w:val"]){const p=l.attributes["w:val"],d=parseInt(p,10);isNaN(d)||(o.fontSize=d)}const u=a(r,"w:rFonts");u?.attributes["w:ascii"]&&(o.fontFamily=u.attributes["w:ascii"]);const f=a(r,"w:shd");if(f?.attributes["w:fill"]&&f.attributes["w:fill"]!=="auto"){const 
p=f.attributes["w:fill"];o.backgroundColor=p.startsWith("#")?p:`#${p}`}a(r,"w:highlight")&&e.push({type:"highlight"});const w=a(r,"w:vertAlign");if(w){const p=w.attributes["w:val"];p==="subscript"?e.push({type:"subscript"}):p==="superscript"&&e.push({type:"superscript"})}}if(o.bold&&e.push({type:"bold"}),o.italic&&e.push({type:"italic"}),o.underline&&e.push({type:"underline"}),o.strike&&e.push({type:"strike"}),o.color||o.backgroundColor||o.fontSize||o.fontFamily){const i={color:o.color||"",backgroundColor:o.backgroundColor||"",fontSize:"",fontFamily:"",lineHeight:""};if(o.fontSize){const s=Math.round(o.fontSize/1.5*10)/10;i.fontSize=`${s}px`}o.fontFamily&&(i.fontFamily=o.fontFamily),e.push({type:"textStyle",attrs:i})}return e}function W(n){const t=a(n,"w:pPr");if(!t)return;const e=a(t,"w:jc");if(!e?.attributes["w:val"])return;const r=e.attributes["w:val"],o={left:"left",right:"right",center:"center",both:"justify"}[r];return o?{textAlign:o}:void 0}function K(n){return`${Math.round(n/15)}px`}function Q(n){const t=a(n,"w:pPr");if(!t)return null;const e={},r=a(t,"w:ind");if(r){const i=u=>{const f=r.attributes[u];if(typeof f!="string")return null;const w=parseInt(f,10);return isNaN(w)?null:K(w)},s=i("w:left");s&&(e.indentLeft=s);const c=i("w:right");c&&(e.indentRight=c);const l=i("w:firstLine");if(l)e.indentFirstLine=l;else{const u=i("w:hanging");u&&(e.indentFirstLine=`-${u}`)}}const o=a(t,"w:spacing");if(o){const i=l=>{const u=o.attributes[l];if(typeof u!="string")return null;const f=parseInt(u,10);return isNaN(f)?null:K(f)},s=i("w:before");s&&(e.spacingBefore=s);const c=i("w:after");c&&(e.spacingAfter=c)}return Object.keys(e).length?e:null}async function S(n,t){const e=a(n,"w:pPr"),r=(e&&a(e,"w:pStyle"))?.attributes["w:val"];if(r&&t.styleMap){const c=t.styleMap.get(r);if(c?.outlineLvl!==void 0&&c.outlineLvl>=0&&c.outlineLvl<=5){const u=c.outlineLvl+1;return Y(n,t,c,u)}const l=r.match(/^Heading(\d+)$/);if(l){const u=parseInt(l[1],10);return Y(n,t,c,u)}}const 
o=r&&t.styleMap?t.styleMap.get(r):void 0,i=await B(n,{...t,styleInfo:o}),s={...W(n),...Q(n)};if(Rt(n)){const c=i.filter(l=>l.type!=="hardBreak");return[{type:"paragraph",...Object.keys(s).length&&{attrs:s},content:c.length?c:void 0},{type:"horizontalRule"}]}if(i.length===1&&i[0].type==="hardBreak"){const c=a(n,"w:r");if((c&&a(c,"w:br"))?.attributes["w:type"]==="page")return{type:"horizontalRule"}}return i.length===1&&i[0].type==="image"?i[0]:{type:"paragraph",...Object.keys(s).length&&{attrs:s},content:i}}function Rt(n){const t=[],e=r=>{if(r.name==="w:r")t.push(r);else for(const o of r.children)o.type==="element"&&e(o)};return e(n),t.some(r=>a(r,"w:br")?.attributes["w:type"]==="page")}async function Y(n,t,e,r){return{type:"heading",attrs:{level:r,...Q(n)},content:await B(n,{...t,styleInfo:e})}}function T(n){if(!n)return null;const t=n.attributes["w:val"],e=n.attributes["w:sz"],r=n.attributes["w:color"],o={single:"solid",dashed:"dashed",dotted:"dotted",double:"double",none:"none",nil:"none"},i={};if(r&&r!=="auto"&&(i.color=`#${r}`),e){const s=parseInt(e);isNaN(s)||(i.width=Math.round(s/6))}return t&&o[t]&&(i.style=o[t]),Object.keys(i).length>0?i:null}function St(n){const t={marginTop:void 0,marginBottom:void 0,marginLeft:void 0,marginRight:void 0},e=a(n,"w:tblPr");if(!e)return null;const r=a(e,"w:tblCellMar");if(!r)return null;const o=a(r,"w:top");if(o?.attributes["w:w"]){const l=parseInt(o.attributes["w:w"]);isNaN(l)||(t.marginTop=l)}const i=a(r,"w:bottom");if(i?.attributes["w:w"]){const l=parseInt(i.attributes["w:w"]);isNaN(l)||(t.marginBottom=l)}const s=a(r,"w:left");if(s?.attributes["w:w"]){const l=parseInt(s.attributes["w:w"]);isNaN(l)||(t.marginLeft=l)}const c=a(r,"w:right");if(c?.attributes["w:w"]){const l=parseInt(c.attributes["w:w"]);isNaN(l)||(t.marginRight=l)}return t.marginTop===void 0&&t.marginBottom===void 0&&t.marginLeft===void 0&&t.marginRight===void 0?null:t}function Tt(n){const t={rowHeight:null},e=a(n,"w:trPr");if(!e)return t;const 
r=a(e,"w:trHeight");if(r?.attributes["w:val"]){const o=parseInt(r.attributes["w:val"]),i=Math.round(o/15);t.rowHeight=`${i}px`}return t}function Z(n){const t={colSpan:1,rowSpan:1,colWidth:null},e=a(n,"w:tcPr");if(!e)return t;const r=a(e,"w:gridSpan");r?.attributes["w:val"]&&(t.colSpan=parseInt(r.attributes["w:val"])),a(e,"w:vMerge")?.attributes["w:val"]==="continue"&&(t.rowSpan=0);const o=a(e,"w:tcW");if(o?.attributes["w:w"]){const l=parseInt(o.attributes["w:w"]);t.colWidth=Math.round(l/15)}const i=a(e,"w:shd");i?.attributes["w:fill"]&&(t.backgroundColor=`#${i.attributes["w:fill"]}`);const s=a(e,"w:vAlign");s?.attributes["w:val"]&&(t.verticalAlign=s.attributes["w:val"]);const c=a(e,"w:tcBorders");if(c){const l=T(a(c,"w:top"));l&&(t.borderTop=l);const u=T(a(c,"w:bottom"));u&&(t.borderBottom=u);const f=T(a(c,"w:left"));f&&(t.borderLeft=f);const w=T(a(c,"w:right"));w&&(t.borderRight=w)}return t}function Lt(n){return n.name==="w:tbl"}async function tt(n,t){const e=[];for(const s of n.children)s.type==="element"&&s.name==="w:tr"&&e.push(s);const r=new Map,o=await Promise.all(e.map((s,c)=>At(s,{...t,activeRowspans:r,rows:e,rowIndex:c}))),i=St(n);return{type:"table",...i&&{attrs:i},content:o}}async function At(n,t){const e=[];let r=0;const o=Tt(n);for(const i of n.children){if(i.type!=="element"||i.name!=="w:tc")continue;const s=t.activeRowspans.get(r);if(s&&s>0){t.activeRowspans.set(r,s-1),r++;continue}let c=Z(i);if(c?.rowSpan===1){const u=$t({...t,colIndex:r});u>1&&(c={...c,rowSpan:u})}if(c?.rowSpan&&c.rowSpan>1&&t.activeRowspans.set(r,c.rowSpan-1),c?.rowSpan===0){r++;continue}const l=await Bt(i,t);e.push({type:"tableCell",...c&&{attrs:c},content:l}),r+=c?.colSpan||1}return{type:"tableRow",...o&&{attrs:o},content:e}}function $t(n){let t=1,e=n.colIndex;for(let r=n.rowIndex+1;r<n.rows.length;r++){const o=n.rows[r];let i=!1;for(const s of o.children){if(s.type!=="element"||s.name!=="w:tc")continue;const c=Z(s),l=c?.colSpan||1;if(e>=0&&e<l){if(c?.rowSpan===0)t++,i=!0;else 
return t;break}e-=l}if(!i)break}return t}async function Bt(n,t){const e=[];for(const r of n.children)if(r.type==="element"&&r.name==="w:p"){const o=await S(r,t);Array.isArray(o)?e.push(...o):e.push(o)}return e.length?e:[{type:"paragraph",content:[]}]}function L(n){const t=a(n,"w:pPr");return!!t&&a(t,"w:numPr")!==void 0}function E(n){const t=a(n,"w:pPr"),e=t&&a(t,"w:numPr");if(!e)return null;const r=a(e,"w:ilvl"),o=a(e,"w:numId");return!r||!o?null:{numId:o.attributes["w:val"],level:parseInt(r.attributes["w:val"]||"0",10)}}function z(n){const t=a(n,"w:pPr"),e=(t&&a(t,"w:pStyle"))?.attributes["w:val"];return e==="CodeBlock"||e?.startsWith("Code")||!1}function et(n){const t=a(n,"w:pPr"),e=(t&&a(t,"w:pStyle"))?.attributes["w:val"];return e?.startsWith("CodeBlock")&&e.replace("CodeBlock","").toLowerCase()||void 0}const nt="\u2610",O="\u2611";function rt(n){const t=a(n,"w:r");if(!t)return null;const e=a(t,"w:t");if(!e)return null;const r=e.children.find(o=>o.type==="text");return r?.value&&r||null}function j(n){const t=rt(n);if(!t)return!1;const e=t.value;return e.startsWith(nt)||e.startsWith(O)}function ot(n){return rt(n)?.value.startsWith(O)||!1}function Dt(n){return{type:"taskItem",attrs:{checked:ot(n)},content:[Wt(n)]}}function Wt(n){const t=[];let e=!1;for(const o of n.children){if(o.type!=="element"||o.name!=="w:r")continue;if(!e){const c=a(o,"w:t")?.children.find(l=>l.type==="text");if(c?.value){const l=c.value;if(l.startsWith(nt)||l.startsWith(O)){e=!0;const u=l.substring(2).trimStart();u&&t.push({type:"text",text:u});continue}}}const i=Et(o),s=a(o,"w:t")?.children.find(c=>c.type==="text");if(s?.value){const c={type:"text",text:s.value};i.length&&(c.marks=i),t.push(c)}}const r=W(n);return{type:"paragraph",...r&&{attrs:r},content:t.length?t:void 0}}function Et(n){const t=[],e=a(n,"w:rPr");return e&&(a(e,"w:b")&&t.push({type:"bold"}),a(e,"w:i")&&t.push({type:"italic"}),a(e,"w:u")&&t.push({type:"underline"}),a(e,"w:strike")&&t.push({type:"strike"})),t}function 
it(n){const t=a(n,"w:r");if(!t)return!1;let e=!1,r=!1;for(const o of t.children)o.type==="element"&&(o.name==="w:br"&&o.attributes["w:type"]==="page"?e=!0:o.name==="w:t"?o.children.find(i=>i.type==="text")?.value?.trim().length&&(r=!0):o.name!=="w:rPr"&&(r=!0));return e&&!r}const zt=n=>{const t=[],e=x(n,"w:r");for(const r of e){const o=a(r,"w:t");if(!o)continue;const i=o.children.find(s=>s.type==="text");i&&"value"in i&&i.value&&t.push({type:"text",text:i.value})}return t},Ot=async(n,t,e)=>{const r=await tt(n[t],{hyperlinks:e.hyperlinks,images:e.images,options:e.options,styleMap:e.styleMap});let o=1;return t+1<n.length&&n[t+1].name==="w:p"&&st(n[t+1])&&o++,{nodes:[r],consumed:o}},jt=async(n,t)=>{const e=[];let r=t;for(;r<n.length;){const o=n[r];if(o.name!=="w:p"||!z(o))break;const i=et(o),s={type:"codeBlock",...i&&{attrs:{language:i}},content:zt(o)};e.push(s),r++}return{nodes:e,consumed:r-t}},Ut=async(n,t,e)=>{const{listTypeMap:r}=e,o=[];let i=t;for(;i<n.length;){const s=n[i];if(s.name!=="w:p"||!L(s))break;const c=E(s);if(!c)break;const l=r.get(c.numId),u=l?.type||"bullet",f=[];for(;i<n.length;){const p=n[i];if(p.name!=="w:p"||!L(p))break;const d=E(p);if(!d||d.numId!==c.numId)break;const h=await S(p,e),m=Array.isArray(h)?h[0]:h;f.push({type:"listItem",content:[m]}),i++}const w={type:u==="bullet"?"bulletList":"orderedList",content:f};u==="ordered"&&(w.attrs={type:null,...l?.start!==void 0&&{start:l.start}}),o.push(w)}return{nodes:o,consumed:i-t}},Ht=async(n,t)=>{const e=[];let r=t;for(;r<n.length;){const o=n[r];if(o.name!=="w:p"||!j(o))break;const{convertTaskItem:i}=await import("./chunks/index.mjs"),s=i(o);e.push(s),r++}return{nodes:[{type:"taskList",content:e}],consumed:r-t}},_t=async()=>({nodes:[{type:"horizontalRule"}],consumed:1}),Xt=async(n,t,e)=>{const r=await S(n[t],e);return Array.isArray(r)?{nodes:r,consumed:1}:{nodes:[r],consumed:1}},st=n=>{const t=x(n,"w:r");for(const e of t){const r=a(e,"w:t");if(r){const 
i=r.children.find(s=>s.type==="text");if(i&&"value"in i&&i.value&&i.value.trim().length>0)return!1}if(a(e,"w:drawing")||a(e,"mc:AlternateContent")||a(e,"w:pict"))return!1;const o=a(e,"w:br");if(o&&o.attributes["w:type"]==="page")return!1}return!0},qt=n=>n.name==="w:tbl"?Ot:n.name==="w:p"?z(n)?jt:j(n)?Ht:L(n)?Ut:it(n)?_t:Xt:null,Jt=async(n,t)=>{const e=[];let r=0;for(;r<n.length;){const o=n[r],i=qt(o);if(!i){r++;continue}if(o.name==="w:p"&&t.ignoreEmptyParagraphs&&st(o)){r++;continue}const{nodes:s,consumed:c}=await i(n,r,t);e.push(...s),r+=c}return e};async function Vt(n,t={}){const{ignoreEmptyParagraphs:e=!1}=t,r=await ut(n),o=lt(r),i=Pt(o),s=kt(o),c=o["word/document.xml"];if(!c)throw new Error("Invalid DOCX file: missing word/document.xml");const l=P(new TextDecoder().decode(c)),u=Ft(o),f=Nt(o);return await Gt(l,s,i,u,f,e,t)}async function Gt(n,t,e,r,o,i,s){if(n.type!=="root")return{type:"doc",content:[]};const c=a(n,"w:document");if(!c)return{type:"doc",content:[]};const l=a(c,"w:body");if(!l)return{type:"doc",content:[]};const u={hyperlinks:e,images:t,listTypeMap:r,styleMap:o,ignoreEmptyParagraphs:i,options:s};return{type:"doc",content:await Jt(l.children.filter(f=>f.type==="element"),u)}}export{S as convertParagraph,tt as convertTable,Dt as convertTaskItem,W as extractAlignment,D as extractMarks,B as extractRuns,et as getCodeBlockLanguage,E as getListInfo,ot as getTaskItemChecked,z as isCodeBlock,it as isHorizontalRule,L as isListItem,Lt as isTable,j as isTaskItem,Vt as parseDOCX};
|
|
1
|
+
import{fromXml as F}from"xast-util-from-xml";import{unzipSync as wt}from"fflate";import{toUint8Array as dt}from"undio";import{imageMeta as ht}from"image-meta";const gt=.6666666666666666,N={CODE_BLOCK:"CodeBlock",CODE_PREFIX:"Code"},mt={docxToTipTap:{left:"left",right:"right",center:"center",both:"justify"}},q=96;function z(n){return Math.round(n*q/1440)}function M(n){return`${z(n)}px`}function bt(n){return Math.round(n/(914400/q))}function A(n){const t=parseInt(n,10);if(!isNaN(t))return bt(t)}function a(n,t){if(!n.children)return null;for(const e of n.children)if(e.type==="element"&&e.name===t)return e;return null}function b(n,t){if(!n.children)return null;for(const e of n.children)if(e.type==="element"){if(e.name===t)return e;const r=b(e,t);if(r)return r}return null}function x(n,t){const e=[];if(!n.children)return e;for(const r of n.children)r.type==="element"&&(r.name===t&&e.push(r),e.push(...x(r,t)));return e}function T(n,t){const e=n[t];if(!e)return;const r=parseInt(e,10);if(!isNaN(r))return e}function B(n){return t=>n.includes(t)}const O="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";function G(n){const t=n.length,e=Math.ceil(t/3)*4,r=Array.from({length:e});let i=0;for(let o=0;o<t;o+=3){const s=n[o],c=o+1<t?n[o+1]:0,l=o+2<t?n[o+2]:0,u=s>>2,p=(s&3)<<4|c>>4,w=(c&15)<<2|l>>6,f=l&63;r[i++]=O[u],r[i++]=O[p],r[i++]=o+1<t?O[w]:"=",r[i++]=o+2<t?O[f]:"="}return r.join("")}function yt(n){const t=atob(n),e=new Uint8Array(t.length);for(let r=0;r<t.length;r++)e[r]=t.charCodeAt(r);return e}const vt=globalThis.process?.release?.name==="node",V=typeof window<"u";async function It(n){const t=await n;return t.default||t}let P,J=class{#t=!1;constructor({enableHWA:t=!1}={}){this.#t=t}create(t,e){const r=this._createCanvas(t,e);return{canvas:r,context:r.getContext("2d",{willReadFrequently:!this.#t})}}reset({canvas:t},e,r){if(!t)throw new Error("Canvas is not specified");t.width=e,t.height=r}destroy(t){if(!t.canvas)throw new Error("Canvas is not 
specified");t.canvas.width=0,t.canvas.height=0,t.canvas=void 0,t.context=void 0}_createCanvas(t,e){throw new Error("Not implemented")}};class xt extends J{_document;constructor({ownerDocument:t=globalThis.document,enableHWA:e=!1}={}){super({enableHWA:e}),this._document=t}_createCanvas(t,e){const r=this._document.createElement("canvas");return r.width=t,r.height=e,r}}class kt extends J{constructor({enableHWA:t=!1}={}){super({enableHWA:t})}_createCanvas(t,e){if(!P)throw new Error("@napi-rs/canvas module is not resolved");return P.createCanvas(t,e)}}async function Ct(n){P??=await It(n())}async function Mt(n){if(V)return xt;if(vt){if(!n)throw new Error("In Node.js environment, @napi-rs/canvas is required for image cropping. Please provide canvasImport parameter or install it: pnpm add @napi-rs/canvas");return await Ct(n),kt}throw new Error("Unsupported environment for canvas operations")}async function Tt(n,t,e={}){if(!t||!t.left&&!t.top&&!t.right&&!t.bottom||e.enabled===!1)return n;try{const r=await Mt(e.canvasImport),i=await Lt(n,r),o=(t.left||0)/1e5*i.width,s=(t.top||0)/1e5*i.height,c=(t.right||0)/1e5*i.width,l=(t.bottom||0)/1e5*i.height,u=Math.round(i.width-o-c),p=Math.round(i.height-s-l);if(u<=0||p<=0)return console.warn("Invalid crop dimensions, returning original image"),n;const w=new r().create(u,p);if(!w.context)throw new Error("Failed to get 2D context from canvas");w.context.drawImage(i,o,s,u,p,0,0,u,p);const f=w.canvas.toDataURL(),d=await(await fetch(f)).arrayBuffer();return new Uint8Array(d)}catch(r){return console.warn("Image cropping failed, returning original image:",r),n}}async function Lt(n,t){if(V){const e=new Blob([n.buffer]),r=URL.createObjectURL(e);try{const i=new Image;return new Promise((o,s)=>{i.onload=()=>{URL.revokeObjectURL(r),o(i)},i.onerror=()=>{URL.revokeObjectURL(r),s(new Error("Failed to load image"))},i.src=r})}catch(i){throw URL.revokeObjectURL(r),i}}else{if(!P)throw new Error("@napi-rs/canvas module is not resolved");return await 
P.loadImage(Buffer.from(n))}}const Rt="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image",Ft=B(["left","right","center","inside","outside"]),Pt=B(["top","bottom","center","inside","outside"]),$t=B(["page","character","column","margin","leftMargin","rightMargin","insideMargin","outsideMargin"]),Nt=B(["page","paragraph","margin","topMargin","bottomMargin","insideMargin","outsideMargin","line"]);function At(n){const t=n.attributes.l,e=n.attributes.t,r=n.attributes.r,i=n.attributes.b;if(!(!t&&!e&&!r&&!i))return{left:t?parseInt(t,10):void 0,top:e?parseInt(e,10):void 0,right:r?parseInt(r,10):void 0,bottom:i?parseInt(i,10):void 0}}function Bt(n){const t=a(n,"wp:align"),e=a(n,"wp:posOffset");let r;if(t?.children[0]?.type==="text"){const o=t.children[0].value;Ft(o)&&(r=o)}const i=e?.children[0]?.type==="text"?parseInt(e.children[0].value,10):void 0;if(!(!r&&i===void 0))return{...r&&{align:r},...i!==void 0&&{offset:i}}}function Ot(n){const t=a(n,"wp:align"),e=a(n,"wp:posOffset");let r;if(t?.children[0]?.type==="text"){const o=t.children[0].value;Pt(o)&&(r=o)}const i=e?.children[0]?.type==="text"?parseInt(e.children[0].value,10):void 0;if(!(!r&&i===void 0))return{...r&&{align:r},...i!==void 0&&{offset:i}}}function Q(n){let t=a(n,"w:drawing");if(t)return t;const e=a(n,"mc:AlternateContent"),r=e&&a(e,"mc:Choice");return r?a(r,"w:drawing"):null}function Y(n,t,e,r){const i=e/r,o=n/t;return Math.abs(i-o)>.1?i>o?{width:n,height:Math.round(n/i)}:{width:Math.round(t*i),height:t}:{width:n,height:t}}async function Dt(n,t){const e=new Map,r=n["word/_rels/document.xml.rels"];if(!r)return e;const i=F(new TextDecoder().decode(r)),o=a(i,"Relationships");if(!o)return e;const s=x(o,"Relationship");for(const c of s)if(c.attributes.Type===Rt&&c.attributes.Id&&c.attributes.Target){const l="word/"+c.attributes.Target,u=n[l];if(!u)continue;let p,w,f="png";try{const h=ht(u);p=h.width,w=h.height,h.type&&(f=h.type)}catch{}let d;if(t)d=(await 
t({id:c.attributes.Id,contentType:`image/${f}`,data:u})).src;else{const h=G(u);d=`data:image/${f};base64,${h}`}e.set(c.attributes.Id,{src:d,width:p,height:w})}return e}async function W(n,t){const{context:e}=t,r=b(n,"a:blip");if(!r?.attributes["r:embed"])return null;const i=r.attributes["r:embed"],o=e.images.get(i);if(!o)return null;let s=o.src;const c=b(n,"a:srcRect");if(c){const g=At(c);if(g&&s.startsWith("data:")){const[m,C]=s.split(",");if(C){const L=yt(C);try{const R=await Tt(L,g,{canvasImport:e.image?.canvasImport,enabled:e.image?.enableImageCrop??!1}),S=G(R);s=`${m},${S}`}catch(R){console.warn("Image cropping failed, using original image:",R)}}}}const l=b(n,"wp:extent");let u,p;if(l){const g=l.attributes.cx,m=l.attributes.cy;typeof g=="string"&&(u=A(g)),typeof m=="string"&&(p=A(m))}const w=b(n,"a:xfrm");let f;if(w?.attributes.rot){const g=parseInt(w.attributes.rot,10);isNaN(g)||(f=g/6e4)}const d=b(n,"wp:docPr")?.attributes.title,h=b(n,"wp:positionH"),y=b(n,"wp:positionV");let I;if(h||y){const g=h?Bt(h):void 0,m=y?Ot(y):void 0,C=h?.attributes.relativeFrom,L=y?.attributes.relativeFrom,R=typeof C=="string"&&$t(C)?C:"page",S=typeof L=="string"&&Nt(L)?L:"page";I={horizontalPosition:{relative:R,...g?.align&&{align:g.align},...g?.offset!==void 0&&{offset:g.offset}},verticalPosition:{relative:S,...m?.align&&{align:m.align},...m?.offset!==void 0&&{offset:m.offset}}}}const v=b(n,"pic:spPr");let k;if(v){const g=b(v,"a:ln"),m=g&&b(g,"a:solidFill"),C=m&&b(m,"a:srgbClr");C?.attributes.val&&(k={type:"solidFill",solidFillType:"rgb",value:C.attributes.val})}return{type:"image",attrs:{src:s,alt:"",...u!==void 0&&{width:u},...p!==void 0&&{height:p},...f!==void 0&&{rotation:f},...d&&{title:d},...I&&{floating:I},...k&&{outline:k}}}}function Et(n,t,e){if(t&&e&&n.width&&n.height){const r=Y(t,e,n.width,n.height);return{type:"image",attrs:{src:n.src,alt:"",width:r.width,height:r.height}}}return{type:"image",attrs:{src:n.src,alt:"",...t!==void 0&&{width:t},...e!==void 
0&&{height:e}}}}/**
 * Z(n, t): collect image nodes from a drawing element.
 * Looks up a `wp:inline` or `wp:anchor` child, reads its `wp:extent` (cx/cy
 * converted with `A`), then walks `a:graphic` > `a:graphicData`. When a
 * `wpg:wgp` group is present, each `pic:pic`/`pic` picture inside it is
 * resolved (directly through `t.context.images`, or through `W` when the
 * picture wraps an `a:graphic`) and sized from the group extent, using `Y`
 * when the stored image has its own width/height. Without a group the whole
 * element is handed to `W`. Returns an array of image nodes, possibly empty.
 * NOTE(review): `a`, `b`, `x`, `A`, `Y` and `W` are defined outside this
 * chunk; `a`/`b`/`x` are presumably child, descendant and all-children
 * lookups - confirm against their definitions.
 */async function Z(n,t){const e=[],r=a(n,"wp:inline")||a(n,"wp:anchor");if(!r)return e;const i=a(r,"wp:extent");let o,s;if(i){const p=i.attributes.cx,w=i.attributes.cy;typeof p=="string"&&(o=A(p)),typeof w=="string"&&(s=A(w))}const c=a(r,"a:graphic");if(!c)return e;const l=a(c,"a:graphicData");if(!l)return e;const u=a(l,"wpg:wgp");if(u){const p=a(u,"wpg:grpSp"),w=p?[...x(p,"pic:pic"),...x(p,"pic")]:[...x(u,"pic:pic"),...x(u,"pic")];for(const f of w){const d=a(f,"a:graphic");if(!d){const v=a(f,"pic:blipFill")||b(f,"a:blipFill");if(!v)continue;const k=a(v,"a:blip")||b(v,"a:blip");if(!k?.attributes["r:embed"])continue;const g=k.attributes["r:embed"],m=t.context.images.get(g);if(!m)continue;e.push(Et(m,o,s));continue}const h={children:[d]},y=await W(h,t);if(!y)continue;const I=h.children[0]?.type==="element"?b(h.children[0],"a:blip")?.attributes["r:embed"]:void 0;if(o&&s&&I){const v=t.context.images.get(I);if(v?.width&&v?.height){const k=Y(o,s,v.width,v.height);y.attrs.width=k.width,y.attrs.height=k.height}else y.attrs.width=o,y.attrs.height=s}e.push(y)}}else{const p=await W(n,t);p&&e.push(p)}return e}/**
 * Relationship `Type` value that `zt` uses to pick hyperlink entries out of
 * `word/_rels/document.xml.rels`.
 */const St="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";/**
 * zt(n): build a Map of relationship Id -> Target for hyperlinks.
 * `n` is indexed by archive path; the `word/_rels/document.xml.rels` entry is
 * decoded with TextDecoder and parsed with `F`. Only `Relationship` elements
 * whose Type equals `St` and that carry both Id and Target are kept. Returns
 * an empty Map when the part or its `Relationships` element is missing.
 */function zt(n){const t=new Map,e=n["word/_rels/document.xml.rels"];if(!e)return t;const r=F(new TextDecoder().decode(e)),i=a(r,"Relationships");if(!i)return t;const o=x(i,"Relationship");for(const s of o)s.attributes.Type===St&&s.attributes.Id&&s.attributes.Target&&t.set(s.attributes.Id,s.attributes.Target);return t}/**
 * Wt(n): build a Map of `w:numId` -> list info from `word/numbering.xml`.
 * For every `w:abstractNum`, the `w:numFmt` and `w:start` values of the
 * `w:lvl` returned by `a` are recorded under its `w:abstractNumId`. Each
 * `w:num` is then mapped to `{type:"bullet"}` when that format is "bullet",
 * otherwise to `{type:"ordered"}` plus `start` when one was recorded. Nums
 * without a recorded format are skipped. Returns an empty Map when the part
 * or its `w:numbering` element is missing.
 * NOTE(review): only one `w:lvl` per abstract definition is inspected -
 * presumably the first; confirm against `a`.
 */function Wt(n){const t=new Map,e=new Map,r=n["word/numbering.xml"];if(!r)return t;const i=F(new TextDecoder().decode(r)),o=new Map,s=a(i,"w:numbering");if(!s)return t;const c=x(s,"w:abstractNum");for(const u of c){const p=u.attributes["w:abstractNumId"],w=a(u,"w:lvl");if(!w)continue;const f=a(w,"w:numFmt");f?.attributes["w:val"]&&o.set(p,f.attributes["w:val"]);const d=a(w,"w:start");d?.attributes["w:val"]&&e.set(p,parseInt(d.attributes["w:val"],10))}const 
l=x(s,"w:num");for(const u of l){const p=u.attributes["w:numId"],w=a(u,"w:abstractNumId");if(!w?.attributes["w:val"])continue;const f=w.attributes["w:val"],d=o.get(f);if(!d)continue;const h=e.get(f);d==="bullet"?t.set(p,{type:"bullet"}):t.set(p,{type:"ordered",...h!==void 0&&{start:h}})}return t}/**
 * _t(n): build a Map of styleId -> style info from `word/styles.xml`.
 * Only `w:style` elements with `w:type="paragraph"` and a `w:styleId` are
 * read. Each entry holds `styleId`, optional `name`, optional `outlineLvl`
 * (from `w:pPr/w:outlineLvl`) and an optional `charFormat` gathered from
 * `w:rPr`: color (non-"auto", "#"-prefixed), bold, italic, underline, strike,
 * fontSize (integer `w:sz` value) and fontFamily (`w:rFonts` ascii).
 * Returns an empty Map when the part or `w:styles` is missing.
 */function _t(n){const t=new Map,e=n["word/styles.xml"];if(!e)return t;const r=F(new TextDecoder().decode(e)),i=a(r,"w:styles");if(!i)return t;const o=x(i,"w:style").filter(s=>s.attributes["w:type"]==="paragraph");for(const s of o){const c=s.attributes["w:styleId"];if(!c)continue;const l={styleId:c},u=a(s,"w:name");u?.attributes["w:val"]&&(l.name=u.attributes["w:val"]);const p=a(s,"w:pPr");if(p){const f=a(p,"w:outlineLvl");f?.attributes["w:val"]!==void 0&&(l.outlineLvl=parseInt(f.attributes["w:val"],10))}const w=a(s,"w:rPr");if(w){const f={},d=a(w,"w:color");if(d?.attributes["w:val"]&&d.attributes["w:val"]!=="auto"){const I=d.attributes["w:val"];f.color=I.startsWith("#")?I:`#${I}`}a(w,"w:b")&&(f.bold=!0),a(w,"w:i")&&(f.italic=!0),a(w,"w:u")&&(f.underline=!0),a(w,"w:strike")&&(f.strike=!0);const h=a(w,"w:sz");if(h?.attributes["w:val"]){const I=h.attributes["w:val"],v=parseInt(I,10);isNaN(v)||(f.fontSize=v)}const y=a(w,"w:rFonts");y?.attributes["w:ascii"]&&(f.fontFamily=y.attributes["w:ascii"]),Object.keys(f).length>0&&(l.charFormat=f)}t.set(c,l)}return t}/**
 * tt(n, t): turn a run element into a text node.
 * Takes the first text child of the run's `w:t`; returns null when there is
 * no `w:t` or the text value is empty. Marks come from `_(n, t)` and the
 * `marks` key is only present when at least one mark was produced.
 */function tt(n,t){const e=a(n,"w:t");if(!e)return null;const r=e.children.find(o=>o.type==="text");if(!r?.value)return null;const i=_(n,t);return{type:"text",text:r.value,...i.length&&{marks:i}}}/**
 * D(n, t): extract inline content from a paragraph (exported as `extractRuns`).
 * Iterates the element children of `n`:
 * - `w:hyperlink`: the href is looked up by `r:id` in `context.hyperlinks`;
 *   each `w:r` inside is emitted as an image (via `W`, then `Z`) when `Q`
 *   finds something in the run, otherwise as text with a `link` mark appended.
 * - `w:r`: images found through `Q` + `Z` are emitted; otherwise a `hardBreak`
 *   is pushed when the run has a `w:br`, followed by the run's text, if any.
 * Returns the array of inline nodes.
 * NOTE(review): a hyperlink whose `r:id` is not in the map is skipped
 * entirely, including its text - confirm this is intended for links that
 * carry no relationship id.
 */async function D(n,t){const{context:e,styleInfo:r}=t,i=[];for(const o of n.children)if(o.type==="element"){if(o.name==="w:hyperlink"){const s=o,c=s.attributes["r:id"],l=e.hyperlinks.get(c);if(!l)continue;for(const u of s.children){if(u.type!=="element"||u.name!=="w:r")continue;const p=u,w=Q(p);if(w){const d=await W(w,{context:e});if(d){i.push(d);continue}const h=await Z(w,{context:e});if(h.length){i.push(...h);continue}}const 
f=tt(p,r);f&&(f.marks=f.marks||[],f.marks.push({type:"link",attrs:{href:l}}),i.push(f))}}else if(o.name==="w:r"){const s=o,c=Q(s);if(c){const u=await Z(c,{context:e});if(u.length){i.push(...u);continue}}if(a(s,"w:br")){const u=_(s,r);i.push({type:"hardBreak",...u.length&&{marks:u}})}const l=tt(s,r);l&&i.push(l)}}return i}/**
 * _(n, t): compute the marks for a run (exported as `extractMarks`).
 * Starts from a copy of `t.charFormat` (the paragraph style's character
 * format, when given) and overrides it from the run's `w:rPr`: `w:b`/`w:i`
 * (switched off when `w:val` is "false"), `w:u`, `w:strike`, `w:color` and
 * `w:shd` fill (non-"auto", "#"-prefixed), `w:sz`, `w:rFonts` ascii.
 * `w:highlight` and `w:vertAlign` push highlight / subscript / superscript
 * marks directly. Afterwards bold, italic, underline and strike marks are
 * pushed, plus one `textStyle` mark when any of color, backgroundColor,
 * fontSize or fontFamily is set. fontSize is emitted as
 * `round(size * gt * 10) / 10` followed by "px"; `gt` is a factor defined
 * outside this chunk.
 */function _(n,t){const e=[],r=a(n,"w:rPr");let i={};if(t?.charFormat&&(i={...t.charFormat}),r){const o=a(r,"w:b");o&&(o.attributes["w:val"]==="false"?i.bold=!1:i.bold=!0);const s=a(r,"w:i");s&&(s.attributes["w:val"]==="false"?i.italic=!1:i.italic=!0),a(r,"w:u")&&(i.underline=!0),a(r,"w:strike")&&(i.strike=!0);const c=a(r,"w:color");if(c?.attributes["w:val"]&&c.attributes["w:val"]!=="auto"){const f=c.attributes["w:val"];i.color=f.startsWith("#")?f:`#${f}`}const l=a(r,"w:sz");if(l?.attributes["w:val"]){const f=l.attributes["w:val"],d=parseInt(f,10);isNaN(d)||(i.fontSize=d)}const u=a(r,"w:rFonts");u?.attributes["w:ascii"]&&(i.fontFamily=u.attributes["w:ascii"]);const p=a(r,"w:shd");if(p?.attributes["w:fill"]&&p.attributes["w:fill"]!=="auto"){const f=p.attributes["w:fill"];i.backgroundColor=f.startsWith("#")?f:`#${f}`}a(r,"w:highlight")&&e.push({type:"highlight"});const w=a(r,"w:vertAlign");if(w){const f=w.attributes["w:val"];f==="subscript"?e.push({type:"subscript"}):f==="superscript"&&e.push({type:"superscript"})}}if(i.bold&&e.push({type:"bold"}),i.italic&&e.push({type:"italic"}),i.underline&&e.push({type:"underline"}),i.strike&&e.push({type:"strike"}),i.color||i.backgroundColor||i.fontSize||i.fontFamily){const o={color:i.color||"",backgroundColor:i.backgroundColor||"",fontSize:"",fontFamily:"",lineHeight:""};if(i.fontSize){const s=Math.round(i.fontSize*gt*10)/10;o.fontSize=`${s}px`}i.fontFamily&&(o.fontFamily=i.fontFamily),e.push({type:"textStyle",attrs:o})}return e}/**
 * j(n): read paragraph alignment (exported as `extractAlignment`).
 * Maps the `w:val` of `w:pPr/w:jc` through `mt.docxToTipTap` and returns
 * `{textAlign}`; returns undefined when any step is missing or unmapped.
 */function j(n){const t=a(n,"w:pPr");if(!t)return;const e=a(t,"w:jc");if(!e?.attributes["w:val"])return;const r=e.attributes["w:val"],i=mt.docxToTipTap[r];return i?{textAlign:i}:void 
0}/**
 * et(n): read paragraph indentation and spacing from `w:pPr`.
 * From `w:ind`: indentLeft, indentRight and indentFirstLine (taken from
 * `w:firstLine`, or else computed as left minus `w:hanging`). From
 * `w:spacing`: spacingBefore and spacingAfter. Every value is parsed as a
 * base-10 integer and converted with `M`. Returns null when there is no
 * `w:pPr` or nothing was collected.
 * NOTE(review): `T` and `M` are defined outside this chunk - presumably an
 * attribute getter and a unit conversion.
 */function et(n){const t=a(n,"w:pPr");if(!t)return null;const e={},r=a(t,"w:ind");if(r){const o=T(r.attributes,"w:left");if(o){const l=parseInt(o,10);e.indentLeft=M(l)}const s=T(r.attributes,"w:right");if(s){const l=parseInt(s,10);e.indentRight=M(l)}const c=T(r.attributes,"w:firstLine");if(c){const l=parseInt(c,10);e.indentFirstLine=M(l)}else{const l=T(r.attributes,"w:hanging");if(l){const u=o?parseInt(o,10):0,p=parseInt(l,10),w=u-p;e.indentFirstLine=M(w)}}}const i=a(t,"w:spacing");if(i){const o=T(i.attributes,"w:before");if(o){const c=parseInt(o,10);e.spacingBefore=M(c)}const s=T(i.attributes,"w:after");if(s){const c=parseInt(s,10);e.spacingAfter=M(c)}}return Object.keys(e).length?e:null}/**
 * $(n, t): convert a `w:p` element (exported as `convertParagraph`).
 * When the paragraph style is known to `context.styleMap`, a style with
 * outlineLvl 0..5 yields a heading of level outlineLvl + 1, and a style id
 * of the form "Heading<digits>" yields a heading of that level. Otherwise
 * the runs are extracted with the caller's styleInfo (falling back to the
 * mapped style) and attrs are merged from `j` and `et`. Results:
 * - a page break in any run (`jt`): returns an ARRAY of the paragraph with
 *   hardBreaks removed, followed by a `horizontalRule`;
 * - a lone hardBreak whose first-run `w:br` is a page break: `horizontalRule`;
 * - otherwise a `paragraph` node (a lone image is wrapped the same way).
 * Callers must handle both the single-node and the array return shape.
 */async function $(n,t){const{context:e,styleInfo:r}=t,i=a(n,"w:pPr"),o=(i&&a(i,"w:pStyle"))?.attributes["w:val"];if(o&&e.styleMap){const u=e.styleMap.get(o);if(u?.outlineLvl!==void 0&&u.outlineLvl>=0&&u.outlineLvl<=5){const w=u.outlineLvl+1;return nt(n,t,u,w)}const p=o.match(/^Heading(\d+)$/);if(p){const w=parseInt(p[1],10);return nt(n,t,u,w)}}const s=o&&e.styleMap?e.styleMap.get(o):void 0,c=await D(n,{context:e,styleInfo:r||s}),l={...j(n),...et(n)};if(jt(n)){const u=c.filter(p=>p.type!=="hardBreak");return[{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:u.length?u:void 0},{type:"horizontalRule"}]}if(c.length===1&&c[0].type==="hardBreak"){const u=a(n,"w:r");if((u&&a(u,"w:br"))?.attributes["w:type"]==="page")return{type:"horizontalRule"}}if(c.length===1&&c[0].type==="image"){const u=c[0];return{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:[u]}}return{type:"paragraph",...Object.keys(l).length&&{attrs:l},content:c}}/**
 * jt(n): true when any `w:r` found by recursively walking `n` has a `w:br`
 * with `w:type="page"`. The walk does not descend into a `w:r` once found.
 */function jt(n){const t=[],e=r=>{if(r.name==="w:r")t.push(r);else for(const i of r.children)i.type==="element"&&e(i)};return e(n),t.some(r=>a(r,"w:br")?.attributes["w:type"]==="page")}/**
 * nt(n, t, e, r): build a `heading` node of level `r` for paragraph `n`.
 * attrs combine the level with `et(n)`; content comes from `D` using the
 * style info `e` in place of the caller's styleInfo.
 */async function nt(n,t,e,r){return{type:"heading",attrs:{level:r,...et(n)},content:await D(n,{context:t.context,styleInfo:e})}}/**
 * E(n): convert a border element into `{color?, width?, style?}`.
 * color is "#" + `w:color` unless it is "auto"; width is round(`w:sz` / 6);
 * style maps `w:val` through a fixed table (single -> solid, nil -> none, ...).
 * Returns null for a missing element or when nothing was extracted.
 */function E(n){if(!n)return null;const 
t=n.attributes["w:val"],e=n.attributes["w:sz"],r=n.attributes["w:color"],i={single:"solid",dashed:"dashed",dotted:"dotted",double:"double",none:"none",nil:"none"},o={};if(r&&r!=="auto"&&(o.color=`#${r}`),e){const s=parseInt(e);isNaN(s)||(o.width=Math.round(s/6))}return t&&i[t]&&(o.style=i[t]),Object.keys(o).length>0?o:null}/**
 * Ht(n): read table-wide cell margins from `w:tblPr/w:tblCellMar`.
 * Returns `{marginTop, marginBottom, marginLeft, marginRight}` holding the
 * parsed `w:w` integers (no unit conversion is applied here), with undefined
 * for sides that are absent; returns null when no side could be read.
 */function Ht(n){const t={marginTop:void 0,marginBottom:void 0,marginLeft:void 0,marginRight:void 0},e=a(n,"w:tblPr");if(!e)return null;const r=a(e,"w:tblCellMar");if(!r)return null;const i=a(r,"w:top");if(i?.attributes["w:w"]){const l=parseInt(i.attributes["w:w"]);isNaN(l)||(t.marginTop=l)}const o=a(r,"w:bottom");if(o?.attributes["w:w"]){const l=parseInt(o.attributes["w:w"]);isNaN(l)||(t.marginBottom=l)}const s=a(r,"w:left");if(s?.attributes["w:w"]){const l=parseInt(s.attributes["w:w"]);isNaN(l)||(t.marginLeft=l)}const c=a(r,"w:right");if(c?.attributes["w:w"]){const l=parseInt(c.attributes["w:w"]);isNaN(l)||(t.marginRight=l)}return t.marginTop===void 0&&t.marginBottom===void 0&&t.marginLeft===void 0&&t.marginRight===void 0?null:t}/**
 * Ut(n): read row attributes. Returns `{rowHeight}` where rowHeight is the
 * `w:trPr/w:trHeight` value converted with `z` and suffixed with "px", or
 * null when the row declares no height.
 */function Ut(n){const t={rowHeight:null},e=a(n,"w:trPr");if(!e)return t;const r=a(e,"w:trHeight");if(r?.attributes["w:val"]){const i=parseInt(r.attributes["w:val"]),o=z(i);t.rowHeight=`${o}px`}return t}/**
 * rt(n): read cell attributes from `w:tcPr`.
 * Defaults are `{colspan:1, rowspan:1, colwidth:null}`. `w:gridSpan` sets
 * colspan; `w:vMerge` with val "continue" sets rowspan to 0, which marks a
 * continuation cell; `w:tcW` sets colwidth to a one-element array converted
 * with `z`; `w:shd` fill becomes backgroundColor; `w:vAlign` becomes
 * verticalAlign; `w:tcBorders` sides are converted with `E`.
 * NOTE(review): the fill is prefixed with "#" without the "auto" check used
 * for run shading in `_` - confirm whether "auto" can occur here.
 */function rt(n){const t={colspan:1,rowspan:1,colwidth:null},e=a(n,"w:tcPr");if(!e)return t;const r=a(e,"w:gridSpan");r?.attributes["w:val"]&&(t.colspan=parseInt(r.attributes["w:val"])),a(e,"w:vMerge")?.attributes["w:val"]==="continue"&&(t.rowspan=0);const i=a(e,"w:tcW");if(i?.attributes["w:w"]){const l=parseInt(i.attributes["w:w"]),u=z(l);t.colwidth=[u]}const o=a(e,"w:shd");o?.attributes["w:fill"]&&(t.backgroundColor=`#${o.attributes["w:fill"]}`);const s=a(e,"w:vAlign");s?.attributes["w:val"]&&(t.verticalAlign=s.attributes["w:val"]);const c=a(e,"w:tcBorders");if(c){const l=E(a(c,"w:top"));l&&(t.borderTop=l);const u=E(a(c,"w:bottom"));u&&(t.borderBottom=u);const p=E(a(c,"w:left"));p&&(t.borderLeft=p);const 
w=E(a(c,"w:right"));w&&(t.borderRight=w)}return t}/**
 * Xt(n): true when the element is a `w:tbl` (exported as `isTable`).
 */function Xt(n){return n.name==="w:tbl"}/**
 * it(n, t): convert a `w:tbl` element (exported as `convertTable`).
 * Collects the direct `w:tr` children, converts them with `Kt` and attaches
 * the margins from `Ht` as attrs when present.
 * NOTE(review): rows are converted through Promise.all while sharing one
 * mutable `activeRowspans` Map, and `Kt` awaits inside its cell loop -
 * confirm that row bookkeeping does not depend on rows finishing in order.
 */async function it(n,t){const e=[];for(const s of n.children)s.type==="element"&&s.name==="w:tr"&&e.push(s);const r=new Map,i=await Promise.all(e.map((s,c)=>Kt(s,{context:t.context,activeRowspans:r,rows:e,rowIndex:c}))),o=Ht(n);return{type:"table",...o&&{attrs:o},content:i}}/**
 * Kt(n, t): convert one `w:tr` into a `tableRow` node.
 * Tracks a running column index. A column with a pending entry in
 * `t.activeRowspans` is skipped and its counter decremented. For a cell
 * with rowspan 1 the real span is computed with `qt`; spans above 1 are
 * registered in `activeRowspans`; continuation cells (rowspan 0) are
 * skipped. Cell content comes from `Gt`; the column index advances by the
 * cell's colspan.
 */async function Kt(n,t){const e=[];let r=0;const i=Ut(n);for(const o of n.children){if(o.type!=="element"||o.name!=="w:tc")continue;const s=t.activeRowspans.get(r);if(s&&s>0){t.activeRowspans.set(r,s-1),r++;continue}let c=rt(o);if(c?.rowspan===1){const u=qt({rows:t.rows,rowIndex:t.rowIndex,colIndex:r});u>1&&(c={...c,rowspan:u})}if(c?.rowspan&&c.rowspan>1&&t.activeRowspans.set(r,c.rowspan-1),c?.rowspan===0){r++;continue}const l=await Gt(o,t);e.push({type:"tableCell",...c&&{attrs:c},content:l}),r+=c?.colspan||1}return{type:"tableRow",...i&&{attrs:i},content:e}}/**
 * qt(n): count how many rows a cell at `n.colIndex` spans, starting at 1 and
 * scanning the rows after `n.rowIndex` for continuation cells (rowspan 0)
 * covering that column. Stops at the first row without one.
 * NOTE(review): the column offset `e` is decremented while walking a row and
 * is not reset for the next row, so from the second scanned row onward it
 * no longer equals `colIndex` - confirm whether spans of three or more rows
 * outside the first column are counted correctly.
 */function qt(n){let t=1,e=n.colIndex;for(let r=n.rowIndex+1;r<n.rows.length;r++){const i=n.rows[r];let o=!1;for(const s of i.children){if(s.type!=="element"||s.name!=="w:tc")continue;const c=rt(s),l=c?.colspan||1;if(e>=0&&e<l){if(c?.rowspan===0)t++,o=!0;else return t;break}e-=l}if(!o)break}return t}/**
 * Gt(n, t): convert the `w:p` children of a cell with `$`, flattening array
 * results. Returns a single empty paragraph when the cell yields nothing.
 */async function Gt(n,t){const e=[];for(const r of n.children)if(r.type==="element"&&r.name==="w:p"){const i=await $(r,t);Array.isArray(i)?e.push(...i):e.push(i)}return e.length?e:[{type:"paragraph",content:[]}]}/**
 * Task-list markers: `ot` is U+2610 (treated as unchecked) and `H` is U+2611
 * (treated as checked by `at`).
 */const ot="\u2610",H="\u2611";/**
 * st(n): return the first text node of the `w:t` of the paragraph's `w:r`
 * as found by `a`, or null when it is missing or has an empty value.
 */function st(n){const t=a(n,"w:r");if(!t)return null;const e=a(t,"w:t");if(!e)return null;const r=e.children.find(i=>i.type==="text");return r?.value&&r||null}/**
 * U(n): true when the paragraph's leading text starts with either task
 * marker (exported as `isTaskItem`).
 */function U(n){const t=st(n);if(!t)return!1;const e=t.value;return e.startsWith(ot)||e.startsWith(H)}/**
 * at(n): true when the paragraph's leading text starts with the checked
 * marker `H` (exported as `getTaskItemChecked`).
 */function at(n){return st(n)?.value.startsWith(H)||!1}/**
 * ct(n, t): build a `taskItem` node whose `checked` attr comes from `at` and
 * whose single child is the paragraph built by `Vt` (exported as
 * `convertTaskItem`).
 */async function ct(n,t){return{type:"taskItem",attrs:{checked:at(n)},content:[await Vt(n,t)]}}/**
 * lt(n, t): build a `taskList` from consecutive task paragraphs (exported as
 * `convertTaskList`). Starting at `t.index` in `t.siblings`, every `w:p`
 * accepted by `U` is converted with `ct` and its index is added to
 * `t.processedIndices`; the run ends at the first element that is not a
 * task paragraph.
 */async function lt(n,t){const{siblings:e,index:r,processedIndices:i}=t,o=[];let s=r;for(;s<e.length;){const 
c=e[s];if(c.name!=="w:p"||!U(c))break;i.add(s);const l=await ct(c,{context:t.context,styleInfo:t.styleInfo});o.push(l),s++}return{type:"taskList",content:o}}/**
 * Vt(n, t): build the paragraph inside a task item.
 * Extracts the runs with `D`; when the first node is text starting with a
 * task marker, the first two characters are dropped and leading whitespace
 * trimmed, and the node is removed if nothing remains. Alignment from `j`
 * is attached as attrs.
 * NOTE(review): `substring(2)` removes the one-character marker plus the
 * next character - presumably a separating space; confirm behavior when the
 * text follows the marker directly.
 */async function Vt(n,t){const{context:e,styleInfo:r}=t,i=await D(n,{context:e,styleInfo:r});if(i.length>0&&i[0].type==="text"){const s=i[0],c=s.text;if(c.startsWith(ot)||c.startsWith(H)){const l=c.substring(2).trimStart();l?s.text=l:i.shift()}}const o=j(n);return{type:"paragraph",...o&&{attrs:o},content:i.length?i:void 0}}/**
 * ut(n): true when the paragraph style id equals `N.CODE_BLOCK` or starts
 * with `N.CODE_PREFIX` (exported as `isCodeBlock`).
 */function ut(n){const t=a(n,"w:pPr"),e=(t&&a(t,"w:pStyle"))?.attributes["w:val"];return e===N.CODE_BLOCK||e?.startsWith(N.CODE_PREFIX)||!1}/**
 * ft(n): derive the code-block language from the paragraph style id
 * (exported as `getCodeBlockLanguage`). When the id starts with
 * `N.CODE_BLOCK`, that substring is removed and the rest lower-cased;
 * returns undefined when the id does not match or nothing remains.
 */function ft(n){const t=a(n,"w:pPr"),e=(t&&a(t,"w:pStyle"))?.attributes["w:val"];return e?.startsWith(N.CODE_BLOCK)&&e.replace(N.CODE_BLOCK,"").toLowerCase()||void 0}/**
 * X(n): true when the paragraph has a `w:pPr` and the `w:numPr` lookup in it
 * does not return undefined (exported as `isListItem`).
 */function X(n){const t=a(n,"w:pPr");return!!t&&a(t,"w:numPr")!==void 0}/**
 * K(n): read list membership from `w:pPr/w:numPr` (exported as
 * `getListInfo`). Returns `{numId, level}` where level is the base-10
 * `w:ilvl` value (defaulting to "0" when the attribute is empty), or null
 * when `w:numPr`, `w:ilvl` or `w:numId` is missing.
 */function K(n){const t=a(n,"w:pPr"),e=t&&a(t,"w:numPr");if(!e)return null;const r=a(e,"w:ilvl"),i=a(e,"w:numId");return!r||!i?null:{numId:i.attributes["w:val"],level:parseInt(r.attributes["w:val"]||"0",10)}}/**
 * pt(n): true when the paragraph's `w:r` (as found by `a`) holds a page
 * break and no other content (exported as `isHorizontalRule`). Non-blank
 * `w:t` text, or any child element other than `w:rPr` and the page break
 * itself, counts as content.
 */function pt(n){const t=a(n,"w:r");if(!t)return!1;let e=!1,r=!1;for(const i of t.children)i.type==="element"&&(i.name==="w:br"&&i.attributes["w:type"]==="page"?e=!0:i.name==="w:t"?i.children.find(o=>o.type==="text")?.value?.trim().length&&(r=!0):i.name!=="w:rPr"&&(r=!0));return e&&!r}/**
 * Jt(n, t = {}): entry point, exported as `parseDOCX`.
 * Loads the input through `dt` and `wt` into an object indexed by archive
 * path, builds the hyperlink map (`zt`), image map (`Dt`, given
 * `t.image?.handler`), list map (`Wt`) and style map (`_t`), parses
 * `word/document.xml` with `F`, and converts it with `Qt`. The conversion
 * context is the caller's options spread together with those maps.
 * Throws Error("Invalid DOCX file: missing word/document.xml") when the
 * main document part is absent.
 * NOTE(review): `dt`, `wt` and `Dt` are defined outside this chunk -
 * presumably input normalisation, unzip and image extraction.
 */async function Jt(n,t={}){const e=await dt(n),r=wt(e),i=zt(r),o=await Dt(r,t.image?.handler),s=r["word/document.xml"];if(!s)throw new Error("Invalid DOCX file: missing word/document.xml");const c=F(new TextDecoder().decode(s)),l=Wt(r),u=_t(r),p={...t,hyperlinks:i,images:o,listTypeMap:l,styleMap:u};return await Qt(c,{context:p})}/**
 * Qt(n, t): convert the parsed document tree into a `doc` node.
 * Requires a root node containing `w:document` > `w:body`; when any of them
 * is missing an empty doc is returned. Body element children are converted
 * by `Yt`.
 */async function Qt(n,t){if(n.type!=="root")return{type:"doc",content:[]};const e=a(n,"w:document");if(!e)return{type:"doc",content:[]};const r=a(e,"w:body");return r?{type:"doc",content:await Yt(r.children.filter(i=>i.type==="element"),t)}:{type:"doc",content:[]}}/**
 * Yt(n, t): convert a list of body elements in order.
 * Skips indices already consumed by a list or task-list run and, when
 * `context.ignoreEmptyParagraphs` is set, paragraphs that `re` reports as
 * empty. Each remaining element goes through `Zt`; array results are
 * flattened and null results dropped.
 */async function Yt(n,t){const e=[],r=new Set;for(let 
i=0;i<n.length;i++){if(r.has(i))continue;const o=n[i];if(t.context.ignoreEmptyParagraphs&&o.name==="w:p"&&re(o))continue;const s=await Zt(o,n,i,t,r);Array.isArray(s)?e.push(...s):s&&e.push(s)}return e}/**
 * Zt(n, t, e, r, i): dispatch one body element by name.
 * `w:tbl` -> `it`. `w:p` is tested in this order: code block (`te`), task
 * list (`lt`), numbered/bulleted list (`ee`), lone page break
 * (`horizontalRule`), plain paragraph (`$`). Any other element yields null.
 * `t` is the sibling list, `e` the current index and `i` the shared set of
 * consumed indices.
 */async function Zt(n,t,e,r,i){switch(n.name){case"w:tbl":return await it(n,r);case"w:p":return ut(n)?await te(n):U(n)?await lt(n,{...r,siblings:t,index:e,processedIndices:i}):X(n)?await ee(n,t,e,r,i):pt(n)?{type:"horizontalRule"}:await $(n,r);default:return null}}/**
 * te(n): build a `codeBlock` node whose content is the plain text nodes
 * from `ne`, with a `language` attr when `ft` returns one.
 */async function te(n){const t=ft(n),e=ne(n);return{type:"codeBlock",...t&&{attrs:{language:t}},content:e}}/**
 * ee(n, t, e, r, i): build a list from consecutive list paragraphs.
 * Falls back to `$` when `K` returns null. The list kind comes from
 * `context.listTypeMap` by numId, defaulting to "bullet". Starting at index
 * `e`, siblings that are list paragraphs with the same numId are consumed
 * (their indices added to `i`), each becoming a `listItem` that holds the
 * first node returned by `$`. Ordered lists get attrs `{type:null}` plus
 * `start` when known. The `level` reported by `K` is not used, so the
 * result is a single flat list.
 */async function ee(n,t,e,r,i){const o=K(n);if(!o)return await $(n,r);const s=r.context.listTypeMap.get(o.numId),c=s?.type||"bullet",l=[];let u=e;for(;u<t.length;){const w=t[u];if(w.name!=="w:p"||!X(w))break;const f=K(w);if(!f||f.numId!==o.numId)break;i.add(u);const d=await $(w,r),h=Array.isArray(d)?d[0]:d;l.push({type:"listItem",content:[h]}),u++}const p={type:c==="bullet"?"bulletList":"orderedList",content:l};return c==="ordered"&&(p.attrs={type:null,...s?.start!==void 0&&{start:s.start}}),p}/**
 * ne(n): collect unmarked text nodes, one per `w:r` returned by `x` whose
 * `w:t` has a non-empty text value.
 */function ne(n){const t=[],e=x(n,"w:r");for(const r of e){const i=a(r,"w:t");if(!i)continue;const o=i.children.find(s=>s.type==="text");o&&"value"in o&&o.value&&t.push({type:"text",text:o.value})}return t}/**
 * re(n): true when a paragraph counts as empty: none of its runs has
 * non-whitespace text, a `w:drawing`, `mc:AlternateContent` or `w:pict`
 * child, or a page-type `w:br`.
 */function re(n){const t=x(n,"w:r");for(const e of t){const r=a(e,"w:t");if(r){const o=r.children.find(s=>s.type==="text");if(o&&"value"in o&&o.value&&o.value.trim().length>0)return!1}if(a(e,"w:drawing")||a(e,"mc:AlternateContent")||a(e,"w:pict"))return!1;const i=a(e,"w:br");if(i&&i.attributes["w:type"]==="page")return!1}return!0}/**
 * Public API: maps the minified local names to their exported names.
 */export{$ as convertParagraph,it as convertTable,ct as convertTaskItem,lt as convertTaskList,j as extractAlignment,_ as extractMarks,D as extractRuns,ft as getCodeBlockLanguage,K as getListInfo,at as getTaskItemChecked,ut as isCodeBlock,pt as isHorizontalRule,X as isListItem,Xt as isTable,U as isTaskItem,Jt as parseDOCX};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@docen/import-docx",
|
|
3
|
-
"version": "0.0.8",
|
|
3
|
+
"version": "0.0.10",
|
|
4
4
|
"description": "A powerful TipTap/ProseMirror extension that imports Microsoft Word DOCX files to editor content",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"converter",
|
|
@@ -50,9 +50,10 @@
|
|
|
50
50
|
"xast-util-from-xml": "4.0.0"
|
|
51
51
|
},
|
|
52
52
|
"devDependencies": {
|
|
53
|
-
"@tiptap/core": "3.
|
|
53
|
+
"@tiptap/core": "3.19.0",
|
|
54
54
|
"@types/xast": "2.0.4",
|
|
55
|
-
"@docen/extensions": "0.0.
|
|
55
|
+
"@docen/extensions": "0.0.10",
|
|
56
|
+
"@docen/utils": "0.0.10"
|
|
56
57
|
},
|
|
57
58
|
"peerDependencies": {
|
|
58
59
|
"@napi-rs/canvas": "^0.1.88"
|
|
@@ -63,7 +64,7 @@
|
|
|
63
64
|
}
|
|
64
65
|
},
|
|
65
66
|
"optionalDependencies": {
|
|
66
|
-
"@napi-rs/canvas": "^0.1.
|
|
67
|
+
"@napi-rs/canvas": "^0.1.91"
|
|
67
68
|
},
|
|
68
69
|
"scripts": {
|
|
69
70
|
"dev": "unbuild --stub",
|
package/dist/chunks/index.cjs
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"use strict";

// CommonJS compatibility shim: forwards a fixed set of helpers from the main
// bundle so that consumers of the old chunk path keep working.
const index = require("../index.cjs");

// Loaded for their side effects only, in the original order.
require("xast-util-from-xml");
require("fflate");
require("undio");
require("image-meta");

// Explicit one-per-line assignments keep the named exports statically visible.
exports.convertParagraph = index.convertParagraph;
exports.convertTable = index.convertTable;
exports.convertTaskItem = index.convertTaskItem;
exports.extractAlignment = index.extractAlignment;
exports.extractMarks = index.extractMarks;
exports.extractRuns = index.extractRuns;
exports.getCodeBlockLanguage = index.getCodeBlockLanguage;
exports.getListInfo = index.getListInfo;
exports.getTaskItemChecked = index.getTaskItemChecked;
exports.isCodeBlock = index.isCodeBlock;
exports.isHorizontalRule = index.isHorizontalRule;
exports.isListItem = index.isListItem;
exports.isTable = index.isTable;
exports.isTaskItem = index.isTaskItem;
|
package/dist/chunks/index.mjs
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
// ES-module compatibility shim: re-exports a fixed set of helpers from the
// main bundle so that consumers of the old chunk path keep working.
export {
  convertParagraph,
  convertTable,
  convertTaskItem,
  extractAlignment,
  extractMarks,
  extractRuns,
  getCodeBlockLanguage,
  getListInfo,
  getTaskItemChecked,
  isCodeBlock,
  isHorizontalRule,
  isListItem,
  isTable,
  isTaskItem,
} from "../index.mjs";

// Side-effect-only imports, kept in the original order.
import "xast-util-from-xml";
import "fflate";
import "undio";
import "image-meta";
|