@wonderwhy-er/desktop-commander 0.2.34 → 0.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/dist/tools/docx/builders/html-builder.d.ts +17 -0
  2. package/dist/tools/docx/builders/html-builder.js +92 -0
  3. package/dist/tools/docx/builders/index.d.ts +5 -0
  4. package/dist/tools/docx/builders/index.js +5 -0
  5. package/dist/tools/docx/builders/markdown-builder.d.ts +2 -0
  6. package/dist/tools/docx/builders/markdown-builder.js +260 -0
  7. package/dist/tools/docx/constants.d.ts +36 -0
  8. package/dist/tools/docx/constants.js +57 -0
  9. package/dist/tools/docx/converters/markdown-to-html.d.ts +17 -0
  10. package/dist/tools/docx/converters/markdown-to-html.js +111 -0
  11. package/dist/tools/docx/errors.d.ts +28 -0
  12. package/dist/tools/docx/errors.js +48 -0
  13. package/dist/tools/docx/extractors/images.d.ts +14 -0
  14. package/dist/tools/docx/extractors/images.js +40 -0
  15. package/dist/tools/docx/extractors/metadata.d.ts +14 -0
  16. package/dist/tools/docx/extractors/metadata.js +64 -0
  17. package/dist/tools/docx/extractors/sections.d.ts +14 -0
  18. package/dist/tools/docx/extractors/sections.js +61 -0
  19. package/dist/tools/docx/html.d.ts +17 -0
  20. package/dist/tools/docx/html.js +111 -0
  21. package/dist/tools/docx/index.d.ts +14 -0
  22. package/dist/tools/docx/index.js +16 -0
  23. package/dist/tools/docx/markdown.d.ts +84 -0
  24. package/dist/tools/docx/markdown.js +507 -0
  25. package/dist/tools/docx/operations/handlers/index.d.ts +39 -0
  26. package/dist/tools/docx/operations/handlers/index.js +152 -0
  27. package/dist/tools/docx/operations/html-manipulator.d.ts +24 -0
  28. package/dist/tools/docx/operations/html-manipulator.js +352 -0
  29. package/dist/tools/docx/operations/index.d.ts +14 -0
  30. package/dist/tools/docx/operations/index.js +61 -0
  31. package/dist/tools/docx/operations/operation-handlers.d.ts +3 -0
  32. package/dist/tools/docx/operations/operation-handlers.js +67 -0
  33. package/dist/tools/docx/operations/preprocessor.d.ts +14 -0
  34. package/dist/tools/docx/operations/preprocessor.js +44 -0
  35. package/dist/tools/docx/operations/xml-replacer.d.ts +9 -0
  36. package/dist/tools/docx/operations/xml-replacer.js +35 -0
  37. package/dist/tools/docx/operations.d.ts +13 -0
  38. package/dist/tools/docx/operations.js +13 -0
  39. package/dist/tools/docx/parsers/image-extractor.d.ts +18 -0
  40. package/dist/tools/docx/parsers/image-extractor.js +61 -0
  41. package/dist/tools/docx/parsers/index.d.ts +9 -0
  42. package/dist/tools/docx/parsers/index.js +9 -0
  43. package/dist/tools/docx/parsers/paragraph-parser.d.ts +2 -0
  44. package/dist/tools/docx/parsers/paragraph-parser.js +88 -0
  45. package/dist/tools/docx/parsers/table-parser.d.ts +9 -0
  46. package/dist/tools/docx/parsers/table-parser.js +72 -0
  47. package/dist/tools/docx/parsers/xml-parser.d.ts +25 -0
  48. package/dist/tools/docx/parsers/xml-parser.js +71 -0
  49. package/dist/tools/docx/parsers/zip-reader.d.ts +23 -0
  50. package/dist/tools/docx/parsers/zip-reader.js +52 -0
  51. package/dist/tools/docx/structure.d.ts +25 -0
  52. package/dist/tools/docx/structure.js +102 -0
  53. package/dist/tools/docx/styled-html-parser.d.ts +23 -0
  54. package/dist/tools/docx/styled-html-parser.js +1262 -0
  55. package/dist/tools/docx/types.d.ts +114 -0
  56. package/dist/tools/docx/types.js +8 -0
  57. package/dist/tools/docx/utils/escaping.d.ts +13 -0
  58. package/dist/tools/docx/utils/escaping.js +26 -0
  59. package/dist/tools/docx/utils/images.d.ts +9 -0
  60. package/dist/tools/docx/utils/images.js +26 -0
  61. package/dist/tools/docx/utils/index.d.ts +12 -0
  62. package/dist/tools/docx/utils/index.js +17 -0
  63. package/dist/tools/docx/utils/markdown.d.ts +13 -0
  64. package/dist/tools/docx/utils/markdown.js +32 -0
  65. package/dist/tools/docx/utils/paths.d.ts +15 -0
  66. package/dist/tools/docx/utils/paths.js +27 -0
  67. package/dist/tools/docx/utils/versioning.d.ts +25 -0
  68. package/dist/tools/docx/utils/versioning.js +55 -0
  69. package/dist/tools/docx/utils.d.ts +101 -0
  70. package/dist/tools/docx/utils.js +299 -0
  71. package/dist/tools/docx/validators.d.ts +13 -0
  72. package/dist/tools/docx/validators.js +40 -0
  73. package/dist/utils/capture.js +4 -4
  74. package/dist/utils/files/docx.d.ts +41 -0
  75. package/dist/utils/files/docx.js +245 -0
  76. package/dist/version.d.ts +1 -1
  77. package/dist/version.js +1 -1
  78. package/package.json +1 -1
@@ -0,0 +1,48 @@
1
+ /**
2
+ * DOCX Error Handling
3
+ *
4
+ * Centralised error class and async error-wrapping utility.
5
+ *
6
+ * @module docx/errors
7
+ */
8
+ export class DocxError extends Error {
9
+ constructor(message, code, context) {
10
+ super(message);
11
+ this.code = code;
12
+ this.context = context;
13
+ this.name = 'DocxError';
14
+ Error.captureStackTrace?.(this, DocxError);
15
+ }
16
+ toJSON() {
17
+ return { name: this.name, message: this.message, code: this.code, context: this.context };
18
+ }
19
+ }
20
+ export var DocxErrorCode;
21
+ (function (DocxErrorCode) {
22
+ DocxErrorCode["INVALID_DOCX"] = "INVALID_DOCX";
23
+ DocxErrorCode["INVALID_PATH"] = "INVALID_PATH";
24
+ DocxErrorCode["OPERATION_FAILED"] = "OPERATION_FAILED";
25
+ DocxErrorCode["UNKNOWN_OPERATION"] = "UNKNOWN_OPERATION";
26
+ DocxErrorCode["UNSUPPORTED_OPERATION"] = "UNSUPPORTED_OPERATION";
27
+ DocxErrorCode["DOCX_CREATE_FAILED"] = "DOCX_CREATE_FAILED";
28
+ DocxErrorCode["DOCX_EDIT_FAILED"] = "DOCX_EDIT_FAILED";
29
+ DocxErrorCode["DOCX_READ_FAILED"] = "DOCX_READ_FAILED";
30
+ DocxErrorCode["INVALID_IMAGE_FILE"] = "INVALID_IMAGE_FILE";
31
+ DocxErrorCode["INVALID_IMAGE_DATA_URL"] = "INVALID_IMAGE_DATA_URL";
32
+ DocxErrorCode["GET_INFO_FAILED"] = "GET_INFO_FAILED";
33
+ })(DocxErrorCode || (DocxErrorCode = {}));
34
+ /** Wrap an async operation — re-throws existing DocxErrors, wraps everything else. */
35
+ export async function withErrorContext(operation, errorCode, context) {
36
+ try {
37
+ return await operation();
38
+ }
39
+ catch (error) {
40
+ if (error instanceof DocxError)
41
+ throw error;
42
+ const message = error instanceof Error ? error.message : String(error);
43
+ throw new DocxError(message, errorCode, {
44
+ ...context,
45
+ originalError: error instanceof Error ? error.stack : String(error),
46
+ });
47
+ }
48
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * DOCX Image Extractor (Mammoth Fallback)
3
+ *
4
+ * Extracts images from HTML generated by mammoth.js fallback.
5
+ * Only used when styled parser is not available.
6
+ *
7
+ * @module docx/extractors/images
8
+ */
9
+ import type { DocxImage } from '../types.js';
10
+ /**
11
+ * Extract base64-encoded images from HTML (mammoth.js fallback only).
12
+ * Best-effort: any error raised while parsing is swallowed, and the images
+ * collected up to that point (possibly none) are returned.
+ *
+ * @param html HTML markup to scan for `<img>` elements.
+ * @returns One entry per `<img>` whose `src` is a `data:<mime>;base64,<payload>` URL;
+ *          images with any other kind of `src` are skipped.
13
+ */
14
+ export declare function extractImagesFromHtml(html: string): DocxImage[];
@@ -0,0 +1,40 @@
1
+ /**
2
+ * DOCX Image Extractor (Mammoth Fallback)
3
+ *
4
+ * Extracts images from HTML generated by mammoth.js fallback.
5
+ * Only used when styled parser is not available.
6
+ *
7
+ * @module docx/extractors/images
8
+ */
9
+ import { createRequire } from 'module';
10
+ const require = createRequire(import.meta.url);
11
+ const { DOMParser } = require('@xmldom/xmldom');
12
+ /**
13
+ * Extract base64-encoded images from HTML (mammoth.js fallback only).
14
+ * Returns empty array if parsing fails.
15
+ */
16
+ export function extractImagesFromHtml(html) {
17
+ const images = [];
18
+ try {
19
+ const doc = new DOMParser().parseFromString(html, 'text/html');
20
+ const imgElements = doc.getElementsByTagName('img');
21
+ for (let i = 0; i < imgElements.length; i++) {
22
+ const src = imgElements[i].getAttribute('src') || '';
23
+ const alt = imgElements[i].getAttribute('alt') || '';
24
+ const match = src.match(/^data:([^;]+);base64,(.+)$/);
25
+ if (match) {
26
+ images.push({
27
+ id: `img_${i}`,
28
+ data: match[2],
29
+ mimeType: match[1],
30
+ altText: alt || undefined,
31
+ originalSize: Buffer.from(match[2], 'base64').length,
32
+ });
33
+ }
34
+ }
35
+ }
36
+ catch {
37
+ // Non-critical
38
+ }
39
+ return images;
40
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * DOCX Metadata Extractor
3
+ *
4
+ * Extracts document metadata (title, author, dates, etc.) from DOCX core properties.
5
+ * Follows Single Responsibility Principle — only handles metadata extraction.
6
+ *
7
+ * @module docx/extractors/metadata
8
+ */
9
+ import type { DocxMetadata } from '../types.js';
10
+ /**
11
+ * Extract metadata from a DOCX buffer.
12
+ * Returns minimal metadata if extraction fails (non-critical operation).
+ *
+ * @param buffer Contents of the DOCX (zip) file.
+ * @param fileSize Optional size in bytes; copied unchanged into the result's `fileSize`.
+ * @returns Metadata read from the core-properties part. Extraction errors are swallowed,
+ *          so the promise resolves with whatever was collected (at least `fileSize`).
13
+ */
14
+ export declare function extractDocxMetadata(buffer: Buffer, fileSize?: number): Promise<DocxMetadata>;
@@ -0,0 +1,64 @@
1
+ /**
2
+ * DOCX Metadata Extractor
3
+ *
4
+ * Extracts document metadata (title, author, dates, etc.) from DOCX core properties.
5
+ * Follows Single Responsibility Principle — only handles metadata extraction.
6
+ *
7
+ * @module docx/extractors/metadata
8
+ */
9
+ import { createRequire } from 'module';
10
+ import { CORE_PROPERTIES_PATH, DOCX_NAMESPACES } from '../constants.js';
11
+ const require = createRequire(import.meta.url);
12
+ const { DOMParser } = require('@xmldom/xmldom');
13
+ /**
14
+ * Extract metadata from a DOCX buffer.
15
+ * Returns minimal metadata if extraction fails (non-critical operation).
16
+ */
17
+ export async function extractDocxMetadata(buffer, fileSize) {
18
+ const metadata = { fileSize };
19
+ try {
20
+ const JSZip = require('jszip');
21
+ const zip = await JSZip.loadAsync(buffer);
22
+ const corePropsFile = zip.file(CORE_PROPERTIES_PATH);
23
+ if (!corePropsFile)
24
+ return metadata;
25
+ const corePropsXml = await corePropsFile.async('string');
26
+ const doc = new DOMParser().parseFromString(corePropsXml, 'application/xml');
27
+ /** Extract text content from a namespaced tag. */
28
+ const getText = (tag, nsList = [DOCX_NAMESPACES.DUBLIN_CORE, DOCX_NAMESPACES.CUSTOM_PROPERTIES]) => {
29
+ for (const ns of nsList) {
30
+ const els = doc.getElementsByTagName(`${ns}:${tag}`);
31
+ if (els.length > 0 && els[0].textContent) {
32
+ const text = els[0].textContent.trim();
33
+ return text || undefined;
34
+ }
35
+ }
36
+ return undefined;
37
+ };
38
+ /** Extract date from a DCTERMS namespaced tag. */
39
+ const getDate = (tag) => {
40
+ const els = doc.getElementsByTagName(`${DOCX_NAMESPACES.DCTERMS}:${tag}`);
41
+ if (els.length > 0 && els[0].textContent) {
42
+ const dateStr = els[0].textContent.trim();
43
+ if (dateStr) {
44
+ const d = new Date(dateStr);
45
+ if (!isNaN(d.getTime()))
46
+ return d;
47
+ }
48
+ }
49
+ return undefined;
50
+ };
51
+ metadata.title = getText('title');
52
+ metadata.author = getText('creator');
53
+ metadata.subject = getText('subject');
54
+ metadata.description = getText('description');
55
+ metadata.lastModifiedBy = getText('lastModifiedBy', [DOCX_NAMESPACES.CUSTOM_PROPERTIES]);
56
+ metadata.revision = getText('revision', [DOCX_NAMESPACES.CUSTOM_PROPERTIES]);
57
+ metadata.creationDate = getDate('created');
58
+ metadata.modificationDate = getDate('modified');
59
+ }
60
+ catch {
61
+ // Non-critical — return metadata with fileSize only
62
+ }
63
+ return metadata;
64
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * DOCX Section Parser
3
+ *
4
+ * Parses HTML into structured sections (headings, paragraphs, tables, lists, images).
5
+ * Follows Single Responsibility Principle — only handles section parsing.
6
+ *
7
+ * @module docx/extractors/sections
8
+ */
9
+ import type { DocxSection } from '../types.js';
10
+ /**
11
+ * Parse HTML into structured sections.
12
+ * Only direct element children of `<body>` are considered: `h1`–`h6` become headings
+ * (with `level`), `img` images, `table` tables, `ul`/`ol` lists and `p`/`div`
+ * paragraphs; any other tag is skipped.
+ * If no `<body>` element is found, or parsing throws, a paragraph section holding
+ * the raw `html` is emitted instead.
+ *
+ * @param html HTML markup to split into sections.
+ * @returns Sections in document order.
13
+ */
14
+ export declare function parseHtmlIntoSections(html: string): DocxSection[];
@@ -0,0 +1,61 @@
1
+ /**
2
+ * DOCX Section Parser
3
+ *
4
+ * Parses HTML into structured sections (headings, paragraphs, tables, lists, images).
5
+ * Follows Single Responsibility Principle — only handles section parsing.
6
+ *
7
+ * @module docx/extractors/sections
8
+ */
9
+ import { createRequire } from 'module';
10
+ const require = createRequire(import.meta.url);
11
+ const { DOMParser } = require('@xmldom/xmldom');
12
+ /**
13
+ * Parse HTML into structured sections.
14
+ * Returns a single paragraph section if parsing fails.
15
+ */
16
+ export function parseHtmlIntoSections(html) {
17
+ const sections = [];
18
+ try {
19
+ const doc = new DOMParser().parseFromString(html, 'text/html');
20
+ const body = doc.getElementsByTagName('body')[0];
21
+ if (!body) {
22
+ sections.push({ type: 'paragraph', content: html });
23
+ return sections;
24
+ }
25
+ for (let i = 0; i < body.childNodes.length; i++) {
26
+ const child = body.childNodes[i];
27
+ if (child.nodeType !== 1)
28
+ continue;
29
+ const element = child;
30
+ const tag = element.tagName.toLowerCase();
31
+ const content = element.outerHTML || element.innerHTML;
32
+ // Heading detection
33
+ const headingMatch = tag.match(/^h([1-6])$/);
34
+ if (headingMatch) {
35
+ sections.push({ type: 'heading', level: parseInt(headingMatch[1], 10), content });
36
+ continue;
37
+ }
38
+ // Other element types
39
+ switch (tag) {
40
+ case 'img':
41
+ sections.push({ type: 'image', content });
42
+ break;
43
+ case 'table':
44
+ sections.push({ type: 'table', content });
45
+ break;
46
+ case 'ul':
47
+ case 'ol':
48
+ sections.push({ type: 'list', content });
49
+ break;
50
+ case 'p':
51
+ case 'div':
52
+ sections.push({ type: 'paragraph', content });
53
+ break;
54
+ }
55
+ }
56
+ }
57
+ catch {
58
+ sections.push({ type: 'paragraph', content: html });
59
+ }
60
+ return sections;
61
+ }
@@ -0,0 +1,17 @@
1
+ /**
2
+ * DOCX → HTML Conversion
3
+ *
4
+ * Primary: Direct DOCX XML parsing (`styled-html-parser`) — preserves inline styles
5
+ * (font colours, sizes, families, alignment, highlights, etc.)
6
+ * Fallback: mammoth.js — semantic-only conversion, strips visual styles.
7
+ *
8
+ * @module docx/html
9
+ */
10
+ import type { DocxParseResult, DocxParseOptions } from './types.js';
11
+ /**
12
+ * Parse a DOCX file to styled HTML.
13
+ *
14
+ * Uses direct XML parsing when `preserveFormatting` is true (default).
15
+ * Falls back to mammoth.js if direct parsing fails or a custom `styleMap` is provided.
+ *
+ * @param source Local file path or URL of the DOCX document.
+ * @param options Optional conversion settings; omitted fields fall back to the module defaults.
+ * @returns The converted HTML together with metadata, extracted images, parsed sections
+ *          and (direct parser only) document defaults.
+ * @throws DocxError — failures that are not already a `DocxError` are wrapped with code
+ *         `DOCX_READ_FAILED` and the `source` path in the error context.
16
+ */
17
+ export declare function parseDocxToHtml(source: string, options?: DocxParseOptions): Promise<DocxParseResult>;
@@ -0,0 +1,111 @@
1
+ /**
2
+ * DOCX → HTML Conversion
3
+ *
4
+ * Primary: Direct DOCX XML parsing (`styled-html-parser`) — preserves inline styles
5
+ * (font colours, sizes, families, alignment, highlights, etc.)
6
+ * Fallback: mammoth.js — semantic-only conversion, strips visual styles.
7
+ *
8
+ * @module docx/html
9
+ */
10
+ import fs from 'fs/promises';
11
+ import { createRequire } from 'module';
12
+ import { DocxError, DocxErrorCode, withErrorContext } from './errors.js';
13
+ import { DEFAULT_CONVERSION_OPTIONS } from './constants.js';
14
+ import { isUrl } from './utils/paths.js';
15
+ import { convertDocxToStyledHtml } from './styled-html-parser.js';
16
+ import { extractDocxMetadata } from './extractors/metadata.js';
17
+ import { parseHtmlIntoSections } from './extractors/sections.js';
18
+ import { extractImagesFromHtml } from './extractors/images.js';
19
+ const require = createRequire(import.meta.url);
20
+ const mammoth = require('mammoth');
21
+ // ─── Public API ──────────────────────────────────────────────────────────────
22
/**
 * Parse a DOCX file to styled HTML.
 *
 * Loads the document, converts it (direct XML parser when formatting is requested and
 * no custom `styleMap` is given, mammoth.js otherwise), then extracts metadata and
 * splits the cleaned-up HTML into sections.
 *
 * @param source Local file path or URL of the DOCX document.
 * @param options Optional `includeImages`, `preserveFormatting` and `styleMap`; missing
 *        fields fall back to `DEFAULT_CONVERSION_OPTIONS`.
 * @returns `{ html, metadata, images, sections, documentDefaults }`.
 * @throws DocxError — anything that is not already a `DocxError` is wrapped with code
 *         `DOCX_READ_FAILED` and `{ path: source }` as context.
 */
export async function parseDocxToHtml(source, options = {}) {
    const readAndConvert = async () => {
        const {
            includeImages = DEFAULT_CONVERSION_OPTIONS.includeImages,
            preserveFormatting = DEFAULT_CONVERSION_OPTIONS.preserveFormatting,
            styleMap = DEFAULT_CONVERSION_OPTIONS.styleMap,
        } = options;
        const docxBytes = await loadDocxToBuffer(source);
        // Size on disk is only meaningful for local files; a failing stat is ignored.
        const sizeOnDisk = async () => {
            try {
                const stats = await fs.stat(source);
                return stats.size;
            }
            catch {
                return undefined;
            }
        };
        const fileSize = isUrl(source) ? undefined : await sizeOnDisk();
        const converted = await convertToHtml(docxBytes, includeImages, preserveFormatting, styleMap);
        const { images, documentDefaults } = converted;
        const metadata = await extractDocxMetadata(docxBytes, fileSize);
        const html = postProcessHtml(converted.html);
        const sections = parseHtmlIntoSections(html);
        return { html, metadata, images, sections, documentDefaults };
    };
    return withErrorContext(readAndConvert, DocxErrorCode.DOCX_READ_FAILED, { path: source });
}
46
+ // ─── Buffer Loading ──────────────────────────────────────────────────────────
47
/**
 * Read the raw bytes of a DOCX document from a URL or a local path.
 *
 * @param source URL (as decided by `isUrl`) to fetch, otherwise a file path to read.
 * @returns Buffer with the document contents.
 * @throws DocxError with code `DOCX_READ_FAILED` — raised directly for a non-OK HTTP
 *         response, or wrapping any other failure with `{ source }` as context.
 */
async function loadDocxToBuffer(source) {
    const readBytes = async () => {
        if (!isUrl(source)) {
            return await fs.readFile(source);
        }
        const response = await fetch(source);
        if (response.ok) {
            return Buffer.from(await response.arrayBuffer());
        }
        throw new DocxError(`Failed to fetch DOCX from URL: ${response.statusText}`, DocxErrorCode.DOCX_READ_FAILED, { url: source, status: response.status });
    };
    return withErrorContext(readBytes, DocxErrorCode.DOCX_READ_FAILED, { source });
}
59
+ // ─── Conversion Dispatch ─────────────────────────────────────────────────────
60
+ /**
61
+ * Pick the best converter: direct XML parser (preserves styles) or mammoth.js (semantic only).
62
+ */
63
+ async function convertToHtml(buffer, includeImages, preserveFormatting, styleMap) {
64
+ // Use the styled XML parser when no custom styleMap is provided and formatting is requested
65
+ if (preserveFormatting && styleMap.length === 0) {
66
+ try {
67
+ return await convertDocxToStyledHtml(buffer, includeImages);
68
+ }
69
+ catch {
70
+ // Fall through to mammoth.js fallback
71
+ }
72
+ }
73
+ const mammothResult = await convertWithMammoth(buffer, includeImages, styleMap, preserveFormatting);
74
+ return { ...mammothResult, documentDefaults: undefined };
75
+ }
76
+ /** Fallback: mammoth.js (semantic-only — strips visual styles). */
77
+ async function convertWithMammoth(buffer, includeImages, styleMap, preserveFormatting) {
78
+ const mammothOptions = {};
79
+ if (includeImages) {
80
+ mammothOptions.convertImage = mammoth.images.imgElement((image) => image.read('base64').then((base64Data) => ({
81
+ src: `data:${image.contentType};base64,${base64Data}`,
82
+ })));
83
+ }
84
+ if (styleMap.length > 0) {
85
+ mammothOptions.styleMap = [...styleMap];
86
+ }
87
+ else if (preserveFormatting) {
88
+ mammothOptions.styleMap = [
89
+ "p[style-name='Heading 1'] => h1:fresh",
90
+ "p[style-name='Heading 2'] => h2:fresh",
91
+ "p[style-name='Heading 3'] => h3:fresh",
92
+ "p[style-name='Heading 4'] => h4:fresh",
93
+ "p[style-name='Heading 5'] => h5:fresh",
94
+ "p[style-name='Heading 6'] => h6:fresh",
95
+ "p[style-name='Title'] => h1:fresh",
96
+ "p[style-name='Subtitle'] => h2:fresh",
97
+ "p[style-name='Quote'] => blockquote:fresh",
98
+ "r[style-name='Strong'] => strong",
99
+ "r[style-name='Emphasis'] => em",
100
+ ];
101
+ }
102
+ const result = await mammoth.convertToHtml({ buffer }, mammothOptions);
103
+ const html = result.value;
104
+ const images = extractImagesFromHtml(html);
105
+ return { html, images };
106
+ }
107
+ // ─── Post-Processing ─────────────────────────────────────────────────────────
108
+ /** Minimal whitespace cleanup — preserves all inline style attributes. */
109
+ function postProcessHtml(html) {
110
+ return html.replace(/>\s{2,}</g, '>\n<').trim();
111
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * DOCX Operations Library — Public API
3
+ *
4
+ * Re-exports only the symbols that external consumers need.
5
+ * Internal modules (styled-html-parser, validators, converters, etc.)
6
+ * are consumed by sibling files and are NOT part of the public surface.
7
+ *
8
+ * @module docx
9
+ */
10
+ export { parseDocxToHtml } from './html.js';
11
+ export { createDocxFromHtml } from './builders/html-builder.js';
12
+ export { editDocxWithOperations } from './operations/index.js';
13
+ export type { DocxParseResult, DocxMetadata, DocxImage, DocxSection, DocxOperation, DocxDocumentDefaults, } from './types.js';
14
+ export { DocxError, DocxErrorCode } from './errors.js';
@@ -0,0 +1,16 @@
1
+ /**
2
+ * DOCX Operations Library — Public API
3
+ *
4
+ * Re-exports only the symbols that external consumers need.
5
+ * Internal modules (styled-html-parser, validators, converters, etc.)
6
+ * are consumed by sibling files and are NOT part of the public surface.
7
+ *
8
+ * @module docx
9
+ */
10
+ // ── Reading ─────────────────────────────────────────────────────────────────
11
+ export { parseDocxToHtml } from './html.js';
12
+ // ── Writing / Editing ───────────────────────────────────────────────────────
13
+ export { createDocxFromHtml } from './builders/html-builder.js';
14
+ export { editDocxWithOperations } from './operations/index.js';
15
+ // ── Errors ──────────────────────────────────────────────────────────────────
16
+ export { DocxError, DocxErrorCode } from './errors.js';
@@ -0,0 +1,84 @@
1
+ /**
2
+ * DOCX to Markdown Conversion
3
+ * Uses Docxtemplater + XML parsing for reading Word documents
4
+ */
5
+ /**
6
+ * DOCX metadata structure.
+ * Every field is optional.
7
+ */
8
+ export interface DocxMetadata {
9
+ /** Document title from core properties */
10
+ title?: string;
11
+ /** Document author */
12
+ author?: string;
13
+ /** Document creator */
14
+ creator?: string;
15
+ /** Document subject */
16
+ subject?: string;
17
+ /** Document description */
18
+ description?: string;
19
+ /** Creation date */
20
+ creationDate?: Date;
21
+ /** Last modification date */
22
+ modificationDate?: Date;
23
+ /** Who last modified the document */
24
+ lastModifiedBy?: string;
25
+ /** Document revision number (kept as a string) */
26
+ revision?: string;
27
+ /** File size in bytes */
28
+ fileSize?: number;
29
+ }
30
+ /**
31
+ * Embedded image information.
+ * `id`, `data` and `mimeType` are always present; `altText` and `originalSize` are optional.
32
+ */
33
+ export interface DocxImage {
34
+ /** Unique identifier for the image */
35
+ id: string;
36
+ /** Base64-encoded image data */
37
+ data: string;
38
+ /** MIME type (e.g., "image/png", "image/jpeg") */
39
+ mimeType: string;
40
+ /** Alt text if available */
41
+ altText?: string;
42
+ /** Original size in bytes */
43
+ originalSize?: number;
44
+ }
45
+ /**
46
+ * DOCX section/paragraph structure
47
+ */
48
+ export interface DocxSection {
49
+ /** Section type: heading, paragraph, list, table or image */
50
+ type: 'heading' | 'paragraph' | 'list' | 'table' | 'image';
51
+ /** Section content as markdown */
52
+ content: string;
53
+ /** Heading level; only meaningful when `type` is 'heading' */
54
+ level?: number;
55
+ /** Associated images if any */
56
+ images?: DocxImage[];
57
+ }
58
+ /**
59
+ * Complete DOCX parse result.
+ * `markdown`, `metadata` and `images` are always present; `sections` is optional.
60
+ */
61
+ export interface DocxParseResult {
62
+ /** Document content as markdown */
63
+ markdown: string;
64
+ /** Document metadata */
65
+ metadata: DocxMetadata;
66
+ /** Extracted images */
67
+ images: DocxImage[];
68
+ /** Structured sections (optional, for advanced parsing) */
69
+ sections?: DocxSection[];
70
+ }
71
+ /**
72
+ * Convert DOCX to Markdown using Docxtemplater + XML parsing
73
+ * @param source Path to DOCX file or URL
74
+ * @param options Conversion options; the argument and each of its fields may be omitted
75
+ * @returns Promise resolving to the parsed DOCX result with markdown and metadata
76
+ */
77
+ export declare function parseDocxToMarkdown(source: string, options?: {
78
+ /** Extract images as base64 */
79
+ includeImages?: boolean;
80
+ /** Preserve inline formatting (bold, italic) */
81
+ preserveFormatting?: boolean;
82
+ /** Custom style mapping (list of mapping strings) */
83
+ styleMap?: string[];
84
+ }): Promise<DocxParseResult>;