@origints/mammoth 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Franco Ponticelli
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,149 @@
1
+ # @origints/mammoth
2
+
3
+ > DOCX to HTML/text conversion for Origins using mammoth.js.
4
+
5
+ ---
6
+
7
+ ## Features
8
+
9
+ - Convert DOCX to semantic HTML
10
+ - Convert DOCX to plain text
11
+ - Custom style mapping for headings, lists, and more
12
+ - Configurable image handling
13
+ - Conversion warnings and error messages
14
+ - Integrates with Origins transform registry
15
+
16
+ ---
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ npm install @origints/mammoth @origints/core
22
+ ```
23
+
24
+ ---
25
+
26
+ ## Usage with Planner
27
+
28
+ ### Convert a DOCX file and extract the HTML
29
+
30
+ ```ts
31
+ import { Planner, loadFile, run } from '@origints/core'
32
+ import { docxToHtml } from '@origints/mammoth'
33
+
34
+ const plan = new Planner()
35
+ .in(loadFile('document.docx'))
36
+ .mapIn(docxToHtml())
37
+ .emit((out, $) => out.add('html', $.get('html').string()))
38
+ .compile()
39
+
40
+ const result = await run(plan, { readFile, registry })
41
+ // result.value: { html: '<h1>Title</h1><p>Content...</p>' }
42
+ ```
43
+
44
+ ### Convert with custom style mapping
45
+
46
+ ```ts
47
+ import { docxToHtml } from '@origints/mammoth'
48
+
49
+ const plan = new Planner()
50
+ .in(loadFile('report.docx'))
51
+ .mapIn(
52
+ docxToHtml({
53
+ styleMap: [
54
+ "p[style-name='Title'] => h1.document-title",
55
+ "p[style-name='Heading 1'] => h1",
56
+ "p[style-name='Heading 2'] => h2",
57
+ "p[style-name='Quote'] => blockquote",
58
+ ],
59
+ idPrefix: 'doc-',
60
+ })
61
+ )
62
+ .emit((out, $) => out.add('content', $.get('html').string()))
63
+ .compile()
64
+ ```
65
+
66
+ ### Extract plain text from a DOCX file
67
+
68
+ ```ts
69
+ import { docxToText } from '@origints/mammoth'
70
+
71
+ const plan = new Planner()
72
+ .in(loadFile('document.docx'))
73
+ .mapIn(docxToText())
74
+ .emit((out, $) => out.add('text', $.get('text').string()))
75
+ .compile()
76
+
77
+ const result = await run(plan, { readFile, registry })
78
+ // result.value: { text: 'Document Title\nContent here...' }
79
+ ```
80
+
81
+ ### Combine DOCX with other sources
82
+
83
+ ```ts
84
+ const plan = new Planner()
85
+ .in(loadFile('report.docx'))
86
+ .mapIn(docxToHtml())
87
+ .emit((out, $) => out.add('reportHtml', $.get('html').string()))
88
+ .in(loadFile('metadata.json'))
89
+ .mapIn(parseJson())
90
+ .emit((out, $) =>
91
+ out
92
+ .add('author', $.get('author').string())
93
+ .add('date', $.get('date').string())
94
+ )
95
+ .compile()
96
+ ```
97
+
98
+ ### Standalone usage (without Planner)
99
+
100
+ ```ts
101
+ import * as fs from 'fs'
102
+ import { docxToHtmlImpl, docxToTextImpl } from '@origints/mammoth'
103
+
104
+ const buffer = fs.readFileSync('document.docx')
105
+
106
+ // Convert to HTML
107
+ const htmlResult = await docxToHtmlImpl.execute(buffer)
108
+ console.log(htmlResult.html)
109
+
110
+ // Log conversion messages (each has a type of 'warning' or 'error')
111
+ for (const msg of htmlResult.messages) {
112
+ console.warn(msg.message)
113
+ }
114
+
115
+ // Convert to plain text
116
+ const textResult = await docxToTextImpl.execute(buffer)
117
+ console.log(textResult.text)
118
+ ```
119
+
120
+ ### Image handling
121
+
122
+ ```ts
123
+ import { docxToHtml } from '@origints/mammoth'
124
+
125
+ // Omit images
126
+ const plan = new Planner()
127
+ .in(loadFile('document.docx'))
128
+ .mapIn(docxToHtml({ imageHandling: 'omit' }))
129
+ .emit((out, $) => out.add('html', $.get('html').string()))
130
+ .compile()
131
+ ```
132
+
133
+ ---
134
+
135
+ ## API
136
+
137
+ | Export | Description |
138
+ | ------------------------------------- | -------------------------------------------------- |
139
+ | `docxToHtml(options?)` | Create a transform AST for HTML conversion |
140
+ | `docxToText(options?)` | Create a transform AST for text conversion |
141
+ | `docxToHtmlImpl` | Async transform implementation for HTML conversion |
142
+ | `docxToTextImpl` | Async transform implementation for text conversion |
143
+ | `registerMammothTransforms(registry)` | Register all mammoth transforms with a registry |
144
+
145
+ ---
146
+
147
+ ## License
148
+
149
+ MIT
package/dist/convert.d.ts CHANGED
@@ -5,15 +5,15 @@ import { DocxToHtmlOptions, DocxToTextOptions } from './options';
5
5
  *
6
6
  * @example
7
7
  * ```ts
8
- * const plan = Planner.in(loadFile('document.docx'))
8
+ * const plan = new Planner().in(loadFile('document.docx'))
9
9
  * .mapIn(docxToHtml())
10
- * .emit((out, $) => out.add('html', $.get('html').asString()))
10
+ * .emit((out, $) => out.add('html', $.get('html').string()))
11
11
  * .compile()
12
12
  * ```
13
13
  *
14
14
  * @example With custom style mapping
15
15
  * ```ts
16
- * const plan = Planner.in(loadFile('document.docx'))
16
+ * const plan = new Planner().in(loadFile('document.docx'))
17
17
  * .mapIn(docxToHtml({
18
18
  * styleMap: [
19
19
  * "p[style-name='Title'] => h1.document-title",
@@ -21,7 +21,7 @@ import { DocxToHtmlOptions, DocxToTextOptions } from './options';
21
21
  * ],
22
22
  * idPrefix: 'doc-',
23
23
  * }))
24
- * .emit((out, $) => out.add('content', $.get('html').asString()))
24
+ * .emit((out, $) => out.add('content', $.get('html').string()))
25
25
  * .compile()
26
26
  * ```
27
27
  */
@@ -31,9 +31,9 @@ export declare function docxToHtml(options?: DocxToHtmlOptions): TransformAst;
31
31
  *
32
32
  * @example
33
33
  * ```ts
34
- * const plan = Planner.in(loadFile('document.docx'))
34
+ * const plan = new Planner().in(loadFile('document.docx'))
35
35
  * .mapIn(docxToText())
36
- * .emit((out, $) => out.add('text', $.get('text').asString()))
36
+ * .emit((out, $) => out.add('text', $.get('text').string()))
37
37
  * .compile()
38
38
  * ```
39
39
  */
package/dist/index.cjs CHANGED
@@ -1,2 +1,2 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const m=require("mammoth");async function s(e){const r=e.getReader(),t=[];try{for(;;){const{done:a,value:n}=await r.read();if(a)break;t.push(n)}return Buffer.concat(t)}finally{r.releaseLock()}}function u(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToHtml",args:g(e)}}function o(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToText",args:e}}const c={namespace:"@origints/mammoth",name:"docxToHtml",async execute(e,r){const t=y(r),a=await d(e),n=p(t),i=await m.convertToHtml({buffer:a},n);return{html:i.value,messages:i.messages.map(f)}}},l={namespace:"@origints/mammoth",name:"docxToText",async execute(e){const r=await d(e),t=await m.extractRawText({buffer:r});return{text:t.value,messages:t.messages.map(f)}}};function g(e){if(!e)return;const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),(e.imageHandling==="inline"||e.imageHandling==="omit")&&(r.imageHandling=e.imageHandling),e.preserveEmptyParagraphs!==void 0&&(r.preserveEmptyParagraphs=e.preserveEmptyParagraphs),Object.keys(r).length>0?r:void 0}function y(e){return e||{}}async function d(e){if(Buffer.isBuffer(e))return e;if(e instanceof ReadableStream)return s(e);throw new Error(`docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`)}function p(e){const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 
0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),e.preserveEmptyParagraphs===!1?r.ignoreEmptyParagraphs=!0:e.preserveEmptyParagraphs===!0&&(r.ignoreEmptyParagraphs=!1),e.imageHandling&&(r.convertImage=x(e.imageHandling)),e.transformDocument&&(r.transformDocument=e.transformDocument),r}function x(e){return e==="omit"?m.images.imgElement(()=>Promise.resolve({src:""})):e==="inline"?m.images.imgElement(r=>r.readAsBase64String().then(t=>({src:`data:${r.contentType};base64,${t}`}))):m.images.imgElement(async r=>{const t={contentType:r.contentType,read:(n=>n==="base64"?r.readAsBase64String():r.readAsArrayBuffer())},a=await e(t);return a===null?{src:""}:a})}function f(e){return{type:e.type==="error"?"error":"warning",message:e.message}}function T(e){e.register(c),e.register(l)}exports.docxToHtml=u;exports.docxToHtmlImpl=c;exports.docxToText=o;exports.docxToTextImpl=l;exports.registerMammothTransforms=T;exports.streamToBuffer=s;
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const m=require("mammoth"),o=require("@origints/core");async function s(e){const r=e.getReader(),t=[];try{for(;;){const{done:a,value:n}=await r.read();if(a)break;t.push(n)}return Buffer.concat(t)}finally{r.releaseLock()}}function g(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToHtml",args:p(e)}}function y(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToText",args:e}}const c={namespace:"@origints/mammoth",name:"docxToHtml",async execute(e,r){const t=x(r),a=await d(e),n=T(t),i=await m.convertToHtml({buffer:a},n);return{html:i.value,messages:i.messages.map(f)}}},l={namespace:"@origints/mammoth",name:"docxToText",async execute(e){const r=await d(e),t=await m.extractRawText({buffer:r});return{text:t.value,messages:t.messages.map(f)}}};function p(e){if(!e)return;const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),(e.imageHandling==="inline"||e.imageHandling==="omit")&&(r.imageHandling=e.imageHandling),e.preserveEmptyParagraphs!==void 0&&(r.preserveEmptyParagraphs=e.preserveEmptyParagraphs),Object.keys(r).length>0?r:void 0}function x(e){return e||{}}async function d(e){if(Buffer.isBuffer(e))return e;if(e instanceof ReadableStream)return s(e);throw new Error(`docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`)}function T(e){const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 
0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),e.preserveEmptyParagraphs===!1?r.ignoreEmptyParagraphs=!0:e.preserveEmptyParagraphs===!0&&(r.ignoreEmptyParagraphs=!1),e.imageHandling&&(r.convertImage=M(e.imageHandling)),e.transformDocument&&(r.transformDocument=e.transformDocument),r}function M(e){return e==="omit"?m.images.imgElement(()=>Promise.resolve({src:""})):e==="inline"?m.images.imgElement(r=>r.readAsBase64String().then(t=>({src:`data:${r.contentType};base64,${t}`}))):m.images.imgElement(async r=>{const t={contentType:r.contentType,read:(n=>n==="base64"?r.readAsBase64String():r.readAsArrayBuffer())},a=await e(t);return a===null?{src:""}:a})}function f(e){return{type:e.type==="error"?"error":"warning",message:e.message}}function u(e){e.register(c),e.register(l)}u(o.globalRegistry);exports.docxToHtml=g;exports.docxToHtmlImpl=c;exports.docxToText=y;exports.docxToTextImpl=l;exports.registerMammothTransforms=u;exports.streamToBuffer=s;
2
2
  //# sourceMappingURL=index.cjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.cjs","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').asString()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(\n input: unknown,\n args?: unknown\n ): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (\n options.imageHandling === 'inline' ||\n options.imageHandling === 'omit'\n ) {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: { type: string; message: string }): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'\n *\n * // Register transforms\n * registerMammothTransforms(globalRegistry)\n *\n * // Create a plan\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * 
\"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n 
registry.register(docxToTextImpl)\n}\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry"],"mappings":"2GASA,eAAsBA,EACpBC,EACiB,CACjB,MAAMC,EAASD,EAAO,UAAA,EAChBE,EAAuB,CAAA,EAE7B,GAAI,CACF,OAAa,CACX,KAAM,CAAE,KAAAC,EAAM,MAAAC,CAAA,EAAU,MAAMH,EAAO,KAAA,EACrC,GAAIE,EAAM,MACVD,EAAO,KAAKE,CAAK,CACnB,CACA,OAAO,OAAO,OAAOF,CAAM,CAC7B,QAAA,CACED,EAAO,YAAA,CACT,CACF,CCkDO,SAASI,EAAWC,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMC,EAAiBD,CAAO,CAAA,CAElC,CAaO,SAASE,EAAWF,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMA,CAAA,CAEV,CAOO,MAAMG,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QACJC,EACAC,EAC+B,CAC/B,MAAML,EAAUM,EACdD,CAAA,EAEIE,EAAS,MAAMC,EAASJ,CAAK,EAC7BK,EAAiBC,EAAiBV,CAAO,EAEzCW,EAAS,MAAMC,EAAQ,cAAc,CAAE,OAAAL,CAAA,EAAUE,CAAc,EAErE,MAAO,CACL,KAAME,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAOaC,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQV,EAAyC,CACrD,MAAMG,EAAS,MAAMC,EAASJ,CAAK,EAG7BO,EAAS,MAAMC,EAAQ,eAAe,CAAE,OAAAL,EAAQ,EAEtD,MAAO,CACL,KAAMI,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAwBA,SAASZ,EACPD,EACmC,CACnC,GAAI,CAACA,EAAS,OAEd,MAAMe,EAAoC,CAAA,EAE1C,OAAIf,EAAQ,WACVe,EAAW,SAAWf,EAAQ,UAE5BA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAE3CA,EAAQ,yBAA2B,SACrCe,EAAW,uBAAyBf,EAAQ,wBAE1CA,EAAQ,WACVe,EAAW,SAAWf,EAAQ,WAG9BA,EAAQ,gBAAkB,UAC1BA,EAAQ,gBAAkB,UAE1Be,EAAW,cAAgBf,EAAQ,eAEjCA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAGxC,OAAO,KAAKe,CAAU,EAAE,OAAS,EAAIA,EAAa,MAC3D,CAKA,SAAST,EACPS,EACmB,CACnB,OAAKA,GAAmB,CAAA,CAE1B,CAKA,eAAeP,EAASJ,EAAiC,CACvD,GAAI,OAAO,SAASA,CAAK,EACvB,OAAOA,EAET,GAAIA,aAAiB,eACnB,OAAOX,EAAeW,CAAmC,EAE3D,MAAM,IAAI,MACR,0DAA0D,OAAOA,
CAAK,EAAA,CAE1E,CAKA,SAASM,EAAiBV,EAA4C,CACpE,MAAMgB,EAA8B,CAAA,EAEpC,OAAIhB,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,SACtCgB,EAAY,wBAA0BhB,EAAQ,yBAG5CA,EAAQ,yBAA2B,SACrCgB,EAAY,uBAAyBhB,EAAQ,wBAG3CA,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,GACtCgB,EAAY,sBAAwB,GAC3BhB,EAAQ,0BAA4B,KAC7CgB,EAAY,sBAAwB,IAGlChB,EAAQ,gBACVgB,EAAY,aAAeC,EAAqBjB,EAAQ,aAAa,GAGnEA,EAAQ,oBACVgB,EAAY,kBAAoBhB,EAAQ,mBAGnCgB,CACT,CAKA,SAASC,EACPC,EACuB,CACvB,OAAIA,IAAa,OACRN,EAAQ,OAAO,WAAW,IAAM,QAAQ,QAAQ,CAAE,IAAK,EAAA,CAAI,CAAC,EAGjEM,IAAa,SAERN,EAAQ,OAAO,WAAYO,GAChCA,EAAM,mBAAA,EAAqB,KAAKC,IAAS,CACvC,IAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI,EAAA,EAC7C,CAAA,EAKCR,EAAQ,OAAO,WAAW,MAAOO,GAAwB,CAE9D,MAAME,EAAe,CACnB,YAAaF,EAAM,YACnB,MAAQG,GACFA,IAAa,SACRH,EAAM,mBAAA,EAERA,EAAM,kBAAA,EACf,EAMIR,EAAS,MAAMO,EAASG,CAAY,EAC1C,OAAIV,IAAW,KACN,CAAE,IAAK,EAAA,EAETA,CACT,CAAC,CACH,CAKA,SAASE,EAAiBU,EAAwD,CAChF,MAAO,CACL,KAAMA,EAAI,OAAS,QAAU,QAAU,UACvC,QAASA,EAAI,OAAA,CAEjB,CC7OO,SAASC,EAA0BC,EAMjC,CACPA,EAAS,SAAStB,CAAc,EAChCsB,EAAS,SAASX,CAAc,CAClC"}
1
+ {"version":3,"file":"index.cjs","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').string()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(input: unknown, args?: unknown): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (options.imageHandling === 'inline' || options.imageHandling === 'omit') {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: {\n type: string\n message: string\n}): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml } from '@origints/mammoth'\n *\n * // Create a plan\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * 
}))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { globalRegistry } from '@origints/core'\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n\n// Auto-register 
transforms\nregisterMammothTransforms(globalRegistry)\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry","globalRegistry"],"mappings":"uIASA,eAAsBA,EACpBC,EACiB,CACjB,MAAMC,EAASD,EAAO,UAAA,EAChBE,EAAuB,CAAA,EAE7B,GAAI,CACF,OAAa,CACX,KAAM,CAAE,KAAAC,EAAM,MAAAC,CAAA,EAAU,MAAMH,EAAO,KAAA,EACrC,GAAIE,EAAM,MACVD,EAAO,KAAKE,CAAK,CACnB,CACA,OAAO,OAAO,OAAOF,CAAM,CAC7B,QAAA,CACED,EAAO,YAAA,CACT,CACF,CCkDO,SAASI,EAAWC,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMC,EAAiBD,CAAO,CAAA,CAElC,CAaO,SAASE,EAAWF,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMA,CAAA,CAEV,CAOO,MAAMG,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQC,EAAgBC,EAA+C,CAC3E,MAAML,EAAUM,EACdD,CAAA,EAEIE,EAAS,MAAMC,EAASJ,CAAK,EAC7BK,EAAiBC,EAAiBV,CAAO,EAEzCW,EAAS,MAAMC,EAAQ,cAAc,CAAE,OAAAL,CAAA,EAAUE,CAAc,EAErE,MAAO,CACL,KAAME,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAOaC,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQV,EAAyC,CACrD,MAAMG,EAAS,MAAMC,EAASJ,CAAK,EAG7BO,EAAS,MAAMC,EAAQ,eAAe,CAAE,OAAAL,EAAQ,EAEtD,MAAO,CACL,KAAMI,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAwBA,SAASZ,EACPD,EACmC,CACnC,GAAI,CAACA,EAAS,OAEd,MAAMe,EAAoC,CAAA,EAE1C,OAAIf,EAAQ,WACVe,EAAW,SAAWf,EAAQ,UAE5BA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAE3CA,EAAQ,yBAA2B,SACrCe,EAAW,uBAAyBf,EAAQ,wBAE1CA,EAAQ,WACVe,EAAW,SAAWf,EAAQ,WAE5BA,EAAQ,gBAAkB,UAAYA,EAAQ,gBAAkB,UAClEe,EAAW,cAAgBf,EAAQ,eAEjCA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAGxC,OAAO,KAAKe,CAAU,EAAE,OAAS,EAAIA,EAAa,MAC3D,CAKA,SAAST,EACPS,EACmB,CACnB,OAAKA,GAAmB,CAAA,CAE1B,CAKA,eAAeP,EAASJ,EAAiC,CACvD,GAAI,OAAO,SAASA,CAAK,EACvB,OAAOA,EAET,GAAIA,aAAiB,eACnB,OAAOX,EAAeW,CAAmC,
EAE3D,MAAM,IAAI,MACR,0DAA0D,OAAOA,CAAK,EAAA,CAE1E,CAKA,SAASM,EAAiBV,EAA4C,CACpE,MAAMgB,EAA8B,CAAA,EAEpC,OAAIhB,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,SACtCgB,EAAY,wBAA0BhB,EAAQ,yBAG5CA,EAAQ,yBAA2B,SACrCgB,EAAY,uBAAyBhB,EAAQ,wBAG3CA,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,GACtCgB,EAAY,sBAAwB,GAC3BhB,EAAQ,0BAA4B,KAC7CgB,EAAY,sBAAwB,IAGlChB,EAAQ,gBACVgB,EAAY,aAAeC,EAAqBjB,EAAQ,aAAa,GAGnEA,EAAQ,oBACVgB,EAAY,kBAAoBhB,EAAQ,mBAGnCgB,CACT,CAKA,SAASC,EACPC,EACuB,CACvB,OAAIA,IAAa,OACRN,EAAQ,OAAO,WAAW,IAAM,QAAQ,QAAQ,CAAE,IAAK,EAAA,CAAI,CAAC,EAGjEM,IAAa,SAERN,EAAQ,OAAO,WAAYO,GAChCA,EAAM,mBAAA,EAAqB,KAAKC,IAAS,CACvC,IAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI,EAAA,EAC7C,CAAA,EAKCR,EAAQ,OAAO,WAAW,MAAOO,GAAwB,CAE9D,MAAME,EAAe,CACnB,YAAaF,EAAM,YACnB,MAAQG,GACFA,IAAa,SACRH,EAAM,mBAAA,EAERA,EAAM,kBAAA,EACf,EAMIR,EAAS,MAAMO,EAASG,CAAY,EAC1C,OAAIV,IAAW,KACN,CAAE,IAAK,EAAA,EAETA,CACT,CAAC,CACH,CAKA,SAASE,EAAiBU,EAGP,CACjB,MAAO,CACL,KAAMA,EAAI,OAAS,QAAU,QAAU,UACvC,QAASA,EAAI,OAAA,CAEjB,CC5OO,SAASC,EAA0BC,EAMjC,CACPA,EAAS,SAAStB,CAAc,EAChCsB,EAAS,SAASX,CAAc,CAClC,CAGAU,EAA0BE,gBAAc"}
package/dist/index.d.ts CHANGED
@@ -9,21 +9,18 @@
9
9
  * @example Basic usage
10
10
  * ```ts
11
11
  * import { Planner, loadFile } from '@origints/core'
12
- * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'
13
- *
14
- * // Register transforms
15
- * registerMammothTransforms(globalRegistry)
12
+ * import { docxToHtml } from '@origints/mammoth'
16
13
  *
17
14
  * // Create a plan
18
- * const plan = Planner.in(loadFile('document.docx'))
15
+ * const plan = new Planner().in(loadFile('document.docx'))
19
16
  * .mapIn(docxToHtml())
20
- * .emit((out, $) => out.add('html', $.get('html').asString()))
17
+ * .emit((out, $) => out.add('html', $.get('html').string()))
21
18
  * .compile()
22
19
  * ```
23
20
  *
24
21
  * @example With custom style mapping
25
22
  * ```ts
26
- * const plan = Planner.in(loadFile('document.docx'))
23
+ * const plan = new Planner().in(loadFile('document.docx'))
27
24
  * .mapIn(docxToHtml({
28
25
  * styleMap: [
29
26
  * "p[style-name='Title'] => h1.document-title",
@@ -33,7 +30,7 @@
33
30
  * idPrefix: 'doc-',
34
31
  * imageHandling: 'omit',
35
32
  * }))
36
- * .emit((out, $) => out.add('content', $.get('html').asString()))
33
+ * .emit((out, $) => out.add('content', $.get('html').string()))
37
34
  * .compile()
38
35
  * ```
39
36
  */
package/dist/index.es.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import m from "mammoth";
2
+ import { globalRegistry as l } from "@origints/core";
2
3
  async function f(e) {
3
4
  const r = e.getReader(), a = [];
4
5
  try {
@@ -12,15 +13,15 @@ async function f(e) {
12
13
  r.releaseLock();
13
14
  }
14
15
  }
15
- function x(e) {
16
+ function T(e) {
16
17
  return {
17
18
  kind: "transform",
18
19
  namespace: "@origints/mammoth",
19
20
  name: "docxToHtml",
20
- args: u(e)
21
+ args: o(e)
21
22
  };
22
23
  }
23
- function h(e) {
24
+ function v(e) {
24
25
  return {
25
26
  kind: "transform",
26
27
  namespace: "@origints/mammoth",
@@ -28,19 +29,19 @@ function h(e) {
28
29
  args: e
29
30
  };
30
31
  }
31
- const l = {
32
+ const d = {
32
33
  namespace: "@origints/mammoth",
33
34
  name: "docxToHtml",
34
35
  async execute(e, r) {
35
- const a = o(
36
+ const a = g(
36
37
  r
37
- ), t = await s(e), n = g(a), i = await m.convertToHtml({ buffer: t }, n);
38
+ ), t = await s(e), n = y(a), i = await m.convertToHtml({ buffer: t }, n);
38
39
  return {
39
40
  html: i.value,
40
41
  messages: i.messages.map(c)
41
42
  };
42
43
  }
43
- }, d = {
44
+ }, u = {
44
45
  namespace: "@origints/mammoth",
45
46
  name: "docxToText",
46
47
  async execute(e) {
@@ -51,12 +52,12 @@ const l = {
51
52
  };
52
53
  }
53
54
  };
54
- function u(e) {
55
+ function o(e) {
55
56
  if (!e) return;
56
57
  const r = {};
57
58
  return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), (e.imageHandling === "inline" || e.imageHandling === "omit") && (r.imageHandling = e.imageHandling), e.preserveEmptyParagraphs !== void 0 && (r.preserveEmptyParagraphs = e.preserveEmptyParagraphs), Object.keys(r).length > 0 ? r : void 0;
58
59
  }
59
- function o(e) {
60
+ function g(e) {
60
61
  return e || {};
61
62
  }
62
63
  async function s(e) {
@@ -68,11 +69,11 @@ async function s(e) {
68
69
  `docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`
69
70
  );
70
71
  }
71
- function g(e) {
72
+ function y(e) {
72
73
  const r = {};
73
- return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), e.preserveEmptyParagraphs === !1 ? r.ignoreEmptyParagraphs = !0 : e.preserveEmptyParagraphs === !0 && (r.ignoreEmptyParagraphs = !1), e.imageHandling && (r.convertImage = y(e.imageHandling)), e.transformDocument && (r.transformDocument = e.transformDocument), r;
74
+ return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), e.preserveEmptyParagraphs === !1 ? r.ignoreEmptyParagraphs = !0 : e.preserveEmptyParagraphs === !0 && (r.ignoreEmptyParagraphs = !1), e.imageHandling && (r.convertImage = p(e.imageHandling)), e.transformDocument && (r.transformDocument = e.transformDocument), r;
74
75
  }
75
- function y(e) {
76
+ function p(e) {
76
77
  return e === "omit" ? m.images.imgElement(() => Promise.resolve({ src: "" })) : e === "inline" ? m.images.imgElement(
77
78
  (r) => r.readAsBase64String().then((a) => ({
78
79
  src: `data:${r.contentType};base64,${a}`
@@ -91,15 +92,16 @@ function c(e) {
91
92
  message: e.message
92
93
  };
93
94
  }
94
- function M(e) {
95
- e.register(l), e.register(d);
95
+ function x(e) {
96
+ e.register(d), e.register(u);
96
97
  }
98
+ x(l);
97
99
  export {
98
- x as docxToHtml,
99
- l as docxToHtmlImpl,
100
- h as docxToText,
101
- d as docxToTextImpl,
102
- M as registerMammothTransforms,
100
+ T as docxToHtml,
101
+ d as docxToHtmlImpl,
102
+ v as docxToText,
103
+ u as docxToTextImpl,
104
+ x as registerMammothTransforms,
103
105
  f as streamToBuffer
104
106
  };
105
107
  //# sourceMappingURL=index.es.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.es.js","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').asString()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(\n input: unknown,\n args?: unknown\n ): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (\n options.imageHandling === 'inline' ||\n options.imageHandling === 'omit'\n ) {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: { type: string; message: string }): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'\n *\n * // Register transforms\n * registerMammothTransforms(globalRegistry)\n *\n * // Create a plan\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * 
\"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n 
registry.register(docxToTextImpl)\n}\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry"],"mappings":";AASA,eAAsBA,EACpBC,GACiB;AACjB,QAAMC,IAASD,EAAO,UAAA,GAChBE,IAAuB,CAAA;AAE7B,MAAI;AACF,eAAa;AACX,YAAM,EAAE,MAAAC,GAAM,OAAAC,EAAA,IAAU,MAAMH,EAAO,KAAA;AACrC,UAAIE,EAAM;AACV,MAAAD,EAAO,KAAKE,CAAK;AAAA,IACnB;AACA,WAAO,OAAO,OAAOF,CAAM;AAAA,EAC7B,UAAA;AACE,IAAAD,EAAO,YAAA;AAAA,EACT;AACF;ACkDO,SAASI,EAAWC,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMC,EAAiBD,CAAO;AAAA,EAAA;AAElC;AAaO,SAASE,EAAWF,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMA;AAAA,EAAA;AAEV;AAOO,MAAMG,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QACJC,GACAC,GAC+B;AAC/B,UAAML,IAAUM;AAAA,MACdD;AAAA,IAAA,GAEIE,IAAS,MAAMC,EAASJ,CAAK,GAC7BK,IAAiBC,EAAiBV,CAAO,GAEzCW,IAAS,MAAMC,EAAQ,cAAc,EAAE,QAAAL,EAAA,GAAUE,CAAc;AAErE,WAAO;AAAA,MACL,MAAME,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF,GAOaC,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQV,GAAyC;AACrD,UAAMG,IAAS,MAAMC,EAASJ,CAAK,GAG7BO,IAAS,MAAMC,EAAQ,eAAe,EAAE,QAAAL,GAAQ;AAEtD,WAAO;AAAA,MACL,MAAMI,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF;AAwBA,SAASZ,EACPD,GACmC;AACnC,MAAI,CAACA,EAAS;AAEd,QAAMe,IAAoC,CAAA;AAE1C,SAAIf,EAAQ,aACVe,EAAW,WAAWf,EAAQ,WAE5BA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAE3CA,EAAQ,2BAA2B,WACrCe,EAAW,yBAAyBf,EAAQ,yBAE1CA,EAAQ,aACVe,EAAW,WAAWf,EAAQ,YAG9BA,EAAQ,kBAAkB,YAC1BA,EAAQ,kBAAkB,YAE1Be,EAAW,gBAAgBf,EAAQ,gBAEjCA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAGxC,OAAO,KAAKe,CAAU,EAAE,SAAS,IAAIA,IAAa;AAC3D;AAKA,SAAST,EACPS,GACmB;AACnB,SAAKA,K
AAmB,CAAA;AAE1B;AAKA,eAAeP,EAASJ,GAAiC;AACvD,MAAI,OAAO,SAASA,CAAK;AACvB,WAAOA;AAET,MAAIA,aAAiB;AACnB,WAAOX,EAAeW,CAAmC;AAE3D,QAAM,IAAI;AAAA,IACR,0DAA0D,OAAOA,CAAK;AAAA,EAAA;AAE1E;AAKA,SAASM,EAAiBV,GAA4C;AACpE,QAAMgB,IAA8B,CAAA;AAEpC,SAAIhB,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,WACtCgB,EAAY,0BAA0BhB,EAAQ,0BAG5CA,EAAQ,2BAA2B,WACrCgB,EAAY,yBAAyBhB,EAAQ,yBAG3CA,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,KACtCgB,EAAY,wBAAwB,KAC3BhB,EAAQ,4BAA4B,OAC7CgB,EAAY,wBAAwB,KAGlChB,EAAQ,kBACVgB,EAAY,eAAeC,EAAqBjB,EAAQ,aAAa,IAGnEA,EAAQ,sBACVgB,EAAY,oBAAoBhB,EAAQ,oBAGnCgB;AACT;AAKA,SAASC,EACPC,GACuB;AACvB,SAAIA,MAAa,SACRN,EAAQ,OAAO,WAAW,MAAM,QAAQ,QAAQ,EAAE,KAAK,GAAA,CAAI,CAAC,IAGjEM,MAAa,WAERN,EAAQ,OAAO;AAAA,IAAW,CAACO,MAChCA,EAAM,mBAAA,EAAqB,KAAK,CAAAC,OAAS;AAAA,MACvC,KAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI;AAAA,IAAA,EAC7C;AAAA,EAAA,IAKCR,EAAQ,OAAO,WAAW,OAAOO,MAAwB;AAE9D,UAAME,IAAe;AAAA,MACnB,aAAaF,EAAM;AAAA,MACnB,OAAO,CAACG,MACFA,MAAa,WACRH,EAAM,mBAAA,IAERA,EAAM,kBAAA;AAAA,IACf,GAMIR,IAAS,MAAMO,EAASG,CAAY;AAC1C,WAAIV,MAAW,OACN,EAAE,KAAK,GAAA,IAETA;AAAA,EACT,CAAC;AACH;AAKA,SAASE,EAAiBU,GAAwD;AAChF,SAAO;AAAA,IACL,MAAMA,EAAI,SAAS,UAAU,UAAU;AAAA,IACvC,SAASA,EAAI;AAAA,EAAA;AAEjB;AC7OO,SAASC,EAA0BC,GAMjC;AACP,EAAAA,EAAS,SAAStB,CAAc,GAChCsB,EAAS,SAASX,CAAc;AAClC;"}
1
+ {"version":3,"file":"index.es.js","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').string()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(input: unknown, args?: unknown): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (options.imageHandling === 'inline' || options.imageHandling === 'omit') {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: {\n type: string\n message: string\n}): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml } from '@origints/mammoth'\n *\n * // Create a plan\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * 
}))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { globalRegistry } from '@origints/core'\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n\n// Auto-register 
transforms\nregisterMammothTransforms(globalRegistry)\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry","globalRegistry"],"mappings":";;AASA,eAAsBA,EACpBC,GACiB;AACjB,QAAMC,IAASD,EAAO,UAAA,GAChBE,IAAuB,CAAA;AAE7B,MAAI;AACF,eAAa;AACX,YAAM,EAAE,MAAAC,GAAM,OAAAC,EAAA,IAAU,MAAMH,EAAO,KAAA;AACrC,UAAIE,EAAM;AACV,MAAAD,EAAO,KAAKE,CAAK;AAAA,IACnB;AACA,WAAO,OAAO,OAAOF,CAAM;AAAA,EAC7B,UAAA;AACE,IAAAD,EAAO,YAAA;AAAA,EACT;AACF;ACkDO,SAASI,EAAWC,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMC,EAAiBD,CAAO;AAAA,EAAA;AAElC;AAaO,SAASE,EAAWF,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMA;AAAA,EAAA;AAEV;AAOO,MAAMG,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQC,GAAgBC,GAA+C;AAC3E,UAAML,IAAUM;AAAA,MACdD;AAAA,IAAA,GAEIE,IAAS,MAAMC,EAASJ,CAAK,GAC7BK,IAAiBC,EAAiBV,CAAO,GAEzCW,IAAS,MAAMC,EAAQ,cAAc,EAAE,QAAAL,EAAA,GAAUE,CAAc;AAErE,WAAO;AAAA,MACL,MAAME,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF,GAOaC,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQV,GAAyC;AACrD,UAAMG,IAAS,MAAMC,EAASJ,CAAK,GAG7BO,IAAS,MAAMC,EAAQ,eAAe,EAAE,QAAAL,GAAQ;AAEtD,WAAO;AAAA,MACL,MAAMI,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF;AAwBA,SAASZ,EACPD,GACmC;AACnC,MAAI,CAACA,EAAS;AAEd,QAAMe,IAAoC,CAAA;AAE1C,SAAIf,EAAQ,aACVe,EAAW,WAAWf,EAAQ,WAE5BA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAE3CA,EAAQ,2BAA2B,WACrCe,EAAW,yBAAyBf,EAAQ,yBAE1CA,EAAQ,aACVe,EAAW,WAAWf,EAAQ,YAE5BA,EAAQ,kBAAkB,YAAYA,EAAQ,kBAAkB,YAClEe,EAAW,gBAAgBf,EAAQ,gBAEjCA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAGxC,OAAO,KAAKe,CAAU,EAAE,SAAS,IAAIA,IAAa;AAC3D;A
AKA,SAAST,EACPS,GACmB;AACnB,SAAKA,KAAmB,CAAA;AAE1B;AAKA,eAAeP,EAASJ,GAAiC;AACvD,MAAI,OAAO,SAASA,CAAK;AACvB,WAAOA;AAET,MAAIA,aAAiB;AACnB,WAAOX,EAAeW,CAAmC;AAE3D,QAAM,IAAI;AAAA,IACR,0DAA0D,OAAOA,CAAK;AAAA,EAAA;AAE1E;AAKA,SAASM,EAAiBV,GAA4C;AACpE,QAAMgB,IAA8B,CAAA;AAEpC,SAAIhB,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,WACtCgB,EAAY,0BAA0BhB,EAAQ,0BAG5CA,EAAQ,2BAA2B,WACrCgB,EAAY,yBAAyBhB,EAAQ,yBAG3CA,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,KACtCgB,EAAY,wBAAwB,KAC3BhB,EAAQ,4BAA4B,OAC7CgB,EAAY,wBAAwB,KAGlChB,EAAQ,kBACVgB,EAAY,eAAeC,EAAqBjB,EAAQ,aAAa,IAGnEA,EAAQ,sBACVgB,EAAY,oBAAoBhB,EAAQ,oBAGnCgB;AACT;AAKA,SAASC,EACPC,GACuB;AACvB,SAAIA,MAAa,SACRN,EAAQ,OAAO,WAAW,MAAM,QAAQ,QAAQ,EAAE,KAAK,GAAA,CAAI,CAAC,IAGjEM,MAAa,WAERN,EAAQ,OAAO;AAAA,IAAW,CAACO,MAChCA,EAAM,mBAAA,EAAqB,KAAK,CAAAC,OAAS;AAAA,MACvC,KAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI;AAAA,IAAA,EAC7C;AAAA,EAAA,IAKCR,EAAQ,OAAO,WAAW,OAAOO,MAAwB;AAE9D,UAAME,IAAe;AAAA,MACnB,aAAaF,EAAM;AAAA,MACnB,OAAO,CAACG,MACFA,MAAa,WACRH,EAAM,mBAAA,IAERA,EAAM,kBAAA;AAAA,IACf,GAMIR,IAAS,MAAMO,EAASG,CAAY;AAC1C,WAAIV,MAAW,OACN,EAAE,KAAK,GAAA,IAETA;AAAA,EACT,CAAC;AACH;AAKA,SAASE,EAAiBU,GAGP;AACjB,SAAO;AAAA,IACL,MAAMA,EAAI,SAAS,UAAU,UAAU;AAAA,IACvC,SAASA,EAAI;AAAA,EAAA;AAEjB;AC5OO,SAASC,EAA0BC,GAMjC;AACP,EAAAA,EAAS,SAAStB,CAAc,GAChCsB,EAAS,SAASX,CAAc;AAClC;AAGAU,EAA0BE,CAAc;"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@origints/mammoth",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "DOCX to HTML conversion for Origins using mammoth.js",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -20,11 +20,30 @@
20
20
  "publishConfig": {
21
21
  "access": "public"
22
22
  },
23
+ "repository": {
24
+ "type": "git",
25
+ "url": "https://github.com/fponticelli/origints.git",
26
+ "directory": "packages/mammoth"
27
+ },
28
+ "homepage": "https://origints.dev",
29
+ "bugs": "https://github.com/fponticelli/origints/issues",
30
+ "keywords": [
31
+ "origints",
32
+ "data-extraction",
33
+ "lineage",
34
+ "provenance",
35
+ "docx",
36
+ "word",
37
+ "document-conversion"
38
+ ],
39
+ "engines": {
40
+ "node": ">=18"
41
+ },
23
42
  "dependencies": {
24
43
  "mammoth": "^1.11.0"
25
44
  },
26
45
  "peerDependencies": {
27
- "@origints/core": "^0.1.0"
46
+ "@origints/core": "^0.2.0"
28
47
  },
29
48
  "devDependencies": {
30
49
  "@types/node": "25.0.6",
@@ -35,7 +54,7 @@
35
54
  "vite": "7.3.1",
36
55
  "vite-plugin-dts": "4.5.4",
37
56
  "vitest": "4.0.16",
38
- "@origints/core": "0.1.0"
57
+ "@origints/core": "0.2.0"
39
58
  },
40
59
  "scripts": {
41
60
  "build": "vite build",