@origints/mammoth 0.1.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,14 +4,6 @@
4
4
 
5
5
  ---
6
6
 
7
- ## Why
8
-
9
- Word documents are everywhere in enterprise workflows, but extracting their content programmatically is challenging. You need to convert them to a usable format while preserving semantic structure.
10
-
11
- This package wraps mammoth.js and exposes it as Origins transforms. Convert DOCX files to clean HTML or plain text, with full control over style mapping and conversion options.
12
-
13
- ---
14
-
15
7
  ## Features
16
8
 
17
9
  - Convert DOCX to semantic HTML
@@ -23,80 +15,39 @@ This package wraps mammoth.js and exposes it as Origins transforms. Convert DOCX
23
15
 
24
16
  ---
25
17
 
26
- ## Quick Start
27
-
28
- ```bash
29
- npm install @origints/mammoth @origints/core
30
- ```
31
-
32
- ```ts
33
- import { Planner, loadFile, run, globalRegistry } from "@origints/core";
34
- import { docxToHtml, registerMammothTransforms } from "@origints/mammoth";
35
-
36
- registerMammothTransforms(globalRegistry);
37
-
38
- const plan = Planner.in(loadFile("document.docx"))
39
- .mapIn(docxToHtml())
40
- .emit((out, $) => out.add("html", $.get("html").asString()))
41
- .compile();
42
-
43
- const result = await run(plan, {}, globalRegistry);
44
-
45
- if (result.ok) {
46
- console.log(result.value.html);
47
- }
48
- ```
49
-
50
- Expected output:
51
-
52
- ```
53
- <h1>Document Title</h1><p>Content here...</p>
54
- ```
55
-
56
- ---
57
-
58
18
  ## Installation
59
19
 
60
- - Supported platforms:
61
- - macOS / Linux / Windows
62
- - Runtime requirements:
63
- - Node.js >= 18
64
- - Package managers:
65
- - npm, pnpm, yarn
66
- - Peer dependencies:
67
- - @origints/core ^0.1.0
68
-
69
20
  ```bash
70
21
  npm install @origints/mammoth @origints/core
71
- # or
72
- pnpm add @origints/mammoth @origints/core
73
22
  ```
74
23
 
75
24
  ---
76
25
 
77
- ## Usage
26
+ ## Usage with Planner
78
27
 
79
- ### Basic HTML conversion
28
+ ### Convert a DOCX file and extract the HTML
80
29
 
81
30
  ```ts
82
- import { Planner, loadFile, globalRegistry } from "@origints/core";
83
- import { docxToHtml, registerMammothTransforms } from "@origints/mammoth";
84
-
85
- registerMammothTransforms(globalRegistry);
31
+ import { Planner, loadFile, run } from '@origints/core'
32
+ import { docxToHtml } from '@origints/mammoth'
86
33
 
87
- const plan = Planner.in(loadFile("report.docx"))
34
+ const plan = new Planner()
35
+ .in(loadFile('document.docx'))
88
36
  .mapIn(docxToHtml())
89
- .emit((out, $) => {
90
- out.add("html", $.get("html").asString());
91
- out.add("messages", $.get("messages").asArray());
92
- })
93
- .compile();
37
+ .emit((out, $) => out.add('html', $.get('html').string()))
38
+ .compile()
39
+
40
+ const result = await run(plan, { readFile, registry })
41
+ // result.value: { html: '<h1>Title</h1><p>Content...</p>' }
94
42
  ```
95
43
 
96
- ### Custom style mapping
44
+ ### Convert with custom style mapping
97
45
 
98
46
  ```ts
99
- const plan = Planner.in(loadFile("document.docx"))
47
+ import { docxToHtml } from '@origints/mammoth'
48
+
49
+ const plan = new Planner()
50
+ .in(loadFile('report.docx'))
100
51
  .mapIn(
101
52
  docxToHtml({
102
53
  styleMap: [
@@ -105,64 +56,91 @@ const plan = Planner.in(loadFile("document.docx"))
105
56
  "p[style-name='Heading 2'] => h2",
106
57
  "p[style-name='Quote'] => blockquote",
107
58
  ],
59
+ idPrefix: 'doc-',
108
60
  })
109
61
  )
110
- .emit((out, $) => out.add("html", $.get("html").asString()))
111
- .compile();
62
+ .emit((out, $) => out.add('content', $.get('html').string()))
63
+ .compile()
112
64
  ```
113
65
 
114
- ### Convert to plain text
66
+ ### Extract plain text from a DOCX file
115
67
 
116
68
  ```ts
117
- import { docxToText } from "@origints/mammoth";
69
+ import { docxToText } from '@origints/mammoth'
118
70
 
119
- const plan = Planner.in(loadFile("document.docx"))
71
+ const plan = new Planner()
72
+ .in(loadFile('document.docx'))
120
73
  .mapIn(docxToText())
121
- .emit((out, $) => out.add("text", $.get("text").asString()))
122
- .compile();
74
+ .emit((out, $) => out.add('text', $.get('text').string()))
75
+ .compile()
76
+
77
+ const result = await run(plan, { readFile, registry })
78
+ // result.value: { text: 'Document Title\nContent here...' }
123
79
  ```
124
80
 
125
- ### Image handling options
81
+ ### Combine DOCX with other sources
126
82
 
127
83
  ```ts
128
- const plan = Planner.in(loadFile("document.docx"))
129
- .mapIn(
130
- docxToHtml({
131
- imageHandling: "omit", // or 'base64'
132
- })
84
+ const plan = new Planner()
85
+ .in(loadFile('report.docx'))
86
+ .mapIn(docxToHtml())
87
+ .emit((out, $) => out.add('reportHtml', $.get('html').string()))
88
+ .in(loadFile('metadata.json'))
89
+ .mapIn(parseJson())
90
+ .emit((out, $) =>
91
+ out
92
+ .add('author', $.get('author').string())
93
+ .add('date', $.get('date').string())
133
94
  )
134
- .emit((out, $) => out.add("html", $.get("html").asString()))
135
- .compile();
95
+ .compile()
136
96
  ```
137
97
 
138
- ---
139
-
140
- ## Project Status
98
+ ### Standalone usage (without Planner)
141
99
 
142
- - **Experimental** - APIs may change
100
+ ```ts
101
+ import * as fs from 'fs'
102
+ import { docxToHtmlImpl, docxToTextImpl } from '@origints/mammoth'
143
103
 
144
- ---
104
+ const buffer = fs.readFileSync('document.docx')
145
105
 
146
- ## Non-Goals
106
+ // Convert to HTML
107
+ const htmlResult = await docxToHtmlImpl.execute(buffer)
108
+ console.log(htmlResult.html)
147
109
 
148
- - Not a DOCX writer/generator
149
- - Not a full Word document parser (no styles, comments, etc.)
150
- - Not a PDF converter
110
+ // Log conversion warnings
111
+ for (const msg of htmlResult.messages) {
112
+ console.warn(msg.message)
113
+ }
151
114
 
152
- ---
115
+ // Convert to plain text
116
+ const textResult = await docxToTextImpl.execute(buffer)
117
+ console.log(textResult.text)
118
+ ```
153
119
 
154
- ## Documentation
120
+ ### Image handling
155
121
 
156
- - See `@origints/core` for Origins concepts
157
- - See [mammoth.js](https://www.npmjs.com/package/mammoth) for conversion details
122
+ ```ts
123
+ import { docxToHtml } from '@origints/mammoth'
124
+
125
+ // Omit images
126
+ const plan = new Planner()
127
+ .in(loadFile('document.docx'))
128
+ .mapIn(docxToHtml({ imageHandling: 'omit' }))
129
+ .emit((out, $) => out.add('html', $.get('html').string()))
130
+ .compile()
131
+ ```
158
132
 
159
133
  ---
160
134
 
161
- ## Contributing
135
+ ## API
162
136
 
163
- - Open an issue before large changes
164
- - Keep PRs focused
165
- - Tests required for new features
137
+ | Export | Description |
138
+ | ------------------------------------- | -------------------------------------------------- |
139
+ | `docxToHtml(options?)` | Create a transform AST for HTML conversion |
140
+ | `docxToText(options?)` | Create a transform AST for text conversion |
141
+ | `docxToHtmlImpl` | Async transform implementation for HTML conversion |
142
+ | `docxToTextImpl` | Async transform implementation for text conversion |
143
+ | `registerMammothTransforms(registry)` | Register all mammoth transforms with a registry |
166
144
 
167
145
  ---
168
146
 
package/dist/convert.d.ts CHANGED
@@ -5,15 +5,15 @@ import { DocxToHtmlOptions, DocxToTextOptions } from './options';
5
5
  *
6
6
  * @example
7
7
  * ```ts
8
- * const plan = Planner.in(loadFile('document.docx'))
8
+ * const plan = new Planner().in(loadFile('document.docx'))
9
9
  * .mapIn(docxToHtml())
10
- * .emit((out, $) => out.add('html', $.get('html').asString()))
10
+ * .emit((out, $) => out.add('html', $.get('html').string()))
11
11
  * .compile()
12
12
  * ```
13
13
  *
14
14
  * @example With custom style mapping
15
15
  * ```ts
16
- * const plan = Planner.in(loadFile('document.docx'))
16
+ * const plan = new Planner().in(loadFile('document.docx'))
17
17
  * .mapIn(docxToHtml({
18
18
  * styleMap: [
19
19
  * "p[style-name='Title'] => h1.document-title",
@@ -21,7 +21,7 @@ import { DocxToHtmlOptions, DocxToTextOptions } from './options';
21
21
  * ],
22
22
  * idPrefix: 'doc-',
23
23
  * }))
24
- * .emit((out, $) => out.add('content', $.get('html').asString()))
24
+ * .emit((out, $) => out.add('content', $.get('html').string()))
25
25
  * .compile()
26
26
  * ```
27
27
  */
@@ -31,9 +31,9 @@ export declare function docxToHtml(options?: DocxToHtmlOptions): TransformAst;
31
31
  *
32
32
  * @example
33
33
  * ```ts
34
- * const plan = Planner.in(loadFile('document.docx'))
34
+ * const plan = new Planner().in(loadFile('document.docx'))
35
35
  * .mapIn(docxToText())
36
- * .emit((out, $) => out.add('text', $.get('text').asString()))
36
+ * .emit((out, $) => out.add('text', $.get('text').string()))
37
37
  * .compile()
38
38
  * ```
39
39
  */
package/dist/index.cjs CHANGED
@@ -1,2 +1,2 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const m=require("mammoth");async function s(e){const r=e.getReader(),t=[];try{for(;;){const{done:a,value:n}=await r.read();if(a)break;t.push(n)}return Buffer.concat(t)}finally{r.releaseLock()}}function u(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToHtml",args:g(e)}}function o(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToText",args:e}}const c={namespace:"@origints/mammoth",name:"docxToHtml",async execute(e,r){const t=y(r),a=await d(e),n=p(t),i=await m.convertToHtml({buffer:a},n);return{html:i.value,messages:i.messages.map(f)}}},l={namespace:"@origints/mammoth",name:"docxToText",async execute(e){const r=await d(e),t=await m.extractRawText({buffer:r});return{text:t.value,messages:t.messages.map(f)}}};function g(e){if(!e)return;const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),(e.imageHandling==="inline"||e.imageHandling==="omit")&&(r.imageHandling=e.imageHandling),e.preserveEmptyParagraphs!==void 0&&(r.preserveEmptyParagraphs=e.preserveEmptyParagraphs),Object.keys(r).length>0?r:void 0}function y(e){return e||{}}async function d(e){if(Buffer.isBuffer(e))return e;if(e instanceof ReadableStream)return s(e);throw new Error(`docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`)}function p(e){const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),e.preserveEmptyParagraphs===!1?r.ignoreEmptyParagraphs=!0:e.preserveEmptyParagraphs===!0&&(r.ignoreEmptyParagraphs=!1),e.imageHandling&&(r.convertImage=x(e.imageHandling)),e.transformDocument&&(r.transformDocument=e.transformDocument),r}function x(e){return e==="omit"?m.images.imgElement(()=>Promise.resolve({src:""})):e==="inline"?m.images.imgElement(r=>r.readAsBase64String().then(t=>({src:`data:${r.contentType};base64,${t}`}))):m.images.imgElement(async r=>{const t={contentType:r.contentType,read:(n=>n==="base64"?r.readAsBase64String():r.readAsArrayBuffer())},a=await e(t);return a===null?{src:""}:a})}function f(e){return{type:e.type==="error"?"error":"warning",message:e.message}}function T(e){e.register(c),e.register(l)}exports.docxToHtml=u;exports.docxToHtmlImpl=c;exports.docxToText=o;exports.docxToTextImpl=l;exports.registerMammothTransforms=T;exports.streamToBuffer=s;
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const m=require("mammoth"),o=require("@origints/core");async function s(e){const r=e.getReader(),t=[];try{for(;;){const{done:a,value:n}=await r.read();if(a)break;t.push(n)}return Buffer.concat(t)}finally{r.releaseLock()}}function g(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToHtml",args:p(e)}}function y(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToText",args:e}}const c={namespace:"@origints/mammoth",name:"docxToHtml",async execute(e,r){const t=x(r),a=await d(e),n=T(t),i=await m.convertToHtml({buffer:a},n);return{html:i.value,messages:i.messages.map(f)}}},l={namespace:"@origints/mammoth",name:"docxToText",async execute(e){const r=await d(e),t=await m.extractRawText({buffer:r});return{text:t.value,messages:t.messages.map(f)}}};function p(e){if(!e)return;const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),(e.imageHandling==="inline"||e.imageHandling==="omit")&&(r.imageHandling=e.imageHandling),e.preserveEmptyParagraphs!==void 0&&(r.preserveEmptyParagraphs=e.preserveEmptyParagraphs),Object.keys(r).length>0?r:void 0}function x(e){return e||{}}async function d(e){if(Buffer.isBuffer(e))return e;if(e instanceof ReadableStream)return s(e);throw new Error(`docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`)}function T(e){const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),e.preserveEmptyParagraphs===!1?r.ignoreEmptyParagraphs=!0:e.preserveEmptyParagraphs===!0&&(r.ignoreEmptyParagraphs=!1),e.imageHandling&&(r.convertImage=M(e.imageHandling)),e.transformDocument&&(r.transformDocument=e.transformDocument),r}function M(e){return e==="omit"?m.images.imgElement(()=>Promise.resolve({src:""})):e==="inline"?m.images.imgElement(r=>r.readAsBase64String().then(t=>({src:`data:${r.contentType};base64,${t}`}))):m.images.imgElement(async r=>{const t={contentType:r.contentType,read:(n=>n==="base64"?r.readAsBase64String():r.readAsArrayBuffer())},a=await e(t);return a===null?{src:""}:a})}function f(e){return{type:e.type==="error"?"error":"warning",message:e.message}}function u(e){e.register(c),e.register(l)}u(o.globalRegistry);exports.docxToHtml=g;exports.docxToHtmlImpl=c;exports.docxToText=y;exports.docxToTextImpl=l;exports.registerMammothTransforms=u;exports.streamToBuffer=s;
2
2
  //# sourceMappingURL=index.cjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.cjs","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').asString()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(\n input: unknown,\n args?: unknown\n ): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (\n options.imageHandling === 'inline' ||\n options.imageHandling === 'omit'\n ) {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: { type: string; message: string }): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'\n *\n * // Register transforms\n * registerMammothTransforms(globalRegistry)\n *\n * // Create a plan\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry"],"mappings":"2GASA,eAAsBA,EACpBC,EACiB,CACjB,MAAMC,EAASD,EAAO,UAAA,EAChBE,EAAuB,CAAA,EAE7B,GAAI,CACF,OAAa,CACX,KAAM,CAAE,KAAAC,EAAM,MAAAC,CAAA,EAAU,MAAMH,EAAO,KAAA,EACrC,GAAIE,EAAM,MACVD,EAAO,KAAKE,CAAK,CACnB,CACA,OAAO,OAAO,OAAOF,CAAM,CAC7B,QAAA,CACED,EAAO,YAAA,CACT,CACF,CCkDO,SAASI,EAAWC,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMC,EAAiBD,CAAO,CAAA,CAElC,CAaO,SAASE,EAAWF,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMA,CAAA,CAEV,CAOO,MAAMG,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QACJC,EACAC,EAC+B,CAC/B,MAAML,EAAUM,EACdD,CAAA,EAEIE,EAAS,MAAMC,EAASJ,CAAK,EAC7BK,EAAiBC,EAAiBV,CAAO,EAEzCW,EAAS,MAAMC,EAAQ,cAAc,CAAE,OAAAL,CAAA,EAAUE,CAAc,EAErE,MAAO,CACL,KAAME,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAOaC,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQV,EAAyC,CACrD,MAAMG,EAAS,MAAMC,EAASJ,CAAK,EAG7BO,EAAS,MAAMC,EAAQ,eAAe,CAAE,OAAAL,EAAQ,EAEtD,MAAO,CACL,KAAMI,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAwBA,SAASZ,EACPD,EACmC,CACnC,GAAI,CAACA,EAAS,OAEd,MAAMe,EAAoC,CAAA,EAE1C,OAAIf,EAAQ,WACVe,EAAW,SAAWf,EAAQ,UAE5BA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAE3CA,EAAQ,yBAA2B,SACrCe,EAAW,uBAAyBf,EAAQ,wBAE1CA,EAAQ,WACVe,EAAW,SAAWf,EAAQ,WAG9BA,EAAQ,gBAAkB,UAC1BA,EAAQ,gBAAkB,UAE1Be,EAAW,cAAgBf,EAAQ,eAEjCA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAGxC,OAAO,KAAKe,CAAU,EAAE,OAAS,EAAIA,EAAa,MAC3D,CAKA,SAAST,EACPS,EACmB,CACnB,OAAKA,GAAmB,CAAA,CAE1B,CAKA,eAAeP,EAASJ,EAAiC,CACvD,GAAI,OAAO,SAASA,CAAK,EACvB,OAAOA,EAET,GAAIA,aAAiB,eACnB,OAAOX,EAAeW,CAAmC,EAE3D,MAAM,IAAI,MACR,0DAA0D,OAAOA,CAAK,EAAA,CAE1E,CAKA,SAASM,EAAiBV,EAA4C,CACpE,MAAMgB,EAA8B,CAAA,EAEpC,OAAIhB,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,SACtCgB,EAAY,wBAA0BhB,EAAQ,yBAG5CA,EAAQ,yBAA2B,SACrCgB,EAAY,uBAAyBhB,EAAQ,wBAG3CA,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,GACtCgB,EAAY,sBAAwB,GAC3BhB,EAAQ,0BAA4B,KAC7CgB,EAAY,sBAAwB,IAGlChB,EAAQ,gBACVgB,EAAY,aAAeC,EAAqBjB,EAAQ,aAAa,GAGnEA,EAAQ,oBACVgB,EAAY,kBAAoBhB,EAAQ,mBAGnCgB,CACT,CAKA,SAASC,EACPC,EACuB,CACvB,OAAIA,IAAa,OACRN,EAAQ,OAAO,WAAW,IAAM,QAAQ,QAAQ,CAAE,IAAK,EAAA,CAAI,CAAC,EAGjEM,IAAa,SAERN,EAAQ,OAAO,WAAYO,GAChCA,EAAM,mBAAA,EAAqB,KAAKC,IAAS,CACvC,IAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI,EAAA,EAC7C,CAAA,EAKCR,EAAQ,OAAO,WAAW,MAAOO,GAAwB,CAE9D,MAAME,EAAe,CACnB,YAAaF,EAAM,YACnB,MAAQG,GACFA,IAAa,SACRH,EAAM,mBAAA,EAERA,EAAM,kBAAA,EACf,EAMIR,EAAS,MAAMO,EAASG,CAAY,EAC1C,OAAIV,IAAW,KACN,CAAE,IAAK,EAAA,EAETA,CACT,CAAC,CACH,CAKA,SAASE,EAAiBU,EAAwD,CAChF,MAAO,CACL,KAAMA,EAAI,OAAS,QAAU,QAAU,UACvC,QAASA,EAAI,OAAA,CAEjB,CC7OO,SAASC,EAA0BC,EAMjC,CACPA,EAAS,SAAStB,CAAc,EAChCsB,EAAS,SAASX,CAAc,CAClC"}
1
+ {"version":3,"file":"index.cjs","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').string()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(input: unknown, args?: unknown): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (options.imageHandling === 'inline' || options.imageHandling === 'omit') {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: {\n type: string\n message: string\n}): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml } from '@origints/mammoth'\n *\n * // Create a plan\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { globalRegistry } from '@origints/core'\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n\n// Auto-register transforms\nregisterMammothTransforms(globalRegistry)\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry","globalRegistry"],"mappings":"uIASA,eAAsBA,EACpBC,EACiB,CACjB,MAAMC,EAASD,EAAO,UAAA,EAChBE,EAAuB,CAAA,EAE7B,GAAI,CACF,OAAa,CACX,KAAM,CAAE,KAAAC,EAAM,MAAAC,CAAA,EAAU,MAAMH,EAAO,KAAA,EACrC,GAAIE,EAAM,MACVD,EAAO,KAAKE,CAAK,CACnB,CACA,OAAO,OAAO,OAAOF,CAAM,CAC7B,QAAA,CACED,EAAO,YAAA,CACT,CACF,CCkDO,SAASI,EAAWC,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMC,EAAiBD,CAAO,CAAA,CAElC,CAaO,SAASE,EAAWF,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMA,CAAA,CAEV,CAOO,MAAMG,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQC,EAAgBC,EAA+C,CAC3E,MAAML,EAAUM,EACdD,CAAA,EAEIE,EAAS,MAAMC,EAASJ,CAAK,EAC7BK,EAAiBC,EAAiBV,CAAO,EAEzCW,EAAS,MAAMC,EAAQ,cAAc,CAAE,OAAAL,CAAA,EAAUE,CAAc,EAErE,MAAO,CACL,KAAME,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAOaC,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQV,EAAyC,CACrD,MAAMG,EAAS,MAAMC,EAASJ,CAAK,EAG7BO,EAAS,MAAMC,EAAQ,eAAe,CAAE,OAAAL,EAAQ,EAEtD,MAAO,CACL,KAAMI,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAwBA,SAASZ,EACPD,EACmC,CACnC,GAAI,CAACA,EAAS,OAEd,MAAMe,EAAoC,CAAA,EAE1C,OAAIf,EAAQ,WACVe,EAAW,SAAWf,EAAQ,UAE5BA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAE3CA,EAAQ,yBAA2B,SACrCe,EAAW,uBAAyBf,EAAQ,wBAE1CA,EAAQ,WACVe,EAAW,SAAWf,EAAQ,WAE5BA,EAAQ,gBAAkB,UAAYA,EAAQ,gBAAkB,UAClEe,EAAW,cAAgBf,EAAQ,eAEjCA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAGxC,OAAO,KAAKe,CAAU,EAAE,OAAS,EAAIA,EAAa,MAC3D,CAKA,SAAST,EACPS,EACmB,CACnB,OAAKA,GAAmB,CAAA,CAE1B,CAKA,eAAeP,EAASJ,EAAiC,CACvD,GAAI,OAAO,SAASA,CAAK,EACvB,OAAOA,EAET,GAAIA,aAAiB,eACnB,OAAOX,EAAeW,CAAmC,EAE3D,MAAM,IAAI,MACR,0DAA0D,OAAOA,CAAK,EAAA,CAE1E,CAKA,SAASM,EAAiBV,EAA4C,CACpE,MAAMgB,EAA8B,CAAA,EAEpC,OAAIhB,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,SACtCgB,EAAY,wBAA0BhB,EAAQ,yBAG5CA,EAAQ,yBAA2B,SACrCgB,EAAY,uBAAyBhB,EAAQ,wBAG3CA,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,GACtCgB,EAAY,sBAAwB,GAC3BhB,EAAQ,0BAA4B,KAC7CgB,EAAY,sBAAwB,IAGlChB,EAAQ,gBACVgB,EAAY,aAAeC,EAAqBjB,EAAQ,aAAa,GAGnEA,EAAQ,oBACVgB,EAAY,kBAAoBhB,EAAQ,mBAGnCgB,CACT,CAKA,SAASC,EACPC,EACuB,CACvB,OAAIA,IAAa,OACRN,EAAQ,OAAO,WAAW,IAAM,QAAQ,QAAQ,CAAE,IAAK,EAAA,CAAI,CAAC,EAGjEM,IAAa,SAERN,EAAQ,OAAO,WAAYO,GAChCA,EAAM,mBAAA,EAAqB,KAAKC,IAAS,CACvC,IAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI,EAAA,EAC7C,CAAA,EAKCR,EAAQ,OAAO,WAAW,MAAOO,GAAwB,CAE9D,MAAME,EAAe,CACnB,YAAaF,EAAM,YACnB,MAAQG,GACFA,IAAa,SACRH,EAAM,mBAAA,EAERA,EAAM,kBAAA,EACf,EAMIR,EAAS,MAAMO,EAASG,CAAY,EAC1C,OAAIV,IAAW,KACN,CAAE,IAAK,EAAA,EAETA,CACT,CAAC,CACH,CAKA,SAASE,EAAiBU,EAGP,CACjB,MAAO,CACL,KAAMA,EAAI,OAAS,QAAU,QAAU,UACvC,QAASA,EAAI,OAAA,CAEjB,CC5OO,SAASC,EAA0BC,EAMjC,CACPA,EAAS,SAAStB,CAAc,EAChCsB,EAAS,SAASX,CAAc,CAClC,CAGAU,EAA0BE,gBAAc"}
package/dist/index.d.ts CHANGED
@@ -9,21 +9,18 @@
9
9
  * @example Basic usage
10
10
  * ```ts
11
11
  * import { Planner, loadFile } from '@origints/core'
12
- * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'
13
- *
14
- * // Register transforms
15
- * registerMammothTransforms(globalRegistry)
12
+ * import { docxToHtml } from '@origints/mammoth'
16
13
  *
17
14
  * // Create a plan
18
- * const plan = Planner.in(loadFile('document.docx'))
15
+ * const plan = new Planner().in(loadFile('document.docx'))
19
16
  * .mapIn(docxToHtml())
20
- * .emit((out, $) => out.add('html', $.get('html').asString()))
17
+ * .emit((out, $) => out.add('html', $.get('html').string()))
21
18
  * .compile()
22
19
  * ```
23
20
  *
24
21
  * @example With custom style mapping
25
22
  * ```ts
26
- * const plan = Planner.in(loadFile('document.docx'))
23
+ * const plan = new Planner().in(loadFile('document.docx'))
27
24
  * .mapIn(docxToHtml({
28
25
  * styleMap: [
29
26
  * "p[style-name='Title'] => h1.document-title",
@@ -33,7 +30,7 @@
33
30
  * idPrefix: 'doc-',
34
31
  * imageHandling: 'omit',
35
32
  * }))
36
- * .emit((out, $) => out.add('content', $.get('html').asString()))
33
+ * .emit((out, $) => out.add('content', $.get('html').string()))
37
34
  * .compile()
38
35
  * ```
39
36
  */
package/dist/index.es.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import m from "mammoth";
2
+ import { globalRegistry as l } from "@origints/core";
2
3
  async function f(e) {
3
4
  const r = e.getReader(), a = [];
4
5
  try {
@@ -12,15 +13,15 @@ async function f(e) {
12
13
  r.releaseLock();
13
14
  }
14
15
  }
15
- function x(e) {
16
+ function T(e) {
16
17
  return {
17
18
  kind: "transform",
18
19
  namespace: "@origints/mammoth",
19
20
  name: "docxToHtml",
20
- args: u(e)
21
+ args: o(e)
21
22
  };
22
23
  }
23
- function h(e) {
24
+ function v(e) {
24
25
  return {
25
26
  kind: "transform",
26
27
  namespace: "@origints/mammoth",
@@ -28,19 +29,19 @@ function h(e) {
28
29
  args: e
29
30
  };
30
31
  }
31
- const l = {
32
+ const d = {
32
33
  namespace: "@origints/mammoth",
33
34
  name: "docxToHtml",
34
35
  async execute(e, r) {
35
- const a = o(
36
+ const a = g(
36
37
  r
37
- ), t = await s(e), n = g(a), i = await m.convertToHtml({ buffer: t }, n);
38
+ ), t = await s(e), n = y(a), i = await m.convertToHtml({ buffer: t }, n);
38
39
  return {
39
40
  html: i.value,
40
41
  messages: i.messages.map(c)
41
42
  };
42
43
  }
43
- }, d = {
44
+ }, u = {
44
45
  namespace: "@origints/mammoth",
45
46
  name: "docxToText",
46
47
  async execute(e) {
@@ -51,12 +52,12 @@ const l = {
51
52
  };
52
53
  }
53
54
  };
54
- function u(e) {
55
+ function o(e) {
55
56
  if (!e) return;
56
57
  const r = {};
57
58
  return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), (e.imageHandling === "inline" || e.imageHandling === "omit") && (r.imageHandling = e.imageHandling), e.preserveEmptyParagraphs !== void 0 && (r.preserveEmptyParagraphs = e.preserveEmptyParagraphs), Object.keys(r).length > 0 ? r : void 0;
58
59
  }
59
- function o(e) {
60
+ function g(e) {
60
61
  return e || {};
61
62
  }
62
63
  async function s(e) {
@@ -68,11 +69,11 @@ async function s(e) {
68
69
  `docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`
69
70
  );
70
71
  }
71
- function g(e) {
72
+ function y(e) {
72
73
  const r = {};
73
- return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), e.preserveEmptyParagraphs === !1 ? r.ignoreEmptyParagraphs = !0 : e.preserveEmptyParagraphs === !0 && (r.ignoreEmptyParagraphs = !1), e.imageHandling && (r.convertImage = y(e.imageHandling)), e.transformDocument && (r.transformDocument = e.transformDocument), r;
74
+ return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), e.preserveEmptyParagraphs === !1 ? r.ignoreEmptyParagraphs = !0 : e.preserveEmptyParagraphs === !0 && (r.ignoreEmptyParagraphs = !1), e.imageHandling && (r.convertImage = p(e.imageHandling)), e.transformDocument && (r.transformDocument = e.transformDocument), r;
74
75
  }
75
- function y(e) {
76
+ function p(e) {
76
77
  return e === "omit" ? m.images.imgElement(() => Promise.resolve({ src: "" })) : e === "inline" ? m.images.imgElement(
77
78
  (r) => r.readAsBase64String().then((a) => ({
78
79
  src: `data:${r.contentType};base64,${a}`
@@ -91,15 +92,16 @@ function c(e) {
91
92
  message: e.message
92
93
  };
93
94
  }
94
- function M(e) {
95
- e.register(l), e.register(d);
95
+ function x(e) {
96
+ e.register(d), e.register(u);
96
97
  }
98
+ x(l);
97
99
  export {
98
- x as docxToHtml,
99
- l as docxToHtmlImpl,
100
- h as docxToText,
101
- d as docxToTextImpl,
102
- M as registerMammothTransforms,
100
+ T as docxToHtml,
101
+ d as docxToHtmlImpl,
102
+ v as docxToText,
103
+ u as docxToTextImpl,
104
+ x as registerMammothTransforms,
103
105
  f as streamToBuffer
104
106
  };
105
107
  //# sourceMappingURL=index.es.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.es.js","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').asString()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(\n input: unknown,\n args?: unknown\n ): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (\n options.imageHandling === 'inline' ||\n options.imageHandling === 'omit'\n ) {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: { type: string; message: string }): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'\n *\n * // Register transforms\n * registerMammothTransforms(globalRegistry)\n *\n * // Create a plan\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry"],"mappings":";AASA,eAAsBA,EACpBC,GACiB;AACjB,QAAMC,IAASD,EAAO,UAAA,GAChBE,IAAuB,CAAA;AAE7B,MAAI;AACF,eAAa;AACX,YAAM,EAAE,MAAAC,GAAM,OAAAC,EAAA,IAAU,MAAMH,EAAO,KAAA;AACrC,UAAIE,EAAM;AACV,MAAAD,EAAO,KAAKE,CAAK;AAAA,IACnB;AACA,WAAO,OAAO,OAAOF,CAAM;AAAA,EAC7B,UAAA;AACE,IAAAD,EAAO,YAAA;AAAA,EACT;AACF;ACkDO,SAASI,EAAWC,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMC,EAAiBD,CAAO;AAAA,EAAA;AAElC;AAaO,SAASE,EAAWF,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMA;AAAA,EAAA;AAEV;AAOO,MAAMG,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QACJC,GACAC,GAC+B;AAC/B,UAAML,IAAUM;AAAA,MACdD;AAAA,IAAA,GAEIE,IAAS,MAAMC,EAASJ,CAAK,GAC7BK,IAAiBC,EAAiBV,CAAO,GAEzCW,IAAS,MAAMC,EAAQ,cAAc,EAAE,QAAAL,EAAA,GAAUE,CAAc;AAErE,WAAO;AAAA,MACL,MAAME,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF,GAOaC,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQV,GAAyC;AACrD,UAAMG,IAAS,MAAMC,EAASJ,CAAK,GAG7BO,IAAS,MAAMC,EAAQ,eAAe,EAAE,QAAAL,GAAQ;AAEtD,WAAO;AAAA,MACL,MAAMI,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF;AAwBA,SAASZ,EACPD,GACmC;AACnC,MAAI,CAACA,EAAS;AAEd,QAAMe,IAAoC,CAAA;AAE1C,SAAIf,EAAQ,aACVe,EAAW,WAAWf,EAAQ,WAE5BA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAE3CA,EAAQ,2BAA2B,WACrCe,EAAW,yBAAyBf,EAAQ,yBAE1CA,EAAQ,aACVe,EAAW,WAAWf,EAAQ,YAG9BA,EAAQ,kBAAkB,YAC1BA,EAAQ,kBAAkB,YAE1Be,EAAW,gBAAgBf,EAAQ,gBAEjCA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAGxC,OAAO,KAAKe,CAAU,EAAE,SAAS,IAAIA,IAAa;AAC3D;AAKA,SAAST,EACPS,GACmB;AACnB,SAAKA,KAAmB,CAAA;AAE1B;AAKA,eAAeP,EAASJ,GAAiC;AACvD,MAAI,OAAO,SAASA,CAAK;AACvB,WAAOA;AAET,MAAIA,aAAiB;AACnB,WAAOX,EAAeW,CAAmC;AAE3D,QAAM,IAAI;AAAA,IACR,0DAA0D,OAAOA,CAAK;AAAA,EAAA;AAE1E;AAKA,SAASM,EAAiBV,GAA4C;AACpE,QAAMgB,IAA8B,CAAA;AAEpC,SAAIhB,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,WACtCgB,EAAY,0BAA0BhB,EAAQ,0BAG5CA,EAAQ,2BAA2B,WACrCgB,EAAY,yBAAyBhB,EAAQ,yBAG3CA,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,KACtCgB,EAAY,wBAAwB,KAC3BhB,EAAQ,4BAA4B,OAC7CgB,EAAY,wBAAwB,KAGlChB,EAAQ,kBACVgB,EAAY,eAAeC,EAAqBjB,EAAQ,aAAa,IAGnEA,EAAQ,sBACVgB,EAAY,oBAAoBhB,EAAQ,oBAGnCgB;AACT;AAKA,SAASC,EACPC,GACuB;AACvB,SAAIA,MAAa,SACRN,EAAQ,OAAO,WAAW,MAAM,QAAQ,QAAQ,EAAE,KAAK,GAAA,CAAI,CAAC,IAGjEM,MAAa,WAERN,EAAQ,OAAO;AAAA,IAAW,CAACO,MAChCA,EAAM,mBAAA,EAAqB,KAAK,CAAAC,OAAS;AAAA,MACvC,KAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI;AAAA,IAAA,EAC7C;AAAA,EAAA,IAKCR,EAAQ,OAAO,WAAW,OAAOO,MAAwB;AAE9D,UAAME,IAAe;AAAA,MACnB,aAAaF,EAAM;AAAA,MACnB,OAAO,CAACG,MACFA,MAAa,WACRH,EAAM,mBAAA,IAERA,EAAM,kBAAA;AAAA,IACf,GAMIR,IAAS,MAAMO,EAASG,CAAY;AAC1C,WAAIV,MAAW,OACN,EAAE,KAAK,GAAA,IAETA;AAAA,EACT,CAAC;AACH;AAKA,SAASE,EAAiBU,GAAwD;AAChF,SAAO;AAAA,IACL,MAAMA,EAAI,SAAS,UAAU,UAAU;AAAA,IACvC,SAASA,EAAI;AAAA,EAAA;AAEjB;AC7OO,SAASC,EAA0BC,GAMjC;AACP,EAAAA,EAAS,SAAStB,CAAc,GAChCsB,EAAS,SAASX,CAAc;AAClC;"}
1
+ {"version":3,"file":"index.es.js","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').string()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(input: unknown, args?: unknown): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (options.imageHandling === 'inline' || options.imageHandling === 'omit') {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: {\n type: string\n message: string\n}): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml } from '@origints/mammoth'\n *\n * // Create a plan\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { globalRegistry } from '@origints/core'\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n\n// Auto-register transforms\nregisterMammothTransforms(globalRegistry)\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry","globalRegistry"],"mappings":";;AASA,eAAsBA,EACpBC,GACiB;AACjB,QAAMC,IAASD,EAAO,UAAA,GAChBE,IAAuB,CAAA;AAE7B,MAAI;AACF,eAAa;AACX,YAAM,EAAE,MAAAC,GAAM,OAAAC,EAAA,IAAU,MAAMH,EAAO,KAAA;AACrC,UAAIE,EAAM;AACV,MAAAD,EAAO,KAAKE,CAAK;AAAA,IACnB;AACA,WAAO,OAAO,OAAOF,CAAM;AAAA,EAC7B,UAAA;AACE,IAAAD,EAAO,YAAA;AAAA,EACT;AACF;ACkDO,SAASI,EAAWC,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMC,EAAiBD,CAAO;AAAA,EAAA;AAElC;AAaO,SAASE,EAAWF,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMA;AAAA,EAAA;AAEV;AAOO,MAAMG,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQC,GAAgBC,GAA+C;AAC3E,UAAML,IAAUM;AAAA,MACdD;AAAA,IAAA,GAEIE,IAAS,MAAMC,EAASJ,CAAK,GAC7BK,IAAiBC,EAAiBV,CAAO,GAEzCW,IAAS,MAAMC,EAAQ,cAAc,EAAE,QAAAL,EAAA,GAAUE,CAAc;AAErE,WAAO;AAAA,MACL,MAAME,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF,GAOaC,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQV,GAAyC;AACrD,UAAMG,IAAS,MAAMC,EAASJ,CAAK,GAG7BO,IAAS,MAAMC,EAAQ,eAAe,EAAE,QAAAL,GAAQ;AAEtD,WAAO;AAAA,MACL,MAAMI,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF;AAwBA,SAASZ,EACPD,GACmC;AACnC,MAAI,CAACA,EAAS;AAEd,QAAMe,IAAoC,CAAA;AAE1C,SAAIf,EAAQ,aACVe,EAAW,WAAWf,EAAQ,WAE5BA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAE3CA,EAAQ,2BAA2B,WACrCe,EAAW,yBAAyBf,EAAQ,yBAE1CA,EAAQ,aACVe,EAAW,WAAWf,EAAQ,YAE5BA,EAAQ,kBAAkB,YAAYA,EAAQ,kBAAkB,YAClEe,EAAW,gBAAgBf,EAAQ,gBAEjCA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAGxC,OAAO,KAAKe,CAAU,EAAE,SAAS,IAAIA,IAAa;AAC3D;AAKA,SAAST,EACPS,GACmB;AACnB,SAAKA,KAAmB,CAAA;AAE1B;AAKA,eAAeP,EAASJ,GAAiC;AACvD,MAAI,OAAO,SAASA,CAAK;AACvB,WAAOA;AAET,MAAIA,aAAiB;AACnB,WAAOX,EAAeW,CAAmC;AAE3D,QAAM,IAAI;AAAA,IACR,0DAA0D,OAAOA,CAAK;AAAA,EAAA;AAE1E;AAKA,SAASM,EAAiBV,GAA4C;AACpE,QAAMgB,IAA8B,CAAA;AAEpC,SAAIhB,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,WACtCgB,EAAY,0BAA0BhB,EAAQ,0BAG5CA,EAAQ,2BAA2B,WACrCgB,EAAY,yBAAyBhB,EAAQ,yBAG3CA,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,KACtCgB,EAAY,wBAAwB,KAC3BhB,EAAQ,4BAA4B,OAC7CgB,EAAY,wBAAwB,KAGlChB,EAAQ,kBACVgB,EAAY,eAAeC,EAAqBjB,EAAQ,aAAa,IAGnEA,EAAQ,sBACVgB,EAAY,oBAAoBhB,EAAQ,oBAGnCgB;AACT;AAKA,SAASC,EACPC,GACuB;AACvB,SAAIA,MAAa,SACRN,EAAQ,OAAO,WAAW,MAAM,QAAQ,QAAQ,EAAE,KAAK,GAAA,CAAI,CAAC,IAGjEM,MAAa,WAERN,EAAQ,OAAO;AAAA,IAAW,CAACO,MAChCA,EAAM,mBAAA,EAAqB,KAAK,CAAAC,OAAS;AAAA,MACvC,KAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI;AAAA,IAAA,EAC7C;AAAA,EAAA,IAKCR,EAAQ,OAAO,WAAW,OAAOO,MAAwB;AAE9D,UAAME,IAAe;AAAA,MACnB,aAAaF,EAAM;AAAA,MACnB,OAAO,CAACG,MACFA,MAAa,WACRH,EAAM,mBAAA,IAERA,EAAM,kBAAA;AAAA,IACf,GAMIR,IAAS,MAAMO,EAASG,CAAY;AAC1C,WAAIV,MAAW,OACN,EAAE,KAAK,GAAA,IAETA;AAAA,EACT,CAAC;AACH;AAKA,SAASE,EAAiBU,GAGP;AACjB,SAAO;AAAA,IACL,MAAMA,EAAI,SAAS,UAAU,UAAU;AAAA,IACvC,SAASA,EAAI;AAAA,EAAA;AAEjB;AC5OO,SAASC,EAA0BC,GAMjC;AACP,EAAAA,EAAS,SAAStB,CAAc,GAChCsB,EAAS,SAASX,CAAc;AAClC;AAGAU,EAA0BE,CAAc;"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@origints/mammoth",
3
- "version": "0.1.1",
3
+ "version": "0.3.2",
4
4
  "description": "DOCX to HTML conversion for Origins using mammoth.js",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -27,11 +27,30 @@
27
27
  "lint": "eslint \"{src,tests}/**/*.{ts,tsx}\" --max-warnings 0",
28
28
  "typecheck": "tsc -p tsconfig.json --noEmit"
29
29
  },
30
+ "repository": {
31
+ "type": "git",
32
+ "url": "https://github.com/fponticelli/origints.git",
33
+ "directory": "packages/mammoth"
34
+ },
35
+ "homepage": "https://origints.dev",
36
+ "bugs": "https://github.com/fponticelli/origints/issues",
37
+ "keywords": [
38
+ "origints",
39
+ "data-extraction",
40
+ "lineage",
41
+ "provenance",
42
+ "docx",
43
+ "word",
44
+ "document-conversion"
45
+ ],
46
+ "engines": {
47
+ "node": ">=18"
48
+ },
30
49
  "dependencies": {
31
50
  "mammoth": "^1.11.0"
32
51
  },
33
52
  "peerDependencies": {
34
- "@origints/core": "^0.1.0"
53
+ "@origints/core": "^0.3.0"
35
54
  },
36
55
  "devDependencies": {
37
56
  "@origints/core": "workspace:*",