@vertesia/converters 0.80.0-dev.20251121 → 0.80.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +139 -0
- package/package.json +12 -1
- package/lib/esm/image.js +0 -57
- package/lib/esm/image.js.map +0 -1
- package/lib/esm/index.js +0 -5
- package/lib/esm/index.js.map +0 -1
- package/lib/esm/mutool.js +0 -39
- package/lib/esm/mutool.js.map +0 -1
- package/lib/esm/pandoc.js +0 -40
- package/lib/esm/pandoc.js.map +0 -1
- package/lib/types/image.d.ts +0 -17
- package/lib/types/image.d.ts.map +0 -1
- package/lib/types/index.d.ts +0 -5
- package/lib/types/index.d.ts.map +0 -1
- package/lib/types/mutool.d.ts +0 -4
- package/lib/types/mutool.d.ts.map +0 -1
- package/lib/types/pandoc.d.ts +0 -7
- package/lib/types/pandoc.d.ts.map +0 -1
package/README.md
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# @vertesia/converters
|
|
2
|
+
|
|
3
|
+
Image and document conversion utilities for Node.js. Provides functions for image transformation, PDF to text extraction, and document to Markdown conversion.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Image Transformation**: Resize and convert images using Sharp
|
|
8
|
+
- **PDF to Text**: Extract text from PDF files using MuTool
|
|
9
|
+
- **Document to Markdown**: Convert various document formats to Markdown using Pandoc
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install @vertesia/converters
|
|
15
|
+
# or
|
|
16
|
+
pnpm add @vertesia/converters
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
### System Dependencies
|
|
20
|
+
|
|
21
|
+
Some converters require external tools to be installed:
|
|
22
|
+
|
|
23
|
+
- **Image conversion**: No external dependencies (uses Sharp)
|
|
24
|
+
- **PDF to text**: Requires [MuTool](https://mupdf.com/docs/mutool.html) (`mutool` command)
|
|
25
|
+
- **Document to Markdown**: Requires [Pandoc](https://pandoc.org/) (`pandoc` command)
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
### Image Transformation
|
|
30
|
+
|
|
31
|
+
Transform images with resizing and format conversion:
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
import {
|
|
35
|
+
transformImage,
|
|
36
|
+
transformImageToBuffer,
|
|
37
|
+
transformImageToFile
|
|
38
|
+
} from '@vertesia/converters';
|
|
39
|
+
|
|
40
|
+
// Transform image to a stream
|
|
41
|
+
import { createReadStream, createWriteStream } from 'fs';
|
|
42
|
+
|
|
43
|
+
const input = createReadStream('input.jpg');
|
|
44
|
+
const output = createWriteStream('output.webp');
|
|
45
|
+
|
|
46
|
+
await transformImage(input, output, {
|
|
47
|
+
max_hw: 1024, // Max width/height (maintains aspect ratio)
|
|
48
|
+
format: 'webp' // Output format
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
// Transform image to buffer
|
|
52
|
+
const buffer = await transformImageToBuffer(inputBuffer, {
|
|
53
|
+
max_hw: 800,
|
|
54
|
+
format: 'png'
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
// Transform image to file
|
|
58
|
+
await transformImageToFile(inputBuffer, 'output.jpg', {
|
|
59
|
+
max_hw: 1200,
|
|
60
|
+
format: 'jpeg'
|
|
61
|
+
});
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### PDF to Text
|
|
65
|
+
|
|
66
|
+
Extract text content from PDF files:
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
import { pdfToText, pdfToTextBuffer, pdfFileToText } from '@vertesia/converters';
|
|
70
|
+
|
|
71
|
+
// From buffer to string
|
|
72
|
+
const text = await pdfToText(pdfBuffer);
|
|
73
|
+
|
|
74
|
+
// From buffer to buffer
|
|
75
|
+
const textBuffer = await pdfToTextBuffer(pdfBuffer);
|
|
76
|
+
|
|
77
|
+
// From file to file
|
|
78
|
+
await pdfFileToText('input.pdf', 'output.txt');
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Document to Markdown
|
|
82
|
+
|
|
83
|
+
Convert documents to Markdown format using Pandoc:
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
import { manyToMarkdown } from '@vertesia/converters';
|
|
87
|
+
import { createReadStream } from 'fs';
|
|
88
|
+
|
|
89
|
+
// Convert DOCX to Markdown
|
|
90
|
+
const stream = createReadStream('document.docx');
|
|
91
|
+
const markdown = await manyToMarkdown(stream, 'docx');
|
|
92
|
+
|
|
93
|
+
// Convert HTML to Markdown
|
|
94
|
+
const htmlStream = createReadStream('page.html');
|
|
95
|
+
const md = await manyToMarkdown(htmlStream, 'html');
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Supported input formats include all formats supported by Pandoc: `docx`, `html`, `latex`, `rst`, `textile`, `org`, `mediawiki`, and many more.
|
|
99
|
+
|
|
100
|
+
## API Reference
|
|
101
|
+
|
|
102
|
+
### Image Functions
|
|
103
|
+
|
|
104
|
+
| Function | Description |
|
|
105
|
+
|----------|-------------|
|
|
106
|
+
| `transformImage(input, output, opts)` | Transform image from stream to stream |
|
|
107
|
+
| `transformImageToBuffer(input, opts)` | Transform image to buffer |
|
|
108
|
+
| `transformImageToFile(input, output, opts)` | Transform image to file |
|
|
109
|
+
|
|
110
|
+
#### TransformOptions
|
|
111
|
+
|
|
112
|
+
| Option | Type | Description |
|
|
113
|
+
|--------|------|-------------|
|
|
114
|
+
| `max_hw` | `number` | Maximum width/height (maintains aspect ratio, no upscaling) |
|
|
115
|
+
| `format` | `string` | Output format (`jpeg`, `png`, `webp`, `avif`, etc.) |
|
|
116
|
+
|
|
117
|
+
### PDF Functions
|
|
118
|
+
|
|
119
|
+
| Function | Description |
|
|
120
|
+
|----------|-------------|
|
|
121
|
+
| `pdfToText(buffer)` | Convert PDF buffer to text string |
|
|
122
|
+
| `pdfToTextBuffer(buffer)` | Convert PDF buffer to text buffer |
|
|
123
|
+
| `pdfFileToText(input, output)` | Convert PDF file to text file |
|
|
124
|
+
|
|
125
|
+
### Document Functions
|
|
126
|
+
|
|
127
|
+
| Function | Description |
|
|
128
|
+
|----------|-------------|
|
|
129
|
+
| `manyToMarkdown(stream, format)` | Convert document stream to Markdown |
|
|
130
|
+
|
|
131
|
+
## Requirements
|
|
132
|
+
|
|
133
|
+
- Node.js 18+
|
|
134
|
+
- MuTool (for PDF conversion)
|
|
135
|
+
- Pandoc (for document conversion)
|
|
136
|
+
|
|
137
|
+
## License
|
|
138
|
+
|
|
139
|
+
Apache-2.0
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vertesia/converters",
|
|
3
|
-
"version": "0.80.0-dev.20251121",
|
|
3
|
+
"version": "0.80.1",
|
|
4
4
|
"description": "Image and content converters",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"types": "./lib/types/index.d.ts",
|
|
@@ -29,6 +29,17 @@
|
|
|
29
29
|
"url": "https://github.com/vertesia/composableai.git",
|
|
30
30
|
"directory": "packages/converters"
|
|
31
31
|
},
|
|
32
|
+
"keywords": [
|
|
33
|
+
"vertesia",
|
|
34
|
+
"converters",
|
|
35
|
+
"image",
|
|
36
|
+
"pdf",
|
|
37
|
+
"markdown",
|
|
38
|
+
"sharp",
|
|
39
|
+
"pandoc",
|
|
40
|
+
"mutool",
|
|
41
|
+
"typescript"
|
|
42
|
+
],
|
|
32
43
|
"ts_dual_module": {
|
|
33
44
|
"outDir": "lib"
|
|
34
45
|
},
|
package/lib/esm/image.js
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
import sharp from "sharp";
|
|
2
|
-
export function createImageTransformer(input, opts) {
|
|
3
|
-
const isInputStream = !!input.pipe;
|
|
4
|
-
let sh = isInputStream ? input.pipe(sharp()) : sharp(input);
|
|
5
|
-
if (opts.max_hw) {
|
|
6
|
-
sh = sh.resize({
|
|
7
|
-
width: opts.max_hw,
|
|
8
|
-
height: opts.max_hw,
|
|
9
|
-
fit: sharp.fit.inside,
|
|
10
|
-
withoutEnlargement: true,
|
|
11
|
-
});
|
|
12
|
-
}
|
|
13
|
-
if (opts.format) {
|
|
14
|
-
sh = sh.toFormat(opts.format);
|
|
15
|
-
}
|
|
16
|
-
return sh;
|
|
17
|
-
}
|
|
18
|
-
/**
|
|
19
|
-
* @param max_hw
|
|
20
|
-
* @param format
|
|
21
|
-
* @returns
|
|
22
|
-
*/
|
|
23
|
-
export async function transformImage(input, output, opts) {
|
|
24
|
-
const sh = createImageTransformer(input, opts);
|
|
25
|
-
sh.pipe(output);
|
|
26
|
-
return new Promise((resolve, reject) => {
|
|
27
|
-
const handleError = (err) => {
|
|
28
|
-
console.error('Failed to transform', err);
|
|
29
|
-
try {
|
|
30
|
-
if (input.pipe && input.destroy) {
|
|
31
|
-
input.destroy();
|
|
32
|
-
}
|
|
33
|
-
if (output.destroy) {
|
|
34
|
-
output.destroy();
|
|
35
|
-
}
|
|
36
|
-
sh.destroy();
|
|
37
|
-
}
|
|
38
|
-
finally {
|
|
39
|
-
reject(err);
|
|
40
|
-
}
|
|
41
|
-
};
|
|
42
|
-
output.on('error', handleError);
|
|
43
|
-
input.pipe && input.on && input.on('error', handleError);
|
|
44
|
-
output.on("finish", () => {
|
|
45
|
-
resolve(sh);
|
|
46
|
-
});
|
|
47
|
-
});
|
|
48
|
-
}
|
|
49
|
-
export function transformImageToBuffer(input, opts) {
|
|
50
|
-
const sh = createImageTransformer(input, opts);
|
|
51
|
-
return sh.toBuffer();
|
|
52
|
-
}
|
|
53
|
-
export async function transformImageToFile(input, output, opts) {
|
|
54
|
-
const sh = createImageTransformer(input, opts);
|
|
55
|
-
await sh.toFile(output);
|
|
56
|
-
}
|
|
57
|
-
//# sourceMappingURL=image.js.map
|
package/lib/esm/image.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/image.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAoB1B,MAAM,UAAU,sBAAsB,CAAC,KAAqB,EAAE,IAAsB;IAChF,MAAM,aAAa,GAAG,CAAC,CAAE,KAA+B,CAAC,IAAI,CAAC;IAC9D,IAAI,EAAE,GAAG,aAAa,CAAC,CAAC,CAAE,KAA+B,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAY,CAAC,CAAC;IAC9F,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QACd,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC;YACX,KAAK,EAAE,IAAI,CAAC,MAAM;YAClB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,GAAG,EAAE,KAAK,CAAC,GAAG,CAAC,MAAM;YACrB,kBAAkB,EAAE,IAAI;SAC3B,CAAC,CAAC;IACP,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QACd,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,EAAE,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAAqB,EAAE,MAA6B,EAAE,IAAsB;IAC7G,MAAM,EAAE,GAAG,sBAAsB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAC/C,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACnC,MAAM,WAAW,GAAG,CAAC,GAAQ,EAAE,EAAE;YAC7B,OAAO,CAAC,KAAK,CAAC,qBAAqB,EAAE,GAAG,CAAC,CAAC;YAC1C,IAAI,CAAC;gBACD,IAAK,KAAa,CAAC,IAAI,IAAK,KAAa,CAAC,OAAO,EAAE,CAAC;oBAC/C,KAAa,CAAC,OAAO,EAAE,CAAC;gBAC7B,CAAC;gBACD,IAAK,MAAc,CAAC,OAAO,EAAE,CAAC;oBACzB,MAAc,CAAC,OAAO,EAAE,CAAC;gBAC9B,CAAC;gBACD,EAAE,CAAC,OAAO,EAAE,CAAC;YACjB,CAAC;oBAAS,CAAC;gBACP,MAAM,CAAC,GAAG,CAAC,CAAC;YAChB,CAAC;QACL,CAAC,CAAA;QACD,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;QAC/B,KAAa,CAAC,IAAI,IAAK,KAAa,CAAC,EAAE,IAAK,KAAa,CAAC,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;QACpF,MAAM,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;YACrB,OAAO,CAAC,EAAE,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,KAAqB,EAAE,IAAsB;IAChF,MAAM,EAAE,GAAG,sBAAsB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAC/C,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC;AACzB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,KAAqB,EAAE,MAAc,EAAE,IAAsB;IACpG,MAAM,EAAE,GAAG,sBAAsB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAC/C,MAAM,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC"}
|
package/lib/esm/index.js
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
import { transformImage, transformImageToBuffer, transformImageToFile } from './image.js';
|
|
2
|
-
import { pdfFileToText, pdfToText, pdfToTextBuffer } from './mutool.js';
|
|
3
|
-
import { manyToMarkdown } from './pandoc.js';
|
|
4
|
-
export { manyToMarkdown, pdfFileToText, pdfToText, pdfToTextBuffer, transformImage, transformImageToBuffer, transformImageToFile };
|
|
5
|
-
//# sourceMappingURL=index.js.map
|
package/lib/esm/index.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,sBAAsB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAC1F,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE7C,OAAO,EACH,cAAc,EACd,aAAa,EAAE,SAAS,EACxB,eAAe,EACf,cAAc,EACd,sBAAsB,EACtB,oBAAoB,EACvB,CAAC"}
|
package/lib/esm/mutool.js
DELETED
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import { spawn } from 'child_process';
|
|
2
|
-
import fs from 'fs';
|
|
3
|
-
import { readFile } from "fs/promises";
|
|
4
|
-
import tmp from 'tmp';
|
|
5
|
-
tmp.setGracefulCleanup();
|
|
6
|
-
export function pdfFileToText(input, output) {
|
|
7
|
-
return new Promise((resolve, reject) => {
|
|
8
|
-
const command = spawn("mutool", ["convert", "-o", output, input]);
|
|
9
|
-
command.on('exit', function (code) {
|
|
10
|
-
if (code) {
|
|
11
|
-
reject(new Error(`mutool exited with code ${code}`));
|
|
12
|
-
}
|
|
13
|
-
});
|
|
14
|
-
command.on('close', function (code) {
|
|
15
|
-
if (code) {
|
|
16
|
-
reject(new Error(`mutool exited with code ${code}`));
|
|
17
|
-
}
|
|
18
|
-
else {
|
|
19
|
-
return resolve(output);
|
|
20
|
-
}
|
|
21
|
-
;
|
|
22
|
-
});
|
|
23
|
-
command.on('error', (err) => {
|
|
24
|
-
reject(err);
|
|
25
|
-
});
|
|
26
|
-
});
|
|
27
|
-
}
|
|
28
|
-
export function pdfToText(buffer) {
|
|
29
|
-
return pdfToTextBuffer(buffer).then((buffer) => buffer.toString('utf-8'));
|
|
30
|
-
}
|
|
31
|
-
export function pdfToTextBuffer(buffer) {
|
|
32
|
-
const inputFile = tmp.fileSync({ postfix: '.pdf' });
|
|
33
|
-
const targetFileName = tmp.tmpNameSync({ postfix: '.txt' });
|
|
34
|
-
fs.writeSync(inputFile.fd, buffer);
|
|
35
|
-
return pdfFileToText(inputFile.name, targetFileName).then(() => {
|
|
36
|
-
return readFile(targetFileName);
|
|
37
|
-
});
|
|
38
|
-
}
|
|
39
|
-
//# sourceMappingURL=mutool.js.map
|
package/lib/esm/mutool.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"mutool.js","sourceRoot":"","sources":["../../src/mutool.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AACtC,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,GAAG,CAAC,kBAAkB,EAAE,CAAC;AAEzB,MAAM,UAAU,aAAa,CAAC,KAAa,EAAE,MAAc;IACvD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAEnC,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;QAElE,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,UAAU,IAAI;YAC7B,IAAI,IAAI,EAAE,CAAC;gBACP,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,IAAI,EAAE,CAAC,CAAC,CAAC;YACzD,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,IAAI;YAC9B,IAAI,IAAI,EAAE,CAAC;gBACP,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,IAAI,EAAE,CAAC,CAAC,CAAC;YACzD,CAAC;iBAAM,CAAC;gBACJ,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC;YAC3B,CAAC;YAAA,CAAC;QACN,CAAC,CAAC,CAAC;QAEH,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACxB,MAAM,CAAC,GAAG,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;IAEP,CAAC,CAAC,CAAC;AAEP,CAAC;AACD,MAAM,UAAU,SAAS,CAAC,MAAc;IACpC,OAAO,eAAe,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;AAC9E,CAAC;AACD,MAAM,UAAU,eAAe,CAAC,MAAc;IAC1C,MAAM,SAAS,GAAG,GAAG,CAAC,QAAQ,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IACpD,MAAM,cAAc,GAAG,GAAG,CAAC,WAAW,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IAE5D,EAAE,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;IAEnC,OAAO,aAAa,CAAC,SAAS,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE;QAC3D,OAAO,QAAQ,CAAC,cAAc,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;AACP,CAAC"}
|
package/lib/esm/pandoc.js
DELETED
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import { spawn } from 'child_process';
|
|
2
|
-
import { PassThrough } from 'stream';
|
|
3
|
-
export function manyToMarkdownFromBuffer(buffer, fromFormat) {
|
|
4
|
-
const input = new PassThrough();
|
|
5
|
-
input.end(buffer);
|
|
6
|
-
return manyToMarkdown(input, fromFormat);
|
|
7
|
-
}
|
|
8
|
-
/**
|
|
9
|
-
* Requires pandoc to be installed on the system.
|
|
10
|
-
* @param fromFormat is the format of the input buffer.
|
|
11
|
-
*/
|
|
12
|
-
export function manyToMarkdown(input, fromFormat) {
|
|
13
|
-
return new Promise((resolve, reject) => {
|
|
14
|
-
let result = [];
|
|
15
|
-
const command = spawn("pandoc", ["-t", "markdown", '-f', fromFormat], {
|
|
16
|
-
stdio: 'pipe',
|
|
17
|
-
});
|
|
18
|
-
input.pipe(command.stdin);
|
|
19
|
-
command.stdout.on('data', function (data) {
|
|
20
|
-
result.push(data.toString());
|
|
21
|
-
});
|
|
22
|
-
command.on('exit', function (code) {
|
|
23
|
-
if (code) {
|
|
24
|
-
reject(new Error(`pandoc exited with code ${code}`));
|
|
25
|
-
}
|
|
26
|
-
});
|
|
27
|
-
command.on('close', function (code) {
|
|
28
|
-
if (code) {
|
|
29
|
-
reject(new Error(`pandoc exited with code ${code}`));
|
|
30
|
-
}
|
|
31
|
-
else {
|
|
32
|
-
resolve(result.join(''));
|
|
33
|
-
}
|
|
34
|
-
});
|
|
35
|
-
command.on('error', (err) => {
|
|
36
|
-
reject(err);
|
|
37
|
-
});
|
|
38
|
-
});
|
|
39
|
-
}
|
|
40
|
-
//# sourceMappingURL=pandoc.js.map
|
package/lib/esm/pandoc.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pandoc.js","sourceRoot":"","sources":["../../src/pandoc.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AACtC,OAAO,EAAE,WAAW,EAAE,MAAM,QAAQ,CAAC;AAErC,MAAM,UAAU,wBAAwB,CAAC,MAAc,EAAE,UAAkB;IACzE,MAAM,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC;IAChC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAClB,OAAO,cAAc,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;AAE3C,CAAC;AACD;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,KAA4B,EAAE,UAAkB;IAE7E,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,IAAI,MAAM,GAAa,EAAE,CAAC;QAE1B,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,UAAU,CAAC,EAAE;YACpE,KAAK,EAAE,MAAM;SACd,CAAC,CAAC;QACH,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAE1B,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,UAAU,IAAY;YAC9C,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,UAAU,IAAI;YAC/B,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,IAAI,EAAE,CAAC,CAAC,CAAC;YACvD,CAAC;QACH,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,IAAI;YAChC,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,IAAI,EAAE,CAAC,CAAC,CAAC;YACvD,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;YAC1B,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YAC1B,MAAM,CAAC,GAAG,CAAC,CAAC;QACd,CAAC,CAAC,CAAC;IAEL,CAAC,CAAC,CAAC;AAEL,CAAC"}
|
package/lib/types/image.d.ts
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
import sharp from "sharp";
|
|
2
|
-
export interface TransformOptions {
|
|
3
|
-
max_hw?: number;
|
|
4
|
-
format?: keyof sharp.FormatEnum;
|
|
5
|
-
}
|
|
6
|
-
type SharpInputType = Buffer | ArrayBuffer | Uint8Array | Uint8ClampedArray | Int8Array | Uint16Array | Int16Array | Uint32Array | Int32Array | Float32Array | Float64Array | string | NodeJS.ReadableStream;
|
|
7
|
-
export declare function createImageTransformer(input: SharpInputType, opts: TransformOptions): sharp.Sharp;
|
|
8
|
-
/**
|
|
9
|
-
* @param max_hw
|
|
10
|
-
* @param format
|
|
11
|
-
* @returns
|
|
12
|
-
*/
|
|
13
|
-
export declare function transformImage(input: SharpInputType, output: NodeJS.WritableStream, opts: TransformOptions): Promise<sharp.Sharp>;
|
|
14
|
-
export declare function transformImageToBuffer(input: SharpInputType, opts: TransformOptions): Promise<Buffer>;
|
|
15
|
-
export declare function transformImageToFile(input: SharpInputType, output: string, opts: TransformOptions): Promise<void>;
|
|
16
|
-
export {};
|
|
17
|
-
//# sourceMappingURL=image.d.ts.map
|
package/lib/types/image.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/image.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,MAAM,WAAW,gBAAgB;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,KAAK,CAAC,UAAU,CAAA;CAClC;AAED,KAAK,cAAc,GAAG,MAAM,GACtB,WAAW,GACX,UAAU,GACV,iBAAiB,GACjB,SAAS,GACT,WAAW,GACX,UAAU,GACV,WAAW,GACX,UAAU,GACV,YAAY,GACZ,YAAY,GACZ,MAAM,GACN,MAAM,CAAC,cAAc,CAAA;AAC3B,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,gBAAgB,eAenF;AAED;;;;GAIG;AACH,wBAAsB,cAAc,CAAC,KAAK,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,CAAC,cAAc,EAAE,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAyBvI;AAED,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAGrG;AAED,wBAAsB,oBAAoB,CAAC,KAAK,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,CAGvH"}
|
package/lib/types/index.d.ts
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
import { transformImage, transformImageToBuffer, transformImageToFile } from './image.js';
|
|
2
|
-
import { pdfFileToText, pdfToText, pdfToTextBuffer } from './mutool.js';
|
|
3
|
-
import { manyToMarkdown } from './pandoc.js';
|
|
4
|
-
export { manyToMarkdown, pdfFileToText, pdfToText, pdfToTextBuffer, transformImage, transformImageToBuffer, transformImageToFile };
|
|
5
|
-
//# sourceMappingURL=index.d.ts.map
|
package/lib/types/index.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,sBAAsB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAC1F,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE7C,OAAO,EACH,cAAc,EACd,aAAa,EAAE,SAAS,EACxB,eAAe,EACf,cAAc,EACd,sBAAsB,EACtB,oBAAoB,EACvB,CAAC"}
|
package/lib/types/mutool.d.ts
DELETED
|
package/lib/types/mutool.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"mutool.d.ts","sourceRoot":"","sources":["../../src/mutool.ts"],"names":[],"mappings":"AAMA,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,oBAyB1D;AACD,wBAAgB,SAAS,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAEzD;AACD,wBAAgB,eAAe,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAS/D"}
|
package/lib/types/pandoc.d.ts
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
export declare function manyToMarkdownFromBuffer(buffer: Buffer, fromFormat: string): Promise<string>;
|
|
2
|
-
/**
|
|
3
|
-
* Requires pandoc to be installed on the system.
|
|
4
|
-
* @param fromFormat is the format of the input buffer.
|
|
5
|
-
*/
|
|
6
|
-
export declare function manyToMarkdown(input: NodeJS.ReadableStream, fromFormat: string): Promise<string>;
|
|
7
|
-
//# sourceMappingURL=pandoc.d.ts.map
|
package/lib/types/pandoc.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pandoc.d.ts","sourceRoot":"","sources":["../../src/pandoc.ts"],"names":[],"mappings":"AAGA,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAK5F;AACD;;;GAGG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,CAAC,cAAc,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAgChG"}
|