@elizaos/plugin-pdf 2.0.0-alpha.9 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -43
- package/dist/browser/index.browser.js +5 -63
- package/dist/browser/index.browser.js.map +6 -7
- package/dist/generated/specs/specs.d.ts +1 -18
- package/dist/generated/specs/specs.d.ts.map +1 -1
- package/dist/index.browser.d.ts +6 -2
- package/dist/index.browser.d.ts.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.node.d.ts +6 -2
- package/dist/index.node.d.ts.map +1 -1
- package/dist/node/index.node.js +88 -38374
- package/dist/node/index.node.js.map +6 -7
- package/dist/services/index.d.ts +1 -1
- package/dist/services/index.d.ts.map +1 -1
- package/dist/services/pdf.d.ts +4 -3
- package/dist/services/pdf.d.ts.map +1 -1
- package/package.json +20 -14
- package/dist/build.d.ts +0 -4
- package/dist/build.d.ts.map +0 -1
- package/dist/cjs/index.d.ts +0 -2
- package/dist/cjs/index.node.cjs +0 -38522
- package/dist/cjs/index.node.js.map +0 -13
- package/dist/tsconfig.tsbuildinfo +0 -1
package/README.md
CHANGED
|
@@ -1,26 +1,28 @@
|
|
|
1
1
|
# @elizaos/plugin-pdf
|
|
2
2
|
|
|
3
|
-
PDF text extraction plugin for
|
|
3
|
+
PDF text extraction plugin for elizaOS.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Adds `PdfService` to an elizaOS agent runtime so that PDF buffers can be parsed and their text content extracted. The service is available to any action, provider, or agent code via `runtime.getService(ServiceType.PDF)`.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
## Installation
|
|
8
8
|
|
|
9
9
|
```bash
|
|
10
10
|
elizaos plugins add @elizaos/plugin-pdf
|
|
11
11
|
```
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
Or with Bun directly:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
14
16
|
bun add @elizaos/plugin-pdf
|
|
15
17
|
```
|
|
16
18
|
|
|
17
19
|
## Configuration
|
|
18
20
|
|
|
19
|
-
No configuration required. Uses `
|
|
21
|
+
No environment variables or configuration required. Uses [`unpdf`](https://github.com/unjs/unpdf) for local, self-contained PDF processing.
|
|
20
22
|
|
|
21
|
-
##
|
|
23
|
+
## Enabling the Plugin
|
|
22
24
|
|
|
23
|
-
|
|
25
|
+
Add the package name to the `plugins` array in your character file:
|
|
24
26
|
|
|
25
27
|
```typescript
|
|
26
28
|
const character: Partial<Character> = {
|
|
@@ -29,54 +31,66 @@ const character: Partial<Character> = {
|
|
|
29
31
|
};
|
|
30
32
|
```
|
|
31
33
|
|
|
32
|
-
##
|
|
34
|
+
## PdfService API
|
|
33
35
|
|
|
34
|
-
|
|
36
|
+
Retrieve the service instance from the runtime:
|
|
35
37
|
|
|
36
|
-
|
|
38
|
+
```typescript
|
|
39
|
+
import { ServiceType } from "@elizaos/core";
|
|
40
|
+
import type { PdfService } from "@elizaos/plugin-pdf";
|
|
37
41
|
|
|
38
|
-
|
|
42
|
+
const pdfService = runtime.getService<PdfService>(ServiceType.PDF);
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Methods
|
|
39
46
|
|
|
40
|
-
|
|
41
|
-
- `convertPdfToTextWithOptions(pdfBuffer: Buffer, options): Promise<PdfConversionResult>` - Convert with options
|
|
42
|
-
- `getDocumentInfo(pdfBuffer: Buffer): Promise<PdfDocumentInfo>` - Get full document information
|
|
47
|
+
**`convertPdfToText(pdfBuffer: Buffer): Promise<string>`**
|
|
43
48
|
|
|
44
|
-
|
|
49
|
+
Extracts all text from every page as a single cleaned string.
|
|
45
50
|
|
|
46
51
|
```typescript
|
|
47
52
|
import * as fs from "node:fs/promises";
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
} catch (error) {
|
|
68
|
-
console.error("Error extracting text from PDF:", error);
|
|
69
|
-
}
|
|
53
|
+
|
|
54
|
+
const buffer = await fs.readFile("document.pdf");
|
|
55
|
+
const text = await pdfService.convertPdfToText(buffer);
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**`convertPdfToTextWithOptions(pdfBuffer: Buffer, options?: PdfExtractionOptions): Promise<PdfConversionResult>`**
|
|
59
|
+
|
|
60
|
+
Extracts text with control over page range, whitespace, and cleanup. Returns a result object with a `success` flag and optional `text`, `pageCount`, and `error` fields.
|
|
61
|
+
|
|
62
|
+
```typescript
|
|
63
|
+
const result = await pdfService.convertPdfToTextWithOptions(buffer, {
|
|
64
|
+
startPage: 1,
|
|
65
|
+
endPage: 5,
|
|
66
|
+
preserveWhitespace: false,
|
|
67
|
+
cleanContent: true,
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
if (result.success) {
|
|
71
|
+
console.log(result.text);
|
|
70
72
|
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**`getDocumentInfo(pdfBuffer: Buffer): Promise<PdfDocumentInfo>`**
|
|
76
|
+
|
|
77
|
+
Returns full document information: page count, per-page dimensions and text, and metadata (title, author, subject, keywords, creator, producer, creation/modification dates).
|
|
71
78
|
|
|
72
|
-
|
|
73
|
-
|
|
79
|
+
## Exported Types
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
PdfConversionResult // { success, text?, pageCount?, error? }
|
|
83
|
+
PdfExtractionOptions // { startPage?, endPage?, preserveWhitespace?, cleanContent? }
|
|
84
|
+
PdfPageInfo // { pageNumber, width, height, text }
|
|
85
|
+
PdfMetadata // { title?, author?, subject?, keywords?, creator?, producer?, creationDate?, modificationDate? }
|
|
86
|
+
PdfDocumentInfo // { pageCount, metadata, text, pages }
|
|
74
87
|
```
|
|
75
88
|
|
|
76
|
-
##
|
|
89
|
+
## Platform Support
|
|
77
90
|
|
|
78
|
-
|
|
91
|
+
The package builds for both Node.js and browser environments. The `exports` field in `package.json` selects the correct entry point automatically.
|
|
92
|
+
|
|
93
|
+
## Dependencies
|
|
79
94
|
|
|
80
|
-
|
|
95
|
+
- [`unpdf`](https://github.com/unjs/unpdf) — PDF parsing (wraps PDF.js for Node + browser)
|
|
81
96
|
|
|
82
|
-
MIT
|