@elizaos/plugin-pdf 1.0.1 → 2.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ import{logger as x,Service as C,ServiceType as h}from"@elizaos/core";import w from"pdfjs-dist";var{getDocument:m}=w;function l(e){return"str"in e}class f extends C{static serviceType=h.PDF;capabilityDescription="The agent is able to convert PDF files to text";static async start(e){return new f(e)}static async stop(e){let t=e.getService(h.PDF);if(t)await t.stop()}async stop(){}async convertPdfToText(e){try{let t=new Uint8Array(e),a=await m({data:t}).promise,s=a.numPages,n=[];for(let i=1;i<=s;i++){let o=(await(await a.getPage(i)).getTextContent()).items.filter(l).map((g)=>g.str).join(" ");n.push(o)}let r=n.join(`
2
+ `);return this.cleanUpContent(r)}catch(t){throw x.error(`PdfService: Failed to convert PDF to text - error: ${t}, bufferSize: ${e.length}`),t}}async convertPdfToTextWithOptions(e,t={}){try{let a=new Uint8Array(e),s=await m({data:a}).promise,n=s.numPages,r=Math.max(1,t.startPage||1),i=Math.min(n,t.endPage||n),d=[];for(let o=r;o<=i;o++){let P=(await(await s.getPage(o)).getTextContent()).items.filter(l).map((p)=>p.str).join(t.preserveWhitespace?"":" ");d.push(P)}let c=d.join(`
3
+ `);if(t.cleanContent!==!1)c=this.cleanUpContent(c);return{success:!0,text:c,pageCount:n}}catch(a){return{success:!1,error:a instanceof Error?a.message:String(a)}}}async getDocumentInfo(e){let t=new Uint8Array(e),a=await m({data:t}).promise,s=a.numPages,r=(await a.getMetadata()).info,i={title:r.Title,author:r.Author,subject:r.Subject,keywords:r.Keywords,creator:r.Creator,producer:r.Producer,creationDate:r.CreationDate?new Date(r.CreationDate):void 0,modificationDate:r.ModDate?new Date(r.ModDate):void 0},d=[],c=[];for(let o=1;o<=s;o++){let g=await a.getPage(o),u=g.getViewport({scale:1}),p=(await g.getTextContent()).items.filter(l).map((y)=>y.str).join(" ");d.push({pageNumber:o,width:u.width,height:u.height,text:this.cleanUpContent(p)}),c.push(p)}return{pageCount:s,metadata:i,text:this.cleanUpContent(c.join(`
4
+ `)),pages:d}}cleanUpContent(e){try{return e.split("").filter((s)=>{let n=s.charCodeAt(0);return!(n===0||n>=1&&n<=8||n>=11&&n<=12||n>=14&&n<=31||n===127)}).join("").replace(/[^\S\r\n]+/g," ").replace(/[ \t]+(\r?\n)/g,"$1").trim()}catch(t){return x.error(`PdfService: Failed to clean up content - error: ${t}, contentLength: ${e.length}`),e}}}var T={name:"pdf",description:"Plugin for PDF reading and text extraction",services:[f],actions:[]},v=T;export{T as pdfPlugin,v as default,f as PdfService};
5
+
6
+ //# debugId=C0CE41DC4C7986B264756E2164756E21
@@ -0,0 +1,11 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../../services/pdf.ts", "../../index.ts"],
4
+ "sourcesContent": [
5
+ "import type { IAgentRuntime } from \"@elizaos/core\";\nimport { logger, Service, ServiceType } from \"@elizaos/core\";\nimport pkg from \"pdfjs-dist\";\n\nconst { getDocument } = pkg;\n\nimport type { TextItem, TextMarkedContent } from \"pdfjs-dist/types/src/display/api\";\n\nimport type {\n PdfConversionResult,\n PdfDocumentInfo,\n PdfExtractionOptions,\n PdfMetadata,\n PdfPageInfo,\n} from \"../types\";\n\nfunction isTextItem(item: TextItem | TextMarkedContent): item is TextItem {\n return \"str\" in item;\n}\n\nexport class PdfService extends Service {\n static serviceType = ServiceType.PDF;\n capabilityDescription = \"The agent is able to convert PDF files to text\";\n\n static async start(runtime: IAgentRuntime): Promise<PdfService> {\n const service = new PdfService(runtime);\n return service;\n }\n\n static async stop(runtime: IAgentRuntime): Promise<void> {\n const service = runtime.getService(ServiceType.PDF);\n if (service) {\n await service.stop();\n }\n }\n\n async stop(): Promise<void> {}\n\n async convertPdfToText(pdfBuffer: Buffer): Promise<string> {\n try {\n const uint8Array = new Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const textPages: string[] = [];\n\n for (let pageNum = 1; pageNum <= numPages; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const textContent = await page.getTextContent();\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(\" \");\n textPages.push(pageText);\n }\n\n const rawText = textPages.join(\"\\n\");\n return this.cleanUpContent(rawText);\n } catch (error) {\n logger.error(\n `PdfService: Failed to convert PDF to text - error: ${error}, bufferSize: ${pdfBuffer.length}`\n );\n throw error;\n }\n }\n\n async convertPdfToTextWithOptions(\n pdfBuffer: Buffer,\n options: PdfExtractionOptions = {}\n ): Promise<PdfConversionResult> {\n try {\n const uint8Array = new 
Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const startPage = Math.max(1, options.startPage || 1);\n const endPage = Math.min(numPages, options.endPage || numPages);\n\n const textPages: string[] = [];\n\n for (let pageNum = startPage; pageNum <= endPage; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const textContent = await page.getTextContent();\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(options.preserveWhitespace ? \"\" : \" \");\n textPages.push(pageText);\n }\n\n let text = textPages.join(\"\\n\");\n\n if (options.cleanContent !== false) {\n text = this.cleanUpContent(text);\n }\n\n return {\n success: true,\n text,\n pageCount: numPages,\n };\n } catch (error) {\n return {\n success: false,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n }\n\n async getDocumentInfo(pdfBuffer: Buffer): Promise<PdfDocumentInfo> {\n const uint8Array = new Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const metadataResult = await pdf.getMetadata();\n const info = metadataResult.info as Record<string, string | Date | undefined>;\n\n const metadata: PdfMetadata = {\n title: info.Title as string | undefined,\n author: info.Author as string | undefined,\n subject: info.Subject as string | undefined,\n keywords: info.Keywords as string | undefined,\n creator: info.Creator as string | undefined,\n producer: info.Producer as string | undefined,\n creationDate: info.CreationDate ? new Date(info.CreationDate as string) : undefined,\n modificationDate: info.ModDate ? 
new Date(info.ModDate as string) : undefined,\n };\n\n const pages: PdfPageInfo[] = [];\n const allText: string[] = [];\n\n for (let pageNum = 1; pageNum <= numPages; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const viewport = page.getViewport({ scale: 1.0 });\n const textContent = await page.getTextContent();\n\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(\" \");\n\n pages.push({\n pageNumber: pageNum,\n width: viewport.width,\n height: viewport.height,\n text: this.cleanUpContent(pageText),\n });\n\n allText.push(pageText);\n }\n\n return {\n pageCount: numPages,\n metadata,\n text: this.cleanUpContent(allText.join(\"\\n\")),\n pages,\n };\n }\n\n cleanUpContent(content: string): string {\n try {\n const filtered = content\n .split(\"\")\n .filter((char) => {\n const charCode = char.charCodeAt(0);\n return !(\n charCode === 0 ||\n (charCode >= 1 && charCode <= 8) ||\n (charCode >= 11 && charCode <= 12) ||\n (charCode >= 14 && charCode <= 31) ||\n charCode === 127\n );\n })\n .join(\"\");\n\n const cleaned = filtered\n .replace(/[^\\S\\r\\n]+/g, \" \")\n .replace(/[ \\t]+(\\r?\\n)/g, \"$1\")\n .trim();\n\n return cleaned;\n } catch (error) {\n logger.error(\n `PdfService: Failed to clean up content - error: ${error}, contentLength: ${content.length}`\n );\n return content;\n }\n }\n}\n\nexport default PdfService;\n",
6
+ "import type { Plugin } from \"@elizaos/core\";\nimport { PdfService } from \"./services/pdf\";\n\nexport * from \"./services\";\nexport * from \"./types\";\n\nexport const pdfPlugin: Plugin = {\n name: \"pdf\",\n description: \"Plugin for PDF reading and text extraction\",\n services: [PdfService],\n actions: [],\n};\n\nexport default pdfPlugin;\n"
7
+ ],
8
+ "mappings": "AACA,iBAAS,aAAQ,iBAAS,sBAC1B,0BAEA,IAAQ,eAAgB,EAYxB,SAAS,CAAU,CAAC,EAAsD,CACxE,MAAO,QAAS,EAGX,MAAM,UAAmB,CAAQ,OAC/B,aAAc,EAAY,IACjC,sBAAwB,6DAEX,MAAK,CAAC,EAA6C,CAE9D,OADgB,IAAI,EAAW,CAAO,cAI3B,KAAI,CAAC,EAAuC,CACvD,IAAM,EAAU,EAAQ,WAAW,EAAY,GAAG,EAClD,GAAI,EACF,MAAM,EAAQ,KAAK,OAIjB,KAAI,EAAkB,OAEtB,iBAAgB,CAAC,EAAoC,CACzD,GAAI,CACF,IAAM,EAAa,IAAI,WAAW,CAAS,EACrC,EAAM,MAAM,EAAY,CAAE,KAAM,CAAW,CAAC,EAAE,QAC9C,EAAW,EAAI,SAEf,EAAsB,CAAC,EAE7B,QAAS,EAAU,EAAG,GAAW,EAAU,IAAW,CAGpD,IAAM,GADc,MADP,MAAM,EAAI,QAAQ,CAAO,GACP,eAAe,GACjB,MAC1B,OAAO,CAAU,EACjB,IAAI,CAAC,IAAmB,EAAK,GAAG,EAChC,KAAK,GAAG,EACX,EAAU,KAAK,CAAQ,EAGzB,IAAM,EAAU,EAAU,KAAK;AAAA,CAAI,EACnC,OAAO,KAAK,eAAe,CAAO,EAClC,MAAO,EAAO,CAId,MAHA,EAAO,MACL,sDAAsD,kBAAsB,EAAU,QACxF,EACM,QAIJ,4BAA2B,CAC/B,EACA,EAAgC,CAAC,EACH,CAC9B,GAAI,CACF,IAAM,EAAa,IAAI,WAAW,CAAS,EACrC,EAAM,MAAM,EAAY,CAAE,KAAM,CAAW,CAAC,EAAE,QAC9C,EAAW,EAAI,SAEf,EAAY,KAAK,IAAI,EAAG,EAAQ,WAAa,CAAC,EAC9C,EAAU,KAAK,IAAI,EAAU,EAAQ,SAAW,CAAQ,EAExD,EAAsB,CAAC,EAE7B,QAAS,EAAU,EAAW,GAAW,EAAS,IAAW,CAG3D,IAAM,GADc,MADP,MAAM,EAAI,QAAQ,CAAO,GACP,eAAe,GACjB,MAC1B,OAAO,CAAU,EACjB,IAAI,CAAC,IAAmB,EAAK,GAAG,EAChC,KAAK,EAAQ,mBAAqB,GAAK,GAAG,EAC7C,EAAU,KAAK,CAAQ,EAGzB,IAAI,EAAO,EAAU,KAAK;AAAA,CAAI,EAE9B,GAAI,EAAQ,eAAiB,GAC3B,EAAO,KAAK,eAAe,CAAI,EAGjC,MAAO,CACL,QAAS,GACT,OACA,UAAW,CACb,EACA,MAAO,EAAO,CACd,MAAO,CACL,QAAS,GACT,MAAO,aAAiB,MAAQ,EAAM,QAAU,OAAO,CAAK,CAC9D,QAIE,gBAAe,CAAC,EAA6C,CACjE,IAAM,EAAa,IAAI,WAAW,CAAS,EACrC,EAAM,MAAM,EAAY,CAAE,KAAM,CAAW,CAAC,EAAE,QAC9C,EAAW,EAAI,SAGf,GADiB,MAAM,EAAI,YAAY,GACjB,KAEtB,EAAwB,CAC5B,MAAO,EAAK,MACZ,OAAQ,EAAK,OACb,QAAS,EAAK,QACd,SAAU,EAAK,SACf,QAAS,EAAK,QACd,SAAU,EAAK,SACf,aAAc,EAAK,aAAe,IAAI,KAAK,EAAK,YAAsB,EAAI,OAC1E,iBAAkB,EAAK,QAAU,IAAI,KAAK,EAAK,OAAiB,EAAI,MACtE,EAEM,EAAuB,CAAC,EACxB,EAAoB,CAAC,EAE3B,QAAS,EAAU,EAAG,GAAW,EAAU,IAAW,CACpD,IAAM,EAAO,MAAM,EAAI,QAAQ,CAAO,EAChC,EAAW,EAAK,YAAY,CAAE,MAAO,CAAI,CAAC,EAG1C,GAFc,MAAM,EAAK,eAAe,GAEjB,MAC1B,OAAO,CAAU,EACjB,IAAI,CAAC,IAAmB,EAAK,GAAG,EAChC,KAAK,GAAG,EAEX,EAA
M,KAAK,CACT,WAAY,EACZ,MAAO,EAAS,MAChB,OAAQ,EAAS,OACjB,KAAM,KAAK,eAAe,CAAQ,CACpC,CAAC,EAED,EAAQ,KAAK,CAAQ,EAGvB,MAAO,CACL,UAAW,EACX,WACA,KAAM,KAAK,eAAe,EAAQ,KAAK;AAAA,CAAI,CAAC,EAC5C,OACF,EAGF,cAAc,CAAC,EAAyB,CACtC,GAAI,CAoBF,OAnBiB,EACd,MAAM,EAAE,EACR,OAAO,CAAC,IAAS,CAChB,IAAM,EAAW,EAAK,WAAW,CAAC,EAClC,MAAO,EACL,IAAa,GACZ,GAAY,GAAK,GAAY,GAC7B,GAAY,IAAM,GAAY,IAC9B,GAAY,IAAM,GAAY,IAC/B,IAAa,KAEhB,EACA,KAAK,EAAE,EAGP,QAAQ,cAAe,GAAG,EAC1B,QAAQ,iBAAkB,IAAI,EAC9B,KAAK,EAGR,MAAO,EAAO,CAId,OAHA,EAAO,MACL,mDAAmD,qBAAyB,EAAQ,QACtF,EACO,GAGb,CCtLO,IAAM,EAAoB,CAC/B,KAAM,MACN,YAAa,6CACb,SAAU,CAAC,CAAU,EACrB,QAAS,CAAC,CACZ,EAEe",
9
+ "debugId": "C0CE41DC4C7986B264756E2164756E21",
10
+ "names": []
11
+ }
@@ -0,0 +1,2 @@
1
+ export * from '../index';
2
+ export { default } from '../index';
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env bun
2
+ declare const externalDeps: string[];
3
+ declare function build(): Promise<void>;
4
+ //# sourceMappingURL=build.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"build.d.ts","sourceRoot":"","sources":["../build.ts"],"names":[],"mappings":";AAEA,QAAA,MAAM,YAAY,UAAkC,CAAC;AAErD,iBAAe,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CA+FpC"}
@@ -0,0 +1,2 @@
1
+ export * from '../index';
2
+ export { default } from '../index';
@@ -0,0 +1,186 @@
1
+ var __create = Object.create;
2
+ var __getProtoOf = Object.getPrototypeOf;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
6
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
7
+ var __toESM = (mod, isNodeMode, target) => {
8
+ target = mod != null ? __create(__getProtoOf(mod)) : {};
9
+ const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
10
+ for (let key of __getOwnPropNames(mod))
11
+ if (!__hasOwnProp.call(to, key))
12
+ __defProp(to, key, {
13
+ get: () => mod[key],
14
+ enumerable: true
15
+ });
16
+ return to;
17
+ };
18
+ var __moduleCache = /* @__PURE__ */ new WeakMap;
19
+ var __toCommonJS = (from) => {
20
+ var entry = __moduleCache.get(from), desc;
21
+ if (entry)
22
+ return entry;
23
+ entry = __defProp({}, "__esModule", { value: true });
24
+ if (from && typeof from === "object" || typeof from === "function")
25
+ __getOwnPropNames(from).map((key) => !__hasOwnProp.call(entry, key) && __defProp(entry, key, {
26
+ get: () => from[key],
27
+ enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
28
+ }));
29
+ __moduleCache.set(from, entry);
30
+ return entry;
31
+ };
32
+ var __export = (target, all) => {
33
+ for (var name in all)
34
+ __defProp(target, name, {
35
+ get: all[name],
36
+ enumerable: true,
37
+ configurable: true,
38
+ set: (newValue) => all[name] = () => newValue
39
+ });
40
+ };
41
+
42
+ // index.node.ts
43
+ var exports_index_node = {};
44
+ __export(exports_index_node, {
45
+ pdfPlugin: () => pdfPlugin,
46
+ default: () => typescript_default,
47
+ PdfService: () => PdfService
48
+ });
49
+ module.exports = __toCommonJS(exports_index_node);
50
+
51
+ // services/pdf.ts
52
+ var import_core = require("@elizaos/core");
53
+ var import_pdfjs_dist = __toESM(require("pdfjs-dist"));
54
+ var { getDocument } = import_pdfjs_dist.default;
55
+ function isTextItem(item) {
56
+ return "str" in item;
57
+ }
58
+
59
+ class PdfService extends import_core.Service {
60
+ static serviceType = import_core.ServiceType.PDF;
61
+ capabilityDescription = "The agent is able to convert PDF files to text";
62
+ static async start(runtime) {
63
+ const service = new PdfService(runtime);
64
+ return service;
65
+ }
66
+ static async stop(runtime) {
67
+ const service = runtime.getService(import_core.ServiceType.PDF);
68
+ if (service) {
69
+ await service.stop();
70
+ }
71
+ }
72
+ async stop() {}
73
+ async convertPdfToText(pdfBuffer) {
74
+ try {
75
+ const uint8Array = new Uint8Array(pdfBuffer);
76
+ const pdf = await getDocument({ data: uint8Array }).promise;
77
+ const numPages = pdf.numPages;
78
+ const textPages = [];
79
+ for (let pageNum = 1;pageNum <= numPages; pageNum++) {
80
+ const page = await pdf.getPage(pageNum);
81
+ const textContent = await page.getTextContent();
82
+ const pageText = textContent.items.filter(isTextItem).map((item) => item.str).join(" ");
83
+ textPages.push(pageText);
84
+ }
85
+ const rawText = textPages.join(`
86
+ `);
87
+ return this.cleanUpContent(rawText);
88
+ } catch (error) {
89
+ import_core.logger.error(`PdfService: Failed to convert PDF to text - error: ${error}, bufferSize: ${pdfBuffer.length}`);
90
+ throw error;
91
+ }
92
+ }
93
+ async convertPdfToTextWithOptions(pdfBuffer, options = {}) {
94
+ try {
95
+ const uint8Array = new Uint8Array(pdfBuffer);
96
+ const pdf = await getDocument({ data: uint8Array }).promise;
97
+ const numPages = pdf.numPages;
98
+ const startPage = Math.max(1, options.startPage || 1);
99
+ const endPage = Math.min(numPages, options.endPage || numPages);
100
+ const textPages = [];
101
+ for (let pageNum = startPage;pageNum <= endPage; pageNum++) {
102
+ const page = await pdf.getPage(pageNum);
103
+ const textContent = await page.getTextContent();
104
+ const pageText = textContent.items.filter(isTextItem).map((item) => item.str).join(options.preserveWhitespace ? "" : " ");
105
+ textPages.push(pageText);
106
+ }
107
+ let text = textPages.join(`
108
+ `);
109
+ if (options.cleanContent !== false) {
110
+ text = this.cleanUpContent(text);
111
+ }
112
+ return {
113
+ success: true,
114
+ text,
115
+ pageCount: numPages
116
+ };
117
+ } catch (error) {
118
+ return {
119
+ success: false,
120
+ error: error instanceof Error ? error.message : String(error)
121
+ };
122
+ }
123
+ }
124
+ async getDocumentInfo(pdfBuffer) {
125
+ const uint8Array = new Uint8Array(pdfBuffer);
126
+ const pdf = await getDocument({ data: uint8Array }).promise;
127
+ const numPages = pdf.numPages;
128
+ const metadataResult = await pdf.getMetadata();
129
+ const info = metadataResult.info;
130
+ const metadata = {
131
+ title: info.Title,
132
+ author: info.Author,
133
+ subject: info.Subject,
134
+ keywords: info.Keywords,
135
+ creator: info.Creator,
136
+ producer: info.Producer,
137
+ creationDate: info.CreationDate ? new Date(info.CreationDate) : undefined,
138
+ modificationDate: info.ModDate ? new Date(info.ModDate) : undefined
139
+ };
140
+ const pages = [];
141
+ const allText = [];
142
+ for (let pageNum = 1;pageNum <= numPages; pageNum++) {
143
+ const page = await pdf.getPage(pageNum);
144
+ const viewport = page.getViewport({ scale: 1 });
145
+ const textContent = await page.getTextContent();
146
+ const pageText = textContent.items.filter(isTextItem).map((item) => item.str).join(" ");
147
+ pages.push({
148
+ pageNumber: pageNum,
149
+ width: viewport.width,
150
+ height: viewport.height,
151
+ text: this.cleanUpContent(pageText)
152
+ });
153
+ allText.push(pageText);
154
+ }
155
+ return {
156
+ pageCount: numPages,
157
+ metadata,
158
+ text: this.cleanUpContent(allText.join(`
159
+ `)),
160
+ pages
161
+ };
162
+ }
163
+ cleanUpContent(content) {
164
+ try {
165
+ const filtered = content.split("").filter((char) => {
166
+ const charCode = char.charCodeAt(0);
167
+ return !(charCode === 0 || charCode >= 1 && charCode <= 8 || charCode >= 11 && charCode <= 12 || charCode >= 14 && charCode <= 31 || charCode === 127);
168
+ }).join("");
169
+ const cleaned = filtered.replace(/[^\S\r\n]+/g, " ").replace(/[ \t]+(\r?\n)/g, "$1").trim();
170
+ return cleaned;
171
+ } catch (error) {
172
+ import_core.logger.error(`PdfService: Failed to clean up content - error: ${error}, contentLength: ${content.length}`);
173
+ return content;
174
+ }
175
+ }
176
+ }
177
+ // index.ts
178
+ var pdfPlugin = {
179
+ name: "pdf",
180
+ description: "Plugin for PDF reading and text extraction",
181
+ services: [PdfService],
182
+ actions: []
183
+ };
184
+ var typescript_default = pdfPlugin;
185
+
186
+ //# debugId=307517DC8AFB564464756E2164756E21
@@ -0,0 +1,11 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../../services/pdf.ts", "../../index.ts"],
4
+ "sourcesContent": [
5
+ "import type { IAgentRuntime } from \"@elizaos/core\";\nimport { logger, Service, ServiceType } from \"@elizaos/core\";\nimport pkg from \"pdfjs-dist\";\n\nconst { getDocument } = pkg;\n\nimport type { TextItem, TextMarkedContent } from \"pdfjs-dist/types/src/display/api\";\n\nimport type {\n PdfConversionResult,\n PdfDocumentInfo,\n PdfExtractionOptions,\n PdfMetadata,\n PdfPageInfo,\n} from \"../types\";\n\nfunction isTextItem(item: TextItem | TextMarkedContent): item is TextItem {\n return \"str\" in item;\n}\n\nexport class PdfService extends Service {\n static serviceType = ServiceType.PDF;\n capabilityDescription = \"The agent is able to convert PDF files to text\";\n\n static async start(runtime: IAgentRuntime): Promise<PdfService> {\n const service = new PdfService(runtime);\n return service;\n }\n\n static async stop(runtime: IAgentRuntime): Promise<void> {\n const service = runtime.getService(ServiceType.PDF);\n if (service) {\n await service.stop();\n }\n }\n\n async stop(): Promise<void> {}\n\n async convertPdfToText(pdfBuffer: Buffer): Promise<string> {\n try {\n const uint8Array = new Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const textPages: string[] = [];\n\n for (let pageNum = 1; pageNum <= numPages; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const textContent = await page.getTextContent();\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(\" \");\n textPages.push(pageText);\n }\n\n const rawText = textPages.join(\"\\n\");\n return this.cleanUpContent(rawText);\n } catch (error) {\n logger.error(\n `PdfService: Failed to convert PDF to text - error: ${error}, bufferSize: ${pdfBuffer.length}`\n );\n throw error;\n }\n }\n\n async convertPdfToTextWithOptions(\n pdfBuffer: Buffer,\n options: PdfExtractionOptions = {}\n ): Promise<PdfConversionResult> {\n try {\n const uint8Array = new 
Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const startPage = Math.max(1, options.startPage || 1);\n const endPage = Math.min(numPages, options.endPage || numPages);\n\n const textPages: string[] = [];\n\n for (let pageNum = startPage; pageNum <= endPage; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const textContent = await page.getTextContent();\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(options.preserveWhitespace ? \"\" : \" \");\n textPages.push(pageText);\n }\n\n let text = textPages.join(\"\\n\");\n\n if (options.cleanContent !== false) {\n text = this.cleanUpContent(text);\n }\n\n return {\n success: true,\n text,\n pageCount: numPages,\n };\n } catch (error) {\n return {\n success: false,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n }\n\n async getDocumentInfo(pdfBuffer: Buffer): Promise<PdfDocumentInfo> {\n const uint8Array = new Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const metadataResult = await pdf.getMetadata();\n const info = metadataResult.info as Record<string, string | Date | undefined>;\n\n const metadata: PdfMetadata = {\n title: info.Title as string | undefined,\n author: info.Author as string | undefined,\n subject: info.Subject as string | undefined,\n keywords: info.Keywords as string | undefined,\n creator: info.Creator as string | undefined,\n producer: info.Producer as string | undefined,\n creationDate: info.CreationDate ? new Date(info.CreationDate as string) : undefined,\n modificationDate: info.ModDate ? 
new Date(info.ModDate as string) : undefined,\n };\n\n const pages: PdfPageInfo[] = [];\n const allText: string[] = [];\n\n for (let pageNum = 1; pageNum <= numPages; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const viewport = page.getViewport({ scale: 1.0 });\n const textContent = await page.getTextContent();\n\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(\" \");\n\n pages.push({\n pageNumber: pageNum,\n width: viewport.width,\n height: viewport.height,\n text: this.cleanUpContent(pageText),\n });\n\n allText.push(pageText);\n }\n\n return {\n pageCount: numPages,\n metadata,\n text: this.cleanUpContent(allText.join(\"\\n\")),\n pages,\n };\n }\n\n cleanUpContent(content: string): string {\n try {\n const filtered = content\n .split(\"\")\n .filter((char) => {\n const charCode = char.charCodeAt(0);\n return !(\n charCode === 0 ||\n (charCode >= 1 && charCode <= 8) ||\n (charCode >= 11 && charCode <= 12) ||\n (charCode >= 14 && charCode <= 31) ||\n charCode === 127\n );\n })\n .join(\"\");\n\n const cleaned = filtered\n .replace(/[^\\S\\r\\n]+/g, \" \")\n .replace(/[ \\t]+(\\r?\\n)/g, \"$1\")\n .trim();\n\n return cleaned;\n } catch (error) {\n logger.error(\n `PdfService: Failed to clean up content - error: ${error}, contentLength: ${content.length}`\n );\n return content;\n }\n }\n}\n\nexport default PdfService;\n",
6
+ "import type { Plugin } from \"@elizaos/core\";\nimport { PdfService } from \"./services/pdf\";\n\nexport * from \"./services\";\nexport * from \"./types\";\n\nexport const pdfPlugin: Plugin = {\n name: \"pdf\",\n description: \"Plugin for PDF reading and text extraction\",\n services: [PdfService],\n actions: [],\n};\n\nexport default pdfPlugin;\n"
7
+ ],
8
+ "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAC6C,IAA7C;AACgB,IAAhB;AAEA,MAAQ,gBAAgB;AAYxB,SAAS,UAAU,CAAC,MAAsD;AAAA,EACxE,OAAO,SAAS;AAAA;AAAA;AAGX,MAAM,mBAAmB,oBAAQ;AAAA,SAC/B,cAAc,wBAAY;AAAA,EACjC,wBAAwB;AAAA,cAEX,MAAK,CAAC,SAA6C;AAAA,IAC9D,MAAM,UAAU,IAAI,WAAW,OAAO;AAAA,IACtC,OAAO;AAAA;AAAA,cAGI,KAAI,CAAC,SAAuC;AAAA,IACvD,MAAM,UAAU,QAAQ,WAAW,wBAAY,GAAG;AAAA,IAClD,IAAI,SAAS;AAAA,MACX,MAAM,QAAQ,KAAK;AAAA,IACrB;AAAA;AAAA,OAGI,KAAI,GAAkB;AAAA,OAEtB,iBAAgB,CAAC,WAAoC;AAAA,IACzD,IAAI;AAAA,MACF,MAAM,aAAa,IAAI,WAAW,SAAS;AAAA,MAC3C,MAAM,MAAM,MAAM,YAAY,EAAE,MAAM,WAAW,CAAC,EAAE;AAAA,MACpD,MAAM,WAAW,IAAI;AAAA,MAErB,MAAM,YAAsB,CAAC;AAAA,MAE7B,SAAS,UAAU,EAAG,WAAW,UAAU,WAAW;AAAA,QACpD,MAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AAAA,QACtC,MAAM,cAAc,MAAM,KAAK,eAAe;AAAA,QAC9C,MAAM,WAAW,YAAY,MAC1B,OAAO,UAAU,EACjB,IAAI,CAAC,SAAmB,KAAK,GAAG,EAChC,KAAK,GAAG;AAAA,QACX,UAAU,KAAK,QAAQ;AAAA,MACzB;AAAA,MAEA,MAAM,UAAU,UAAU,KAAK;AAAA,CAAI;AAAA,MACnC,OAAO,KAAK,eAAe,OAAO;AAAA,MAClC,OAAO,OAAO;AAAA,MACd,mBAAO,MACL,sDAAsD,sBAAsB,UAAU,QACxF;AAAA,MACA,MAAM;AAAA;AAAA;AAAA,OAIJ,4BAA2B,CAC/B,WACA,UAAgC,CAAC,GACH;AAAA,IAC9B,IAAI;AAAA,MACF,MAAM,aAAa,IAAI,WAAW,SAAS;AAAA,MAC3C,MAAM,MAAM,MAAM,YAAY,EAAE,MAAM,WAAW,CAAC,EAAE;AAAA,MACpD,MAAM,WAAW,IAAI;AAAA,MAErB,MAAM,YAAY,KAAK,IAAI,GAAG,QAAQ,aAAa,CAAC;AAAA,MACpD,MAAM,UAAU,KAAK,IAAI,UAAU,QAAQ,WAAW,QAAQ;AAAA,MAE9D,MAAM,YAAsB,CAAC;AAAA,MAE7B,SAAS,UAAU,UAAW,WAAW,SAAS,WAAW;AAAA,QAC3D,MAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AAAA,QACtC,MAAM,cAAc,MAAM,KAAK,eAAe;AAAA,QAC9C,MAAM,WAAW,YAAY,MAC1B,OAAO,UAAU,EACjB,IAAI,CAAC,SAAmB,KAAK,GAAG,EAChC,KAAK,QAAQ,qBAAqB,KAAK,GAAG;AAAA,QAC7C,UAAU,KAAK,QAAQ;AAAA,MACzB;AAAA,MAEA,IAAI,OAAO,UAAU,KAAK;AAAA,CAAI;AAAA,MAE9B,IAAI,QAAQ,iBAAiB,OAAO;AAAA,QAClC,OAAO,KAAK,eAAe,IAAI;AAAA,MACjC;AAAA,MAEA,OAAO;AAAA,QACL,SAAS;AAAA,QACT;AAAA,QACA,WAAW;AAAA,MACb;AAAA,MACA,OAAO,OAAO;AAAA,MACd,OAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,MAC9D;AAAA;AAAA;AAAA,OAIE,gBAAe,CAAC,WAA6C;AAAA,IACjE,MAAM,aAAa,IAAI,WAAW,SAAS;AAAA,IAC3C,MA
AM,MAAM,MAAM,YAAY,EAAE,MAAM,WAAW,CAAC,EAAE;AAAA,IACpD,MAAM,WAAW,IAAI;AAAA,IAErB,MAAM,iBAAiB,MAAM,IAAI,YAAY;AAAA,IAC7C,MAAM,OAAO,eAAe;AAAA,IAE5B,MAAM,WAAwB;AAAA,MAC5B,OAAO,KAAK;AAAA,MACZ,QAAQ,KAAK;AAAA,MACb,SAAS,KAAK;AAAA,MACd,UAAU,KAAK;AAAA,MACf,SAAS,KAAK;AAAA,MACd,UAAU,KAAK;AAAA,MACf,cAAc,KAAK,eAAe,IAAI,KAAK,KAAK,YAAsB,IAAI;AAAA,MAC1E,kBAAkB,KAAK,UAAU,IAAI,KAAK,KAAK,OAAiB,IAAI;AAAA,IACtE;AAAA,IAEA,MAAM,QAAuB,CAAC;AAAA,IAC9B,MAAM,UAAoB,CAAC;AAAA,IAE3B,SAAS,UAAU,EAAG,WAAW,UAAU,WAAW;AAAA,MACpD,MAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AAAA,MACtC,MAAM,WAAW,KAAK,YAAY,EAAE,OAAO,EAAI,CAAC;AAAA,MAChD,MAAM,cAAc,MAAM,KAAK,eAAe;AAAA,MAE9C,MAAM,WAAW,YAAY,MAC1B,OAAO,UAAU,EACjB,IAAI,CAAC,SAAmB,KAAK,GAAG,EAChC,KAAK,GAAG;AAAA,MAEX,MAAM,KAAK;AAAA,QACT,YAAY;AAAA,QACZ,OAAO,SAAS;AAAA,QAChB,QAAQ,SAAS;AAAA,QACjB,MAAM,KAAK,eAAe,QAAQ;AAAA,MACpC,CAAC;AAAA,MAED,QAAQ,KAAK,QAAQ;AAAA,IACvB;AAAA,IAEA,OAAO;AAAA,MACL,WAAW;AAAA,MACX;AAAA,MACA,MAAM,KAAK,eAAe,QAAQ,KAAK;AAAA,CAAI,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA;AAAA,EAGF,cAAc,CAAC,SAAyB;AAAA,IACtC,IAAI;AAAA,MACF,MAAM,WAAW,QACd,MAAM,EAAE,EACR,OAAO,CAAC,SAAS;AAAA,QAChB,MAAM,WAAW,KAAK,WAAW,CAAC;AAAA,QAClC,OAAO,EACL,aAAa,KACZ,YAAY,KAAK,YAAY,KAC7B,YAAY,MAAM,YAAY,MAC9B,YAAY,MAAM,YAAY,MAC/B,aAAa;AAAA,OAEhB,EACA,KAAK,EAAE;AAAA,MAEV,MAAM,UAAU,SACb,QAAQ,eAAe,GAAG,EAC1B,QAAQ,kBAAkB,IAAI,EAC9B,KAAK;AAAA,MAER,OAAO;AAAA,MACP,OAAO,OAAO;AAAA,MACd,mBAAO,MACL,mDAAmD,yBAAyB,QAAQ,QACtF;AAAA,MACA,OAAO;AAAA;AAAA;AAGb;;ACtLO,IAAM,YAAoB;AAAA,EAC/B,MAAM;AAAA,EACN,aAAa;AAAA,EACb,UAAU,CAAC,UAAU;AAAA,EACrB,SAAS,CAAC;AACZ;AAEA,IAAe;",
9
+ "debugId": "307517DC8AFB564464756E2164756E21",
10
+ "names": []
11
+ }
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Auto-generated canonical action/provider/evaluator docs for plugin-pdf.
3
+ * DO NOT EDIT - Generated from prompts/specs/**.
4
+ */
5
+ export type ActionDoc = {
6
+ name: string;
7
+ description: string;
8
+ similes?: readonly string[];
9
+ parameters?: readonly unknown[];
10
+ examples?: readonly (readonly unknown[])[];
11
+ };
12
+ export type ProviderDoc = {
13
+ name: string;
14
+ description: string;
15
+ position?: number;
16
+ dynamic?: boolean;
17
+ };
18
+ export type EvaluatorDoc = {
19
+ name: string;
20
+ description: string;
21
+ similes?: readonly string[];
22
+ alwaysRun?: boolean;
23
+ examples?: readonly unknown[];
24
+ };
25
+ export declare const coreActionsSpec: {
26
+ readonly version: "1.0.0";
27
+ readonly actions: readonly [];
28
+ };
29
+ export declare const allActionsSpec: {
30
+ readonly version: "1.0.0";
31
+ readonly actions: readonly [];
32
+ };
33
+ export declare const coreProvidersSpec: {
34
+ readonly version: "1.0.0";
35
+ readonly providers: readonly [];
36
+ };
37
+ export declare const allProvidersSpec: {
38
+ readonly version: "1.0.0";
39
+ readonly providers: readonly [];
40
+ };
41
+ export declare const coreEvaluatorsSpec: {
42
+ readonly version: "1.0.0";
43
+ readonly evaluators: readonly [];
44
+ };
45
+ export declare const allEvaluatorsSpec: {
46
+ readonly version: "1.0.0";
47
+ readonly evaluators: readonly [];
48
+ };
49
+ export declare const coreActionDocs: readonly ActionDoc[];
50
+ export declare const allActionDocs: readonly ActionDoc[];
51
+ export declare const coreProviderDocs: readonly ProviderDoc[];
52
+ export declare const allProviderDocs: readonly ProviderDoc[];
53
+ export declare const coreEvaluatorDocs: readonly EvaluatorDoc[];
54
+ export declare const allEvaluatorDocs: readonly EvaluatorDoc[];
55
+ //# sourceMappingURL=specs.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"specs.d.ts","sourceRoot":"","sources":["../../../generated/specs/specs.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,SAAS,GAAG;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAC5B,UAAU,CAAC,EAAE,SAAS,OAAO,EAAE,CAAC;IAChC,QAAQ,CAAC,EAAE,SAAS,CAAC,SAAS,OAAO,EAAE,CAAC,EAAE,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,WAAW,GAAG;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAC5B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,SAAS,OAAO,EAAE,CAAC;CAC/B,CAAC;AAEF,eAAO,MAAM,eAAe;;;CAGlB,CAAC;AACX,eAAO,MAAM,cAAc;;;CAGjB,CAAC;AACX,eAAO,MAAM,iBAAiB;;;CAGpB,CAAC;AACX,eAAO,MAAM,gBAAgB;;;CAGnB,CAAC;AACX,eAAO,MAAM,kBAAkB;;;CAGrB,CAAC;AACX,eAAO,MAAM,iBAAiB;;;CAGpB,CAAC;AAEX,eAAO,MAAM,cAAc,EAAE,SAAS,SAAS,EAA4B,CAAC;AAC5E,eAAO,MAAM,aAAa,EAAE,SAAS,SAAS,EAA2B,CAAC;AAC1E,eAAO,MAAM,gBAAgB,EAAE,SAAS,WAAW,EAAgC,CAAC;AACpF,eAAO,MAAM,eAAe,EAAE,SAAS,WAAW,EAA+B,CAAC;AAClF,eAAO,MAAM,iBAAiB,EAAE,SAAS,YAAY,EAAkC,CAAC;AACxF,eAAO,MAAM,gBAAgB,EAAE,SAAS,YAAY,EAAiC,CAAC"}
@@ -0,0 +1,3 @@
1
+ export * from "./index";
2
+ export { default } from "./index";
3
+ //# sourceMappingURL=index.browser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.browser.d.ts","sourceRoot":"","sources":["../index.browser.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC"}
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
- import { Plugin } from '@elizaos/core';
2
-
3
- declare const pdfPlugin: Plugin;
4
-
5
- export { pdfPlugin as default, pdfPlugin };
1
+ import type { Plugin } from "@elizaos/core";
2
+ export * from "./services";
3
+ export * from "./types";
4
+ export declare const pdfPlugin: Plugin;
5
+ export default pdfPlugin;
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAG5C,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC;AAExB,eAAO,MAAM,SAAS,EAAE,MAKvB,CAAC;AAEF,eAAe,SAAS,CAAC"}
@@ -0,0 +1,3 @@
1
+ export * from "./index";
2
+ export { default } from "./index";
3
+ //# sourceMappingURL=index.node.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.node.d.ts","sourceRoot":"","sources":["../index.node.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC"}
@@ -0,0 +1,2 @@
1
+ export * from '../index';
2
+ export { default } from '../index';
@@ -0,0 +1,141 @@
1
+ // services/pdf.ts
2
+ import { logger, Service, ServiceType } from "@elizaos/core";
3
+ import pkg from "pdfjs-dist";
4
+ var { getDocument } = pkg;
5
/**
 * Guard used when filtering the items of a page's text content: keeps the
 * entries that expose a `str` property (text items) and rejects the ones that
 * do not (marked-content entries).
 *
 * The check follows the prototype chain, exactly like the `in` operator, and
 * throws a `TypeError` when `item` is not an object.
 *
 * @param item - One entry of `textContent.items`.
 * @returns `true` when `item` has a `str` property.
 */
function isTextItem(item) {
  return Reflect.has(item, "str");
}
8
+
9
/**
 * Matches the PDF date format `D:YYYYMMDDHHmmSSOHH'mm'`. Only the year is
 * mandatory; `O` is `+`, `-` or `Z` and introduces the offset from UTC.
 */
const PDF_DATE_PATTERN = /^D:(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?([Zz+-])?(\d{2})?'?(\d{2})?'?/;

/** Control characters removed by `cleanUpContent`: 0-8, 11-12, 14-31 and 127 (tab, LF and CR are kept). */
const CONTROL_CHARACTER_PATTERN = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;

/**
 * Converts a `CreationDate` / `ModDate` entry of a PDF info dictionary to a `Date`.
 *
 * PDF date strings (`D:20240131120000+02'00'`) are not understood by the `Date`
 * constructor, so they are decoded by hand. Values in any other format are
 * passed to the `Date` constructor as a fallback.
 *
 * @param value - Raw metadata value (string, Date, or empty).
 * @returns The parsed date, or `undefined` when the value is empty or cannot be parsed.
 */
function parsePdfDate(value) {
  if (!value) {
    return undefined;
  }
  if (typeof value === "string") {
    const match = PDF_DATE_PATTERN.exec(value.trim());
    if (match) {
      const [
        ,
        year,
        month = "01",
        day = "01",
        hour = "00",
        minute = "00",
        second = "00",
        relation,
        offsetHour = "00",
        offsetMinute = "00"
      ] = match;
      const wallClockMillis = Date.UTC(
        Number(year),
        Number(month) - 1,
        Number(day),
        Number(hour),
        Number(minute),
        Number(second)
      );
      const offsetMillis = (Number(offsetHour) * 60 + Number(offsetMinute)) * 60000;
      // "+hh'mm'" means local time is ahead of UTC, so the offset is subtracted;
      // "-hh'mm'" is the opposite. "Z" or a missing relation is treated as UTC.
      const direction = relation === "+" ? -1 : relation === "-" ? 1 : 0;
      return new Date(wallClockMillis + direction * offsetMillis);
    }
  }
  const fallback = new Date(value);
  return Number.isNaN(fallback.getTime()) ? undefined : fallback;
}

/**
 * Loads a PDF from `pdfBuffer`, runs `task` with the loaded document and always
 * destroys the loading task afterwards so the document's resources are released,
 * whether `task` succeeds or throws.
 *
 * @param pdfBuffer - Raw PDF bytes.
 * @param task - Async callback receiving the loaded document.
 * @returns Whatever `task` resolves to.
 */
async function withPdfDocument(pdfBuffer, task) {
  const loadingTask = getDocument({ data: new Uint8Array(pdfBuffer) });
  try {
    return await task(await loadingTask.promise);
  } finally {
    try {
      await loadingTask.destroy();
    } catch (cleanupError) {
      // Best effort: a failed cleanup must not hide the result or the original error.
      logger.error(`PdfService: Failed to release PDF document - error: ${cleanupError}`);
    }
  }
}

/**
 * Reads the text items of one page and joins their strings with `separator`.
 *
 * @param page - Page object obtained from `pdf.getPage`.
 * @param separator - String placed between consecutive text items.
 * @returns The page text, not yet cleaned.
 */
async function readPageText(page, separator) {
  const textContent = await page.getTextContent();
  return textContent.items.filter(isTextItem).map((item) => item.str).join(separator);
}

/**
 * Service that extracts text and document information from PDF buffers.
 */
class PdfService extends Service {
  static serviceType = ServiceType.PDF;
  capabilityDescription = "The agent is able to convert PDF files to text";

  /**
   * Creates a new service instance for the given runtime.
   *
   * @param runtime - Agent runtime passed to the `Service` constructor.
   * @returns The new `PdfService`.
   */
  static async start(runtime) {
    return new PdfService(runtime);
  }

  /**
   * Looks the PDF service up on the runtime and, when one is found, awaits its `stop()`.
   *
   * @param runtime - Agent runtime to query.
   */
  static async stop(runtime) {
    const service = runtime.getService(ServiceType.PDF);
    if (service) {
      await service.stop();
    }
  }

  /** Instance stop hook; nothing to do because documents are released per call. */
  async stop() {}

  /**
   * Extracts the text of every page. Text items are joined with a space, pages
   * with a newline, and the result is passed through `cleanUpContent`.
   *
   * @param pdfBuffer - Raw PDF bytes.
   * @returns The cleaned document text.
   * @throws Rethrows any loading or extraction error after logging it.
   */
  async convertPdfToText(pdfBuffer) {
    try {
      const rawText = await withPdfDocument(pdfBuffer, async (pdf) => {
        const textPages = [];
        for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
          textPages.push(await readPageText(await pdf.getPage(pageNum), " "));
        }
        return textPages.join("\n");
      });
      return this.cleanUpContent(rawText);
    } catch (error) {
      logger.error(`PdfService: Failed to convert PDF to text - error: ${error}, bufferSize: ${pdfBuffer.length}`);
      throw error;
    }
  }

  /**
   * Extracts text from a page range with configurable joining and cleaning.
   *
   * `startPage` / `endPage` are clamped to the document (missing or `0` values
   * fall back to the first / last page). With `preserveWhitespace` text items are
   * concatenated without a separator, otherwise with a space. Cleaning is applied
   * unless `cleanContent` is exactly `false`.
   *
   * @param pdfBuffer - Raw PDF bytes.
   * @param options - Extraction options; defaults to `{}`.
   * @returns `{ success: true, text, pageCount }`, where `pageCount` is the total
   *   number of pages in the document, or `{ success: false, error }` on failure.
   */
  async convertPdfToTextWithOptions(pdfBuffer, options = {}) {
    try {
      return await withPdfDocument(pdfBuffer, async (pdf) => {
        const numPages = pdf.numPages;
        const startPage = Math.max(1, options.startPage || 1);
        const endPage = Math.min(numPages, options.endPage || numPages);
        const separator = options.preserveWhitespace ? "" : " ";
        const textPages = [];
        for (let pageNum = startPage; pageNum <= endPage; pageNum++) {
          textPages.push(await readPageText(await pdf.getPage(pageNum), separator));
        }
        let text = textPages.join("\n");
        if (options.cleanContent !== false) {
          text = this.cleanUpContent(text);
        }
        return {
          success: true,
          text,
          pageCount: numPages
        };
      });
    } catch (error) {
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }

  /**
   * Collects page count, metadata, the cleaned full text and, for every page,
   * its size at scale 1 and its cleaned text.
   *
   * @param pdfBuffer - Raw PDF bytes.
   * @returns `{ pageCount, metadata, text, pages }`.
   * @throws Rethrows any loading or extraction error after logging it.
   */
  async getDocumentInfo(pdfBuffer) {
    try {
      return await withPdfDocument(pdfBuffer, async (pdf) => {
        const numPages = pdf.numPages;
        const { info } = await pdf.getMetadata();
        const metadata = {
          title: info.Title,
          author: info.Author,
          subject: info.Subject,
          keywords: info.Keywords,
          creator: info.Creator,
          producer: info.Producer,
          creationDate: parsePdfDate(info.CreationDate),
          modificationDate: parsePdfDate(info.ModDate)
        };
        const pages = [];
        const allText = [];
        for (let pageNum = 1; pageNum <= numPages; pageNum++) {
          const page = await pdf.getPage(pageNum);
          const viewport = page.getViewport({ scale: 1 });
          const pageText = await readPageText(page, " ");
          pages.push({
            pageNumber: pageNum,
            width: viewport.width,
            height: viewport.height,
            text: this.cleanUpContent(pageText)
          });
          allText.push(pageText);
        }
        return {
          pageCount: numPages,
          metadata,
          text: this.cleanUpContent(allText.join("\n")),
          pages
        };
      });
    } catch (error) {
      logger.error(`PdfService: Failed to read PDF document info - error: ${error}, bufferSize: ${pdfBuffer?.length}`);
      throw error;
    }
  }

  /**
   * Normalizes extracted text: removes control characters (tab, LF and CR are
   * kept), collapses each run of non-newline whitespace to a single space, drops
   * spaces and tabs that precede a line break, and trims the result.
   *
   * @param content - Text to clean.
   * @returns The cleaned text, or `content` unchanged if cleaning throws (the failure is logged).
   */
  cleanUpContent(content) {
    try {
      return content
        .replace(CONTROL_CHARACTER_PATTERN, "")
        .replace(/[^\S\r\n]+/g, " ")
        .replace(/[ \t]+(\r?\n)/g, "$1")
        .trim();
    } catch (error) {
      logger.error(`PdfService: Failed to clean up content - error: ${error}, contentLength: ${content.length}`);
      return content;
    }
  }
}
127
+ // index.ts
128
/**
 * Plugin descriptor for PDF support: it is named "pdf", contributes the
 * PdfService service and declares no actions.
 */
var pdfPlugin = {
  name: "pdf",
  description: "Plugin for PDF reading and text extraction",
  services: [PdfService],
  actions: [],
};

// Binding that the module's export list publishes as the default export.
var typescript_default = pdfPlugin;
135
+ export {
136
+ pdfPlugin,
137
+ typescript_default as default,
138
+ PdfService
139
+ };
140
+
141
+ //# debugId=D3E0640C90E88EE064756E2164756E21
@@ -0,0 +1,11 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../../services/pdf.ts", "../../index.ts"],
4
+ "sourcesContent": [
5
+ "import type { IAgentRuntime } from \"@elizaos/core\";\nimport { logger, Service, ServiceType } from \"@elizaos/core\";\nimport pkg from \"pdfjs-dist\";\n\nconst { getDocument } = pkg;\n\nimport type { TextItem, TextMarkedContent } from \"pdfjs-dist/types/src/display/api\";\n\nimport type {\n PdfConversionResult,\n PdfDocumentInfo,\n PdfExtractionOptions,\n PdfMetadata,\n PdfPageInfo,\n} from \"../types\";\n\nfunction isTextItem(item: TextItem | TextMarkedContent): item is TextItem {\n return \"str\" in item;\n}\n\nexport class PdfService extends Service {\n static serviceType = ServiceType.PDF;\n capabilityDescription = \"The agent is able to convert PDF files to text\";\n\n static async start(runtime: IAgentRuntime): Promise<PdfService> {\n const service = new PdfService(runtime);\n return service;\n }\n\n static async stop(runtime: IAgentRuntime): Promise<void> {\n const service = runtime.getService(ServiceType.PDF);\n if (service) {\n await service.stop();\n }\n }\n\n async stop(): Promise<void> {}\n\n async convertPdfToText(pdfBuffer: Buffer): Promise<string> {\n try {\n const uint8Array = new Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const textPages: string[] = [];\n\n for (let pageNum = 1; pageNum <= numPages; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const textContent = await page.getTextContent();\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(\" \");\n textPages.push(pageText);\n }\n\n const rawText = textPages.join(\"\\n\");\n return this.cleanUpContent(rawText);\n } catch (error) {\n logger.error(\n `PdfService: Failed to convert PDF to text - error: ${error}, bufferSize: ${pdfBuffer.length}`\n );\n throw error;\n }\n }\n\n async convertPdfToTextWithOptions(\n pdfBuffer: Buffer,\n options: PdfExtractionOptions = {}\n ): Promise<PdfConversionResult> {\n try {\n const uint8Array = new 
Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const startPage = Math.max(1, options.startPage || 1);\n const endPage = Math.min(numPages, options.endPage || numPages);\n\n const textPages: string[] = [];\n\n for (let pageNum = startPage; pageNum <= endPage; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const textContent = await page.getTextContent();\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(options.preserveWhitespace ? \"\" : \" \");\n textPages.push(pageText);\n }\n\n let text = textPages.join(\"\\n\");\n\n if (options.cleanContent !== false) {\n text = this.cleanUpContent(text);\n }\n\n return {\n success: true,\n text,\n pageCount: numPages,\n };\n } catch (error) {\n return {\n success: false,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n }\n\n async getDocumentInfo(pdfBuffer: Buffer): Promise<PdfDocumentInfo> {\n const uint8Array = new Uint8Array(pdfBuffer);\n const pdf = await getDocument({ data: uint8Array }).promise;\n const numPages = pdf.numPages;\n\n const metadataResult = await pdf.getMetadata();\n const info = metadataResult.info as Record<string, string | Date | undefined>;\n\n const metadata: PdfMetadata = {\n title: info.Title as string | undefined,\n author: info.Author as string | undefined,\n subject: info.Subject as string | undefined,\n keywords: info.Keywords as string | undefined,\n creator: info.Creator as string | undefined,\n producer: info.Producer as string | undefined,\n creationDate: info.CreationDate ? new Date(info.CreationDate as string) : undefined,\n modificationDate: info.ModDate ? 
new Date(info.ModDate as string) : undefined,\n };\n\n const pages: PdfPageInfo[] = [];\n const allText: string[] = [];\n\n for (let pageNum = 1; pageNum <= numPages; pageNum++) {\n const page = await pdf.getPage(pageNum);\n const viewport = page.getViewport({ scale: 1.0 });\n const textContent = await page.getTextContent();\n\n const pageText = textContent.items\n .filter(isTextItem)\n .map((item: TextItem) => item.str)\n .join(\" \");\n\n pages.push({\n pageNumber: pageNum,\n width: viewport.width,\n height: viewport.height,\n text: this.cleanUpContent(pageText),\n });\n\n allText.push(pageText);\n }\n\n return {\n pageCount: numPages,\n metadata,\n text: this.cleanUpContent(allText.join(\"\\n\")),\n pages,\n };\n }\n\n cleanUpContent(content: string): string {\n try {\n const filtered = content\n .split(\"\")\n .filter((char) => {\n const charCode = char.charCodeAt(0);\n return !(\n charCode === 0 ||\n (charCode >= 1 && charCode <= 8) ||\n (charCode >= 11 && charCode <= 12) ||\n (charCode >= 14 && charCode <= 31) ||\n charCode === 127\n );\n })\n .join(\"\");\n\n const cleaned = filtered\n .replace(/[^\\S\\r\\n]+/g, \" \")\n .replace(/[ \\t]+(\\r?\\n)/g, \"$1\")\n .trim();\n\n return cleaned;\n } catch (error) {\n logger.error(\n `PdfService: Failed to clean up content - error: ${error}, contentLength: ${content.length}`\n );\n return content;\n }\n }\n}\n\nexport default PdfService;\n",
6
+ "import type { Plugin } from \"@elizaos/core\";\nimport { PdfService } from \"./services/pdf\";\n\nexport * from \"./services\";\nexport * from \"./types\";\n\nexport const pdfPlugin: Plugin = {\n name: \"pdf\",\n description: \"Plugin for PDF reading and text extraction\",\n services: [PdfService],\n actions: [],\n};\n\nexport default pdfPlugin;\n"
7
+ ],
8
+ "mappings": ";AACA;AACA;AAEA,MAAQ,gBAAgB;AAYxB,SAAS,UAAU,CAAC,MAAsD;AAAA,EACxE,OAAO,SAAS;AAAA;AAAA;AAGX,MAAM,mBAAmB,QAAQ;AAAA,SAC/B,cAAc,YAAY;AAAA,EACjC,wBAAwB;AAAA,cAEX,MAAK,CAAC,SAA6C;AAAA,IAC9D,MAAM,UAAU,IAAI,WAAW,OAAO;AAAA,IACtC,OAAO;AAAA;AAAA,cAGI,KAAI,CAAC,SAAuC;AAAA,IACvD,MAAM,UAAU,QAAQ,WAAW,YAAY,GAAG;AAAA,IAClD,IAAI,SAAS;AAAA,MACX,MAAM,QAAQ,KAAK;AAAA,IACrB;AAAA;AAAA,OAGI,KAAI,GAAkB;AAAA,OAEtB,iBAAgB,CAAC,WAAoC;AAAA,IACzD,IAAI;AAAA,MACF,MAAM,aAAa,IAAI,WAAW,SAAS;AAAA,MAC3C,MAAM,MAAM,MAAM,YAAY,EAAE,MAAM,WAAW,CAAC,EAAE;AAAA,MACpD,MAAM,WAAW,IAAI;AAAA,MAErB,MAAM,YAAsB,CAAC;AAAA,MAE7B,SAAS,UAAU,EAAG,WAAW,UAAU,WAAW;AAAA,QACpD,MAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AAAA,QACtC,MAAM,cAAc,MAAM,KAAK,eAAe;AAAA,QAC9C,MAAM,WAAW,YAAY,MAC1B,OAAO,UAAU,EACjB,IAAI,CAAC,SAAmB,KAAK,GAAG,EAChC,KAAK,GAAG;AAAA,QACX,UAAU,KAAK,QAAQ;AAAA,MACzB;AAAA,MAEA,MAAM,UAAU,UAAU,KAAK;AAAA,CAAI;AAAA,MACnC,OAAO,KAAK,eAAe,OAAO;AAAA,MAClC,OAAO,OAAO;AAAA,MACd,OAAO,MACL,sDAAsD,sBAAsB,UAAU,QACxF;AAAA,MACA,MAAM;AAAA;AAAA;AAAA,OAIJ,4BAA2B,CAC/B,WACA,UAAgC,CAAC,GACH;AAAA,IAC9B,IAAI;AAAA,MACF,MAAM,aAAa,IAAI,WAAW,SAAS;AAAA,MAC3C,MAAM,MAAM,MAAM,YAAY,EAAE,MAAM,WAAW,CAAC,EAAE;AAAA,MACpD,MAAM,WAAW,IAAI;AAAA,MAErB,MAAM,YAAY,KAAK,IAAI,GAAG,QAAQ,aAAa,CAAC;AAAA,MACpD,MAAM,UAAU,KAAK,IAAI,UAAU,QAAQ,WAAW,QAAQ;AAAA,MAE9D,MAAM,YAAsB,CAAC;AAAA,MAE7B,SAAS,UAAU,UAAW,WAAW,SAAS,WAAW;AAAA,QAC3D,MAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AAAA,QACtC,MAAM,cAAc,MAAM,KAAK,eAAe;AAAA,QAC9C,MAAM,WAAW,YAAY,MAC1B,OAAO,UAAU,EACjB,IAAI,CAAC,SAAmB,KAAK,GAAG,EAChC,KAAK,QAAQ,qBAAqB,KAAK,GAAG;AAAA,QAC7C,UAAU,KAAK,QAAQ;AAAA,MACzB;AAAA,MAEA,IAAI,OAAO,UAAU,KAAK;AAAA,CAAI;AAAA,MAE9B,IAAI,QAAQ,iBAAiB,OAAO;AAAA,QAClC,OAAO,KAAK,eAAe,IAAI;AAAA,MACjC;AAAA,MAEA,OAAO;AAAA,QACL,SAAS;AAAA,QACT;AAAA,QACA,WAAW;AAAA,MACb;AAAA,MACA,OAAO,OAAO;AAAA,MACd,OAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,MAC9D;AAAA;AAAA;AAAA,OAIE,gBAAe,CAAC,WAA6C;AAAA,IACjE,MAAM,aAAa,IAAI,WAAW,SAAS;AAAA,IAC3C,MAAM,MAAM,MAAM,YAAY,EAAE,MAAM,WAAW,CAAC,EAAE;AAAA,IACpD,MAAM,WAAW,IAAI
;AAAA,IAErB,MAAM,iBAAiB,MAAM,IAAI,YAAY;AAAA,IAC7C,MAAM,OAAO,eAAe;AAAA,IAE5B,MAAM,WAAwB;AAAA,MAC5B,OAAO,KAAK;AAAA,MACZ,QAAQ,KAAK;AAAA,MACb,SAAS,KAAK;AAAA,MACd,UAAU,KAAK;AAAA,MACf,SAAS,KAAK;AAAA,MACd,UAAU,KAAK;AAAA,MACf,cAAc,KAAK,eAAe,IAAI,KAAK,KAAK,YAAsB,IAAI;AAAA,MAC1E,kBAAkB,KAAK,UAAU,IAAI,KAAK,KAAK,OAAiB,IAAI;AAAA,IACtE;AAAA,IAEA,MAAM,QAAuB,CAAC;AAAA,IAC9B,MAAM,UAAoB,CAAC;AAAA,IAE3B,SAAS,UAAU,EAAG,WAAW,UAAU,WAAW;AAAA,MACpD,MAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AAAA,MACtC,MAAM,WAAW,KAAK,YAAY,EAAE,OAAO,EAAI,CAAC;AAAA,MAChD,MAAM,cAAc,MAAM,KAAK,eAAe;AAAA,MAE9C,MAAM,WAAW,YAAY,MAC1B,OAAO,UAAU,EACjB,IAAI,CAAC,SAAmB,KAAK,GAAG,EAChC,KAAK,GAAG;AAAA,MAEX,MAAM,KAAK;AAAA,QACT,YAAY;AAAA,QACZ,OAAO,SAAS;AAAA,QAChB,QAAQ,SAAS;AAAA,QACjB,MAAM,KAAK,eAAe,QAAQ;AAAA,MACpC,CAAC;AAAA,MAED,QAAQ,KAAK,QAAQ;AAAA,IACvB;AAAA,IAEA,OAAO;AAAA,MACL,WAAW;AAAA,MACX;AAAA,MACA,MAAM,KAAK,eAAe,QAAQ,KAAK;AAAA,CAAI,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA;AAAA,EAGF,cAAc,CAAC,SAAyB;AAAA,IACtC,IAAI;AAAA,MACF,MAAM,WAAW,QACd,MAAM,EAAE,EACR,OAAO,CAAC,SAAS;AAAA,QAChB,MAAM,WAAW,KAAK,WAAW,CAAC;AAAA,QAClC,OAAO,EACL,aAAa,KACZ,YAAY,KAAK,YAAY,KAC7B,YAAY,MAAM,YAAY,MAC9B,YAAY,MAAM,YAAY,MAC/B,aAAa;AAAA,OAEhB,EACA,KAAK,EAAE;AAAA,MAEV,MAAM,UAAU,SACb,QAAQ,eAAe,GAAG,EAC1B,QAAQ,kBAAkB,IAAI,EAC9B,KAAK;AAAA,MAER,OAAO;AAAA,MACP,OAAO,OAAO;AAAA,MACd,OAAO,MACL,mDAAmD,yBAAyB,QAAQ,QACtF;AAAA,MACA,OAAO;AAAA;AAAA;AAGb;;ACtLO,IAAM,YAAoB;AAAA,EAC/B,MAAM;AAAA,EACN,aAAa;AAAA,EACb,UAAU,CAAC,UAAU;AAAA,EACrB,SAAS,CAAC;AACZ;AAEA,IAAe;",
9
+ "debugId": "D3E0640C90E88EE064756E2164756E21",
10
+ "names": []
11
+ }
@@ -0,0 +1,2 @@
1
+ export { default, PdfService } from "./pdf";
2
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../services/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,OAAO,CAAC"}
@@ -0,0 +1,16 @@
1
+ import type { IAgentRuntime } from "@elizaos/core";
2
+ import { Service } from "@elizaos/core";
3
+ import type { PdfConversionResult, PdfDocumentInfo, PdfExtractionOptions } from "../types";
4
+ /** Service that extracts text and document information from PDF buffers. */ export declare class PdfService extends Service {
5
+ /** Service type identifier; the static `stop` uses the same key to look the running service up on the runtime. */ static serviceType: "pdf";
6
+ /** Human-readable description of the capability this service provides. */ capabilityDescription: string;
7
+ /** Creates a new PdfService for the given runtime and resolves with it. */ static start(runtime: IAgentRuntime): Promise<PdfService>;
8
+ /** Looks the PDF service up on the runtime and, when one is found, awaits its instance `stop()`. */ static stop(runtime: IAgentRuntime): Promise<void>;
9
+ /** Instance stop hook; the shipped implementation has an empty body. */ stop(): Promise<void>;
10
+ /** Extracts the text of every page (text items joined by spaces, pages by newlines) and cleans it with `cleanUpContent`; failures are logged and rethrown. */ convertPdfToText(pdfBuffer: Buffer): Promise<string>;
11
+ /** Extracts text for an optional page range with optional whitespace/cleaning settings; failures are returned as `{ success: false, error }` instead of being thrown. */ convertPdfToTextWithOptions(pdfBuffer: Buffer, options?: PdfExtractionOptions): Promise<PdfConversionResult>;
12
+ /** Resolves with page count, metadata, cleaned full text and per-page width/height/text; errors propagate to the caller. */ getDocumentInfo(pdfBuffer: Buffer): Promise<PdfDocumentInfo>;
13
+ /** Removes control characters (tab, LF and CR are kept), collapses non-newline whitespace runs to one space, strips spaces/tabs before line breaks and trims; returns the input unchanged if cleaning throws. */ cleanUpContent(content: string): string;
14
+ }
15
+ export default PdfService;
16
+ //# sourceMappingURL=pdf.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../services/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AACnD,OAAO,EAAU,OAAO,EAAe,MAAM,eAAe,CAAC;AAO7D,OAAO,KAAK,EACV,mBAAmB,EACnB,eAAe,EACf,oBAAoB,EAGrB,MAAM,UAAU,CAAC;AAMlB,qBAAa,UAAW,SAAQ,OAAO;IACrC,MAAM,CAAC,WAAW,QAAmB;IACrC,qBAAqB,SAAoD;WAE5D,KAAK,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,UAAU,CAAC;WAKlD,IAAI,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;IAOlD,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAErB,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IA4BpD,2BAA2B,CAC/B,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAAC,mBAAmB,CAAC;IAwCzB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAkDlE,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM;CA6BxC;AAED,eAAe,UAAU,CAAC"}