@imayuur/contexthub-plugin-pdf 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +11 -0
- package/dist/index.js +71 -0
- package/package.json +53 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export declare class PdfParser {
    /**
     * Reads the PDF stored at `filePath` and extracts its text.
     *
     * Security limits on file size and page count are enforced while
     * parsing; the returned promise rejects when either limit is exceeded.
     *
     * @param filePath - Path of the PDF file to read.
     * @returns The extracted text, the page count, and the document metadata.
     */
    parsePdf(filePath: string): Promise<{ text: string; pages: number; metadata: any }>;
}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.PdfParser = void 0;
|
|
40
|
+
const fs = __importStar(require("fs"));
|
|
41
|
+
const pdf_parse_1 = __importDefault(require("pdf-parse"));
|
|
42
|
+
const contexthub_core_1 = require("@imayuur/contexthub-core");
|
|
43
|
+
class PdfParser {
|
|
44
|
+
/**
|
|
45
|
+
* Parse a PDF file and extract its text content.
|
|
46
|
+
* Enforces security constraints on file size and page count.
|
|
47
|
+
*/
|
|
48
|
+
async parsePdf(filePath) {
|
|
49
|
+
// 1. Check file size
|
|
50
|
+
const stats = fs.statSync(filePath);
|
|
51
|
+
if (stats.size > contexthub_core_1.MAX_PDF_SIZE_BYTES) {
|
|
52
|
+
throw new Error(`PDF file exceeds maximum allowed size of ${contexthub_core_1.MAX_PDF_SIZE_BYTES / (1024 * 1024)}MB`);
|
|
53
|
+
}
|
|
54
|
+
// 2. Read file
|
|
55
|
+
const dataBuffer = fs.readFileSync(filePath);
|
|
56
|
+
// 3. Parse PDF with limit checking
|
|
57
|
+
const data = await (0, pdf_parse_1.default)(dataBuffer, {
|
|
58
|
+
max: contexthub_core_1.MAX_PDF_PAGES, // Stop parsing if it exceeds max pages
|
|
59
|
+
});
|
|
60
|
+
// 4. Verify page count (pdfParse max option limits what's returned, but we should also enforce it explicitly)
|
|
61
|
+
if (data.numpages > contexthub_core_1.MAX_PDF_PAGES) {
|
|
62
|
+
throw new Error(`PDF exceeds maximum allowed page count of ${contexthub_core_1.MAX_PDF_PAGES}`);
|
|
63
|
+
}
|
|
64
|
+
return {
|
|
65
|
+
text: data.text,
|
|
66
|
+
pages: data.numpages,
|
|
67
|
+
metadata: data.info
|
|
68
|
+
};
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
exports.PdfParser = PdfParser;
|
package/package.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@imayuur/contexthub-plugin-pdf",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Optional PDF ingest plugin for ContextHub",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"build": "tsc",
|
|
9
|
+
"dev": "tsc --watch",
|
|
10
|
+
"prepublishOnly": "npm run build"
|
|
11
|
+
},
|
|
12
|
+
"dependencies": {
|
|
13
|
+
"pdf-parse": "^1.1.1",
|
|
14
|
+
"@imayuur/contexthub-core": "^1.0.0"
|
|
15
|
+
},
|
|
16
|
+
"devDependencies": {
|
|
17
|
+
"typescript": "^5.0.0",
|
|
18
|
+
"@types/node": "^18.0.0"
|
|
19
|
+
},
|
|
20
|
+
"author": "Mayur Dattatray Patil",
|
|
21
|
+
"license": "MIT",
|
|
22
|
+
"repository": {
|
|
23
|
+
"type": "git",
|
|
24
|
+
"url": "git+https://github.com/iMayuuR/contexthub.git",
|
|
25
|
+
"directory": "packages/plugin-pdf"
|
|
26
|
+
},
|
|
27
|
+
"bugs": {
|
|
28
|
+
"url": "https://github.com/iMayuuR/contexthub/issues"
|
|
29
|
+
},
|
|
30
|
+
"homepage": "https://github.com/iMayuuR/contexthub#readme",
|
|
31
|
+
"publishConfig": {
|
|
32
|
+
"access": "public"
|
|
33
|
+
},
|
|
34
|
+
"engines": {
|
|
35
|
+
"node": ">=18"
|
|
36
|
+
},
|
|
37
|
+
"keywords": [
|
|
38
|
+
"contexthub",
|
|
39
|
+
"mcp",
|
|
40
|
+
"ai-memory",
|
|
41
|
+
"cursor",
|
|
42
|
+
"claude"
|
|
43
|
+
],
|
|
44
|
+
"exports": {
|
|
45
|
+
".": {
|
|
46
|
+
"types": "./dist/index.d.ts",
|
|
47
|
+
"default": "./dist/index.js"
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
"files": [
|
|
51
|
+
"dist"
|
|
52
|
+
]
|
|
53
|
+
}
|