@intelligentelectron/pdf-analyzer 0.1.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -9
- package/dist/cli/commands.d.ts +9 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +96 -25
- package/dist/cli/commands.js.map +1 -1
- package/dist/index.d.ts +2 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -4
- package/dist/index.js.map +1 -1
- package/dist/keychain.d.ts +34 -0
- package/dist/keychain.d.ts.map +1 -0
- package/dist/keychain.js +208 -0
- package/dist/keychain.js.map +1 -0
- package/dist/pdf-utils.d.ts +28 -0
- package/dist/pdf-utils.d.ts.map +1 -0
- package/dist/pdf-utils.js +93 -0
- package/dist/pdf-utils.js.map +1 -0
- package/dist/providers/anthropic.d.ts +9 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +69 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/google.d.ts +13 -0
- package/dist/providers/google.d.ts.map +1 -0
- package/dist/providers/google.js +120 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/openai.d.ts +9 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +70 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/registry.d.ts +23 -0
- package/dist/providers/registry.d.ts.map +1 -0
- package/dist/providers/registry.js +44 -0
- package/dist/providers/registry.js.map +1 -0
- package/dist/providers/types.d.ts +70 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/server.d.ts +2 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +27 -35
- package/dist/server.js.map +1 -1
- package/dist/service.d.ts +7 -35
- package/dist/service.d.ts.map +1 -1
- package/dist/service.js +99 -302
- package/dist/service.js.map +1 -1
- package/dist/types.d.ts +20 -12
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +15 -45
- package/dist/types.js.map +1 -1
- package/package.json +12 -4
package/dist/server.js
CHANGED
|
@@ -1,51 +1,53 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* PDF Analyzer MCP Server
|
|
3
3
|
*
|
|
4
|
-
* Model Context Protocol server for analyzing PDF documents using
|
|
4
|
+
* Model Context Protocol server for analyzing PDF documents using
|
|
5
|
+
* a configurable LLM provider (Google Gemini, Anthropic Claude, or OpenAI).
|
|
5
6
|
*/
|
|
6
7
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
7
8
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
8
9
|
import { z } from "zod";
|
|
9
10
|
import { VERSION } from "./version.js";
|
|
10
|
-
import {
|
|
11
|
+
import { analyzePdf } from "./service.js";
|
|
12
|
+
import { resolveActiveProvider } from "./providers/registry.js";
|
|
11
13
|
// =============================================================================
|
|
12
14
|
// Server Instructions
|
|
13
15
|
// =============================================================================
|
|
14
16
|
const SERVER_INSTRUCTIONS = `
|
|
15
17
|
# PDF Analyzer MCP Server
|
|
16
18
|
|
|
17
|
-
Analyzes PDF documents using
|
|
19
|
+
Analyzes PDF documents using AI vision capabilities. Supports multiple LLM providers
|
|
20
|
+
(Google Gemini, Anthropic Claude, OpenAI).
|
|
18
21
|
|
|
19
22
|
## Tool: analyze_pdf
|
|
20
23
|
|
|
21
|
-
Pass an absolute file path, URL, or
|
|
22
|
-
sends it to
|
|
24
|
+
Pass an absolute file path, URL, or cached file URI(s) and a list of queries. The server reads the PDF,
|
|
25
|
+
sends it to the configured LLM with your queries, and returns structured responses.
|
|
23
26
|
|
|
24
|
-
Large PDFs that exceed
|
|
27
|
+
Large PDFs that exceed the model's token limit are automatically split into chunks and processed
|
|
25
28
|
sequentially with rolling context. No user intervention is needed.
|
|
26
29
|
|
|
27
|
-
## Caching Strategy
|
|
30
|
+
## Caching Strategy (Google provider only)
|
|
28
31
|
|
|
29
|
-
|
|
30
|
-
queries on the same document. This avoids re-uploading
|
|
32
|
+
When using Google Gemini, the response includes a \`cached_uris\` array (Gemini File API URIs)
|
|
33
|
+
that you should reuse for subsequent queries on the same document. This avoids re-uploading
|
|
34
|
+
and is cached by Gemini for 48 hours. Other providers return an empty \`cached_uris\` array.
|
|
31
35
|
|
|
32
36
|
**Input types accepted:**
|
|
33
37
|
- Local file path: \`/Users/name/docs/report.pdf\`
|
|
34
38
|
- Web URL: \`https://example.com/doc.pdf\`
|
|
35
|
-
- Gemini file URI: \`https://generativelanguage.googleapis.com/v1beta/files/abc123\` (from previous response)
|
|
36
|
-
- Array of Gemini file URIs: for re-analyzing a previously chunked document
|
|
39
|
+
- Gemini file URI: \`https://generativelanguage.googleapis.com/v1beta/files/abc123\` (Google only, from previous response)
|
|
40
|
+
- Array of Gemini file URIs: for re-analyzing a previously chunked document (Google only)
|
|
37
41
|
|
|
38
|
-
**Workflow for multiple queries on same document:**
|
|
39
|
-
1. First call: pass local path or URL
|
|
40
|
-
2. Subsequent calls: pass the \`cached_uris\` value as \`pdf_source\`
|
|
41
|
-
- If \`cached_uris\` has one element, pass the single URI string
|
|
42
|
-
- If \`cached_uris\` has multiple elements (chunked PDF), pass the full array
|
|
42
|
+
**Workflow for multiple queries on same document (Google provider):**
|
|
43
|
+
1. First call: pass local path or URL -> receive \`cached_uris\` in response
|
|
44
|
+
2. Subsequent calls: pass the \`cached_uris\` value as \`pdf_source\` -> no re-upload, faster response
|
|
43
45
|
|
|
44
46
|
## Usage Tips
|
|
45
47
|
|
|
46
48
|
- Ask specific, focused queries for best results
|
|
47
49
|
- For multi-page PDFs, reference page numbers in queries when relevant
|
|
48
|
-
-
|
|
50
|
+
- With Google provider, reuse the returned \`cached_uris\` for follow-up questions
|
|
49
51
|
|
|
50
52
|
## Example
|
|
51
53
|
|
|
@@ -63,13 +65,9 @@ queries on the same document. This avoids re-uploading and is cached by Gemini f
|
|
|
63
65
|
## Error Handling
|
|
64
66
|
|
|
65
67
|
Common errors and solutions:
|
|
66
|
-
- Missing
|
|
68
|
+
- Missing provider/API key: Run \`pdf-analyzer --setup\` to choose a provider and store your API key
|
|
67
69
|
- PDF not found: Verify the path is absolute and file exists
|
|
68
70
|
- URL fetch failed: Check that the URL is accessible and points to a valid PDF
|
|
69
|
-
|
|
70
|
-
## Environment Variables
|
|
71
|
-
|
|
72
|
-
- GEMINI_API_KEY: Required. Get your key from https://aistudio.google.com/apikey
|
|
73
71
|
`.trim();
|
|
74
72
|
// =============================================================================
|
|
75
73
|
// Helper Functions
|
|
@@ -113,11 +111,11 @@ export const createServer = () => {
|
|
|
113
111
|
// Tool: analyze_pdf
|
|
114
112
|
// -------------------------------------------------------------------------
|
|
115
113
|
server.registerTool("analyze_pdf", {
|
|
116
|
-
description: "Analyze a PDF document using
|
|
114
|
+
description: "Analyze a PDF document using AI. Provide an absolute file path, URL, cached file URI (from a previous response, Google only), or array of cached file URIs (from a previous chunked response, Google only) and a list of questions to ask about the PDF content. With the Google provider, returns a cached_uris array that can be reused for subsequent queries on the same document.",
|
|
117
115
|
inputSchema: {
|
|
118
116
|
pdf_source: z
|
|
119
117
|
.union([z.string(), z.array(z.string().min(1)).min(1)])
|
|
120
|
-
.describe("PDF source: absolute local file path
|
|
118
|
+
.describe("PDF source: absolute local file path, URL, cached file URI from a previous response (Google only), or array of cached file URIs from a previous chunked response (Google only)"),
|
|
121
119
|
queries: z
|
|
122
120
|
.array(z.string().min(1))
|
|
123
121
|
.min(1)
|
|
@@ -125,25 +123,19 @@ export const createServer = () => {
|
|
|
125
123
|
},
|
|
126
124
|
}, async ({ pdf_source, queries }) => {
|
|
127
125
|
try {
|
|
128
|
-
const
|
|
129
|
-
const result = await analyzePdf(
|
|
126
|
+
const { provider, apiKey, modelId } = resolveActiveProvider();
|
|
127
|
+
const result = await analyzePdf(provider, apiKey, modelId, { pdf_source, queries });
|
|
130
128
|
return formatResult(result);
|
|
131
129
|
}
|
|
132
130
|
catch (error) {
|
|
133
|
-
// Handle typed Gemini API errors
|
|
134
|
-
if (isApiError(error)) {
|
|
135
|
-
const { message, details } = getApiErrorMessage(error);
|
|
136
|
-
return formatError(message, details);
|
|
137
|
-
}
|
|
138
131
|
const message = error instanceof Error ? error.message : "Unknown error occurred";
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
return formatError(message, "Set the GEMINI_API_KEY environment variable in your MCP client configuration.");
|
|
132
|
+
if (message.includes("No provider configured") || message.includes("API key not found")) {
|
|
133
|
+
return formatError(message);
|
|
142
134
|
}
|
|
143
135
|
if (message.includes("not found")) {
|
|
144
136
|
return formatError(message, "Ensure the path is absolute and the file exists.");
|
|
145
137
|
}
|
|
146
|
-
if (message.includes("Failed to fetch
|
|
138
|
+
if (message.includes("Failed to fetch")) {
|
|
147
139
|
return formatError(message, "Check that the URL is accessible and points to a valid PDF file.");
|
|
148
140
|
}
|
|
149
141
|
return formatError(message);
|
package/dist/server.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.js","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"server.js","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AAEhE,gFAAgF;AAChF,sBAAsB;AACtB,gFAAgF;AAEhF,MAAM,mBAAmB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAuD3B,CAAC,IAAI,EAAE,CAAC;AAET,gFAAgF;AAChF,mBAAmB;AACnB,gFAAgF;AAEhF;;GAEG;AACH,MAAM,YAAY,GAAG,CAAC,MAAe,EAAiD,EAAE;IACtF,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IAC7C,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;KAClC,CAAC;AACJ,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,GAAG,CAClB,KAAa,EACb,OAAgB,EAC8C,EAAE;IAChE,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC;IACxD,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;QAClE,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC,CAAC;AAEF,gFAAgF;AAChF,eAAe;AACf,gFAAgF;AAEhF;;GAEG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,GAAc,EAAE;IAC1C,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;QACE,IAAI,EAAE,cAAc;QACpB,OAAO,EAAE,OAAO;KACjB,EACD;QACE,YAAY,EAAE;YACZ,KAAK,EAAE,EAAE;SACV;QACD,YAAY,EAAE,mBAAmB;KAClC,CACF,CAAC;IAEF,4EAA4E;IAC5E,oBAAoB;IACpB,4EAA4E;IAC5E,MAAM,CAAC,YAAY,CACjB,aAAa,EACb;QACE,WAAW,EACT,wXAAwX;QAC1X,WAAW,EAAE;YACX,UAAU,EAAE,CAAC;iBACV,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;iBACtD,QAAQ,CACP,gLAAgL,CACjL;YACH,OAAO,EAAE,CAAC;iBACP,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;iBACxB,GAAG,CAAC,CAAC,CAAC;iBACN,QAAQ,CAAC,yCAAyC,CAAC;SACvD;KACF,EACD,KAAK,EAAE,EAAE,UAAU,EAAE,OAAO,EAAE,EAAE,EAAE;QAChC,IAAI,CAAC;YACH,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,qBAAqB,EAAE,CAAC;YAC9D,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC,CAAC;YACpF,OAAO,YAAY,CAAC,MAAM,CAAC,CAAC;QAC9B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,wBAAwB,CAAC;YAElF,IAAI,OAAO,CAAC,QAAQ,CAAC,wBAAwB,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE,CAAC;gBACxF,OAAO,WAAW,CAAC,OAAO,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBAClC,OAAO,WAAW,CAAC,OAAO,EAAE,kDAAkD,CAAC,CAAC;YAClF,CAAC;YAED,IAAI,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;gBACxC,OAAO,WAAW,CAChB,OAAO,EACP,kEAAkE,CACnE,CAAC;YACJ,CAAC;YAED,OAAO,WAAW,CAAC,OAAO,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,SAAS,GAAG,KAAK,IAAmB,EAAE;IACjD,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;IAC9B,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAClC,CAAC,CAAC"}
|
package/dist/service.d.ts
CHANGED
|
@@ -1,43 +1,15 @@
|
|
|
1
|
-
import { GoogleGenAI, ApiError } from "@google/genai";
|
|
2
1
|
import type { AnalyzePdfInput, AnalyzePdfResponse } from "./types.js";
|
|
2
|
+
import type { ProviderConfig } from "./providers/types.js";
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
5
|
-
* Loads GEMINI_API_KEY from .env file if not already set in environment.
|
|
6
|
-
*/
|
|
7
|
-
export declare function createGeminiClient(): GoogleGenAI;
|
|
8
|
-
/**
|
|
9
|
-
* Check if a string is a Gemini File API URI.
|
|
10
|
-
*/
|
|
11
|
-
export declare function isGeminiFileUri(source: string): boolean;
|
|
12
|
-
/**
|
|
13
|
-
* Check if a string is a URL (excluding Gemini File API URIs).
|
|
14
|
-
*/
|
|
15
|
-
export declare function isUrl(source: string): boolean;
|
|
16
|
-
/**
|
|
17
|
-
* Validates a local PDF file path.
|
|
18
|
-
* Throws descriptive errors for common issues.
|
|
19
|
-
*/
|
|
20
|
-
export declare function validateLocalPath(pdfPath: string): void;
|
|
21
|
-
/**
|
|
22
|
-
* Analyzes a PDF document using Gemini.
|
|
4
|
+
* Analyzes a PDF document using the configured provider.
|
|
23
5
|
*
|
|
24
6
|
* Routing:
|
|
25
|
-
* - string[]
|
|
26
|
-
* - Gemini URI string
|
|
27
|
-
* - path/URL
|
|
28
|
-
*/
|
|
29
|
-
export declare function analyzePdf(client: GoogleGenAI, input: AnalyzePdfInput): Promise<AnalyzePdfResponse>;
|
|
30
|
-
/**
|
|
31
|
-
* Check if an error is an ApiError and return typed error info.
|
|
32
|
-
*/
|
|
33
|
-
export declare function isApiError(error: unknown): error is ApiError;
|
|
34
|
-
/**
|
|
35
|
-
* Get error message from ApiError, preserving the actual API response.
|
|
7
|
+
* - string[] -> cached chunk URIs, sequential processing with rolling findings (Google only)
|
|
8
|
+
* - Gemini URI string -> direct single-file analysis via cached URI (Google only)
|
|
9
|
+
* - path/URL -> prepare via provider, try full PDF first, split on token limit error
|
|
36
10
|
*/
|
|
37
|
-
export declare function
|
|
38
|
-
message: string;
|
|
39
|
-
details?: string;
|
|
40
|
-
};
|
|
11
|
+
export declare function analyzePdf(provider: ProviderConfig, apiKey: string, modelId: string, input: AnalyzePdfInput): Promise<AnalyzePdfResponse>;
|
|
41
12
|
export type { AnalyzePdfInput, AnalyzePdfResponse, QueryResponse } from "./types.js";
|
|
42
13
|
export { AnalyzePdfInputSchema } from "./types.js";
|
|
14
|
+
export { isGeminiFileUri, isUrl, validateLocalPath } from "./pdf-utils.js";
|
|
43
15
|
//# sourceMappingURL=service.d.ts.map
|
package/dist/service.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../src/service.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"service.d.ts","sourceRoot":"","sources":["../src/service.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,eAAe,EACf,kBAAkB,EAGnB,MAAM,YAAY,CAAC;AAIpB,OAAO,KAAK,EAAE,cAAc,EAAa,MAAM,sBAAsB,CAAC;AAqRtE;;;;;;;GAOG;AACH,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,cAAc,EACxB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,eAAe,GACrB,OAAO,CAAC,kBAAkB,CAAC,CAoC7B;AAGD,YAAY,EAAE,eAAe,EAAE,kBAAkB,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AACrF,OAAO,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,KAAK,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC"}
|