@kaenova/document-intelligence-mcp 0.0.0-20260520.h0940
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +254 -0
- package/dist/formatters/layout-formatter.d.ts +3 -0
- package/dist/formatters/layout-formatter.d.ts.map +1 -0
- package/dist/formatters/layout-formatter.js +89 -0
- package/dist/formatters/layout-formatter.js.map +1 -0
- package/dist/formatters/read-formatter.d.ts +3 -0
- package/dist/formatters/read-formatter.d.ts.map +1 -0
- package/dist/formatters/read-formatter.js +48 -0
- package/dist/formatters/read-formatter.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +32 -0
- package/dist/index.js.map +1 -0
- package/dist/services/cache.d.ts +35 -0
- package/dist/services/cache.d.ts.map +1 -0
- package/dist/services/cache.js +93 -0
- package/dist/services/cache.js.map +1 -0
- package/dist/services/document-intelligence.d.ts +14 -0
- package/dist/services/document-intelligence.d.ts.map +1 -0
- package/dist/services/document-intelligence.js +74 -0
- package/dist/services/document-intelligence.js.map +1 -0
- package/dist/tools/analyze-document.d.ts +7 -0
- package/dist/tools/analyze-document.d.ts.map +1 -0
- package/dist/tools/analyze-document.js +82 -0
- package/dist/tools/analyze-document.js.map +1 -0
- package/dist/tools/schemas.d.ts +10 -0
- package/dist/tools/schemas.d.ts.map +1 -0
- package/dist/tools/schemas.js +18 -0
- package/dist/tools/schemas.js.map +1 -0
- package/dist/types.d.ts +26 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/package.json +40 -0
package/README.md
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# document-intelligence-mcp
|
|
2
|
+
|
|
3
|
+
MCP server for Azure AI Document Intelligence. Exposes a single `analyze_document` tool that lets agents choose between:
|
|
4
|
+
|
|
5
|
+
- **`read`** — OCR-only text extraction (fast, lightweight)
|
|
6
|
+
- **`layout`** — Rich document understanding with tables, selection marks, and structure (recommended for complex documents)
|
|
7
|
+
|
|
8
|
+
Results are automatically cached using SQLite (via `better-sqlite3`) for fast repeated analysis of the same document.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Quick Start
|
|
13
|
+
|
|
14
|
+
### 1. Prerequisites
|
|
15
|
+
|
|
16
|
+
- Bun runtime
|
|
17
|
+
- Azure Document Intelligence resource (create one in Azure Portal if you don't have it)
|
|
18
|
+
|
|
19
|
+
### 2. Install Dependencies
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
bun install
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### 3. Configure Environment
|
|
26
|
+
|
|
27
|
+
Copy the example env file and fill in your Azure credentials:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
cp .env.example .env
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Edit `.env` with your Azure endpoint and key:
|
|
34
|
+
|
|
35
|
+
```env
|
|
36
|
+
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
|
|
37
|
+
AZURE_DOCUMENT_INTELLIGENCE_KEY=your-api-key-here
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### 4. Run the Server
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
bun run dev
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
The server runs in stdio mode and is ready to be used by any MCP-compatible client.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Configuration in Agent Harness (Pi)
|
|
51
|
+
|
|
52
|
+
Add this MCP server to your Pi agent by editing the configuration file:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
~/.pi/agent/mcp.json
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Option 1: Using the Published Package (Recommended)
|
|
59
|
+
|
|
60
|
+
```json
|
|
61
|
+
{
|
|
62
|
+
"mcpServers": {
|
|
63
|
+
"document-intelligence": {
|
|
64
|
+
"command": "bunx",
|
|
65
|
+
"args": ["@kaenova/document-intelligence-mcp"],
|
|
66
|
+
"env": {
|
|
67
|
+
"AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT": "https://your-resource.cognitiveservices.azure.com/",
|
|
68
|
+
"AZURE_DOCUMENT_INTELLIGENCE_KEY": "your-api-key"
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Option 2: Local Development (Cloned Repository)
|
|
76
|
+
|
|
77
|
+
```json
|
|
78
|
+
{
|
|
79
|
+
"mcpServers": {
|
|
80
|
+
"document-intelligence": {
|
|
81
|
+
"command": "bun",
|
|
82
|
+
"args": ["run", "src/index.ts"],
|
|
83
|
+
"cwd": "/absolute/path/to/your/document-intelligence-mcp",
|
|
84
|
+
"env": {
|
|
85
|
+
"AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT": "https://your-resource.cognitiveservices.azure.com/",
|
|
86
|
+
"AZURE_DOCUMENT_INTELLIGENCE_KEY": "your-api-key"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Option 3: Using npx
|
|
94
|
+
|
|
95
|
+
```json
|
|
96
|
+
{
|
|
97
|
+
"mcpServers": {
|
|
98
|
+
"document-intelligence": {
|
|
99
|
+
"command": "npx",
|
|
100
|
+
"args": ["-y", "@kaenova/document-intelligence-mcp"],
|
|
101
|
+
"env": {
|
|
102
|
+
"AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT": "https://your-resource.cognitiveservices.azure.com/",
|
|
103
|
+
"AZURE_DOCUMENT_INTELLIGENCE_KEY": "your-api-key"
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Notes
|
|
111
|
+
|
|
112
|
+
- After editing `mcp.json`, restart your Pi agent or reload the MCP configuration.
|
|
113
|
+
- For local development, make sure to replace the `cwd` path with the actual location of your cloned repository.
|
|
114
|
+
- You can also load credentials from a `.env` file instead of hardcoding them in `mcp.json`.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Installation via npm (Recommended for most users)
|
|
119
|
+
|
|
120
|
+
Once published, you can use the package **without cloning** the repository.
|
|
121
|
+
|
|
122
|
+
> **Note:** npm requires semver, so published versions use a semver-compatible date stamp in GMT+7, for example: `0.0.0-20260520.h1530`
|
|
123
|
+
|
|
124
|
+
### Using with `bunx` (recommended if you have Bun)
|
|
125
|
+
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"mcpServers": {
|
|
129
|
+
"document-intelligence": {
|
|
130
|
+
"command": "bunx",
|
|
131
|
+
"args": ["@kaenova/document-intelligence-mcp"],
|
|
132
|
+
"env": {
|
|
133
|
+
"AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT": "https://your-resource.cognitiveservices.azure.com/",
|
|
134
|
+
"AZURE_DOCUMENT_INTELLIGENCE_KEY": "your-api-key-here"
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Using with `npx`
|
|
142
|
+
|
|
143
|
+
```json
|
|
144
|
+
{
|
|
145
|
+
"mcpServers": {
|
|
146
|
+
"document-intelligence": {
|
|
147
|
+
"command": "npx",
|
|
148
|
+
"args": ["-y", "@kaenova/document-intelligence-mcp"],
|
|
149
|
+
"env": {
|
|
150
|
+
"AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT": "https://your-resource.cognitiveservices.azure.com/",
|
|
151
|
+
"AZURE_DOCUMENT_INTELLIGENCE_KEY": "your-api-key-here"
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## Usage
|
|
161
|
+
|
|
162
|
+
### The `analyze_document` Tool
|
|
163
|
+
|
|
164
|
+
This MCP server exposes a single powerful tool:
|
|
165
|
+
|
|
166
|
+
```text
|
|
167
|
+
analyze_document(model, source)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
#### Parameters
|
|
171
|
+
|
|
172
|
+
| Parameter | Type | Description |
|
|
173
|
+
|-----------|-------------------------|-----------------------------------------------------------------------------|
|
|
174
|
+
| `model` | `"read"` \| `"layout"` | The analysis model to use |
|
|
175
|
+
| `source` | `string` | Local file path **or** public HTTPS URL (automatically detected) |
|
|
176
|
+
|
|
177
|
+
#### Choosing the Right Model
|
|
178
|
+
|
|
179
|
+
| Model | Best For | Output Highlights | Speed |
|
|
180
|
+
|----------|-----------------------------------------------|---------------------------------------|-----------|
|
|
181
|
+
| `read` | Simple text extraction, language detection | Raw text, pages, detected languages | Fast |
|
|
182
|
+
| `layout` | Documents with tables, forms, structure | Tables, selection marks, rich layout | Slightly slower |
|
|
183
|
+
|
|
184
|
+
**Recommendation:** Use `layout` unless you specifically only need raw OCR text.
|
|
185
|
+
|
|
186
|
+
#### Examples
|
|
187
|
+
|
|
188
|
+
**Local PDF file with layout analysis:**
|
|
189
|
+
|
|
190
|
+
```json
|
|
191
|
+
{
|
|
192
|
+
"model": "layout",
|
|
193
|
+
"source": "/Users/me/invoices/Q2-report.pdf"
|
|
194
|
+
}
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
**Public URL with read-only OCR:**
|
|
198
|
+
|
|
199
|
+
```json
|
|
200
|
+
{
|
|
201
|
+
"model": "read",
|
|
202
|
+
"source": "https://example.com/annual-report-2025.pdf"
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
**Image file:**
|
|
207
|
+
|
|
208
|
+
```json
|
|
209
|
+
{
|
|
210
|
+
"model": "layout",
|
|
211
|
+
"source": "./screenshots/contract-page1.png"
|
|
212
|
+
}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Caching
|
|
216
|
+
|
|
217
|
+
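- Results are automatically cached based on the **file content hash** + model for local files. For URL sources the cache key is derived from the **URL string itself** + model (the remote content is not hashed).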
|
|
218
|
+
- If you analyze the same file again with the same model, you get the cached result instantly.
|
|
219
|
+
- Cache is stored in SQLite (`DI_CACHE_PATH`, default: `.cache/di-cache.sqlite`).
|
|
220
|
+
|
|
221
|
+
### Supported File Types
|
|
222
|
+
|
|
223
|
+
- **PDF**
|
|
224
|
+
- **Images**: JPG, JPEG, PNG, BMP, TIFF, HEIF
|
|
225
|
+
- **Office Documents**: DOCX, PPTX, XLSX
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
The tool returns a well-formatted Markdown document with extracted content, tables (when using `layout`), pages, and language information. Results are cached based on file content hash + model.
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
## Development
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
# Watch mode
|
|
237
|
+
bun run dev
|
|
238
|
+
|
|
239
|
+
# Type checking
|
|
240
|
+
bun run typecheck
|
|
241
|
+
|
|
242
|
+
# Build for production
|
|
243
|
+
bun run build
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
MIT
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
*Built with FastMCP and the official Azure AI Form Recognizer SDK.*
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"layout-formatter.d.ts","sourceRoot":"","sources":["../../src/formatters/layout-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAwC,MAAM,2BAA2B,CAAC;AAErG,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAkDhF"}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
 * Render a "layout" model analysis result as a Markdown document.
 *
 * The output always contains a title, the source, a "Content" section and a
 * "Pages" section. "Tables" and "Selection Marks" sections are appended only
 * when the result actually contains tables / selection marks.
 *
 * @param {object} result - Analysis result. Reads `content`, `pages`
 *   (`pageNumber`, `width`, `height`, `unit`, `selectionMarks`), `tables`
 *   and, if present, a top-level `selectionMarks` array.
 * @param {string} source - File path or URL that was analyzed (echoed verbatim).
 * @returns {string} Markdown text, lines joined with "\n".
 */
export function formatLayoutResult(result, source) {
    const pages = result.pages ?? [];
    const lines = [
        "# Document Analysis — Layout Model",
        `**Source:** \`${source}\``,
        "",
    ];

    // Content
    lines.push("## Content");
    lines.push(result.content ? result.content : "_No text content extracted._");
    lines.push("");

    // Pages
    lines.push("## Pages");
    if (pages.length > 0) {
        for (const page of pages) {
            const pageNum = page.pageNumber ?? "?";
            const width = page.width ?? "?";
            const height = page.height ?? "?";
            const unit = page.unit ?? "pixel";
            lines.push(`- Page ${pageNum}: ${width}×${height} ${unit}`);
        }
    }
    else {
        lines.push("- No page information available.");
    }
    lines.push("");

    // Tables
    if (result.tables && result.tables.length > 0) {
        lines.push("## Tables");
        result.tables.forEach((table, idx) => {
            lines.push(formatTable(table, idx + 1));
            lines.push("");
        });
    }

    // Selection marks. The Azure SDK attaches them to each page
    // (`page.selectionMarks`), not to the result root, so reading only
    // `result.selectionMarks` never produced this section for real results.
    // A top-level array is still honoured for backward compatibility.
    const selectionMarks = [
        ...(result.selectionMarks ?? []),
        ...pages.flatMap((page) => page.selectionMarks ?? []),
    ];
    if (selectionMarks.length > 0) {
        lines.push("## Selection Marks");
        selectionMarks.forEach((mark, idx) => {
            lines.push(formatSelectionMark(mark, idx + 1));
        });
        lines.push("");
    }

    return lines.join("\n");
}
|
|
48
|
+
/**
 * Render one extracted table as a Markdown heading plus a pipe table.
 *
 * The first row of the grid is used as the Markdown header row. Cells are
 * placed by `rowIndex` / `columnIndex`; positions the service did not report
 * stay empty.
 *
 * @param {object} table - Table with `rowCount`, `columnCount` and `cells`
 *   (each cell: `rowIndex`, `columnIndex`, `content`).
 * @param {number} tableNum - 1-based table number shown in the heading.
 * @returns {string} Markdown text, lines joined with "\n".
 */
function formatTable(table, tableNum) {
    const rowCount = table.rowCount ?? 0;
    const colCount = table.columnCount ?? 0;
    const lines = [`### Table ${tableNum} (${colCount} columns × ${rowCount} rows)`];

    // A table without rows or without columns has nothing to render.
    if (rowCount <= 0 || colCount <= 0) {
        lines.push("_Empty table_");
        return lines.join("\n");
    }

    const grid = Array.from({ length: rowCount }, () => Array(colCount).fill(""));
    for (const cell of table.cells ?? []) {
        const rowIdx = cell.rowIndex ?? 0;
        const colIdx = cell.columnIndex ?? 0;
        // Ignore cells outside the declared grid so every row keeps exactly
        // `colCount` columns (a ragged row breaks the Markdown table).
        if (rowIdx < 0 || rowIdx >= rowCount || colIdx < 0 || colIdx >= colCount) {
            continue;
        }
        grid[rowIdx][colIdx] = (cell.content ?? "")
            .replace(/\r?\n/g, " ") // a Markdown table cell must stay on one line
            .trim()
            .replace(/\|/g, "\\|"); // a raw pipe would be read as a column separator
    }

    const toRow = (cells) => `| ${cells.join(" | ")} |`;
    const [header, ...body] = grid;
    lines.push(toRow(header));
    lines.push(toRow(header.map(() => "---")));
    for (const row of body) {
        lines.push(toRow(row));
    }
    return lines.join("\n");
}
|
|
82
|
+
/**
 * Render one selection mark (checkbox / radio button) as a Markdown list item.
 *
 * @param {object} mark - Selection mark; reads `state` and `confidence`.
 * @param {number} markNum - 1-based number shown in the label.
 * @returns {string} e.g. "- [x] Selection mark 1 (confidence: 98.0%)".
 */
function formatSelectionMark(mark, markNum) {
    // Anything other than the exact state "selected" renders as unchecked.
    const checkbox = mark.state === "selected" ? "[x]" : "[ ]";
    // Only print a percentage for a real number: `null` used to slip past the
    // `!== undefined` check and was shown as a misleading "0.0%".
    const hasConfidence = typeof mark.confidence === "number" && Number.isFinite(mark.confidence);
    const confidence = hasConfidence
        ? ` (confidence: ${(mark.confidence * 100).toFixed(1)}%)`
        : "";
    return `- ${checkbox} Selection mark ${markNum}${confidence}`;
}
|
|
89
|
+
//# sourceMappingURL=layout-formatter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"layout-formatter.js","sourceRoot":"","sources":["../../src/formatters/layout-formatter.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,kBAAkB,CAAC,MAAqB,EAAE,MAAc;IACtE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IACjD,KAAK,CAAC,IAAI,CAAC,iBAAiB,MAAM,IAAI,CAAC,CAAC;IACxC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,UAAU;IACV,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACzB,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAC7C,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,QAAQ;IACR,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACvB,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,IAAI,GAAG,CAAC;YACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,GAAG,CAAC;YAChC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC;YAClC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,OAAO,CAAC;YAClC,KAAK,CAAC,IAAI,CAAC,UAAU,OAAO,KAAK,KAAK,IAAI,MAAM,IAAI,IAAI,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACjD,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,SAAS;IACT,IAAI,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9C,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACxB,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;YACnC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACxC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,kBAAkB;IAClB,IAAK,MAAc,CAAC,cAAc,IAAK,MAAc,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChF,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QAChC,MAAc,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,IAAS,EAAE,GAAW,EAAE,EAAE;YAChE,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,IAAI,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,WAAW,CAAC,KAAoB,EAAE,QAAgB;IACzD,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B
,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAQ,IAAI,CAAC,CAAC;IACrC,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,IAAI,CAAC,CAAC;IACxC,KAAK,CAAC,IAAI,CAAC,aAAa,QAAQ,KAAK,QAAQ,cAAc,QAAQ,QAAQ,CAAC,CAAC;IAE7E,uBAAuB;IACvB,MAAM,KAAK,GAAe,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IAE3F,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QAChB,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;YAClC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC;YACrC,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;YACnC,IAAI,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;gBAClB,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YAC7D,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;QACjC,aAAa;QACb,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;QAC/C,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;QAEhE,YAAY;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACrB,IAAI,GAAG,EAAE,CAAC;gBACR,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;IACH,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC9B,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAS,EAAE,OAAe;IACrD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;IACxD,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,KAAK,SAAS;QAC9C,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;QACzD,CAAC,CAAC,EAAE,CAAC;IACP,OAAO,KAAK,KAAK,mBAAmB,OAAO,GAAG,UAAU,EAAE,CAAC;AAC7D,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"read-formatter.d.ts","sourceRoot":"","sources":["../../src/formatters/read-formatter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAmD9E"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Render a "read" (OCR-only) model analysis result as a Markdown document.
 *
 * The output contains a title, the source, and "Content", "Pages" and
 * "Languages" sections; each section falls back to a placeholder line when
 * the result has no data for it.
 *
 * @param {object} result - Analysis result. Reads `content`, `pages`
 *   (`pageNumber`, `width`, `height`, `unit`, `words`, `lines`) and
 *   `languages` (`locale`, `confidence`).
 * @param {string} source - File path or URL that was analyzed (echoed verbatim).
 * @returns {string} Markdown text, lines joined with "\n".
 */
export function formatReadResult(result, source) {
    const lines = [
        "# Document Analysis — Read Model",
        `**Source:** \`${source}\``,
        "",
    ];

    // Content
    lines.push("## Content");
    lines.push(result.content ? result.content : "_No text content extracted._");
    lines.push("");

    // Pages
    lines.push("## Pages");
    if (result.pages && result.pages.length > 0) {
        for (const page of result.pages) {
            const pageNum = page.pageNumber ?? "?";
            const width = page.width ?? "?";
            const height = page.height ?? "?";
            const unit = page.unit ?? "pixel";
            const wordCount = page.words?.length ?? 0;
            const lineCount = page.lines?.length ?? 0;
            lines.push(`- Page ${pageNum}: ${width}×${height} ${unit}, ${wordCount} words, ${lineCount} lines`);
        }
    }
    else {
        lines.push("- No page information available.");
    }
    lines.push("");

    // Languages
    lines.push("## Languages");
    if (result.languages && result.languages.length > 0) {
        for (const lang of result.languages) {
            const locale = lang.locale ?? "unknown";
            // Only format a real number: `null` used to pass the
            // `!== undefined` check and was reported as "0.0%".
            const hasConfidence = typeof lang.confidence === "number" && Number.isFinite(lang.confidence);
            const confidence = hasConfidence
                ? `${(lang.confidence * 100).toFixed(1)}%`
                : "N/A";
            lines.push(`- ${locale} (confidence: ${confidence})`);
        }
    }
    else {
        lines.push("- No language information detected.");
    }

    return lines.join("\n");
}
|
|
48
|
+
//# sourceMappingURL=read-formatter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"read-formatter.js","sourceRoot":"","sources":["../../src/formatters/read-formatter.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,gBAAgB,CAAC,MAAqB,EAAE,MAAc;IACpE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IAC/C,KAAK,CAAC,IAAI,CAAC,iBAAiB,MAAM,IAAI,CAAC,CAAC;IACxC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,UAAU;IACV,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACzB,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAC7C,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,QAAQ;IACR,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACvB,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,IAAI,GAAG,CAAC;YACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,GAAG,CAAC;YAChC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC;YAClC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,OAAO,CAAC;YAClC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,EAAE,MAAM,IAAI,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,EAAE,MAAM,IAAI,CAAC,CAAC;YAE1C,KAAK,CAAC,IAAI,CACR,UAAU,OAAO,KAAK,KAAK,IAAI,MAAM,IAAI,IAAI,KAAK,SAAS,WAAW,SAAS,QAAQ,CACxF,CAAC;QACJ,CAAC;IACH,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACjD,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,YAAY;IACZ,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC3B,IAAI,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;YACpC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,SAAS,CAAC;YACxC,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,KAAK,SAAS;gBAC9C,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;gBAC1C,CAAC,CAAC,KAAK,CAAC;YACV,KAAK,CAAC,IAAI,CAAC,KAAK,MAAM,iBAAiB,UAAU,GAAG,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IACpD,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
import { FastMCP } from "fastmcp";
|
|
3
|
+
import { z } from "zod";
|
|
4
|
+
import { AnalyzeDocumentSchema } from "./tools/schemas.js";
|
|
5
|
+
import { analyzeDocument } from "./tools/analyze-document.js";
|
|
6
|
+
// MCP server exposing a single `analyze_document` tool over stdio.
const server = new FastMCP({
    name: "document-intelligence",
    version: "0.1.0",
});

server.addTool({
    name: "analyze_document",
    description: "Analyze a document using Azure AI Document Intelligence. " +
        "Choose 'read' for OCR-only text extraction or 'layout' for rich structure including tables and selection marks. " +
        "The 'source' parameter accepts either a local file path or a public URL (auto-detected). " +
        "Results are cached for faster subsequent calls on unchanged documents.",
    parameters: AnalyzeDocumentSchema,
    // Delegates to analyzeDocument() and returns its result unchanged; the
    // two log calls only bracket the run for the client's log stream.
    execute: async (args, { log }) => {
        log.info("Starting document analysis", { model: args.model, source: args.source });
        const result = await analyzeDocument(args);
        log.info("Document analysis completed", {
            model: args.model,
            source: args.source,
            resultLength: result.length
        });
        return result;
    },
});

// Await startup so the "started" message is only printed once the transport
// is actually up, and so a startup failure is reported instead of surfacing
// as an unhandled promise rejection.
try {
    await server.start({
        transportType: "stdio",
    });
    // Written to stderr so it does not interfere with the stdio transport on stdout.
    console.error("Document Intelligence MCP Server started (stdio mode)");
}
catch (error) {
    console.error("Failed to start Document Intelligence MCP Server:", error);
    process.exit(1);
}
|
|
32
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAE9D,MAAM,MAAM,GAAG,IAAI,OAAO,CAAC;IACzB,IAAI,EAAE,uBAAuB;IAC7B,OAAO,EAAE,OAAO;CACjB,CAAC,CAAC;AAEH,MAAM,CAAC,OAAO,CAAC;IACb,IAAI,EAAE,kBAAkB;IACxB,WAAW,EACT,2DAA2D;QAC3D,kHAAkH;QAClH,2FAA2F;QAC3F,wEAAwE;IAC1E,UAAU,EAAE,qBAAqB;IACjC,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;QAC/B,GAAG,CAAC,IAAI,CAAC,4BAA4B,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAEnF,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,IAAI,CAAC,CAAC;QAE3C,GAAG,CAAC,IAAI,CAAC,6BAA6B,EAAE;YACtC,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,YAAY,EAAE,MAAM,CAAC,MAAM;SAC5B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,CAAC;IACX,aAAa,EAAE,OAAO;CACvB,CAAC,CAAC;AAEH,OAAO,CAAC,KAAK,CAAC,uDAAuD,CAAC,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import type { AnalysisModel } from "../types.js";
|
|
2
|
+
/**
 * SQLite-backed cache for document analysis results.
 *
 * Entries live in an `analysis_cache` table and are looked up by a cache key
 * derived from the analysis model and a content hash.
 */
export declare class AnalysisCache {
|
|
3
|
+
/** Handle to the underlying SQLite database. */
private db;
|
|
4
|
+
/**
 * Open (or create) the cache database, creating its parent directory if
 * needed, and make sure the schema exists.
 *
 * @param cachePath - Path of the SQLite file. When omitted, the
 *   implementation defaults to `./.cache/di-cache.sqlite`.
 */
constructor(cachePath?: string);
|
|
5
|
+
/** Creates the `analysis_cache` table and its `accessed_at` index if they do not exist yet. */
private initSchema;
|
|
6
|
+
/**
 * Compute the hex-encoded SHA-256 digest of the input string.
 */
|
|
9
|
+
private hash;
|
|
10
|
+
/**
 * Compute the cache key for a (model, content hash) pair.
 *
 * @param model - Analysis model the result was produced with.
 * @param contentHash - Hash returned by `computeFileHash` or `computeUrlHash`.
 * @returns Hex SHA-256 digest of the string `"<model>|<contentHash>"`.
 */
|
|
13
|
+
computeCacheKey(model: AnalysisModel, contentHash: string): string;
|
|
14
|
+
/**
 * Read a local file and hash its content.
 *
 * @param filePath - Path of the file to read.
 * @returns Promise resolving to a hex SHA-256 digest derived from the file's content.
 */
|
|
17
|
+
computeFileHash(filePath: string): Promise<string>;
|
|
18
|
+
/**
 * Compute the hash for a URL source. Only the URL string itself is hashed;
 * the remote content is not fetched.
 *
 * @param url - Source URL.
 * @returns Hex SHA-256 digest of the URL string.
 */
|
|
21
|
+
computeUrlHash(url: string): string;
|
|
22
|
+
/**
 * Retrieve a cached result. A hit also refreshes the entry's `accessed_at`
 * timestamp.
 *
 * @param cacheKey - Key produced by `computeCacheKey`.
 * @returns The stored result string, or `null` when there is no entry.
 */
|
|
25
|
+
get(cacheKey: string): string | null;
|
|
26
|
+
/**
 * Store a result, replacing any existing entry with the same key and
 * resetting both its `created_at` and `accessed_at` timestamps.
 *
 * @param cacheKey - Key produced by `computeCacheKey`.
 * @param model - Analysis model the result was produced with.
 * @param sourceType - Whether the source was a local file or a URL.
 * @param contentHash - Hash the key was derived from.
 * @param result - Result text to store.
 */
|
|
29
|
+
set(cacheKey: string, model: AnalysisModel, sourceType: "file" | "url", contentHash: string, result: string): void;
|
|
30
|
+
/**
 * Close the database connection.
 */
|
|
33
|
+
close(): void;
|
|
34
|
+
}
|
|
35
|
+
//# sourceMappingURL=cache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/services/cache.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAMjD,qBAAa,aAAa;IACxB,OAAO,CAAC,EAAE,CAAoB;gBAElB,SAAS,GAAE,MAA2B;IAWlD,OAAO,CAAC,UAAU;IAgBlB;;OAEG;IACH,OAAO,CAAC,IAAI;IAIZ;;OAEG;IACH,eAAe,CAAC,KAAK,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM;IAIlE;;OAEG;IACG,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAKxD;;OAEG;IACH,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM;IAInC;;OAEG;IACH,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAiBpC;;OAEG;IACH,GAAG,CACD,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,aAAa,EACpB,UAAU,EAAE,MAAM,GAAG,KAAK,EAC1B,WAAW,EAAE,MAAM,EACnB,MAAM,EAAE,MAAM,GACb,IAAI;IASP;;OAEG;IACH,KAAK,IAAI,IAAI;CAGd"}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import Database from "better-sqlite3";
|
|
2
|
+
import { createHash } from "crypto";
|
|
3
|
+
import * as fs from "fs";
|
|
4
|
+
import * as path from "path";
|
|
5
|
+
/** Cache database location used when the constructor gets no explicit path. */
const DEFAULT_CACHE_PATH = "./.cache/di-cache.sqlite";

/**
 * SQLite-backed cache for document analysis results.
 *
 * Entries live in the `analysis_cache` table and are looked up by a cache key
 * derived from the analysis model and a content hash.
 */
export class AnalysisCache {
    /** better-sqlite3 database handle. */
    db;

    /**
     * Open (or create) the cache database and make sure the schema exists.
     *
     * @param {string} [cachePath] - Path of the SQLite file.
     */
    constructor(cachePath = DEFAULT_CACHE_PATH) {
        // `recursive: true` makes mkdirSync a no-op for an existing directory,
        // so no separate (and racy) existsSync check is needed.
        fs.mkdirSync(path.dirname(cachePath), { recursive: true });
        this.db = new Database(cachePath);
        this.initSchema();
    }

    /** Create the cache table and the `accessed_at` index if they are missing. */
    initSchema() {
        this.db.exec(`
      CREATE TABLE IF NOT EXISTS analysis_cache (
        cache_key TEXT PRIMARY KEY,
        model TEXT NOT NULL,
        source_type TEXT NOT NULL,
        content_hash TEXT NOT NULL,
        result TEXT NOT NULL,
        created_at TEXT NOT NULL DEFAULT (datetime('now')),
        accessed_at TEXT NOT NULL DEFAULT (datetime('now'))
      );

      CREATE INDEX IF NOT EXISTS idx_accessed_at ON analysis_cache(accessed_at);
    `);
    }

    /**
     * Compute the hex-encoded SHA-256 digest of the input.
     *
     * @param {string | Buffer} input - Text (hashed as UTF-8) or raw bytes.
     * @returns {string} 64-character lowercase hex digest.
     */
    hash(input) {
        return createHash("sha256").update(input).digest("hex");
    }

    /**
     * Compute the cache key for a (model, content hash) pair.
     *
     * @param {string} model - Analysis model name.
     * @param {string} contentHash - Hash from computeFileHash / computeUrlHash.
     * @returns {string} Hex SHA-256 digest of `"<model>|<contentHash>"`.
     */
    computeCacheKey(model, contentHash) {
        return this.hash(`${model}|${contentHash}`);
    }

    /**
     * Compute the SHA-256 hash of a local file's bytes.
     *
     * The buffer is hashed directly. The previous version converted it to a
     * "binary" (latin1) string first, which was then re-encoded as UTF-8 by
     * the hash, so the digest did not match the file's real SHA-256 and the
     * data was copied twice. NOTE: digests for files containing bytes >= 0x80
     * differ from those produced before this change, so entries cached by
     * older versions for such files will simply miss once.
     *
     * @param {string} filePath - Path of the file to read.
     * @returns {Promise<string>} Hex SHA-256 digest of the file content.
     */
    async computeFileHash(filePath) {
        const fileBuffer = await fs.promises.readFile(filePath);
        return this.hash(fileBuffer);
    }

    /**
     * Compute the hash for a URL source (hash of the URL string itself; the
     * remote content is not fetched).
     *
     * @param {string} url - Source URL.
     * @returns {string} Hex SHA-256 digest of the URL string.
     */
    computeUrlHash(url) {
        return this.hash(url);
    }

    /**
     * Retrieve a cached result; a hit also refreshes `accessed_at`.
     *
     * @param {string} cacheKey - Key produced by computeCacheKey.
     * @returns {string | null} Stored result, or null when there is no entry.
     */
    get(cacheKey) {
        const row = this.db
            .prepare(`SELECT result FROM analysis_cache WHERE cache_key = ?`)
            .get(cacheKey);
        if (!row) {
            return null;
        }
        this.db
            .prepare(`UPDATE analysis_cache SET accessed_at = datetime('now') WHERE cache_key = ?`)
            .run(cacheKey);
        return row.result;
    }

    /**
     * Store a result, replacing any entry with the same key and resetting
     * both timestamps.
     *
     * @param {string} cacheKey - Key produced by computeCacheKey.
     * @param {string} model - Analysis model name.
     * @param {"file" | "url"} sourceType - Kind of source that was analyzed.
     * @param {string} contentHash - Hash the key was derived from.
     * @param {string} result - Result text to store.
     */
    set(cacheKey, model, sourceType, contentHash, result) {
        const stmt = this.db.prepare(`
      INSERT OR REPLACE INTO analysis_cache
        (cache_key, model, source_type, content_hash, result, created_at, accessed_at)
      VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))
    `);
        stmt.run(cacheKey, model, sourceType, contentHash, result);
    }

    /** Close the database connection. */
    close() {
        this.db.close();
    }
}
|
|
93
|
+
//# sourceMappingURL=cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/services/cache.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAEpC,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAE7B,MAAM,kBAAkB,GAAG,0BAA0B,CAAC;AAEtD,MAAM,OAAO,aAAa;IAChB,EAAE,CAAoB;IAE9B,YAAY,YAAoB,kBAAkB;QAChD,gCAAgC;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,IAAI,CAAC,EAAE,GAAG,IAAI,QAAQ,CAAC,SAAS,CAAC,CAAC;QAClC,IAAI,CAAC,UAAU,EAAE,CAAC;IACpB,CAAC;IAEO,UAAU;QAChB,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;KAYZ,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,IAAI,CAAC,KAAa;QACxB,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC1D,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,KAAoB,EAAE,WAAmB;QACvD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,KAAK,IAAI,WAAW,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe,CAAC,QAAgB;QACpC,MAAM,UAAU,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,iCAAiC;IACpF,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,GAAW;QACxB,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,QAAgB;QAClB,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;KAE5B,CAAC,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAmC,CAAC;QAEjE,IAAI,GAAG,EAAE,CAAC;YACR,qBAAqB;YACrB,MAAM,UAAU,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;OAElC,CAAC,CAAC;YACH,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACzB,OAAO,GAAG,CAAC,MAAM,CAAC;QACpB,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,GAAG,CACD,QAAgB,EAChB,KAAoB,EACpB,UAA0B,EAC1B,WAAmB,EACnB,MAAc;QAEd,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;;KAI5B,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;CACF"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { AnalyzeResult } from "@azure/ai-form-recognizer";
|
|
2
|
+
import type { AnalysisModel } from "../types.js";
|
|
3
|
+
/**
 * Service wrapper around the Azure Document Intelligence analysis client.
 * The constructor takes no arguments: the compiled implementation reads the
 * endpoint and key from environment variables and throws if either is missing.
 */
export declare class DocumentIntelligenceService {
|
|
4
|
+
private client;
|
|
5
|
+
constructor();
|
|
6
|
+
/**
 * Analyze a document using the specified model and resolve with the
 * analysis result once the service has finished processing it.
 *
 * `source` is treated as a URL when it starts with http:// or https://
 * (case-insensitive); any other value is read as a local file path.
 * The implementation throws for a model name other than "read" or "layout".
 */
|
|
10
|
+
analyzeDocument(source: string, model: AnalysisModel): Promise<AnalyzeResult>;
|
|
11
|
+
private getModelId;
|
|
12
|
+
private getContentType;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=document-intelligence.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-intelligence.d.ts","sourceRoot":"","sources":["../../src/services/document-intelligence.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAMjD,qBAAa,2BAA2B;IACtC,OAAO,CAAC,MAAM,CAAyB;;IAoBvC;;;OAGG;IACG,eAAe,CACnB,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,aAAa,CAAC;IAgBzB,OAAO,CAAC,UAAU;IAWlB,OAAO,CAAC,cAAc;CA2BvB"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { DocumentAnalysisClient, AzureKeyCredential, } from "@azure/ai-form-recognizer";
|
|
2
|
+
import { readFile } from "fs/promises";
|
|
3
|
+
import { Readable } from "stream";
|
|
4
|
+
const DEFAULT_API_VERSION = "2024-11-30";
|
|
5
|
+
/**
 * Thin wrapper around the Azure `DocumentAnalysisClient` that maps this
 * server's model names ("read", "layout") onto Azure prebuilt model IDs and
 * accepts either a URL or a local file path as the document source.
 */
export class DocumentIntelligenceService {
    client;

    /**
     * Build the Azure client from environment variables.
     *
     * @throws {Error} If AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT or
     *   AZURE_DOCUMENT_INTELLIGENCE_KEY is unset or empty.
     */
    constructor() {
        const endpoint = process.env.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT;
        const key = process.env.AZURE_DOCUMENT_INTELLIGENCE_KEY;
        if (!endpoint || !key) {
            throw new Error("Missing Azure Document Intelligence credentials. " +
                "Please set AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT and AZURE_DOCUMENT_INTELLIGENCE_KEY environment variables.");
        }
        // NOTE(review): AZURE_DOCUMENT_INTELLIGENCE_API_VERSION used to be read
        // into a local here that was never passed anywhere, so it had no effect.
        // The unused read was removed; the client keeps using the SDK's default
        // API version. Confirm which versions the SDK accepts before wiring the
        // variable into the client options.
        this.client = new DocumentAnalysisClient(endpoint, new AzureKeyCredential(key));
    }

    /**
     * Analyze a document using the specified model.
     * Supports both local file paths and public URLs.
     *
     * @param {string} source - http(s) URL (matched case-insensitively) or a
     *   local file path.
     * @param {string} model - "read" or "layout".
     * @returns {Promise<object>} The result produced by the poller once the
     *   analysis operation has completed.
     * @throws {Error} If `model` is unknown, the file cannot be read, or the
     *   Azure call fails.
     */
    async analyzeDocument(source, model) {
        // Resolve the model first so an invalid model fails before any I/O.
        const modelId = this.getModelId(model);
        let poller;
        if (/^https?:\/\//i.test(source)) {
            // URL source: the service fetches the document itself.
            poller = await this.client.beginAnalyzeDocumentFromUrl(modelId, source);
        }
        else {
            // Local file source: the file is read fully into memory and the
            // bytes are handed over as-is. Wrapping an already-loaded Buffer in
            // a one-shot stream added nothing and leaves a body that cannot be
            // replayed once consumed.
            const fileBuffer = await readFile(source);
            poller = await this.client.beginAnalyzeDocument(modelId, fileBuffer);
        }
        return await poller.pollUntilDone();
    }

    /**
     * Map a server model name to the Azure prebuilt model ID.
     *
     * @param {string} model - "read" or "layout".
     * @returns {string} "prebuilt-read" or "prebuilt-layout".
     * @throws {Error} For any other model name.
     */
    getModelId(model) {
        switch (model) {
            case "read":
                return "prebuilt-read";
            case "layout":
                return "prebuilt-layout";
            default:
                throw new Error(`Unknown model: ${model}`);
        }
    }

    /**
     * Guess a MIME type from a file path's extension.
     *
     * The extension is the lowercased text after the last "." in the whole
     * path; anything unrecognized yields "application/octet-stream".
     * Not called by any other method of this class.
     *
     * @param {string} filePath - Path or file name to inspect.
     * @returns {string} MIME type string.
     */
    getContentType(filePath) {
        const ext = filePath.toLowerCase().split(".").pop() || "";
        switch (ext) {
            case "pdf":
                return "application/pdf";
            case "jpg":
            case "jpeg":
                return "image/jpeg";
            case "png":
                return "image/png";
            case "bmp":
                return "image/bmp";
            case "tiff":
            case "tif":
                return "image/tiff";
            case "heif":
                return "image/heif";
            case "docx":
                return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            case "pptx":
                return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
            case "xlsx":
                return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            default:
                return "application/octet-stream";
        }
    }
}
|
|
74
|
+
//# sourceMappingURL=document-intelligence.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-intelligence.js","sourceRoot":"","sources":["../../src/services/document-intelligence.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,sBAAsB,EACtB,kBAAkB,GACnB,MAAM,2BAA2B,CAAC;AAGnC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AAElC,MAAM,mBAAmB,GAAG,YAAY,CAAC;AAEzC,MAAM,OAAO,2BAA2B;IAC9B,MAAM,CAAyB;IAEvC;QACE,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC;QAClE,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC;QACxD,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,uCAAuC,IAAI,mBAAmB,CAAC;QAE9F,IAAI,CAAC,QAAQ,IAAI,CAAC,GAAG,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CACb,mDAAmD;gBACjD,4GAA4G,CAC/G,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,MAAM,GAAG,IAAI,sBAAsB,CACtC,QAAQ,EACR,IAAI,kBAAkB,CAAC,GAAG,CAAC,CAC5B,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,eAAe,CACnB,MAAc,EACd,KAAoB;QAEpB,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAEvC,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;YACjC,aAAa;YACb,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,2BAA2B,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAC9E,OAAO,MAAM,MAAM,CAAC,aAAa,EAAE,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,oBAAoB;YACpB,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACzC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YACvE,OAAO,MAAM,MAAM,CAAC,aAAa,EAAE,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,UAAU,CAAC,KAAoB;QACrC,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,MAAM;gBACT,OAAO,eAAe,CAAC;YACzB,KAAK,QAAQ;gBACX,OAAO,iBAAiB,CAAC;YAC3B;gBACE,MAAM,IAAI,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAEO,cAAc,CAAC,QAAgB;QACrC,MAAM,GAAG,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;QAC1D,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,KAAK;gBACR,OAAO,iBAAiB,CAAC;YAC3B,KAAK,KAAK,CAAC;YACX,KAAK,MAAM;gBACT,OAAO,YAAY,CAAC;YACtB,KAAK,KAAK;gBACR,OAAO,WAAW,CAAC;YACrB,KAAK,KAAK;gBACR,OAAO,WAAW,CAAC;YACrB,KAAK,MAAM,CAAC;YACZ,KAAK,KAAK;gBACR,OAAO,YAAY,CAAC;YACtB,KAAK,MAAM;gBACT,OAAO,YAAY,CAAC;YACtB,KAAK,MAAM;gBACT,OAAO,yEAAyE,CAAC;YACnF,KAAK,MAAM;gBACT,OAA
O,2EAA2E,CAAC;YACrF,KAAK,MAAM;gBACT,OAAO,mEAAmE,CAAC;YAC7E;gBACE,OAAO,0BAA0B,CAAC;QACtC,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { AnalyzeDocumentSchema } from "./schemas.js";
|
|
3
|
+
/**
 * Main tool handler for analyze_document.
 *
 * Takes the validated tool input (`model` and `source`) and resolves with the
 * analysis rendered as a Markdown string. The compiled implementation serves
 * the result from the local cache when an entry exists for the same model and
 * content hash, and otherwise calls Azure and stores the formatted result.
 */
|
|
6
|
+
export declare function analyzeDocument(params: z.infer<typeof AnalyzeDocumentSchema>): Promise<string>;
|
|
7
|
+
//# sourceMappingURL=analyze-document.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze-document.d.ts","sourceRoot":"","sources":["../../src/tools/analyze-document.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AA0BrD;;GAEG;AACH,wBAAsB,eAAe,CAAC,MAAM,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,qBAAqB,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,CAgEpG"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { UserError } from "fastmcp";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { AnalyzeDocumentSchema } from "./schemas.js";
|
|
4
|
+
import { DocumentIntelligenceService } from "../services/document-intelligence.js";
|
|
5
|
+
import { AnalysisCache } from "../services/cache.js";
|
|
6
|
+
import { formatReadResult } from "../formatters/read-formatter.js";
|
|
7
|
+
import { formatLayoutResult } from "../formatters/layout-formatter.js";
|
|
8
|
+
import * as fs from "fs";
|
|
9
|
+
let diService = null;
|
|
10
|
+
let cache = null;
|
|
11
|
+
/**
 * Return the module-wide DocumentIntelligenceService, creating it on the
 * first call. If construction throws, nothing is stored and the next call
 * tries again.
 */
function getDIService() {
    if (diService) {
        return diService;
    }
    diService = new DocumentIntelligenceService();
    return diService;
}
|
|
17
|
+
/**
 * Return the module-wide AnalysisCache, creating it on the first call.
 * The cache location comes from DI_CACHE_PATH; when that variable is unset,
 * `undefined` is passed through to the AnalysisCache constructor.
 */
function getCache() {
    cache ??= new AnalysisCache(process.env.DI_CACHE_PATH);
    return cache;
}
|
|
24
|
+
/** Lowercase file extensions (without the dot) accepted for local sources. */
const SUPPORTED_EXTENSIONS = ["pdf", "jpg", "jpeg", "png", "bmp", "tiff", "tif", "heif", "docx", "pptx", "xlsx"];

/**
 * Return the lowercase extension of the final path segment, without the dot,
 * or "" when that segment contains no dot.
 *
 * Only the last segment is inspected, so dots in directory names
 * ("/data.v2/scan") are not mistaken for an extension.
 */
function getFileExtension(filePath) {
    const baseName = filePath.split(/[\\/]/).pop() ?? "";
    const dotIndex = baseName.lastIndexOf(".");
    return dotIndex === -1 ? "" : baseName.slice(dotIndex + 1).toLowerCase();
}

/**
 * Main tool handler for analyze_document.
 *
 * Flow: validate a local source, derive a cache key from the model and the
 * content hash (URL string hash for URLs, file-content hash for files), return
 * the cached Markdown when present, otherwise call Azure, format the result
 * for the chosen model, store it in the cache, and return it.
 *
 * @param {{ model: string, source: string }} params - Validated tool input.
 * @returns {Promise<string>} Markdown rendering of the analysis.
 * @throws {UserError} If a local file is missing, unreadable, or has an
 *   unsupported extension, or if the Azure analysis fails.
 */
export async function analyzeDocument(params) {
    const { model, source } = params;
    const cacheService = getCache();
    const diService = getDIService();

    // Determine source type
    const isUrl = /^https?:\/\//i.test(source);
    const sourceType = isUrl ? "url" : "file";

    // Validate local file exists
    if (!isUrl) {
        if (!fs.existsSync(source)) {
            throw new UserError(`File not found: ${source}`);
        }
        // Basic extension check. Files without any extension are let through
        // and left for the service to accept or reject.
        const ext = getFileExtension(source);
        if (ext && !SUPPORTED_EXTENSIONS.includes(ext)) {
            throw new UserError(`Unsupported file type: .${ext}. Supported: PDF, images (JPG, PNG, BMP, TIFF, HEIF), DOCX, PPTX, XLSX`);
        }
    }

    // Compute content hash / cache key
    let contentHash;
    if (isUrl) {
        contentHash = cacheService.computeUrlHash(source);
    }
    else {
        try {
            contentHash = await cacheService.computeFileHash(source);
        }
        catch (error) {
            // Covers directories, permission problems and files removed after
            // the existence check; report them as user-facing errors instead
            // of letting a raw filesystem exception escape.
            throw new UserError(`Failed to read file: ${source} (${error?.message || "Unknown error"})`);
        }
    }
    const cacheKey = cacheService.computeCacheKey(model, contentHash);

    // Check cache
    const cachedResult = cacheService.get(cacheKey);
    if (cachedResult) {
        return cachedResult;
    }

    // Cache miss — call Azure
    try {
        const analysisResult = await diService.analyzeDocument(source, model);
        // Format based on model
        const markdown = model === "read"
            ? formatReadResult(analysisResult, source)
            : formatLayoutResult(analysisResult, source);
        // Store in cache. A failed write must not discard a result that was
        // already obtained, so it is logged and otherwise ignored.
        try {
            cacheService.set(cacheKey, model, sourceType, contentHash, markdown);
        }
        catch (cacheError) {
            console.error("Failed to cache analysis result:", cacheError);
        }
        return markdown;
    }
    catch (error) {
        if (error instanceof UserError)
            throw error;
        console.error("Azure Document Intelligence error:", error);
        throw new UserError(`Failed to analyze document: ${error?.message || "Unknown error"}`);
    }
}
|
|
82
|
+
//# sourceMappingURL=analyze-document.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze-document.js","sourceRoot":"","sources":["../../src/tools/analyze-document.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,2BAA2B,EAAE,MAAM,sCAAsC,CAAC;AACnF,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,gBAAgB,EAAE,MAAM,iCAAiC,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AAEzB,IAAI,SAAS,GAAuC,IAAI,CAAC;AACzD,IAAI,KAAK,GAAyB,IAAI,CAAC;AAEvC,SAAS,YAAY;IACnB,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,SAAS,GAAG,IAAI,2BAA2B,EAAE,CAAC;IAChD,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,QAAQ;IACf,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC;QAC5C,KAAK,GAAG,IAAI,aAAa,CAAC,SAAS,CAAC,CAAC;IACvC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAA6C;IACjF,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IAEjC,MAAM,YAAY,GAAG,QAAQ,EAAE,CAAC;IAChC,MAAM,SAAS,GAAG,YAAY,EAAE,CAAC;IAEjC,wBAAwB;IACxB,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3C,MAAM,UAAU,GAAmB,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC;IAE1D,6BAA6B;IAC7B,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,MAAM,IAAI,SAAS,CAAC,mBAAmB,MAAM,EAAE,CAAC,CAAC;QACnD,CAAC;QACD,wBAAwB;QACxB,MAAM,GAAG,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;QAClD,MAAM,SAAS,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;QACtG,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,MAAM,IAAI,SAAS,CACjB,2BAA2B,GAAG,wEAAwE,CACvG,CAAC;QACJ,CAAC;IACH,CAAC;IAED,mCAAmC;IACnC,IAAI,WAAmB,CAAC;IACxB,IAAI,KAAK,EAAE,CAAC;QACV,WAAW,GAAG,YAAY,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IACpD,CAAC;SAAM,CAAC;QACN,WAAW,GAAG,MAAM,YAAY,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;IAC3D,CAAC;IAED,MAAM,QAAQ,GAAG,YAAY,CAAC,eAAe,CAAC,KAAsB,EAAE,WAAW,CAAC,CAAC;IAEnF,cAAc;IACd,MAAM,YAAY,GAAG,YAAY
,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAChD,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,0BAA0B;IAC1B,IAAI,CAAC;QACH,MAAM,cAAc,GAAG,MAAM,SAAS,CAAC,eAAe,CAAC,MAAM,EAAE,KAAsB,CAAC,CAAC;QAEvF,wBAAwB;QACxB,IAAI,QAAgB,CAAC;QACrB,IAAI,KAAK,KAAK,MAAM,EAAE,CAAC;YACrB,QAAQ,GAAG,gBAAgB,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACtD,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,kBAAkB,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACxD,CAAC;QAED,iBAAiB;QACjB,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAsB,EAAE,UAAU,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;QAEtF,OAAO,QAAQ,CAAC;IAClB,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,IAAI,KAAK,YAAY,SAAS;YAAE,MAAM,KAAK,CAAC;QAC5C,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,KAAK,CAAC,CAAC;QAC3D,MAAM,IAAI,SAAS,CACjB,+BAA+B,KAAK,CAAC,OAAO,IAAI,eAAe,EAAE,CAClE,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
 * Zod schema for the analyze_document tool input: `model` is one of
 * "read" | "layout" and `source` is a string (a local path or an http(s) URL).
 */
export declare const AnalyzeDocumentSchema: z.ZodObject<{
|
|
3
|
+
model: z.ZodEnum<{
|
|
4
|
+
read: "read";
|
|
5
|
+
layout: "layout";
|
|
6
|
+
}>;
|
|
7
|
+
source: z.ZodString;
|
|
8
|
+
}, z.core.$strip>;
|
|
9
|
+
/** Parsed input type inferred from AnalyzeDocumentSchema. */
export type AnalyzeDocumentInput = z.infer<typeof AnalyzeDocumentSchema>;
|
|
10
|
+
//# sourceMappingURL=schemas.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../src/tools/schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,qBAAqB;;;;;;iBAqBjC,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,qBAAqB,CAAC,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
 * Input schema for the analyze_document tool.
 *
 * - `model`: "read" or "layout".
 * - `source`: non-empty string holding a local file path or an http(s) URL.
 *
 * No URL-vs-path validation happens here: the tool handler decides which one
 * `source` is and checks that local files exist at call time. The previous
 * `.refine()` step returned `true` on every path, so it never rejected any
 * input and its error message could never be reported; it was removed.
 */
export const AnalyzeDocumentSchema = z.object({
    model: z.enum(["read", "layout"]).describe("Analysis model: 'read' for OCR-only text extraction, " +
        "'layout' for rich structure including tables and selection marks. " +
        "Use 'read' for simple text content, 'layout' for documents with tables or complex structure."),
    source: z.string().min(1).describe("Document source: a local file path (e.g., '/path/to/doc.pdf') " +
        "or a publicly accessible URL (e.g., 'https://example.com/doc.pdf'). " +
        "The server auto-detects whether it's a file path or URL."),
});
|
|
18
|
+
//# sourceMappingURL=schemas.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schemas.js","sourceRoot":"","sources":["../../src/tools/schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5C,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,QAAQ,CACxC,uDAAuD;QACrD,oEAAoE;QACpE,8FAA8F,CACjG;IACD,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAChC,gEAAgE;QAC9D,sEAAsE;QACtE,0DAA0D,CAC7D;CACF,CAAC,CAAC,MAAM,CACP,CAAC,IAAI,EAAE,EAAE;IACP,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAChD,IAAI,KAAK;QAAE,OAAO,IAAI,CAAC;IACvB,6EAA6E;IAC7E,OAAO,IAAI,CAAC;AACd,CAAC,EACD;IACE,OAAO,EAAE,sFAAsF;CAChG,CACF,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared TypeScript types for the Document Intelligence MCP server.
|
|
3
|
+
*/
|
|
4
|
+
/** Model names accepted by the server; each maps to an Azure prebuilt model ID. */
export type AnalysisModel = "read" | "layout";
|
|
5
|
+
/** Input accepted by the analyze_document tool handler. */
export interface AnalyzeDocumentParams {
|
|
6
|
+
model: AnalysisModel; // which analysis model to run
|
|
7
|
+
source: string; // local file path or http(s) URL
|
|
8
|
+
}
|
|
9
|
+
/**
 * Record describing one analysis outcome.
 * NOTE(review): the compiled tool handler returns the Markdown string directly
 * and does not appear to build this shape — confirm whether it is still used.
 */
export interface AnalysisResult {
|
|
10
|
+
model: AnalysisModel;
|
|
11
|
+
source: string;
|
|
12
|
+
sourceType: "file" | "url";
|
|
13
|
+
contentHash: string;
|
|
14
|
+
markdown: string;
|
|
15
|
+
cached: boolean; // presumably true when served from the cache — confirm
|
|
16
|
+
}
|
|
17
|
+
/**
 * Shape of one cached analysis. The first five fields line up with the
 * arguments the tool handler passes when storing a result in the cache.
 */
export interface CacheEntry {
|
|
18
|
+
cacheKey: string; // derived from the model and the content hash
|
|
19
|
+
model: AnalysisModel;
|
|
20
|
+
sourceType: "file" | "url";
|
|
21
|
+
contentHash: string; // hash of the file contents, or of the URL string for URL sources
|
|
22
|
+
result: string; // formatted Markdown output
|
|
23
|
+
createdAt: string; // presumably a timestamp string set on insert — confirm format
|
|
24
|
+
accessedAt: string; // presumably updated on cache hits — confirm
|
|
25
|
+
}
|
|
26
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,MAAM,aAAa,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE9C,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,aAAa,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,aAAa,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,GAAG,KAAK,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,aAAa,CAAC;IACrB,UAAU,EAAE,MAAM,GAAG,KAAK,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG"}
|
package/package.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kaenova/document-intelligence-mcp",
|
|
3
|
+
"version": "0.0.0-20260520.h0940",
|
|
4
|
+
"description": "MCP server for Azure AI Document Intelligence with Read and Layout models",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "https://github.com/kaenova/document-intelligence-mcp"
|
|
8
|
+
},
|
|
9
|
+
"type": "module",
|
|
10
|
+
"main": "dist/index.js",
|
|
11
|
+
"module": "dist/index.js",
|
|
12
|
+
"types": "dist/index.d.ts",
|
|
13
|
+
"bin": {
|
|
14
|
+
"document-intelligence-mcp": "dist/index.js"
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist"
|
|
18
|
+
],
|
|
19
|
+
"scripts": {
|
|
20
|
+
"dev": "bun run --watch src/index.ts",
|
|
21
|
+
"start": "bun run src/index.ts",
|
|
22
|
+
"build": "bunx tsc -p tsconfig.build.json",
|
|
23
|
+
"typecheck": "bunx tsc -p tsconfig.json --noEmit",
|
|
24
|
+
"prepublishOnly": "bun run build && bun run typecheck"
|
|
25
|
+
},
|
|
26
|
+
"publishConfig": {
|
|
27
|
+
"access": "public"
|
|
28
|
+
},
|
|
29
|
+
"devDependencies": {
|
|
30
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
31
|
+
"@types/bun": "latest",
|
|
32
|
+
"typescript": "^5.9.3"
|
|
33
|
+
},
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"@azure/ai-form-recognizer": "^5.1.0",
|
|
36
|
+
"better-sqlite3": "^12.10.0",
|
|
37
|
+
"fastmcp": "^4.0.1",
|
|
38
|
+
"zod": "^4.4.3"
|
|
39
|
+
}
|
|
40
|
+
}
|