@kjanat/paperless-mcp 1.0.1-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +445 -0
- package/dist/index.js +330 -0
- package/package.json +54 -0
- package/skills/paperless-ngx/SKILL.md +122 -0
- package/skills/paperless-ngx/references/query-syntax.md +70 -0
- package/skills/paperless-ngx/references/tools.md +161 -0
- package/skills/paperless-ngx/references/workflows.md +139 -0
- package/skills/paperless-ngx/scripts/encode-file.sh +45 -0
- package/skills/paperless-ngx/scripts/test-connection.sh +70 -0
package/package.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kjanat/paperless-mcp",
|
|
3
|
+
"version": "1.0.1-dev.0",
|
|
4
|
+
"description": "MCP server for interacting with Paperless-ngx document management system.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"mcp",
|
|
7
|
+
"paperless-ngx",
|
|
8
|
+
"document-management",
|
|
9
|
+
"ai",
|
|
10
|
+
"claude",
|
|
11
|
+
"model-context-protocol",
|
|
12
|
+
"paperless"
|
|
13
|
+
],
|
|
14
|
+
"repository": {
|
|
15
|
+
"type": "git",
|
|
16
|
+
"url": "git+https://github.com/kjanat/paperless-mcp.git"
|
|
17
|
+
},
|
|
18
|
+
"license": "MIT",
|
|
19
|
+
"author": "Kaj Kowalski",
|
|
20
|
+
"main": "src/index.ts",
|
|
21
|
+
"bin": {
|
|
22
|
+
"paperless-mcp": "dist/index.js"
|
|
23
|
+
},
|
|
24
|
+
"files": [
|
|
25
|
+
"dist",
|
|
26
|
+
"skills"
|
|
27
|
+
],
|
|
28
|
+
"scripts": {
|
|
29
|
+
"bd": "bun build src/index.ts --minify --outdir=dist --target=node",
|
|
30
|
+
"fmt": "dprint fmt .",
|
|
31
|
+
"fmt:check": "dprint check .",
|
|
32
|
+
"inspect": "bunx @modelcontextprotocol/inspector bun src/index.ts",
|
|
33
|
+
"prepack": "bun bd",
|
|
34
|
+
"start": "bun src/index.ts",
|
|
35
|
+
"typecheck": "tsgo --noEmit"
|
|
36
|
+
},
|
|
37
|
+
"dependencies": {
|
|
38
|
+
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
39
|
+
"zod": "^4.3.6"
|
|
40
|
+
},
|
|
41
|
+
"devDependencies": {
|
|
42
|
+
"@types/bun": "^1.3.9",
|
|
43
|
+
"@typescript/native-preview": "^7.0.0-dev.20260220.1",
|
|
44
|
+
"typescript": "^5.9.3"
|
|
45
|
+
},
|
|
46
|
+
"optionalDependencies": {
|
|
47
|
+
"express": "^5.2.1"
|
|
48
|
+
},
|
|
49
|
+
"packageManager": "bun@1.3.9",
|
|
50
|
+
"publishConfig": {
|
|
51
|
+
"access": "public",
|
|
52
|
+
"tag": "dev"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: paperless-ngx
|
|
3
|
+
description: Manage documents in Paperless-NGX via MCP tools. Search, upload, tag, organize, and bulk-edit documents, correspondents, and document types. Use when working with Paperless-NGX, document management, OCR, or any mcp_paperless_* tool task.
|
|
4
|
+
license: MIT
|
|
5
|
+
compatibility: Requires a running Paperless-NGX instance with API token. MCP server must be connected with mcp_paperless_* tools available.
|
|
6
|
+
metadata:
|
|
7
|
+
author: kjanat
|
|
8
|
+
version: "1.0"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Paperless-NGX Document Management
|
|
12
|
+
|
|
13
|
+
Orchestrate Paperless-NGX through 16 MCP tools across 4 domains.
|
|
14
|
+
|
|
15
|
+
## Tool Catalog
|
|
16
|
+
|
|
17
|
+
### Documents (5 tools)
|
|
18
|
+
|
|
19
|
+
| Tool | Operation | Key Params |
|
|
20
|
+
| --------------------- | ---------------- | ------------------------------------- |
|
|
21
|
+
| `search_documents` | Full-text search | `query`, `page`, `page_size` |
|
|
22
|
+
| `get_document` | Full details | `id` |
|
|
23
|
+
| `post_document` | Upload file | `file` (base64), `filename`, metadata |
|
|
24
|
+
| `download_document` | Get file base64 | `id`, `original` (bool) |
|
|
25
|
+
| `bulk_edit_documents` | Batch operations | `documents` (IDs), `method`, params |
|
|
26
|
+
|
|
27
|
+
### Tags (5 tools)
|
|
28
|
+
|
|
29
|
+
| Tool | Operation |
|
|
30
|
+
| ---------------- | ---------------------------- |
|
|
31
|
+
| `list_tags` | All tags + colors + matching |
|
|
32
|
+
| `create_tag` | New tag, optional auto-match |
|
|
33
|
+
| `update_tag` | Modify name/color/matching |
|
|
34
|
+
| `delete_tag` | Remove permanently |
|
|
35
|
+
| `bulk_edit_tags` | Batch permissions/deletion |
|
|
36
|
+
|
|
37
|
+
### Correspondents (3 tools)
|
|
38
|
+
|
|
39
|
+
| Tool | Operation |
|
|
40
|
+
| -------------------------- | ------------------------ |
|
|
41
|
+
| `list_correspondents` | All correspondents |
|
|
42
|
+
| `create_correspondent` | New, optional auto-match |
|
|
43
|
+
| `bulk_edit_correspondents` | Batch permissions/delete |
|
|
44
|
+
|
|
45
|
+
### Document Types (3 tools)
|
|
46
|
+
|
|
47
|
+
| Tool | Operation |
|
|
48
|
+
| -------------------------- | ------------------------ |
|
|
49
|
+
| `list_document_types` | All document types |
|
|
50
|
+
| `create_document_type` | New, optional auto-match |
|
|
51
|
+
| `bulk_edit_document_types` | Batch permissions/delete |
|
|
52
|
+
|
|
53
|
+
## Decision Trees
|
|
54
|
+
|
|
55
|
+
### Find a Document
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
What do you know?
|
|
59
|
+
├─ Keywords/content → search_documents(query="term1 term2")
|
|
60
|
+
├─ Document ID → get_document(id=N)
|
|
61
|
+
├─ By tag → search_documents(query="tag:tagname")
|
|
62
|
+
├─ By type → search_documents(query="type:typename")
|
|
63
|
+
├─ By correspondent → search_documents(query="correspondent:name")
|
|
64
|
+
├─ By date → search_documents(query="created:[2024 to 2025]")
|
|
65
|
+
└─ Combined → search_documents(query="tag:X correspondent:Y created:[2024 to 2025]")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Organize Documents
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
What operation?
|
|
72
|
+
├─ Add tag → bulk_edit_documents(method="add_tag", tag=ID)
|
|
73
|
+
├─ Remove tag → bulk_edit_documents(method="remove_tag", tag=ID)
|
|
74
|
+
├─ Multi-tag → bulk_edit_documents(method="modify_tags", add_tags=[...], remove_tags=[...])
|
|
75
|
+
├─ Set type → bulk_edit_documents(method="set_document_type", document_type=ID)
|
|
76
|
+
├─ Set sender → bulk_edit_documents(method="set_correspondent", correspondent=ID)
|
|
77
|
+
├─ Merge PDFs → bulk_edit_documents(method="merge", metadata_document_id=ID)
|
|
78
|
+
├─ Rotate pages → bulk_edit_documents(method="rotate", degrees=90|180|270)
|
|
79
|
+
├─ Delete pages → bulk_edit_documents(method="delete_pages", pages="1,3,5-7")
|
|
80
|
+
├─ Reprocess OCR → bulk_edit_documents(method="reprocess")
|
|
81
|
+
└─ Delete → bulk_edit_documents(method="delete") !! PERMANENT !!
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Upload a Document
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
1. Resolve metadata IDs first:
|
|
88
|
+
├─ list_tags → find or create_tag
|
|
89
|
+
├─ list_correspondents → find or create_correspondent
|
|
90
|
+
└─ list_document_types → find or create_document_type
|
|
91
|
+
2. post_document(file=<base64>, filename="name.pdf", tags=[...], correspondent=ID, ...)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Manage Taxonomy (Tags/Correspondents/Types)
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
Need to change metadata objects?
|
|
98
|
+
├─ View all → list_tags / list_correspondents / list_document_types
|
|
99
|
+
├─ Create new → create_tag / create_correspondent / create_document_type
|
|
100
|
+
├─ Edit tag → update_tag(id, name, color, match, matching_algorithm)
|
|
101
|
+
├─ Delete one tag → delete_tag(id)
|
|
102
|
+
└─ Batch delete/perm → bulk_edit_tags / bulk_edit_correspondents / bulk_edit_document_types
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Critical Notes
|
|
106
|
+
|
|
107
|
+
- **search_documents strips `content`** to save tokens. Use `get_document` for
|
|
108
|
+
full OCR text.
|
|
109
|
+
- **post_document requires base64** file content, not file paths.
|
|
110
|
+
- **matching_algorithm inconsistency**: numeric `0-4` for tags, string enum
|
|
111
|
+
(`"any"`, `"all"`, `"exact"`, `"regular expression"`, `"fuzzy"`) for
|
|
112
|
+
correspondents/document types. See [tools.md](references/tools.md).
|
|
113
|
+
- **Bulk delete is permanent and irreversible.**
|
|
114
|
+
- **download_document** returns base64 blob + filename from content-disposition.
|
|
115
|
+
|
|
116
|
+
## References
|
|
117
|
+
|
|
118
|
+
| Task | File |
|
|
119
|
+
| ----------------------- | --------------------------------------------- |
|
|
120
|
+
| Tool parameters & types | [tools.md](references/tools.md) |
|
|
121
|
+
| Search query syntax | [query-syntax.md](references/query-syntax.md) |
|
|
122
|
+
| Multi-step workflows | [workflows.md](references/workflows.md) |
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Paperless-NGX Search Query Syntax
|
|
2
|
+
|
|
3
|
+
Reference for the `query` parameter of `search_documents`.
|
|
4
|
+
|
|
5
|
+
## Basic Search
|
|
6
|
+
|
|
7
|
+
Words separated by spaces match documents containing **ALL** words.
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
invoice electricity # docs with BOTH "invoice" AND "electricity"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Field Searches
|
|
14
|
+
|
|
15
|
+
| Field | Syntax | Example |
|
|
16
|
+
| ------------- | -------------------- | -------------------------- |
|
|
17
|
+
| Tag | `tag:name` | `tag:unpaid` |
|
|
18
|
+
| Document type | `type:name` | `type:invoice` |
|
|
19
|
+
| Correspondent | `correspondent:name` | `correspondent:university` |
|
|
20
|
+
| Title | `title:text` | `title:electricity` |
|
|
21
|
+
| Content | (default, no prefix) | `electricity bill` |
|
|
22
|
+
| ASN | `asn:number` | `asn:1234` |
|
|
23
|
+
|
|
24
|
+
## Logical Operators
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
term1 AND term2 # Both required (default behavior)
|
|
28
|
+
term1 OR term2 # Either matches
|
|
29
|
+
NOT term1 # Exclude term
|
|
30
|
+
term1 AND (term2 OR term3) # Grouping with parentheses
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Date Ranges
|
|
34
|
+
|
|
35
|
+
| Syntax | Matches |
|
|
36
|
+
| ------------------------------ | -------------------- |
|
|
37
|
+
| `created:[2024 to 2025]` | Created in 2024-2025 |
|
|
38
|
+
| `created:2024` | Created in 2024 |
|
|
39
|
+
| `added:yesterday` | Added yesterday |
|
|
40
|
+
| `added:today` | Added today |
|
|
41
|
+
| `modified:today` | Modified today |
|
|
42
|
+
| `created:[2024-01 to 2024-06]` | Jan-Jun 2024 |
|
|
43
|
+
|
|
44
|
+
## Wildcards
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
prod*name # Matches single words such as "productname" or "productionname"
|
|
48
|
+
inv?ice # Single character wildcard
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Combined Queries
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
# Unpaid invoices from 2024
|
|
55
|
+
tag:unpaid type:invoice created:2024
|
|
56
|
+
|
|
57
|
+
# Bank statements NOT from Chase
|
|
58
|
+
type:statement correspondent:bank NOT correspondent:chase
|
|
59
|
+
|
|
60
|
+
# Recent electricity or gas bills
|
|
61
|
+
(electricity OR gas) type:bill added:[2024-01 to 2025-01]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Tips
|
|
65
|
+
|
|
66
|
+
- Queries search across content, title, correspondent, type, and tags.
|
|
67
|
+
- Results are paginated: use `page` and `page_size` params.
|
|
68
|
+
- Results exclude `content` field to save tokens -- use `get_document` for
|
|
69
|
+
full OCR text of specific results.
|
|
70
|
+
- Max `page_size` is 100.
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# MCP Tool Reference
|
|
2
|
+
|
|
3
|
+
Full parameter signatures for all 16 Paperless-NGX MCP tools.
|
|
4
|
+
|
|
5
|
+
## Document Tools
|
|
6
|
+
|
|
7
|
+
### search_documents
|
|
8
|
+
|
|
9
|
+
| Param | Type | Required | Notes |
|
|
10
|
+
| ----------- | ------ | -------- | --------------------------- |
|
|
11
|
+
| `query` | string | yes | Paperless-NGX search syntax |
|
|
12
|
+
| `page` | number | no | Pagination, starts at 1 |
|
|
13
|
+
| `page_size` | number | no | Default 25, max 100 |
|
|
14
|
+
|
|
15
|
+
Returns metadata **without** `content` field. Use `get_document` for full text.
|
|
16
|
+
|
|
17
|
+
### get_document
|
|
18
|
+
|
|
19
|
+
| Param | Type | Required | Notes |
|
|
20
|
+
| ----- | ------ | -------- | ------------------------- |
|
|
21
|
+
| `id` | number | yes | Returns full content+meta |
|
|
22
|
+
|
|
23
|
+
### post_document
|
|
24
|
+
|
|
25
|
+
| Param | Type | Required | Notes |
|
|
26
|
+
| ----------------------- | -------- | -------- | ------------------------------- |
|
|
27
|
+
| `file` | string | yes | Base64-encoded file content |
|
|
28
|
+
| `filename` | string | yes | With extension: `"invoice.pdf"` |
|
|
29
|
+
| `title` | string | no | Auto-extracted if omitted |
|
|
30
|
+
| `created` | string | no | ISO date: `YYYY-MM-DD` |
|
|
31
|
+
| `correspondent` | number | no | Correspondent ID |
|
|
32
|
+
| `document_type` | number | no | Document type ID |
|
|
33
|
+
| `storage_path` | number | no | Storage path ID |
|
|
34
|
+
| `tags` | number[] | no | Array of tag IDs |
|
|
35
|
+
| `archive_serial_number` | string | no | External filing reference |
|
|
36
|
+
| `custom_fields` | number[] | no | Custom field IDs |
|
|
37
|
+
|
|
38
|
+
### download_document
|
|
39
|
+
|
|
40
|
+
| Param | Type | Required | Notes |
|
|
41
|
+
| ---------- | ------- | -------- | ------------------------------------------- |
|
|
42
|
+
| `id` | number | yes | Document ID |
|
|
43
|
+
| `original` | boolean | no | `true`=original upload, `false`=OCR version |
|
|
44
|
+
|
|
45
|
+
Returns `{ blob: string, filename: string }`.
|
|
46
|
+
|
|
47
|
+
### bulk_edit_documents
|
|
48
|
+
|
|
49
|
+
| Param | Type | Required | Notes |
|
|
50
|
+
| ---------------------- | -------- | -------- | ------------------------------- |
|
|
51
|
+
| `documents` | number[] | yes | Document IDs |
|
|
52
|
+
| `method` | enum | yes | See method table below |
|
|
53
|
+
| `correspondent` | number | no | For `set_correspondent` |
|
|
54
|
+
| `document_type` | number | no | For `set_document_type` |
|
|
55
|
+
| `storage_path` | number | no | For `set_storage_path` |
|
|
56
|
+
| `tag` | number | no | For `add_tag` / `remove_tag` |
|
|
57
|
+
| `add_tags` | number[] | no | For `modify_tags` |
|
|
58
|
+
| `remove_tags` | number[] | no | For `modify_tags` |
|
|
59
|
+
| `permissions` | object | no | For `set_permissions` |
|
|
60
|
+
| `metadata_document_id` | number | no | For `merge` -- source metadata |
|
|
61
|
+
| `delete_originals` | boolean | no | For `merge`/`split` |
|
|
62
|
+
| `degrees` | number | no | For `rotate`: 90, 180, 270 |
|
|
63
|
+
| `pages` | string | no | For `delete_pages`: `"1,3,5-7"` |
|
|
64
|
+
|
|
65
|
+
**Method enum:**
|
|
66
|
+
`set_correspondent`, `set_document_type`, `set_storage_path`, `add_tag`,
|
|
67
|
+
`remove_tag`, `modify_tags`, `delete`, `reprocess`, `set_permissions`,
|
|
68
|
+
`merge`, `split`, `rotate`, `delete_pages`
|
|
69
|
+
|
|
70
|
+
## Tag Tools
|
|
71
|
+
|
|
72
|
+
### list_tags
|
|
73
|
+
|
|
74
|
+
No parameters. Returns all tags with name, color, matching rules.
|
|
75
|
+
|
|
76
|
+
### create_tag
|
|
77
|
+
|
|
78
|
+
| Param | Type | Required | Notes |
|
|
79
|
+
| -------------------- | ------ | -------- | ------------------------------------------------- |
|
|
80
|
+
| `name` | string | yes | Unique tag name |
|
|
81
|
+
| `color` | string | no | Hex: `#FF0000` |
|
|
82
|
+
| `match` | string | no | Auto-assign pattern |
|
|
83
|
+
| `matching_algorithm` | int | no | `0`=any, `1`=all, `2`=exact, `3`=regex, `4`=fuzzy |
|
|
84
|
+
|
|
85
|
+
### update_tag
|
|
86
|
+
|
|
87
|
+
| Param | Type | Required | Notes |
|
|
88
|
+
| -------------------- | ------ | -------- | ------------------------ |
|
|
89
|
+
| `id` | number | yes | Tag ID from list_tags |
|
|
90
|
+
| `name` | string | yes | New name |
|
|
91
|
+
| `color` | string | no | Hex color |
|
|
92
|
+
| `match` | string | no | Auto-assign pattern |
|
|
93
|
+
| `matching_algorithm` | int | no | `0`-`4` (same as create) |
|
|
94
|
+
|
|
95
|
+
### delete_tag
|
|
96
|
+
|
|
97
|
+
| Param | Type | Required | Notes |
|
|
98
|
+
| ----- | ------ | -------- | -------------------------------------- |
|
|
99
|
+
| `id` | number | yes | Removes from all documents. Permanent. |
|
|
100
|
+
|
|
101
|
+
### bulk_edit_tags
|
|
102
|
+
|
|
103
|
+
| Param | Type | Required | Notes |
|
|
104
|
+
| ------------- | -------- | -------- | --------------------------------------------- |
|
|
105
|
+
| `tag_ids` | number[] | yes | Tag IDs |
|
|
106
|
+
| `operation` | enum | yes | `"set_permissions"` or `"delete"` |
|
|
107
|
+
| `owner` | number | no | For `set_permissions` |
|
|
108
|
+
| `permissions` | object | no | `{view:{users,groups},change:{users,groups}}` |
|
|
109
|
+
| `merge` | boolean | no | Merge or replace permissions |
|
|
110
|
+
|
|
111
|
+
## Correspondent Tools
|
|
112
|
+
|
|
113
|
+
### list_correspondents
|
|
114
|
+
|
|
115
|
+
No parameters.
|
|
116
|
+
|
|
117
|
+
### create_correspondent
|
|
118
|
+
|
|
119
|
+
| Param | Type | Required | Notes |
|
|
120
|
+
| -------------------- | ------ | -------- | ---------------------------------------------------------- |
|
|
121
|
+
| `name` | string | yes | Person/company/org name |
|
|
122
|
+
| `match` | string | no | Auto-assign pattern |
|
|
123
|
+
| `matching_algorithm` | enum | no | `"any"`,`"all"`,`"exact"`,`"regular expression"`,`"fuzzy"` |
|
|
124
|
+
|
|
125
|
+
**Note:** String enum, not numeric like tags.
|
|
126
|
+
|
|
127
|
+
### bulk_edit_correspondents
|
|
128
|
+
|
|
129
|
+
| Param | Type | Required | Notes |
|
|
130
|
+
| ------------------- | -------- | -------- | --------------------------------- |
|
|
131
|
+
| `correspondent_ids` | number[] | yes | Correspondent IDs |
|
|
132
|
+
| `operation` | enum | yes | `"set_permissions"` or `"delete"` |
|
|
133
|
+
| `owner` | number | no | For `set_permissions` |
|
|
134
|
+
| `permissions` | object | no | Same shape as tags |
|
|
135
|
+
| `merge` | boolean | no | Merge or replace permissions |
|
|
136
|
+
|
|
137
|
+
## Document Type Tools
|
|
138
|
+
|
|
139
|
+
### list_document_types
|
|
140
|
+
|
|
141
|
+
No parameters.
|
|
142
|
+
|
|
143
|
+
### create_document_type
|
|
144
|
+
|
|
145
|
+
| Param | Type | Required | Notes |
|
|
146
|
+
| -------------------- | ------ | -------- | ---------------------------------------------------------- |
|
|
147
|
+
| `name` | string | yes | Type name (Invoice, Receipt, etc.) |
|
|
148
|
+
| `match` | string | no | Auto-assign pattern |
|
|
149
|
+
| `matching_algorithm` | enum | no | `"any"`,`"all"`,`"exact"`,`"regular expression"`,`"fuzzy"` |
|
|
150
|
+
|
|
151
|
+
**Note:** String enum, not numeric like tags.
|
|
152
|
+
|
|
153
|
+
### bulk_edit_document_types
|
|
154
|
+
|
|
155
|
+
| Param | Type | Required | Notes |
|
|
156
|
+
| ------------------- | -------- | -------- | --------------------------------- |
|
|
157
|
+
| `document_type_ids` | number[] | yes | Document type IDs |
|
|
158
|
+
| `operation` | enum | yes | `"set_permissions"` or `"delete"` |
|
|
159
|
+
| `owner` | number | no | For `set_permissions` |
|
|
160
|
+
| `permissions` | object | no | Same shape as tags |
|
|
161
|
+
| `merge` | boolean | no | Merge or replace permissions |
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Common Workflows
|
|
2
|
+
|
|
3
|
+
Multi-step operations for Paperless-NGX document management.
|
|
4
|
+
|
|
5
|
+
## 1. Classify Untagged Documents
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
1. search_documents(query="NOT tag:*")
|
|
9
|
+
→ get list of untagged document IDs + titles
|
|
10
|
+
|
|
11
|
+
2. For each document of interest:
|
|
12
|
+
get_document(id=N)
|
|
13
|
+
→ read full OCR content
|
|
14
|
+
|
|
15
|
+
3. list_tags()
|
|
16
|
+
→ find matching tag IDs (or create_tag if needed)
|
|
17
|
+
|
|
18
|
+
4. bulk_edit_documents(
|
|
19
|
+
documents=[id1, id2, ...],
|
|
20
|
+
method="add_tag",
|
|
21
|
+
tag=TAG_ID
|
|
22
|
+
)
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 2. Bulk Reclassify by Correspondent
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
1. list_correspondents()
|
|
29
|
+
→ identify current + target correspondent IDs
|
|
30
|
+
|
|
31
|
+
2. search_documents(query="correspondent:old-name")
|
|
32
|
+
→ collect document IDs
|
|
33
|
+
|
|
34
|
+
3. bulk_edit_documents(
|
|
35
|
+
documents=[...],
|
|
36
|
+
method="set_correspondent",
|
|
37
|
+
correspondent=NEW_ID
|
|
38
|
+
)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## 3. Upload and Categorize a Batch
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
For each file:
|
|
45
|
+
|
|
46
|
+
1. Resolve metadata:
|
|
47
|
+
list_tags() → tag IDs
|
|
48
|
+
list_correspondents() → correspondent ID (or create_correspondent)
|
|
49
|
+
list_document_types() → type ID (or create_document_type)
|
|
50
|
+
|
|
51
|
+
2. post_document(
|
|
52
|
+
file=<base64 content>,
|
|
53
|
+
filename="receipt-2024-03.pdf",
|
|
54
|
+
tags=[1, 5],
|
|
55
|
+
correspondent=3,
|
|
56
|
+
document_type=2,
|
|
57
|
+
created="2024-03-15"
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 4. Merge Related Documents
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
1. search_documents(query="invoice acme created:[2024-01 to 2024-03]")
|
|
65
|
+
→ collect document IDs
|
|
66
|
+
|
|
67
|
+
2. Pick the primary document (whose metadata to keep):
|
|
68
|
+
get_document(id=PRIMARY_ID) → verify it's correct
|
|
69
|
+
|
|
70
|
+
3. bulk_edit_documents(
|
|
71
|
+
documents=[PRIMARY_ID, SECONDARY_ID_1, SECONDARY_ID_2],
|
|
72
|
+
method="merge",
|
|
73
|
+
metadata_document_id=PRIMARY_ID,
|
|
74
|
+
delete_originals=false # keep originals until verified
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## 5. Export Documents for External Use
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
1. search_documents(query="tag:export-ready")
|
|
82
|
+
→ collect IDs
|
|
83
|
+
|
|
84
|
+
2. For each:
|
|
85
|
+
download_document(id=N, original=true)
|
|
86
|
+
→ returns { blob: "<base64>", filename: "original-name.pdf" }
|
|
87
|
+
|
|
88
|
+
3. Decode base64 and save/forward as needed
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## 6. Set Up Auto-Classification Rules
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
1. create_tag(
|
|
95
|
+
name="electricity",
|
|
96
|
+
match="electricity power grid kwh",
|
|
97
|
+
matching_algorithm=0 # any word matches
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
2. create_correspondent(
|
|
101
|
+
name="Power Company",
|
|
102
|
+
match="Power Co energy provider",
|
|
103
|
+
matching_algorithm="fuzzy" # note: string enum for correspondents
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
3. create_document_type(
|
|
107
|
+
name="Utility Bill",
|
|
108
|
+
match="utility bill statement due",
|
|
109
|
+
matching_algorithm="any"
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
Future uploads auto-classified by Paperless-NGX matching engine.
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## 7. Audit and Clean Up Tags
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
1. list_tags()
|
|
119
|
+
→ review all tags, identify duplicates/unused
|
|
120
|
+
|
|
121
|
+
2. search_documents(query="tag:old-tag-name")
|
|
122
|
+
→ check usage count
|
|
123
|
+
|
|
124
|
+
3. If migrating:
|
|
125
|
+
search → collect IDs → bulk_edit add new tag → bulk_edit remove old tag
|
|
126
|
+
|
|
127
|
+
4. delete_tag(id=OLD_TAG_ID)
|
|
128
|
+
or bulk_edit_tags(tag_ids=[...], operation="delete")
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Pattern: Always Resolve IDs First
|
|
132
|
+
|
|
133
|
+
All document operations use numeric IDs, not names. Always:
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
list_*() → find ID for name
|
|
137
|
+
→ if not found: create_*() → get ID from response
|
|
138
|
+
→ then use ID in subsequent operations
|
|
139
|
+
```
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# Base64-encode a file for use with the post_document MCP tool.
|
|
5
|
+
# Prints the filename, byte size, and the base64 payload between BASE64 START/END markers.
|
|
6
|
+
# Usage: encode-file.sh <path>
|
|
7
|
+
# Example: encode-file.sh ~/Documents/invoice.pdf
|
|
8
|
+
|
|
9
|
+
if [[ $# -lt 1 ]]; then
|
|
10
|
+
echo "Usage: $0 <file_path>" >&2
|
|
11
|
+
echo "Example: $0 ~/Documents/invoice.pdf" >&2
|
|
12
|
+
exit 1
|
|
13
|
+
fi
|
|
14
|
+
|
|
15
|
+
FILE_PATH="$1"
|
|
16
|
+
|
|
17
|
+
if [[ ! -f "${FILE_PATH}" ]]; then
|
|
18
|
+
echo "Error: File not found: ${FILE_PATH}" >&2
|
|
19
|
+
exit 1
|
|
20
|
+
fi
|
|
21
|
+
|
|
22
|
+
FILENAME=$(basename "${FILE_PATH}")
|
|
23
|
+
SIZE=$(wc -c <"${FILE_PATH}" | tr -d ' ')
|
|
24
|
+
|
|
25
|
+
# Warn on large files (>50MB)
|
|
26
|
+
if [[ "${SIZE}" -gt 52428800 ]]; then
|
|
27
|
+
echo "Warning: File is $((SIZE / 1048576))MB. Large uploads may be slow." >&2
|
|
28
|
+
fi
|
|
29
|
+
|
|
30
|
+
# Detect base64 flags (GNU vs BSD)
|
|
31
|
+
if base64 --help 2>&1 | grep -q '\-w'; then
|
|
32
|
+
B64=$(base64 -w0 "${FILE_PATH}")
|
|
33
|
+
else
|
|
34
|
+
B64=$(base64 -i "${FILE_PATH}")
|
|
35
|
+
fi
|
|
36
|
+
|
|
37
|
+
echo "Encoded ${FILENAME} (${SIZE} bytes)"
|
|
38
|
+
echo ""
|
|
39
|
+
echo "Use with post_document:"
|
|
40
|
+
echo " file: <base64 string, ${#B64} chars>"
|
|
41
|
+
echo " filename: \"${FILENAME}\""
|
|
42
|
+
echo ""
|
|
43
|
+
echo "--- BASE64 START ---"
|
|
44
|
+
echo "${B64}"
|
|
45
|
+
echo "--- BASE64 END ---"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# Test Paperless-NGX API connectivity and authentication.
|
|
5
|
+
# Usage: test-connection.sh <base_url> <api_token>
|
|
6
|
+
# Example: test-connection.sh https://docs.example.com abc123token
|
|
7
|
+
|
|
8
|
+
if [[ $# -lt 2 ]]; then
|
|
9
|
+
echo "Usage: $0 <base_url> <api_token>" >&2
|
|
10
|
+
echo "Example: $0 https://docs.example.com abc123token" >&2
|
|
11
|
+
exit 1
|
|
12
|
+
fi
|
|
13
|
+
|
|
14
|
+
BASE_URL="${1%/}"
|
|
15
|
+
TOKEN="$2"
|
|
16
|
+
|
|
17
|
+
echo "Testing connection to ${BASE_URL}..."
|
|
18
|
+
|
|
19
|
+
# 1. Basic connectivity
|
|
20
|
+
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
|
21
|
+
--max-time 10 \
|
|
22
|
+
"${BASE_URL}/api/" 2>/dev/null) || {
|
|
23
|
+
echo "FAIL: Cannot reach ${BASE_URL} (network error or timeout)" >&2
|
|
24
|
+
exit 1
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
if [[ "${HTTP_CODE}" == "000" ]]; then
|
|
28
|
+
echo "FAIL: Cannot reach ${BASE_URL} (DNS or connection refused)" >&2
|
|
29
|
+
exit 1
|
|
30
|
+
fi
|
|
31
|
+
|
|
32
|
+
echo " Reachable (HTTP ${HTTP_CODE})"
|
|
33
|
+
|
|
34
|
+
# 2. Authentication
|
|
35
|
+
AUTH_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
|
36
|
+
--max-time 10 \
|
|
37
|
+
-H "Authorization: Token ${TOKEN}" \
|
|
38
|
+
-H "Accept: application/json; version=5" \
|
|
39
|
+
"${BASE_URL}/api/documents/" 2>/dev/null)
|
|
40
|
+
|
|
41
|
+
if [[ "${AUTH_CODE}" == "401" || "${AUTH_CODE}" == "403" ]]; then
|
|
42
|
+
echo "FAIL: Authentication failed (HTTP ${AUTH_CODE}). Check API token." >&2
|
|
43
|
+
exit 1
|
|
44
|
+
fi
|
|
45
|
+
|
|
46
|
+
if [[ "${AUTH_CODE}" != "200" ]]; then
|
|
47
|
+
echo "FAIL: Unexpected status ${AUTH_CODE} from documents endpoint." >&2
|
|
48
|
+
exit 1
|
|
49
|
+
fi
|
|
50
|
+
|
|
51
|
+
echo " Authenticated (HTTP ${AUTH_CODE})"
|
|
52
|
+
|
|
53
|
+
# 3. Quick stats
|
|
54
|
+
RESPONSE=$(curl -s --max-time 10 \
|
|
55
|
+
-H "Authorization: Token ${TOKEN}" \
|
|
56
|
+
-H "Accept: application/json; version=5" \
|
|
57
|
+
"${BASE_URL}/api/documents/?page_size=1" 2>/dev/null)
|
|
58
|
+
|
|
59
|
+
# Extract the first "count" value from the JSON response. grep exits non-zero when
# no match is found; '|| true' stops set -e/pipefail from aborting the script so
# the variable is simply left empty in that case.
DOC_COUNT=$(echo "${RESPONSE}" | grep -o '"count":[0-9]*' | head -1 | cut -d: -f2) || true
|
|
60
|
+
echo " Documents: ${DOC_COUNT:-unknown}"
|
|
61
|
+
|
|
62
|
+
TAG_RESPONSE=$(curl -s --max-time 10 \
|
|
63
|
+
-H "Authorization: Token ${TOKEN}" \
|
|
64
|
+
-H "Accept: application/json; version=5" \
|
|
65
|
+
"${BASE_URL}/api/tags/" 2>/dev/null)
|
|
66
|
+
|
|
67
|
+
# Extract the first "count" value from the tags response. grep exits non-zero when
# no match is found; '|| true' stops set -e/pipefail from aborting the script so
# the variable is simply left empty in that case.
TAG_COUNT=$(echo "${TAG_RESPONSE}" | grep -o '"count":[0-9]*' | head -1 | cut -d: -f2) || true
|
|
68
|
+
echo " Tags: ${TAG_COUNT:-unknown}"
|
|
69
|
+
|
|
70
|
+
echo "OK: Connection successful."
|