@sidub-inc/docuoria.cli 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1056 -0
- package/package.json +56 -0
- package/payload/.claude-plugin/plugin.json +21 -0
- package/payload/MANIFEST.json +322 -0
- package/payload/SKILL.md +88 -0
- package/payload/assets/lib/Docuoria.dll +0 -0
- package/payload/assets/schemas/template-schema.json +413 -0
- package/payload/commands/classify.md +11 -0
- package/payload/commands/diagnose.md +11 -0
- package/payload/commands/extract.md +11 -0
- package/payload/commands/inspect.md +11 -0
- package/payload/commands/validate-template.md +11 -0
- package/payload/examples/01-extract-to-csv.md +49 -0
- package/payload/examples/02-classify-unknown-pdf.md +102 -0
- package/payload/examples/03-diagnose-failed-result.md +68 -0
- package/payload/references/classification.md +363 -0
- package/payload/references/decision-tree.md +43 -0
- package/payload/references/failure-tree.md +169 -0
- package/payload/references/pattern-authoring.md +40 -0
- package/payload/references/patterns.md +97 -0
- package/payload/references/privacy.md +36 -0
- package/payload/references/scripts.md +361 -0
- package/payload/references/template-reference.md +606 -0
- package/payload/references/workflow.md +163 -0
- package/payload/scripts/_common.csx +250 -0
- package/payload/scripts/classify.csx +53 -0
- package/payload/scripts/dry-run.csx +85 -0
- package/payload/scripts/evaluate-match.csx +72 -0
- package/payload/scripts/execute.csx +89 -0
- package/payload/scripts/inspect.csx +43 -0
- package/payload/scripts/list-templates.csx +34 -0
- package/payload/scripts/load-template.csx +54 -0
- package/payload/scripts/save-template.csx +53 -0
- package/payload/scripts/schema-info.csx +84 -0
- package/payload/scripts/test-groups.csx +44 -0
- package/payload/scripts/test-pattern.csx +61 -0
- package/payload/scripts/validate-template.csx +54 -0
- package/payload/skill/SKILL.md +88 -0
- package/payload/skill/assets/lib/Docuoria.dll +0 -0
- package/payload/skill/assets/schemas/template-schema.json +413 -0
- package/payload/skill/examples/01-extract-to-csv.md +49 -0
- package/payload/skill/examples/02-classify-unknown-pdf.md +102 -0
- package/payload/skill/examples/03-diagnose-failed-result.md +68 -0
- package/payload/skill/references/classification.md +363 -0
- package/payload/skill/references/decision-tree.md +43 -0
- package/payload/skill/references/failure-tree.md +169 -0
- package/payload/skill/references/pattern-authoring.md +40 -0
- package/payload/skill/references/patterns.md +97 -0
- package/payload/skill/references/privacy.md +36 -0
- package/payload/skill/references/scripts.md +361 -0
- package/payload/skill/references/template-reference.md +606 -0
- package/payload/skill/references/workflow.md +163 -0
- package/payload/skill/scripts/_common.csx +250 -0
- package/payload/skill/scripts/classify.csx +53 -0
- package/payload/skill/scripts/dry-run.csx +85 -0
- package/payload/skill/scripts/evaluate-match.csx +72 -0
- package/payload/skill/scripts/execute.csx +89 -0
- package/payload/skill/scripts/inspect.csx +43 -0
- package/payload/skill/scripts/list-templates.csx +34 -0
- package/payload/skill/scripts/load-template.csx +54 -0
- package/payload/skill/scripts/save-template.csx +53 -0
- package/payload/skill/scripts/schema-info.csx +84 -0
- package/payload/skill/scripts/test-groups.csx +44 -0
- package/payload/skill/scripts/test-pattern.csx +61 -0
- package/payload/skill/scripts/validate-template.csx +54 -0
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@sidub-inc/docuoria.cli",
|
|
3
|
+
"version": "1.0.15",
|
|
4
|
+
"description": "Install the Docuoria AI plugin into any AI tool skill directory.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"docuoria",
|
|
7
|
+
"ai-agent",
|
|
8
|
+
"skill",
|
|
9
|
+
"agentskills",
|
|
10
|
+
"pdf",
|
|
11
|
+
"extraction"
|
|
12
|
+
],
|
|
13
|
+
"license": "MIT",
|
|
14
|
+
"author": "Sidub Inc.",
|
|
15
|
+
"homepage": "https://github.com/Sidub-Inc/Docuoria",
|
|
16
|
+
"repository": {
|
|
17
|
+
"type": "git",
|
|
18
|
+
"url": "https://github.com/Sidub-Inc/Docuoria.git",
|
|
19
|
+
"directory": "src/hosts/docuoria-cli-npm"
|
|
20
|
+
},
|
|
21
|
+
"bugs": {
|
|
22
|
+
"url": "https://github.com/Sidub-Inc/Docuoria/issues"
|
|
23
|
+
},
|
|
24
|
+
"engines": {
|
|
25
|
+
"node": ">=20.0.0",
|
|
26
|
+
"pnpm": ">=9.0.0"
|
|
27
|
+
},
|
|
28
|
+
"bin": {
|
|
29
|
+
"docuoria": "./bin/docuoria.js"
|
|
30
|
+
},
|
|
31
|
+
"main": "./dist/index.js",
|
|
32
|
+
"files": [
|
|
33
|
+
"bin/",
|
|
34
|
+
"dist/",
|
|
35
|
+
"payload/"
|
|
36
|
+
],
|
|
37
|
+
"publishConfig": {
|
|
38
|
+
"access": "public"
|
|
39
|
+
},
|
|
40
|
+
"scripts": {
|
|
41
|
+
"build": "tsup",
|
|
42
|
+
"test": "vitest run",
|
|
43
|
+
"prepublishOnly": "node scripts/verify-payload.mjs"
|
|
44
|
+
},
|
|
45
|
+
"dependencies": {
|
|
46
|
+
"@inquirer/prompts": "^7.0.0",
|
|
47
|
+
"commander": "^12.0.0",
|
|
48
|
+
"picocolors": "^1.1.0"
|
|
49
|
+
},
|
|
50
|
+
"devDependencies": {
|
|
51
|
+
"@types/node": "^20.0.0",
|
|
52
|
+
"tsup": "^8.0.0",
|
|
53
|
+
"typescript": "^5.4.0",
|
|
54
|
+
"vitest": "^2.0.0"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "docuoria",
|
|
3
|
+
"displayName": "Sidub Docuoria",
|
|
4
|
+
"version": "1.0.15",
|
|
5
|
+
"description": "Extract structured data from PDFs using template-driven pipelines. Author, classify, inspect, test, and execute extraction templates with a dotnet-script CLI surface.",
|
|
6
|
+
"author": {
|
|
7
|
+
"name": "Sidub Inc.",
|
|
8
|
+
"url": "https://github.com/Sidub-Inc"
|
|
9
|
+
},
|
|
10
|
+
"homepage": "https://github.com/Sidub-Inc/Docuoria",
|
|
11
|
+
"repository": "https://github.com/Sidub-Inc/Docuoria",
|
|
12
|
+
"license": "MIT",
|
|
13
|
+
"keywords": [
|
|
14
|
+
"pdf",
|
|
15
|
+
"extraction",
|
|
16
|
+
"pipeline",
|
|
17
|
+
"template",
|
|
18
|
+
"classification",
|
|
19
|
+
"dotnet-script"
|
|
20
|
+
]
|
|
21
|
+
}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
{
|
|
2
|
+
"package": "docuoria",
|
|
3
|
+
"version": "1.0.15",
|
|
4
|
+
"builtAt": "2026-06-03T02:39:22.6166355Z",
|
|
5
|
+
"sdk": {
|
|
6
|
+
"assembly": "Docuoria.dll",
|
|
7
|
+
"fileVersion": "1.0.15.0",
|
|
8
|
+
"informationalVersion": "1.0.15+Branch.main.Sha.67cad57e05c48eb1b6025c0077fa9357e32c6cdc.67cad57e05c48eb1b6025c0077fa9357e32c6cdc"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
{
|
|
12
|
+
"path": ".claude-plugin/plugin.json",
|
|
13
|
+
"sha256": "4ace45efa10dc2513678c7e262c1c0292c0ecfb67fb325657dd7ce0732ff967d",
|
|
14
|
+
"bytes": 632
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"path": "assets/lib/Docuoria.dll",
|
|
18
|
+
"sha256": "64af912f8a90ec4ca5cb2c087f5e1e105ae2eb8213829431cafcac4789e70f6c",
|
|
19
|
+
"bytes": 287744
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"path": "assets/schemas/template-schema.json",
|
|
23
|
+
"sha256": "157f4f6f0643232c892c5362a2aaaeb3927528d7b9222237de1e387afe3d710c",
|
|
24
|
+
"bytes": 15327
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"path": "commands/classify.md",
|
|
28
|
+
"sha256": "5df5dabdb4d8dcc1b9666aec1510e6db98fb9ed05d8ecece11aa075cf4fd447c",
|
|
29
|
+
"bytes": 262
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"path": "commands/diagnose.md",
|
|
33
|
+
"sha256": "11862ac6695e5196c131b9f13a5bdfa83053b8196fa7b3b562584e01016e07f6",
|
|
34
|
+
"bytes": 275
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"path": "commands/extract.md",
|
|
38
|
+
"sha256": "f685936aa8bf5a3c8c45ce86e4ef0dcdb3996fd26f15015ed2a59212ce49e55b",
|
|
39
|
+
"bytes": 256
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"path": "commands/inspect.md",
|
|
43
|
+
"sha256": "a70e3e4d7b06a9a1e6367af419b590005b0c5b55e7c3090bb14efd57570e6486",
|
|
44
|
+
"bytes": 265
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"path": "commands/validate-template.md",
|
|
48
|
+
"sha256": "be1fcc943ca1545bcc2705f1d866c1f3b99c92d1d9dec447f0d64dee1d741179",
|
|
49
|
+
"bytes": 277
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"path": "examples/01-extract-to-csv.md",
|
|
53
|
+
"sha256": "8742f36efad83be45a7b76410f90bd1e3a3c5a4889df30237cda19831fdf1b0d",
|
|
54
|
+
"bytes": 3641
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"path": "examples/02-classify-unknown-pdf.md",
|
|
58
|
+
"sha256": "9d24dd10b75693104d19f95f0dfb98ee56deeed8ef69ace07968b27c573418e3",
|
|
59
|
+
"bytes": 4943
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
"path": "examples/03-diagnose-failed-result.md",
|
|
63
|
+
"sha256": "4b86cb4bf80460c1d483dd02b4454f858d04594bfe3c1676949b4cb5179de926",
|
|
64
|
+
"bytes": 3955
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"path": "references/classification.md",
|
|
68
|
+
"sha256": "1ef5549515bf6e9161538d195c33abe841ec7d32a3224e989f83528fb663dbd1",
|
|
69
|
+
"bytes": 16252
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"path": "references/decision-tree.md",
|
|
73
|
+
"sha256": "bd1522be6352b0c8df75b0abc8e18c3e785cef463abf0e75cbd88a58d36cf1fc",
|
|
74
|
+
"bytes": 5570
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"path": "references/failure-tree.md",
|
|
78
|
+
"sha256": "38d7d33a07086391441c7cc3e8dcae52d999497196c1111013ce26eb9e58bcd0",
|
|
79
|
+
"bytes": 15857
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"path": "references/pattern-authoring.md",
|
|
83
|
+
"sha256": "b678b2290cf0ba2a126bb03224c10c984d952475625dc7209b2d20ab04a9a2f2",
|
|
84
|
+
"bytes": 3372
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"path": "references/patterns.md",
|
|
88
|
+
"sha256": "92c627dced78fc574f3bd729af7f21cf9ee8c7914b9ec4766c9374b634275566",
|
|
89
|
+
"bytes": 4156
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"path": "references/privacy.md",
|
|
93
|
+
"sha256": "2448abb1623bed15f646eddc0875a4ba30a95d33cf66f2779f42b8dbf55cc0d0",
|
|
94
|
+
"bytes": 3502
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"path": "references/scripts.md",
|
|
98
|
+
"sha256": "7b6a35582c789b6a24678139e11fc58e3670507a4d3532108d37832068a73827",
|
|
99
|
+
"bytes": 15127
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
"path": "references/template-reference.md",
|
|
103
|
+
"sha256": "6b640f07bfa3c553a6f274cbc639e5badc8a39652de04ad8de602631f1e15314",
|
|
104
|
+
"bytes": 25089
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
"path": "references/workflow.md",
|
|
108
|
+
"sha256": "084324d28143c8ae5994534f76fe6c82086da697c04fedba71c6672042af2ec4",
|
|
109
|
+
"bytes": 10509
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"path": "scripts/_common.csx",
|
|
113
|
+
"sha256": "65eeea7c7f3b9383804bab2d4c8a4d204afa873f94c013d8a06181bf01fc07c5",
|
|
114
|
+
"bytes": 10518
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"path": "scripts/classify.csx",
|
|
118
|
+
"sha256": "386bdae4c07f4b8a6305c7e2d77dda236fe36079ab618cc9811a1593f49b605a",
|
|
119
|
+
"bytes": 2229
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"path": "scripts/dry-run.csx",
|
|
123
|
+
"sha256": "d89d7f7a5333dae60628ae43374a81ef001fcc79db99560619779bb414b79fa8",
|
|
124
|
+
"bytes": 3279
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
"path": "scripts/evaluate-match.csx",
|
|
128
|
+
"sha256": "3157aa515d648b913a829b069a8187e4979a6204518e164c79c60610e625ef83",
|
|
129
|
+
"bytes": 2727
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"path": "scripts/execute.csx",
|
|
133
|
+
"sha256": "7e609a1f8e4883f4948c5da66f044ed1941f93b89170e758d85580264eefd8e7",
|
|
134
|
+
"bytes": 3309
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
"path": "scripts/inspect.csx",
|
|
138
|
+
"sha256": "ee7dd99e4e1905c8b60664c42b48c4f23aece97a6f3f73ed695cb0957c49ad1a",
|
|
139
|
+
"bytes": 1332
|
|
140
|
+
},
|
|
141
|
+
{
|
|
142
|
+
"path": "scripts/list-templates.csx",
|
|
143
|
+
"sha256": "6c89fd1646fc8408bbc7c8dad3f44deed8e01ba8d58baaa7b8fe32c23f2f1028",
|
|
144
|
+
"bytes": 1136
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
"path": "scripts/load-template.csx",
|
|
148
|
+
"sha256": "5dc873ed53176370a70b59091f45b7eeda86f085f6f635421d0e407b37d684f0",
|
|
149
|
+
"bytes": 1970
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
"path": "scripts/save-template.csx",
|
|
153
|
+
"sha256": "f4f9ff50b33a1f69e5f78af7c2122eb47fe3f84274e06831e56ac41be72cbed6",
|
|
154
|
+
"bytes": 1936
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"path": "scripts/schema-info.csx",
|
|
158
|
+
"sha256": "2645c8d2e2e8f5aa0121f420e903288b33878ab62fd7811331ef8a2d6dfe5bf1",
|
|
159
|
+
"bytes": 2610
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
"path": "scripts/test-groups.csx",
|
|
163
|
+
"sha256": "71db4230f994a9946d3c62628aa2880562e0590262095fac6655500122e94ead",
|
|
164
|
+
"bytes": 1445
|
|
165
|
+
},
|
|
166
|
+
{
|
|
167
|
+
"path": "scripts/test-pattern.csx",
|
|
168
|
+
"sha256": "b61b15f163c54d0a5331c72dbea0b388ac8c1353d1fc628f165d4adee7900b66",
|
|
169
|
+
"bytes": 1982
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"path": "scripts/validate-template.csx",
|
|
173
|
+
"sha256": "6c3db1bb1f7b0ceae5a546568b151fd16f80b3773cb1b840c01cc4f660ba4ab8",
|
|
174
|
+
"bytes": 1800
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
"path": "SKILL.md",
|
|
178
|
+
"sha256": "9f341be7cc9a5bacb2543700e9a51f5acfa14d52825d415b40e8ff8dc150b052",
|
|
179
|
+
"bytes": 5301
|
|
180
|
+
},
|
|
181
|
+
{
|
|
182
|
+
"path": "skill/assets/lib/Docuoria.dll",
|
|
183
|
+
"sha256": "64af912f8a90ec4ca5cb2c087f5e1e105ae2eb8213829431cafcac4789e70f6c",
|
|
184
|
+
"bytes": 287744
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
"path": "skill/assets/schemas/template-schema.json",
|
|
188
|
+
"sha256": "157f4f6f0643232c892c5362a2aaaeb3927528d7b9222237de1e387afe3d710c",
|
|
189
|
+
"bytes": 15327
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
"path": "skill/examples/01-extract-to-csv.md",
|
|
193
|
+
"sha256": "8742f36efad83be45a7b76410f90bd1e3a3c5a4889df30237cda19831fdf1b0d",
|
|
194
|
+
"bytes": 3641
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
"path": "skill/examples/02-classify-unknown-pdf.md",
|
|
198
|
+
"sha256": "9d24dd10b75693104d19f95f0dfb98ee56deeed8ef69ace07968b27c573418e3",
|
|
199
|
+
"bytes": 4943
|
|
200
|
+
},
|
|
201
|
+
{
|
|
202
|
+
"path": "skill/examples/03-diagnose-failed-result.md",
|
|
203
|
+
"sha256": "4b86cb4bf80460c1d483dd02b4454f858d04594bfe3c1676949b4cb5179de926",
|
|
204
|
+
"bytes": 3955
|
|
205
|
+
},
|
|
206
|
+
{
|
|
207
|
+
"path": "skill/references/classification.md",
|
|
208
|
+
"sha256": "1ef5549515bf6e9161538d195c33abe841ec7d32a3224e989f83528fb663dbd1",
|
|
209
|
+
"bytes": 16252
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
"path": "skill/references/decision-tree.md",
|
|
213
|
+
"sha256": "bd1522be6352b0c8df75b0abc8e18c3e785cef463abf0e75cbd88a58d36cf1fc",
|
|
214
|
+
"bytes": 5570
|
|
215
|
+
},
|
|
216
|
+
{
|
|
217
|
+
"path": "skill/references/failure-tree.md",
|
|
218
|
+
"sha256": "38d7d33a07086391441c7cc3e8dcae52d999497196c1111013ce26eb9e58bcd0",
|
|
219
|
+
"bytes": 15857
|
|
220
|
+
},
|
|
221
|
+
{
|
|
222
|
+
"path": "skill/references/pattern-authoring.md",
|
|
223
|
+
"sha256": "b678b2290cf0ba2a126bb03224c10c984d952475625dc7209b2d20ab04a9a2f2",
|
|
224
|
+
"bytes": 3372
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
"path": "skill/references/patterns.md",
|
|
228
|
+
"sha256": "92c627dced78fc574f3bd729af7f21cf9ee8c7914b9ec4766c9374b634275566",
|
|
229
|
+
"bytes": 4156
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
"path": "skill/references/privacy.md",
|
|
233
|
+
"sha256": "2448abb1623bed15f646eddc0875a4ba30a95d33cf66f2779f42b8dbf55cc0d0",
|
|
234
|
+
"bytes": 3502
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
"path": "skill/references/scripts.md",
|
|
238
|
+
"sha256": "7b6a35582c789b6a24678139e11fc58e3670507a4d3532108d37832068a73827",
|
|
239
|
+
"bytes": 15127
|
|
240
|
+
},
|
|
241
|
+
{
|
|
242
|
+
"path": "skill/references/template-reference.md",
|
|
243
|
+
"sha256": "6b640f07bfa3c553a6f274cbc639e5badc8a39652de04ad8de602631f1e15314",
|
|
244
|
+
"bytes": 25089
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
"path": "skill/references/workflow.md",
|
|
248
|
+
"sha256": "084324d28143c8ae5994534f76fe6c82086da697c04fedba71c6672042af2ec4",
|
|
249
|
+
"bytes": 10509
|
|
250
|
+
},
|
|
251
|
+
{
|
|
252
|
+
"path": "skill/scripts/_common.csx",
|
|
253
|
+
"sha256": "65eeea7c7f3b9383804bab2d4c8a4d204afa873f94c013d8a06181bf01fc07c5",
|
|
254
|
+
"bytes": 10518
|
|
255
|
+
},
|
|
256
|
+
{
|
|
257
|
+
"path": "skill/scripts/classify.csx",
|
|
258
|
+
"sha256": "386bdae4c07f4b8a6305c7e2d77dda236fe36079ab618cc9811a1593f49b605a",
|
|
259
|
+
"bytes": 2229
|
|
260
|
+
},
|
|
261
|
+
{
|
|
262
|
+
"path": "skill/scripts/dry-run.csx",
|
|
263
|
+
"sha256": "d89d7f7a5333dae60628ae43374a81ef001fcc79db99560619779bb414b79fa8",
|
|
264
|
+
"bytes": 3279
|
|
265
|
+
},
|
|
266
|
+
{
|
|
267
|
+
"path": "skill/scripts/evaluate-match.csx",
|
|
268
|
+
"sha256": "3157aa515d648b913a829b069a8187e4979a6204518e164c79c60610e625ef83",
|
|
269
|
+
"bytes": 2727
|
|
270
|
+
},
|
|
271
|
+
{
|
|
272
|
+
"path": "skill/scripts/execute.csx",
|
|
273
|
+
"sha256": "7e609a1f8e4883f4948c5da66f044ed1941f93b89170e758d85580264eefd8e7",
|
|
274
|
+
"bytes": 3309
|
|
275
|
+
},
|
|
276
|
+
{
|
|
277
|
+
"path": "skill/scripts/inspect.csx",
|
|
278
|
+
"sha256": "ee7dd99e4e1905c8b60664c42b48c4f23aece97a6f3f73ed695cb0957c49ad1a",
|
|
279
|
+
"bytes": 1332
|
|
280
|
+
},
|
|
281
|
+
{
|
|
282
|
+
"path": "skill/scripts/list-templates.csx",
|
|
283
|
+
"sha256": "6c89fd1646fc8408bbc7c8dad3f44deed8e01ba8d58baaa7b8fe32c23f2f1028",
|
|
284
|
+
"bytes": 1136
|
|
285
|
+
},
|
|
286
|
+
{
|
|
287
|
+
"path": "skill/scripts/load-template.csx",
|
|
288
|
+
"sha256": "5dc873ed53176370a70b59091f45b7eeda86f085f6f635421d0e407b37d684f0",
|
|
289
|
+
"bytes": 1970
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
"path": "skill/scripts/save-template.csx",
|
|
293
|
+
"sha256": "f4f9ff50b33a1f69e5f78af7c2122eb47fe3f84274e06831e56ac41be72cbed6",
|
|
294
|
+
"bytes": 1936
|
|
295
|
+
},
|
|
296
|
+
{
|
|
297
|
+
"path": "skill/scripts/schema-info.csx",
|
|
298
|
+
"sha256": "2645c8d2e2e8f5aa0121f420e903288b33878ab62fd7811331ef8a2d6dfe5bf1",
|
|
299
|
+
"bytes": 2610
|
|
300
|
+
},
|
|
301
|
+
{
|
|
302
|
+
"path": "skill/scripts/test-groups.csx",
|
|
303
|
+
"sha256": "71db4230f994a9946d3c62628aa2880562e0590262095fac6655500122e94ead",
|
|
304
|
+
"bytes": 1445
|
|
305
|
+
},
|
|
306
|
+
{
|
|
307
|
+
"path": "skill/scripts/test-pattern.csx",
|
|
308
|
+
"sha256": "b61b15f163c54d0a5331c72dbea0b388ac8c1353d1fc628f165d4adee7900b66",
|
|
309
|
+
"bytes": 1982
|
|
310
|
+
},
|
|
311
|
+
{
|
|
312
|
+
"path": "skill/scripts/validate-template.csx",
|
|
313
|
+
"sha256": "6c3db1bb1f7b0ceae5a546568b151fd16f80b3773cb1b840c01cc4f660ba4ab8",
|
|
314
|
+
"bytes": 1800
|
|
315
|
+
},
|
|
316
|
+
{
|
|
317
|
+
"path": "skill/SKILL.md",
|
|
318
|
+
"sha256": "9f341be7cc9a5bacb2543700e9a51f5acfa14d52825d415b40e8ff8dc150b052",
|
|
319
|
+
"bytes": 5301
|
|
320
|
+
}
|
|
321
|
+
]
|
|
322
|
+
}
|
package/payload/SKILL.md
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: docuoria
|
|
3
|
+
description: Use this skill when working with Docuoria to extract structured data from PDFs, author or validate a template, design match rules for classification, diagnose a FailedResult or RejectedResult, select an ExtractionSource type, write or debug a regex pattern, or verify that PDF processing is local and private. Apply even when the user does not say "Docuoria" — any task involving the Docuoria CLI scripts, template JSON, or the IDocuoriaEngine API qualifies.
|
|
4
|
+
license: MIT
|
|
5
|
+
compatibility: Requires .NET 10 SDK and the `dotnet-script` global tool. SDK assembly (`Docuoria.dll`) is bundled under `assets/lib/`; transitive NuGet dependencies (PdfPig, Tabula, CsvHelper, pythonnet, Microsoft.Extensions.*) are resolved by `dotnet-script` at first run.
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Docuoria Skill
|
|
9
|
+
|
|
10
|
+
## Installing this skill
|
|
11
|
+
|
|
12
|
+
This skill directory was scaffolded by the Docuoria CLI. To install or update:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
# npm (Node.js ≥ 20)
|
|
16
|
+
npm install -g @sidub-inc/docuoria.cli
|
|
17
|
+
docuoria init
|
|
18
|
+
|
|
19
|
+
# .NET global tool
|
|
20
|
+
dotnet tool install -g Docuoria.Cli
|
|
21
|
+
docuoria init
|
|
22
|
+
|
|
23
|
+
# Update an existing installation
|
|
24
|
+
docuoria update
|
|
25
|
+
|
|
26
|
+
# Check status / drift
|
|
27
|
+
docuoria list-tools
|
|
28
|
+
docuoria doctor
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
See `docs/cli.md` in the Docuoria repository for the full command reference.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Invocation
|
|
36
|
+
|
|
37
|
+
All scripts follow `dotnet script scripts/<name>.csx -- --<flag> <value>`, run from the skill root. The `--` separator is mandatory — without it, dotnet-script consumes the flags as its own. Positional arguments are rejected; pass `--help` to any script for its full flag list.
|
|
38
|
+
|
|
39
|
+
Scripts divide into two groups:
|
|
40
|
+
|
|
41
|
+
| Group | Scripts | Store flag |
|
|
42
|
+
| --- | --- | --- |
|
|
43
|
+
| **Store-aware** — read from or write to a template store | `classify`, `evaluate-match`, `list-templates`, `load-template`, `save-template` | `--store-path <dir>` or `--store-url <url>` |
|
|
44
|
+
| **Standalone** — operate on individual PDF and/or template files | `inspect`, `test-pattern`, `test-groups`, `dry-run`, `execute`, `validate-template` | — |
|
|
45
|
+
|
|
46
|
+
Store-aware scripts accept `--store-path <dir>` (local directory) or `--store-url <url>` (API endpoint) to locate templates; these flags are mutually exclusive. When omitted, `--store-path` defaults to `./templates` relative to the process working directory — since the CWD varies by environment, always pass the store location explicitly.
|
|
47
|
+
|
|
48
|
+
## Workflow
|
|
49
|
+
|
|
50
|
+
The pipeline runs in order; classification determines the entry point. Load `references/workflow.md` for the full step-by-step guide.
|
|
51
|
+
|
|
52
|
+
1. **Classify** — match the PDF against all stored templates
|
|
53
|
+
2. **Inspect** — read the engine's text extraction (when no template matches)
|
|
54
|
+
3. **Test** — prove regex patterns against the engine's haystack
|
|
55
|
+
4. **Build** — author the template JSON, validate classification rules and schema
|
|
56
|
+
5. **Dry-run** — end-to-end extraction without output generation
|
|
57
|
+
6. **Execute** — full pipeline producing CSV or JSON output
|
|
58
|
+
7. **Store** — persist the template and verify it ranks correctly
|
|
59
|
+
|
|
60
|
+
## Routing
|
|
61
|
+
|
|
62
|
+
Consult the canonical reference before relying on memory. Each concern has a single owner.
|
|
63
|
+
|
|
64
|
+
| If the agent needs to… | Load |
|
|
65
|
+
| --- | --- |
|
|
66
|
+
| Follow the full pipeline step-by-step | `references/workflow.md` |
|
|
67
|
+
| Pick an `ExtractionSource` subtype for a field (`TextPattern`, `TableRows`, `TextAnchor`, `MetadataField`, `Fallback`) | `references/decision-tree.md` |
|
|
68
|
+
| Design a discriminating `rootMatchRule` (token selection, composite architecture, structural rules, weights, thresholds) | `references/classification.md` |
|
|
69
|
+
| Diagnose a `RejectedResult`, `FailedResult`, classification failure, or empty/incomplete `DryRunSucceeded` | `references/failure-tree.md` |
|
|
70
|
+
| Map a stderr `error.code` to a remediation branch | `references/failure-tree.md` § Stderr error.code → Branch routing |
|
|
71
|
+
| Copy a regex pattern from the library or adapt one to a specific PDF | `references/patterns.md` then `references/pattern-authoring.md` |
|
|
72
|
+
| Look up a CLI script's flags, output envelope, or error codes | `references/scripts.md` |
|
|
73
|
+
| Look up a template JSON property, `$kind` discriminator, enum value, or shape | `references/template-reference.md` |
|
|
74
|
+
| Answer whether PDF processing is local/private | `references/privacy.md` |
|
|
75
|
+
|
|
76
|
+
## Skill layout
|
|
77
|
+
|
|
78
|
+
- `SKILL.md` — this router; loaded at skill activation.
|
|
79
|
+
- `references/` — deep guides loaded on demand (see Routing table).
|
|
80
|
+
- `scripts/` — `dotnet-script` CLI surface (`_common.csx` plus 12 verb scripts).
|
|
81
|
+
- `assets/lib/Docuoria.dll` — bundled SDK assembly.
|
|
82
|
+
- `assets/schemas/template-schema.json` — JSON Schema for template authoring and validation.
|
|
83
|
+
- `examples/` — three worked end-to-end walkthroughs.
|
|
84
|
+
|
|
85
|
+
## Gotchas
|
|
86
|
+
|
|
87
|
+
- **`fieldType` in template JSON must be an integer (0–5), never a string.** The engine rejects string values with `RejectionReason.MalformedTemplate`. Enum: 0 String, 1 Number, 2 Integer, 3 Boolean, 4 Date, 5 Timestamp. Run `validate-template.csx` to catch this before dry-run.
|
|
88
|
+
- **Adapt every regex to the actual PDF.** The engine's flattened text differs from the visual layout — whitespace, line breaks, and character encoding may not match what you see. Validate with `test-pattern.csx` and `inspect.csx` rather than pasting library patterns verbatim.
|
|
Binary file
|