@sidub-inc/docuoria.cli 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/index.js +1056 -0
  2. package/package.json +56 -0
  3. package/payload/.claude-plugin/plugin.json +21 -0
  4. package/payload/MANIFEST.json +322 -0
  5. package/payload/SKILL.md +88 -0
  6. package/payload/assets/lib/Docuoria.dll +0 -0
  7. package/payload/assets/schemas/template-schema.json +413 -0
  8. package/payload/commands/classify.md +11 -0
  9. package/payload/commands/diagnose.md +11 -0
  10. package/payload/commands/extract.md +11 -0
  11. package/payload/commands/inspect.md +11 -0
  12. package/payload/commands/validate-template.md +11 -0
  13. package/payload/examples/01-extract-to-csv.md +49 -0
  14. package/payload/examples/02-classify-unknown-pdf.md +102 -0
  15. package/payload/examples/03-diagnose-failed-result.md +68 -0
  16. package/payload/references/classification.md +363 -0
  17. package/payload/references/decision-tree.md +43 -0
  18. package/payload/references/failure-tree.md +169 -0
  19. package/payload/references/pattern-authoring.md +40 -0
  20. package/payload/references/patterns.md +97 -0
  21. package/payload/references/privacy.md +36 -0
  22. package/payload/references/scripts.md +361 -0
  23. package/payload/references/template-reference.md +606 -0
  24. package/payload/references/workflow.md +163 -0
  25. package/payload/scripts/_common.csx +250 -0
  26. package/payload/scripts/classify.csx +53 -0
  27. package/payload/scripts/dry-run.csx +85 -0
  28. package/payload/scripts/evaluate-match.csx +72 -0
  29. package/payload/scripts/execute.csx +89 -0
  30. package/payload/scripts/inspect.csx +43 -0
  31. package/payload/scripts/list-templates.csx +34 -0
  32. package/payload/scripts/load-template.csx +54 -0
  33. package/payload/scripts/save-template.csx +53 -0
  34. package/payload/scripts/schema-info.csx +84 -0
  35. package/payload/scripts/test-groups.csx +44 -0
  36. package/payload/scripts/test-pattern.csx +61 -0
  37. package/payload/scripts/validate-template.csx +54 -0
  38. package/payload/skill/SKILL.md +88 -0
  39. package/payload/skill/assets/lib/Docuoria.dll +0 -0
  40. package/payload/skill/assets/schemas/template-schema.json +413 -0
  41. package/payload/skill/examples/01-extract-to-csv.md +49 -0
  42. package/payload/skill/examples/02-classify-unknown-pdf.md +102 -0
  43. package/payload/skill/examples/03-diagnose-failed-result.md +68 -0
  44. package/payload/skill/references/classification.md +363 -0
  45. package/payload/skill/references/decision-tree.md +43 -0
  46. package/payload/skill/references/failure-tree.md +169 -0
  47. package/payload/skill/references/pattern-authoring.md +40 -0
  48. package/payload/skill/references/patterns.md +97 -0
  49. package/payload/skill/references/privacy.md +36 -0
  50. package/payload/skill/references/scripts.md +361 -0
  51. package/payload/skill/references/template-reference.md +606 -0
  52. package/payload/skill/references/workflow.md +163 -0
  53. package/payload/skill/scripts/_common.csx +250 -0
  54. package/payload/skill/scripts/classify.csx +53 -0
  55. package/payload/skill/scripts/dry-run.csx +85 -0
  56. package/payload/skill/scripts/evaluate-match.csx +72 -0
  57. package/payload/skill/scripts/execute.csx +89 -0
  58. package/payload/skill/scripts/inspect.csx +43 -0
  59. package/payload/skill/scripts/list-templates.csx +34 -0
  60. package/payload/skill/scripts/load-template.csx +54 -0
  61. package/payload/skill/scripts/save-template.csx +53 -0
  62. package/payload/skill/scripts/schema-info.csx +84 -0
  63. package/payload/skill/scripts/test-groups.csx +44 -0
  64. package/payload/skill/scripts/test-pattern.csx +61 -0
  65. package/payload/skill/scripts/validate-template.csx +54 -0
package/package.json ADDED
@@ -0,0 +1,56 @@
1
+ {
2
+ "name": "@sidub-inc/docuoria.cli",
3
+ "version": "1.0.15",
4
+ "description": "Install the Docuoria AI plugin into any AI tool skill directory.",
5
+ "keywords": [
6
+ "docuoria",
7
+ "ai-agent",
8
+ "skill",
9
+ "agentskills",
10
+ "pdf",
11
+ "extraction"
12
+ ],
13
+ "license": "MIT",
14
+ "author": "Sidub Inc.",
15
+ "homepage": "https://github.com/Sidub-Inc/Docuoria",
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "https://github.com/Sidub-Inc/Docuoria.git",
19
+ "directory": "src/hosts/docuoria-cli-npm"
20
+ },
21
+ "bugs": {
22
+ "url": "https://github.com/Sidub-Inc/Docuoria/issues"
23
+ },
24
+ "engines": {
25
+ "node": ">=20.0.0",
26
+ "pnpm": ">=9.0.0"
27
+ },
28
+ "bin": {
29
+ "docuoria": "./bin/docuoria.js"
30
+ },
31
+ "main": "./dist/index.js",
32
+ "files": [
33
+ "bin/",
34
+ "dist/",
35
+ "payload/"
36
+ ],
37
+ "publishConfig": {
38
+ "access": "public"
39
+ },
40
+ "scripts": {
41
+ "build": "tsup",
42
+ "test": "vitest run",
43
+ "prepublishOnly": "node scripts/verify-payload.mjs"
44
+ },
45
+ "dependencies": {
46
+ "@inquirer/prompts": "^7.0.0",
47
+ "commander": "^12.0.0",
48
+ "picocolors": "^1.1.0"
49
+ },
50
+ "devDependencies": {
51
+ "@types/node": "^20.0.0",
52
+ "tsup": "^8.0.0",
53
+ "typescript": "^5.4.0",
54
+ "vitest": "^2.0.0"
55
+ }
56
+ }
@@ -0,0 +1,21 @@
1
+ {
2
+ "name": "docuoria",
3
+ "displayName": "Sidub Docuoria",
4
+ "version": "1.0.15",
5
+ "description": "Extract structured data from PDFs using template-driven pipelines. Author, classify, inspect, test, and execute extraction templates with a dotnet-script CLI surface.",
6
+ "author": {
7
+ "name": "Sidub Inc.",
8
+ "url": "https://github.com/Sidub-Inc"
9
+ },
10
+ "homepage": "https://github.com/Sidub-Inc/Docuoria",
11
+ "repository": "https://github.com/Sidub-Inc/Docuoria",
12
+ "license": "MIT",
13
+ "keywords": [
14
+ "pdf",
15
+ "extraction",
16
+ "pipeline",
17
+ "template",
18
+ "classification",
19
+ "dotnet-script"
20
+ ]
21
+ }
@@ -0,0 +1,322 @@
1
+ {
2
+ "package": "docuoria",
3
+ "version": "1.0.15",
4
+ "builtAt": "2026-06-03T02:39:22.6166355Z",
5
+ "sdk": {
6
+ "assembly": "Docuoria.dll",
7
+ "fileVersion": "1.0.15.0",
8
+ "informationalVersion": "1.0.15+Branch.main.Sha.67cad57e05c48eb1b6025c0077fa9357e32c6cdc.67cad57e05c48eb1b6025c0077fa9357e32c6cdc"
9
+ },
10
+ "files": [
11
+ {
12
+ "path": ".claude-plugin/plugin.json",
13
+ "sha256": "4ace45efa10dc2513678c7e262c1c0292c0ecfb67fb325657dd7ce0732ff967d",
14
+ "bytes": 632
15
+ },
16
+ {
17
+ "path": "assets/lib/Docuoria.dll",
18
+ "sha256": "64af912f8a90ec4ca5cb2c087f5e1e105ae2eb8213829431cafcac4789e70f6c",
19
+ "bytes": 287744
20
+ },
21
+ {
22
+ "path": "assets/schemas/template-schema.json",
23
+ "sha256": "157f4f6f0643232c892c5362a2aaaeb3927528d7b9222237de1e387afe3d710c",
24
+ "bytes": 15327
25
+ },
26
+ {
27
+ "path": "commands/classify.md",
28
+ "sha256": "5df5dabdb4d8dcc1b9666aec1510e6db98fb9ed05d8ecece11aa075cf4fd447c",
29
+ "bytes": 262
30
+ },
31
+ {
32
+ "path": "commands/diagnose.md",
33
+ "sha256": "11862ac6695e5196c131b9f13a5bdfa83053b8196fa7b3b562584e01016e07f6",
34
+ "bytes": 275
35
+ },
36
+ {
37
+ "path": "commands/extract.md",
38
+ "sha256": "f685936aa8bf5a3c8c45ce86e4ef0dcdb3996fd26f15015ed2a59212ce49e55b",
39
+ "bytes": 256
40
+ },
41
+ {
42
+ "path": "commands/inspect.md",
43
+ "sha256": "a70e3e4d7b06a9a1e6367af419b590005b0c5b55e7c3090bb14efd57570e6486",
44
+ "bytes": 265
45
+ },
46
+ {
47
+ "path": "commands/validate-template.md",
48
+ "sha256": "be1fcc943ca1545bcc2705f1d866c1f3b99c92d1d9dec447f0d64dee1d741179",
49
+ "bytes": 277
50
+ },
51
+ {
52
+ "path": "examples/01-extract-to-csv.md",
53
+ "sha256": "8742f36efad83be45a7b76410f90bd1e3a3c5a4889df30237cda19831fdf1b0d",
54
+ "bytes": 3641
55
+ },
56
+ {
57
+ "path": "examples/02-classify-unknown-pdf.md",
58
+ "sha256": "9d24dd10b75693104d19f95f0dfb98ee56deeed8ef69ace07968b27c573418e3",
59
+ "bytes": 4943
60
+ },
61
+ {
62
+ "path": "examples/03-diagnose-failed-result.md",
63
+ "sha256": "4b86cb4bf80460c1d483dd02b4454f858d04594bfe3c1676949b4cb5179de926",
64
+ "bytes": 3955
65
+ },
66
+ {
67
+ "path": "references/classification.md",
68
+ "sha256": "1ef5549515bf6e9161538d195c33abe841ec7d32a3224e989f83528fb663dbd1",
69
+ "bytes": 16252
70
+ },
71
+ {
72
+ "path": "references/decision-tree.md",
73
+ "sha256": "bd1522be6352b0c8df75b0abc8e18c3e785cef463abf0e75cbd88a58d36cf1fc",
74
+ "bytes": 5570
75
+ },
76
+ {
77
+ "path": "references/failure-tree.md",
78
+ "sha256": "38d7d33a07086391441c7cc3e8dcae52d999497196c1111013ce26eb9e58bcd0",
79
+ "bytes": 15857
80
+ },
81
+ {
82
+ "path": "references/pattern-authoring.md",
83
+ "sha256": "b678b2290cf0ba2a126bb03224c10c984d952475625dc7209b2d20ab04a9a2f2",
84
+ "bytes": 3372
85
+ },
86
+ {
87
+ "path": "references/patterns.md",
88
+ "sha256": "92c627dced78fc574f3bd729af7f21cf9ee8c7914b9ec4766c9374b634275566",
89
+ "bytes": 4156
90
+ },
91
+ {
92
+ "path": "references/privacy.md",
93
+ "sha256": "2448abb1623bed15f646eddc0875a4ba30a95d33cf66f2779f42b8dbf55cc0d0",
94
+ "bytes": 3502
95
+ },
96
+ {
97
+ "path": "references/scripts.md",
98
+ "sha256": "7b6a35582c789b6a24678139e11fc58e3670507a4d3532108d37832068a73827",
99
+ "bytes": 15127
100
+ },
101
+ {
102
+ "path": "references/template-reference.md",
103
+ "sha256": "6b640f07bfa3c553a6f274cbc639e5badc8a39652de04ad8de602631f1e15314",
104
+ "bytes": 25089
105
+ },
106
+ {
107
+ "path": "references/workflow.md",
108
+ "sha256": "084324d28143c8ae5994534f76fe6c82086da697c04fedba71c6672042af2ec4",
109
+ "bytes": 10509
110
+ },
111
+ {
112
+ "path": "scripts/_common.csx",
113
+ "sha256": "65eeea7c7f3b9383804bab2d4c8a4d204afa873f94c013d8a06181bf01fc07c5",
114
+ "bytes": 10518
115
+ },
116
+ {
117
+ "path": "scripts/classify.csx",
118
+ "sha256": "386bdae4c07f4b8a6305c7e2d77dda236fe36079ab618cc9811a1593f49b605a",
119
+ "bytes": 2229
120
+ },
121
+ {
122
+ "path": "scripts/dry-run.csx",
123
+ "sha256": "d89d7f7a5333dae60628ae43374a81ef001fcc79db99560619779bb414b79fa8",
124
+ "bytes": 3279
125
+ },
126
+ {
127
+ "path": "scripts/evaluate-match.csx",
128
+ "sha256": "3157aa515d648b913a829b069a8187e4979a6204518e164c79c60610e625ef83",
129
+ "bytes": 2727
130
+ },
131
+ {
132
+ "path": "scripts/execute.csx",
133
+ "sha256": "7e609a1f8e4883f4948c5da66f044ed1941f93b89170e758d85580264eefd8e7",
134
+ "bytes": 3309
135
+ },
136
+ {
137
+ "path": "scripts/inspect.csx",
138
+ "sha256": "ee7dd99e4e1905c8b60664c42b48c4f23aece97a6f3f73ed695cb0957c49ad1a",
139
+ "bytes": 1332
140
+ },
141
+ {
142
+ "path": "scripts/list-templates.csx",
143
+ "sha256": "6c89fd1646fc8408bbc7c8dad3f44deed8e01ba8d58baaa7b8fe32c23f2f1028",
144
+ "bytes": 1136
145
+ },
146
+ {
147
+ "path": "scripts/load-template.csx",
148
+ "sha256": "5dc873ed53176370a70b59091f45b7eeda86f085f6f635421d0e407b37d684f0",
149
+ "bytes": 1970
150
+ },
151
+ {
152
+ "path": "scripts/save-template.csx",
153
+ "sha256": "f4f9ff50b33a1f69e5f78af7c2122eb47fe3f84274e06831e56ac41be72cbed6",
154
+ "bytes": 1936
155
+ },
156
+ {
157
+ "path": "scripts/schema-info.csx",
158
+ "sha256": "2645c8d2e2e8f5aa0121f420e903288b33878ab62fd7811331ef8a2d6dfe5bf1",
159
+ "bytes": 2610
160
+ },
161
+ {
162
+ "path": "scripts/test-groups.csx",
163
+ "sha256": "71db4230f994a9946d3c62628aa2880562e0590262095fac6655500122e94ead",
164
+ "bytes": 1445
165
+ },
166
+ {
167
+ "path": "scripts/test-pattern.csx",
168
+ "sha256": "b61b15f163c54d0a5331c72dbea0b388ac8c1353d1fc628f165d4adee7900b66",
169
+ "bytes": 1982
170
+ },
171
+ {
172
+ "path": "scripts/validate-template.csx",
173
+ "sha256": "6c3db1bb1f7b0ceae5a546568b151fd16f80b3773cb1b840c01cc4f660ba4ab8",
174
+ "bytes": 1800
175
+ },
176
+ {
177
+ "path": "SKILL.md",
178
+ "sha256": "9f341be7cc9a5bacb2543700e9a51f5acfa14d52825d415b40e8ff8dc150b052",
179
+ "bytes": 5301
180
+ },
181
+ {
182
+ "path": "skill/assets/lib/Docuoria.dll",
183
+ "sha256": "64af912f8a90ec4ca5cb2c087f5e1e105ae2eb8213829431cafcac4789e70f6c",
184
+ "bytes": 287744
185
+ },
186
+ {
187
+ "path": "skill/assets/schemas/template-schema.json",
188
+ "sha256": "157f4f6f0643232c892c5362a2aaaeb3927528d7b9222237de1e387afe3d710c",
189
+ "bytes": 15327
190
+ },
191
+ {
192
+ "path": "skill/examples/01-extract-to-csv.md",
193
+ "sha256": "8742f36efad83be45a7b76410f90bd1e3a3c5a4889df30237cda19831fdf1b0d",
194
+ "bytes": 3641
195
+ },
196
+ {
197
+ "path": "skill/examples/02-classify-unknown-pdf.md",
198
+ "sha256": "9d24dd10b75693104d19f95f0dfb98ee56deeed8ef69ace07968b27c573418e3",
199
+ "bytes": 4943
200
+ },
201
+ {
202
+ "path": "skill/examples/03-diagnose-failed-result.md",
203
+ "sha256": "4b86cb4bf80460c1d483dd02b4454f858d04594bfe3c1676949b4cb5179de926",
204
+ "bytes": 3955
205
+ },
206
+ {
207
+ "path": "skill/references/classification.md",
208
+ "sha256": "1ef5549515bf6e9161538d195c33abe841ec7d32a3224e989f83528fb663dbd1",
209
+ "bytes": 16252
210
+ },
211
+ {
212
+ "path": "skill/references/decision-tree.md",
213
+ "sha256": "bd1522be6352b0c8df75b0abc8e18c3e785cef463abf0e75cbd88a58d36cf1fc",
214
+ "bytes": 5570
215
+ },
216
+ {
217
+ "path": "skill/references/failure-tree.md",
218
+ "sha256": "38d7d33a07086391441c7cc3e8dcae52d999497196c1111013ce26eb9e58bcd0",
219
+ "bytes": 15857
220
+ },
221
+ {
222
+ "path": "skill/references/pattern-authoring.md",
223
+ "sha256": "b678b2290cf0ba2a126bb03224c10c984d952475625dc7209b2d20ab04a9a2f2",
224
+ "bytes": 3372
225
+ },
226
+ {
227
+ "path": "skill/references/patterns.md",
228
+ "sha256": "92c627dced78fc574f3bd729af7f21cf9ee8c7914b9ec4766c9374b634275566",
229
+ "bytes": 4156
230
+ },
231
+ {
232
+ "path": "skill/references/privacy.md",
233
+ "sha256": "2448abb1623bed15f646eddc0875a4ba30a95d33cf66f2779f42b8dbf55cc0d0",
234
+ "bytes": 3502
235
+ },
236
+ {
237
+ "path": "skill/references/scripts.md",
238
+ "sha256": "7b6a35582c789b6a24678139e11fc58e3670507a4d3532108d37832068a73827",
239
+ "bytes": 15127
240
+ },
241
+ {
242
+ "path": "skill/references/template-reference.md",
243
+ "sha256": "6b640f07bfa3c553a6f274cbc639e5badc8a39652de04ad8de602631f1e15314",
244
+ "bytes": 25089
245
+ },
246
+ {
247
+ "path": "skill/references/workflow.md",
248
+ "sha256": "084324d28143c8ae5994534f76fe6c82086da697c04fedba71c6672042af2ec4",
249
+ "bytes": 10509
250
+ },
251
+ {
252
+ "path": "skill/scripts/_common.csx",
253
+ "sha256": "65eeea7c7f3b9383804bab2d4c8a4d204afa873f94c013d8a06181bf01fc07c5",
254
+ "bytes": 10518
255
+ },
256
+ {
257
+ "path": "skill/scripts/classify.csx",
258
+ "sha256": "386bdae4c07f4b8a6305c7e2d77dda236fe36079ab618cc9811a1593f49b605a",
259
+ "bytes": 2229
260
+ },
261
+ {
262
+ "path": "skill/scripts/dry-run.csx",
263
+ "sha256": "d89d7f7a5333dae60628ae43374a81ef001fcc79db99560619779bb414b79fa8",
264
+ "bytes": 3279
265
+ },
266
+ {
267
+ "path": "skill/scripts/evaluate-match.csx",
268
+ "sha256": "3157aa515d648b913a829b069a8187e4979a6204518e164c79c60610e625ef83",
269
+ "bytes": 2727
270
+ },
271
+ {
272
+ "path": "skill/scripts/execute.csx",
273
+ "sha256": "7e609a1f8e4883f4948c5da66f044ed1941f93b89170e758d85580264eefd8e7",
274
+ "bytes": 3309
275
+ },
276
+ {
277
+ "path": "skill/scripts/inspect.csx",
278
+ "sha256": "ee7dd99e4e1905c8b60664c42b48c4f23aece97a6f3f73ed695cb0957c49ad1a",
279
+ "bytes": 1332
280
+ },
281
+ {
282
+ "path": "skill/scripts/list-templates.csx",
283
+ "sha256": "6c89fd1646fc8408bbc7c8dad3f44deed8e01ba8d58baaa7b8fe32c23f2f1028",
284
+ "bytes": 1136
285
+ },
286
+ {
287
+ "path": "skill/scripts/load-template.csx",
288
+ "sha256": "5dc873ed53176370a70b59091f45b7eeda86f085f6f635421d0e407b37d684f0",
289
+ "bytes": 1970
290
+ },
291
+ {
292
+ "path": "skill/scripts/save-template.csx",
293
+ "sha256": "f4f9ff50b33a1f69e5f78af7c2122eb47fe3f84274e06831e56ac41be72cbed6",
294
+ "bytes": 1936
295
+ },
296
+ {
297
+ "path": "skill/scripts/schema-info.csx",
298
+ "sha256": "2645c8d2e2e8f5aa0121f420e903288b33878ab62fd7811331ef8a2d6dfe5bf1",
299
+ "bytes": 2610
300
+ },
301
+ {
302
+ "path": "skill/scripts/test-groups.csx",
303
+ "sha256": "71db4230f994a9946d3c62628aa2880562e0590262095fac6655500122e94ead",
304
+ "bytes": 1445
305
+ },
306
+ {
307
+ "path": "skill/scripts/test-pattern.csx",
308
+ "sha256": "b61b15f163c54d0a5331c72dbea0b388ac8c1353d1fc628f165d4adee7900b66",
309
+ "bytes": 1982
310
+ },
311
+ {
312
+ "path": "skill/scripts/validate-template.csx",
313
+ "sha256": "6c3db1bb1f7b0ceae5a546568b151fd16f80b3773cb1b840c01cc4f660ba4ab8",
314
+ "bytes": 1800
315
+ },
316
+ {
317
+ "path": "skill/SKILL.md",
318
+ "sha256": "9f341be7cc9a5bacb2543700e9a51f5acfa14d52825d415b40e8ff8dc150b052",
319
+ "bytes": 5301
320
+ }
321
+ ]
322
+ }
@@ -0,0 +1,88 @@
1
+ ---
2
+ name: docuoria
3
+ description: Use this skill when working with Docuoria to extract structured data from PDFs, author or validate a template, design match rules for classification, diagnose a FailedResult or RejectedResult, select an ExtractionSource type, write or debug a regex pattern, or verify that PDF processing is local and private. Apply even when the user does not say "Docuoria" — any task involving the Docuoria CLI scripts, template JSON, or the IDocuoriaEngine API qualifies.
4
+ license: MIT
5
+ compatibility: Requires .NET 10 SDK and the `dotnet-script` global tool. SDK assembly (`Docuoria.dll`) is bundled under `assets/lib/`; transitive NuGet dependencies (PdfPig, Tabula, CsvHelper, pythonnet, Microsoft.Extensions.*) are resolved by `dotnet-script` at first run.
6
+ ---
7
+
8
+ # Docuoria Skill
9
+
10
+ ## Installing this skill
11
+
12
+ This skill directory was scaffolded by the Docuoria CLI. To install or update:
13
+
14
+ ```bash
15
+ # npm (Node.js ≥ 20)
16
+ npm install -g @sidub-inc/docuoria.cli
17
+ docuoria init
18
+
19
+ # .NET global tool
20
+ dotnet tool install -g Docuoria.Cli
21
+ docuoria init
22
+
23
+ # Update an existing installation
24
+ docuoria update
25
+
26
+ # Check status / drift
27
+ docuoria list-tools
28
+ docuoria doctor
29
+ ```
30
+
31
+ See `docs/cli.md` in the Docuoria repository for the full command reference.
32
+
33
+ ---
34
+
35
+ ## Invocation
36
+
37
+ All scripts follow `dotnet script scripts/<name>.csx -- --<flag> <value>`, run from the skill root. The `--` separator is mandatory — without it, dotnet-script consumes the flags as its own. Positional arguments are rejected; pass `--help` to any script for its full flag list.
38
+
39
+ Scripts divide into two groups:
40
+
41
+ | Group | Scripts | Store flag |
42
+ | --- | --- | --- |
43
+ | **Store-aware** — read from or write to a template store | `classify`, `evaluate-match`, `list-templates`, `load-template`, `save-template` | `--store-path <dir>` or `--store-url <url>` |
44
+ | **Standalone** — operate on individual PDF and/or template files | `inspect`, `test-pattern`, `test-groups`, `dry-run`, `execute`, `validate-template` | — |
45
+
46
+ Store-aware scripts accept `--store-path <dir>` (local directory) or `--store-url <url>` (API endpoint) to locate templates; these flags are mutually exclusive. When omitted, `--store-path` defaults to `./templates` relative to the process working directory — since the CWD varies by environment, always pass the store location explicitly.
47
+
48
+ ## Workflow
49
+
50
+ The pipeline runs in order; classification determines the entry point. Load `references/workflow.md` for the full step-by-step guide.
51
+
52
+ 1. **Classify** — match the PDF against all stored templates
53
+ 2. **Inspect** — read the engine's text extraction (when no template matches)
54
+ 3. **Test** — prove regex patterns against the engine's haystack
55
+ 4. **Build** — author the template JSON, validate classification rules and schema
56
+ 5. **Dry-run** — end-to-end extraction without output generation
57
+ 6. **Execute** — full pipeline producing CSV or JSON output
58
+ 7. **Store** — persist the template and verify it ranks correctly
59
+
60
+ ## Routing
61
+
62
+ Consult the canonical reference before relying on memory. Each concern has a single owner.
63
+
64
+ | If the agent needs to… | Load |
65
+ | --- | --- |
66
+ | Follow the full pipeline step-by-step | `references/workflow.md` |
67
+ | Pick an `ExtractionSource` subtype for a field (`TextPattern`, `TableRows`, `TextAnchor`, `MetadataField`, `Fallback`) | `references/decision-tree.md` |
68
+ | Design a discriminating `rootMatchRule` (token selection, composite architecture, structural rules, weights, thresholds) | `references/classification.md` |
69
+ | Diagnose a `RejectedResult`, `FailedResult`, classification failure, or empty/incomplete `DryRunSucceeded` | `references/failure-tree.md` |
70
+ | Map a stderr `error.code` to a remediation branch | `references/failure-tree.md` § Stderr error.code → Branch routing |
71
+ | Copy a regex pattern from the library or adapt one to a specific PDF | `references/patterns.md` then `references/pattern-authoring.md` |
72
+ | Look up a CLI script's flags, output envelope, or error codes | `references/scripts.md` |
73
+ | Look up a template JSON property, `$kind` discriminator, enum value, or shape | `references/template-reference.md` |
74
+ | Answer whether PDF processing is local/private | `references/privacy.md` |
75
+
76
+ ## Skill layout
77
+
78
+ - `SKILL.md` — this router; loaded at skill activation.
79
+ - `references/` — deep guides loaded on demand (see Routing table).
80
+ - `scripts/` — `dotnet-script` CLI surface (`_common.csx` plus 12 verb scripts).
81
+ - `assets/lib/Docuoria.dll` — bundled SDK assembly.
82
+ - `assets/schemas/template-schema.json` — JSON Schema for template authoring and validation.
83
+ - `examples/` — three worked end-to-end walkthroughs.
84
+
85
+ ## Gotchas
86
+
87
+ - **`fieldType` in template JSON must be an integer (0–5), never a string.** The engine rejects string values with `RejectionReason.MalformedTemplate`. Enum: 0 String, 1 Number, 2 Integer, 3 Boolean, 4 Date, 5 Timestamp. Run `validate-template.csx` to catch this before dry-run.
88
+ - **Adapt every regex to the actual PDF.** The engine's flattened text differs from the visual layout — whitespace, line breaks, and character encoding may not match what you see. Validate with `test-pattern.csx` and `inspect.csx` rather than pasting library patterns verbatim.
Binary file