@sidub-inc/docuoria.cli 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1056 -0
- package/package.json +56 -0
- package/payload/.claude-plugin/plugin.json +21 -0
- package/payload/MANIFEST.json +322 -0
- package/payload/SKILL.md +88 -0
- package/payload/assets/lib/Docuoria.dll +0 -0
- package/payload/assets/schemas/template-schema.json +413 -0
- package/payload/commands/classify.md +11 -0
- package/payload/commands/diagnose.md +11 -0
- package/payload/commands/extract.md +11 -0
- package/payload/commands/inspect.md +11 -0
- package/payload/commands/validate-template.md +11 -0
- package/payload/examples/01-extract-to-csv.md +49 -0
- package/payload/examples/02-classify-unknown-pdf.md +102 -0
- package/payload/examples/03-diagnose-failed-result.md +68 -0
- package/payload/references/classification.md +363 -0
- package/payload/references/decision-tree.md +43 -0
- package/payload/references/failure-tree.md +169 -0
- package/payload/references/pattern-authoring.md +40 -0
- package/payload/references/patterns.md +97 -0
- package/payload/references/privacy.md +36 -0
- package/payload/references/scripts.md +361 -0
- package/payload/references/template-reference.md +606 -0
- package/payload/references/workflow.md +163 -0
- package/payload/scripts/_common.csx +250 -0
- package/payload/scripts/classify.csx +53 -0
- package/payload/scripts/dry-run.csx +85 -0
- package/payload/scripts/evaluate-match.csx +72 -0
- package/payload/scripts/execute.csx +89 -0
- package/payload/scripts/inspect.csx +43 -0
- package/payload/scripts/list-templates.csx +34 -0
- package/payload/scripts/load-template.csx +54 -0
- package/payload/scripts/save-template.csx +53 -0
- package/payload/scripts/schema-info.csx +84 -0
- package/payload/scripts/test-groups.csx +44 -0
- package/payload/scripts/test-pattern.csx +61 -0
- package/payload/scripts/validate-template.csx +54 -0
- package/payload/skill/SKILL.md +88 -0
- package/payload/skill/assets/lib/Docuoria.dll +0 -0
- package/payload/skill/assets/schemas/template-schema.json +413 -0
- package/payload/skill/examples/01-extract-to-csv.md +49 -0
- package/payload/skill/examples/02-classify-unknown-pdf.md +102 -0
- package/payload/skill/examples/03-diagnose-failed-result.md +68 -0
- package/payload/skill/references/classification.md +363 -0
- package/payload/skill/references/decision-tree.md +43 -0
- package/payload/skill/references/failure-tree.md +169 -0
- package/payload/skill/references/pattern-authoring.md +40 -0
- package/payload/skill/references/patterns.md +97 -0
- package/payload/skill/references/privacy.md +36 -0
- package/payload/skill/references/scripts.md +361 -0
- package/payload/skill/references/template-reference.md +606 -0
- package/payload/skill/references/workflow.md +163 -0
- package/payload/skill/scripts/_common.csx +250 -0
- package/payload/skill/scripts/classify.csx +53 -0
- package/payload/skill/scripts/dry-run.csx +85 -0
- package/payload/skill/scripts/evaluate-match.csx +72 -0
- package/payload/skill/scripts/execute.csx +89 -0
- package/payload/skill/scripts/inspect.csx +43 -0
- package/payload/skill/scripts/list-templates.csx +34 -0
- package/payload/skill/scripts/load-template.csx +54 -0
- package/payload/skill/scripts/save-template.csx +53 -0
- package/payload/skill/scripts/schema-info.csx +84 -0
- package/payload/skill/scripts/test-groups.csx +44 -0
- package/payload/skill/scripts/test-pattern.csx +61 -0
- package/payload/skill/scripts/validate-template.csx +54 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-06 — Wrapper over IDocuoriaEngine.ExecuteTemplateAsync<TGenerator, TOptions>.
|
|
6
|
+
// Args: --pdf <path> --template <file.json> --format csv|json [--output <path>]
|
|
7
|
+
// Success: SucceededResult → write payload bytes (to --output or wrap as string in stdout JSON).
|
|
8
|
+
// Rejected/Failed: emit { status, result } and exit 1.
|
|
9
|
+
|
|
10
|
+
using System.Text;
|
|
11
|
+
using Docuoria.Configuration;
|
|
12
|
+
using Docuoria.Contracts;
|
|
13
|
+
using Docuoria.Models;
|
|
14
|
+
using Docuoria.Output.Csv;
|
|
15
|
+
using Docuoria.Output.Json;
|
|
16
|
+
using Docuoria.Results;
|
|
17
|
+
|
|
18
|
+
// Runs a template against a PDF and emits the generated CSV/JSON payload.
// Flow: parse args -> validate format and template path -> load template ->
// build host/engine -> execute -> report the result as JSON on stdout.
// NOTE(review): the code continues after JsonOut.Error(...) calls as if they
// terminate the process (last argument looks like the exit code) — confirm in _common.csx.
try
{
    Cli.Help(Args, "execute.csx", "Full pipeline run with output generation (CSV or JSON)",
        ("pdf", true, "Path to the source PDF", false),
        ("template", true, "Path to the template JSON file", false),
        ("format", true, "Output format: csv or json", false),
        ("output", false, "Write output to this file path (default: stdout)", false));

    var pdfPath = Cli.Require(Args, "pdf");
    var templatePath = Cli.Require(Args, "template");
    // Normalise so "CSV", " Json " etc. are accepted.
    var format = Cli.Require(Args, "format").Trim().ToLowerInvariant();
    var outputPath = Cli.Get(Args, "output");

    if (format != "csv" && format != "json")
    {
        JsonOut.Error("bad-format", "expected csv|json", null, 2);
    }
    if (!File.Exists(templatePath))
    {
        JsonOut.Error("template-not-found", $"Template not found at '{templatePath}'", null, 1);
    }

    var template = Template.FromJson(File.ReadAllText(templatePath));

    using var host = ScriptHost.CreateHost(Args.ToArray(), includeStore: false);
    var engine = ScriptHost.GetEngine(host);

    using var pdf = LoadPdf(pdfPath);

    // The generator type is a compile-time generic argument, so each format needs its own call.
    ProcessingResult result = format switch
    {
        "csv" => await engine.ExecuteTemplateAsync<CsvOutputGenerator, CsvGeneratorOptions>(
            pdf, template, new CsvGeneratorOptions()),
        "json" => await engine.ExecuteTemplateAsync<JsonOutputGenerator, JsonGeneratorOptions>(
            pdf, template, new JsonGeneratorOptions()),
        _ => throw new InvalidOperationException("unreachable")
    };

    switch (result)
    {
        case SucceededResult ok:
            var payload = ok.Output.Payload;
            if (!string.IsNullOrEmpty(outputPath))
            {
                // GetDirectoryName returns null when the path is a root directory;
                // only create the parent directory when there actually is one, so the
                // subsequent write reports the real I/O problem instead of an
                // ArgumentNullException from CreateDirectory.
                var outputDirectory = Path.GetDirectoryName(Path.GetFullPath(outputPath));
                if (!string.IsNullOrEmpty(outputDirectory))
                {
                    Directory.CreateDirectory(outputDirectory);
                }
                await File.WriteAllBytesAsync(outputPath, payload.ToArray());
                JsonOut.Write(new { status = "ok", path = outputPath });
            }
            else
            {
                // No --output: embed the payload, decoded as UTF-8, in the stdout JSON.
                var text = Encoding.UTF8.GetString(payload.Span);
                JsonOut.Write(new { status = "ok", format, output = text });
            }
            break;

        case RejectedResult rej:
            JsonOut.Error("rejected", $"Rejected ({rej.Reason}){(rej.Detail is not null ? $": {rej.Detail}" : "")}", null, 1);
            break;

        case FailedResult fail:
            JsonOut.Error("failed", fail.ErrorMessage, fail.InnerDetail, 1);
            break;

        default:
            // Any result subtype this script does not know about is reported by type name.
            JsonOut.Error("unknown-result", result.GetType().Name, null, 1);
            break;
    }
}
catch (Exception ex)
{
    // Last-resort handler: surface the message plus full exception text as JSON.
    JsonOut.Error("unhandled", ex.Message, ex.ToString(), 1);
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-01 — Agent-facing wrapper over IDocuoriaEngine.InspectAsync (read-only PDF probe).
|
|
6
|
+
// Args: --pdf <path> [--page <n>]
|
|
7
|
+
// stdout: PdfInspection JSON
|
|
8
|
+
// Exit codes: 0 success, 1 handled error, 2 bad args.
|
|
9
|
+
|
|
10
|
+
using Docuoria.Configuration;
|
|
11
|
+
using Docuoria.Contracts;
|
|
12
|
+
using Docuoria.Models;
|
|
13
|
+
|
|
14
|
+
// Probes a PDF through the engine's InspectAsync and writes the result as JSON.
// An optional --page restricts the inspection to a single 1-based page.
try
{
    Cli.Help(
        Args,
        "inspect.csx",
        "Inspect PDF structure (page count, text blocks, tables)",
        ("pdf", true, "Path to the source PDF", false),
        ("page", false, "1-based page index (default: all pages)", false));

    var sourcePdf = Cli.Require(Args, "pdf");
    var requestedPage = Cli.Get(Args, "page");

    using var host = ScriptHost.CreateHost(Args.ToArray(), includeStore: false);
    var engine = ScriptHost.GetEngine(host);

    // Stays null (no filtering) unless a page argument was supplied.
    PageFilter? pageFilter = null;
    var pageWasSupplied = !string.IsNullOrWhiteSpace(requestedPage);
    if (pageWasSupplied)
    {
        var parsed = int.TryParse(requestedPage, out var pageNumber);
        if (!(parsed && pageNumber >= 1))
        {
            JsonOut.Error("bad-arg", "--page must be a positive integer", null, 2);
        }
        pageFilter = PageFilter.SinglePage(pageNumber);
    }

    using var pdf = LoadPdf(sourcePdf);
    var inspection = await engine.InspectAsync(pdf, pageFilter);
    JsonOut.Write(inspection);
}
catch (Exception failure)
{
    // Anything not handled above is reported as an "unhandled" JSON error.
    JsonOut.Error("unhandled", failure.Message, failure.ToString(), 1);
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-08 — enumerate ITemplateStoreProvider.ListAsync.
|
|
6
|
+
// stdout: { templates: [..ids..] }
|
|
7
|
+
|
|
8
|
+
using Docuoria.Storage;
|
|
9
|
+
|
|
10
|
+
// Collects every identifier yielded by the template store's ListAsync and
// writes them to stdout as { templates: [...] }.
try
{
    Cli.Help(
        Args,
        "list-templates.csx",
        "List all template IDs in the configured store",
        ("store-path", false, "Local template store directory (default: ./templates)", false),
        ("store-url", false, "API template store URL (mutually exclusive with --store-path)", false),
        ("store-key", false, "Function key for API store authentication", false));

    using var host = ScriptHost.CreateHost(Args.ToArray(), includeStore: true);

    var templateStore = ScriptHost.GetStore(host);
    if (templateStore is null)
    {
        JsonOut.Error("no-store", "ITemplateStoreProvider is not registered. Pass --store-path <dir> to specify a local template store directory.", null, 1);
    }

    // Drain the async stream into a list so it can be serialised in one go.
    var templateIds = new List<string>();
    await foreach (var templateId in templateStore!.ListAsync())
    {
        templateIds.Add(templateId);
    }

    JsonOut.Write(new { templates = templateIds });
}
catch (Exception failure)
{
    // Anything not handled above is reported as an "unhandled" JSON error.
    JsonOut.Error("unhandled", failure.Message, failure.ToString(), 1);
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-09 — fetch a template by ID and emit its JSON.
|
|
6
|
+
// Args: --id <identifier> [--output <path>]
|
|
7
|
+
// stdout: parsed template JSON or { status, path }.
|
|
8
|
+
|
|
9
|
+
using System.Text.Json;
|
|
10
|
+
using Docuoria.Serialization;
|
|
11
|
+
using Docuoria.Storage;
|
|
12
|
+
|
|
13
|
+
// Loads a template by identifier from the configured store and either writes
// its JSON to --output or emits the parsed JSON on stdout.
// NOTE(review): the code continues after JsonOut.Error(...) calls as if they
// terminate the process (store!/tpl! are dereferenced afterwards) — confirm in _common.csx.
try
{
    Cli.Help(Args, "load-template.csx", "Fetch a template by ID and emit its JSON",
        ("id", true, "Template identifier", false),
        ("output", false, "Write template JSON to this file path", false),
        ("store-path", false, "Local template store directory (default: ./templates)", false),
        ("store-url", false, "API template store URL (mutually exclusive with --store-path)", false),
        ("store-key", false, "Function key for API store authentication", false));

    var id = Cli.Require(Args, "id");
    var outputPath = Cli.Get(Args, "output");

    using var host = ScriptHost.CreateHost(Args.ToArray(), includeStore: true);
    var store = ScriptHost.GetStore(host);
    if (store is null)
    {
        JsonOut.Error("no-store", "ITemplateStoreProvider is not registered. Pass --store-path <dir> to specify a local template store directory.", null, 1);
    }

    var tpl = await store!.LoadAsync(id);
    if (tpl is null)
    {
        JsonOut.Error("not-found", $"Template '{id}' not found. Run list-templates.csx to see available template IDs.", null, 1);
    }

    var json = tpl!.ToJson();
    if (!string.IsNullOrEmpty(outputPath))
    {
        // GetDirectoryName returns null when the path is a root directory;
        // only create the parent directory when there actually is one, so the
        // subsequent write reports the real I/O problem instead of an
        // ArgumentNullException from CreateDirectory.
        var outputDirectory = Path.GetDirectoryName(Path.GetFullPath(outputPath));
        if (!string.IsNullOrEmpty(outputDirectory))
        {
            Directory.CreateDirectory(outputDirectory);
        }
        await File.WriteAllTextAsync(outputPath, json);
        JsonOut.Write(new { status = "ok", path = outputPath });
    }
    else
    {
        // Re-parse into a JsonElement so stdout carries the template as a JSON
        // value rather than as an escaped string.
        var element = JsonSerializer.Deserialize<JsonElement>(json, DocuoriaJsonOptions.Default);
        JsonOut.Write(element);
    }
}
catch (Exception ex)
{
    // Last-resort handler: surface the message plus full exception text as JSON.
    JsonOut.Error("unhandled", ex.Message, ex.ToString(), 1);
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-10 — persist a template JSON file to the configured store.
|
|
6
|
+
// Args: --template <path> [--overwrite] [--store-path <dir>] [--store-url <url>] [--store-key <key>]
|
|
7
|
+
// stdout: { status, identifier }.
|
|
8
|
+
|
|
9
|
+
using Docuoria.Models;
|
|
10
|
+
using Docuoria.Storage;
|
|
11
|
+
using Docuoria.Storage.Exceptions;
|
|
12
|
+
|
|
13
|
+
// Reads a template JSON file, parses it, and saves it to the configured store.
// A TemplateAlreadyExistsException from the store is reported as "already-exists".
try
{
    Cli.Help(
        Args,
        "save-template.csx",
        "Persist a template JSON file to the configured store",
        ("template", true, "Path to the template JSON file", false),
        ("overwrite", false, "Overwrite if template already exists", true),
        ("store-path", false, "Local template store directory (default: ./templates)", false),
        ("store-url", false, "API template store URL (mutually exclusive with --store-path)", false),
        ("store-key", false, "Function key for API store authentication", false));

    var templateFile = Cli.Require(Args, "template");
    var allowOverwrite = Cli.Has(Args, "overwrite");

    var fileIsMissing = !File.Exists(templateFile);
    if (fileIsMissing)
    {
        JsonOut.Error("template-not-found", $"Template file not found at '{templateFile}'", null, 1);
    }

    using var host = ScriptHost.CreateHost(Args.ToArray(), includeStore: true);

    var templateStore = ScriptHost.GetStore(host);
    if (templateStore is null)
    {
        JsonOut.Error("no-store", "ITemplateStoreProvider is not registered. Pass --store-path <dir> to specify a local template store directory.", null, 1);
    }

    var templateJson = File.ReadAllText(templateFile);
    var parsedTemplate = Template.FromJson(templateJson);

    try
    {
        await templateStore!.SaveAsync(parsedTemplate, allowOverwrite);
    }
    catch (TemplateAlreadyExistsException duplicate)
    {
        // Append advice on how to replace the existing template.
        var advice = $"{duplicate.Message} Use --overwrite to replace the existing template.";
        JsonOut.Error("already-exists", advice, null, 1);
    }

    JsonOut.Write(new { status = "ok", identifier = parsedTemplate.Identifier });
}
catch (Exception failure)
{
    // Anything not handled above is reported as an "unhandled" JSON error.
    JsonOut.Error("unhandled", failure.Message, failure.ToString(), 1);
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-12 — Dump all type/enum/mode information for the SDK.
|
|
6
|
+
// Args: (none)
|
|
7
|
+
// stdout: JSON object with fieldTypes, extractionSources, modes, matchRules, subFieldMappings, metadataFields, notes.
|
|
8
|
+
|
|
9
|
+
using System.Reflection;
|
|
10
|
+
using Docuoria.Models;
|
|
11
|
+
|
|
12
|
+
// Emits a single JSON document describing the SDK's field types, extraction
// sources, mode values, match rules, sub-field mappings and metadata fields.
// Only the FieldType map is derived from the enum; the rest is a fixed list.
try
{
    Cli.Help(Args, "schema-info.csx", "List all SDK types, enums, modes, and valid values");

    // Numeric value -> member name for every FieldType enum member.
    var fieldTypes = new Dictionary<int, string>();
    foreach (var member in Enum.GetValues<FieldType>())
    {
        var numericValue = (int)member;
        fieldTypes[numericValue] = member.ToString();
    }

    string[] extractionSources =
    {
        "TextPatternExtractionSource",
        "TextAnchorExtractionSource",
        "TableCellExtractionSource",
        "TableRowsExtractionSource",
        "MetadataFieldExtractionSource",
        "FallbackExtractionSource"
    };

    // Keyed by type name; each value lists the accepted mode/operator strings.
    var modes = new Dictionary<string, object>();
    modes["TextPatternExtractionSource"] = new
    {
        mode = new[] { "Token", "Pattern", "AllMatches" },
        notes = new
        {
            Token = "requires 'literalToken'",
            Pattern = "requires 'regexPattern'",
            AllMatches = "requires 'regexPattern'"
        }
    };
    modes["TableRowsExtractionSource"] = new { mode = new[] { "ByHeader", "Ordinal" } };
    modes["TextPatternMatchRule"] = new { mode = new[] { "AnyToken", "AllTokens" } };
    modes["FileNameMatchRule"] = new { mode = new[] { "Glob", "Regex" } };
    modes["CompositeMatchRule"] = new { @operator = new[] { "And", "Or", "Not" } };

    string[] matchRules =
    {
        "TextPatternMatchRule",
        "FileNameMatchRule",
        "TextAnchorMatchRule",
        "MetadataMatchRule",
        "PageGeometryMatchRule",
        "TableMatchRule",
        "CompositeMatchRule"
    };

    string[] subFieldMappings =
    {
        "NamedGroupSubFieldMapping",
        "RegexGroupSubFieldMapping",
        "HeaderSubFieldMapping",
        "OrdinalSubFieldMapping"
    };

    string[] metadataFields =
    {
        "Title", "Author", "Subject", "Keywords",
        "Creator", "Producer", "CreationDate", "ModifiedDate"
    };

    // Free-text guidance included in the output alongside the lists.
    var notes = new
    {
        fieldType = "fieldType serializes as INTEGER (0-5), not string. Use the integer value in template JSON.",
        kind = "All polymorphic types use '$kind' discriminator with the CLR class name."
    };

    JsonOut.Write(new
    {
        fieldTypes,
        extractionSources,
        modes,
        matchRules,
        subFieldMappings,
        metadataFields,
        notes
    });
}
catch (Exception failure)
{
    // Anything not handled above is reported as an "unhandled" JSON error.
    JsonOut.Error("unhandled", failure.Message, failure.ToString(), 1);
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-03 — Wrapper over IDocuoriaEngine.TestGroupsAsync.
|
|
6
|
+
// Args: --pattern <regex> --pdf <path> [--page <n>]
|
|
7
|
+
// stdout: PatternGroupTestResult JSON.
|
|
8
|
+
|
|
9
|
+
using Docuoria.Configuration;
|
|
10
|
+
using Docuoria.Contracts;
|
|
11
|
+
using Docuoria.Models;
|
|
12
|
+
|
|
13
|
+
// Passes a regex and a PDF to the engine's TestGroupsAsync and writes the
// result as JSON. An optional --page restricts the run to one 1-based page.
try
{
    Cli.Help(
        Args,
        "test-groups.csx",
        "Test each capture group of a regex independently",
        ("pdf", true, "Path to the source PDF", false),
        ("pattern", true, "Multi-group regex pattern", false),
        ("page", false, "1-based page index (default: all pages)", false));

    var regex = Cli.Require(Args, "pattern");
    var sourcePdf = Cli.Require(Args, "pdf");
    var requestedPage = Cli.Get(Args, "page");

    using var host = ScriptHost.CreateHost(Args.ToArray(), includeStore: false);
    var engine = ScriptHost.GetEngine(host);

    // Options stay null unless a page argument was supplied.
    PatternTestOptions? testOptions = null;
    var pageWasSupplied = !string.IsNullOrWhiteSpace(requestedPage);
    if (pageWasSupplied)
    {
        var parsed = int.TryParse(requestedPage, out var pageNumber);
        if (!(parsed && pageNumber >= 1))
        {
            JsonOut.Error("bad-arg", "--page must be a positive integer", null, 2);
        }
        testOptions = new PatternTestOptions { PageFilter = PageFilter.SinglePage(pageNumber) };
    }

    using var pdf = LoadPdf(sourcePdf);
    var groupResult = await engine.TestGroupsAsync(pdf, regex, testOptions);
    JsonOut.Write(groupResult);
}
catch (Exception failure)
{
    // Anything not handled above is reported as an "unhandled" JSON error.
    JsonOut.Error("unhandled", failure.Message, failure.ToString(), 1);
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-02 — Wrapper over IDocuoriaEngine.TestPatternAsync.
|
|
6
|
+
// Args: --pattern <regex> --pdf <path> [--page <n>] [--block-separator <str>]
|
|
7
|
+
// stdout: PatternTestResult JSON (Error round-trips per Phase 24 — do NOT swallow).
|
|
8
|
+
|
|
9
|
+
using Docuoria.Configuration;
|
|
10
|
+
using Docuoria.Contracts;
|
|
11
|
+
using Docuoria.Models;
|
|
12
|
+
|
|
13
|
+
// Passes a regex and a PDF to the engine's TestPatternAsync and writes the
// result as JSON. Options are only built when --page and/or a non-empty
// --block-separator is supplied; otherwise null is passed to the engine.
try
{
    Cli.Help(
        Args,
        "test-pattern.csx",
        "Test a regex pattern against PDF text",
        ("pdf", true, "Path to the source PDF", false),
        ("pattern", true, "Regex pattern to test", false),
        ("page", false, "1-based page index (default: all pages)", false),
        ("block-separator", false, "Override block separator for text flattening", false));

    var regex = Cli.Require(Args, "pattern");
    var sourcePdf = Cli.Require(Args, "pdf");
    string? separatorArg = Cli.Get(Args, "block-separator");
    var requestedPage = Cli.Get(Args, "page");

    using var host = ScriptHost.CreateHost(Args.ToArray(), includeStore: false);
    var engine = ScriptHost.GetEngine(host);

    // Page filter stays null unless a page argument was supplied.
    PageFilter? pageFilter = null;
    var pageWasSupplied = !string.IsNullOrWhiteSpace(requestedPage);
    if (pageWasSupplied)
    {
        var parsed = int.TryParse(requestedPage, out var pageNumber);
        if (!(parsed && pageNumber >= 1))
        {
            JsonOut.Error("bad-arg", "--page must be a positive integer", null, 2);
        }
        pageFilter = PageFilter.SinglePage(pageNumber);
    }

    // NOTE(review): an empty separator counts as "not supplied" in the condition
    // below, but is passed through unchanged when --page is also given (?? only
    // replaces null) — confirm whether that asymmetry is intended.
    PatternTestOptions? testOptions = null;
    var separatorSupplied = !string.IsNullOrEmpty(separatorArg);
    if (separatorSupplied || pageFilter is not null)
    {
        testOptions = new PatternTestOptions
        {
            BlockSeparator = separatorArg ?? "\n",
            PageFilter = pageFilter
        };
    }

    using var pdf = LoadPdf(sourcePdf);

    var patternResult = await engine.TestPatternAsync(pdf, regex, testOptions);
    JsonOut.Write(patternResult);
}
catch (Exception failure)
{
    // Anything not handled above is reported as an "unhandled" JSON error.
    JsonOut.Error("unhandled", failure.Message, failure.ToString(), 1);
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#load "_common.csx"
|
|
2
|
+
|
|
3
|
+
#nullable enable
|
|
4
|
+
|
|
5
|
+
// SCR-04 — Template.FromJson + Validate() static check (no engine or host is created by this script).
|
|
6
|
+
// Args: --template <file.json>
|
|
7
|
+
// stdout: { valid, errors }
|
|
8
|
+
|
|
9
|
+
using System.Text.Json;
|
|
10
|
+
using Docuoria.Models;
|
|
11
|
+
|
|
12
|
+
// Parses a template JSON file and runs Template.Validate() on it.
// Output: { valid, errors } on success, or a "parse-error" JSON error when
// Template.FromJson throws a JsonException.
try
{
    Cli.Help(Args, "validate-template.csx", "Validate a template JSON file against the schema",
        ("template", true, "Path to the template JSON file", false));

    var templatePath = Cli.Require(Args, "template");
    if (!File.Exists(templatePath))
    {
        JsonOut.Error("template-not-found", $"Template not found at '{templatePath}'", null, 1);
    }

    var json = File.ReadAllText(templatePath);

    Template tpl;
    try
    {
        tpl = Template.FromJson(json);
    }
    catch (JsonException jex)
    {
        var message = jex.Message;
        string? hint = null;

        // Detect fieldType string-vs-integer errors and add a helpful hint.
        // A single case-insensitive check covers both "FieldType" and "fieldType".
        var mentionsFieldType = message.Contains("fieldType", StringComparison.OrdinalIgnoreCase);
        // NOTE(review): this second heuristic fires for any conversion failure whose
        // message mentions "String", not only fieldType ones — confirm that is intended.
        var stringConversionFailure =
            message.Contains("could not be converted", StringComparison.Ordinal)
            && message.Contains("String", StringComparison.Ordinal);

        if (mentionsFieldType || stringConversionFailure)
        {
            hint = "fieldType must be an integer (0=String, 1=Number, 2=Integer, 3=Boolean, 4=Date, 5=Timestamp). "
                + "Do NOT use string values like \"String\" or \"Number\".";
        }

        JsonOut.Error("parse-error", hint is not null ? $"{message} — Hint: {hint}" : message, null, 1);
        return;
    }

    var errors = tpl.Validate();
    JsonOut.Write(new { valid = errors.Count == 0, errors });
}
catch (Exception ex)
{
    // Last-resort handler: surface the message plus full exception text as JSON.
    JsonOut.Error("unhandled", ex.Message, ex.ToString(), 1);
}
|