@talonic/docs 0.20.8 → 0.20.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/content.js +1560 -276
- package/package.json +1 -1
package/dist/content.js
CHANGED
|
@@ -427,6 +427,26 @@ var sections = [
|
|
|
427
427
|
{
|
|
428
428
|
type: "paragraph",
|
|
429
429
|
text: "**Supported Formats:** 25+ file types. **Resolution:** 4-phase pipeline. **Instant Matches:** ~30% of cells (free)."
|
|
430
|
+
},
|
|
431
|
+
{
|
|
432
|
+
type: "paragraph",
|
|
433
|
+
text: "Talonic is an **agentic data structuring platform**. It ingests documents of any type, discovers every data point inside them, builds a knowledge graph of canonical fields, and deploys AI agents to fill structured output schemas. Every cell in the output carries provenance metadata \u2014 which pipeline phase filled it, the confidence score, and an AI reasoning trace linking back to the source document."
|
|
434
|
+
},
|
|
435
|
+
{
|
|
436
|
+
type: "list",
|
|
437
|
+
ordered: false,
|
|
438
|
+
items: [
|
|
439
|
+
"**25+ file formats** \u2014 PDF, DOCX, XLSX, images, HTML, JSON, CSV, email formats (EML, MSG), and ZIP archives.",
|
|
440
|
+
"**4-phase extraction pipeline** \u2014 resolve from the knowledge graph, extract with AI agents, re-resolve, then transform and validate.",
|
|
441
|
+
"**~30% instant matches** \u2014 cells filled from graph lookup are free and instant, reducing both cost and latency.",
|
|
442
|
+
"**Per-cell provenance** \u2014 every value traces back to its source with confidence scores and reasoning.",
|
|
443
|
+
"**Batch mode** \u2014 process large backlogs at 50% cost with a 48-hour delivery window."
|
|
444
|
+
]
|
|
445
|
+
},
|
|
446
|
+
{
|
|
447
|
+
type: "callout",
|
|
448
|
+
variant: "info",
|
|
449
|
+
text: "Talonic uses Anthropic Claude for intelligent extraction and reasoning. The platform handles OCR, classification, field discovery, and schema generation automatically \u2014 you provide documents and define what output you need."
|
|
430
450
|
}
|
|
431
451
|
],
|
|
432
452
|
related: [
|
|
@@ -442,6 +462,14 @@ var sections = [
|
|
|
442
462
|
{
|
|
443
463
|
question: "How many file formats does Talonic support?",
|
|
444
464
|
answer: "Talonic supports 25+ file types including PDF, DOCX, XLSX, images (PNG, JPG), plain text, HTML, JSON, CSV, email formats (EML, MSG), and ZIP archives."
|
|
465
|
+
},
|
|
466
|
+
{
|
|
467
|
+
question: 'What does "per-cell provenance" mean?',
|
|
468
|
+
answer: "Every cell in the structured output carries metadata about which pipeline phase filled it, a confidence score, an AI reasoning trace, and references back to the source document. This makes every value auditable and explainable."
|
|
469
|
+
},
|
|
470
|
+
{
|
|
471
|
+
question: "How much do instant graph matches cost?",
|
|
472
|
+
answer: "Graph matches (approximately 30% of cells) are free. They are filled from the knowledge graph through deterministic lookup, so no LLM call is needed. Only cells that require AI extraction incur cost."
|
|
445
473
|
}
|
|
446
474
|
],
|
|
447
475
|
mentions: [
|
|
@@ -449,7 +477,9 @@ var sections = [
|
|
|
449
477
|
"structured data",
|
|
450
478
|
"provenance",
|
|
451
479
|
"AI reasoning traces",
|
|
452
|
-
"4-phase pipeline"
|
|
480
|
+
"4-phase pipeline",
|
|
481
|
+
"agentic platform",
|
|
482
|
+
"knowledge graph"
|
|
453
483
|
]
|
|
454
484
|
},
|
|
455
485
|
{
|
|
@@ -463,6 +493,10 @@ var sections = [
|
|
|
463
493
|
type: "paragraph",
|
|
464
494
|
text: "The platform revolves around a small set of interconnected concepts. Understanding these will help you navigate every feature."
|
|
465
495
|
},
|
|
496
|
+
{
|
|
497
|
+
type: "paragraph",
|
|
498
|
+
text: "Each concept builds on the previous ones. **Sources** produce **Documents**, documents yield **Fields**, fields aggregate into the **Field Registry**, the registry powers **Schemas**, schemas drive **Jobs**, and jobs produce structured output with **Provenance**. **Cases** emerge organically when documents share entities like names, reference numbers, or project codes."
|
|
499
|
+
},
|
|
466
500
|
{
|
|
467
501
|
type: "param-table",
|
|
468
502
|
params: [
|
|
@@ -507,6 +541,11 @@ var sections = [
|
|
|
507
541
|
description: "Per-cell metadata: which phase filled it, confidence score, reasoning trace, source references."
|
|
508
542
|
}
|
|
509
543
|
]
|
|
544
|
+
},
|
|
545
|
+
{
|
|
546
|
+
type: "callout",
|
|
547
|
+
variant: "info",
|
|
548
|
+
text: "The **Field Registry** is the heart of the platform. As you process more documents, the registry grows \u2014 fields are clustered semantically, promoted through tiers, and enriched with master extraction instructions. This accumulated knowledge makes every subsequent extraction faster and more accurate."
|
|
510
549
|
}
|
|
511
550
|
],
|
|
512
551
|
related: [
|
|
@@ -522,6 +561,14 @@ var sections = [
|
|
|
522
561
|
{
|
|
523
562
|
question: "What is provenance in Talonic?",
|
|
524
563
|
answer: "Provenance is per-cell metadata that tracks which pipeline phase filled the value, the confidence score, an AI reasoning trace, and source references back to the original document."
|
|
564
|
+
},
|
|
565
|
+
{
|
|
566
|
+
question: "How do Cases form?",
|
|
567
|
+
answer: "Cases form automatically through the linking system. When two or more documents share entities \u2014 like a person's name, a reference number, or a project code \u2014 they are connected into a case via a bipartite document-entity graph."
|
|
568
|
+
},
|
|
569
|
+
{
|
|
570
|
+
question: "What is the difference between a Generated Schema and a Template Schema?",
|
|
571
|
+
answer: "Generated Schemas are created automatically by the platform based on the document types it discovers. Template Schemas are user-defined for specific output needs \u2014 you choose which fields to include and how they map to the Field Registry."
|
|
525
572
|
}
|
|
526
573
|
],
|
|
527
574
|
mentions: [
|
|
@@ -532,7 +579,8 @@ var sections = [
|
|
|
532
579
|
"schema",
|
|
533
580
|
"job",
|
|
534
581
|
"case",
|
|
535
|
-
"provenance"
|
|
582
|
+
"provenance",
|
|
583
|
+
"knowledge graph"
|
|
536
584
|
]
|
|
537
585
|
},
|
|
538
586
|
{
|
|
@@ -564,6 +612,15 @@ var sections = [
|
|
|
564
612
|
"**Review & approve** \u2014 Review with confidence indicators, provenance, and validation flags. Correct any values.",
|
|
565
613
|
"**Deliver** \u2014 Push approved data to webhooks, REST APIs, SFTP, email, or cloud storage."
|
|
566
614
|
]
|
|
615
|
+
},
|
|
616
|
+
{
|
|
617
|
+
type: "paragraph",
|
|
618
|
+
text: "The pipeline is designed to be **progressive** \u2014 results appear as each phase completes rather than waiting for the entire job to finish. Phase 1 (graph resolve) fills ~30% of cells instantly and for free. Phase 2 (AI extraction) fills the remaining gaps. Phases 3 and 4 handle re-resolution and transformation. You can start reviewing early results while later phases are still running."
|
|
619
|
+
},
|
|
620
|
+
{
|
|
621
|
+
type: "callout",
|
|
622
|
+
variant: "info",
|
|
623
|
+
text: "The **Dashboard** provides a real-time view of your pipeline progress with telemetry on strategy distribution, tier funnel, capture hit rate, and per-field state distribution. Use it to understand how well the knowledge graph is performing."
|
|
567
624
|
}
|
|
568
625
|
],
|
|
569
626
|
related: [
|
|
@@ -579,6 +636,14 @@ var sections = [
|
|
|
579
636
|
{
|
|
580
637
|
question: "What percentage of cells are filled by graph matches?",
|
|
581
638
|
answer: "Approximately 30% of cells are filled instantly from graph matches at no AI cost. The remaining ~70% are filled by AI agents in subsequent pipeline phases."
|
|
639
|
+
},
|
|
640
|
+
{
|
|
641
|
+
question: "Can I review results before the full pipeline completes?",
|
|
642
|
+
answer: "Yes. The pipeline is progressive \u2014 Phase 1 graph matches appear instantly, and you can start reviewing while Phase 2 (AI extraction) and later phases are still running."
|
|
643
|
+
},
|
|
644
|
+
{
|
|
645
|
+
question: "What delivery destinations are supported?",
|
|
646
|
+
answer: "Six live connectors: webhook (with HMAC-SHA256 signing), SFTP, Amazon S3, Azure Blob Storage, Google Drive, and OneDrive. Additional integrations for Sheets, SharePoint, Gmail, Outlook, and HubSpot are planned."
|
|
582
647
|
}
|
|
583
648
|
],
|
|
584
649
|
mentions: [
|
|
@@ -588,7 +653,9 @@ var sections = [
|
|
|
588
653
|
"field registry",
|
|
589
654
|
"schema",
|
|
590
655
|
"job execution",
|
|
591
|
-
"delivery"
|
|
656
|
+
"delivery",
|
|
657
|
+
"progressive results",
|
|
658
|
+
"dashboard"
|
|
592
659
|
]
|
|
593
660
|
},
|
|
594
661
|
{
|
|
@@ -602,6 +669,10 @@ var sections = [
|
|
|
602
669
|
type: "paragraph",
|
|
603
670
|
text: "Navigate using the sidebar. The platform is organized into three primary sections: **Sources** (ingest), **Structuring** (process & validate), and **Outputs** (deliver)."
|
|
604
671
|
},
|
|
672
|
+
{
|
|
673
|
+
type: "paragraph",
|
|
674
|
+
text: "**Sources** is where documents enter the system \u2014 through manual upload, connected cloud storage, email inboxes, or the API. **Structuring** is where you define schemas, run extraction jobs, and review results. **Outputs** is where approved data is delivered to downstream systems through configured bindings."
|
|
675
|
+
},
|
|
605
676
|
{
|
|
606
677
|
type: "ui-excerpt",
|
|
607
678
|
id: "sidebar-navigation",
|
|
@@ -611,6 +682,17 @@ var sections = [
|
|
|
611
682
|
{
|
|
612
683
|
type: "callout",
|
|
613
684
|
text: "The fastest path to results: upload documents in **Sources**, then go to **Structuring → Runs → New** to create your first extraction job."
|
|
685
|
+
},
|
|
686
|
+
{
|
|
687
|
+
type: "list",
|
|
688
|
+
ordered: true,
|
|
689
|
+
items: [
|
|
690
|
+
"Upload a few sample documents in **Sources** (drag and drop or use a connected source).",
|
|
691
|
+
"Wait for extraction to complete \u2014 documents are OCR'd, classified, and fields are extracted automatically.",
|
|
692
|
+
"Navigate to **Structuring** and review the auto-generated schemas or create a custom template.",
|
|
693
|
+
"Create a new **Run** by selecting a schema and the documents to process.",
|
|
694
|
+
"Review results in the run view \u2014 each cell shows confidence, provenance, and reasoning."
|
|
695
|
+
]
|
|
614
696
|
}
|
|
615
697
|
],
|
|
616
698
|
related: [
|
|
@@ -626,9 +708,17 @@ var sections = [
|
|
|
626
708
|
{
|
|
627
709
|
question: "How is the Talonic platform organized?",
|
|
628
710
|
answer: "The platform is organized into three primary sections: Sources (document ingest), Structuring (processing & validation), and Outputs (delivery to downstream systems)."
|
|
711
|
+
},
|
|
712
|
+
{
|
|
713
|
+
question: "Do I need to define a schema before processing documents?",
|
|
714
|
+
answer: "No. Talonic auto-generates schemas based on the document types it discovers during extraction. You can use these generated schemas directly or create custom template schemas for specific output needs."
|
|
715
|
+
},
|
|
716
|
+
{
|
|
717
|
+
question: "What source connections are available?",
|
|
718
|
+
answer: "Ten source connectors: Google Drive, Gmail, SharePoint, OneDrive, Outlook, Teams, Notion, SQL databases (MSSQL/PostgreSQL), Amazon S3, and Azure Blob Storage. You can also upload files manually or ingest via the REST API."
|
|
629
719
|
}
|
|
630
720
|
],
|
|
631
|
-
mentions: ["sidebar", "sources", "structuring", "outputs", "navigation", "Cmd+K"]
|
|
721
|
+
mentions: ["sidebar", "sources", "structuring", "outputs", "navigation", "Cmd+K", "source connectors"]
|
|
632
722
|
}
|
|
633
723
|
];
|
|
634
724
|
|
|
@@ -3449,6 +3539,27 @@ var sections11 = [
|
|
|
3449
3539
|
{
|
|
3450
3540
|
type: "paragraph",
|
|
3451
3541
|
text: "Navigate to **Workspace Settings → Shared Dialects** to manage workspace-level formatting. Individual schemas can override these defaults with inline dialect definitions when needed."
|
|
3542
|
+
},
|
|
3543
|
+
{
|
|
3544
|
+
type: "paragraph",
|
|
3545
|
+
text: "Dialects ensure consistency across all your structured output. When your downstream systems expect dates in `YYYY-MM-DD` format, numbers with `.` as the decimal separator, and CSVs delimited by `;`, you configure this once in the shared dialect rather than repeating it in every schema."
|
|
3546
|
+
},
|
|
3547
|
+
{
|
|
3548
|
+
type: "list",
|
|
3549
|
+
ordered: false,
|
|
3550
|
+
items: [
|
|
3551
|
+
"**Date format** \u2014 control how dates are serialized (e.g., `YYYY-MM-DD`, `DD/MM/YYYY`, `MM-DD-YYYY`).",
|
|
3552
|
+
"**Number locale** \u2014 set the decimal separator and thousands grouping.",
|
|
3553
|
+
"**CSV delimiter** \u2014 choose comma, semicolon, tab, or pipe for CSV exports.",
|
|
3554
|
+
"**Null representation** \u2014 define how missing values appear (empty string, `NULL`, `N/A`, etc.).",
|
|
3555
|
+
"**Boolean format** \u2014 choose between `true/false`, `yes/no`, `1/0`, or custom values.",
|
|
3556
|
+
"**Encoding** \u2014 set the character encoding for file exports (UTF-8, ISO-8859-1, etc.)."
|
|
3557
|
+
]
|
|
3558
|
+
},
|
|
3559
|
+
{
|
|
3560
|
+
type: "callout",
|
|
3561
|
+
variant: "info",
|
|
3562
|
+
text: "When a schema defines an inline dialect, it takes precedence over the shared dialect for that schema only. All other schemas continue using the workspace defaults. This lets you handle special cases without affecting the rest of your output."
|
|
3452
3563
|
}
|
|
3453
3564
|
],
|
|
3454
3565
|
related: [
|
|
@@ -3464,6 +3575,14 @@ var sections11 = [
|
|
|
3464
3575
|
{
|
|
3465
3576
|
question: "Can individual schemas override shared dialects?",
|
|
3466
3577
|
answer: "Yes. Individual schemas can override workspace-level shared dialect settings with inline dialect definitions when specific formatting is needed."
|
|
3578
|
+
},
|
|
3579
|
+
{
|
|
3580
|
+
question: "When should I use a shared dialect vs an inline dialect?",
|
|
3581
|
+
answer: "Use shared dialects for workspace-wide defaults that apply to most schemas. Use inline dialects only when a specific schema needs different formatting \u2014 for example, a schema that outputs dates in a different format for a particular downstream system."
|
|
3582
|
+
},
|
|
3583
|
+
{
|
|
3584
|
+
question: "Do shared dialects affect the extraction process?",
|
|
3585
|
+
answer: "No. Dialects only affect output formatting \u2014 how extracted values are serialized in exports and deliveries. The extraction and validation phases work with normalized internal representations regardless of dialect settings."
|
|
3467
3586
|
}
|
|
3468
3587
|
],
|
|
3469
3588
|
mentions: [
|
|
@@ -3471,7 +3590,9 @@ var sections11 = [
|
|
|
3471
3590
|
"workspace settings",
|
|
3472
3591
|
"output formatting",
|
|
3473
3592
|
"date format",
|
|
3474
|
-
"number locale"
|
|
3593
|
+
"number locale",
|
|
3594
|
+
"CSV delimiter",
|
|
3595
|
+
"null representation"
|
|
3475
3596
|
]
|
|
3476
3597
|
},
|
|
3477
3598
|
{
|
|
@@ -3488,6 +3609,25 @@ var sections11 = [
|
|
|
3488
3609
|
{
|
|
3489
3610
|
type: "paragraph",
|
|
3490
3611
|
text: "Navigate to **Workspace Settings → Reference Primitives** to manage workspace-level lookup tables. Each primitive is versioned independently, and schemas reference a specific version to ensure stable resolution."
|
|
3612
|
+
},
|
|
3613
|
+
{
|
|
3614
|
+
type: "paragraph",
|
|
3615
|
+
text: "The lookup convention follows a `key` / `value` structure where the `key` is the output code and the `value` is the human-readable label. During extraction, the platform maps FROM labels found in documents TO the canonical codes defined in the reference primitive. This ensures consistent, machine-readable output regardless of how values appear in source documents."
|
|
3616
|
+
},
|
|
3617
|
+
{
|
|
3618
|
+
type: "callout",
|
|
3619
|
+
variant: "info",
|
|
3620
|
+
text: "Versioning protects production stability. When you update a reference primitive, existing schemas continue using their pinned version until you explicitly update the version reference. This prevents unexpected changes to live extraction pipelines."
|
|
3621
|
+
},
|
|
3622
|
+
{
|
|
3623
|
+
type: "list",
|
|
3624
|
+
ordered: false,
|
|
3625
|
+
items: [
|
|
3626
|
+
"**Key-value structure** \u2014 `key` is the canonical code, `value` is the label the platform matches against.",
|
|
3627
|
+
"**Independent versioning** \u2014 each update creates a new version; schemas pin to a specific version.",
|
|
3628
|
+
"**Cross-schema sharing** \u2014 one primitive can be referenced by any number of schemas.",
|
|
3629
|
+
"**3-tier lookup cascade** \u2014 string normalization, token fuzzy matching, and Haiku LLM fallback for ambiguous values."
|
|
3630
|
+
]
|
|
3491
3631
|
}
|
|
3492
3632
|
],
|
|
3493
3633
|
related: [
|
|
@@ -3503,6 +3643,14 @@ var sections11 = [
|
|
|
3503
3643
|
{
|
|
3504
3644
|
question: "How do reference primitives differ from schema reference tables?",
|
|
3505
3645
|
answer: "Reference primitives are workspace-level and shared across schemas with independent versioning. Schema reference tables are defined inline within a specific schema."
|
|
3646
|
+
},
|
|
3647
|
+
{
|
|
3648
|
+
question: "How does the lookup cascade work?",
|
|
3649
|
+
answer: "The platform tries three tiers in order. First, it tries exact string normalization (whitespace and case normalization). If that fails, it falls back to token-based fuzzy matching. If the fuzzy match is below the confidence threshold, a Haiku LLM call resolves the ambiguity."
|
|
3650
|
+
},
|
|
3651
|
+
{
|
|
3652
|
+
question: "What happens when I update a reference primitive?",
|
|
3653
|
+
answer: "A new version is created. Existing schemas continue using their pinned version. You must explicitly update the version reference in each schema to use the new data, which protects production pipelines from unexpected changes."
|
|
3506
3654
|
}
|
|
3507
3655
|
],
|
|
3508
3656
|
mentions: [
|
|
@@ -3510,7 +3658,8 @@ var sections11 = [
|
|
|
3510
3658
|
"lookup tables",
|
|
3511
3659
|
"versioned",
|
|
3512
3660
|
"workspace-level",
|
|
3513
|
-
"code mapping"
|
|
3661
|
+
"code mapping",
|
|
3662
|
+
"lookup cascade"
|
|
3514
3663
|
]
|
|
3515
3664
|
},
|
|
3516
3665
|
{
|
|
@@ -3527,6 +3676,24 @@ var sections11 = [
|
|
|
3527
3676
|
{
|
|
3528
3677
|
type: "paragraph",
|
|
3529
3678
|
text: "Navigate to **Workspace Settings → Change Review** to configure review requirements. When enabled, changes are queued for approval before being applied, ensuring that production data pipelines are not disrupted by unreviewed modifications."
|
|
3679
|
+
},
|
|
3680
|
+
{
|
|
3681
|
+
type: "paragraph",
|
|
3682
|
+
text: "Change review is particularly important for workspaces that feed downstream systems through delivery bindings. A small change to a schema field mapping or a reference primitive value can ripple through to every document processed after that point. The review process creates a checkpoint where a second pair of eyes can verify the change before it goes live."
|
|
3683
|
+
},
|
|
3684
|
+
{
|
|
3685
|
+
type: "list",
|
|
3686
|
+
ordered: false,
|
|
3687
|
+
items: [
|
|
3688
|
+
"**Schema changes** \u2014 field additions, removals, mapping updates, and format constraint modifications.",
|
|
3689
|
+
"**Shared dialect changes** \u2014 date format, number locale, delimiter, and encoding updates.",
|
|
3690
|
+
"**Reference primitive changes** \u2014 new versions of lookup tables and key-value modifications."
|
|
3691
|
+
]
|
|
3692
|
+
},
|
|
3693
|
+
{
|
|
3694
|
+
type: "callout",
|
|
3695
|
+
variant: "warning",
|
|
3696
|
+
text: "Change review is recommended for any workspace that delivers data to production systems. Without it, a schema modification takes effect immediately and applies to all future extractions \u2014 there is no undo for documents already processed with the new configuration."
|
|
3530
3697
|
}
|
|
3531
3698
|
],
|
|
3532
3699
|
related: [
|
|
@@ -3542,6 +3709,14 @@ var sections11 = [
|
|
|
3542
3709
|
{
|
|
3543
3710
|
question: "When should I enable change review?",
|
|
3544
3711
|
answer: "Enable change review for production workspaces where unreviewed modifications could disrupt live data pipelines. It ensures all changes are approved before being applied."
|
|
3712
|
+
},
|
|
3713
|
+
{
|
|
3714
|
+
question: "What types of changes require review?",
|
|
3715
|
+
answer: "Three categories: schema changes (field additions, removals, mapping updates, format constraints), shared dialect changes (formatting rules), and reference primitive changes (lookup table updates and new versions)."
|
|
3716
|
+
},
|
|
3717
|
+
{
|
|
3718
|
+
question: "Can I bypass change review for urgent fixes?",
|
|
3719
|
+
answer: "Change review can be disabled temporarily from Workspace Settings if an urgent fix is needed. However, this should be done with caution in production workspaces, and the review requirement should be re-enabled afterward."
|
|
3545
3720
|
}
|
|
3546
3721
|
],
|
|
3547
3722
|
mentions: [
|
|
@@ -3549,7 +3724,8 @@ var sections11 = [
|
|
|
3549
3724
|
"governance",
|
|
3550
3725
|
"production workspace",
|
|
3551
3726
|
"approval workflow",
|
|
3552
|
-
"review process"
|
|
3727
|
+
"review process",
|
|
3728
|
+
"delivery bindings"
|
|
3553
3729
|
]
|
|
3554
3730
|
}
|
|
3555
3731
|
];
|
|
@@ -3566,6 +3742,26 @@ var sections12 = [
|
|
|
3566
3742
|
{
|
|
3567
3743
|
type: "paragraph",
|
|
3568
3744
|
text: "Press `Cmd+K` (or `Ctrl+K`) from any page to open global search. Searches across documents, extracted values, field names, schema names, and sources simultaneously."
|
|
3745
|
+
},
|
|
3746
|
+
{
|
|
3747
|
+
type: "paragraph",
|
|
3748
|
+
text: "Omnisearch is designed to be the single entry point for finding anything in the platform. Rather than navigating to specific pages to search within them, Omnisearch queries a **materialized values index** that aggregates data across all your content. Results are grouped by category so you can quickly distinguish between a document match and a field name match."
|
|
3749
|
+
},
|
|
3750
|
+
{
|
|
3751
|
+
type: "callout",
|
|
3752
|
+
variant: "info",
|
|
3753
|
+
text: "Omnisearch results update as you type. The materialized index is rebuilt automatically whenever documents are processed or schemas change, so results are always current."
|
|
3754
|
+
},
|
|
3755
|
+
{
|
|
3756
|
+
type: "list",
|
|
3757
|
+
ordered: false,
|
|
3758
|
+
items: [
|
|
3759
|
+
"**Documents** \u2014 matches against file names, extracted text, and metadata.",
|
|
3760
|
+
"**Extracted values** \u2014 finds specific data points across all processed documents.",
|
|
3761
|
+
"**Field names** \u2014 searches the Field Registry for canonical field definitions.",
|
|
3762
|
+
"**Schema names** \u2014 locates generated and template schemas by title.",
|
|
3763
|
+
"**Sources** \u2014 matches source connection names and configurations."
|
|
3764
|
+
]
|
|
3569
3765
|
}
|
|
3570
3766
|
],
|
|
3571
3767
|
related: [
|
|
@@ -3581,9 +3777,17 @@ var sections12 = [
|
|
|
3581
3777
|
{
|
|
3582
3778
|
question: "What does Omnisearch search across?",
|
|
3583
3779
|
answer: "Omnisearch searches across documents, extracted values, field names, schema names, and sources simultaneously, providing results from all categories in a single query."
|
|
3780
|
+
},
|
|
3781
|
+
{
|
|
3782
|
+
question: "Does Omnisearch work on extracted data or just file names?",
|
|
3783
|
+
answer: "Omnisearch queries a materialized values index that includes both file-level metadata and individual extracted data points. You can search for a specific invoice number, date, or name and find the document that contains it."
|
|
3784
|
+
},
|
|
3785
|
+
{
|
|
3786
|
+
question: "How quickly are new documents searchable in Omnisearch?",
|
|
3787
|
+
answer: "Documents become searchable as soon as extraction completes. The materialized index is updated automatically during document processing, so there is no manual reindex step."
|
|
3584
3788
|
}
|
|
3585
3789
|
],
|
|
3586
|
-
mentions: ["omnisearch", "global search", "Cmd+K", "Ctrl+K", "document search"]
|
|
3790
|
+
mentions: ["omnisearch", "global search", "Cmd+K", "Ctrl+K", "document search", "materialized values index"]
|
|
3587
3791
|
},
|
|
3588
3792
|
{
|
|
3589
3793
|
slug: "document-filters",
|
|
@@ -3595,6 +3799,30 @@ var sections12 = [
|
|
|
3595
3799
|
{
|
|
3596
3800
|
type: "paragraph",
|
|
3597
3801
|
text: "The Documents page supports advanced filtering by extracted field values. Build conditions with field autocomplete, comparison operators (eq, contains, gt, between, is_empty, etc.), and combine multiple conditions. Filter state is URL-serializable so you can share filtered views. Save frequently-used filters as presets."
|
|
3802
|
+
},
|
|
3803
|
+
{
|
|
3804
|
+
type: "paragraph",
|
|
3805
|
+
text: "Filters operate on the **materialized values index** \u2014 a flattened representation of every extracted field value across your documents. This means filtering is fast even across thousands of documents because queries run against pre-indexed data rather than scanning raw extractions at query time."
|
|
3806
|
+
},
|
|
3807
|
+
{
|
|
3808
|
+
type: "callout",
|
|
3809
|
+
variant: "info",
|
|
3810
|
+
text: 'Combine multiple filter conditions to build precise queries. For example, filter by `document_type eq "Invoice"` AND `total_amount gt 10000` AND `date between "2025-01-01" "2025-12-31"` to find high-value invoices from a specific year.'
|
|
3811
|
+
},
|
|
3812
|
+
{
|
|
3813
|
+
type: "list",
|
|
3814
|
+
ordered: false,
|
|
3815
|
+
items: [
|
|
3816
|
+
"`eq` \u2014 exact match (case-insensitive for strings).",
|
|
3817
|
+
"`contains` \u2014 substring match within field values.",
|
|
3818
|
+
"`gt` / `lt` \u2014 greater than or less than for numeric and date fields.",
|
|
3819
|
+
"`between` \u2014 range match with lower and upper bounds.",
|
|
3820
|
+
"`is_empty` \u2014 finds documents where a specific field has no extracted value."
|
|
3821
|
+
]
|
|
3822
|
+
},
|
|
3823
|
+
{
|
|
3824
|
+
type: "paragraph",
|
|
3825
|
+
text: "Filter state is encoded in the URL query string, and the backend translates it into a query using dynamic SQL generation. This means you can bookmark filtered views, share them with teammates via a link, or save them as **presets** for one-click access to commonly used queries."
|
|
3598
3826
|
}
|
|
3599
3827
|
],
|
|
3600
3828
|
related: [
|
|
@@ -3610,6 +3838,14 @@ var sections12 = [
|
|
|
3610
3838
|
{
|
|
3611
3839
|
question: "Can I share filtered views with my team?",
|
|
3612
3840
|
answer: "Yes. Filter state is URL-serializable, so you can share filtered views by copying the URL. You can also save frequently-used filters as presets."
|
|
3841
|
+
},
|
|
3842
|
+
{
|
|
3843
|
+
question: "How does field autocomplete work in filters?",
|
|
3844
|
+
answer: "When building a filter condition, the field selector shows all canonical fields from your Field Registry with autocomplete. Start typing a field name and matching fields appear, ensuring you always filter on valid, extracted fields."
|
|
3845
|
+
},
|
|
3846
|
+
{
|
|
3847
|
+
question: "Can I filter on fields that have no value?",
|
|
3848
|
+
answer: "Yes. The is_empty operator lets you find documents where a specific field was not extracted or has no value. This is useful for identifying documents that may need reprocessing or manual review."
|
|
3613
3849
|
}
|
|
3614
3850
|
],
|
|
3615
3851
|
mentions: [
|
|
@@ -3617,7 +3853,9 @@ var sections12 = [
|
|
|
3617
3853
|
"field autocomplete",
|
|
3618
3854
|
"comparison operators",
|
|
3619
3855
|
"URL-serializable",
|
|
3620
|
-
"filter presets"
|
|
3856
|
+
"filter presets",
|
|
3857
|
+
"materialized values index",
|
|
3858
|
+
"dynamic SQL"
|
|
3621
3859
|
]
|
|
3622
3860
|
}
|
|
3623
3861
|
];
|
|
@@ -3930,6 +4168,10 @@ var sections14 = [
|
|
|
3930
4168
|
type: "paragraph",
|
|
3931
4169
|
text: "Organizations support role-based access control:"
|
|
3932
4170
|
},
|
|
4171
|
+
{
|
|
4172
|
+
type: "paragraph",
|
|
4173
|
+
text: "Every user in your organization is assigned one of four roles that determine what they can see and do. Roles are hierarchical \u2014 each level includes all permissions of the levels below it. Choose the most restrictive role that still lets a team member do their job."
|
|
4174
|
+
},
|
|
3933
4175
|
{
|
|
3934
4176
|
type: "param-table",
|
|
3935
4177
|
title: "Roles",
|
|
@@ -3959,6 +4201,22 @@ var sections14 = [
|
|
|
3959
4201
|
{
|
|
3960
4202
|
type: "paragraph",
|
|
3961
4203
|
text: "New members are added via domain matching: company email domains auto-match to your org with **pending** status requiring admin approval. Manage from the Team page."
|
|
4204
|
+
},
|
|
4205
|
+
{
|
|
4206
|
+
type: "callout",
|
|
4207
|
+
variant: "info",
|
|
4208
|
+
text: "Domain matching streamlines onboarding for larger teams. When a new user signs up with an email address matching your organization's domain (e.g., `@yourcompany.com`), they are automatically associated with your org in a **pending** state. An admin must approve them before they gain access."
|
|
4209
|
+
},
|
|
4210
|
+
{
|
|
4211
|
+
type: "list",
|
|
4212
|
+
ordered: true,
|
|
4213
|
+
items: [
|
|
4214
|
+
"Navigate to the **Team** page from the sidebar.",
|
|
4215
|
+
"Review any **pending** members waiting for approval.",
|
|
4216
|
+
"Approve or reject each pending member.",
|
|
4217
|
+
"Assign the appropriate role based on their responsibilities.",
|
|
4218
|
+
"Optionally, change roles later from the same Team page."
|
|
4219
|
+
]
|
|
3962
4220
|
}
|
|
3963
4221
|
],
|
|
3964
4222
|
related: [
|
|
@@ -3974,6 +4232,14 @@ var sections14 = [
|
|
|
3974
4232
|
{
|
|
3975
4233
|
question: "How are new team members added?",
|
|
3976
4234
|
answer: "New members are added via domain matching: company email domains auto-match to your organization with pending status. Admin approval is required before access is granted."
|
|
4235
|
+
},
|
|
4236
|
+
{
|
|
4237
|
+
question: "Can I change a team member's role after they join?",
|
|
4238
|
+
answer: "Yes. Navigate to the Team page, find the member, and update their role. Changes take effect immediately. Only Admins and Owners can modify roles."
|
|
4239
|
+
},
|
|
4240
|
+
{
|
|
4241
|
+
question: "What happens if I remove a team member?",
|
|
4242
|
+
answer: "Removing a team member revokes their access to the organization immediately. Their past actions (edits, uploads, approvals) remain in the audit trail. They can be re-added later through the same domain matching process."
|
|
3977
4243
|
}
|
|
3978
4244
|
],
|
|
3979
4245
|
mentions: [
|
|
@@ -3984,7 +4250,8 @@ var sections14 = [
|
|
|
3984
4250
|
"Member",
|
|
3985
4251
|
"Admin",
|
|
3986
4252
|
"Owner",
|
|
3987
|
-
"domain matching"
|
|
4253
|
+
"domain matching",
|
|
4254
|
+
"pending approval"
|
|
3988
4255
|
]
|
|
3989
4256
|
},
|
|
3990
4257
|
{
|
|
@@ -3998,6 +4265,10 @@ var sections14 = [
|
|
|
3998
4265
|
type: "paragraph",
|
|
3999
4266
|
text: "The Usage & Registry page replaces the legacy credits view with a comprehensive cost breakdown. It shows per-feature cost (extraction, OCR, batch, matching), a daily cost chart, and a full call log with model, tokens, and cost per request. The **Master view** (admin only) shows per-customer breakdowns and platform-wide statistics."
|
|
4000
4267
|
},
|
|
4268
|
+
{
|
|
4269
|
+
type: "paragraph",
|
|
4270
|
+
text: "Understanding your usage patterns helps optimize costs. For example, if extraction dominates your spend, consider using **batch mode** for non-urgent documents to cut that cost in half. The daily cost chart makes it easy to spot usage spikes and correlate them with specific ingestion events."
|
|
4271
|
+
},
|
|
4001
4272
|
{
|
|
4002
4273
|
type: "param-table",
|
|
4003
4274
|
title: "Usage views",
|
|
@@ -4023,6 +4294,11 @@ var sections14 = [
|
|
|
4023
4294
|
description: "Per-customer breakdown and platform-wide aggregates. Accessible only in master (all-tenant) mode."
|
|
4024
4295
|
}
|
|
4025
4296
|
]
|
|
4297
|
+
},
|
|
4298
|
+
{
|
|
4299
|
+
type: "callout",
|
|
4300
|
+
variant: "info",
|
|
4301
|
+
text: "The call log records every LLM and OCR call with full detail \u2014 model name, input/output token counts, latency, and cost. Use it to audit individual extractions or investigate unexpected cost increases."
|
|
4026
4302
|
}
|
|
4027
4303
|
],
|
|
4028
4304
|
related: [
|
|
@@ -4038,6 +4314,10 @@ var sections14 = [
|
|
|
4038
4314
|
{
|
|
4039
4315
|
question: "What is the Master view?",
|
|
4040
4316
|
answer: "The Master view is an admin-only feature that shows per-customer breakdowns and platform-wide statistics. It is accessible only in master (all-tenant) mode."
|
|
4317
|
+
},
|
|
4318
|
+
{
|
|
4319
|
+
question: "How can I reduce my usage costs?",
|
|
4320
|
+
answer: "Use batch mode for non-urgent documents to cut extraction costs by 50%. Review the per-feature breakdown to identify your highest-cost operations, and use the daily cost chart to spot and investigate usage spikes."
|
|
4041
4321
|
}
|
|
4042
4322
|
],
|
|
4043
4323
|
mentions: [
|
|
@@ -4045,7 +4325,8 @@ var sections14 = [
|
|
|
4045
4325
|
"cost breakdown",
|
|
4046
4326
|
"daily cost chart",
|
|
4047
4327
|
"call log",
|
|
4048
|
-
"master view"
|
|
4328
|
+
"master view",
|
|
4329
|
+
"per-feature costs"
|
|
4049
4330
|
]
|
|
4050
4331
|
},
|
|
4051
4332
|
{
|
|
@@ -4058,6 +4339,26 @@ var sections14 = [
|
|
|
4058
4339
|
{
|
|
4059
4340
|
type: "paragraph",
|
|
4060
4341
|
text: "Accessible from the user menu for admins and superadmins. Provides: customer management, user management, usage statistics, data clear & rebuild, and cross-tenant master registry view."
|
|
4342
|
+
},
|
|
4343
|
+
{
|
|
4344
|
+
type: "paragraph",
|
|
4345
|
+
text: "The Admin Panel is the central hub for platform-wide operations. **Customer management** lets you create, view, and delete organizations. **User management** provides a cross-tenant view of all platform users with the ability to remove accounts. The **data clear & rebuild** function wipes all data for a specific customer and reprocesses from scratch \u2014 useful during onboarding or after significant schema changes."
|
|
4346
|
+
},
|
|
4347
|
+
{
|
|
4348
|
+
type: "list",
|
|
4349
|
+
ordered: false,
|
|
4350
|
+
items: [
|
|
4351
|
+
"**Customer management** \u2014 create, list, and delete organizations.",
|
|
4352
|
+
"**User management** \u2014 view all users across tenants, remove accounts.",
|
|
4353
|
+
"**Usage statistics** \u2014 platform-wide cost and usage aggregates.",
|
|
4354
|
+
"**Data clear & rebuild** \u2014 wipe and reprocess all data for a customer.",
|
|
4355
|
+
"**Master registry** \u2014 cross-tenant view of the Field Registry and schemas."
|
|
4356
|
+
]
|
|
4357
|
+
},
|
|
4358
|
+
{
|
|
4359
|
+
type: "callout",
|
|
4360
|
+
variant: "warning",
|
|
4361
|
+
text: "The **data clear** operation is irreversible. It deletes all documents, extractions, jobs, and results for the selected customer. Use with caution and only when a full reprocessing is genuinely needed."
|
|
4061
4362
|
}
|
|
4062
4363
|
],
|
|
4063
4364
|
related: [
|
|
@@ -4072,6 +4373,14 @@ var sections14 = [
|
|
|
4072
4373
|
{
|
|
4073
4374
|
question: "Who can access the Admin Panel?",
|
|
4074
4375
|
answer: "The Admin Panel is accessible only to users with admin or superadmin roles, via the user menu in the platform navigation."
|
|
4376
|
+
},
|
|
4377
|
+
{
|
|
4378
|
+
question: "What does the data clear operation do?",
|
|
4379
|
+
answer: "Data clear wipes all documents, extractions, jobs, results, and related data for a specific customer. It is irreversible and intended for full reprocessing scenarios during onboarding or after major schema changes."
|
|
4380
|
+
},
|
|
4381
|
+
{
|
|
4382
|
+
question: "Can I view usage across all customers?",
|
|
4383
|
+
answer: "Yes. The Admin Panel's usage statistics show platform-wide cost and usage aggregates, and the Master view on the Usage & Registry page adds per-customer cost breakdowns."
|
|
4075
4384
|
}
|
|
4076
4385
|
],
|
|
4077
4386
|
mentions: [
|
|
@@ -4079,7 +4388,8 @@ var sections14 = [
|
|
|
4079
4388
|
"customer management",
|
|
4080
4389
|
"user management",
|
|
4081
4390
|
"data clear",
|
|
4082
|
-
"master registry"
|
|
4391
|
+
"master registry",
|
|
4392
|
+
"superadmin"
|
|
4083
4393
|
]
|
|
4084
4394
|
},
|
|
4085
4395
|
{
|
|
@@ -4089,6 +4399,10 @@ var sections14 = [
|
|
|
4089
4399
|
seoTitle: "Keyboard Shortcuts \u2014 Talonic Docs",
|
|
4090
4400
|
description: "Global keyboard shortcuts: Cmd+K / Ctrl+K for Omnisearch, Cmd+J / Ctrl+J for quick extract, and Escape to close overlays. Speed up your workflow with keyboard navigation.",
|
|
4091
4401
|
content: [
|
|
4402
|
+
{
|
|
4403
|
+
type: "paragraph",
|
|
4404
|
+
text: "Talonic provides global keyboard shortcuts that work from any page in the platform. These shortcuts let you access common actions without leaving your current context, significantly speeding up daily workflows."
|
|
4405
|
+
},
|
|
4092
4406
|
{
|
|
4093
4407
|
type: "param-table",
|
|
4094
4408
|
title: "Shortcuts",
|
|
@@ -4109,6 +4423,11 @@ var sections14 = [
|
|
|
4109
4423
|
description: "Close overlays, modals, and search."
|
|
4110
4424
|
}
|
|
4111
4425
|
]
|
|
4426
|
+
},
|
|
4427
|
+
{
|
|
4428
|
+
type: "callout",
|
|
4429
|
+
variant: "info",
|
|
4430
|
+
text: "The **quick extract** shortcut (`Cmd+J` / `Ctrl+J`) is the fastest way to upload a single document. It opens a streamlined upload interface that lets you drag a file and start processing immediately."
|
|
4112
4431
|
}
|
|
4113
4432
|
],
|
|
4114
4433
|
related: [
|
|
@@ -4123,6 +4442,10 @@ var sections14 = [
|
|
|
4123
4442
|
{
|
|
4124
4443
|
question: "What does the quick extract shortcut do?",
|
|
4125
4444
|
answer: "Cmd+J / Ctrl+J opens the quick extract interface, allowing you to upload and process a document directly from any page."
|
|
4445
|
+
},
|
|
4446
|
+
{
|
|
4447
|
+
question: "Do shortcuts work inside modals or overlays?",
|
|
4448
|
+
answer: "The Escape shortcut works inside any modal or overlay to close it. Omnisearch (Cmd+K) works globally, even when other overlays are open. Quick extract (Cmd+J) is available from the main interface."
|
|
4126
4449
|
}
|
|
4127
4450
|
],
|
|
4128
4451
|
mentions: ["keyboard shortcuts", "Cmd+K", "Cmd+J", "Escape", "quick extract"]
|
|
@@ -4145,6 +4468,21 @@ var sections15 = [
|
|
|
4145
4468
|
{
|
|
4146
4469
|
type: "callout",
|
|
4147
4470
|
text: "Batch mode cuts extraction cost in half. Stage 1 (OCR + classify) still runs immediately \u2014 only Stage 2 (Claude extraction) is deferred."
|
|
4471
|
+
},
|
|
4472
|
+
{
|
|
4473
|
+
type: "paragraph",
|
|
4474
|
+
text: "Under the hood, batch inference leverages the provider's native batch API (Anthropic Message Batches or AWS Bedrock invocation jobs). Documents accumulate in a queue and are submitted together, allowing the provider to schedule processing during off-peak capacity. This is why the cost reduction is possible without any loss in extraction quality."
|
|
4475
|
+
},
|
|
4476
|
+
{
|
|
4477
|
+
type: "list",
|
|
4478
|
+
ordered: false,
|
|
4479
|
+
items: [
|
|
4480
|
+
"**50% cost reduction** on all Claude extraction calls in Stage 2.",
|
|
4481
|
+
"**48-hour delivery window** \u2014 most batches complete well within this timeframe.",
|
|
4482
|
+
"**No quality difference** \u2014 the same extraction model and prompts are used as in real-time mode.",
|
|
4483
|
+
"**Immediate visibility** \u2014 documents appear in your library right after Stage 1 (OCR + classification).",
|
|
4484
|
+
"**Automatic result application** \u2014 when the batch completes, results are applied and documents transition to their final status."
|
|
4485
|
+
]
|
|
4148
4486
|
}
|
|
4149
4487
|
],
|
|
4150
4488
|
related: [
|
|
@@ -4160,9 +4498,17 @@ var sections15 = [
|
|
|
4160
4498
|
{
|
|
4161
4499
|
question: "When should I use batch mode?",
|
|
4162
4500
|
answer: "Batch mode is ideal for large backlog ingestion where real-time results are not required. It cuts extraction cost in half compared to real-time processing."
|
|
4501
|
+
},
|
|
4502
|
+
{
|
|
4503
|
+
question: "Is there a minimum number of documents for batch processing?",
|
|
4504
|
+
answer: "The batch system requires a minimum of 100 items per batch (a Bedrock requirement). If fewer documents are uploaded in batch mode, the system falls back to real-time processing with a warning."
|
|
4505
|
+
},
|
|
4506
|
+
{
|
|
4507
|
+
question: "Does batch mode affect extraction quality?",
|
|
4508
|
+
answer: "No. Batch mode uses the same Claude extraction model and prompts as real-time processing. The only difference is timing \u2014 extraction is deferred so the provider can schedule it during off-peak capacity, which is what makes the lower price possible."
|
|
4163
4509
|
}
|
|
4164
4510
|
],
|
|
4165
|
-
mentions: ["batch inference", "50% cost", "48-hour delivery", "backlog ingestion"]
|
|
4511
|
+
mentions: ["batch inference", "50% cost", "48-hour delivery", "backlog ingestion", "Message Batches API"]
|
|
4166
4512
|
},
|
|
4167
4513
|
{
|
|
4168
4514
|
slug: "batch-processing",
|
|
@@ -4175,6 +4521,10 @@ var sections15 = [
|
|
|
4175
4521
|
type: "paragraph",
|
|
4176
4522
|
text: 'Set `processing_mode=batch` on upload (API) or toggle the "Batch" switch in the upload UI. Stage 1 (OCR + classification) runs immediately so documents appear in your library right away. Stage 2 (Claude extraction) is deferred to the provider\'s batch API for asynchronous processing.'
|
|
4177
4523
|
},
|
|
4524
|
+
{
|
|
4525
|
+
type: "paragraph",
|
|
4526
|
+
text: "The two-stage architecture means you get immediate feedback on what was uploaded. Documents are OCR'd, classified by type, and triaged within seconds. Only the AI extraction step \u2014 where Claude reads the document and fills structured fields \u2014 is deferred to the batch queue for cost savings."
|
|
4527
|
+
},
|
|
4178
4528
|
{
|
|
4179
4529
|
type: "param-table",
|
|
4180
4530
|
title: "Batch stages",
|
|
@@ -4190,6 +4540,15 @@ var sections15 = [
|
|
|
4190
4540
|
description: "Claude extraction is queued for batch processing. Items accumulate, then submit to the batch API on a timer or threshold."
|
|
4191
4541
|
}
|
|
4192
4542
|
]
|
|
4543
|
+
},
|
|
4544
|
+
{
|
|
4545
|
+
type: "callout",
|
|
4546
|
+
variant: "warning",
|
|
4547
|
+
text: "Image-only documents (PNG, JPG) cannot be batched because the batch payload is text-only. These are automatically routed to real-time processing even when batch mode is enabled."
|
|
4548
|
+
},
|
|
4549
|
+
{
|
|
4550
|
+
type: "paragraph",
|
|
4551
|
+
text: "While waiting for batch results, documents show a status of `batch_queued`. Once the provider returns results, the platform applies them through the same post-processing pipeline as real-time extraction \u2014 including markdown pre-processing, field parsing, quality metrics, and extraction metadata computation."
|
|
4193
4552
|
}
|
|
4194
4553
|
],
|
|
4195
4554
|
related: [
|
|
@@ -4205,6 +4564,14 @@ var sections15 = [
|
|
|
4205
4564
|
{
|
|
4206
4565
|
question: "What runs immediately in batch mode?",
|
|
4207
4566
|
answer: "Stage 1 (OCR, classification, and triage) runs in real-time. Only Stage 2 (Claude extraction) is deferred to the batch API for cost savings."
|
|
4567
|
+
},
|
|
4568
|
+
{
|
|
4569
|
+
question: "What happens if a batch extraction fails to parse?",
|
|
4570
|
+
answer: "Parse failures in batch mode are retried through the real-time extraction path \u2014 never as a new batch. This ensures the original 48-hour SLA is maintained while still recovering from transient issues."
|
|
4571
|
+
},
|
|
4572
|
+
{
|
|
4573
|
+
question: "Can I enable batch mode per source?",
|
|
4574
|
+
answer: "Yes. Each source connection has a batch processing toggle. When enabled, all documents ingested through that source are automatically processed in batch mode."
|
|
4208
4575
|
}
|
|
4209
4576
|
],
|
|
4210
4577
|
mentions: [
|
|
@@ -4212,7 +4579,8 @@ var sections15 = [
|
|
|
4212
4579
|
"processing_mode",
|
|
4213
4580
|
"Stage 1",
|
|
4214
4581
|
"Stage 2",
|
|
4215
|
-
"deferred extraction"
|
|
4582
|
+
"deferred extraction",
|
|
4583
|
+
"batch_queued"
|
|
4216
4584
|
]
|
|
4217
4585
|
},
|
|
4218
4586
|
{
|
|
@@ -4226,6 +4594,10 @@ var sections15 = [
|
|
|
4226
4594
|
type: "paragraph",
|
|
4227
4595
|
text: "The Batches page at `/sources/batches` shows the status of all batch jobs. Each batch progresses through three states: **accumulating** (items collecting), **submitted** (sent to provider), and **completed** (results applied). The page live-syncs with the provider for real-time status updates."
|
|
4228
4596
|
},
|
|
4597
|
+
{
|
|
4598
|
+
type: "paragraph",
|
|
4599
|
+
text: "Batches are submitted automatically when the accumulation timer fires (every 15 minutes by default) or when the item count threshold is reached. Once submitted, the platform polls the provider hourly to check for completion. When results arrive, they are applied to the corresponding documents and the batch transitions to **completed** status."
|
|
4600
|
+
},
|
|
4229
4601
|
{
|
|
4230
4602
|
type: "param-table",
|
|
4231
4603
|
title: "Batch statuses",
|
|
@@ -4246,6 +4618,11 @@ var sections15 = [
|
|
|
4246
4618
|
description: "All results have been received and applied to the corresponding documents."
|
|
4247
4619
|
}
|
|
4248
4620
|
]
|
|
4621
|
+
},
|
|
4622
|
+
{
|
|
4623
|
+
type: "callout",
|
|
4624
|
+
variant: "info",
|
|
4625
|
+
text: 'If a batch gets stuck in "processing" due to an unexpected interruption, the platform automatically recovers it on startup. Batches stuck for more than 15 minutes are reverted to "submitted" so the next poll cycle retries them.'
|
|
4249
4626
|
}
|
|
4250
4627
|
],
|
|
4251
4628
|
related: [
|
|
@@ -4261,6 +4638,14 @@ var sections15 = [
|
|
|
4261
4638
|
{
|
|
4262
4639
|
question: "What are the batch statuses?",
|
|
4263
4640
|
answer: "Three statuses: Accumulating (items collecting), Submitted (sent to provider, polled hourly), and Completed (results received and applied)."
|
|
4641
|
+
},
|
|
4642
|
+
{
|
|
4643
|
+
question: "How often are batches submitted to the provider?",
|
|
4644
|
+
answer: "Batches are submitted on a 15-minute timer or when the item count threshold is reached, whichever comes first. These intervals are configurable in the pipeline settings."
|
|
4645
|
+
},
|
|
4646
|
+
{
|
|
4647
|
+
question: "What happens if a batch gets stuck?",
|
|
4648
|
+
answer: 'The platform includes crash recovery logic. Batches stuck in "processing" for more than 15 minutes are automatically reverted to "submitted" so the next poll cycle retries them. No manual intervention is needed.'
|
|
4264
4649
|
}
|
|
4265
4650
|
],
|
|
4266
4651
|
mentions: [
|
|
@@ -4269,7 +4654,8 @@ var sections15 = [
|
|
|
4269
4654
|
"submitted",
|
|
4270
4655
|
"completed",
|
|
4271
4656
|
"live-sync",
|
|
4272
|
-
"provider polling"
|
|
4657
|
+
"provider polling",
|
|
4658
|
+
"crash recovery"
|
|
4273
4659
|
]
|
|
4274
4660
|
}
|
|
4275
4661
|
];
|
|
@@ -4286,6 +4672,25 @@ var sections16 = [
|
|
|
4286
4672
|
{
|
|
4287
4673
|
type: "paragraph",
|
|
4288
4674
|
text: "Upload CSV or Excel files as lookup tables. These reference datasets are used by the matching engine and by reference strategies in schemas. Each reference dataset is versioned and can be shared across multiple schemas."
|
|
4675
|
+
},
|
|
4676
|
+
{
|
|
4677
|
+
type: "paragraph",
|
|
4678
|
+
text: 'Reference data is the foundation of the matching system. It represents your "ground truth" \u2014 the known records you want to match extracted document data against. Common examples include customer lists, product catalogs, vendor registries, and contract databases.'
|
|
4679
|
+
},
|
|
4680
|
+
{
|
|
4681
|
+
type: "callout",
|
|
4682
|
+
variant: "info",
|
|
4683
|
+
text: "You can also import reference data directly from a SQL database connection. The import runs asynchronously \u2014 rows are streamed in batches of 500 and column headers appear immediately so you can preview the structure while the import runs."
|
|
4684
|
+
},
|
|
4685
|
+
{
|
|
4686
|
+
type: "list",
|
|
4687
|
+
ordered: false,
|
|
4688
|
+
items: [
|
|
4689
|
+
"**CSV and Excel (XLSX)** file uploads for quick one-time imports.",
|
|
4690
|
+
"**SQL database imports** for live reference data from connected sources.",
|
|
4691
|
+
"**Versioning** \u2014 each dataset tracks versions independently.",
|
|
4692
|
+
"**Cross-schema sharing** \u2014 one dataset can be referenced by multiple schemas and matching configurations."
|
|
4693
|
+
]
|
|
4289
4694
|
}
|
|
4290
4695
|
],
|
|
4291
4696
|
related: [
|
|
@@ -4301,6 +4706,14 @@ var sections16 = [
|
|
|
4301
4706
|
{
|
|
4302
4707
|
question: "How is reference data used?",
|
|
4303
4708
|
answer: "Reference datasets are used by the matching engine for field-to-field comparisons and by reference strategies in schemas for code mapping and value resolution."
|
|
4709
|
+
},
|
|
4710
|
+
{
|
|
4711
|
+
question: "Can I import reference data from a database?",
|
|
4712
|
+
answer: "Yes. Use the SQL import option to stream rows from a connected SQL database (MSSQL or PostgreSQL). The import runs asynchronously and you can monitor progress while it loads."
|
|
4713
|
+
},
|
|
4714
|
+
{
|
|
4715
|
+
question: "What happens if I delete a source connection that was used for a SQL import?",
|
|
4716
|
+
answer: 'The reference data remains intact. Deleting a source connection does not cascade to reference datasets \u2014 the UI shows a "source disconnected" indicator, but the imported data continues to work for matching.'
|
|
4304
4717
|
}
|
|
4305
4718
|
],
|
|
4306
4719
|
mentions: [
|
|
@@ -4308,7 +4721,8 @@ var sections16 = [
|
|
|
4308
4721
|
"CSV upload",
|
|
4309
4722
|
"Excel upload",
|
|
4310
4723
|
"versioned datasets",
|
|
4311
|
-
"matching engine"
|
|
4724
|
+
"matching engine",
|
|
4725
|
+
"SQL import"
|
|
4312
4726
|
]
|
|
4313
4727
|
},
|
|
4314
4728
|
{
|
|
@@ -4351,6 +4765,15 @@ var sections16 = [
|
|
|
4351
4765
|
{
|
|
4352
4766
|
type: "paragraph",
|
|
4353
4767
|
text: "You can also use **AI strategy generation** to let the platform suggest field mappings and strategies automatically based on the schema and reference data structure."
|
|
4768
|
+
},
|
|
4769
|
+
{
|
|
4770
|
+
type: "paragraph",
|
|
4771
|
+
text: "Each field comparison carries a **weight** that determines how much it contributes to the overall confidence score. Set high weights on fields that are strong identifiers (like reference numbers or unique IDs) and lower weights on fields that are common or prone to variation (like names or descriptions). The weighted aggregate produces a final score between 0% and 100%."
|
|
4772
|
+
},
|
|
4773
|
+
{
|
|
4774
|
+
type: "callout",
|
|
4775
|
+
variant: "info",
|
|
4776
|
+
text: "Use **AI strategy generation** when setting up matching for the first time. The platform analyzes your schema fields and reference data columns, then suggests which fields to compare and which strategy to use for each. You can review and adjust the suggestions before saving."
|
|
4354
4777
|
}
|
|
4355
4778
|
],
|
|
4356
4779
|
related: [
|
|
@@ -4366,6 +4789,14 @@ var sections16 = [
|
|
|
4366
4789
|
{
|
|
4367
4790
|
question: "Can Talonic suggest matching configurations?",
|
|
4368
4791
|
answer: "Yes. AI strategy generation can suggest field mappings and strategies automatically based on the schema and reference data structure."
|
|
4792
|
+
},
|
|
4793
|
+
{
|
|
4794
|
+
question: "How do weights affect matching scores?",
|
|
4795
|
+
answer: "Each field comparison carries a weight that determines its contribution to the overall confidence score. Fields with higher weights have more influence on the final score. The weighted aggregate produces a score between 0% and 100%."
|
|
4796
|
+
},
|
|
4797
|
+
{
|
|
4798
|
+
question: "What is the difference between fuzzy and exact matching?",
|
|
4799
|
+
answer: "Exact matching requires an identical string (case-insensitive). Fuzzy matching uses token-based comparison with a configurable similarity threshold, making it suitable for fields with minor variations like misspellings, abbreviations, or word reordering."
|
|
4369
4800
|
}
|
|
4370
4801
|
],
|
|
4371
4802
|
mentions: [
|
|
@@ -4374,7 +4805,8 @@ var sections16 = [
|
|
|
4374
4805
|
"exact match",
|
|
4375
4806
|
"fuzzy matching",
|
|
4376
4807
|
"date_range",
|
|
4377
|
-
"numeric_range"
|
|
4808
|
+
"numeric_range",
|
|
4809
|
+
"AI strategy generation"
|
|
4378
4810
|
]
|
|
4379
4811
|
},
|
|
4380
4812
|
{
|
|
@@ -4387,6 +4819,26 @@ var sections16 = [
|
|
|
4387
4819
|
{
|
|
4388
4820
|
type: "paragraph",
|
|
4389
4821
|
text: "Execute a matching run against a reference dataset. Matching runs are processed asynchronously via BullMQ. You can monitor progress from the matching page and cancel running jobs if needed."
|
|
4822
|
+
},
|
|
4823
|
+
{
|
|
4824
|
+
type: "paragraph",
|
|
4825
|
+
text: "There are two types of runs: **manual runs** use only the deterministic matching strategies (exact, fuzzy, date_range, numeric_range) and complete quickly. **Smart runs** add an AI resolution pass \u2014 after the initial matching, an embedding-based search with a Haiku LLM resolver attempts to improve low-confidence results."
|
|
4826
|
+
},
|
|
4827
|
+
{
|
|
4828
|
+
type: "list",
|
|
4829
|
+
ordered: true,
|
|
4830
|
+
items: [
|
|
4831
|
+
"Navigate to the **Matching** page and select a matching configuration.",
|
|
4832
|
+
"Click **Run** for a standard match or **Smart Run** for AI-enhanced matching.",
|
|
4833
|
+
"Monitor progress in real-time on the matching page.",
|
|
4834
|
+
"Cancel the run at any time if needed \u2014 partial results are preserved.",
|
|
4835
|
+
"Review results when the run completes."
|
|
4836
|
+
]
|
|
4837
|
+
},
|
|
4838
|
+
{
|
|
4839
|
+
type: "callout",
|
|
4840
|
+
variant: "info",
|
|
4841
|
+
text: "Smart runs take longer but can significantly improve match quality for ambiguous data. The AI resolver uses document embeddings and a Haiku LLM to evaluate low-confidence candidates that the deterministic strategies could not resolve."
|
|
4390
4842
|
}
|
|
4391
4843
|
],
|
|
4392
4844
|
related: [
|
|
@@ -4402,9 +4854,17 @@ var sections16 = [
|
|
|
4402
4854
|
{
|
|
4403
4855
|
question: "Are matching runs synchronous or asynchronous?",
|
|
4404
4856
|
answer: "Matching runs are processed asynchronously via a job queue. You can monitor progress from the matching page in real-time."
|
|
4857
|
+
},
|
|
4858
|
+
{
|
|
4859
|
+
question: "What is the difference between a manual run and a smart run?",
|
|
4860
|
+
answer: "A manual run uses only deterministic strategies (exact, fuzzy, date_range, numeric_range). A smart run adds an AI resolution pass using embeddings and a Haiku LLM to improve low-confidence results."
|
|
4861
|
+
},
|
|
4862
|
+
{
|
|
4863
|
+
question: "Can I cancel a matching run in progress?",
|
|
4864
|
+
answer: "Yes. You can cancel a running match job from the matching page. Partial results from documents already processed are preserved and available for review."
|
|
4405
4865
|
}
|
|
4406
4866
|
],
|
|
4407
|
-
mentions: ["matching runs", "async execution", "BullMQ", "progress monitoring"]
|
|
4867
|
+
mentions: ["matching runs", "async execution", "BullMQ", "progress monitoring", "smart run", "AI resolution"]
|
|
4408
4868
|
},
|
|
4409
4869
|
{
|
|
4410
4870
|
slug: "matching-results",
|
|
@@ -4417,6 +4877,10 @@ var sections16 = [
|
|
|
4417
4877
|
type: "paragraph",
|
|
4418
4878
|
text: "Results are presented per document with the top 5 match candidates. Each candidate includes a confidence score and field-level evidence showing which comparisons contributed to the match and how each field scored."
|
|
4419
4879
|
},
|
|
4880
|
+
{
|
|
4881
|
+
type: "paragraph",
|
|
4882
|
+
text: "The evidence view is designed to make match decisions transparent. For each candidate, you can see exactly which fields matched, what strategy was used, the individual field score, and the actual values that were compared. This makes it straightforward to verify correct matches and investigate false positives."
|
|
4883
|
+
},
|
|
4420
4884
|
{
|
|
4421
4885
|
type: "param-table",
|
|
4422
4886
|
title: "Result fields",
|
|
@@ -4437,6 +4901,11 @@ var sections16 = [
|
|
|
4437
4901
|
description: "The five highest-scoring reference records for each document."
|
|
4438
4902
|
}
|
|
4439
4903
|
]
|
|
4904
|
+
},
|
|
4905
|
+
{
|
|
4906
|
+
type: "callout",
|
|
4907
|
+
variant: "info",
|
|
4908
|
+
text: "You can **approve or reject** individual match results. Approved matches can be used downstream in delivery pipelines. Rejected matches are excluded from future consideration for that document."
|
|
4440
4909
|
}
|
|
4441
4910
|
],
|
|
4442
4911
|
related: [
|
|
@@ -4452,6 +4921,14 @@ var sections16 = [
|
|
|
4452
4921
|
{
|
|
4453
4922
|
question: "What evidence is provided for each match?",
|
|
4454
4923
|
answer: "Per-field evidence shows the strategy used (exact, fuzzy, date_range, numeric_range), the individual score, and the matched values for both the extracted data and the reference record."
|
|
4924
|
+
},
|
|
4925
|
+
{
|
|
4926
|
+
question: "Can I approve or reject match results?",
|
|
4927
|
+
answer: "Yes. Each match result can be individually approved or rejected. Approved matches flow through to downstream delivery pipelines, while rejected matches are excluded from future consideration for that document."
|
|
4928
|
+
},
|
|
4929
|
+
{
|
|
4930
|
+
question: "Why does a match have a low confidence score?",
|
|
4931
|
+
answer: "Low confidence usually means the fields being compared have significant differences or the matching strategies produced weak scores. Check the per-field evidence to identify which comparisons dragged the score down, then consider adjusting weights or strategies in the matching configuration."
|
|
4455
4932
|
}
|
|
4456
4933
|
],
|
|
4457
4934
|
mentions: [
|
|
@@ -4459,7 +4936,9 @@ var sections16 = [
|
|
|
4459
4936
|
"top 5 candidates",
|
|
4460
4937
|
"confidence score",
|
|
4461
4938
|
"field-level evidence",
|
|
4462
|
-
"weighted aggregate"
|
|
4939
|
+
"weighted aggregate",
|
|
4940
|
+
"approve",
|
|
4941
|
+
"reject"
|
|
4463
4942
|
]
|
|
4464
4943
|
}
|
|
4465
4944
|
];
|
|
@@ -5331,6 +5810,8 @@ var sections19 = [
|
|
|
5331
5810
|
description: "List all uploaded and processed documents with filtering by source, status, date range, and full-text search with pagination support.",
|
|
5332
5811
|
content: [
|
|
5333
5812
|
{ type: "paragraph", text: "Documents represent files that have been uploaded and processed. Each document retains its original file, extracted text, and metadata." },
|
|
5813
|
+
{ type: "paragraph", text: "Use query parameters to filter by source connection, processing status, date range, or full-text search across filenames and extracted content. Results are paginated with cursor-based navigation." },
|
|
5814
|
+
{ type: "callout", variant: "info", text: "Documents include **triage metadata** (sensitivity, department, jurisdiction, PII detection) when available. Triage is populated automatically during ingestion via Document AI annotations or Haiku LLM calls." },
|
|
5334
5815
|
{
|
|
5335
5816
|
type: "endpoint",
|
|
5336
5817
|
method: "GET",
|
|
@@ -5443,9 +5924,11 @@ var sections19 = [
|
|
|
5443
5924
|
{ label: "List Extractions", slug: "list-extractions" }
|
|
5444
5925
|
],
|
|
5445
5926
|
faq: [
|
|
5446
|
-
{ question: "How do I list documents in Talonic?", answer: "Send a GET request to /v1/documents with optional filters for source, status, date range, and search." }
|
|
5927
|
+
{ question: "How do I list documents in Talonic?", answer: "Send a GET request to /v1/documents with optional filters for source, status, date range, and search." },
|
|
5928
|
+
{ question: "What document statuses are possible?", answer: "Documents start as `pending` (uploaded, awaiting processing), move to `processing` (OCR and extraction in progress), and end as either `completed` (ready for use) or `error` (processing failed). Batch-mode documents may also show `batch_queued`." },
|
|
5929
|
+
{ question: "How does full-text search work?", answer: "The `search` parameter matches against the document filename and extracted text content. It uses full-text search, so partial word matches are supported." }
|
|
5447
5930
|
],
|
|
5448
|
-
mentions: ["documents", "pagination", "filtering"]
|
|
5931
|
+
mentions: ["documents", "pagination", "filtering", "triage", "document status"]
|
|
5449
5932
|
},
|
|
5450
5933
|
{
|
|
5451
5934
|
slug: "get-document",
|
|
@@ -5454,6 +5937,7 @@ var sections19 = [
|
|
|
5454
5937
|
seoTitle: "Get Document Endpoint \u2014 Talonic Docs",
|
|
5455
5938
|
description: "Retrieve a single document by ID with full metadata including file size, extracted text length, extraction count, and timestamps.",
|
|
5456
5939
|
content: [
|
|
5940
|
+
{ type: "paragraph", text: "Retrieve full metadata for a single document, including processing status, detected type and language, triage information, and links to related resources. The response includes a `dashboard` link for viewing the document in the Talonic platform UI." },
|
|
5457
5941
|
{
|
|
5458
5942
|
type: "endpoint",
|
|
5459
5943
|
method: "GET",
|
|
@@ -5535,8 +6019,11 @@ var sections19 = [
|
|
|
5535
6019
|
{ label: "List Documents", slug: "list-documents" },
|
|
5536
6020
|
{ label: "Delete Document", slug: "delete-document" }
|
|
5537
6021
|
],
|
|
5538
|
-
faq: [
|
|
5539
|
-
|
|
6022
|
+
faq: [
|
|
6023
|
+
{ question: "How do I get the extracted text for a document?", answer: "Use the `GET /v1/documents/:id/markdown` endpoint to retrieve the OCR markdown output. For structured extraction results, use the extractions endpoint linked in the response." },
|
|
6024
|
+
{ question: "What does extraction_count indicate?", answer: "The number of times extraction has been performed on this document. Typically 0 (not yet extracted) or 1 (extracted). Re-extraction increments this count." }
|
|
6025
|
+
],
|
|
6026
|
+
mentions: ["document metadata", "document detail", "triage"]
|
|
5540
6027
|
},
|
|
5541
6028
|
{
|
|
5542
6029
|
slug: "delete-document",
|
|
@@ -5545,6 +6032,8 @@ var sections19 = [
|
|
|
5545
6032
|
seoTitle: "Delete Document Endpoint \u2014 Talonic Docs",
|
|
5546
6033
|
description: "Permanently delete a document and all associated extractions. This action is irreversible and removes the original file and all extraction results.",
|
|
5547
6034
|
content: [
|
|
6035
|
+
{ type: "paragraph", text: "Permanently delete a document along with its original file, extracted text, and all associated extraction results. This operation cannot be undone." },
|
|
6036
|
+
{ type: "callout", variant: "warning", text: "Deletion is **irreversible**. The original file, OCR output, and all extraction results are permanently removed. If the document is part of a case or entity group, its links are also removed." },
|
|
5548
6037
|
{
|
|
5549
6038
|
type: "endpoint",
|
|
5550
6039
|
method: "DELETE",
|
|
@@ -5584,8 +6073,11 @@ var sections19 = [
|
|
|
5584
6073
|
related: [
|
|
5585
6074
|
{ label: "Get Document", slug: "get-document" }
|
|
5586
6075
|
],
|
|
5587
|
-
faq: [
|
|
5588
|
-
|
|
6076
|
+
faq: [
|
|
6077
|
+
{ question: "Can I recover a deleted document?", answer: "No. Document deletion is permanent and irreversible. There is no soft-delete or trash mechanism. Ensure you have a backup before deleting." },
|
|
6078
|
+
{ question: "What happens to linking and cases when a document is deleted?", answer: "Entity links referencing the deleted document are removed. If the document was part of a case, the case may be recomputed with the remaining documents during the next backfill." }
|
|
6079
|
+
],
|
|
6080
|
+
mentions: ["delete", "irreversible", "permanent deletion"]
|
|
5589
6081
|
}
|
|
5590
6082
|
];
|
|
5591
6083
|
|
|
@@ -5599,6 +6091,8 @@ var sections20 = [
|
|
|
5599
6091
|
description: "List extraction results with optional filters by document, schema, and status. Supports pagination for browsing large extraction sets.",
|
|
5600
6092
|
content: [
|
|
5601
6093
|
{ type: "paragraph", text: "An extraction is the result of applying a schema to a document. A single document can have multiple extractions if different schemas are applied to it." },
|
|
6094
|
+
{ type: "paragraph", text: "Use this endpoint to browse extraction results across your organization. Filter by **document**, **status**, or **time range** to find specific results. Each extraction summary includes an overall confidence score and links to the full result." },
|
|
6095
|
+
{ type: "callout", variant: "info", text: "Extractions are returned in descending order by `created_at` by default. Use the `after` and `before` parameters to narrow results to a specific time window." },
|
|
5602
6096
|
{
|
|
5603
6097
|
type: "endpoint",
|
|
5604
6098
|
method: "GET",
|
|
@@ -5679,8 +6173,12 @@ var sections20 = [
|
|
|
5679
6173
|
{ label: "Get Extraction", slug: "get-extraction" },
|
|
5680
6174
|
{ label: "POST /v1/extract", slug: "post-extract" }
|
|
5681
6175
|
],
|
|
5682
|
-
faq: [
|
|
5683
|
-
|
|
6176
|
+
faq: [
|
|
6177
|
+
{ question: "Can a single document have multiple extractions?", answer: "Yes. A document can have multiple extractions if different schemas are applied to it, or if it is re-extracted with updated settings." },
|
|
6178
|
+
{ question: "What does the confidence_overall score represent?", answer: "It is the average confidence across all extracted fields (0 to 1). A score of 0.94 means the extraction engine is highly confident in the accuracy of the results." },
|
|
6179
|
+
{ question: "How do I filter extractions by date range?", answer: "Use the `after` and `before` query parameters with ISO 8601 datetime strings to retrieve extractions within a specific time window." }
|
|
6180
|
+
],
|
|
6181
|
+
mentions: ["extractions", "pagination", "confidence score"]
|
|
5684
6182
|
},
|
|
5685
6183
|
{
|
|
5686
6184
|
slug: "get-extraction",
|
|
@@ -5689,6 +6187,8 @@ var sections20 = [
|
|
|
5689
6187
|
seoTitle: "Get Extraction Endpoint \u2014 Talonic Docs",
|
|
5690
6188
|
description: "Retrieve the full extraction result by ID including extracted data, per-field confidence scores, and document metadata.",
|
|
5691
6189
|
content: [
|
|
6190
|
+
{ type: "paragraph", text: "Retrieve the complete extraction result for a single document, including all **extracted field values**, **per-field confidence scores**, and **processing metadata**. This is the primary endpoint for consuming structured data produced by the extraction pipeline." },
|
|
6191
|
+
{ type: "paragraph", text: "The response includes the source document summary, a `data` object with field values, and a `confidence` object with both an overall score and individual scores for each field. Fields that have been manually corrected appear in the `locked_fields` array." },
|
|
5692
6192
|
{
|
|
5693
6193
|
type: "endpoint",
|
|
5694
6194
|
method: "GET",
|
|
@@ -5769,8 +6269,11 @@ var sections20 = [
|
|
|
5769
6269
|
{ label: "List Extractions", slug: "list-extractions" },
|
|
5770
6270
|
{ label: "Get Extraction Data", slug: "get-extraction-fields" }
|
|
5771
6271
|
],
|
|
5772
|
-
faq: [
|
|
5773
|
-
|
|
6272
|
+
faq: [
|
|
6273
|
+
{ question: "What does a locked_fields entry mean?", answer: "A locked field has been manually corrected via the Correct Fields endpoint. Locked fields always have a confidence of 1.0 and will not be overwritten by re-extraction." },
|
|
6274
|
+
{ question: "What processing metadata is available?", answer: "The `processing` object includes `duration_ms` (total extraction time), `pages_processed` (number of document pages analyzed), and `region` (the datacenter region that handled the request)." }
|
|
6275
|
+
],
|
|
6276
|
+
mentions: ["extraction result", "confidence scores", "locked fields", "processing metadata"]
|
|
5774
6277
|
},
|
|
5775
6278
|
{
|
|
5776
6279
|
slug: "get-extraction-fields",
|
|
@@ -5779,6 +6282,8 @@ var sections20 = [
|
|
|
5779
6282
|
seoTitle: "Get Extraction Data Endpoint \u2014 Talonic Docs",
|
|
5780
6283
|
description: "Retrieve just the extracted data from an extraction result without metadata. Supports CSV export format for download.",
|
|
5781
6284
|
content: [
|
|
6285
|
+
{ type: "paragraph", text: "Retrieve only the extracted field values from an extraction, without metadata, confidence scores, or processing details. This is the lightest-weight endpoint for consuming extraction output and is ideal for downstream integrations that only need the structured data." },
|
|
6286
|
+
{ type: "callout", variant: "info", text: "Use `?format=csv` to download the data as a CSV file. The response Content-Type changes to `text/csv` and includes a `Content-Disposition` header for browser downloads." },
|
|
5782
6287
|
{
|
|
5783
6288
|
type: "endpoint",
|
|
5784
6289
|
method: "GET",
|
|
@@ -5825,10 +6330,14 @@ var sections20 = [
|
|
|
5825
6330
|
}
|
|
5826
6331
|
],
|
|
5827
6332
|
related: [
|
|
5828
|
-
{ label: "Get Extraction", slug: "get-extraction" }
|
|
6333
|
+
{ label: "Get Extraction", slug: "get-extraction" },
|
|
6334
|
+
{ label: "Correct Fields", slug: "get-extraction-markdown" }
|
|
5829
6335
|
],
|
|
5830
|
-
faq: [
|
|
5831
|
-
|
|
6336
|
+
faq: [
|
|
6337
|
+
{ question: "What is the difference between this endpoint and GET /v1/extractions/:id?", answer: "This endpoint returns only the extracted key-value data. The full extraction endpoint also includes confidence scores, processing metadata, document details, and locked fields." },
|
|
6338
|
+
{ question: "Does the CSV export include column headers?", answer: "Yes. The CSV format includes field names as the header row and extracted values as the data row." }
|
|
6339
|
+
],
|
|
6340
|
+
mentions: ["CSV export", "extraction data", "data download"]
|
|
5832
6341
|
},
|
|
5833
6342
|
{
|
|
5834
6343
|
slug: "get-extraction-markdown",
|
|
@@ -5837,6 +6346,9 @@ var sections20 = [
|
|
|
5837
6346
|
seoTitle: "Correct Extraction Fields \u2014 Talonic Docs",
|
|
5838
6347
|
description: "Submit corrections for specific fields in an extraction result. Corrections are logged and can be propagated to similar extractions.",
|
|
5839
6348
|
content: [
|
|
6349
|
+
{ type: "paragraph", text: "Submit corrections to specific fields in a completed extraction. Corrected fields are **locked** at confidence 1.0 and will not be overwritten by future re-extractions. Use this to fix extraction errors and optionally propagate corrections to similar documents." },
|
|
6350
|
+
{ type: "paragraph", text: "Each correction includes the field name, new value, and an optional reason. The `propagate` parameter controls whether corrections apply only to this document or are applied to all similar extractions." },
|
|
6351
|
+
{ type: "callout", variant: "warning", text: "Corrected fields are locked and cannot be overridden by re-extraction. To unlock a field, submit a new correction or contact support." },
|
|
5840
6352
|
{
|
|
5841
6353
|
type: "endpoint",
|
|
5842
6354
|
method: "PATCH",
|
|
@@ -5942,10 +6454,15 @@ var sections20 = [
|
|
|
5942
6454
|
}
|
|
5943
6455
|
],
|
|
5944
6456
|
related: [
|
|
5945
|
-
{ label: "Get Extraction", slug: "get-extraction" }
|
|
6457
|
+
{ label: "Get Extraction", slug: "get-extraction" },
|
|
6458
|
+
{ label: "Get Extraction Data", slug: "get-extraction-fields" }
|
|
5946
6459
|
],
|
|
5947
|
-
faq: [
|
|
5948
|
-
|
|
6460
|
+
faq: [
|
|
6461
|
+
{ question: "What does propagate: all_similar do?", answer: "When set to `all_similar`, the correction is applied to all extractions from documents with the same detected type and matching field values. Use this to fix systematic extraction errors across a batch." },
|
|
6462
|
+
{ question: "Can I correct multiple fields in a single request?", answer: "Yes. The `corrections` array accepts multiple entries. Each correction is applied atomically in the same request." },
|
|
6463
|
+
{ question: "What happens to the confidence score of a corrected field?", answer: "Corrected fields are automatically set to confidence 1.0 and added to the `locked_fields` array. The overall confidence score is recalculated to reflect the correction." }
|
|
6464
|
+
],
|
|
6465
|
+
mentions: ["field corrections", "propagation", "locked fields", "confidence override"]
|
|
5949
6466
|
}
|
|
5950
6467
|
];
|
|
5951
6468
|
|
|
@@ -5959,6 +6476,8 @@ var sections21 = [
|
|
|
5959
6476
|
description: "List all saved schemas with field counts and extraction counts. Schemas define the structure you want to extract from documents.",
|
|
5960
6477
|
content: [
|
|
5961
6478
|
{ type: "paragraph", text: "Schemas define the structure you want to extract from documents. Save schemas to reuse them across extractions and maintain consistency." },
|
|
6479
|
+
{ type: "paragraph", text: "Use this endpoint to browse your organization's schemas, search by name, and paginate through large collections. Each schema in the response includes its **field count**, **version number**, and links to related extractions." },
|
|
6480
|
+
{ type: "callout", variant: "info", text: "Schemas are organization-scoped. You will only see schemas created by your organization, not other tenants." },
|
|
5962
6481
|
{
|
|
5963
6482
|
type: "endpoint",
|
|
5964
6483
|
method: "GET",
|
|
@@ -6052,8 +6571,12 @@ var sections21 = [
|
|
|
6052
6571
|
{ label: "Create Schema", slug: "create-schema" },
|
|
6053
6572
|
{ label: "Get Schema", slug: "get-schema" }
|
|
6054
6573
|
],
|
|
6055
|
-
faq: [
|
|
6056
|
-
|
|
6574
|
+
faq: [
|
|
6575
|
+
{ question: "How do I search for a schema by name?", answer: "Pass the `search` query parameter with a partial name string. The search is case-insensitive and matches any substring of the schema name." },
|
|
6576
|
+
{ question: "What is the maximum number of schemas I can retrieve in one request?", answer: "Set `limit` up to 100 per request. Use the `next_cursor` value from the response to paginate through additional results." },
|
|
6577
|
+
{ question: "Does listing schemas include the full field definitions?", answer: "Yes. Each schema object in the response includes the complete `definition` with all field properties, types, and required markers." }
|
|
6578
|
+
],
|
|
6579
|
+
mentions: ["schemas", "extraction", "pagination", "cursor"]
|
|
6057
6580
|
},
|
|
6058
6581
|
{
|
|
6059
6582
|
slug: "get-schema",
|
|
@@ -6062,6 +6585,8 @@ var sections21 = [
|
|
|
6062
6585
|
seoTitle: "Get Schema Endpoint \u2014 Talonic Docs",
|
|
6063
6586
|
description: "Retrieve a schema by ID including its full definition with all field names, data types, extraction instructions, and configuration options. Returns version history and extraction count.",
|
|
6064
6587
|
content: [
|
|
6588
|
+
{ type: "paragraph", text: "Retrieve the full details of a single schema, including its **JSON Schema definition**, field count, and version number. Use this to inspect a schema before running extractions or to verify that an update was applied correctly." },
|
|
6589
|
+
{ type: "callout", variant: "info", text: "You can pass either a UUID or a `SCH-` prefixed short ID (e.g. `SCH-A1B2C3D4`) as the `:id` path parameter." },
|
|
6065
6590
|
{
|
|
6066
6591
|
type: "endpoint",
|
|
6067
6592
|
method: "GET",
|
|
@@ -6132,8 +6657,11 @@ var sections21 = [
|
|
|
6132
6657
|
{ label: "List Schemas", slug: "list-schemas" },
|
|
6133
6658
|
{ label: "Update Schema", slug: "update-schema" }
|
|
6134
6659
|
],
|
|
6135
|
-
faq: [
|
|
6136
|
-
|
|
6660
|
+
faq: [
|
|
6661
|
+
{ question: "Can I use the short ID instead of the UUID?", answer: "Yes. Both the full UUID and the `SCH-` prefixed short ID (e.g. `SCH-A1B2C3D4`) are accepted as the `:id` path parameter." },
|
|
6662
|
+
{ question: "Does this endpoint return the schema version history?", answer: "The response includes the current `version` number. To see previous versions, compare schemas over time using the `updated_at` timestamp." }
|
|
6663
|
+
],
|
|
6664
|
+
mentions: ["schema definition", "short ID", "UUID"]
|
|
6137
6665
|
},
|
|
6138
6666
|
{
|
|
6139
6667
|
slug: "create-schema",
|
|
@@ -6142,6 +6670,16 @@ var sections21 = [
|
|
|
6142
6670
|
seoTitle: "Create Schema Endpoint \u2014 Talonic Docs",
|
|
6143
6671
|
description: "Create a new schema with a name and definition in any of the three supported formats: JSON Schema, simplified fields, or flat key-type map.",
|
|
6144
6672
|
content: [
|
|
6673
|
+
{ type: "paragraph", text: "Create a new schema to define the fields you want to extract from documents. The schema definition is **normalized** to JSON Schema format on creation, regardless of which input format you use." },
|
|
6674
|
+
{ type: "paragraph", text: "Talonic accepts three definition formats to make schema creation flexible. You can pass a full JSON Schema object, a simplified fields array, or a flat key-type map. All three are converted to the same internal representation." },
|
|
6675
|
+
{
|
|
6676
|
+
type: "list",
|
|
6677
|
+
items: [
|
|
6678
|
+
'**JSON Schema** \u2014 Standard `{ "type": "object", "properties": { ... } }` format with full control over types and required fields.',
|
|
6679
|
+
'**Simplified fields** \u2014 An array of `{ "name": "field_name", "type": "string" }` objects for quick creation.',
|
|
6680
|
+
'**Flat key-type map** \u2014 A simple `{ "field_name": "string", "amount": "number" }` object for the fastest path.'
|
|
6681
|
+
]
|
|
6682
|
+
},
|
|
6145
6683
|
{
|
|
6146
6684
|
type: "endpoint",
|
|
6147
6685
|
method: "POST",
|
|
@@ -6219,8 +6757,12 @@ var sections21 = [
|
|
|
6219
6757
|
{ label: "Schema Formats", slug: "extract-schemas" },
|
|
6220
6758
|
{ label: "List Schemas", slug: "list-schemas" }
|
|
6221
6759
|
],
|
|
6222
|
-
faq: [
|
|
6223
|
-
|
|
6760
|
+
faq: [
|
|
6761
|
+
{ question: "Which schema definition format should I use?", answer: 'Use the flat key-type map (`{ "field": "type" }`) for quick prototyping and JSON Schema for production schemas that need `required` fields or descriptions.' },
|
|
6762
|
+
{ question: "What happens if my definition has no valid fields?", answer: "The API returns a `400 validation_error`. Every schema must produce at least one extractable field after normalization." },
|
|
6763
|
+
{ question: "Is the version always 1 for new schemas?", answer: "Yes. New schemas always start at version 1. The version increments when you update the schema definition via `PUT /v1/schemas/:id`." }
|
|
6764
|
+
],
|
|
6765
|
+
mentions: ["create schema", "JSON Schema", "definition format"]
|
|
6224
6766
|
},
|
|
6225
6767
|
{
|
|
6226
6768
|
slug: "update-schema",
|
|
@@ -6229,6 +6771,8 @@ var sections21 = [
|
|
|
6229
6771
|
seoTitle: "Update Schema Endpoint \u2014 Talonic Docs",
|
|
6230
6772
|
description: "Replace a schema definition, creating a new version internally. Existing extractions retain their original schema version.",
|
|
6231
6773
|
content: [
|
|
6774
|
+
{ type: "paragraph", text: "Replace a schema's definition, name, or description. Each update creates a **new version** internally, so you can track how a schema evolves over time. Existing extractions are not affected by schema updates \u2014 they retain the schema version that was active when they were created." },
|
|
6775
|
+
{ type: "callout", variant: "warning", text: "Updating a schema definition changes the fields used for **future** extractions. Already-completed extractions continue to reference their original schema version." },
|
|
6232
6776
|
{
|
|
6233
6777
|
type: "endpoint",
|
|
6234
6778
|
method: "PUT",
|
|
@@ -6307,10 +6851,14 @@ var sections21 = [
|
|
|
6307
6851
|
}
|
|
6308
6852
|
],
|
|
6309
6853
|
related: [
|
|
6310
|
-
{ label: "Get Schema", slug: "get-schema" }
|
|
6854
|
+
{ label: "Get Schema", slug: "get-schema" },
|
|
6855
|
+
{ label: "Create Schema", slug: "create-schema" }
|
|
6311
6856
|
],
|
|
6312
|
-
faq: [
|
|
6313
|
-
|
|
6857
|
+
faq: [
|
|
6858
|
+
{ question: "Do existing extractions change when I update a schema?", answer: "No. Existing extractions retain the schema version that was active when they were created. Only new extractions use the updated definition." },
|
|
6859
|
+
{ question: "Can I update just the name without changing the definition?", answer: "Yes. All body parameters are optional. You can send only `name` or only `description` to update those fields without touching the definition." }
|
|
6860
|
+
],
|
|
6861
|
+
mentions: ["schema versioning", "schema update"]
|
|
6314
6862
|
},
|
|
6315
6863
|
{
|
|
6316
6864
|
slug: "delete-schema",
|
|
@@ -6319,6 +6867,8 @@ var sections21 = [
|
|
|
6319
6867
|
seoTitle: "Delete Schema Endpoint \u2014 Talonic Docs",
|
|
6320
6868
|
description: "Delete a schema by ID. Associated extractions are retained and not deleted when the schema is removed.",
|
|
6321
6869
|
content: [
|
|
6870
|
+
{ type: "paragraph", text: "Permanently delete a schema from your organization. This operation is **irreversible**. Associated extractions are retained and remain accessible \u2014 only the schema definition itself is removed." },
|
|
6871
|
+
{ type: "callout", variant: "warning", text: "Deleting a schema does not delete its associated extractions. Extraction results remain available via the extractions API." },
|
|
6322
6872
|
{
|
|
6323
6873
|
type: "endpoint",
|
|
6324
6874
|
method: "DELETE",
|
|
@@ -6356,10 +6906,14 @@ var sections21 = [
|
|
|
6356
6906
|
}
|
|
6357
6907
|
],
|
|
6358
6908
|
related: [
|
|
6359
|
-
{ label: "List Schemas", slug: "list-schemas" }
|
|
6909
|
+
{ label: "List Schemas", slug: "list-schemas" },
|
|
6910
|
+
{ label: "Get Schema", slug: "get-schema" }
|
|
6360
6911
|
],
|
|
6361
|
-
faq: [
|
|
6362
|
-
|
|
6912
|
+
faq: [
|
|
6913
|
+
{ question: "What happens to extractions when I delete a schema?", answer: "Extractions are retained and remain accessible. Only the schema definition is removed. You can still query extraction results via the extractions API." },
|
|
6914
|
+
{ question: "Can I recover a deleted schema?", answer: "No. Schema deletion is permanent. If you need the same structure again, create a new schema with the same definition." }
|
|
6915
|
+
],
|
|
6916
|
+
mentions: ["delete schema", "extraction retention"]
|
|
6363
6917
|
}
|
|
6364
6918
|
];
|
|
6365
6919
|
|
|
@@ -6373,6 +6927,7 @@ var sections22 = [
|
|
|
6373
6927
|
description: "List all extraction jobs with status, progress, and pagination. Filter by status to find queued, processing, completed, failed, or cancelled jobs.",
|
|
6374
6928
|
content: [
|
|
6375
6929
|
{ type: "paragraph", text: "Jobs track asynchronous extraction work. Create a job with a schema and document set, then poll for progress. Each job runs the full 4-phase extraction pipeline." },
|
|
6930
|
+
{ type: "paragraph", text: "The 4-phase pipeline consists of: **Phase 1 (Resolve)**, which fills 60-80% of cells using registry transfer, raw extraction mapping, lookup cascades, and deterministic compute; **Phase 2 (Agent)**, which uses Claude to extract remaining values; **Phase 3 (Resolve II)**, which normalizes LLM-extracted values to canonical codes; and **Phase 4 (Transform)**, which applies deterministic transforms, validation, and format constraints." },
|
|
6376
6931
|
{
|
|
6377
6932
|
type: "endpoint",
|
|
6378
6933
|
method: "GET",
|
|
@@ -6472,8 +7027,12 @@ var sections22 = [
|
|
|
6472
7027
|
{ label: "Create Job", slug: "create-job" },
|
|
6473
7028
|
{ label: "Get Job", slug: "get-job" }
|
|
6474
7029
|
],
|
|
6475
|
-
faq: [
|
|
6476
|
-
|
|
7030
|
+
faq: [
|
|
7031
|
+
{ question: "What does fill_rate in grid_stats represent?", answer: "Fill rate is the ratio of filled cells to total cells in the extraction grid (documents x schema fields). A fill rate of 0.94 means 94% of expected field values were successfully extracted." },
|
|
7032
|
+
{ question: "What does current_phase tell me?", answer: "It indicates which pipeline phase the job is currently executing: `phase_1_resolve`, `phase_2_execute`, `phase_3_resolve`, or `phase_4_transform`. It is `null` when the job is complete or has not started." },
|
|
7033
|
+
{ question: "How does cursor-based pagination work?", answer: "Pass the `next_cursor` value from the previous response as the `cursor` query parameter to fetch the next page. When `has_more` is `false`, you have reached the last page." }
|
|
7034
|
+
],
|
|
7035
|
+
mentions: ["jobs", "extraction pipeline", "4-phase pipeline", "grid stats"]
|
|
6477
7036
|
},
|
|
6478
7037
|
{
|
|
6479
7038
|
slug: "create-job",
|
|
@@ -6482,6 +7041,8 @@ var sections22 = [
|
|
|
6482
7041
|
seoTitle: "Create Job Endpoint \u2014 Talonic Docs",
|
|
6483
7042
|
description: "Create and run an extraction job with a schema and optional document set. Returns a job ID for polling progress and retrieving results.",
|
|
6484
7043
|
content: [
|
|
7044
|
+
{ type: "paragraph", text: "Create a new extraction job targeting a specific schema. The job immediately enters `pending` status and begins processing asynchronously. If `document_ids` is omitted, the job processes all completed documents in your organization. Poll the job status endpoint to track progress." },
|
|
7045
|
+
{ type: "callout", variant: "info", text: "Jobs are limited to 2,000 documents per run. If you need to process more, split your document set across multiple jobs." },
|
|
6485
7046
|
{
|
|
6486
7047
|
type: "endpoint",
|
|
6487
7048
|
method: "POST",
|
|
@@ -6539,8 +7100,11 @@ var sections22 = [
|
|
|
6539
7100
|
{ label: "List Jobs", slug: "list-jobs" },
|
|
6540
7101
|
{ label: "Get Job", slug: "get-job" }
|
|
6541
7102
|
],
|
|
6542
|
-
faq: [
|
|
6543
|
-
|
|
7103
|
+
faq: [
|
|
7104
|
+
{ question: "What happens if I omit document_ids?", answer: "The job processes all documents with `completed` status in your organization. This is convenient for initial runs but may include documents you do not want to process. Use `document_ids` for targeted extraction." },
|
|
7105
|
+
{ question: "Can I run multiple jobs simultaneously?", answer: "Yes. Jobs are processed independently. However, concurrent jobs compete for the same compute resources, so running many large jobs in parallel may increase processing times." }
|
|
7106
|
+
],
|
|
7107
|
+
mentions: ["create job", "extraction", "schema_id"]
|
|
6544
7108
|
},
|
|
6545
7109
|
{
|
|
6546
7110
|
slug: "get-job",
|
|
@@ -6549,6 +7113,7 @@ var sections22 = [
|
|
|
6549
7113
|
seoTitle: "Get Job Endpoint \u2014 Talonic Docs",
|
|
6550
7114
|
description: "Get job status, progress percentage, current phase, document counts, grid statistics, and estimated completion time for an extraction job.",
|
|
6551
7115
|
content: [
|
|
7116
|
+
{ type: "paragraph", text: "Retrieve the current status and progress of a specific extraction job. Use this endpoint to poll for completion after creating a job. The `grid_stats` field provides real-time cell fill rates, and `current_phase` indicates which pipeline phase is actively executing." },
|
|
6552
7117
|
{
|
|
6553
7118
|
type: "endpoint",
|
|
6554
7119
|
method: "GET",
|
|
@@ -6623,8 +7188,11 @@ var sections22 = [
|
|
|
6623
7188
|
{ label: "List Jobs", slug: "list-jobs" },
|
|
6624
7189
|
{ label: "Create Job", slug: "create-job" }
|
|
6625
7190
|
],
|
|
6626
|
-
faq: [
|
|
6627
|
-
|
|
7191
|
+
faq: [
|
|
7192
|
+
{ question: "How often should I poll for job status?", answer: "Every 2-5 seconds is a reasonable polling interval. The `progress` field updates as cells are filled across pipeline phases, giving you real-time feedback." },
|
|
7193
|
+
{ question: "What does it mean when grid_stats is null?", answer: "The grid has not been initialized yet. This happens when the job is still in `pending` status or has just started processing before Phase 1 begins." }
|
|
7194
|
+
],
|
|
7195
|
+
mentions: ["job status", "grid stats", "polling"]
|
|
6628
7196
|
},
|
|
6629
7197
|
{
|
|
6630
7198
|
slug: "cancel-job",
|
|
@@ -6633,6 +7201,8 @@ var sections22 = [
|
|
|
6633
7201
|
seoTitle: "Cancel Job Endpoint \u2014 Talonic Docs",
|
|
6634
7202
|
description: "Cancel a pending or processing job. Returns the final job state with status set to failed and an error message indicating cancellation.",
|
|
6635
7203
|
content: [
|
|
7204
|
+
{ type: "paragraph", text: "Cancel a job that is currently `pending` or `processing`. The job is immediately marked as `failed` with an error message indicating it was cancelled via the API. Any results that were already processed are preserved and can still be retrieved via the results endpoint." },
|
|
7205
|
+
{ type: "callout", variant: "info", text: "Cancellation is best-effort. If the job completes between your cancel request and the server processing it, you will receive a `409 conflict` error. Already-completed or already-failed jobs cannot be cancelled." },
|
|
6636
7206
|
{
|
|
6637
7207
|
type: "endpoint",
|
|
6638
7208
|
method: "POST",
|
|
@@ -6697,8 +7267,11 @@ var sections22 = [
|
|
|
6697
7267
|
{ label: "Get Job", slug: "get-job" },
|
|
6698
7268
|
{ label: "List Jobs", slug: "list-jobs" }
|
|
6699
7269
|
],
|
|
6700
|
-
faq: [
|
|
6701
|
-
|
|
7270
|
+
faq: [
|
|
7271
|
+
{ question: "Can I retrieve partial results from a cancelled job?", answer: "Yes. Any results that were written before cancellation are preserved. Use `GET /v1/jobs/:id/results` to retrieve whatever was processed before the job was stopped." },
|
|
7272
|
+
{ question: "Does cancelling a job refund credits?", answer: "No. Credits consumed by processing that already occurred are not refunded. You are only spared the cost of pipeline phases that had not yet executed when the job was cancelled." }
|
|
7273
|
+
],
|
|
7274
|
+
mentions: ["cancel job", "cancellation"]
|
|
6702
7275
|
},
|
|
6703
7276
|
{
|
|
6704
7277
|
slug: "get-job-results",
|
|
@@ -6707,6 +7280,8 @@ var sections22 = [
|
|
|
6707
7280
|
seoTitle: "Job Results Endpoint \u2014 Talonic Docs",
|
|
6708
7281
|
description: "Retrieve the extracted result rows for a job. Each row corresponds to one document and contains extracted field values, confidence score, and validation flags.",
|
|
6709
7282
|
content: [
|
|
7283
|
+
{ type: "paragraph", text: "Retrieve the extracted result rows for a completed (or partially completed) job. Each row corresponds to one document and contains the extracted field values keyed by field name, a row-level confidence score, and any validation flags raised during Phase 4. Results are available progressively as each pipeline phase flushes to the database." },
|
|
7284
|
+
{ type: "callout", variant: "info", text: "Results are available even while the job is still processing. The grid flushes to the database after each phase, so you can read partial results before the job completes." },
|
|
6710
7285
|
{
|
|
6711
7286
|
type: "endpoint",
|
|
6712
7287
|
method: "GET",
|
|
@@ -6795,8 +7370,12 @@ var sections22 = [
|
|
|
6795
7370
|
{ label: "Get Job", slug: "get-job" },
|
|
6796
7371
|
{ label: "Create Job", slug: "create-job" }
|
|
6797
7372
|
],
|
|
6798
|
-
faq: [
|
|
6799
|
-
|
|
7373
|
+
faq: [
|
|
7374
|
+
{ question: "What do validation_flags contain?", answer: "Validation flags are strings indicating data quality issues detected in Phase 4. Common flags include `missing_required_field:<field_name>` and `format_mismatch:<field_name>`. An empty array means all validations passed." },
|
|
7375
|
+
{ question: 'What is the difference between status "approved" and "review"?', answer: "Rows with `approved` status passed all validation checks and are ready for delivery. Rows with `review` status have one or more validation flags and may require manual inspection before use." },
|
|
7376
|
+
{ question: "Can I get results for a failed job?", answer: "Yes. If the job processed some documents before failing, those partial results are available. Check `job_status` in the response to understand the context." }
|
|
7377
|
+
],
|
|
7378
|
+
mentions: ["job results", "extracted values", "validation flags", "confidence score"]
|
|
6800
7379
|
}
|
|
6801
7380
|
];
|
|
6802
7381
|
|
|
@@ -6810,6 +7389,8 @@ var sections23 = [
|
|
|
6810
7389
|
description: "List all input sources that group documents by origin. Each source has its own API key for programmatic document ingestion.",
|
|
6811
7390
|
content: [
|
|
6812
7391
|
{ type: "paragraph", text: "Inputs group documents by origin. Each input source has its own API key for programmatic document ingestion." },
|
|
7392
|
+
{ type: "paragraph", text: "Sources are the entry point for document ingestion in Talonic. Each source represents a distinct pipeline or integration \u2014 for example, an invoice processing pipeline or a contract review workflow. Documents ingested through a source inherit its configuration, including any default schema." },
|
|
7393
|
+
{ type: "callout", variant: "info", text: "Each source has a unique `endpoint` URL for document ingestion. Use the source-scoped API key (returned at creation) to authenticate uploads to that endpoint." },
|
|
6813
7394
|
{
|
|
6814
7395
|
type: "endpoint",
|
|
6815
7396
|
method: "GET",
|
|
@@ -6871,8 +7452,11 @@ var sections23 = [
|
|
|
6871
7452
|
{ label: "Create Input", slug: "create-source" },
|
|
6872
7453
|
{ label: "Get Source", slug: "get-source" }
|
|
6873
7454
|
],
|
|
6874
|
-
faq: [
|
|
6875
|
-
|
|
7455
|
+
faq: [
|
|
7456
|
+
{ question: "What is the difference between a source and a schema?", answer: "A source groups documents by origin (e.g. an API integration or upload pipeline). A schema defines the fields to extract from documents. A source can optionally have a default schema applied to all its documents." },
|
|
7457
|
+
{ question: "Can I see the API key for an existing source?", answer: "No. The source-scoped API key is only shown once in the creation response. If lost, delete the source and create a new one." }
|
|
7458
|
+
],
|
|
7459
|
+
mentions: ["sources", "input", "document ingestion"]
|
|
6876
7460
|
},
|
|
6877
7461
|
{
|
|
6878
7462
|
slug: "create-source",
|
|
@@ -6881,6 +7465,8 @@ var sections23 = [
|
|
|
6881
7465
|
seoTitle: "Create Source Endpoint \u2014 Talonic Docs",
|
|
6882
7466
|
description: "Create a new input source and receive a source-scoped API key. The key is only shown once in the creation response \u2014 store it securely.",
|
|
6883
7467
|
content: [
|
|
7468
|
+
{ type: "paragraph", text: "Create a new source to start ingesting documents. The response includes a **source-scoped API key** (`tlnc_sk_*`) that authenticates uploads to this source's endpoint. This key is shown only once \u2014 store it securely immediately after creation." },
|
|
7469
|
+
{ type: "callout", variant: "warning", text: "The `api_key` is only returned in the creation response. It cannot be retrieved later. If you lose it, delete the source and create a new one." },
|
|
6884
7470
|
{
|
|
6885
7471
|
type: "endpoint",
|
|
6886
7472
|
method: "POST",
|
|
@@ -6948,10 +7534,14 @@ var sections23 = [
|
|
|
6948
7534
|
}
|
|
6949
7535
|
],
|
|
6950
7536
|
related: [
|
|
6951
|
-
{ label: "List Inputs", slug: "list-sources" }
|
|
7537
|
+
{ label: "List Inputs", slug: "list-sources" },
|
|
7538
|
+
{ label: "Source Documents", slug: "update-source" }
|
|
6952
7539
|
],
|
|
6953
|
-
faq: [
|
|
6954
|
-
|
|
7540
|
+
faq: [
|
|
7541
|
+
{ question: "What source types are available?", answer: "The `type` field accepts `api` (programmatic ingestion via REST), `upload` (manual file uploads), and `connector` (third-party integrations like Google Drive or SharePoint)." },
|
|
7542
|
+
{ question: "Can I set a default schema when creating a source?", answer: "Yes. Pass a `default_schema_id` in the request body to automatically apply a schema to all documents ingested through this source." }
|
|
7543
|
+
],
|
|
7544
|
+
mentions: ["API key", "source creation", "source-scoped key"]
|
|
6955
7545
|
},
|
|
6956
7546
|
{
|
|
6957
7547
|
slug: "get-source",
|
|
@@ -6960,6 +7550,7 @@ var sections23 = [
|
|
|
6960
7550
|
seoTitle: "Manage Source Endpoint \u2014 Talonic Docs",
|
|
6961
7551
|
description: "Get source details, update a source name, or delete a source. Documents are retained but unlinked when a source is deleted.",
|
|
6962
7552
|
content: [
|
|
7553
|
+
{ type: "paragraph", text: "Manage an individual source with GET, PATCH, and DELETE operations on the same path. Retrieve source details, update its name, or permanently delete it. When a source is deleted, its documents are **retained** but unlinked from the source." },
|
|
6963
7554
|
{
|
|
6964
7555
|
type: "endpoint",
|
|
6965
7556
|
method: "GET",
|
|
@@ -7108,10 +7699,14 @@ var sections23 = [
|
|
|
7108
7699
|
}
|
|
7109
7700
|
],
|
|
7110
7701
|
related: [
|
|
7111
|
-
{ label: "List Inputs", slug: "list-sources" }
|
|
7702
|
+
{ label: "List Inputs", slug: "list-sources" },
|
|
7703
|
+
{ label: "Source Documents", slug: "update-source" }
|
|
7112
7704
|
],
|
|
7113
|
-
faq: [
|
|
7114
|
-
|
|
7705
|
+
faq: [
|
|
7706
|
+
{ question: "What happens to documents when I delete a source?", answer: "Documents are retained in your organization but unlinked from the deleted source. They remain accessible via the documents API and any existing extractions are preserved." },
|
|
7707
|
+
{ question: "Does deleting a source revoke its API key?", answer: "Yes. The source-scoped API key is immediately invalidated when the source is deleted. Any subsequent upload attempts using that key will return 401." }
|
|
7708
|
+
],
|
|
7709
|
+
mentions: ["source management", "source deletion", "document retention"]
|
|
7115
7710
|
},
|
|
7116
7711
|
{
|
|
7117
7712
|
slug: "update-source",
|
|
@@ -7120,6 +7715,9 @@ var sections23 = [
|
|
|
7120
7715
|
seoTitle: "Source Documents Endpoint \u2014 Talonic Docs",
|
|
7121
7716
|
description: "Ingest documents into a specific source or list documents belonging to a source. Supports batch processing mode at 50% cost discount.",
|
|
7122
7717
|
content: [
|
|
7718
|
+
{ type: "paragraph", text: "Ingest documents into a source for processing, or list all documents that belong to a source. The ingestion endpoint accepts a file upload or a URL, processes the document through the extraction pipeline, and returns the document ID for status tracking." },
|
|
7719
|
+
{ type: "paragraph", text: "Documents can be processed in **realtime** (default, results in seconds) or **batch** mode (50% cost discount, results within 48 hours). Duplicate files are detected via SHA-256 hash and rejected with a `duplicate` status." },
|
|
7720
|
+
{ type: "callout", variant: "info", text: "Batch processing mode reduces cost by 50% but delivers results within 48 hours. Use `processing_mode=batch` for large ingestion jobs where latency is not critical." },
|
|
7123
7721
|
{
|
|
7124
7722
|
type: "endpoint",
|
|
7125
7723
|
method: "POST",
|
|
@@ -7254,8 +7852,12 @@ var sections23 = [
|
|
|
7254
7852
|
{ label: "List Inputs", slug: "list-sources" },
|
|
7255
7853
|
{ label: "List Documents", slug: "list-documents" }
|
|
7256
7854
|
],
|
|
7257
|
-
faq: [
|
|
7258
|
-
|
|
7855
|
+
faq: [
|
|
7856
|
+
{ question: "What happens if I upload a duplicate file?", answer: 'Duplicate files are detected via SHA-256 hash comparison. The response returns `status: "duplicate"` with `existing_document_id` pointing to the original document. No new document is created.' },
|
|
7857
|
+
{ question: "Can I use both file and file_url in the same request?", answer: "No. Provide either `file` (binary upload) or `file_url` (URL fetch), but not both. The API returns a 400 error if neither is provided." },
|
|
7858
|
+
{ question: "What file formats are supported for document ingestion?", answer: "Talonic supports 25+ formats including PDF, DOCX, XLSX, CSV, PPTX, MSG, EML, PNG, JPG, HTML, XML, JSON, and more. See the supported file types documentation for the full list." }
|
|
7859
|
+
],
|
|
7860
|
+
mentions: ["document ingestion", "batch processing", "duplicate detection", "file upload"]
|
|
7259
7861
|
}
|
|
7260
7862
|
];
|
|
7261
7863
|
|
|
@@ -7269,6 +7871,7 @@ var sections24 = [
|
|
|
7269
7871
|
description: "Autocomplete field names from the registry ranked by relevance and occurrence count. Power field picker UIs with type-ahead search.",
|
|
7270
7872
|
content: [
|
|
7271
7873
|
{ type: "paragraph", text: "Search and filter documents by their extracted field values. Includes field autocomplete, document filtering with composable conditions, global omnisearch, and saved filter management." },
|
|
7874
|
+
{ type: "paragraph", text: "The field autocomplete endpoint enables type-ahead search over the field registry. Results are ranked by a combination of name relevance and occurrence count, making frequently seen fields surface first. Use this to power field picker dropdowns and search-as-you-type UIs." },
|
|
7272
7875
|
{
|
|
7273
7876
|
type: "endpoint",
|
|
7274
7877
|
method: "GET",
|
|
@@ -7335,8 +7938,12 @@ var sections24 = [
|
|
|
7335
7938
|
{ label: "Field Values", slug: "field-values" },
|
|
7336
7939
|
{ label: "Filter Documents", slug: "filter-documents" }
|
|
7337
7940
|
],
|
|
7338
|
-
faq: [
|
|
7339
|
-
|
|
7941
|
+
faq: [
|
|
7942
|
+
{ question: "How does the ranking work?", answer: "Results are ranked by a combined score of name match relevance (canonical name, display name, and alias matches) and `occurrenceCount`. Fields that appear in more documents rank higher for equivalent relevance." },
|
|
7943
|
+
{ question: "What does the tier field represent?", answer: "Tier 1 fields have high confidence and consistent extraction across documents. Higher tiers indicate lower confidence or less frequent occurrence. Use tier to decide which fields to display prominently." },
|
|
7944
|
+
{ question: "Can I search by alias names?", answer: "Yes. The autocomplete matches against canonical names, display names, and registered aliases. The `matchSource` field indicates how the match was made." }
|
|
7945
|
+
],
|
|
7946
|
+
mentions: ["autocomplete", "field registry", "type-ahead search"]
|
|
7340
7947
|
},
|
|
7341
7948
|
{
|
|
7342
7949
|
slug: "field-values",
|
|
@@ -7345,6 +7952,7 @@ var sections24 = [
|
|
|
7345
7952
|
seoTitle: "Field Values Endpoint \u2014 Talonic Docs",
|
|
7346
7953
|
description: "List distinct values for a field across documents with counts. Useful for building filter dropdowns and faceted search interfaces.",
|
|
7347
7954
|
content: [
|
|
7955
|
+
{ type: "paragraph", text: "Retrieve the distinct values for a specific field across all documents in your workspace. Results are sorted by count descending, making the most common values appear first. Use this to populate filter dropdowns, build faceted search interfaces, or analyze value distributions for data quality." },
|
|
7348
7956
|
{
|
|
7349
7957
|
type: "endpoint",
|
|
7350
7958
|
method: "GET",
|
|
@@ -7400,8 +8008,11 @@ var sections24 = [
|
|
|
7400
8008
|
{ label: "Field Autocomplete", slug: "field-autocomplete" },
|
|
7401
8009
|
{ label: "Filter Documents", slug: "filter-documents" }
|
|
7402
8010
|
],
|
|
7403
|
-
faq: [
|
|
7404
|
-
|
|
8011
|
+
faq: [
|
|
8012
|
+
{ question: "Are values case-sensitive?", answer: 'Values are returned as extracted. The `q` substring filter is case-insensitive, so searching for "acme" will match "Acme Corp".' },
|
|
8013
|
+
{ question: "What does totalDistinct represent when a limit is applied?", answer: 'It shows the total number of unique values for this field across all documents, regardless of the `limit` parameter. Use it to indicate "showing 20 of 156 values" in your UI.' }
|
|
8014
|
+
],
|
|
8015
|
+
mentions: ["field values", "faceted search", "value distribution"]
|
|
7405
8016
|
},
|
|
7406
8017
|
{
|
|
7407
8018
|
slug: "filter-documents",
|
|
@@ -7410,6 +8021,8 @@ var sections24 = [
|
|
|
7410
8021
|
seoTitle: "Filter Documents Endpoint \u2014 Talonic Docs",
|
|
7411
8022
|
description: "Filter documents by field value conditions using composable operators including equality, comparison, range, containment, and emptiness checks.",
|
|
7412
8023
|
content: [
|
|
8024
|
+
{ type: "paragraph", text: "Filter documents by composing conditions on extracted field values. Each condition targets a specific field and applies an operator to test its value. Multiple conditions are AND-combined. The endpoint also supports free-text search across document content and sorting by any field." },
|
|
8025
|
+
{ type: "callout", variant: "info", text: "The `fieldId` in each condition must be a valid field registry ID (e.g. `fld_a1b2c3d4`). Use the field autocomplete endpoint to discover available field IDs for your workspace." },
|
|
7413
8026
|
{
|
|
7414
8027
|
type: "endpoint",
|
|
7415
8028
|
method: "POST",
|
|
@@ -7497,8 +8110,12 @@ var sections24 = [
|
|
|
7497
8110
|
{ label: "Field Autocomplete", slug: "field-autocomplete" },
|
|
7498
8111
|
{ label: "Omnisearch", slug: "omnisearch" }
|
|
7499
8112
|
],
|
|
7500
|
-
faq: [
|
|
7501
|
-
|
|
8113
|
+
faq: [
|
|
8114
|
+
{ question: "How do I use the between operator?", answer: 'Provide both `value` (lower bound, inclusive) and `valueTo` (upper bound, inclusive) in the condition. Works with dates, numbers, and strings. Example: `{ "fieldId": "fld_x", "operator": "between", "value": "2024-01-01", "valueTo": "2024-12-31" }`.' },
|
|
8115
|
+
{ question: "What happens if a document does not have a value for a filtered field?", answer: "Documents missing the filtered field are excluded from results unless you use the `is_empty` operator, which explicitly matches documents where the field is null or absent." },
|
|
8116
|
+
{ question: "Can I combine free-text search with field conditions?", answer: "Yes. Set the `search` parameter alongside `conditions`. Both are AND-combined \u2014 documents must match the search text and all conditions." }
|
|
8117
|
+
],
|
|
8118
|
+
mentions: ["filter", "conditions", "operators", "composable filters"]
|
|
7502
8119
|
},
|
|
7503
8120
|
{
|
|
7504
8121
|
slug: "filter-documents-export",
|
|
@@ -7507,6 +8124,7 @@ var sections24 = [
|
|
|
7507
8124
|
seoTitle: "Omnisearch Endpoint \u2014 Talonic Docs",
|
|
7508
8125
|
description: "Global omnisearch across documents, fields, sources, and schemas. Unified search endpoint that powers the Cmd+K search experience.",
|
|
7509
8126
|
content: [
|
|
8127
|
+
{ type: "paragraph", text: "The omnisearch endpoint provides a unified search across all entity types in your workspace: documents, field values, source connections, schemas, and registry fields. A single query returns categorized results, making it ideal for building global search UIs like Cmd+K palettes." },
|
|
7510
8128
|
{
|
|
7511
8129
|
type: "endpoint",
|
|
7512
8130
|
method: "GET",
|
|
@@ -7576,8 +8194,11 @@ var sections24 = [
|
|
|
7576
8194
|
{ label: "Filter Documents", slug: "filter-documents" },
|
|
7577
8195
|
{ label: "Field Autocomplete", slug: "field-autocomplete" }
|
|
7578
8196
|
],
|
|
7579
|
-
faq: [
|
|
7580
|
-
|
|
8197
|
+
faq: [
|
|
8198
|
+
{ question: "Does omnisearch return results from all entity types in every call?", answer: "Yes. Every call searches documents, field values, sources, schemas, and registry fields simultaneously. Empty categories are returned as empty arrays." },
|
|
8199
|
+
{ question: "How is the limit parameter applied?", answer: "The `limit` applies independently to each entity type. Setting `limit=5` returns up to 5 documents, 5 field matches, 5 sources, 5 schemas, and 5 fields." }
|
|
8200
|
+
],
|
|
8201
|
+
mentions: ["omnisearch", "Cmd+K", "unified search"]
|
|
7581
8202
|
},
|
|
7582
8203
|
{
|
|
7583
8204
|
slug: "omnisearch",
|
|
@@ -7586,6 +8207,7 @@ var sections24 = [
|
|
|
7586
8207
|
seoTitle: "Saved Filters Endpoints \u2014 Talonic Docs",
|
|
7587
8208
|
description: "Create, list, and delete saved filter configurations for reuse. Persist filter conditions, search terms, and sort settings.",
|
|
7588
8209
|
content: [
|
|
8210
|
+
{ type: "paragraph", text: "Saved filters persist reusable filter configurations so you can apply the same conditions, search terms, and sort settings across multiple sessions. Create a saved filter from any combination of conditions, then reload it later without re-specifying each parameter." },
|
|
7589
8211
|
{
|
|
7590
8212
|
type: "endpoint",
|
|
7591
8213
|
method: "GET",
|
|
@@ -7719,8 +8341,11 @@ var sections24 = [
|
|
|
7719
8341
|
related: [
|
|
7720
8342
|
{ label: "Filter Documents", slug: "filter-documents" }
|
|
7721
8343
|
],
|
|
7722
|
-
faq: [
|
|
7723
|
-
|
|
8344
|
+
faq: [
|
|
8345
|
+
{ question: "Can I update a saved filter?", answer: "There is no PUT endpoint for saved filters. To update, delete the existing filter and create a new one with the desired configuration." },
|
|
8346
|
+
{ question: "Are saved filters shared across team members?", answer: "Yes. Saved filters are scoped to the organization, so all team members with read access can list and use them." }
|
|
8347
|
+
],
|
|
8348
|
+
mentions: ["saved filters", "reusable filters"]
|
|
7724
8349
|
},
|
|
7725
8350
|
{
|
|
7726
8351
|
slug: "saved-filters",
|
|
@@ -7734,8 +8359,11 @@ var sections24 = [
|
|
|
7734
8359
|
related: [
|
|
7735
8360
|
{ label: "Filter Documents", slug: "filter-documents" }
|
|
7736
8361
|
],
|
|
7737
|
-
faq: [
|
|
7738
|
-
|
|
8362
|
+
faq: [
|
|
8363
|
+
{ question: "Does the document counts endpoint support the same conditions as the filter endpoint?", answer: "Yes. The conditions format is identical to the filter documents endpoint. You can reuse the same condition arrays to get counts before fetching the actual documents." },
|
|
8364
|
+
{ question: "Can I use document counts without any conditions?", answer: "Yes. Omitting conditions returns the total document count across all sources, which is useful for dashboard overview widgets." }
|
|
8365
|
+
],
|
|
8366
|
+
mentions: ["document counts", "faceted navigation", "aggregate counts"]
|
|
7739
8367
|
},
|
|
7740
8368
|
{
|
|
7741
8369
|
slug: "document-counts",
|
|
@@ -7744,6 +8372,8 @@ var sections24 = [
|
|
|
7744
8372
|
seoTitle: "Materialize Index Endpoint \u2014 Talonic Docs",
|
|
7745
8373
|
description: "Trigger materialization backfill for filter indexes. Rebuilds the materialized field value index used by filter queries after bulk ingestion.",
|
|
7746
8374
|
content: [
|
|
8375
|
+
{ type: "paragraph", text: "Trigger a rebuild of the materialized field value index. The materialized index pre-computes extracted field values for every document, enabling sub-second filter queries even on large workspaces. Run this endpoint after bulk document ingestion or schema changes to ensure filter results are current." },
|
|
8376
|
+
{ type: "callout", variant: "warning", text: "Materialization can be resource-intensive on large workspaces. Avoid calling this endpoint during peak usage. For incremental ingestion, the index updates automatically per document." },
|
|
7747
8377
|
{
|
|
7748
8378
|
type: "endpoint",
|
|
7749
8379
|
method: "POST",
|
|
@@ -7780,8 +8410,11 @@ var sections24 = [
|
|
|
7780
8410
|
related: [
|
|
7781
8411
|
{ label: "Filter Documents", slug: "filter-documents" }
|
|
7782
8412
|
],
|
|
7783
|
-
faq: [
|
|
7784
|
-
|
|
8413
|
+
faq: [
|
|
8414
|
+
{ question: "When do I need to manually trigger materialization?", answer: "Only after bulk ingestion (e.g. uploading hundreds of documents at once) or schema changes. For normal single-document uploads, the index is updated automatically during post-extraction processing." },
|
|
8415
|
+
{ question: "Is materialization idempotent?", answer: "Yes. Running it multiple times produces the same result. Existing materialized values are upserted, not duplicated." }
|
|
8416
|
+
],
|
|
8417
|
+
mentions: ["materialized index", "backfill", "index rebuild"]
|
|
7785
8418
|
},
|
|
7786
8419
|
{
|
|
7787
8420
|
slug: "materialized-index",
|
|
@@ -7795,8 +8428,11 @@ var sections24 = [
|
|
|
7795
8428
|
related: [
|
|
7796
8429
|
{ label: "Materialize", slug: "document-counts" }
|
|
7797
8430
|
],
|
|
7798
|
-
faq: [
|
|
7799
|
-
|
|
8431
|
+
faq: [
|
|
8432
|
+
{ question: "What happens if the materialized index is out of date?", answer: "Filter queries may return stale results or miss recently ingested documents. Trigger a materialization backfill via `POST /filter/materialize` to rebuild the index." },
|
|
8433
|
+
{ question: "Does the materialized index affect omnisearch results?", answer: "Yes. Omnisearch queries field values from the same materialized index. An out-of-date index may cause missing results in both filter and omnisearch endpoints." }
|
|
8434
|
+
],
|
|
8435
|
+
mentions: ["materialized index", "filter performance"]
|
|
7800
8436
|
}
|
|
7801
8437
|
];
|
|
7802
8438
|
|
|
@@ -7972,6 +8608,7 @@ var sections26 = [
|
|
|
7972
8608
|
description: "List resolution runs that apply field normalization, lookup cascades, and value transforms to extracted data. Supports cursor-based pagination.",
|
|
7973
8609
|
content: [
|
|
7974
8610
|
{ type: "paragraph", text: "Resolution runs apply field normalization, lookup cascades, and value transforms to extracted data. Create a resolution from a completed job run to standardise field values against reference data." },
|
|
8611
|
+
{ type: "paragraph", text: 'The resolution pipeline maps raw extracted values (e.g. "Deutschland") to canonical forms (e.g. "DE") using a 3-tier lookup cascade: string normalization, token-based fuzzy matching, and an LLM fallback. Each resolution run captures a snapshot of the active resolution policy and dialect at run time for reproducibility.' },
|
|
7975
8612
|
{
|
|
7976
8613
|
type: "endpoint",
|
|
7977
8614
|
method: "GET",
|
|
@@ -8046,8 +8683,12 @@ var sections26 = [
|
|
|
8046
8683
|
{ label: "Create Resolution", slug: "create-resolution" },
|
|
8047
8684
|
{ label: "Get Resolution Results", slug: "get-resolution-results" }
|
|
8048
8685
|
],
|
|
8049
|
-
faq: [
|
|
8050
|
-
|
|
8686
|
+
faq: [
|
|
8687
|
+
{ question: "What is the difference between a job run and a resolution run?", answer: "A job run extracts raw field values from documents. A resolution run takes those raw values and normalizes them to canonical forms using lookup tables, fuzzy matching, and LLM fallbacks." },
|
|
8688
|
+
{ question: "What are policy_snapshot and dialect_snapshot?", answer: "These capture the resolution configuration at run time. The policy snapshot records which normalization strategies were active. The dialect snapshot records output format preferences (e.g. date format, number locale). Both ensure runs are reproducible." },
|
|
8689
|
+
{ question: "Can I filter resolutions by the originating job?", answer: "Yes. Use the `source_run_id` query parameter to find all resolution runs derived from a specific job run." }
|
|
8690
|
+
],
|
|
8691
|
+
mentions: ["resolution", "normalization", "lookup cascade", "canonical forms"]
|
|
8051
8692
|
},
|
|
8052
8693
|
{
|
|
8053
8694
|
slug: "create-resolution",
|
|
@@ -8056,6 +8697,8 @@ var sections26 = [
|
|
|
8056
8697
|
seoTitle: "Create Resolution Endpoint \u2014 Talonic Docs",
|
|
8057
8698
|
description: "Create a resolution run from a completed job to standardise extracted field values against reference data using lookup cascades and transforms.",
|
|
8058
8699
|
content: [
|
|
8700
|
+
{ type: "paragraph", text: "Create a new resolution run targeting documents from a completed job run. The resolution enters `pending` status immediately. Call the execute endpoint to start processing, or it will be picked up automatically depending on your pipeline configuration." },
|
|
8701
|
+
{ type: "callout", variant: "info", text: "The `source_run_id` must reference a completed job run. Creating a resolution against a pending or failed run returns a `404` error." },
|
|
8059
8702
|
{
|
|
8060
8703
|
type: "endpoint",
|
|
8061
8704
|
method: "POST",
|
|
@@ -8130,8 +8773,11 @@ var sections26 = [
|
|
|
8130
8773
|
{ label: "List Resolutions", slug: "list-resolutions" },
|
|
8131
8774
|
{ label: "Execute Resolution", slug: "execute-resolution" }
|
|
8132
8775
|
],
|
|
8133
|
-
faq: [
|
|
8134
|
-
|
|
8776
|
+
faq: [
|
|
8777
|
+
{ question: "Do I need to call execute after creating a resolution?", answer: "Usually, yes. Creating a resolution only sets it to `pending`. Call `POST /v1/resolutions/{id}/execute` to start the resolution pipeline, unless your pipeline configuration picks up pending runs automatically." },
|
|
8778
|
+
{ question: "Can I create multiple resolutions from the same job run?", answer: "Yes. Each resolution run is independent and produces its own set of results. This is useful for testing different resolution policies." }
|
|
8779
|
+
],
|
|
8780
|
+
mentions: ["create resolution", "source_run_id"]
|
|
8135
8781
|
},
|
|
8136
8782
|
{
|
|
8137
8783
|
slug: "get-resolution",
|
|
@@ -8140,6 +8786,7 @@ var sections26 = [
|
|
|
8140
8786
|
seoTitle: "Get Resolution Endpoint \u2014 Talonic Docs",
|
|
8141
8787
|
description: "Retrieve a resolution run by ID with its current status, document count, completion timestamp, and links to results. Requires read scope for the workspace.",
|
|
8142
8788
|
content: [
|
|
8789
|
+
{ type: "paragraph", text: "Retrieve the current status and metadata of a specific resolution run. Use this endpoint to poll for completion after executing a resolution. The response includes links to the results endpoint where you can inspect per-field resolved values." },
|
|
8143
8790
|
{
|
|
8144
8791
|
type: "endpoint",
|
|
8145
8792
|
method: "GET",
|
|
@@ -8198,8 +8845,11 @@ var sections26 = [
|
|
|
8198
8845
|
{ label: "List Resolutions", slug: "list-resolutions" },
|
|
8199
8846
|
{ label: "Get Resolution Results", slug: "get-resolution-results" }
|
|
8200
8847
|
],
|
|
8201
|
-
faq: [
|
|
8202
|
-
|
|
8848
|
+
faq: [
|
|
8849
|
+
{ question: "How long does a resolution run typically take?", answer: "Resolution runs are fast for purely deterministic lookups (seconds). Runs that require LLM fallback for ambiguous values take longer, typically 1-5 minutes depending on the number of unresolved fields." },
|
|
8850
|
+
{ question: "What does a failed resolution status mean?", answer: "A `failed` status indicates the resolution pipeline encountered an unrecoverable error. Check the `error_message` field for details. You can delete the failed run and create a new one." }
|
|
8851
|
+
],
|
|
8852
|
+
mentions: ["resolution status", "polling"]
|
|
8203
8853
|
},
|
|
8204
8854
|
{
|
|
8205
8855
|
slug: "get-resolution-results",
|
|
@@ -8208,6 +8858,8 @@ var sections26 = [
|
|
|
8208
8858
|
seoTitle: "Get Resolution Results \u2014 Talonic Docs",
|
|
8209
8859
|
description: "Get per-field resolution results showing original values, resolved values, resolution step used, and confidence scores for each document.",
|
|
8210
8860
|
content: [
|
|
8861
|
+
{ type: "paragraph", text: "Retrieve the per-field resolution results for a completed run. Each result shows the original extracted value alongside the resolved canonical value, the resolution strategy that produced the match, and a confidence score. Use this to audit how values were normalized and identify fields that may need manual review." },
|
|
8862
|
+
{ type: "callout", variant: "info", text: "The `resolution_step` field indicates which strategy produced the resolved value: `lookup` (direct table match), `transfer` (registry transfer), `compute` (deterministic computation), or `null` if no resolution was applied." },
|
|
8211
8863
|
{
|
|
8212
8864
|
type: "endpoint",
|
|
8213
8865
|
method: "GET",
|
|
@@ -8266,8 +8918,11 @@ var sections26 = [
|
|
|
8266
8918
|
related: [
|
|
8267
8919
|
{ label: "Get Resolution", slug: "get-resolution" }
|
|
8268
8920
|
],
|
|
8269
|
-
faq: [
|
|
8270
|
-
|
|
8921
|
+
faq: [
|
|
8922
|
+
{ question: "What does it mean when resolved_value is null?", answer: "A null `resolved_value` means no resolution strategy could match the original value to a canonical form. The field retains its raw extracted value. Consider adding the value to a lookup table for future runs." },
|
|
8923
|
+
{ question: "What confidence scores should I expect from each resolution step?", answer: "Direct `lookup` matches typically score 0.95-1.0. Fuzzy token matches score 0.7-0.95. LLM fallback matches score 0.5-0.8. Values below 0.5 usually indicate low-confidence guesses." }
|
|
8924
|
+
],
|
|
8925
|
+
mentions: ["resolution results", "resolved values", "original values", "resolution step"]
|
|
8271
8926
|
},
|
|
8272
8927
|
{
|
|
8273
8928
|
slug: "execute-resolution",
|
|
@@ -8276,6 +8931,8 @@ var sections26 = [
|
|
|
8276
8931
|
seoTitle: "Execute Resolution Endpoint \u2014 Talonic Docs",
|
|
8277
8932
|
description: "Execute the resolution pipeline on all pending fields. Returns immediately \u2014 poll the run for progress. Requires write scope.",
|
|
8278
8933
|
content: [
|
|
8934
|
+
{ type: "paragraph", text: "Start the resolution pipeline on a pending resolution run. The endpoint returns immediately with the updated run status (typically `running`). Poll the get resolution endpoint to track progress and check for completion." },
|
|
8935
|
+
{ type: "callout", variant: "warning", text: "Executing a resolution that is already `running` or `completed` has no effect. Only `pending` runs can be executed." },
|
|
8279
8936
|
{
|
|
8280
8937
|
type: "endpoint",
|
|
8281
8938
|
method: "POST",
|
|
@@ -8334,8 +8991,11 @@ var sections26 = [
|
|
|
8334
8991
|
{ label: "Create Resolution", slug: "create-resolution" },
|
|
8335
8992
|
{ label: "Get Resolution Results", slug: "get-resolution-results" }
|
|
8336
8993
|
],
|
|
8337
|
-
faq: [
|
|
8338
|
-
|
|
8994
|
+
faq: [
|
|
8995
|
+
{ question: "Is the execute call synchronous?", answer: "No. The endpoint returns immediately with status `running`. Poll `GET /v1/resolutions/{id}` to track progress and detect completion." },
|
|
8996
|
+
{ question: "What happens if execution fails?", answer: "The run status transitions to `failed` and the `error_message` field is populated. You can delete the failed run and create a new one to retry." }
|
|
8997
|
+
],
|
|
8998
|
+
mentions: ["execute resolution", "start resolution"]
|
|
8339
8999
|
},
|
|
8340
9000
|
{
|
|
8341
9001
|
slug: "cancel-resolution",
|
|
@@ -8344,6 +9004,8 @@ var sections26 = [
|
|
|
8344
9004
|
seoTitle: "Delete Resolution Endpoint \u2014 Talonic Docs",
|
|
8345
9005
|
description: "Delete a resolution run and its results. Requires write scope. This action permanently removes all resolution data.",
|
|
8346
9006
|
content: [
|
|
9007
|
+
{ type: "paragraph", text: "Permanently delete a resolution run and all its associated results. This action is irreversible. Use this to clean up failed runs, remove outdated resolutions, or free up storage. The originating job run and its results are not affected." },
|
|
9008
|
+
{ type: "callout", variant: "warning", text: "Deletion is permanent. All per-field resolution results associated with this run are removed. The source job run and its extracted data are unaffected." },
|
|
8347
9009
|
{
|
|
8348
9010
|
type: "endpoint",
|
|
8349
9011
|
method: "DELETE",
|
|
@@ -8381,8 +9043,11 @@ var sections26 = [
|
|
|
8381
9043
|
related: [
|
|
8382
9044
|
{ label: "List Resolutions", slug: "list-resolutions" }
|
|
8383
9045
|
],
|
|
8384
|
-
faq: [
|
|
8385
|
-
|
|
9046
|
+
faq: [
|
|
9047
|
+
{ question: "Can I delete a resolution that is currently running?", answer: "Yes. The delete operation cancels the running resolution and removes all data. However, it is recommended to wait for completion or failure before deleting to avoid race conditions." },
|
|
9048
|
+
{ question: "Does deleting a resolution affect the source job run?", answer: "No. The source job run, its documents, and its extracted values are completely unaffected by resolution deletion." }
|
|
9049
|
+
],
|
|
9050
|
+
mentions: ["delete resolution", "permanent deletion"]
|
|
8386
9051
|
}
|
|
8387
9052
|
];
|
|
8388
9053
|
|
|
@@ -8396,6 +9061,8 @@ var sections27 = [
|
|
|
8396
9061
|
description: "List all discovered link keys with their classification category and frequency. Link keys are field-level entity identifiers used for document linking.",
|
|
8397
9062
|
content: [
|
|
8398
9063
|
{ type: "paragraph", text: "The linking graph connects documents through shared entity values \u2014 an invoice and a contract sharing the same customer ID are linked. The API exposes the bipartite document-entity graph: link keys (field-level entity identifiers), document-level links, the full graph, document-centric subgraphs, classification (identity, transaction, reference), backfill, and document-to-case mapping." },
|
|
9064
|
+
{ type: "paragraph", text: "Link keys are the fields that the linking engine uses to discover connections between documents. Each link key has a **classification category** \u2014 `identity` (e.g. vendor ID, customer number), `transaction` (e.g. invoice number, PO number), or `reference` (e.g. contract reference). Use this endpoint to inspect which fields are currently recognized as link keys and how frequently they appear across your documents." },
|
|
9065
|
+
{ type: "callout", variant: "info", text: "Link keys are discovered automatically during extraction. If a field is not yet classified, use the **Classify** endpoint to trigger AI-based classification on ambiguous fields." },
|
|
8399
9066
|
{
|
|
8400
9067
|
type: "endpoint",
|
|
8401
9068
|
method: "GET",
|
|
@@ -8457,9 +9124,11 @@ var sections27 = [
|
|
|
8457
9124
|
{ label: "List Cases", slug: "list-cases" }
|
|
8458
9125
|
],
|
|
8459
9126
|
faq: [
|
|
8460
|
-
{ question: "What are link keys in Talonic?", answer: "Link keys are field-level entity identifiers (e.g. customer_id, invoice_number) used to connect documents that share the same value." }
|
|
9127
|
+
{ question: "What are link keys in Talonic?", answer: "Link keys are field-level entity identifiers (e.g. customer_id, invoice_number) used to connect documents that share the same value." },
|
|
9128
|
+
{ question: "What is the difference between identity, transaction, and reference link keys?", answer: "Identity keys represent stable entity identifiers (e.g. vendor ID, tax number). Transaction keys are document-specific identifiers (e.g. invoice number, PO number). Reference keys are cross-references between documents (e.g. contract reference cited in an invoice)." },
|
|
9129
|
+
{ question: "How are link keys discovered?", answer: "Link keys are discovered automatically during document extraction. Fields are classified using rule-based heuristics first, then an LLM call for ambiguous cases. You can also trigger classification manually via the Classify endpoint." }
|
|
8461
9130
|
],
|
|
8462
|
-
mentions: ["link keys", "bipartite graph", "entity linking"]
|
|
9131
|
+
mentions: ["link keys", "bipartite graph", "entity linking", "field registry", "link key category"]
|
|
8463
9132
|
},
|
|
8464
9133
|
{
|
|
8465
9134
|
slug: "reclassify-link-key",
|
|
@@ -8468,6 +9137,8 @@ var sections27 = [
|
|
|
8468
9137
|
seoTitle: "Classify Link Keys \u2014 Talonic Docs",
|
|
8469
9138
|
description: "Classify link keys into categories (identity, transaction, reference) using AI. Runs asynchronously on ambiguous fields.",
|
|
8470
9139
|
content: [
|
|
9140
|
+
{ type: "paragraph", text: "When new fields are extracted, some may not be automatically classified as link keys. The classify endpoint runs AI-powered classification on ambiguous fields to determine whether they are **identity**, **transaction**, or **reference** link keys. This is useful after onboarding new document types or when the field registry grows." },
|
|
9141
|
+
{ type: "callout", variant: "info", text: "Classification uses a two-pass approach: rule-based heuristics handle obvious cases (e.g. fields named `invoice_number`), then an LLM call classifies the remaining ambiguous fields. A backfill is automatically triggered when new link keys are identified." },
|
|
8471
9142
|
{
|
|
8472
9143
|
type: "endpoint",
|
|
8473
9144
|
method: "POST",
|
|
@@ -8504,10 +9175,14 @@ var sections27 = [
|
|
|
8504
9175
|
}
|
|
8505
9176
|
],
|
|
8506
9177
|
related: [
|
|
8507
|
-
{ label: "Link Keys", slug: "list-link-keys" }
|
|
9178
|
+
{ label: "Link Keys", slug: "list-link-keys" },
|
|
9179
|
+
{ label: "Backfill", slug: "list-cases" }
|
|
8508
9180
|
],
|
|
8509
|
-
faq: [
|
|
8510
|
-
|
|
9181
|
+
faq: [
|
|
9182
|
+
{ question: "Does classification run synchronously?", answer: "The endpoint returns immediately with the count of classified fields. If new link keys are found, a backfill is triggered asynchronously to update entity links across all documents." },
|
|
9183
|
+
{ question: "Can I reclassify an already-classified link key?", answer: "The classify endpoint targets unclassified or ambiguous fields. Already-classified link keys are not re-evaluated unless their category is null." }
|
|
9184
|
+
],
|
|
9185
|
+
mentions: ["AI classification", "link key categories", "rule-based classification", "LLM classification"]
|
|
8511
9186
|
},
|
|
8512
9187
|
{
|
|
8513
9188
|
slug: "list-entities",
|
|
@@ -8516,6 +9191,8 @@ var sections27 = [
|
|
|
8516
9191
|
seoTitle: "Document Links Endpoint \u2014 Talonic Docs",
|
|
8517
9192
|
description: "Get all entity links for a specific document showing entity values, types, link keys, and linked document IDs.",
|
|
8518
9193
|
content: [
|
|
9194
|
+
{ type: "paragraph", text: "Retrieve all entity links discovered for a specific document. Each link represents a shared field value \u2014 such as a customer ID or PO number \u2014 that connects this document to others in the workspace. Use this endpoint to understand how a document relates to the rest of your corpus." },
|
|
9195
|
+
{ type: "callout", variant: "info", text: "The `document_count` field on each entity indicates how many documents share that value. A high count on an identity entity (e.g. a vendor ID appearing in 50+ documents) is expected, while a high count on a transaction entity may indicate a data quality issue." },
|
|
8519
9196
|
{
|
|
8520
9197
|
type: "endpoint",
|
|
8521
9198
|
method: "GET",
|
|
@@ -8572,10 +9249,14 @@ var sections27 = [
|
|
|
8572
9249
|
],
|
|
8573
9250
|
related: [
|
|
8574
9251
|
{ label: "Full Graph", slug: "list-linked-documents" },
|
|
8575
|
-
{ label: "Link Keys", slug: "list-link-keys" }
|
|
9252
|
+
{ label: "Link Keys", slug: "list-link-keys" },
|
|
9253
|
+
{ label: "Document-Case Map", slug: "refresh-cases" }
|
|
8576
9254
|
],
|
|
8577
|
-
faq: [
|
|
8578
|
-
|
|
9255
|
+
faq: [
|
|
9256
|
+
{ question: "What does document_count represent?", answer: 'The number of documents in your workspace that share the same entity value for that field. For example, if three invoices reference vendor ID "ACME-001", the document_count is 3.' },
|
|
9257
|
+
{ question: "Can a document have zero links?", answer: "Yes. For a document with no extracted field values matching other documents, the endpoint returns an empty data array. Such documents appear as unlinked in the graph." }
|
|
9258
|
+
],
|
|
9259
|
+
mentions: ["document links", "entity values", "shared fields"]
|
|
8579
9260
|
},
|
|
8580
9261
|
{
|
|
8581
9262
|
slug: "list-linked-documents",
|
|
@@ -8584,6 +9265,15 @@ var sections27 = [
|
|
|
8584
9265
|
seoTitle: "Linking Graph Endpoint \u2014 Talonic Docs",
|
|
8585
9266
|
description: "Get the full document linking graph as nodes and edges, or get the graph neighbourhood for a single document with configurable traversal depth.",
|
|
8586
9267
|
content: [
|
|
9268
|
+
{ type: "paragraph", text: "The linking graph is a **bipartite graph** with two node types: documents and entities. Edges connect documents to the entity values they share. This endpoint returns the complete graph for your workspace, including detected cases (groups of documents linked through transaction or reference entities) and entity groups (documents linked only through identity entities)." },
|
|
9269
|
+
{ type: "callout", variant: "warning", text: "The full graph endpoint can return large payloads for workspaces with many documents. For targeted exploration, use the document subgraph endpoint with a configurable `depth` parameter instead." },
|
|
9270
|
+
{ type: "list", ordered: false, items: [
|
|
9271
|
+
"**Nodes** represent documents (with filename and detected type) or entities (with value and link key category)",
|
|
9272
|
+
"**Edges** connect a document to an entity through a specific field key",
|
|
9273
|
+
"**Cases** are connected components containing transaction or reference entities (2+ documents)",
|
|
9274
|
+
"**Entity groups** are components linked only through identity entities",
|
|
9275
|
+
"**Excluded entities** are high-frequency or owner entities removed from BFS case detection to avoid merging unrelated cases"
|
|
9276
|
+
] },
|
|
8587
9277
|
{
|
|
8588
9278
|
type: "endpoint",
|
|
8589
9279
|
method: "GET",
|
|
@@ -8654,6 +9344,7 @@ var sections27 = [
|
|
|
8654
9344
|
}
|
|
8655
9345
|
}`
|
|
8656
9346
|
},
|
|
9347
|
+
{ type: "paragraph", text: "To explore the graph from a single document outward, use the document subgraph endpoint below. The `depth` parameter controls how many hops to traverse \u2014 each hop alternates between document and entity nodes." },
|
|
8657
9348
|
{
|
|
8658
9349
|
type: "endpoint",
|
|
8659
9350
|
method: "GET",
|
|
@@ -8715,8 +9406,11 @@ var sections27 = [
|
|
|
8715
9406
|
{ label: "Document Links", slug: "list-entities" },
|
|
8716
9407
|
{ label: "Document-Case Map", slug: "refresh-cases" }
|
|
8717
9408
|
],
|
|
8718
|
-
faq: [
|
|
8719
|
-
|
|
9409
|
+
faq: [
|
|
9410
|
+
{ question: "What does the depth parameter control?", answer: "Depth controls BFS traversal hops from the starting document. Depth 1 returns only the document and its direct entities. Depth 2 (default) also includes other documents sharing those entities. Higher depths expand the neighbourhood further." },
|
|
9411
|
+
{ question: "Why are some entities excluded from case detection?", answer: "High-frequency entities (e.g. a company name appearing on every document) and owner entities are excluded from BFS to prevent merging unrelated document groups into a single oversized case." }
|
|
9412
|
+
],
|
|
9413
|
+
mentions: ["linking graph", "bipartite graph", "BFS traversal", "document subgraph"]
|
|
8720
9414
|
},
|
|
8721
9415
|
{
|
|
8722
9416
|
slug: "list-cases",
|
|
@@ -8725,6 +9419,8 @@ var sections27 = [
|
|
|
8725
9419
|
seoTitle: "Backfill Linking \u2014 Talonic Docs",
|
|
8726
9420
|
description: "Trigger a backfill of the linking graph for all documents. Useful after link key configuration changes. Poll progress via the backfill progress endpoint.",
|
|
8727
9421
|
content: [
|
|
9422
|
+
{ type: "paragraph", text: "After changing link key configurations \u2014 such as classifying new fields as link keys or reclassifying existing ones \u2014 the linking graph needs to be rebuilt. The backfill endpoint scans all documents and reconstructs entity links based on the current link key set." },
|
|
9423
|
+
{ type: "callout", variant: "warning", text: "Backfill is serialized per organization. If a backfill is already running, the request is silently skipped. Poll the progress endpoint to monitor in-flight backfills." },
|
|
8728
9424
|
{
|
|
8729
9425
|
type: "endpoint",
|
|
8730
9426
|
method: "POST",
|
|
@@ -8750,6 +9446,7 @@ var sections27 = [
|
|
|
8750
9446
|
"message": "Backfill queued."
|
|
8751
9447
|
}`
|
|
8752
9448
|
},
|
|
9449
|
+
{ type: "paragraph", text: "Use the progress endpoint to monitor the backfill. The `running` field indicates whether the operation is still in progress, and `processed` / `total` track document-level progress." },
|
|
8753
9450
|
{
|
|
8754
9451
|
type: "endpoint",
|
|
8755
9452
|
method: "GET",
|
|
@@ -8792,10 +9489,14 @@ var sections27 = [
|
|
|
8792
9489
|
}
|
|
8793
9490
|
],
|
|
8794
9491
|
related: [
|
|
8795
|
-
{ label: "Link Keys", slug: "list-link-keys" }
|
|
9492
|
+
{ label: "Link Keys", slug: "list-link-keys" },
|
|
9493
|
+
{ label: "Classify", slug: "reclassify-link-key" }
|
|
8796
9494
|
],
|
|
8797
|
-
faq: [
|
|
8798
|
-
|
|
9495
|
+
faq: [
|
|
9496
|
+
{ question: "How long does a backfill take?", answer: "Duration depends on the number of documents in your workspace. Poll the progress endpoint to track completion. The backfill processes documents in batches." },
|
|
9497
|
+
{ question: "Is classification automatically triggered during backfill?", answer: "No. Backfill only rebuilds entity links using the current link key set. To classify new fields as link keys, call the Classify endpoint first, which will trigger its own backfill if new keys are found." }
|
|
9498
|
+
],
|
|
9499
|
+
mentions: ["backfill", "linking graph", "progress polling"]
|
|
8799
9500
|
},
|
|
8800
9501
|
{
|
|
8801
9502
|
slug: "get-case",
|
|
@@ -8804,6 +9505,12 @@ var sections27 = [
|
|
|
8804
9505
|
seoTitle: "List Cases Endpoint \u2014 Talonic Docs",
|
|
8805
9506
|
description: "List and retrieve cases \u2014 automatically created groups of 2+ related documents linked through shared field values with narrative summaries.",
|
|
8806
9507
|
content: [
|
|
9508
|
+
{ type: "paragraph", text: "Cases are automatically created groups of two or more documents that are connected through shared **transaction** or **reference** entity values. For example, an invoice, a purchase order, and a delivery note sharing the same PO number form a case. Cases provide a high-level view of document relationships without needing to navigate the full graph." },
|
|
9509
|
+
{ type: "list", ordered: false, items: [
|
|
9510
|
+
"Each case has a deterministic **case key** (hex hash of its document IDs)",
|
|
9511
|
+
"Cases are created by the linking pipeline during backfill or real-time processing",
|
|
9512
|
+
"Documents linked only through **identity** entities (e.g. vendor ID) appear as entity groups, not cases"
|
|
9513
|
+
] },
|
|
8807
9514
|
{
|
|
8808
9515
|
type: "endpoint",
|
|
8809
9516
|
method: "GET",
|
|
@@ -8855,10 +9562,15 @@ var sections27 = [
|
|
|
8855
9562
|
}
|
|
8856
9563
|
],
|
|
8857
9564
|
related: [
|
|
8858
|
-
{ label: "Link Keys", slug: "list-link-keys" }
|
|
9565
|
+
{ label: "Link Keys", slug: "list-link-keys" },
|
|
9566
|
+
{ label: "Case Graph", slug: "get-case-graph" },
|
|
9567
|
+
{ label: "Document-Case Map", slug: "refresh-cases" }
|
|
8859
9568
|
],
|
|
8860
|
-
faq: [
|
|
8861
|
-
|
|
9569
|
+
faq: [
|
|
9570
|
+
{ question: "How are cases different from entity groups?", answer: "Cases require at least one transaction or reference entity linking the documents (e.g. shared PO number). Entity groups are documents linked only through identity entities (e.g. same vendor ID) and do not form cases." },
|
|
9571
|
+
{ question: "Can a document belong to multiple cases?", answer: "No. Each document belongs to at most one case. The case key is a deterministic hash of the sorted document IDs in the group." }
|
|
9572
|
+
],
|
|
9573
|
+
mentions: ["cases", "document groups", "case key"]
|
|
8862
9574
|
},
|
|
8863
9575
|
{
|
|
8864
9576
|
slug: "get-case-graph",
|
|
@@ -8867,6 +9579,7 @@ var sections27 = [
|
|
|
8867
9579
|
seoTitle: "Case Graph \u2014 Talonic Docs",
|
|
8868
9580
|
description: "Retrieve the D3-compatible graph visualization for a single case, showing document nodes and entity edges within the case boundary.",
|
|
8869
9581
|
content: [
|
|
9582
|
+
{ type: "paragraph", text: "Retrieve the graph structure for a single case, formatted for **D3.js** or similar graph visualization libraries. The response contains only the nodes and edges within the case boundary, making it suitable for rendering focused relationship diagrams." },
|
|
8870
9583
|
{
|
|
8871
9584
|
type: "endpoint",
|
|
8872
9585
|
method: "GET",
|
|
@@ -8920,8 +9633,11 @@ var sections27 = [
|
|
|
8920
9633
|
{ label: "Cases", slug: "get-case" },
|
|
8921
9634
|
{ label: "Full Graph", slug: "list-linked-documents" }
|
|
8922
9635
|
],
|
|
8923
|
-
faq: [
|
|
8924
|
-
|
|
9636
|
+
faq: [
|
|
9637
|
+
{ question: "What graph format does the case graph use?", answer: "The response uses a nodes-and-edges structure compatible with D3.js force-directed graphs. Node IDs are stable across requests, so you can maintain layout state between refreshes." },
|
|
9638
|
+
{ question: "Does the case graph include excluded entities?", answer: "No. The case graph is scoped to the case boundary and only includes entities that contributed to forming the case. High-frequency entities excluded from BFS are not shown." }
|
|
9639
|
+
],
|
|
9640
|
+
mentions: ["case graph", "D3 visualization", "graph rendering"]
|
|
8925
9641
|
},
|
|
8926
9642
|
{
|
|
8927
9643
|
slug: "refresh-cases",
|
|
@@ -8930,6 +9646,8 @@ var sections27 = [
|
|
|
8930
9646
|
seoTitle: "Document-Case Map Endpoint \u2014 Talonic Docs",
|
|
8931
9647
|
description: "Get the mapping of documents to their resolved cases. Returns a mapping of document IDs to assigned case keys.",
|
|
8932
9648
|
content: [
|
|
9649
|
+
{ type: "paragraph", text: "The document-case map provides a flat lookup from document ID to case assignment. Use it to quickly determine which case a document belongs to, or to identify documents that are not part of any case. Documents in **entity groups** (linked only through identity entities) are included with `is_case: false`." },
|
|
9650
|
+
{ type: "callout", variant: "info", text: "Documents with `is_case: false` are linked to other documents only through identity entities (e.g. same vendor). They appear in the map but do not form a case. Documents with no links at all are not included in the map." },
|
|
8933
9651
|
{
|
|
8934
9652
|
type: "endpoint",
|
|
8935
9653
|
method: "GET",
|
|
@@ -8983,10 +9701,14 @@ var sections27 = [
|
|
|
8983
9701
|
}
|
|
8984
9702
|
],
|
|
8985
9703
|
related: [
|
|
8986
|
-
{ label: "Full Graph", slug: "list-linked-documents" }
|
|
9704
|
+
{ label: "Full Graph", slug: "list-linked-documents" },
|
|
9705
|
+
{ label: "Cases", slug: "get-case" }
|
|
8987
9706
|
],
|
|
8988
|
-
faq: [
|
|
8989
|
-
|
|
9707
|
+
faq: [
|
|
9708
|
+
{ question: "What does an empty case_key mean?", answer: "An empty string for case_key indicates the document is in an entity group (linked via identity entities only) but not in a case. The is_case field will be false." },
|
|
9709
|
+
{ question: "Are unlinked documents included in the map?", answer: "No. Only documents with at least one entity link appear in the map. Completely unlinked documents are omitted." }
|
|
9710
|
+
],
|
|
9711
|
+
mentions: ["document-case mapping", "case assignment", "entity groups"]
|
|
8990
9712
|
}
|
|
8991
9713
|
];
|
|
8992
9714
|
|
|
@@ -9000,6 +9722,15 @@ var sections28 = [
|
|
|
9000
9722
|
description: "Get an aggregate N-Shot summary for a run comparing field-level extraction quality across schema versions.",
|
|
9001
9723
|
content: [
|
|
9002
9724
|
{ type: "paragraph", text: "N-Shot endpoints provide field-level comparisons between job runs \u2014 useful for evaluating extraction quality across schema versions. Submit judge decisions (human or AI) to record which run produced the better result. All routes are nested under `/v1/jobs/runs/{runId}/nshot/...`." },
|
|
9725
|
+
{ type: "paragraph", text: "The summary endpoint returns aggregate statistics for all N-Shot comparisons in a run: total comparisons, agreement breakdown (green/yellow/red), override count, and overall agreement rate. Use this to quickly assess whether a schema change improved or degraded extraction quality." },
|
|
9726
|
+
{
|
|
9727
|
+
type: "list",
|
|
9728
|
+
items: [
|
|
9729
|
+
"**Green** \u2014 all shots produced the same value (high confidence)",
|
|
9730
|
+
"**Yellow** \u2014 partial agreement between shots (majority value exists but not unanimous)",
|
|
9731
|
+
"**Red** \u2014 no agreement between shots (each shot produced a different value)"
|
|
9732
|
+
]
|
|
9733
|
+
},
|
|
9003
9734
|
{
|
|
9004
9735
|
type: "endpoint",
|
|
9005
9736
|
method: "GET",
|
|
@@ -9052,8 +9783,12 @@ var sections28 = [
|
|
|
9052
9783
|
{ label: "Comparisons", slug: "nshot-list-shots" },
|
|
9053
9784
|
{ label: "Judge Decision", slug: "nshot-judge-decision" }
|
|
9054
9785
|
],
|
|
9055
|
-
faq: [
|
|
9056
|
-
|
|
9786
|
+
faq: [
|
|
9787
|
+
{ question: "What is a good agreement_rate?", answer: "An agreement rate above 0.90 indicates stable extraction. Rates between 0.75 and 0.90 suggest the schema needs tuning. A rate below 0.75 typically indicates structural issues with the schema or inconsistent source documents." },
|
|
9788
|
+
{ question: "How many shots are typically used?", answer: "Three shots is the default and most common configuration. This provides a reliable majority vote while keeping cost manageable." },
|
|
9789
|
+
{ question: "Does the summary update as I submit judge decisions?", answer: "Yes. The `overridden` count increments with each accepted judge decision. The agreement breakdown (green/yellow/red) reflects the original shot outcomes and does not change when overrides are applied." }
|
|
9790
|
+
],
|
|
9791
|
+
mentions: ["N-Shot", "extraction quality", "agreement rate", "field comparison"]
|
|
9057
9792
|
},
|
|
9058
9793
|
{
|
|
9059
9794
|
slug: "nshot-list-shots",
|
|
@@ -9062,6 +9797,7 @@ var sections28 = [
|
|
|
9062
9797
|
seoTitle: "N-Shot Comparisons Endpoint \u2014 Talonic Docs",
|
|
9063
9798
|
description: "List per-document field comparisons for N-Shot evaluation across job runs. Returns all comparisons for a specific job run.",
|
|
9064
9799
|
content: [
|
|
9800
|
+
{ type: "paragraph", text: "Retrieve all per-document field comparisons for a job run. Each comparison shows the values produced by each shot, the agreement status (green/yellow/red), the majority value, and any override or judge decision that has been applied. Use this to drill into specific fields and understand where extraction diverges across shots." },
|
|
9065
9801
|
{
|
|
9066
9802
|
type: "endpoint",
|
|
9067
9803
|
method: "GET",
|
|
@@ -9135,8 +9871,11 @@ var sections28 = [
|
|
|
9135
9871
|
{ label: "Single Comparison", slug: "nshot-compare" },
|
|
9136
9872
|
{ label: "Summary", slug: "nshot-summary" }
|
|
9137
9873
|
],
|
|
9138
|
-
faq: [
|
|
9139
|
-
|
|
9874
|
+
faq: [
|
|
9875
|
+
{ question: "What comparison methods are available?", answer: "Two methods are currently available: `exact` (string equality after normalization) and `semantic` (embedding-based similarity). The method is chosen automatically based on the field data type." },
|
|
9876
|
+
{ question: "How do I find comparisons that need attention?", answer: 'Filter for `status: "red"` comparisons first (no agreement), then `status: "yellow"` (partial agreement). Green comparisons are confident and typically need no review.' }
|
|
9877
|
+
],
|
|
9878
|
+
mentions: ["N-Shot comparisons", "field agreement", "per-document comparison"]
|
|
9140
9879
|
},
|
|
9141
9880
|
{
|
|
9142
9881
|
slug: "nshot-compare",
|
|
@@ -9145,6 +9884,7 @@ var sections28 = [
|
|
|
9145
9884
|
seoTitle: "N-Shot Single Comparison \u2014 Talonic Docs",
|
|
9146
9885
|
description: "Get a specific field comparison filtered by document and field name. Returns a single N-Shot comparison for detailed evaluation.",
|
|
9147
9886
|
content: [
|
|
9887
|
+
{ type: "paragraph", text: "Retrieve a single N-Shot comparison for a specific document and field. Use this endpoint when you need detailed information about one particular comparison, including per-shot values, any existing override, and the LLM judge recommendation." },
|
|
9148
9888
|
{
|
|
9149
9889
|
type: "endpoint",
|
|
9150
9890
|
method: "GET",
|
|
@@ -9228,8 +9968,11 @@ var sections28 = [
|
|
|
9228
9968
|
{ label: "Comparisons", slug: "nshot-list-shots" },
|
|
9229
9969
|
{ label: "Override", slug: "nshot-select" }
|
|
9230
9970
|
],
|
|
9231
|
-
faq: [
|
|
9232
|
-
|
|
9971
|
+
faq: [
|
|
9972
|
+
{ question: "What does a judgement with accepted: null mean?", answer: "The LLM judge has produced a recommendation but no human or API decision has been submitted yet. Use the judge decision endpoint to accept or decline the recommendation." },
|
|
9973
|
+
{ question: "Can a comparison have both an override and a judgement?", answer: "Yes. If a judge decision is accepted, an override is automatically created from the recommended shot. A manual override can also coexist with a pending (not-yet-decided) judgement." }
|
|
9974
|
+
],
|
|
9975
|
+
mentions: ["N-Shot comparison", "single comparison"]
|
|
9233
9976
|
},
|
|
9234
9977
|
{
|
|
9235
9978
|
slug: "nshot-select",
|
|
@@ -9238,6 +9981,8 @@ var sections28 = [
|
|
|
9238
9981
|
seoTitle: "N-Shot Override Endpoint \u2014 Talonic Docs",
|
|
9239
9982
|
description: "Manually override the N-Shot selected value for a specific document-field pair. Requires write scope.",
|
|
9240
9983
|
content: [
|
|
9984
|
+
{ type: "paragraph", text: "Manually override the selected value for a specific document-field comparison by choosing a specific shot number. The override is recorded with an audit trail including the actor, timestamp, original value, and new value. Use this when the majority value is incorrect and you want to select a different shot's extraction." },
|
|
9985
|
+
{ type: "callout", variant: "info", text: 'The `selected_shot` must be a valid shot number from the comparison\'s `values` array. The override records `actor_id` as `"api"` for all API-initiated overrides.' },
|
|
9241
9986
|
{
|
|
9242
9987
|
type: "endpoint",
|
|
9243
9988
|
method: "POST",
|
|
@@ -9325,8 +10070,11 @@ var sections28 = [
|
|
|
9325
10070
|
{ label: "Single Comparison", slug: "nshot-compare" },
|
|
9326
10071
|
{ label: "Judge Decision", slug: "nshot-judge-decision" }
|
|
9327
10072
|
],
|
|
9328
|
-
faq: [
|
|
9329
|
-
|
|
10073
|
+
faq: [
|
|
10074
|
+
{ question: "Can I override the same comparison multiple times?", answer: "Yes. Each override replaces the previous one. The `from_value` in the latest override reflects the value before the most recent change, not the original majority value." },
|
|
10075
|
+
{ question: "Does an override change the agreement status?", answer: "No. The `status` (green/yellow/red) reflects the original shot agreement and does not change when an override is applied. Overrides are tracked separately." }
|
|
10076
|
+
],
|
|
10077
|
+
mentions: ["N-Shot override", "manual override"]
|
|
9330
10078
|
},
|
|
9331
10079
|
{
|
|
9332
10080
|
slug: "nshot-judge-decision",
|
|
@@ -9335,6 +10083,8 @@ var sections28 = [
|
|
|
9335
10083
|
seoTitle: "N-Shot Judge Decision \u2014 Talonic Docs",
|
|
9336
10084
|
description: "Submit a judge decision (human or AI) for an N-Shot comparison to record which candidate produced the correct extraction result.",
|
|
9337
10085
|
content: [
|
|
10086
|
+
{ type: "paragraph", text: "Submit a decision to accept or decline the LLM judge's recommendation for a specific comparison. When `accepted` is `true`, the recommended shot value is automatically applied as an override. When `false`, the recommendation is recorded as declined and no override is applied. Use this to efficiently review LLM suggestions at scale." },
|
|
10087
|
+
{ type: "callout", variant: "info", text: 'Accepting a judge decision automatically creates an override with `actor_id: "judge"`. You can still manually override the value afterwards using the override endpoint.' },
|
|
9338
10088
|
{
|
|
9339
10089
|
type: "endpoint",
|
|
9340
10090
|
method: "POST",
|
|
@@ -9426,8 +10176,11 @@ var sections28 = [
|
|
|
9426
10176
|
{ label: "Summary", slug: "nshot-summary" },
|
|
9427
10177
|
{ label: "Override", slug: "nshot-select" }
|
|
9428
10178
|
],
|
|
9429
|
-
faq: [
|
|
9430
|
-
|
|
10179
|
+
faq: [
|
|
10180
|
+
{ question: "What happens if there is no LLM judge recommendation to accept?", answer: "If the comparison has no `judgement` object (or `recommended_shot` is null), the endpoint returns a 404. Only comparisons with existing LLM judge recommendations can receive decisions." },
|
|
10181
|
+
{ question: "Can I change a judge decision after submitting it?", answer: "Yes. Submit a new judge decision with the opposite `accepted` value. If you previously accepted and the override was applied, declining will not remove the override \u2014 use the override endpoint to change it manually." }
|
|
10182
|
+
],
|
|
10183
|
+
mentions: ["judge decision", "N-Shot evaluation", "LLM judge"]
|
|
9431
10184
|
}
|
|
9432
10185
|
];
|
|
9433
10186
|
|
|
@@ -9441,6 +10194,7 @@ var sections29 = [
|
|
|
9441
10194
|
description: "List all schema graph classes in the versioned ontology of document classes discovered across your workspace with field counts and version info.",
|
|
9442
10195
|
content: [
|
|
9443
10196
|
{ type: "paragraph", text: "The schema graph is a versioned ontology of document classes discovered across your workspace. Each class captures a document type's canonical fields. The API exposes versioned classes, diffs proposed between versions (with approve/reject workflow), inter-class edges, aliases, and a D3-compatible visualization payload." },
|
|
10197
|
+
{ type: "paragraph", text: "Use this endpoint to retrieve all schema graph classes for your organization. Classes are created automatically as the platform processes documents and discovers recurring field patterns. Each class tracks its version history and links to the field registry." },
|
|
9444
10198
|
{
|
|
9445
10199
|
type: "endpoint",
|
|
9446
10200
|
method: "GET",
|
|
@@ -9503,8 +10257,12 @@ var sections29 = [
|
|
|
9503
10257
|
{ label: "Get Class", slug: "get-schema-graph-class" },
|
|
9504
10258
|
{ label: "List Versions", slug: "list-class-versions" }
|
|
9505
10259
|
],
|
|
9506
|
-
faq: [
|
|
9507
|
-
|
|
10260
|
+
faq: [
|
|
10261
|
+
{ question: "Are schema graph classes created automatically?", answer: "Yes. Classes are generated automatically as the platform discovers recurring document types and their field patterns during extraction. You do not need to create them manually." },
|
|
10262
|
+
{ question: "What does the current_version_id represent?", answer: "It points to the latest approved version of the class. Each time a diff is approved, a new version is created and `current_version_id` is updated to point to it." },
|
|
10263
|
+
{ question: "How do schema graph classes relate to user schemas?", answer: "Schema graph classes represent discovered document types in the ontology. User schemas are manually defined output schemas for extraction jobs. The two are linked through the field registry \u2014 both reference the same canonical field definitions." }
|
|
10264
|
+
],
|
|
10265
|
+
mentions: ["schema graph", "document classes", "ontology", "versioned classes"]
|
|
9508
10266
|
},
|
|
9509
10267
|
{
|
|
9510
10268
|
slug: "get-schema-graph-class",
|
|
@@ -9513,6 +10271,7 @@ var sections29 = [
|
|
|
9513
10271
|
seoTitle: "Get Schema Graph Class \u2014 Talonic Docs",
|
|
9514
10272
|
description: "Retrieve a schema graph class by ID with its current field definitions, version number, document count, and links to version history. Requires read scope.",
|
|
9515
10273
|
content: [
|
|
10274
|
+
{ type: "paragraph", text: "Retrieve a single schema graph class by its UUID. The response includes the class metadata, its current active version, and links to the full version history. Use this to inspect a specific document type's canonical field structure." },
|
|
9516
10275
|
{
|
|
9517
10276
|
type: "endpoint",
|
|
9518
10277
|
method: "GET",
|
|
@@ -9571,8 +10330,11 @@ var sections29 = [
|
|
|
9571
10330
|
{ label: "List Classes", slug: "list-schema-graph-classes" },
|
|
9572
10331
|
{ label: "List Versions", slug: "list-class-versions" }
|
|
9573
10332
|
],
|
|
9574
|
-
faq: [
|
|
9575
|
-
|
|
10333
|
+
faq: [
|
|
10334
|
+
{ question: "Can a class have no current version?", answer: "Yes. A newly discovered class may have `current_version_id` set to `null` until the first version is published via the diff approval workflow." },
|
|
10335
|
+
{ question: "What does the document_type_id field link to?", answer: "It links to a `DocumentType` entity in the extraction system. This association connects the schema graph ontology to the document classification pipeline." }
|
|
10336
|
+
],
|
|
10337
|
+
mentions: ["schema graph class", "class detail"]
|
|
9576
10338
|
},
|
|
9577
10339
|
{
|
|
9578
10340
|
slug: "list-class-versions",
|
|
@@ -9581,6 +10343,7 @@ var sections29 = [
|
|
|
9581
10343
|
seoTitle: "Schema Graph Class Versions \u2014 Talonic Docs",
|
|
9582
10344
|
description: "List all published versions of a schema graph class ordered by version number descending with field definitions for each version.",
|
|
9583
10345
|
content: [
|
|
10346
|
+
{ type: "paragraph", text: "Retrieve the complete version history of a schema graph class. Each version captures a snapshot of the class's JSON Schema definition and its associated field registry IDs at the time the version was published. Versions are created when diffs are approved, and version numbers increment monotonically." },
|
|
9584
10347
|
{
|
|
9585
10348
|
type: "endpoint",
|
|
9586
10349
|
method: "GET",
|
|
@@ -9641,8 +10404,11 @@ var sections29 = [
|
|
|
9641
10404
|
{ label: "Get Version", slug: "get-class-version" },
|
|
9642
10405
|
{ label: "List Diffs", slug: "list-schema-graph-diffs" }
|
|
9643
10406
|
],
|
|
9644
|
-
faq: [
|
|
9645
|
-
|
|
10407
|
+
faq: [
|
|
10408
|
+
{ question: "How are new versions created?", answer: "New versions are created when a pending diff is approved via `POST /v1/schema-graph/diffs/{id}/approve`. The diff's field changes are applied and a new version snapshot is published." },
|
|
10409
|
+
{ question: "Can I roll back to a previous version?", answer: "There is no direct rollback endpoint. To revert changes, create a new diff that reverses the unwanted field modifications and approve it to produce a new version." }
|
|
10410
|
+
],
|
|
10411
|
+
mentions: ["class versions", "version history", "JSON Schema"]
|
|
9646
10412
|
},
|
|
9647
10413
|
{
|
|
9648
10414
|
slug: "get-class-version",
|
|
@@ -9651,6 +10417,7 @@ var sections29 = [
|
|
|
9651
10417
|
seoTitle: "Get Schema Graph Class Version \u2014 Talonic Docs",
|
|
9652
10418
|
description: "Retrieve a specific version of a schema graph class by class ID and version number. Requires read scope.",
|
|
9653
10419
|
content: [
|
|
10420
|
+
{ type: "paragraph", text: "Retrieve a specific version of a schema graph class by its class ID and version number. Use this to inspect the exact JSON Schema definition and field composition that was active at a particular point in the class's evolution." },
|
|
9654
10421
|
{
|
|
9655
10422
|
type: "endpoint",
|
|
9656
10423
|
method: "GET",
|
|
@@ -9704,8 +10471,11 @@ var sections29 = [
|
|
|
9704
10471
|
related: [
|
|
9705
10472
|
{ label: "List Versions", slug: "list-class-versions" }
|
|
9706
10473
|
],
|
|
9707
|
-
faq: [
|
|
9708
|
-
|
|
10474
|
+
faq: [
|
|
10475
|
+
{ question: "What is the version number path parameter?", answer: "It is the integer version number (e.g. `1`, `2`, `3`), not the version UUID. Use the list versions endpoint to discover available version numbers." },
|
|
10476
|
+
{ question: "Does the json_schema field contain a valid JSON Schema?", answer: "Yes. It is a standard JSON Schema object with `type`, `properties`, and optionally `required` arrays. You can use it directly for validation or code generation." }
|
|
10477
|
+
],
|
|
10478
|
+
mentions: ["class version", "specific version"]
|
|
9709
10479
|
},
|
|
9710
10480
|
{
|
|
9711
10481
|
slug: "list-schema-graph-diffs",
|
|
@@ -9714,6 +10484,8 @@ var sections29 = [
|
|
|
9714
10484
|
seoTitle: "Schema Graph Diffs \u2014 Talonic Docs",
|
|
9715
10485
|
description: "List pending, approved, and rejected diffs between schema graph class versions. Shows proposed changes for the approve/reject workflow.",
|
|
9716
10486
|
content: [
|
|
10487
|
+
{ type: "paragraph", text: "Diffs represent proposed changes between schema graph class versions. When the platform discovers new fields or detects field type changes, it creates a diff that can be reviewed and either approved (promoting the changes to a new version) or rejected (discarding them). This endpoint returns all diffs, optionally filtered by class or review status." },
|
|
10488
|
+
{ type: "callout", variant: "info", text: "Diffs are classified as `additive` (new fields only) or `breaking` (field removals or type changes). Breaking diffs may affect downstream extraction jobs that depend on the removed fields." },
|
|
9717
10489
|
{
|
|
9718
10490
|
type: "endpoint",
|
|
9719
10491
|
method: "GET",
|
|
@@ -9785,8 +10557,11 @@ var sections29 = [
|
|
|
9785
10557
|
{ label: "Approve Diff", slug: "approve-diff" },
|
|
9786
10558
|
{ label: "Reject Diff", slug: "reject-diff" }
|
|
9787
10559
|
],
|
|
9788
|
-
faq: [
|
|
9789
|
-
|
|
10560
|
+
faq: [
|
|
10561
|
+
{ question: "How are diffs generated?", answer: "Diffs are generated automatically when the platform detects field changes during extraction. When new documents introduce fields not present in the current class version, a diff is created and set to `pending` for review." },
|
|
10562
|
+
{ question: "What happens to pending diffs if I approve a newer diff first?", answer: "Pending diffs reference specific version numbers. If the class advances past a pending diff's `to_version`, the diff becomes stale. Review and reject stale diffs to keep the queue clean." }
|
|
10563
|
+
],
|
|
10564
|
+
mentions: ["schema diffs", "version comparison", "additive", "breaking"]
|
|
9790
10565
|
},
|
|
9791
10566
|
{
|
|
9792
10567
|
slug: "approve-diff",
|
|
@@ -9795,6 +10570,8 @@ var sections29 = [
|
|
|
9795
10570
|
seoTitle: "Approve Schema Graph Diff \u2014 Talonic Docs",
|
|
9796
10571
|
description: "Approve a pending diff to promote proposed changes to the next live class version. Requires write scope.",
|
|
9797
10572
|
content: [
|
|
10573
|
+
{ type: "paragraph", text: "Approve a pending diff to promote its proposed field changes into a new class version. Approving a diff updates the class's `current_version_id` and publishes a new version snapshot with the diff's added fields, removed fields, and type changes applied." },
|
|
10574
|
+
{ type: "callout", variant: "warning", text: "Approving a `breaking` diff (one that removes fields or changes types) may affect downstream extraction jobs. Review the `removed_fields` and `type_changes` arrays before approving." },
|
|
9798
10575
|
{
|
|
9799
10576
|
type: "endpoint",
|
|
9800
10577
|
method: "POST",
|
|
@@ -9835,8 +10612,11 @@ var sections29 = [
|
|
|
9835
10612
|
{ label: "List Diffs", slug: "list-schema-graph-diffs" },
|
|
9836
10613
|
{ label: "Reject Diff", slug: "reject-diff" }
|
|
9837
10614
|
],
|
|
9838
|
-
faq: [
|
|
9839
|
-
|
|
10615
|
+
faq: [
|
|
10616
|
+
{ question: "Can I approve a diff that is not in pending status?", answer: 'No. Only diffs with `review_status: "pending"` can be approved. Already approved or rejected diffs return a 404 or are ignored.' },
|
|
10617
|
+
{ question: "Does approving a diff immediately update the class version?", answer: "Yes. Approval is synchronous. The new version is created and `current_version_id` is updated in the same request." }
|
|
10618
|
+
],
|
|
10619
|
+
mentions: ["approve diff", "version promotion"]
|
|
9840
10620
|
},
|
|
9841
10621
|
{
|
|
9842
10622
|
slug: "reject-diff",
|
|
@@ -9845,6 +10625,7 @@ var sections29 = [
|
|
|
9845
10625
|
seoTitle: "Reject Schema Graph Diff \u2014 Talonic Docs",
|
|
9846
10626
|
description: "Reject a pending diff to discard proposed changes to a schema graph class version. Requires write scope.",
|
|
9847
10627
|
content: [
|
|
10628
|
+
{ type: "paragraph", text: "Reject a pending diff to discard its proposed field changes. The class version remains unchanged, and the diff is marked as `rejected` for audit purposes. Rejected diffs are retained in the history and can be reviewed later but cannot be re-approved." },
|
|
9848
10629
|
{
|
|
9849
10630
|
type: "endpoint",
|
|
9850
10631
|
method: "POST",
|
|
@@ -9885,8 +10666,11 @@ var sections29 = [
|
|
|
9885
10666
|
{ label: "List Diffs", slug: "list-schema-graph-diffs" },
|
|
9886
10667
|
{ label: "Approve Diff", slug: "approve-diff" }
|
|
9887
10668
|
],
|
|
9888
|
-
faq: [
|
|
9889
|
-
|
|
10669
|
+
faq: [
|
|
10670
|
+
{ question: "Can I re-approve a rejected diff?", answer: "No. Once rejected, a diff cannot be re-approved. If the same field changes are needed later, a new diff will be generated automatically during the next extraction cycle." },
|
|
10671
|
+
{ question: "Does rejecting a diff affect the current class version?", answer: "No. The class version remains unchanged. Rejection only marks the proposed changes as discarded." }
|
|
10672
|
+
],
|
|
10673
|
+
mentions: ["reject diff", "discard changes"]
|
|
9890
10674
|
},
|
|
9891
10675
|
{
|
|
9892
10676
|
slug: "list-schema-graph-edges",
|
|
@@ -9895,6 +10679,8 @@ var sections29 = [
|
|
|
9895
10679
|
seoTitle: "Schema Graph Edges \u2014 Talonic Docs",
|
|
9896
10680
|
description: "List inter-class edges (relationships) between schema graph classes with relationship type and weight information.",
|
|
9897
10681
|
content: [
|
|
10682
|
+
{ type: "paragraph", text: "Edges represent relationships between schema graph classes, computed using cosine similarity between field embeddings. Each edge captures a directional relationship (e.g. an Invoice class `references` a Purchase Order class) with a weight indicating the strength of the relationship. Use edges to understand how document types relate to each other across your workspace." },
|
|
10683
|
+
{ type: "callout", variant: "info", text: "Edge weights range from 0 to 1. Higher weights indicate stronger field overlap between classes. Edges below a minimum weight threshold are automatically pruned and will not appear in results." },
|
|
9898
10684
|
{
|
|
9899
10685
|
type: "endpoint",
|
|
9900
10686
|
method: "GET",
|
|
@@ -9955,8 +10741,11 @@ var sections29 = [
|
|
|
9955
10741
|
{ label: "List Classes", slug: "list-schema-graph-classes" },
|
|
9956
10742
|
{ label: "Visualize", slug: "visualize-schema-graph" }
|
|
9957
10743
|
],
|
|
9958
|
-
faq: [
|
|
9959
|
-
|
|
10744
|
+
faq: [
|
|
10745
|
+
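{ question: "How are edge weights computed?", answer: "Edge weights are derived from cosine similarity between field embeddings of the two classes. A weight of 0.87 means the cosine similarity between the two classes' field embeddings is 0.87, which indicates strong field overlap; it is a similarity score, not a literal percentage of shared fields." },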
|
|
10746
|
+
{ question: "Are edges directional?", answer: "Yes. An edge from class A to class B means A references B. The reverse relationship may also exist as a separate edge with a different weight." }
|
|
10747
|
+
],
|
|
10748
|
+
mentions: ["schema graph edges", "relationships", "cosine similarity", "field overlap"]
|
|
9960
10749
|
},
|
|
9961
10750
|
{
|
|
9962
10751
|
slug: "list-schema-graph-aliases",
|
|
@@ -9965,6 +10754,7 @@ var sections29 = [
|
|
|
9965
10754
|
seoTitle: "Schema Graph Aliases \u2014 Talonic Docs",
|
|
9966
10755
|
description: "List all class aliases \u2014 alternative names that map to canonical class IDs in the schema graph ontology.",
|
|
9967
10756
|
content: [
|
|
10757
|
+
{ type: "paragraph", text: 'Aliases map alternative names to canonical schema graph classes. For example, "Bill" might be an alias for the "Invoice" class. The platform uses aliases during document classification to resolve variant names to their canonical class. Use this endpoint to audit or inspect the alias mappings for your organization.' },
|
|
9968
10758
|
{
|
|
9969
10759
|
type: "endpoint",
|
|
9970
10760
|
method: "GET",
|
|
@@ -10014,8 +10804,11 @@ var sections29 = [
|
|
|
10014
10804
|
related: [
|
|
10015
10805
|
{ label: "List Classes", slug: "list-schema-graph-classes" }
|
|
10016
10806
|
],
|
|
10017
|
-
faq: [
|
|
10018
|
-
|
|
10807
|
+
faq: [
|
|
10808
|
+
{ question: "Are aliases case-sensitive?", answer: 'No. Alias matching during classification is case-insensitive. "bill", "Bill", and "BILL" all resolve to the same canonical class.' },
|
|
10809
|
+
{ question: "How are aliases created?", answer: "Aliases are generated automatically when the platform encounters documents with variant type labels that resolve to the same canonical class during extraction." }
|
|
10810
|
+
],
|
|
10811
|
+
mentions: ["class aliases", "alternative names", "name mapping"]
|
|
10019
10812
|
},
|
|
10020
10813
|
{
|
|
10021
10814
|
slug: "visualize-schema-graph",
|
|
@@ -10024,6 +10817,7 @@ var sections29 = [
|
|
|
10024
10817
|
seoTitle: "Visualize Schema Graph \u2014 Talonic Docs",
|
|
10025
10818
|
description: "Get D3-compatible visualization data for the schema graph with nodes and edges formatted for graph rendering.",
|
|
10026
10819
|
content: [
|
|
10820
|
+
{ type: "paragraph", text: "Retrieve the entire schema graph as a D3-compatible payload with nodes (classes) and edges (relationships). The response is structured for direct consumption by graph visualization libraries such as D3.js, Cytoscape, or vis.js. Nodes include full class metadata; edges use `source` and `target` fields referencing node IDs." },
|
|
10027
10821
|
{
|
|
10028
10822
|
type: "endpoint",
|
|
10029
10823
|
method: "GET",
|
|
@@ -10094,8 +10888,11 @@ var sections29 = [
|
|
|
10094
10888
|
{ label: "Edges", slug: "list-schema-graph-edges" },
|
|
10095
10889
|
{ label: "List Classes", slug: "list-schema-graph-classes" }
|
|
10096
10890
|
],
|
|
10097
|
-
faq: [
|
|
10098
|
-
|
|
10891
|
+
faq: [
|
|
10892
|
+
{ question: "Can I use this response directly with D3.js force-directed graphs?", answer: "Yes. The `nodes` and `edges` arrays are structured for direct use with D3 force simulations. Edge `source` and `target` fields reference node `id` values." },
|
|
10893
|
+
{ question: "Does the visualization endpoint include archived classes?", answer: "Yes. All classes are returned regardless of status. Filter by `status` on the client side if you want to exclude archived nodes from the visualization." }
|
|
10894
|
+
],
|
|
10895
|
+
mentions: ["D3 visualization", "graph rendering", "force-directed graph"]
|
|
10099
10896
|
}
|
|
10100
10897
|
];
|
|
10101
10898
|
|
|
@@ -10109,6 +10906,16 @@ var sections30 = [
|
|
|
10109
10906
|
description: "List validation checks in the structuring pipeline. Checks define rules like field_format, value_range, cross_field, and ai_coherence.",
|
|
10110
10907
|
content: [
|
|
10111
10908
|
{ type: "paragraph", text: "The structuring pipeline validates extracted data through configurable checks and approval gates. Checks define validation rules; gates aggregate checks and determine whether records require manual approval before delivery. Also exposes per-result check outcomes, the pending-approvals queue, approve/reject actions, and the manual delivery trigger for an approved run." },
|
|
10909
|
+
{ type: "paragraph", text: "Use this endpoint to retrieve all validation checks configured for your organization. Checks are evaluated against every structuring result to flag data quality issues before delivery. You can scope checks to a specific schema to apply different validation logic per document type." },
|
|
10910
|
+
{
|
|
10911
|
+
type: "list",
|
|
10912
|
+
items: [
|
|
10913
|
+
"**field_format** \u2014 validates that a field value matches an expected pattern (e.g. date format, currency code)",
|
|
10914
|
+
"**value_range** \u2014 ensures numeric or date values fall within defined bounds",
|
|
10915
|
+
"**cross_field** \u2014 validates relationships between two or more fields (e.g. end_date > start_date)",
|
|
10916
|
+
"**ai_coherence** \u2014 uses an LLM to assess whether extracted values are semantically plausible"
|
|
10917
|
+
]
|
|
10918
|
+
},
|
|
10112
10919
|
{
|
|
10113
10920
|
type: "endpoint",
|
|
10114
10921
|
method: "GET",
|
|
@@ -10183,8 +10990,12 @@ var sections30 = [
|
|
|
10183
10990
|
{ label: "Create Check", slug: "create-structuring-check" },
|
|
10184
10991
|
{ label: "List Gates", slug: "list-structuring-gates" }
|
|
10185
10992
|
],
|
|
10186
|
-
faq: [
|
|
10187
|
-
|
|
10993
|
+
faq: [
|
|
10994
|
+
{ question: "What is the difference between warning and error severity?", answer: "A `warning` check flags an issue but does not block delivery. An `error` check causes the result to fail the gate and require manual approval before it can be delivered." },
|
|
10995
|
+
{ question: "Can I create checks that apply to all schemas?", answer: "Yes. Omit the `user_schema_id` field when creating a check and it will apply globally to all structuring results regardless of schema." },
|
|
10996
|
+
{ question: "How are checks ordered during evaluation?", answer: "Checks are evaluated in `sort_order` ascending, then by `created_at`. You can control evaluation priority by setting `sort_order` on each check." }
|
|
10997
|
+
],
|
|
10998
|
+
mentions: ["structuring checks", "validation", "field_format", "value_range", "cross_field", "ai_coherence"]
|
|
10188
10999
|
},
|
|
10189
11000
|
{
|
|
10190
11001
|
slug: "create-structuring-check",
|
|
@@ -10193,6 +11004,8 @@ var sections30 = [
|
|
|
10193
11004
|
seoTitle: "Create Structuring Check \u2014 Talonic Docs",
|
|
10194
11005
|
description: "Create a validation check for the structuring pipeline. Supports field_format, value_range, cross_field, and ai_coherence check types.",
|
|
10195
11006
|
content: [
|
|
11007
|
+
{ type: "paragraph", text: "Create a new validation check to enforce data quality rules on structuring results. Each check targets a specific validation type and can be scoped to a single schema or applied globally. Newly created checks are automatically active and will run against all future structuring results." },
|
|
11008
|
+
{ type: "callout", variant: "info", text: "The `config` object shape depends on the check `type`. For `value_range`, provide `field`, `min`, and `max`. For `field_format`, provide `field` and `pattern`. For `cross_field`, provide `fields` and a `rule` expression. For `ai_coherence`, no config is required." },
|
|
10196
11009
|
{
|
|
10197
11010
|
type: "endpoint",
|
|
10198
11011
|
method: "POST",
|
|
@@ -10281,8 +11094,11 @@ var sections30 = [
|
|
|
10281
11094
|
{ label: "List Checks", slug: "list-structuring-checks" },
|
|
10282
11095
|
{ label: "Get Check", slug: "get-structuring-check" }
|
|
10283
11096
|
],
|
|
10284
|
-
faq: [
|
|
10285
|
-
|
|
11097
|
+
faq: [
|
|
11098
|
+
{ question: "Can I create a check without a config object?", answer: "Yes, for `ai_coherence` checks. For `value_range`, `field_format`, and `cross_field` checks, the `config` object defines the validation logic and is effectively required." },
|
|
11099
|
+
{ question: "What happens if I use a master-view API key?", answer: "Create operations require a customer-scoped API key. Using a master-view key returns a `400 bad_request` error because the system cannot determine which organization to associate the check with." }
|
|
11100
|
+
],
|
|
11101
|
+
mentions: ["validation check", "value_range", "create check", "field_format"]
|
|
10286
11102
|
},
|
|
10287
11103
|
{
|
|
10288
11104
|
slug: "get-structuring-check",
|
|
@@ -10291,6 +11107,8 @@ var sections30 = [
|
|
|
10291
11107
|
seoTitle: "Manage Structuring Check \u2014 Talonic Docs",
|
|
10292
11108
|
description: "Get, update, or delete a structuring check. Same path supports GET (detail), PUT (update), and DELETE operations with appropriate scopes.",
|
|
10293
11109
|
content: [
|
|
11110
|
+
{ type: "paragraph", text: "Retrieve, update, or remove a validation check by its UUID. Use **GET** to inspect a check's current configuration, **PUT** to modify its name, severity, config, or active status, and **DELETE** to soft-delete it. Soft-deleted checks have `is_active` set to `false` and stop running against new results, but their historical check outcomes remain intact." },
|
|
11111
|
+
{ type: "callout", variant: "warning", text: "DELETE is a soft-delete. The check is deactivated (`is_active = false`) rather than permanently removed. Existing result check outcomes referencing this check are preserved." },
|
|
10294
11112
|
{
|
|
10295
11113
|
type: "endpoint",
|
|
10296
11114
|
method: "GET",
|
|
@@ -10360,8 +11178,11 @@ var sections30 = [
|
|
|
10360
11178
|
{ label: "List Checks", slug: "list-structuring-checks" },
|
|
10361
11179
|
{ label: "Create Check", slug: "create-structuring-check" }
|
|
10362
11180
|
],
|
|
10363
|
-
faq: [
|
|
10364
|
-
|
|
11181
|
+
faq: [
|
|
11182
|
+
{ question: "Can I reactivate a deleted check?", answer: "Yes. Since DELETE is a soft-delete, you can use PUT on the same check ID to set `is_active` back to `true`." },
|
|
11183
|
+
{ question: "Does updating a check re-evaluate existing results?", answer: "No. Updates only affect future structuring results. Historical check outcomes are immutable." }
|
|
11184
|
+
],
|
|
11185
|
+
mentions: ["manage check", "soft-delete", "update check"]
|
|
10365
11186
|
},
|
|
10366
11187
|
{
|
|
10367
11188
|
slug: "list-structuring-gates",
|
|
@@ -10370,6 +11191,8 @@ var sections30 = [
|
|
|
10370
11191
|
seoTitle: "List Structuring Gates \u2014 Talonic Docs",
|
|
10371
11192
|
description: "List approval gates that aggregate validation checks and control whether records require manual approval before delivery.",
|
|
10372
11193
|
content: [
|
|
11194
|
+
{ type: "paragraph", text: "Approval gates sit between extraction and delivery. Each gate aggregates one or more rules (e.g. minimum confidence threshold, validation pass rate) and decides whether structuring results are auto-approved for delivery or queued for human review. Gates can be scoped to a specific schema and optionally linked to a delivery destination." },
|
|
11195
|
+
{ type: "callout", variant: "info", text: "Gates without any rules will auto-approve all results. Add at least one rule to enforce quality thresholds before delivery." },
|
|
10373
11196
|
{
|
|
10374
11197
|
type: "endpoint",
|
|
10375
11198
|
method: "GET",
|
|
@@ -10458,8 +11281,11 @@ var sections30 = [
|
|
|
10458
11281
|
{ label: "Create Gate", slug: "create-structuring-gate" },
|
|
10459
11282
|
{ label: "Gate Rules", slug: "gate-rules" }
|
|
10460
11283
|
],
|
|
10461
|
-
faq: [
|
|
10462
|
-
|
|
11284
|
+
faq: [
|
|
11285
|
+
{ question: "How does auto_approve_after_hours work?", answer: "When set, results that have been pending for longer than the specified number of hours are automatically approved without manual intervention. This prevents bottlenecks when reviewers are unavailable." },
|
|
11286
|
+
{ question: "Can I link a gate to a delivery destination?", answer: "Yes. Set `destination_id` when creating or updating a gate to route approved results directly to a specific delivery destination (webhook, S3, SFTP, etc.)." }
|
|
11287
|
+
],
|
|
11288
|
+
mentions: ["approval gates", "delivery gate", "auto-approve"]
|
|
10463
11289
|
},
|
|
10464
11290
|
{
|
|
10465
11291
|
slug: "create-structuring-gate",
|
|
@@ -10468,6 +11294,8 @@ var sections30 = [
|
|
|
10468
11294
|
seoTitle: "Create Structuring Gate \u2014 Talonic Docs",
|
|
10469
11295
|
description: "Create an approval gate with optional schema scope. Gates aggregate validation checks and control approval workflows.",
|
|
10470
11296
|
content: [
|
|
11297
|
+
{ type: "paragraph", text: "Create an approval gate to control the flow of structuring results to delivery. A gate starts with no rules \u2014 add rules via the gate rules endpoint to define quality thresholds. The `on_approve` and `on_flag` fields control what happens when results pass or fail the gate's rules." },
|
|
11298
|
+
{ type: "callout", variant: "warning", text: "A newly created gate has an empty rules array. Results will auto-approve until you add at least one rule via `POST /v1/structuring/gates/{id}/rules`." },
|
|
10471
11299
|
{
|
|
10472
11300
|
type: "endpoint",
|
|
10473
11301
|
method: "POST",
|
|
@@ -10545,8 +11373,11 @@ var sections30 = [
|
|
|
10545
11373
|
{ label: "List Gates", slug: "list-structuring-gates" },
|
|
10546
11374
|
{ label: "Gate Rules", slug: "gate-rules" }
|
|
10547
11375
|
],
|
|
10548
|
-
faq: [
|
|
10549
|
-
|
|
11376
|
+
faq: [
|
|
11377
|
+
{ question: "What is the typical workflow after creating a gate?", answer: "Create the gate, then add rules via `POST /v1/structuring/gates/{id}/rules` to define thresholds like minimum confidence or validation pass rate. Results that fail any rule are queued for manual approval." },
|
|
11378
|
+
{ question: "Can I create multiple gates for the same schema?", answer: "Yes. Multiple gates can target the same `user_schema_id`. Each gate evaluates independently, and a result must pass all applicable gates to be auto-approved." }
|
|
11379
|
+
],
|
|
11380
|
+
mentions: ["create gate", "approval workflow"]
|
|
10550
11381
|
},
|
|
10551
11382
|
{
|
|
10552
11383
|
slug: "get-structuring-gate",
|
|
@@ -10555,6 +11386,7 @@ var sections30 = [
|
|
|
10555
11386
|
seoTitle: "Manage Structuring Gate \u2014 Talonic Docs",
|
|
10556
11387
|
description: "Get, update, or delete an approval gate. Same path supports GET (detail with rules), PUT (update), and DELETE operations.",
|
|
10557
11388
|
content: [
|
|
11389
|
+
{ type: "paragraph", text: "Retrieve, update, or remove an approval gate by its UUID. **GET** returns the gate with its active rules embedded. **PUT** updates gate properties (same body shape as create). **DELETE** soft-deletes the gate by setting `is_active` to `false`." },
|
|
10558
11390
|
{
|
|
10559
11391
|
type: "endpoint",
|
|
10560
11392
|
method: "GET",
|
|
@@ -10633,8 +11465,11 @@ var sections30 = [
|
|
|
10633
11465
|
{ label: "List Gates", slug: "list-structuring-gates" },
|
|
10634
11466
|
{ label: "Gate Rules", slug: "gate-rules" }
|
|
10635
11467
|
],
|
|
10636
|
-
faq: [
|
|
10637
|
-
|
|
11468
|
+
faq: [
|
|
11469
|
+
{ question: "Does deleting a gate affect pending approvals?", answer: "No. Pending approval items that were queued by this gate remain in the queue. They can still be approved or rejected manually. The gate simply stops evaluating new results." },
|
|
11470
|
+
{ question: "Are rules returned on PUT responses?", answer: "No. The `rules` array is only populated on GET responses. After a PUT update, re-fetch with GET to see the current rules." }
|
|
11471
|
+
],
|
|
11472
|
+
mentions: ["manage gate", "soft-delete gate"]
|
|
10638
11473
|
},
|
|
10639
11474
|
{
|
|
10640
11475
|
slug: "gate-rules",
|
|
@@ -10643,6 +11478,15 @@ var sections30 = [
|
|
|
10643
11478
|
seoTitle: "Structuring Gate Rules \u2014 Talonic Docs",
|
|
10644
11479
|
description: "Add or remove rules from an approval gate. Rules define thresholds like min_confidence, validation_pass_rate, and field_coverage.",
|
|
10645
11480
|
content: [
|
|
11481
|
+
{ type: "paragraph", text: "Gate rules define the quality thresholds that structuring results must meet to be auto-approved. Each rule has a type and a configuration that specifies the threshold. Rules are evaluated in `sort_order` \u2014 if any rule fails, the result is flagged and queued for manual approval." },
|
|
11482
|
+
{
|
|
11483
|
+
type: "list",
|
|
11484
|
+
items: [
|
|
11485
|
+
"**min_confidence** \u2014 requires the row-level confidence score to exceed a threshold (e.g. 0.85)",
|
|
11486
|
+
"**validation_pass_rate** \u2014 requires a minimum percentage of validation checks to pass",
|
|
11487
|
+
"**field_coverage** \u2014 requires a minimum percentage of schema fields to have non-null values"
|
|
11488
|
+
]
|
|
11489
|
+
},
|
|
10646
11490
|
{
|
|
10647
11491
|
type: "endpoint",
|
|
10648
11492
|
method: "POST",
|
|
@@ -10746,8 +11590,11 @@ var sections30 = [
|
|
|
10746
11590
|
{ label: "Create Gate", slug: "create-structuring-gate" },
|
|
10747
11591
|
{ label: "Pending Approvals", slug: "pending-approvals" }
|
|
10748
11592
|
],
|
|
10749
|
-
faq: [
|
|
10750
|
-
|
|
11593
|
+
faq: [
|
|
11594
|
+
{ question: "Can I add multiple rules of the same type to a gate?", answer: "Yes. For example, you could add two `min_confidence` rules with different thresholds for different severity levels, though in practice a single rule per type is typical." },
|
|
11595
|
+
{ question: "What happens when I remove all rules from a gate?", answer: "The gate will auto-approve all results since there are no thresholds to fail against. This is equivalent to disabling the gate without deleting it." }
|
|
11596
|
+
],
|
|
11597
|
+
mentions: ["gate rules", "min_confidence", "threshold", "validation_pass_rate", "field_coverage"]
|
|
10751
11598
|
},
|
|
10752
11599
|
{
|
|
10753
11600
|
slug: "result-checks",
|
|
@@ -10756,6 +11603,7 @@ var sections30 = [
|
|
|
10756
11603
|
seoTitle: "Structuring Result Checks \u2014 Talonic Docs",
|
|
10757
11604
|
description: "Get validation check outcomes for a specific structuring result showing check name, pass/fail status, and messages.",
|
|
10758
11605
|
content: [
|
|
11606
|
+
{ type: "paragraph", text: "Retrieve the validation check outcomes for a specific structuring result. Each outcome records whether a configured check passed or failed for that result, along with the check's name and severity. Use this to understand why a result was flagged or to build audit trails for data quality." },
|
|
10759
11607
|
{
|
|
10760
11608
|
type: "endpoint",
|
|
10761
11609
|
method: "GET",
|
|
@@ -10816,8 +11664,11 @@ var sections30 = [
|
|
|
10816
11664
|
{ label: "List Checks", slug: "list-structuring-checks" },
|
|
10817
11665
|
{ label: "Pending Approvals", slug: "pending-approvals" }
|
|
10818
11666
|
],
|
|
10819
|
-
faq: [
|
|
10820
|
-
|
|
11667
|
+
faq: [
|
|
11668
|
+
{ question: "Are check outcomes generated automatically?", answer: "Yes. Check outcomes are computed automatically when a structuring result is produced. You do not need to trigger evaluation manually." },
|
|
11669
|
+
{ question: "What does the details object contain for a failed check?", answer: "The `details` object contains type-specific failure information. For `value_range`, it includes the field name, actual value, and the configured min/max bounds. For `ai_coherence`, it includes the LLM reasoning." }
|
|
11670
|
+
],
|
|
11671
|
+
mentions: ["result checks", "validation outcomes", "check results"]
|
|
10821
11672
|
},
|
|
10822
11673
|
{
|
|
10823
11674
|
slug: "pending-approvals",
|
|
@@ -10826,6 +11677,8 @@ var sections30 = [
|
|
|
10826
11677
|
seoTitle: "Pending Approvals \u2014 Talonic Docs",
|
|
10827
11678
|
description: "List structuring results awaiting manual approval. These are results that did not pass all gate rules automatically.",
|
|
10828
11679
|
content: [
|
|
11680
|
+
{ type: "paragraph", text: "Retrieve the queue of structuring results that failed one or more gate rules and require manual review. Each item in the response represents a failed check outcome, linking a structuring result to the check that flagged it. Use this endpoint to build approval workflows or monitor data quality issues." },
|
|
11681
|
+
{ type: "callout", variant: "info", text: "The pending approvals endpoint returns up to 100 items per call. If you have a high volume of flagged results, implement polling or use the `auto_approve_after_hours` gate setting to prevent queue buildup." },
|
|
10829
11682
|
{
|
|
10830
11683
|
type: "endpoint",
|
|
10831
11684
|
method: "GET",
|
|
@@ -10882,8 +11735,11 @@ var sections30 = [
|
|
|
10882
11735
|
{ label: "Approve / Reject", slug: "approve-reject-result" },
|
|
10883
11736
|
{ label: "Gate Rules", slug: "gate-rules" }
|
|
10884
11737
|
],
|
|
10885
|
-
faq: [
|
|
10886
|
-
|
|
11738
|
+
faq: [
|
|
11739
|
+
{ question: "Can a single result appear multiple times in pending approvals?", answer: "Yes. If a result fails multiple checks, each failed check outcome appears as a separate item in the pending approvals list. Approving the result clears all pending items for that result." },
|
|
11740
|
+
{ question: "How do I clear the pending approvals queue?", answer: "Approve or reject each pending result via `POST /v1/structuring/approvals/{id}/approve` or `/reject`. Alternatively, configure `auto_approve_after_hours` on the gate to auto-clear items after a timeout." }
|
|
11741
|
+
],
|
|
11742
|
+
mentions: ["pending approvals", "approval queue", "manual review"]
|
|
10887
11743
|
},
|
|
10888
11744
|
{
|
|
10889
11745
|
slug: "approve-reject-result",
|
|
@@ -10892,6 +11748,8 @@ var sections30 = [
|
|
|
10892
11748
|
seoTitle: "Approve or Reject Structuring Result \u2014 Talonic Docs",
|
|
10893
11749
|
description: "Approve or reject a structuring result. POST /approve approves the result; POST /reject rejects it. Both return the updated status.",
|
|
10894
11750
|
content: [
|
|
11751
|
+
{ type: "paragraph", text: "Submit an approval or rejection decision for a structuring result that is pending manual review. Approving a result triggers the gate's `on_approve` action (typically delivery). Rejecting it removes the result from the approval queue without triggering delivery. Both actions require the `gate_id` to record which gate the decision applies to." },
|
|
11752
|
+
{ type: "callout", variant: "warning", text: "The `gate_id` parameter is required. Each decision is recorded against a specific gate, allowing multiple gates to independently control the same result." },
|
|
10895
11753
|
{
|
|
10896
11754
|
type: "endpoint",
|
|
10897
11755
|
method: "POST",
|
|
@@ -10944,8 +11802,11 @@ var sections30 = [
|
|
|
10944
11802
|
{ label: "Pending Approvals", slug: "pending-approvals" },
|
|
10945
11803
|
{ label: "Trigger Delivery", slug: "trigger-delivery" }
|
|
10946
11804
|
],
|
|
10947
|
-
faq: [
|
|
10948
|
-
|
|
11805
|
+
faq: [
|
|
11806
|
+
{ question: "What happens after I approve a result?", answer: "The gate's `on_approve` action fires. If set to `export`, a delivery signal is emitted for the result. Use the trigger delivery endpoint to manually control when delivery occurs for an entire run." },
|
|
11807
|
+
{ question: "Can I approve a result that was already rejected?", answer: "Yes. Approval and rejection decisions are additive records. A subsequent approval overrides a prior rejection for the same gate." }
|
|
11808
|
+
],
|
|
11809
|
+
mentions: ["approve", "reject", "structuring result", "approval decision"]
|
|
10949
11810
|
},
|
|
10950
11811
|
{
|
|
10951
11812
|
slug: "trigger-delivery",
|
|
@@ -10954,6 +11815,8 @@ var sections30 = [
|
|
|
10954
11815
|
seoTitle: "Trigger Delivery \u2014 Talonic Docs",
|
|
10955
11816
|
description: "Trigger delivery for a structuring run by emitting delivery signals for all approved results. Returns delivered and skipped counts.",
|
|
10956
11817
|
content: [
|
|
11818
|
+
{ type: "paragraph", text: "Manually trigger delivery for an entire structuring run. This emits delivery signals for all approved results in the run, routing them to configured delivery destinations (webhooks, S3, SFTP, etc.). Results that have not been approved are skipped. Use this after batch-approving results or when you want explicit control over when data leaves the platform." },
|
|
11819
|
+
{ type: "callout", variant: "warning", text: "This endpoint triggers delivery for all approved results in the run. There is no undo. Ensure all results have been reviewed before calling this endpoint in production workflows." },
|
|
10957
11820
|
{
|
|
10958
11821
|
type: "endpoint",
|
|
10959
11822
|
method: "POST",
|
|
@@ -10995,8 +11858,11 @@ var sections30 = [
|
|
|
10995
11858
|
related: [
|
|
10996
11859
|
{ label: "Approve / Reject", slug: "approve-reject-result" }
|
|
10997
11860
|
],
|
|
10998
|
-
faq: [
|
|
10999
|
-
|
|
11861
|
+
faq: [
|
|
11862
|
+
{ question: "What happens to unapproved results when I trigger delivery?", answer: "Unapproved results are silently skipped. Only results with an `approved` status are included in the delivery signals." },
|
|
11863
|
+
{ question: "Can I trigger delivery multiple times for the same run?", answer: "Yes. Delivery is idempotent per result \u2014 each result generates a deterministic idempotency key, so duplicate signals are deduplicated by the delivery pipeline." }
|
|
11864
|
+
],
|
|
11865
|
+
mentions: ["trigger delivery", "delivery signals", "export"]
|
|
11000
11866
|
}
|
|
11001
11867
|
];
|
|
11002
11868
|
|
|
@@ -11010,6 +11876,16 @@ var sections31 = [
|
|
|
11010
11876
|
description: "Get aggregate structuring metrics for a schema across all runs including capture hit rate, synthesize rate, strategy distribution, and tier funnel.",
|
|
11011
11877
|
content: [
|
|
11012
11878
|
{ type: "paragraph", text: "Telemetry endpoints aggregate structuring metrics (capture hit rate, synthesize rate, strategy distribution, tier funnel) per schema or per run." },
|
|
11879
|
+
{ type: "paragraph", text: "The schema summary returns metrics from the **latest run** for a given schema. Use it to understand how effectively the pipeline fills cells using the field registry versus LLM synthesis, and how fields distribute across resolution tiers." },
|
|
11880
|
+
{
|
|
11881
|
+
type: "list",
|
|
11882
|
+
items: [
|
|
11883
|
+
"**capture_hit_rate** \u2014 Fraction of cells filled from the field registry without LLM calls. Higher is more cost-efficient.",
|
|
11884
|
+
"**synthesize_rate** \u2014 Fraction of cells that required LLM synthesis (Phase 2 agent extraction).",
|
|
11885
|
+
"**strategy_distribution** \u2014 Breakdown by strategy: `transfer`, `extract`, `compute`, `skip`.",
|
|
11886
|
+
"**tier_funnel** \u2014 How cells resolved across registry tiers: `tier1` (core), `tier2` (established), `tier3` (emerging), `unresolved`."
|
|
11887
|
+
]
|
|
11888
|
+
},
|
|
11013
11889
|
{
|
|
11014
11890
|
type: "endpoint",
|
|
11015
11891
|
method: "GET",
|
|
@@ -11068,8 +11944,12 @@ var sections31 = [
|
|
|
11068
11944
|
{ label: "Schema Trend", slug: "schema-telemetry-trend" },
|
|
11069
11945
|
{ label: "Schema Fields", slug: "schema-telemetry-fields" }
|
|
11070
11946
|
],
|
|
11071
|
-
faq: [
|
|
11072
|
-
|
|
11947
|
+
faq: [
|
|
11948
|
+
{ question: "What is the difference between capture_hit_rate and synthesize_rate?", answer: "Capture hit rate measures the fraction of cells filled deterministically from the field registry (no LLM cost). Synthesize rate measures the fraction of cells that required an LLM call to fill. The two rates plus the fraction of skipped cells sum to approximately 1.0." },
|
|
11949
|
+
{ question: "Which run does the summary reflect?", answer: "The summary always reflects the most recent completed run for the schema. To see metrics from a specific run, use the Run Summary endpoint instead." },
|
|
11950
|
+
{ question: "What do the strategy_distribution values mean?", answer: "`transfer` means the value was copied from the field registry, `extract` means LLM extraction was used, `compute` means a deterministic formula produced the value, and `skip` means the field was intentionally left empty." }
|
|
11951
|
+
],
|
|
11952
|
+
mentions: ["telemetry", "capture hit rate", "synthesize rate", "strategy distribution", "tier funnel"]
|
|
11073
11953
|
},
|
|
11074
11954
|
{
|
|
11075
11955
|
slug: "schema-telemetry-trend",
|
|
@@ -11078,6 +11958,8 @@ var sections31 = [
|
|
|
11078
11958
|
seoTitle: "Schema Telemetry Trend \u2014 Talonic Docs",
|
|
11079
11959
|
description: "Get metric trends over time for a schema. Returns time-series telemetry data across recent runs for tracking quality changes.",
|
|
11080
11960
|
content: [
|
|
11961
|
+
{ type: "paragraph", text: "Track how structuring metrics evolve over successive runs for a schema. This endpoint returns a **time-series** of telemetry snapshots, allowing you to detect quality improvements, regressions, or shifts in strategy distribution as your field registry matures." },
|
|
11962
|
+
{ type: "callout", variant: "info", text: "A rising `capture_hit_rate` over time indicates the field registry is learning from extractions and resolving more fields deterministically, reducing LLM costs." },
|
|
11081
11963
|
{
|
|
11082
11964
|
type: "endpoint",
|
|
11083
11965
|
method: "GET",
|
|
@@ -11167,8 +12049,11 @@ var sections31 = [
|
|
|
11167
12049
|
{ label: "Schema Summary", slug: "schema-telemetry-summary" },
|
|
11168
12050
|
{ label: "Run Summary", slug: "run-telemetry-summary" }
|
|
11169
12051
|
],
|
|
11170
|
-
faq: [
|
|
11171
|
-
|
|
12052
|
+
faq: [
|
|
12053
|
+
{ question: "How many runs does the trend include by default?", answer: "The default window is 10 runs. Use the `window` query parameter to request up to 50 recent runs." },
|
|
12054
|
+
{ question: "What does a decreasing synthesize_rate indicate?", answer: "A decreasing synthesize rate means more fields are being resolved from the registry without LLM calls. This is the expected trajectory as the field registry accumulates data from successive extractions." }
|
|
12055
|
+
],
|
|
12056
|
+
mentions: ["telemetry trend", "time-series", "quality tracking"]
|
|
11172
12057
|
},
|
|
11173
12058
|
{
|
|
11174
12059
|
slug: "schema-telemetry-fields",
|
|
@@ -11177,6 +12062,8 @@ var sections31 = [
|
|
|
11177
12062
|
seoTitle: "Schema Field Telemetry \u2014 Talonic Docs",
|
|
11178
12063
|
description: "Get per-field structuring metrics for a schema including field-level state distribution, capture rates, and strategy breakdown.",
|
|
11179
12064
|
content: [
|
|
12065
|
+
{ type: "paragraph", text: "Drill down to **individual field performance** within a schema. This endpoint returns per-field capture rates, synthesis rates, the most common strategy used, and the distribution of cell states (filled, empty, skipped). Use it to identify underperforming fields that may need instruction tuning or manual review." },
|
|
12066
|
+
{ type: "callout", variant: "info", text: "Fields with a high `synthesize_rate` and low `capture_rate` are candidates for field registry enrichment or instruction refinement to reduce LLM dependency." },
|
|
11180
12067
|
{
|
|
11181
12068
|
type: "endpoint",
|
|
11182
12069
|
method: "GET",
|
|
@@ -11240,10 +12127,14 @@ var sections31 = [
|
|
|
11240
12127
|
}
|
|
11241
12128
|
],
|
|
11242
12129
|
related: [
|
|
11243
|
-
{ label: "Schema Summary", slug: "schema-telemetry-summary" }
|
|
12130
|
+
{ label: "Schema Summary", slug: "schema-telemetry-summary" },
|
|
12131
|
+
{ label: "Schema Trend", slug: "schema-telemetry-trend" }
|
|
11244
12132
|
],
|
|
11245
|
-
faq: [
|
|
11246
|
-
|
|
12133
|
+
faq: [
|
|
12134
|
+
{ question: "What does a high empty count in state_distribution mean?", answer: "A high empty count means the field could not be extracted from many documents. This may indicate the field is not present in those document types, or the extraction instructions need refinement." },
|
|
12135
|
+
{ question: "How is the strategy field determined?", answer: "The `strategy` field shows the most frequently used resolution strategy for that field across all documents in the latest run. Possible values are `transfer`, `extract`, `compute`, and `skip`." }
|
|
12136
|
+
],
|
|
12137
|
+
mentions: ["field telemetry", "capture rates", "per-field metrics", "state distribution"]
|
|
11247
12138
|
},
|
|
11248
12139
|
{
|
|
11249
12140
|
slug: "run-telemetry-summary",
|
|
@@ -11252,6 +12143,8 @@ var sections31 = [
|
|
|
11252
12143
|
seoTitle: "Run Telemetry Summary \u2014 Talonic Docs",
|
|
11253
12144
|
description: "Get aggregate structuring metrics for a single job run including strategy distribution, tier funnel, and capture hit rate.",
|
|
11254
12145
|
content: [
|
|
12146
|
+
{ type: "paragraph", text: "Retrieve structuring telemetry for a **specific job run** rather than the latest run for a schema. Use this when you need to inspect the performance of a particular execution, compare two runs side by side, or debug a run that produced unexpected results." },
|
|
12147
|
+
{ type: "callout", variant: "info", text: "The response shape is identical to the Schema Summary endpoint. The only difference is that this endpoint targets a specific run by ID instead of returning the latest run for a schema." },
|
|
11255
12148
|
{
|
|
11256
12149
|
type: "endpoint",
|
|
11257
12150
|
method: "GET",
|
|
@@ -11310,8 +12203,11 @@ var sections31 = [
|
|
|
11310
12203
|
{ label: "Schema Summary", slug: "schema-telemetry-summary" },
|
|
11311
12204
|
{ label: "Schema Trend", slug: "schema-telemetry-trend" }
|
|
11312
12205
|
],
|
|
11313
|
-
faq: [
|
|
11314
|
-
|
|
12206
|
+
faq: [
|
|
12207
|
+
{ question: "How do I compare two runs?", answer: "Call this endpoint twice with different run IDs and compare the `capture_hit_rate`, `synthesize_rate`, and `strategy_distribution` values to identify improvements or regressions." },
|
|
12208
|
+
{ question: "Can I get run telemetry for a run that is still in progress?", answer: "No. Telemetry is computed after a run completes. Runs with status `pending` or `running` will return a 404 until they finish." }
|
|
12209
|
+
],
|
|
12210
|
+
mentions: ["run telemetry", "run comparison"]
|
|
11315
12211
|
}
|
|
11316
12212
|
];
|
|
11317
12213
|
|
|
@@ -11325,6 +12221,8 @@ var sections32 = [
|
|
|
11325
12221
|
description: "List ground-truth datasets used for measuring extraction accuracy. Each dataset contains manually verified document-field value pairs.",
|
|
11326
12222
|
content: [
|
|
11327
12223
|
{ type: "paragraph", text: "Validation runs measure extraction accuracy against ground-truth datasets. Manage datasets and runs, and retrieve per-document and per-field accuracy results. Create a ground-truth set, then run validations to compare extracted values against expected values." },
|
|
12224
|
+
{ type: "paragraph", text: "A **ground-truth dataset** is a collection of manually verified document-field value pairs that serve as the benchmark for accuracy measurement. Each dataset can be scoped to a specific schema via `user_schema_id`, or left unscoped for cross-schema validation." },
|
|
12225
|
+
{ type: "callout", variant: "info", text: "Ground-truth datasets are reusable. Create a dataset once and run validations against it repeatedly as you refine your schemas and extraction pipeline." },
|
|
11328
12226
|
{
|
|
11329
12227
|
type: "endpoint",
|
|
11330
12228
|
method: "GET",
|
|
@@ -11381,8 +12279,11 @@ var sections32 = [
|
|
|
11381
12279
|
{ label: "Get Ground-Truth", slug: "get-ground-truth" },
|
|
11382
12280
|
{ label: "Create Validation Run", slug: "create-validation-run" }
|
|
11383
12281
|
],
|
|
11384
|
-
faq: [
|
|
11385
|
-
|
|
12282
|
+
faq: [
|
|
12283
|
+
{ question: "Can I create ground-truth datasets without a schema?", answer: "Yes. The `user_schema_id` field is optional. Unscoped datasets can be used for cross-schema validation or when you want to validate specific fields regardless of schema." },
|
|
12284
|
+
{ question: "How many entries should a ground-truth dataset have?", answer: "There is no minimum or maximum. For statistically meaningful accuracy results, aim for at least 20-30 document-field pairs covering the fields you care about most." }
|
|
12285
|
+
],
|
|
12286
|
+
mentions: ["ground truth", "validation dataset", "accuracy benchmark"]
|
|
11386
12287
|
},
|
|
11387
12288
|
{
|
|
11388
12289
|
slug: "get-ground-truth",
|
|
@@ -11391,6 +12292,8 @@ var sections32 = [
|
|
|
11391
12292
|
seoTitle: "Manage Ground-Truth Dataset \u2014 Talonic Docs",
|
|
11392
12293
|
description: "Get detail with expected values or delete a ground-truth dataset. Supports GET (read scope) and DELETE (write scope) on the same path.",
|
|
11393
12294
|
content: [
|
|
12295
|
+
{ type: "paragraph", text: "Retrieve the full details of a ground-truth dataset including all expected value entries, or permanently delete the dataset. The GET response includes every document-field pair with the expected value, which you can use to audit the benchmark data before running a validation." },
|
|
12296
|
+
{ type: "callout", variant: "warning", text: "Deleting a ground-truth dataset also removes all associated expected value entries. Existing validation runs that used this dataset are retained but can no longer be re-run." },
|
|
11394
12297
|
{
|
|
11395
12298
|
type: "endpoint",
|
|
11396
12299
|
method: "GET",
|
|
@@ -11462,10 +12365,14 @@ var sections32 = [
|
|
|
11462
12365
|
}
|
|
11463
12366
|
],
|
|
11464
12367
|
related: [
|
|
11465
|
-
{ label: "List Ground-Truth", slug: "list-ground-truth" }
|
|
12368
|
+
{ label: "List Ground-Truth", slug: "list-ground-truth" },
|
|
12369
|
+
{ label: "Create Validation Run", slug: "create-validation-run" }
|
|
11466
12370
|
],
|
|
11467
|
-
faq: [
|
|
11468
|
-
|
|
12371
|
+
faq: [
|
|
12372
|
+
{ question: "Does deleting a ground-truth dataset delete its validation runs?", answer: "No. Validation runs and their results are retained. However, you cannot create new validation runs against a deleted dataset." },
|
|
12373
|
+
{ question: "Can I update individual expected values in a dataset?", answer: "Expected values are managed as part of the dataset. To change values, delete the dataset and recreate it with the corrected entries." }
|
|
12374
|
+
],
|
|
12375
|
+
mentions: ["ground truth dataset", "expected values"]
|
|
11469
12376
|
},
|
|
11470
12377
|
{
|
|
11471
12378
|
slug: "list-validation-runs",
|
|
@@ -11474,6 +12381,8 @@ var sections32 = [
|
|
|
11474
12381
|
seoTitle: "List Validation Runs \u2014 Talonic Docs",
|
|
11475
12382
|
description: "List validation runs that compare extraction results against ground-truth datasets. Requires read scope.",
|
|
11476
12383
|
content: [
|
|
12384
|
+
{ type: "paragraph", text: "List all validation runs for your organization, ordered by most recent first. A **validation run** compares the structured output of a job run against a ground-truth dataset to produce per-field and overall accuracy scores." },
|
|
12385
|
+
{ type: "paragraph", text: "Each run includes its status (`pending`, `running`, `completed`, `failed`), the overall accuracy score (available once completed), and links to the detailed results. Use this to track validation history and identify accuracy trends." },
|
|
11477
12386
|
{
|
|
11478
12387
|
type: "endpoint",
|
|
11479
12388
|
method: "GET",
|
|
@@ -11537,8 +12446,11 @@ var sections32 = [
|
|
|
11537
12446
|
{ label: "Create Validation Run", slug: "create-validation-run" },
|
|
11538
12447
|
{ label: "Get Validation Run", slug: "get-validation-run" }
|
|
11539
12448
|
],
|
|
11540
|
-
faq: [
|
|
11541
|
-
|
|
12449
|
+
faq: [
|
|
12450
|
+
{ question: "How many validation runs are returned?", answer: "Up to 100 runs are returned, ordered by `created_at` descending. There is no pagination \u2014 the response is a single page containing at most the 100 most recent runs." },
|
|
12451
|
+
{ question: "What does an accuracy of null mean?", answer: "A null accuracy indicates the run has not completed yet. The score is populated only when the run reaches `completed` status." }
|
|
12452
|
+
],
|
|
12453
|
+
mentions: ["validation runs", "accuracy tracking"]
|
|
11542
12454
|
},
|
|
11543
12455
|
{
|
|
11544
12456
|
slug: "create-validation-run",
|
|
@@ -11547,6 +12459,9 @@ var sections32 = [
|
|
|
11547
12459
|
seoTitle: "Create Validation Run \u2014 Talonic Docs",
|
|
11548
12460
|
description: "Create a validation run comparing a job against a ground-truth dataset. Measures per-document and per-field extraction accuracy.",
|
|
11549
12461
|
content: [
|
|
12462
|
+
{ type: "paragraph", text: "Start a new validation run that compares the output of a **job run** against a **ground-truth dataset**. The validation engine compares each extracted value to the expected value, computing exact match, fuzzy match, and similarity scores. An optional LLM judge provides a semantic verdict for ambiguous cases." },
|
|
12463
|
+
{ type: "paragraph", text: "Validation runs start in `pending` status and move to `running` as comparisons are performed. Once complete, the `accuracy` field contains the overall score and per-field results are available via the Results endpoint." },
|
|
12464
|
+
{ type: "callout", variant: "info", text: "Both `golden_sample_id` and `dataspace_run_id` must belong to your organization. The API returns 404 if either resource is not found." },
|
|
11550
12465
|
{
|
|
11551
12466
|
type: "endpoint",
|
|
11552
12467
|
method: "POST",
|
|
@@ -11625,8 +12540,11 @@ var sections32 = [
|
|
|
11625
12540
|
{ label: "List Validation Runs", slug: "list-validation-runs" },
|
|
11626
12541
|
{ label: "Get Validation Results", slug: "get-validation-results" }
|
|
11627
12542
|
],
|
|
11628
|
-
faq: [
|
|
11629
|
-
|
|
12543
|
+
faq: [
|
|
12544
|
+
{ question: "How long does a validation run take?", answer: "Most validation runs complete within seconds. The duration depends on the number of document-field pairs in the ground-truth dataset and whether the LLM judge is invoked for ambiguous comparisons." },
|
|
12545
|
+
{ question: "Can I run validation against the same dataset multiple times?", answer: "Yes. You can create multiple validation runs against the same ground-truth dataset with different job runs to track accuracy improvements over time." }
|
|
12546
|
+
],
|
|
12547
|
+
mentions: ["create validation run", "accuracy measurement", "LLM judge"]
|
|
11630
12548
|
},
|
|
11631
12549
|
{
|
|
11632
12550
|
slug: "get-validation-run",
|
|
@@ -11635,6 +12553,7 @@ var sections32 = [
|
|
|
11635
12553
|
seoTitle: "Manage Validation Run \u2014 Talonic Docs",
|
|
11636
12554
|
description: "Get validation run detail with accuracy summary or delete a run. Supports GET (read scope) and DELETE (write scope) on the same path.",
|
|
11637
12555
|
content: [
|
|
12556
|
+
{ type: "paragraph", text: "Retrieve the full details of a validation run, including its status, accuracy score, and total comparisons, or permanently delete a run and its associated results. Use GET to poll a run's status until it reaches `completed`, then fetch the detailed results." },
|
|
11638
12557
|
{
|
|
11639
12558
|
type: "endpoint",
|
|
11640
12559
|
method: "GET",
|
|
@@ -11701,8 +12620,11 @@ var sections32 = [
|
|
|
11701
12620
|
{ label: "List Validation Runs", slug: "list-validation-runs" },
|
|
11702
12621
|
{ label: "Get Validation Results", slug: "get-validation-results" }
|
|
11703
12622
|
],
|
|
11704
|
-
faq: [
|
|
11705
|
-
|
|
12623
|
+
faq: [
|
|
12624
|
+
{ question: "Does deleting a validation run delete the ground-truth dataset?", answer: "No. Deleting a run only removes the run record and its per-field results. The ground-truth dataset and the job run remain intact." },
|
|
12625
|
+
{ question: "How do I poll for run completion?", answer: "Call `GET /v1/validation/runs/{id}` and check the `status` field. Poll until it changes from `pending` or `running` to `completed` or `failed`." }
|
|
12626
|
+
],
|
|
12627
|
+
mentions: ["validation run detail", "run status polling"]
|
|
11706
12628
|
},
|
|
11707
12629
|
{
|
|
11708
12630
|
slug: "get-validation-results",
|
|
@@ -11711,6 +12633,16 @@ var sections32 = [
|
|
|
11711
12633
|
seoTitle: "Validation Results \u2014 Talonic Docs",
|
|
11712
12634
|
description: "Get per-field validation results including overall accuracy, per-field accuracy, match type, similarity scores, and judge verdicts.",
|
|
11713
12635
|
content: [
|
|
12636
|
+
{ type: "paragraph", text: "Retrieve the granular, per-field comparison results for a completed validation run. Each result entry shows the **expected value**, **actual extracted value**, **match type** (exact, fuzzy, or no_match), a **similarity score**, and an optional **LLM judge verdict** for ambiguous cases." },
|
|
12637
|
+
{ type: "paragraph", text: "Use the `judged_only=true` parameter to filter results to only those where the LLM judge was invoked. This is useful for reviewing cases where simple string comparison was insufficient and semantic judgment was required." },
|
|
12638
|
+
{
|
|
12639
|
+
type: "list",
|
|
12640
|
+
items: [
|
|
12641
|
+
"**exact** \u2014 The extracted value matches the expected value character-for-character.",
|
|
12642
|
+
"**fuzzy** \u2014 The values are similar but not identical (e.g. different formatting, minor typos).",
|
|
12643
|
+
"**no_match** \u2014 The extracted value does not match the expected value."
|
|
12644
|
+
]
|
|
12645
|
+
},
|
|
11714
12646
|
{
|
|
11715
12647
|
type: "endpoint",
|
|
11716
12648
|
method: "GET",
|
|
@@ -11782,8 +12714,12 @@ var sections32 = [
|
|
|
11782
12714
|
{ label: "Get Validation Run", slug: "get-validation-run" },
|
|
11783
12715
|
{ label: "List Ground-Truth", slug: "list-ground-truth" }
|
|
11784
12716
|
],
|
|
11785
|
-
faq: [
|
|
11786
|
-
|
|
12717
|
+
faq: [
|
|
12718
|
+
{ question: "What is the difference between match_type and judge_verdict?", answer: "The `match_type` is a deterministic string comparison (exact, fuzzy, no_match). The `judge_verdict` is an LLM-based semantic assessment (`correct`, `incorrect`, `partial`) invoked for ambiguous cases where string comparison alone is insufficient." },
|
|
12719
|
+
{ question: "When is the LLM judge invoked?", answer: "The judge is invoked for fuzzy matches and edge cases where the similarity score falls in an ambiguous range. Exact matches and clear no-matches do not trigger the judge." },
|
|
12720
|
+
{ question: "Can I filter results by document or field?", answer: "The `judged_only` parameter filters by judge involvement. To filter by document or field, retrieve the full results and filter client-side." }
|
|
12721
|
+
],
|
|
12722
|
+
mentions: ["validation results", "accuracy", "judge verdict", "similarity score", "match type"]
|
|
11787
12723
|
}
|
|
11788
12724
|
];
|
|
11789
12725
|
|
|
@@ -11797,6 +12733,16 @@ var sections33 = [
|
|
|
11797
12733
|
description: "Get the current credit balance for the authenticated customer including currency and timestamp. Requires read scope.",
|
|
11798
12734
|
content: [
|
|
11799
12735
|
{ type: "paragraph", text: "Credit endpoints expose the current balance, transaction history, aggregate usage summaries, daily usage, and a per-request usage log with model and token counts. Track credit balance and usage breakdowns by operation type and time period." },
|
|
12736
|
+
{ type: "paragraph", text: "The balance endpoint returns an **enriched** view of your credit account, including the EUR-equivalent balance, a 30-day burn rate, projected runway in days, and your current API tier. Use this to monitor consumption and plan capacity." },
|
|
12737
|
+
{
|
|
12738
|
+
type: "list",
|
|
12739
|
+
items: [
|
|
12740
|
+
"**balance_credits** \u2014 Current credit balance as an integer.",
|
|
12741
|
+
"**burn_rate_30d_credits** \u2014 Credits consumed in the last 30 days for trend analysis.",
|
|
12742
|
+
"**projected_runway_days** \u2014 Estimated days remaining at the current burn rate.",
|
|
12743
|
+
"**tier** \u2014 Your current API tier (e.g. `free`, `starter`, `growth`), which determines rate limits and features."
|
|
12744
|
+
]
|
|
12745
|
+
},
|
|
11800
12746
|
{
|
|
11801
12747
|
type: "endpoint",
|
|
11802
12748
|
method: "GET",
|
|
@@ -11844,8 +12790,12 @@ var sections33 = [
|
|
|
11844
12790
|
{ label: "History", slug: "credits-history" },
|
|
11845
12791
|
{ label: "Usage Summary", slug: "credits-usage" }
|
|
11846
12792
|
],
|
|
11847
|
-
faq: [
|
|
11848
|
-
|
|
12793
|
+
faq: [
|
|
12794
|
+
{ question: "What does projected_runway_days: -1 mean?", answer: "A value of -1 indicates that your burn rate over the last 30 days is zero, so runway cannot be projected. This typically means no API calls were made during the period." },
|
|
12795
|
+
{ question: "When does the API tier reset?", answer: "Tiers reset on the 1st of each month at midnight UTC. The exact reset timestamp is returned in the `tier_resets_at` field." },
|
|
12796
|
+
{ question: "How is the EUR balance calculated?", answer: "The `balance_eur` is computed by dividing `balance_credits` by a configured credits-per-EUR rate. This rate is fixed and does not fluctuate." }
|
|
12797
|
+
],
|
|
12798
|
+
mentions: ["credit balance", "burn rate", "API tier", "runway"]
|
|
11849
12799
|
},
|
|
11850
12800
|
{
|
|
11851
12801
|
slug: "credits-history",
|
|
@@ -11854,6 +12804,8 @@ var sections33 = [
|
|
|
11854
12804
|
seoTitle: "Credits History Endpoint \u2014 Talonic Docs",
|
|
11855
12805
|
description: "Get credit transaction history including purchases, deductions, and adjustments with page-based pagination.",
|
|
11856
12806
|
content: [
|
|
12807
|
+
{ type: "paragraph", text: "Retrieve a chronological log of every credit transaction on your account. Transactions include **purchases** (positive amounts), **consumption deductions** (negative amounts), **bonuses**, and **manual adjustments**. Use this to audit spending and reconcile usage." },
|
|
12808
|
+
{ type: "callout", variant: "info", text: "Transactions are ordered by most recent first. Each entry includes the `operation_type` that triggered it (e.g. `extraction`, `manual`), making it easy to trace costs back to specific pipeline operations." },
|
|
11857
12809
|
{
|
|
11858
12810
|
type: "endpoint",
|
|
11859
12811
|
method: "GET",
|
|
@@ -11921,8 +12873,11 @@ var sections33 = [
|
|
|
11921
12873
|
{ label: "Balance", slug: "credits-balance" },
|
|
11922
12874
|
{ label: "Usage Summary", slug: "credits-usage" }
|
|
11923
12875
|
],
|
|
11924
|
-
faq: [
|
|
11925
|
-
|
|
12876
|
+
faq: [
|
|
12877
|
+
{ question: "How do I distinguish between a purchase and a deduction?", answer: "The `amount` field is positive for purchases and bonuses, and negative for consumption deductions. The `type` field also explicitly labels each transaction." },
|
|
12878
|
+
{ question: "What operation types are tracked?", answer: "Common operation types include `extraction`, `document_ai_ocr`, `matching`, and `manual`. The exact set depends on which pipeline operations your account uses." }
|
|
12879
|
+
],
|
|
12880
|
+
mentions: ["credit history", "transactions", "consumption", "purchase"]
|
|
11926
12881
|
},
|
|
11927
12882
|
{
|
|
11928
12883
|
slug: "credits-usage",
|
|
@@ -11931,6 +12886,8 @@ var sections33 = [
|
|
|
11931
12886
|
seoTitle: "Credits Usage Summary \u2014 Talonic Docs",
|
|
11932
12887
|
description: "Get aggregate credit usage summary broken down by operation type and model for a configurable time period.",
|
|
11933
12888
|
content: [
|
|
12889
|
+
{ type: "paragraph", text: "Get a high-level view of your API usage grouped by **operation type** and **model**. This endpoint aggregates call counts, token consumption, and estimated costs over a configurable lookback period. Use it to understand which operations drive your spending." },
|
|
12890
|
+
{ type: "callout", variant: "info", text: "Cost estimates include all token classes: input tokens, output tokens, cache creation tokens, and cache read tokens. Each is priced at the model-specific rate." },
|
|
11934
12891
|
{
|
|
11935
12892
|
type: "endpoint",
|
|
11936
12893
|
method: "GET",
|
|
@@ -12004,8 +12961,11 @@ var sections33 = [
|
|
|
12004
12961
|
{ label: "Daily Usage", slug: "credits-usage-daily" },
|
|
12005
12962
|
{ label: "Usage Log", slug: "credits-usage-log" }
|
|
12006
12963
|
],
|
|
12007
|
-
faq: [
|
|
12008
|
-
|
|
12964
|
+
faq: [
|
|
12965
|
+
{ question: "Why does document_ai_ocr show zero tokens?", answer: "Document AI OCR (Mistral) is not token-based. Its cost is calculated from `pages_processed * cost_per_page`. Token fields are 0 because they do not apply to this operation type." },
|
|
12966
|
+
{ question: "Can I change the lookback period?", answer: "Yes. Pass the `days` query parameter to set the lookback window. The default is 30 days. There is no maximum limit." }
|
|
12967
|
+
],
|
|
12968
|
+
mentions: ["usage summary", "credit breakdown", "token consumption", "cost estimation"]
|
|
12009
12969
|
},
|
|
12010
12970
|
{
|
|
12011
12971
|
slug: "credits-usage-daily",
|
|
@@ -12014,6 +12974,7 @@ var sections33 = [
|
|
|
12014
12974
|
seoTitle: "Credits Daily Usage \u2014 Talonic Docs",
|
|
12015
12975
|
description: "Get per-day credit usage breakdown for the specified period (default last 30 days) with call counts and token totals per day.",
|
|
12016
12976
|
content: [
|
|
12977
|
+
{ type: "paragraph", text: "Get a per-day breakdown of API usage over a configurable period. Each entry includes the total number of API calls, input/output token counts, and estimated cost for that calendar date. Use this for usage trend analysis and daily cost monitoring." },
|
|
12017
12978
|
{
|
|
12018
12979
|
type: "endpoint",
|
|
12019
12980
|
method: "GET",
|
|
@@ -12076,8 +13037,11 @@ var sections33 = [
|
|
|
12076
13037
|
{ label: "Usage Summary", slug: "credits-usage" },
|
|
12077
13038
|
{ label: "Usage Log", slug: "credits-usage-log" }
|
|
12078
13039
|
],
|
|
12079
|
-
faq: [
|
|
12080
|
-
|
|
13040
|
+
faq: [
|
|
13041
|
+
{ question: "Are days with zero usage included in the response?", answer: "Days with no API calls are omitted from the response array. Only dates with at least one recorded call are returned." },
|
|
13042
|
+
{ question: "What timezone are the daily dates in?", answer: "Dates are in UTC. A call made at 23:59 UTC on September 14, 2024 appears under `2024-09-14`, regardless of the caller's local date." }
|
|
13043
|
+
],
|
|
13044
|
+
mentions: ["daily usage", "usage trend", "daily cost"]
|
|
12081
13045
|
},
|
|
12082
13046
|
{
|
|
12083
13047
|
slug: "credits-usage-log",
|
|
@@ -12086,6 +13050,9 @@ var sections33 = [
|
|
|
12086
13050
|
seoTitle: "Credits Usage Log \u2014 Talonic Docs",
|
|
12087
13051
|
description: "Get a detailed per-request usage log with operation type, model, input/output token counts, and cost estimates for each API call.",
|
|
12088
13052
|
content: [
|
|
13053
|
+
{ type: "paragraph", text: "Retrieve a detailed log of individual API requests with per-request token counts, model information, and cost estimates. This is the most granular usage view available, showing every LLM call and OCR request made by your account." },
|
|
13054
|
+
{ type: "paragraph", text: "Each log entry links back to the originating document (when applicable) via the `document_id` field, allowing you to trace costs to specific documents in your pipeline." },
|
|
13055
|
+
{ type: "callout", variant: "info", text: "The usage log is ordered by most recent first. Use page-based pagination to browse historical entries." },
|
|
12089
13056
|
{
|
|
12090
13057
|
type: "endpoint",
|
|
12091
13058
|
method: "GET",
|
|
@@ -12159,8 +13126,11 @@ var sections33 = [
|
|
|
12159
13126
|
{ label: "Usage Summary", slug: "credits-usage" },
|
|
12160
13127
|
{ label: "Balance", slug: "credits-balance" }
|
|
12161
13128
|
],
|
|
12162
|
-
faq: [
|
|
12163
|
-
|
|
13129
|
+
faq: [
|
|
13130
|
+
{ question: "What are cache_read_tokens?", answer: "Cache read tokens represent prompt cache hits where previously cached input was reused instead of being reprocessed. These are charged at a significantly lower rate than regular input tokens." },
|
|
13131
|
+
{ question: "Why is document_id null for some entries?", answer: "Operations that are not tied to a specific document (e.g. schema generation, field resolution) have a null `document_id`. Document-level operations like extraction always include the document reference." }
|
|
13132
|
+
],
|
|
13133
|
+
mentions: ["usage log", "token counts", "cost estimation", "cache tokens", "per-request"]
|
|
12164
13134
|
}
|
|
12165
13135
|
];
|
|
12166
13136
|
|
|
@@ -12174,6 +13144,14 @@ var sections34 = [
|
|
|
12174
13144
|
description: "Get a comprehensive workspace overview including document stats, schemas, active runs, field registry summary, and recent activity for AI integrations.",
|
|
12175
13145
|
content: [
|
|
12176
13146
|
{ type: "paragraph", text: "The Agent API provides programmatic access to the same AI assistant capabilities available in the Talonic platform UI. Use the context endpoint to retrieve a comprehensive workspace snapshot, and the tools endpoint to discover all available agent capabilities." },
|
|
13147
|
+
{ type: "paragraph", text: "The workspace context is useful for building external dashboards, feeding context into custom AI integrations, or monitoring workspace health. The response includes document processing stats, schema summaries, active extraction runs, field registry tier distribution, and up to 15 recent activity events." },
|
|
13148
|
+
{ type: "list", ordered: false, items: [
|
|
13149
|
+
"**Document stats** \u2014 total count, completed this week/24h, currently processing",
|
|
13150
|
+
"**Schemas** \u2014 user-defined schemas with field counts and versions",
|
|
13151
|
+
"**Active runs** \u2014 in-flight extraction runs with status and document counts",
|
|
13152
|
+
"**Field registry** \u2014 total fields and distribution across Tier 1, 2, and 3",
|
|
13153
|
+
"**Recent activity** \u2014 up to 15 events with type, message, timestamp, and actor"
|
|
13154
|
+
] },
|
|
12177
13155
|
{
|
|
12178
13156
|
type: "endpoint",
|
|
12179
13157
|
method: "GET",
|
|
@@ -12252,9 +13230,11 @@ var sections34 = [
|
|
|
12252
13230
|
{ label: "List Agent Tools", slug: "agent-tools" }
|
|
12253
13231
|
],
|
|
12254
13232
|
faq: [
|
|
12255
|
-
{ question: "What does the agent context endpoint return?", answer: "A comprehensive workspace snapshot including document stats, schemas, active runs, field registry summary, and recent activity." }
|
|
13233
|
+
{ question: "What does the agent context endpoint return?", answer: "A comprehensive workspace snapshot including document stats, schemas, active runs, field registry summary, and recent activity." },
|
|
13234
|
+
{ question: "How often is the workspace context updated?", answer: "The context is computed on each request from live data. There is no caching, so the response always reflects the current workspace state." },
|
|
13235
|
+
{ question: "Can I use the agent context to feed an external LLM?", answer: "Yes. The context endpoint is designed for this use case. Pass the response as system context to any LLM to give it awareness of your workspace state, document types, and schemas." }
|
|
12256
13236
|
],
|
|
12257
|
-
mentions: ["agent context", "workspace overview", "AI integration"]
|
|
13237
|
+
mentions: ["agent context", "workspace overview", "AI integration", "dashboard data"]
|
|
12258
13238
|
},
|
|
12259
13239
|
{
|
|
12260
13240
|
slug: "agent-tools",
|
|
@@ -12263,6 +13243,8 @@ var sections34 = [
|
|
|
12263
13243
|
seoTitle: "Agent Tools Endpoint \u2014 Talonic Docs",
|
|
12264
13244
|
description: "List all tools available to the embedded agent including their impact level (read/write) and descriptions for discovering agent capabilities.",
|
|
12265
13245
|
content: [
|
|
13246
|
+
{ type: "paragraph", text: "Discover all tools available to the embedded AI agent. Each tool declares its **impact level** \u2014 whether it performs a read-only operation or a mutation \u2014 so you can build permission-aware integrations. Use this endpoint to dynamically generate tool descriptions for external AI agents or to audit available capabilities." },
|
|
13247
|
+
{ type: "callout", variant: "info", text: "Impact levels follow a severity scale: `read` (no side effects), `draft_mutation` (creates drafts only), `live_mutation` (modifies live data), and `irreversible` (permanent changes like deletion). Use these to implement confirmation gates in your integration." },
|
|
12266
13248
|
{
|
|
12267
13249
|
type: "endpoint",
|
|
12268
13250
|
method: "GET",
|
|
@@ -12310,8 +13292,11 @@ var sections34 = [
|
|
|
12310
13292
|
related: [
|
|
12311
13293
|
{ label: "Get Workspace Context", slug: "agent-context" }
|
|
12312
13294
|
],
|
|
12313
|
-
faq: [
|
|
12314
|
-
|
|
13295
|
+
faq: [
|
|
13296
|
+
{ question: "Can I invoke agent tools directly via the API?", answer: "The tools endpoint lists available capabilities. Tool invocation happens through the embedded agent chat interface or via the Claude API proxy, not through direct REST calls to individual tools." },
|
|
13297
|
+
{ question: "What impact levels are available?", answer: "Four levels: `read` (safe, no side effects), `draft_mutation` (creates drafts), `live_mutation` (modifies live data), and `irreversible` (permanent changes). Use these to build confirmation gates." }
|
|
13298
|
+
],
|
|
13299
|
+
mentions: ["agent tools", "capabilities", "impact levels", "tool discovery"]
|
|
12315
13300
|
}
|
|
12316
13301
|
];
|
|
12317
13302
|
|
|
@@ -12325,6 +13310,16 @@ var sections35 = [
|
|
|
12325
13310
|
description: "List all matching configurations for the current workspace. Each config defines field mappings, strategies, and weights used to match documents against reference data.",
|
|
12326
13311
|
content: [
|
|
12327
13312
|
{ type: "paragraph", text: "Matching connects incoming documents to reference datasets using configurable field-level strategies. Each configuration defines which fields to compare, how to compare them (exact, fuzzy, date range, numeric range), and the relative weight of each field in the overall confidence score." },
|
|
13313
|
+
{ type: "paragraph", text: "A typical matching workflow is: upload reference data (CSV or XLSX), create a config with field mappings, run matching, and review results. For complex datasets, use the **generate strategy** endpoint to let AI recommend optimal field mappings and weights." },
|
|
13314
|
+
{
|
|
13315
|
+
type: "list",
|
|
13316
|
+
items: [
|
|
13317
|
+
"Each config targets a single reference dataset.",
|
|
13318
|
+
"Field mappings define source-to-target column pairs with a comparison strategy.",
|
|
13319
|
+
"Weights should sum to 1.0 across all field mappings.",
|
|
13320
|
+
"The confidence threshold (default 0.85) controls the minimum score for a match."
|
|
13321
|
+
]
|
|
13322
|
+
},
|
|
12328
13323
|
{
|
|
12329
13324
|
type: "endpoint",
|
|
12330
13325
|
method: "GET",
|
|
@@ -12414,7 +13409,8 @@ var sections35 = [
|
|
|
12414
13409
|
{ label: "Run Matching", slug: "run-matching" }
|
|
12415
13410
|
],
|
|
12416
13411
|
faq: [
|
|
12417
|
-
{ question: "What is a matching configuration?", answer: "A matching configuration defines which fields to compare between documents and reference data, what comparison strategy to use per field (exact, fuzzy, date_range, numeric_range), and how much weight each field carries in the overall confidence score." }
|
|
13412
|
+
{ question: "What is a matching configuration?", answer: "A matching configuration defines which fields to compare between documents and reference data, what comparison strategy to use per field (exact, fuzzy, date_range, numeric_range), and how much weight each field carries in the overall confidence score." },
|
|
13413
|
+
{ question: "Can I have multiple configs for the same reference dataset?", answer: "Yes. You can create multiple configs targeting the same reference dataset with different field mappings, strategies, or thresholds to test different matching approaches." }
|
|
12418
13414
|
],
|
|
12419
13415
|
mentions: ["matching", "reference data", "field mapping"]
|
|
12420
13416
|
},
|
|
@@ -12425,6 +13421,17 @@ var sections35 = [
|
|
|
12425
13421
|
seoTitle: "Create Matching Config \u2014 Talonic Docs",
|
|
12426
13422
|
description: "Create a matching configuration with field mappings, comparison strategies (exact, fuzzy, date_range, numeric_range), and per-field weights that sum to 1.0.",
|
|
12427
13423
|
content: [
|
|
13424
|
+
{ type: "paragraph", text: "Create a matching configuration that defines how documents are compared against a reference dataset. Each field mapping specifies a source field (from extracted documents), a target column (in the reference data), a comparison strategy, and a relative weight." },
|
|
13425
|
+
{ type: "callout", variant: "info", text: "Field weights should sum to 1.0. The overall confidence score for a match is the weighted sum of per-field scores. Use the **generate strategy** endpoint to get AI-recommended mappings if you are unsure which fields and weights to use." },
|
|
13426
|
+
{
|
|
13427
|
+
type: "list",
|
|
13428
|
+
items: [
|
|
13429
|
+
"**exact** \u2014 case-insensitive string equality. Best for codes, IDs, and standardized values.",
|
|
13430
|
+
"**fuzzy** \u2014 Levenshtein/token similarity. Handles name variations and minor typos.",
|
|
13431
|
+
"**date_range** \u2014 date proximity within a configurable tolerance window.",
|
|
13432
|
+
"**numeric_range** \u2014 numeric proximity within a configurable tolerance. Handles rounding differences."
|
|
13433
|
+
]
|
|
13434
|
+
},
|
|
12428
13435
|
{
|
|
12429
13436
|
type: "endpoint",
|
|
12430
13437
|
method: "POST",
|
|
@@ -12522,7 +13529,9 @@ var sections35 = [
|
|
|
12522
13529
|
{ label: "Generate Strategy", slug: "generate-strategy" }
|
|
12523
13530
|
],
|
|
12524
13531
|
faq: [
|
|
12525
|
-
{ question: "What strategies are available for field matching?", answer: "Four strategies: exact (case-insensitive equality), fuzzy (Levenshtein/token similarity), date_range (date proximity within tolerance), and numeric_range (numeric proximity within tolerance)." }
|
|
13532
|
+
{ question: "What strategies are available for field matching?", answer: "Four strategies: exact (case-insensitive equality), fuzzy (Levenshtein/token similarity), date_range (date proximity within tolerance), and numeric_range (numeric proximity within tolerance)." },
|
|
13533
|
+
{ question: "Do field weights need to sum to exactly 1.0?", answer: "Weights should sum to 1.0 for meaningful confidence scores. If they do not sum to 1.0, the system normalizes them internally, but explicitly setting weights to sum to 1.0 gives you predictable confidence values." },
|
|
13534
|
+
{ question: "Can I use the same reference dataset column in multiple mappings?", answer: "Yes. A single target column can appear in multiple field mappings with different source fields and strategies, which is useful when multiple document fields might correspond to the same reference column." }
|
|
12526
13535
|
],
|
|
12527
13536
|
mentions: ["matching config", "field strategy", "weights"]
|
|
12528
13537
|
},
|
|
@@ -12533,6 +13542,7 @@ var sections35 = [
|
|
|
12533
13542
|
seoTitle: "Manage Matching Config \u2014 Talonic Docs",
|
|
12534
13543
|
description: "Get matching configuration details, update field mappings and weights, or delete a configuration. Deleting a config does not remove past run results.",
|
|
12535
13544
|
content: [
|
|
13545
|
+
{ type: "paragraph", text: "Retrieve, update, or delete a matching configuration. Updates to field mappings and thresholds take effect on the next run \u2014 they do not retroactively change past results. Deleting a config removes the configuration but preserves all historical run results for audit purposes." },
|
|
12536
13546
|
{
|
|
12537
13547
|
type: "endpoint",
|
|
12538
13548
|
method: "GET",
|
|
@@ -12673,7 +13683,10 @@ var sections35 = [
|
|
|
12673
13683
|
{ label: "List Configs", slug: "list-matching-configs" },
|
|
12674
13684
|
{ label: "Run Matching", slug: "run-matching" }
|
|
12675
13685
|
],
|
|
12676
|
-
faq: [
|
|
13686
|
+
faq: [
|
|
13687
|
+
{ question: "Can I change the reference_data_id on an existing config?", answer: "No. The reference dataset is fixed at creation. To match against a different dataset, create a new configuration." },
|
|
13688
|
+
{ question: "Does updating a config affect past run results?", answer: "No. Updates only affect future runs. Past results are immutable and retain the field mappings and thresholds that were active when the run executed." }
|
|
13689
|
+
],
|
|
12677
13690
|
mentions: ["matching config management"]
|
|
12678
13691
|
},
|
|
12679
13692
|
{
|
|
@@ -12683,6 +13696,9 @@ var sections35 = [
|
|
|
12683
13696
|
seoTitle: "Run Matching \u2014 Talonic Docs",
|
|
12684
13697
|
description: "Execute a matching run against reference data asynchronously, or use smart-run for AI-assisted matching that auto-tunes strategies. Poll run status via the run detail endpoint.",
|
|
12685
13698
|
content: [
|
|
13699
|
+
{ type: "paragraph", text: "Execute a matching run to compare documents against the reference dataset defined in the config. Runs are **asynchronous** \u2014 the endpoint returns immediately with a `queued` status. Poll the run detail endpoint to track progress and retrieve results." },
|
|
13700
|
+
{ type: "paragraph", text: "The **smart-run** variant uses a pre-generated AI strategy that automatically tunes comparison thresholds and field priorities. This can significantly improve accuracy on datasets with heterogeneous formatting, inconsistent naming, or mixed-language content." },
|
|
13701
|
+
{ type: "callout", variant: "info", text: "Matching runs are processed via a BullMQ job queue. Large datasets may take several minutes. Use `GET /v1/matching/runs/:id/progress` to poll for live progress updates while the run is active." },
|
|
12686
13702
|
{
|
|
12687
13703
|
type: "endpoint",
|
|
12688
13704
|
method: "POST",
|
|
@@ -12791,7 +13807,9 @@ var sections35 = [
|
|
|
12791
13807
|
{ label: "Results", slug: "get-matching-results" }
|
|
12792
13808
|
],
|
|
12793
13809
|
faq: [
|
|
12794
|
-
{ question: "What is the difference between run and smart-run?", answer: "A standard run uses the strategies and weights defined in the config. A smart-run uses a pre-generated strategy with AI-tuned comparison thresholds, which can improve accuracy on heterogeneous datasets." }
|
|
13810
|
+
{ question: "What is the difference between run and smart-run?", answer: "A standard run uses the strategies and weights defined in the config. A smart-run uses a pre-generated strategy with AI-tuned comparison thresholds, which can improve accuracy on heterogeneous datasets." },
|
|
13811
|
+
{ question: "How long does a matching run take?", answer: "Runtime depends on the number of documents and reference rows. For a typical dataset of a few hundred documents against a few thousand reference rows, expect 1-5 minutes. Large datasets may take longer." },
|
|
13812
|
+
{ question: "Can I run matching on a subset of documents?", answer: "Currently, runs match all unmatched documents in the workspace. Document-level filtering is not yet supported \u2014 use the results endpoint to filter outcomes after the run completes." }
|
|
12795
13813
|
],
|
|
12796
13814
|
mentions: ["matching run", "smart matching", "async"]
|
|
12797
13815
|
},
|
|
@@ -12802,6 +13820,8 @@ var sections35 = [
|
|
|
12802
13820
|
seoTitle: "Matching Run Detail \u2014 Talonic Docs",
|
|
12803
13821
|
description: "Get the status, progress, and summary of a matching run. Status progresses from queued to running to completed or failed.",
|
|
12804
13822
|
content: [
|
|
13823
|
+
{ type: "paragraph", text: "Retrieve the current state of a matching run. Poll this endpoint while `status` is `queued`, `running`, or `ai_resolving` to track progress. Once `completed`, the response includes the top 50 results by confidence. Use the results endpoint for full paginated access." },
|
|
13824
|
+
{ type: "callout", variant: "info", text: "The `ai_resolving` status indicates that the run has finished standard matching and is now running an AI resolution pass on low-confidence rows. This pass uses Claude Haiku to disambiguate borderline matches." },
|
|
12805
13825
|
{
|
|
12806
13826
|
type: "endpoint",
|
|
12807
13827
|
method: "GET",
|
|
@@ -12889,7 +13909,10 @@ var sections35 = [
|
|
|
12889
13909
|
{ label: "Results", slug: "get-matching-results" },
|
|
12890
13910
|
{ label: "Run Matching", slug: "run-matching" }
|
|
12891
13911
|
],
|
|
12892
|
-
faq: [
|
|
13912
|
+
faq: [
|
|
13913
|
+
{ question: "Why does the run detail only show 50 results?", answer: "The run detail endpoint includes the top 50 results by confidence for quick inspection. Use GET /v1/matching/runs/:id/results with pagination for the full result set." },
|
|
13914
|
+
{ question: "What does the ai_resolving status mean?", answer: "The run has completed standard field-level matching and is now running an AI resolution pass (using Claude Haiku) on rows with low confidence scores. This can upgrade borderline matches or confirm non-matches." }
|
|
13915
|
+
],
|
|
12893
13916
|
mentions: ["matching run status", "progress"]
|
|
12894
13917
|
},
|
|
12895
13918
|
{
|
|
@@ -12899,6 +13922,8 @@ var sections35 = [
|
|
|
12899
13922
|
seoTitle: "Matching Results \u2014 Talonic Docs",
|
|
12900
13923
|
description: "Retrieve matching results for a completed run. Returns the top 5 candidates per document with weighted confidence scores and per-field evidence breakdowns.",
|
|
12901
13924
|
content: [
|
|
13925
|
+
{ type: "paragraph", text: "Retrieve the full paginated results for a completed matching run. Each result represents a document matched (or unmatched) against the reference dataset, with a weighted confidence score and per-field evidence breakdown showing how each field contributed to the overall score." },
|
|
13926
|
+
{ type: "callout", variant: "info", text: "Results with `status: pending` have not been reviewed. Use `POST /v1/matching/runs/:runId/results/:resultId/review` to approve or reject individual matches. Approved matches can be used downstream for data enrichment and reconciliation workflows." },
|
|
12902
13927
|
{
|
|
12903
13928
|
type: "endpoint",
|
|
12904
13929
|
method: "GET",
|
|
@@ -12977,7 +14002,9 @@ var sections35 = [
|
|
|
12977
14002
|
{ label: "List Configs", slug: "list-matching-configs" }
|
|
12978
14003
|
],
|
|
12979
14004
|
faq: [
|
|
12980
|
-
{ question: "How is the confidence score calculated?", answer: "Each field produces a strategy-specific score (0-1). The overall confidence is the weighted sum of per-field scores, using the weights defined in the matching config." }
|
|
14005
|
+
{ question: "How is the confidence score calculated?", answer: "Each field produces a strategy-specific score (0-1). The overall confidence is the weighted sum of per-field scores, using the weights defined in the matching config." },
|
|
14006
|
+
{ question: "What does null matched_reference_row_id mean?", answer: "A null matched_reference_row_id means no reference row scored above the configured threshold for this document. The document is effectively unmatched." },
|
|
14007
|
+
{ question: "Can I review results programmatically?", answer: "Yes. Use POST /v1/matching/runs/:runId/results/:resultId/review with a status of approved or rejected to programmatically review match results." }
|
|
12981
14008
|
],
|
|
12982
14009
|
mentions: ["matching results", "confidence score", "evidence"]
|
|
12983
14010
|
},
|
|
@@ -12988,6 +14015,9 @@ var sections35 = [
|
|
|
12988
14015
|
seoTitle: "Generate Matching Strategy \u2014 Talonic Docs",
|
|
12989
14016
|
description: "Use AI to auto-suggest field mappings and strategies for a reference dataset. Analyses column names, data types, and sample values to recommend optimal matching configurations.",
|
|
12990
14017
|
content: [
|
|
14018
|
+
{ type: "paragraph", text: "Let AI analyse your reference dataset and recommend field mappings, comparison strategies, and weights. The strategy generator examines column names, data types, and sample values to produce an optimal matching configuration. Use the generated strategy as a starting point or pass it directly to a smart-run." },
|
|
14019
|
+
{ type: "paragraph", text: 'Each generated strategy includes a `rationale` per field mapping explaining why the AI chose that strategy and weight. You can optionally provide a `user_prompt` with natural language guidance (e.g. "prioritise date matching" or "vendor name is the most important field") to steer the generation.' },
|
|
14020
|
+
{ type: "callout", variant: "info", text: "Strategy generation does not modify any existing configs. The generated strategy is saved independently and can be applied to a config via the smart-run endpoint, or used as a reference when manually creating a config." },
|
|
12991
14021
|
{
|
|
12992
14022
|
type: "endpoint",
|
|
12993
14023
|
method: "POST",
|
|
@@ -13060,9 +14090,14 @@ var sections35 = [
|
|
|
13060
14090
|
}
|
|
13061
14091
|
],
|
|
13062
14092
|
related: [
|
|
13063
|
-
{ label: "Create Config", slug: "create-matching-config" }
|
|
14093
|
+
{ label: "Create Config", slug: "create-matching-config" },
|
|
14094
|
+
{ label: "Run Smart Matching", slug: "run-matching" }
|
|
14095
|
+
],
|
|
14096
|
+
faq: [
|
|
14097
|
+
{ question: "Does strategy generation cost credits?", answer: "Yes. Strategy generation uses AI (Claude) to analyse columns and sample data. The cost is minimal \u2014 typically a single Haiku call \u2014 and is reflected in your credit usage." },
|
|
14098
|
+
{ question: "Can I regenerate a strategy with different guidance?", answer: "Yes. Call the generate endpoint again with a different user_prompt. Each call creates a new strategy version. Previous versions are retained." },
|
|
14099
|
+
{ question: "How do I use a generated strategy?", answer: "Pass the strategy ID to POST /v1/matching/configs/:id/smart-run to execute a run using the AI-generated thresholds and weights. Alternatively, copy the field_mappings from the strategy into a new config via POST /v1/matching/configs." }
|
|
13064
14100
|
],
|
|
13065
|
-
faq: [],
|
|
13066
14101
|
mentions: ["AI strategy", "auto-suggest", "field mapping"]
|
|
13067
14102
|
}
|
|
13068
14103
|
];
|
|
@@ -13077,6 +14112,18 @@ var sections36 = [
|
|
|
13077
14112
|
description: "List all configured delivery destinations for the workspace. Destinations define where processed data is sent \u2014 webhook, SFTP, S3, Azure Blob, Google Drive, or OneDrive.",
|
|
13078
14113
|
content: [
|
|
13079
14114
|
{ type: "paragraph", text: "Delivery routes processed data to external systems through a four-part pipeline: **signals** (domain events) are matched to **bindings** (routing rules), resolved into **deliverables** (payloads), serialized, and sent to **destinations** (connectors). Destinations define the transport protocol and credentials." },
|
|
14115
|
+
{ type: "paragraph", text: "Each destination is an instance of one of the six supported connector types. A single destination can serve multiple bindings \u2014 for example, you might have one S3 destination with separate bindings for extraction results and run outcomes. Auth credentials are stored securely and never returned in API responses." },
|
|
14116
|
+
{
|
|
14117
|
+
type: "list",
|
|
14118
|
+
items: [
|
|
14119
|
+
"**webhook** \u2014 HTTP POST with optional HMAC-SHA256 signing and idempotency headers.",
|
|
14120
|
+
"**sftp** \u2014 File upload via SSH with password or private key auth.",
|
|
14121
|
+
"**s3** \u2014 Object upload to AWS S3 with access key auth.",
|
|
14122
|
+
"**azure_blob** \u2014 Blob upload to Azure Storage with connection string or account key.",
|
|
14123
|
+
"**google_drive** \u2014 File upload via OAuth (`drive.file` scope).",
|
|
14124
|
+
"**onedrive** \u2014 File upload via OAuth (`Files.ReadWrite.All` scope)."
|
|
14125
|
+
]
|
|
14126
|
+
},
|
|
13080
14127
|
{
|
|
13081
14128
|
type: "endpoint",
|
|
13082
14129
|
method: "GET",
|
|
@@ -13156,7 +14203,9 @@ var sections36 = [
|
|
|
13156
14203
|
{ label: "List Bindings", slug: "list-delivery-bindings" }
|
|
13157
14204
|
],
|
|
13158
14205
|
faq: [
|
|
13159
|
-
{ question: "What destination types are supported?", answer: "Six connector types: webhook (HTTP POST with HMAC-SHA256 signing), sftp, s3, azure_blob, google_drive (OAuth), and onedrive (OAuth)." }
|
|
14206
|
+
{ question: "What destination types are supported?", answer: "Six connector types: webhook (HTTP POST with HMAC-SHA256 signing), sftp, s3, azure_blob, google_drive (OAuth), and onedrive (OAuth)." },
|
|
14207
|
+
{ question: "Why is my destination marked as inactive?", answer: "Destinations are automatically deactivated when an auth failure or SSRF block occurs during delivery. Fix the credentials or URL, then re-enable via PUT /v1/delivery/destinations/:id." },
|
|
14208
|
+
{ question: "Are auth credentials returned in the response?", answer: "No. Auth credentials are never returned in API responses. The has_auth_config and has_signing_secret boolean fields indicate whether credentials are configured." }
|
|
13160
14209
|
],
|
|
13161
14210
|
mentions: ["delivery", "destinations", "connectors"]
|
|
13162
14211
|
},
|
|
@@ -13167,6 +14216,8 @@ var sections36 = [
|
|
|
13167
14216
|
seoTitle: "Create Delivery Destination \u2014 Talonic Docs",
|
|
13168
14217
|
description: "Create a delivery destination with connector type, transport config, and authentication. Supported types: webhook, sftp, s3, azure_blob, google_drive, onedrive.",
|
|
13169
14218
|
content: [
|
|
14219
|
+
{ type: "paragraph", text: "Create a new delivery destination by specifying the connector type, transport configuration, and optional authentication. The `config` and `auth_config` schemas vary by destination type \u2014 see the catalog endpoint for connector capabilities." },
|
|
14220
|
+
{ type: "callout", variant: "info", text: "OAuth-based destinations (google_drive, onedrive) require completing an OAuth flow before creating the destination. Use the OAuth start endpoint to initiate the flow and obtain tokens." },
|
|
13170
14221
|
{
|
|
13171
14222
|
type: "endpoint",
|
|
13172
14223
|
method: "POST",
|
|
@@ -13254,8 +14305,11 @@ var sections36 = [
|
|
|
13254
14305
|
{ label: "List Destinations", slug: "list-delivery-destinations" },
|
|
13255
14306
|
{ label: "Test Destination", slug: "manage-delivery-destination" }
|
|
13256
14307
|
],
|
|
13257
|
-
faq: [
|
|
13258
|
-
|
|
14308
|
+
faq: [
|
|
14309
|
+
{ question: "How does webhook signing work?", answer: 'When a signing_secret is configured, every delivery includes an X-Talonic-Signature header with the format t=<timestamp>,v1=<hex>. The signature is the HMAC-SHA256 of "<timestamp>.<body>", keyed with the signing secret. Verify this on your server to confirm authenticity.' },
|
|
14310
|
+
{ question: "What is the payload cap?", answer: "The global payload cap is 5 MiB per delivery. You can override this per destination with payload_cap_bytes. Payloads exceeding the cap are rejected with a non-retryable payload_too_large error." }
|
|
14311
|
+
],
|
|
14312
|
+
mentions: ["destination creation", "webhook", "HMAC", "signing"]
|
|
13259
14313
|
},
|
|
13260
14314
|
{
|
|
13261
14315
|
slug: "manage-delivery-destination",
|
|
@@ -13264,6 +14318,8 @@ var sections36 = [
|
|
|
13264
14318
|
seoTitle: "Manage Delivery Destination \u2014 Talonic Docs",
|
|
13265
14319
|
description: "Get destination details, update config, delete a destination, or send a test payload to verify connectivity. Auth credentials are always redacted in responses.",
|
|
13266
14320
|
content: [
|
|
14321
|
+
{ type: "paragraph", text: "Manage a single destination: retrieve its current config, update transport settings or credentials, delete it, or test connectivity. The **test** endpoint probes the destination without delivering real data \u2014 file-drop connectors (S3, SFTP, Azure Blob) verify bucket/container reachability without writing any objects." },
|
|
14322
|
+
{ type: "callout", variant: "warning", text: "Deleting a destination cascades to all its bindings, delivery items, and DLQ entries. This is irreversible. Disable the destination (`is_active: false`) instead if you want to preserve history." },
|
|
13267
14323
|
{
|
|
13268
14324
|
type: "endpoint",
|
|
13269
14325
|
method: "GET",
|
|
@@ -13453,10 +14509,14 @@ var sections36 = [
|
|
|
13453
14509
|
}
|
|
13454
14510
|
],
|
|
13455
14511
|
related: [
|
|
13456
|
-
{ label: "List Destinations", slug: "list-delivery-destinations" }
|
|
14512
|
+
{ label: "List Destinations", slug: "list-delivery-destinations" },
|
|
14513
|
+
{ label: "Create Destination", slug: "create-delivery-destination" }
|
|
13457
14514
|
],
|
|
13458
|
-
faq: [
|
|
13459
|
-
|
|
14515
|
+
faq: [
|
|
14516
|
+
{ question: "Does the test endpoint write data to the destination?", answer: "No. File-drop connectors (S3, SFTP, Azure Blob, Google Drive, OneDrive) use lightweight probes (HeadBucket, list, getProperties) that verify reachability without creating any objects. Webhook destinations receive a small synthetic payload." },
|
|
14517
|
+
{ question: "Can I re-enable a destination that was auto-disabled?", answer: "Yes. Fix the underlying issue (expired credentials, unreachable URL), then update the destination with the corrected config. The destination will be re-enabled automatically, or you can explicitly set is_active: true." }
|
|
14518
|
+
],
|
|
14519
|
+
mentions: ["destination management", "test delivery", "cascade delete"]
|
|
13460
14520
|
},
|
|
13461
14521
|
{
|
|
13462
14522
|
slug: "list-delivery-bindings",
|
|
@@ -13466,6 +14526,7 @@ var sections36 = [
|
|
|
13466
14526
|
description: "List all delivery bindings that route signals to destinations. Each binding maps an event type to a deliverable resolver, serializer format, and destination.",
|
|
13467
14527
|
content: [
|
|
13468
14528
|
{ type: "paragraph", text: "A binding connects the four parts of the delivery pipeline: a **signal filter** (which events to listen for), a **deliverable type** (what payload to build), a **serializer format** (how to encode it), and a **destination** (where to send it). The compatibility triangle is enforced on creation." },
|
|
14529
|
+
{ type: "paragraph", text: "Bindings are the core routing configuration for delivery. When a domain event (e.g. `document.extracted`) fires, the poller matches it against all active bindings. Each matching binding produces a separate delivery attempt \u2014 so a single event can fan out to multiple destinations simultaneously." },
|
|
13469
14530
|
{
|
|
13470
14531
|
type: "endpoint",
|
|
13471
14532
|
method: "GET",
|
|
@@ -13533,7 +14594,8 @@ var sections36 = [
|
|
|
13533
14594
|
{ label: "Catalog", slug: "delivery-catalog" }
|
|
13534
14595
|
],
|
|
13535
14596
|
faq: [
|
|
13536
|
-
{ question: "What is the compatibility triangle?", answer: "When creating a binding, the system verifies that the signal event type, deliverable resolver, and serializer format are mutually compatible. For example, a CSV serializer cannot serialize a graph deliverable." }
|
|
14597
|
+
{ question: "What is the compatibility triangle?", answer: "When creating a binding, the system verifies that the signal event type, deliverable resolver, and serializer format are mutually compatible. For example, a CSV serializer cannot serialize a graph deliverable." },
|
|
14598
|
+
{ question: "Can multiple bindings target the same destination?", answer: "Yes. A single destination can serve many bindings with different signal filters, deliverable types, and serializer formats. Each binding produces independent delivery attempts." }
|
|
13537
14599
|
],
|
|
13538
14600
|
mentions: ["bindings", "signal filter", "compatibility triangle"]
|
|
13539
14601
|
},
|
|
@@ -13544,6 +14606,8 @@ var sections36 = [
|
|
|
13544
14606
|
seoTitle: "Create Delivery Binding \u2014 Talonic Docs",
|
|
13545
14607
|
description: "Create a delivery binding that routes domain signals through a deliverable resolver and serializer to a destination. Includes field mapping and retry policy configuration.",
|
|
13546
14608
|
content: [
|
|
14609
|
+
{ type: "paragraph", text: "Create a binding that wires a domain event to a destination. The **compatibility triangle** is validated on creation: the signal event type must be compatible with the deliverable resolver, the serializer must support the deliverable shape, and the connector must support the serializer format." },
|
|
14610
|
+
{ type: "callout", variant: "info", text: "Use the catalog endpoints (`/v1/delivery/catalog/*`) to discover valid combinations before creating a binding. The catalog lists all available signals, deliverables, serializers, and connectors with their compatibility constraints." },
|
|
13547
14611
|
{
|
|
13548
14612
|
type: "endpoint",
|
|
13549
14613
|
method: "POST",
|
|
@@ -13631,8 +14695,11 @@ var sections36 = [
|
|
|
13631
14695
|
{ label: "List Bindings", slug: "list-delivery-bindings" },
|
|
13632
14696
|
{ label: "Catalog", slug: "delivery-catalog" }
|
|
13633
14697
|
],
|
|
13634
|
-
faq: [
|
|
13635
|
-
|
|
14698
|
+
faq: [
|
|
14699
|
+
{ question: "What is the default retry policy?", answer: "By default, deliveries are retried up to 7 times with an exponential backoff schedule: 0s, 30s, 2m, 8m, 30m, 2h, 8h. Override this with the delivery_policy field." },
|
|
14700
|
+
{ question: "What is the field_map for?", answer: "The field_map applies a JSONPath projection to the resolved payload before serialization. Use it to rename fields, drop internal fields, or add static values. If omitted, the full payload is delivered as-is." }
|
|
14701
|
+
],
|
|
14702
|
+
mentions: ["binding creation", "field map", "delivery policy", "retry"]
|
|
13636
14703
|
},
|
|
13637
14704
|
{
|
|
13638
14705
|
slug: "manage-delivery-binding",
|
|
@@ -13641,6 +14708,7 @@ var sections36 = [
|
|
|
13641
14708
|
seoTitle: "Manage Delivery Binding \u2014 Talonic Docs",
|
|
13642
14709
|
description: "Get binding details, update signal filters or field maps, delete a binding, or preview the resolved payload for a binding without sending it.",
|
|
13643
14710
|
content: [
|
|
14711
|
+
{ type: "paragraph", text: "Manage a single delivery binding: retrieve its configuration, update the signal filter or field map, delete it, or preview the payload it would produce. Updates re-validate the compatibility triangle. Deleting a binding stops future routing but allows in-flight deliveries to complete." },
|
|
13644
14712
|
{
|
|
13645
14713
|
type: "endpoint",
|
|
13646
14714
|
method: "GET",
|
|
@@ -13832,9 +14900,13 @@ var sections36 = [
|
|
|
13832
14900
|
}
|
|
13833
14901
|
],
|
|
13834
14902
|
related: [
|
|
13835
|
-
{ label: "List Bindings", slug: "list-delivery-bindings" }
|
|
14903
|
+
{ label: "List Bindings", slug: "list-delivery-bindings" },
|
|
14904
|
+
{ label: "Catalog", slug: "delivery-catalog" }
|
|
14905
|
+
],
|
|
14906
|
+
faq: [
|
|
14907
|
+
{ question: "Does deleting a binding affect in-flight deliveries?", answer: "No. In-flight deliveries (already queued in the delivery job queue) will complete normally. Deletion only prevents new events from being routed to this binding." },
|
|
14908
|
+
{ question: "Is the preview endpoint live?", answer: "The public API preview endpoint currently returns a stub response. The internal preview endpoint is fully functional and walks the full resolve -> project -> serialize pipeline." }
|
|
13836
14909
|
],
|
|
13837
|
-
faq: [],
|
|
13838
14910
|
mentions: ["binding management", "preview"]
|
|
13839
14911
|
},
|
|
13840
14912
|
{
|
|
@@ -13844,6 +14916,8 @@ var sections36 = [
|
|
|
13844
14916
|
seoTitle: "Delivery History \u2014 Talonic Docs",
|
|
13845
14917
|
description: "View delivery attempt history with status, HTTP codes, and timing. Get detail for a single item or replay a failed delivery attempt.",
|
|
13846
14918
|
content: [
|
|
14919
|
+
{ type: "paragraph", text: "The delivery history tracks every attempt to deliver a payload to a destination. Each attempt is recorded as a **delivery item** with status, timing, HTTP response code, and optional request/response bodies. Use this endpoint to audit delivery performance and debug failures." },
|
|
14920
|
+
{ type: "callout", variant: "info", text: "Request and response bodies are truncated to 10 KB and retained for a configurable period (default 30 days). After the retention period, bodies are nulled but metadata (status, HTTP code, duration, error code) is preserved indefinitely." },
|
|
13847
14921
|
{
|
|
13848
14922
|
type: "endpoint",
|
|
13849
14923
|
method: "GET",
|
|
@@ -14015,8 +15089,11 @@ var sections36 = [
|
|
|
14015
15089
|
{ label: "Dead Letter Queue", slug: "delivery-dlq" },
|
|
14016
15090
|
{ label: "List Bindings", slug: "list-delivery-bindings" }
|
|
14017
15091
|
],
|
|
14018
|
-
faq: [
|
|
14019
|
-
|
|
15092
|
+
faq: [
|
|
15093
|
+
{ question: "What is the idempotency key?", answer: "The idempotency key is a deterministic SHA-256 hash of the binding ID and event ID. It is sent on the wire (as an HTTP header, object metadata, or filename token depending on the connector) so receivers can deduplicate repeated deliveries." },
|
|
15094
|
+
{ question: "How does replay differ from DLQ replay?", answer: "Item replay re-enqueues a specific (binding, event) pair with a new attempt number. DLQ replay deletes the dead-letter row and re-enqueues with attempt=1. Both preserve the same idempotency key so receivers can deduplicate." }
|
|
15095
|
+
],
|
|
15096
|
+
mentions: ["delivery history", "replay", "attempt log", "idempotency"]
|
|
14020
15097
|
},
|
|
14021
15098
|
{
|
|
14022
15099
|
slug: "delivery-dlq",
|
|
@@ -14026,6 +15103,8 @@ var sections36 = [
|
|
|
14026
15103
|
description: "Inspect and manage permanently failed deliveries in the dead letter queue. Replay individual items or discard them.",
|
|
14027
15104
|
content: [
|
|
14028
15105
|
{ type: "paragraph", text: "Deliveries that exhaust all retry attempts are moved to the dead letter queue (DLQ). Items in the DLQ can be inspected, replayed (enqueues a fresh attempt), or deleted." },
|
|
15106
|
+
{ type: "paragraph", text: "The DLQ is the terminal state for failed deliveries. Common error codes include `connector_5xx` (destination returned a server error), `auth_failed` (invalid credentials), `ssrf_blocked` (destination URL resolves to a private network), and `payload_too_large` (payload exceeds the cap). Non-retryable errors (`auth_failed`, `ssrf_blocked`) skip the retry ladder and go directly to the DLQ." },
|
|
15107
|
+
{ type: "callout", variant: "warning", text: "DLQ replay **deletes** the dead-letter row before enqueuing the new attempt. If the enqueue fails, the DLQ row is lost. This is an intentional trade-off: the system prefers losing a DLQ row over duplicating a delivery." },
|
|
14029
15108
|
{
|
|
14030
15109
|
type: "endpoint",
|
|
14031
15110
|
method: "GET",
|
|
@@ -14158,9 +15237,11 @@ var sections36 = [
|
|
|
14158
15237
|
{ label: "List Destinations", slug: "list-delivery-destinations" }
|
|
14159
15238
|
],
|
|
14160
15239
|
faq: [
|
|
14161
|
-
{ question: "When does a delivery end up in the DLQ?", answer: "A delivery enters the dead letter queue after exhausting all retry attempts defined in the binding delivery_policy (default 7 attempts with exponential backoff over ~10 hours)." }
|
|
15240
|
+
{ question: "When does a delivery end up in the DLQ?", answer: "A delivery enters the dead letter queue after exhausting all retry attempts defined in the binding delivery_policy (default 7 attempts with exponential backoff over ~10 hours)." },
|
|
15241
|
+
{ question: "What error codes indicate non-retryable failures?", answer: "auth_failed (invalid credentials), ssrf_blocked (destination URL resolves to private network), and payload_too_large (exceeds payload cap) skip the retry ladder and go directly to the DLQ after a single attempt." },
|
|
15242
|
+
{ question: "Can I set up alerts for DLQ entries?", answer: "Yes. Create a binding with signal_filter event_type: delivery.item.failed targeting your alerting webhook. The delivery system emits meta-signals for failed deliveries, with built-in loop prevention to avoid infinite cascades." }
|
|
14162
15243
|
],
|
|
14163
|
-
mentions: ["dead letter queue", "DLQ", "failed delivery"]
|
|
15244
|
+
mentions: ["dead letter queue", "DLQ", "failed delivery", "error codes"]
|
|
14164
15245
|
},
|
|
14165
15246
|
{
|
|
14166
15247
|
slug: "delivery-catalog",
|
|
@@ -14170,6 +15251,7 @@ var sections36 = [
|
|
|
14170
15251
|
description: "Discover available signals, deliverable types, serializer formats, and connector types from the delivery registry. Use the catalog to build valid bindings.",
|
|
14171
15252
|
content: [
|
|
14172
15253
|
{ type: "paragraph", text: "The catalog endpoints expose the four delivery registries. Use them to discover which event types, deliverable resolvers, serializer formats, and connectors are available before creating bindings." },
|
|
15254
|
+
{ type: "paragraph", text: "Walk the catalog **top-down** to build valid binding configurations: start with signals to pick an event type, then check which deliverables are compatible with that signal, which serializers support the deliverable shape, and which connectors accept the serializer format. This ensures every combination passes the compatibility triangle." },
|
|
14173
15255
|
{
|
|
14174
15256
|
type: "endpoint",
|
|
14175
15257
|
method: "GET",
|
|
@@ -14362,9 +15444,11 @@ var sections36 = [
|
|
|
14362
15444
|
{ label: "List Destinations", slug: "list-delivery-destinations" }
|
|
14363
15445
|
],
|
|
14364
15446
|
faq: [
|
|
14365
|
-
{ question: "How do I know which combinations are valid for a binding?", answer: "Use the catalog endpoints to check compatibility. A valid binding requires: the signal event_type exists, the deliverable type lists that signal in compatible_signals, the serializer supports the deliverable shape, and the connector supports the serializer format." }
|
|
15447
|
+
{ question: "How do I know which combinations are valid for a binding?", answer: "Use the catalog endpoints to check compatibility. A valid binding requires: the signal event_type exists, the deliverable type lists that signal in compatible_signals, the serializer supports the deliverable shape, and the connector supports the serializer format." },
|
|
15448
|
+
{ question: "What is the difference between record and file delivery semantics?", answer: "Record semantics (webhook) deliver one event per HTTP request. File semantics (S3, SFTP, Azure Blob, Google Drive, OneDrive) write each delivery as a separate file/object, using a configurable filename template with tokens like {event_id} and {timestamp_iso}." },
|
|
15449
|
+
{ question: "Are all catalog entries available for use?", answer: "Most entries are live. Some deliverable types (case_snapshot, graph_relations) are registered as stubs with empty compatible_signals arrays \u2014 they appear in the catalog but cannot be used in bindings until their resolvers are implemented." }
|
|
14366
15450
|
],
|
|
14367
|
-
mentions: ["delivery catalog", "registry discovery"]
|
|
15451
|
+
mentions: ["delivery catalog", "registry discovery", "compatibility"]
|
|
14368
15452
|
}
|
|
14369
15453
|
];
|
|
14370
15454
|
|
|
@@ -14378,6 +15462,17 @@ var sections37 = [
|
|
|
14378
15462
|
description: "List all extraction batches with status, item counts, and provider information. Batches group documents submitted for deferred extraction at 50% cost.",
|
|
14379
15463
|
content: [
|
|
14380
15464
|
{ type: "paragraph", text: "Batch inference defers Claude extraction to the provider batch API at **50% cost**. Documents uploaded with `processing_mode=batch` run OCR and classification immediately but queue extraction. Batches accumulate items, submit to the provider on a timer or threshold, and poll for results." },
|
|
15465
|
+
{ type: "paragraph", text: "Talonic supports two batch providers: the **Anthropic Message Batches API** (direct) and **AWS Bedrock** (`CreateModelInvocationJob`). The provider is auto-detected from your workspace configuration. Results are typically delivered within 24 hours, with a maximum SLA of 48 hours." },
|
|
15466
|
+
{
|
|
15467
|
+
type: "list",
|
|
15468
|
+
items: [
|
|
15469
|
+
"Documents run Stage 1 (OCR + classification + triage) immediately on upload.",
|
|
15470
|
+
"Stage 2 (Claude extraction) is deferred to the batch API at 50% of standard cost.",
|
|
15471
|
+
"Batches require a minimum of 100 items (Bedrock requirement). Smaller uploads fall back to realtime extraction.",
|
|
15472
|
+
"Results are polled hourly. Use the sync endpoint to check sooner."
|
|
15473
|
+
]
|
|
15474
|
+
},
|
|
15475
|
+
{ type: "callout", variant: "info", text: "Image-only documents (no text content) cannot enter the batch pipeline because the text-only batch payload cannot carry image bytes. These are automatically routed to realtime extraction." },
|
|
14381
15476
|
{
|
|
14382
15477
|
type: "endpoint",
|
|
14383
15478
|
method: "GET",
|
|
@@ -14467,7 +15562,8 @@ var sections37 = [
|
|
|
14467
15562
|
],
|
|
14468
15563
|
faq: [
|
|
14469
15564
|
{ question: "What are the batch statuses?", answer: "Batches progress through: accumulating (collecting items), submitted (sent to provider), completed (all results received), failed (provider error), or cancelled (manually cancelled)." },
|
|
14470
|
-
{ question: "How much does batch processing cost?", answer: "Batch processing runs at 50% of the standard extraction cost. Results are delivered within 48 hours." }
|
|
15565
|
+
{ question: "How much does batch processing cost?", answer: "Batch processing runs at 50% of the standard extraction cost. Results are delivered within 48 hours." },
|
|
15566
|
+
{ question: "What is the minimum batch size?", answer: "Batches require a minimum of 100 items (a Bedrock requirement enforced for both providers). Uploads below this threshold fall back to realtime extraction with a warning." }
|
|
14471
15567
|
],
|
|
14472
15568
|
mentions: ["batch inference", "extraction batches", "cost reduction"]
|
|
14473
15569
|
},
|
|
@@ -14478,6 +15574,8 @@ var sections37 = [
|
|
|
14478
15574
|
seoTitle: "Get Batch Detail \u2014 Talonic Docs",
|
|
14479
15575
|
description: "Get detailed information for a single extraction batch including item counts, provider, status, and timing. Shows per-item breakdown when the batch is completed.",
|
|
14480
15576
|
content: [
|
|
15577
|
+
{ type: "paragraph", text: "Retrieve the full batch record including per-item status. Poll this endpoint while `status` is `submitted` to track progress. Once `completed`, each item shows its individual outcome and processing timestamp." },
|
|
15578
|
+
{ type: "callout", variant: "info", text: "Items that fail extraction in the batch are retried via **realtime** extraction (never re-batched) to preserve the original 48-hour SLA. Check `items[].status` for per-document outcomes." },
|
|
14481
15579
|
{
|
|
14482
15580
|
type: "endpoint",
|
|
14483
15581
|
method: "GET",
|
|
@@ -14561,8 +15659,11 @@ var sections37 = [
|
|
|
14561
15659
|
{ label: "List Batches", slug: "list-batches" },
|
|
14562
15660
|
{ label: "Sync Batch", slug: "sync-batch" }
|
|
14563
15661
|
],
|
|
14564
|
-
faq: [
|
|
14565
|
-
|
|
15662
|
+
faq: [
|
|
15663
|
+
{ question: "What happens to failed batch items?", answer: "Items that fail during batch processing are retried via realtime extraction (not re-batched). The document status transitions from batch_queued to the standard extraction pipeline." },
|
|
15664
|
+
{ question: "How long do batch results take?", answer: "Results are typically delivered within 24 hours. The maximum SLA is 48 hours. Use the sync endpoint to poll the provider for results before the automatic hourly check." }
|
|
15665
|
+
],
|
|
15666
|
+
mentions: ["batch detail", "provider metadata", "per-item status"]
|
|
14566
15667
|
},
|
|
14567
15668
|
{
|
|
14568
15669
|
slug: "sync-batch",
|
|
@@ -14571,6 +15672,7 @@ var sections37 = [
|
|
|
14571
15672
|
seoTitle: "Sync Batch with Provider \u2014 Talonic Docs",
|
|
14572
15673
|
description: "Force a sync with the provider to check for batch results. Useful when you do not want to wait for the hourly automatic poll.",
|
|
14573
15674
|
content: [
|
|
15675
|
+
{ type: "paragraph", text: "Force an immediate check with the batch provider (Anthropic or Bedrock) for results. By default, batches are polled automatically every hour. Use this endpoint when you need results sooner or want to verify the current provider-side status." },
|
|
14574
15676
|
{
|
|
14575
15677
|
type: "endpoint",
|
|
14576
15678
|
method: "POST",
|
|
@@ -14635,7 +15737,8 @@ var sections37 = [
|
|
|
14635
15737
|
{ label: "Cancel Batch", slug: "cancel-batch" }
|
|
14636
15738
|
],
|
|
14637
15739
|
faq: [
|
|
14638
|
-
{ question: "How often are batches polled automatically?", answer: "The system polls the provider every hour for results. Use the sync endpoint to trigger an immediate check." }
|
|
15740
|
+
{ question: "How often are batches polled automatically?", answer: "The system polls the provider every hour for results. Use the sync endpoint to trigger an immediate check." },
|
|
15741
|
+
{ question: "Can I sync a batch that is still accumulating?", answer: "Yes, but it will have no effect since the batch has not been submitted to the provider yet. The status will remain accumulating." }
|
|
14639
15742
|
],
|
|
14640
15743
|
mentions: ["batch sync", "provider poll"]
|
|
14641
15744
|
},
|
|
@@ -14646,6 +15749,7 @@ var sections37 = [
|
|
|
14646
15749
|
seoTitle: "Cancel Extraction Batch \u2014 Talonic Docs",
|
|
14647
15750
|
description: "Cancel an in-progress extraction batch. Only batches in accumulating or submitted status can be cancelled. Completed batches cannot be rolled back.",
|
|
14648
15751
|
content: [
|
|
15752
|
+
{ type: "paragraph", text: "Cancel a batch that is still `accumulating` or `submitted`. Cancellation sends a stop request to the provider if the batch was already submitted. Documents in the cancelled batch revert to `batch_queued` status and can be resubmitted or processed via realtime extraction." },
|
|
14649
15753
|
{
|
|
14650
15754
|
type: "endpoint",
|
|
14651
15755
|
method: "POST",
|
|
@@ -14710,7 +15814,10 @@ var sections37 = [
|
|
|
14710
15814
|
{ label: "List Batches", slug: "list-batches" },
|
|
14711
15815
|
{ label: "Batch Detail", slug: "get-batch" }
|
|
14712
15816
|
],
|
|
14713
|
-
faq: [
|
|
15817
|
+
faq: [
|
|
15818
|
+
{ question: "Can I cancel a completed batch?", answer: "No. Only batches in accumulating or submitted status can be cancelled. Completed, failed, and already-cancelled batches return a 400 error." },
|
|
15819
|
+
{ question: "What happens to documents after cancellation?", answer: "Documents revert to batch_queued status. You can resubmit them in a new batch or process them via realtime extraction by updating their processing_mode." }
|
|
15820
|
+
],
|
|
14714
15821
|
mentions: ["batch cancellation"]
|
|
14715
15822
|
}
|
|
14716
15823
|
];
|
|
@@ -14725,6 +15832,15 @@ var sections38 = [
|
|
|
14725
15832
|
description: "List cases with optional status and search filters. Supports pagination for large result sets. Cases group related documents via shared entities.",
|
|
14726
15833
|
content: [
|
|
14727
15834
|
{ type: "paragraph", text: "Cases group documents that share entities discovered through linking. Use filters to narrow results by status or search term." },
|
|
15835
|
+
{ type: "paragraph", text: "A **case** is a cluster of documents connected by shared field values in the linking graph. The linking engine discovers entity overlaps \u2014 such as matching vendor IDs, policy numbers, or account references \u2014 and groups the connected documents into a single case for review." },
|
|
15836
|
+
{
|
|
15837
|
+
type: "list",
|
|
15838
|
+
items: [
|
|
15839
|
+
"Filter by `search` to find cases by label or document content.",
|
|
15840
|
+
"Use `min_documents` to exclude trivial single-document cases.",
|
|
15841
|
+
"Cases are created automatically \u2014 there is no manual create endpoint."
|
|
15842
|
+
]
|
|
15843
|
+
},
|
|
14728
15844
|
{
|
|
14729
15845
|
type: "endpoint",
|
|
14730
15846
|
method: "GET",
|
|
@@ -14788,7 +15904,9 @@ var sections38 = [
|
|
|
14788
15904
|
{ label: "Case Anomalies", slug: "case-anomalies" }
|
|
14789
15905
|
],
|
|
14790
15906
|
faq: [
|
|
14791
|
-
{ question: "How are cases created?", answer: "Cases are created automatically when the linking engine discovers shared entities across two or more documents." }
|
|
15907
|
+
{ question: "How are cases created?", answer: "Cases are created automatically when the linking engine discovers shared entities across two or more documents." },
|
|
15908
|
+
{ question: "Can I create a case manually?", answer: "No. Cases are discovered automatically through the linking graph. You can manually adjust case membership using the merge, split, pin, and remove document endpoints." },
|
|
15909
|
+
{ question: "What is a case key?", answer: "A case key is a hex hash (8-64 characters) that uniquely identifies a case within your workspace. It is derived from the linked entity values that form the case." }
|
|
14792
15910
|
],
|
|
14793
15911
|
mentions: ["cases", "linking", "documents"]
|
|
14794
15912
|
},
|
|
@@ -14799,6 +15917,8 @@ var sections38 = [
|
|
|
14799
15917
|
seoTitle: "Get Case Endpoint \u2014 Talonic Docs",
|
|
14800
15918
|
description: "Retrieve a case by its key (e.g. CASE-001) including linked documents, shared entities, AI-generated narration, label, and anomaly count.",
|
|
14801
15919
|
content: [
|
|
15920
|
+
{ type: "paragraph", text: "Retrieve the full detail of a case including its documents, AI-generated narrative summary, and anomaly count. The narrative is generated by Claude and summarizes the relationships between documents in the case." },
|
|
15921
|
+
{ type: "callout", variant: "info", text: "The `narrative` field is generated on demand via `POST /v1/cases/:key/narrate`. It will be `null` until narration is triggered for this case." },
|
|
14802
15922
|
{
|
|
14803
15923
|
type: "endpoint",
|
|
14804
15924
|
method: "GET",
|
|
@@ -14881,7 +16001,10 @@ var sections38 = [
|
|
|
14881
16001
|
{ label: "Case Operations", slug: "case-operations" },
|
|
14882
16002
|
{ label: "Case Evidence", slug: "case-evidence" }
|
|
14883
16003
|
],
|
|
14884
|
-
faq: [
|
|
16004
|
+
faq: [
|
|
16005
|
+
{ question: "What is the case narrative?", answer: "The narrative is an AI-generated summary produced by Claude that describes the relationships, patterns, and key facts across the documents in the case. It is generated on demand via POST /v1/cases/:key/narrate." },
|
|
16006
|
+
{ question: "Why is the narrative field null?", answer: "Narration is generated on demand. Call POST /v1/cases/:key/narrate to generate a narrative, then retrieve it with this endpoint." }
|
|
16007
|
+
],
|
|
14885
16008
|
mentions: ["case detail", "narration", "shared entities"]
|
|
14886
16009
|
},
|
|
14887
16010
|
{
|
|
@@ -14892,6 +16015,17 @@ var sections38 = [
|
|
|
14892
16015
|
description: "List anomalies detected within a case with severity filtering and dismissed toggle. Dismiss individual anomalies after review.",
|
|
14893
16016
|
content: [
|
|
14894
16017
|
{ type: "paragraph", text: "Anomalies are structural issues detected across the documents in a case \u2014 field conflicts, duplicate key divergence, value reuse patterns, and more." },
|
|
16018
|
+
{ type: "paragraph", text: "Five **structural detectors** run automatically when cases are materialized. They surface inconsistencies that may indicate data quality issues, duplicate submissions, or missing documents within a case." },
|
|
16019
|
+
{
|
|
16020
|
+
type: "list",
|
|
16021
|
+
items: [
|
|
16022
|
+
"**Validation cluster** \u2014 groups of fields that consistently fail validation together.",
|
|
16023
|
+
"**Field conflict** \u2014 contradictory values for the same field across documents.",
|
|
16024
|
+
"**Duplicate key divergence** \u2014 documents share a key but differ on other fields.",
|
|
16025
|
+
"**Missing document type** \u2014 a case is missing an expected document type based on templates.",
|
|
16026
|
+
"**Value reuse** \u2014 the same value appears in unrelated fields across documents."
|
|
16027
|
+
]
|
|
16028
|
+
},
|
|
14895
16029
|
{
|
|
14896
16030
|
type: "endpoint",
|
|
14897
16031
|
method: "GET",
|
|
@@ -14975,9 +16109,11 @@ var sections38 = [
|
|
|
14975
16109
|
{ label: "Case Evidence", slug: "case-evidence" }
|
|
14976
16110
|
],
|
|
14977
16111
|
faq: [
|
|
14978
|
-
{ question: "What types of anomalies are detected?", answer: "Five structural detectors: validation cluster, field conflict, duplicate key divergence, missing document type, and value reuse." }
|
|
16112
|
+
{ question: "What types of anomalies are detected?", answer: "Five structural detectors: validation cluster, field conflict, duplicate key divergence, missing document type, and value reuse." },
|
|
16113
|
+
{ question: "Can I un-dismiss an anomaly?", answer: "Dismissal is a one-way operation. If a similar anomaly is detected again during case recomputation, it will appear as a new anomaly entry." },
|
|
16114
|
+
{ question: "How are anomaly severities assigned?", answer: "Severity is assigned by each detector based on the scope of the issue. For example, a field conflict across many documents is rated higher than a conflict in just two." }
|
|
14979
16115
|
],
|
|
14980
|
-
mentions: ["anomalies", "severity", "dismiss"]
|
|
16116
|
+
mentions: ["anomalies", "severity", "dismiss", "structural detectors"]
|
|
14981
16117
|
},
|
|
14982
16118
|
{
|
|
14983
16119
|
slug: "case-evidence",
|
|
@@ -14986,6 +16122,8 @@ var sections38 = [
|
|
|
14986
16122
|
seoTitle: "Case Evidence Endpoint \u2014 Talonic Docs",
|
|
14987
16123
|
description: "List evidence items within a case. Filter by validation status, source document, category, or free-text search across evidence fields.",
|
|
14988
16124
|
content: [
|
|
16125
|
+
{ type: "paragraph", text: "Evidence items are the extracted field values from documents in a case, annotated with validation status and confidence scores. Use evidence to audit the data quality within a case and understand which fields link documents together." },
|
|
16126
|
+
{ type: "callout", variant: "info", text: "Evidence is produced by the evidence validation engine, which runs rule-based validators (structural checks, checksum validation, domain packs) against extracted values. Each evidence item records the validation outcome for a specific field on a specific document." },
|
|
14989
16127
|
{
|
|
14990
16128
|
type: "endpoint",
|
|
14991
16129
|
method: "GET",
|
|
@@ -15052,8 +16190,11 @@ var sections38 = [
|
|
|
15052
16190
|
{ label: "Get Case", slug: "get-case" },
|
|
15053
16191
|
{ label: "Case Anomalies", slug: "case-anomalies" }
|
|
15054
16192
|
],
|
|
15055
|
-
faq: [
|
|
15056
|
-
|
|
16193
|
+
faq: [
|
|
16194
|
+
{ question: "What is the difference between evidence and anomalies?", answer: "Evidence items are individual field values with validation status and confidence. Anomalies are higher-level structural issues detected across multiple evidence items, such as field conflicts or duplicate key divergence." },
|
|
16195
|
+
{ question: "How is the confidence score computed?", answer: "Confidence is assigned during extraction and reflects how certain the AI is about the extracted value. It ranges from 0 (low confidence) to 1 (high confidence) and is independent of the validation status." }
|
|
16196
|
+
],
|
|
16197
|
+
mentions: ["evidence", "validation", "case evidence", "confidence"]
|
|
15057
16198
|
},
|
|
15058
16199
|
{
|
|
15059
16200
|
slug: "case-operations",
|
|
@@ -15063,6 +16204,8 @@ var sections38 = [
|
|
|
15063
16204
|
description: "Update the status of a case. These operations modify case metadata.",
|
|
15064
16205
|
content: [
|
|
15065
16206
|
{ type: "paragraph", text: "Operations that modify a case: update the case status (lifecycle management)." },
|
|
16207
|
+
{ type: "paragraph", text: "Cases follow a lifecycle: `discovered` → `confirmed` → `active` → `resolved`. Transition the status as your team reviews and processes the case. Resolved cases can include optional notes documenting the resolution." },
|
|
16208
|
+
{ type: "callout", variant: "info", text: "Status transitions are not strictly enforced \u2014 you can move a case to any valid status. However, setting `resolved` records a `resolved_at` timestamp that cannot be cleared by reverting to an earlier status." },
|
|
15066
16209
|
{
|
|
15067
16210
|
type: "endpoint",
|
|
15068
16211
|
method: "PATCH",
|
|
@@ -15116,8 +16259,11 @@ var sections38 = [
|
|
|
15116
16259
|
{ label: "Get Case", slug: "get-case" },
|
|
15117
16260
|
{ label: "Case Merge & Split", slug: "case-merge-split" }
|
|
15118
16261
|
],
|
|
15119
|
-
faq: [
|
|
15120
|
-
|
|
16262
|
+
faq: [
|
|
16263
|
+
{ question: "What are the valid case statuses?", answer: "Four lifecycle statuses: discovered (initial), confirmed (verified as real), active (being worked), and resolved (complete). The typical flow is discovered -> confirmed -> active -> resolved." },
|
|
16264
|
+
{ question: "Can I reopen a resolved case?", answer: "Yes, you can set the status back to active or confirmed. However, the resolved_at timestamp will remain set from the original resolution." }
|
|
16265
|
+
],
|
|
16266
|
+
mentions: ["status", "case operations", "lifecycle"]
|
|
15121
16267
|
},
|
|
15122
16268
|
{
|
|
15123
16269
|
slug: "case-edges",
|
|
@@ -15127,6 +16273,8 @@ var sections38 = [
|
|
|
15127
16273
|
description: "List, confirm, or reject linking edges within a case. Edges represent shared-value connections between documents in the linking graph.",
|
|
15128
16274
|
content: [
|
|
15129
16275
|
{ type: "paragraph", text: "Edges are the connections between documents in a case, created by the linking engine when documents share field values. Confirm or reject edges to refine case membership." },
|
|
16276
|
+
{ type: "paragraph", text: "Each edge connects two documents through a shared field value \u2014 for example, both documents might contain the same `vendor_id`. The `final_score` reflects how strong the connection is, based on learned field-pair weights. Confirming or rejecting edges feeds back into the weight-learning system to improve future case discovery." },
|
|
16277
|
+
{ type: "callout", variant: "warning", text: "Rejecting an edge may split the case if the rejected edge was the only connection between a subset of documents. The affected documents will form a separate case." },
|
|
15130
16278
|
{
|
|
15131
16279
|
type: "endpoint",
|
|
15132
16280
|
method: "GET",
|
|
@@ -15225,9 +16373,10 @@ var sections38 = [
|
|
|
15225
16373
|
{ label: "Linking", slug: "linking-overview" }
|
|
15226
16374
|
],
|
|
15227
16375
|
faq: [
|
|
15228
|
-
{ question: "What happens when I reject an edge?", answer: "The connection between the two documents is removed. If this disconnects a document from all others in the case, it may be removed from the case." }
|
|
16376
|
+
{ question: "What happens when I reject an edge?", answer: "The connection between the two documents is removed. If this disconnects a document from all others in the case, it may be removed from the case." },
|
|
16377
|
+
{ question: "Do edge confirmations and rejections affect future cases?", answer: "Yes. Confirmations and rejections update the learned field-pair weights (FieldBindingWeight), which adjusts edge scores in future case discovery runs. This creates a feedback loop that improves case quality over time." }
|
|
15229
16378
|
],
|
|
15230
|
-
mentions: ["edges", "linking", "confirm", "reject"]
|
|
16379
|
+
mentions: ["edges", "linking", "confirm", "reject", "field-pair weights"]
|
|
15231
16380
|
},
|
|
15232
16381
|
{
|
|
15233
16382
|
slug: "case-documents",
|
|
@@ -15236,6 +16385,8 @@ var sections38 = [
|
|
|
15236
16385
|
seoTitle: "Case Documents Endpoints \u2014 Talonic Docs",
|
|
15237
16386
|
description: "Pin or remove documents within a case. Pinned documents are highlighted in the case view and preserved during case operations.",
|
|
15238
16387
|
content: [
|
|
16388
|
+
{ type: "paragraph", text: "Manage document membership within a case. **Pin** a document to mark it as important \u2014 pinned documents are highlighted in the UI and preserved during split operations. **Remove** a document to detach it from the case entirely." },
|
|
16389
|
+
{ type: "callout", variant: "info", text: "Removing a document from a case does not delete the document itself. The document remains in your workspace and may be re-linked into a case during the next recompute cycle if linking edges still exist." },
|
|
15239
16390
|
{
|
|
15240
16391
|
type: "endpoint",
|
|
15241
16392
|
method: "POST",
|
|
@@ -15292,7 +16443,10 @@ var sections38 = [
|
|
|
15292
16443
|
{ label: "Get Case", slug: "get-case" },
|
|
15293
16444
|
{ label: "Case Edges", slug: "case-edges" }
|
|
15294
16445
|
],
|
|
15295
|
-
faq: [
|
|
16446
|
+
faq: [
|
|
16447
|
+
{ question: "Does removing a document delete it?", answer: "No. Removing a document only detaches it from the case. The document remains in your workspace and can still appear in other cases or be re-linked in a future recompute." },
|
|
16448
|
+
{ question: "What does pinning a document do?", answer: "Pinning highlights the document in the case view and ensures it is preserved in the original partition during split operations. It does not affect linking or anomaly detection." }
|
|
16449
|
+
],
|
|
15296
16450
|
mentions: ["pin", "documents", "case documents"]
|
|
15297
16451
|
},
|
|
15298
16452
|
{
|
|
@@ -15303,6 +16457,8 @@ var sections38 = [
|
|
|
15303
16457
|
description: "Split a case into two cases or merge two cases into one. Restructure case boundaries when automatic grouping needs adjustment.",
|
|
15304
16458
|
content: [
|
|
15305
16459
|
{ type: "paragraph", text: "When the automatic case grouping does not match your needs, split a case into two partitions or merge two cases together." },
|
|
16460
|
+
{ type: "paragraph", text: "These operations let you manually adjust case boundaries. **Splitting** divides a case by assigning each document to one of two partitions. **Merging** combines all documents from two cases into a single case. Both operations preserve linking edges and re-evaluate anomalies on the resulting cases." },
|
|
16461
|
+
{ type: "callout", variant: "warning", text: "When merging, `case_key_b` is resolved (set to `resolved` status) and its documents are moved into `case_key_a`. The merged case key is derived from case A." },
|
|
15306
16462
|
{
|
|
15307
16463
|
type: "endpoint",
|
|
15308
16464
|
method: "POST",
|
|
@@ -15386,7 +16542,9 @@ var sections38 = [
|
|
|
15386
16542
|
{ label: "Case Operations", slug: "case-operations" }
|
|
15387
16543
|
],
|
|
15388
16544
|
faq: [
|
|
15389
|
-
{ question: "What happens to anomalies when cases are merged?", answer: "Anomalies from all source cases are carried over to the merged case and re-evaluated." }
|
|
16545
|
+
{ question: "What happens to anomalies when cases are merged?", answer: "Anomalies from all source cases are carried over to the merged case and re-evaluated." },
|
|
16546
|
+
{ question: "Do I need to include every document when splitting?", answer: "Yes. Every document in the case must appear in exactly one of partition_a or partition_b. Omitting a document or including it in both partitions will return a 400 error." },
|
|
16547
|
+
{ question: "Can I merge more than two cases at once?", answer: "No. The merge endpoint accepts exactly two case keys. To merge multiple cases, chain merge calls \u2014 merge A and B first, then merge the result with C." }
|
|
15390
16548
|
],
|
|
15391
16549
|
mentions: ["merge", "split", "case restructuring"]
|
|
15392
16550
|
}
|
|
@@ -15402,6 +16560,11 @@ var sections39 = [
|
|
|
15402
16560
|
description: "List pending review items in the review queue with pagination. Review items are validation records awaiting human approval, rejection, or flagging.",
|
|
15403
16561
|
content: [
|
|
15404
16562
|
{ type: "paragraph", text: "The review queue surfaces validation records that require human judgment before delivery. Items appear when extraction confidence is below the auto-approval threshold or when anomalies are detected." },
|
|
16563
|
+
{ type: "list", ordered: false, items: [
|
|
16564
|
+
"Filter by `status` to see only `pending`, `approved`, or `rejected` records",
|
|
16565
|
+
"Use cursor-based pagination to iterate through large queues",
|
|
16566
|
+
"Sort by `created_at` in ascending or descending order"
|
|
16567
|
+
] },
|
|
15405
16568
|
{
|
|
15406
16569
|
type: "endpoint",
|
|
15407
16570
|
method: "GET",
|
|
@@ -15488,9 +16651,11 @@ var sections39 = [
|
|
|
15488
16651
|
{ label: "Get Review Item", slug: "get-review-item" }
|
|
15489
16652
|
],
|
|
15490
16653
|
faq: [
|
|
15491
|
-
{ question: "When do items appear in the review queue?", answer: "Items are queued when extraction confidence falls below the auto-approval threshold or when anomalies require human verification." }
|
|
16654
|
+
{ question: "When do items appear in the review queue?", answer: "Items are queued when extraction confidence falls below the auto-approval threshold or when anomalies require human verification." },
|
|
16655
|
+
{ question: "How do I paginate through all review items?", answer: "Pass the `next_cursor` value from the response as the `cursor` query parameter in your next request. Continue until `has_more` is false." },
|
|
16656
|
+
{ question: "Can I filter review items by document or schema?", answer: "Not directly. The list endpoint supports filtering by `status`. To find review items for a specific document, retrieve all pending items and filter client-side by `document_id`." }
|
|
15492
16657
|
],
|
|
15493
|
-
mentions: ["review", "queue", "validation"]
|
|
16658
|
+
mentions: ["review", "queue", "validation", "pagination"]
|
|
15494
16659
|
},
|
|
15495
16660
|
{
|
|
15496
16661
|
slug: "review-stats",
|
|
@@ -15499,6 +16664,7 @@ var sections39 = [
|
|
|
15499
16664
|
seoTitle: "Review Queue Statistics \u2014 Talonic Docs",
|
|
15500
16665
|
description: "Get statistics for the review queue including total pending items, items by status, and average time in queue. Useful for monitoring review backlog.",
|
|
15501
16666
|
content: [
|
|
16667
|
+
{ type: "paragraph", text: "Get a summary of the review queue broken down by status. Use this endpoint to monitor backlog size, track review throughput, and trigger alerts when pending items exceed a threshold." },
|
|
15502
16668
|
{
|
|
15503
16669
|
type: "endpoint",
|
|
15504
16670
|
method: "GET",
|
|
@@ -15541,8 +16707,11 @@ var sections39 = [
|
|
|
15541
16707
|
{ label: "List Review Items", slug: "list-review-items" },
|
|
15542
16708
|
{ label: "Review Batch", slug: "review-batch" }
|
|
15543
16709
|
],
|
|
15544
|
-
faq: [
|
|
15545
|
-
|
|
16710
|
+
faq: [
|
|
16711
|
+
{ question: "Does the stats endpoint count all-time or only active items?", answer: "It counts all review records across all statuses, including already-approved and rejected items. Use the `by_status.pending` value to see only the active backlog." },
|
|
16712
|
+
{ question: "How often should I poll review stats?", answer: "Stats are computed on each request. For dashboard polling, an interval of 30-60 seconds is reasonable. For high-throughput pipelines, consider using webhooks for real-time notifications." }
|
|
16713
|
+
],
|
|
16714
|
+
mentions: ["review statistics", "queue metrics", "backlog monitoring"]
|
|
15546
16715
|
},
|
|
15547
16716
|
{
|
|
15548
16717
|
slug: "get-review-item",
|
|
@@ -15551,6 +16720,8 @@ var sections39 = [
|
|
|
15551
16720
|
seoTitle: "Get Review Item Endpoint \u2014 Talonic Docs",
|
|
15552
16721
|
description: "Retrieve a single review item by ID with full detail including extracted data, confidence scores, and validation flags for human review.",
|
|
15553
16722
|
content: [
|
|
16723
|
+
{ type: "paragraph", text: "Retrieve full details for a single review item, including per-field review decisions, low-confidence fields that triggered the review, and any reviewer comments. This endpoint provides the data needed to build custom review interfaces." },
|
|
16724
|
+
{ type: "callout", variant: "info", text: "The `low_confidence_fields` array lists the specific field keys that fell below the confidence threshold and triggered this item for review. Use this to highlight problematic fields in your review UI." },
|
|
15554
16725
|
{
|
|
15555
16726
|
type: "endpoint",
|
|
15556
16727
|
method: "GET",
|
|
@@ -15617,8 +16788,11 @@ var sections39 = [
|
|
|
15617
16788
|
{ label: "Review Action", slug: "review-action" },
|
|
15618
16789
|
{ label: "Review Assign", slug: "review-assign" }
|
|
15619
16790
|
],
|
|
15620
|
-
faq: [
|
|
15621
|
-
|
|
16791
|
+
faq: [
|
|
16792
|
+
{ question: "What are low_confidence_fields?", answer: "An array of field keys where the extraction confidence fell below the auto-approval threshold. These are the fields that caused the record to be queued for human review." },
|
|
16793
|
+
{ question: "Can I see the extracted data for a review item?", answer: "The review item includes the document_id and run_id. Use these to retrieve the full extraction output from the extraction or structuring endpoints." }
|
|
16794
|
+
],
|
|
16795
|
+
mentions: ["review detail", "confidence", "flagged fields", "field decisions"]
|
|
15622
16796
|
},
|
|
15623
16797
|
{
|
|
15624
16798
|
slug: "review-action",
|
|
@@ -15627,6 +16801,8 @@ var sections39 = [
|
|
|
15627
16801
|
seoTitle: "Review Action Endpoint \u2014 Talonic Docs",
|
|
15628
16802
|
description: "Take an action on a review item: approve, reject, or flag. Optionally include a comment explaining the decision for audit purposes.",
|
|
15629
16803
|
content: [
|
|
16804
|
+
{ type: "paragraph", text: "Approve or reject a review item to advance it through the delivery pipeline. Approved items proceed to delivery. Rejected items are returned for re-extraction or manual correction. Include an optional `reason` for audit trail purposes." },
|
|
16805
|
+
{ type: "callout", variant: "warning", text: "Review actions are final. Once a record is approved or rejected, it cannot be reverted to pending status through the API." },
|
|
15630
16806
|
{
|
|
15631
16807
|
type: "endpoint",
|
|
15632
16808
|
method: "POST",
|
|
@@ -15697,9 +16873,10 @@ var sections39 = [
|
|
|
15697
16873
|
{ label: "Review Batch", slug: "review-batch" }
|
|
15698
16874
|
],
|
|
15699
16875
|
faq: [
|
|
15700
|
-
{ question: "What happens after approval?", answer: "Approved items proceed to delivery. Rejected items are returned for re-extraction or manual correction." }
|
|
16876
|
+
{ question: "What happens after approval?", answer: "Approved items proceed to delivery. Rejected items are returned for re-extraction or manual correction." },
|
|
16877
|
+
{ question: "Is the reason field stored for audit purposes?", answer: "Yes. The reason is stored as the review_comment on the record and is visible when retrieving the review item detail." }
|
|
15701
16878
|
],
|
|
15702
|
-
mentions: ["approve", "reject", "flag", "review action"]
|
|
16879
|
+
mentions: ["approve", "reject", "flag", "review action", "audit trail"]
|
|
15703
16880
|
},
|
|
15704
16881
|
{
|
|
15705
16882
|
slug: "review-batch",
|
|
@@ -15708,6 +16885,8 @@ var sections39 = [
|
|
|
15708
16885
|
seoTitle: "Batch Review Endpoint \u2014 Talonic Docs",
|
|
15709
16886
|
description: "Batch approve or reject multiple review items in a single request. Useful for clearing the review queue when items share similar characteristics.",
|
|
15710
16887
|
content: [
|
|
16888
|
+
{ type: "paragraph", text: "Process multiple review items in a single API call. This is useful for clearing backlogs when you have high-confidence items that can be bulk-approved, or when rejecting a batch of items from a failed extraction run." },
|
|
16889
|
+
{ type: "callout", variant: "info", text: "The batch endpoint processes items independently. If some items fail (e.g. not found), the remaining items are still processed. Check the `results` array for per-item outcomes." },
|
|
15711
16890
|
{
|
|
15712
16891
|
type: "endpoint",
|
|
15713
16892
|
method: "POST",
|
|
@@ -15763,8 +16942,11 @@ var sections39 = [
|
|
|
15763
16942
|
{ label: "Review Action", slug: "review-action" },
|
|
15764
16943
|
{ label: "Review Stats", slug: "review-stats" }
|
|
15765
16944
|
],
|
|
15766
|
-
faq: [
|
|
15767
|
-
|
|
16945
|
+
faq: [
|
|
16946
|
+
{ question: "Is there a limit on how many items I can batch?", answer: "There is no hard limit on array size, but very large batches may take longer to process. For best performance, batch in groups of 50-100 items." },
|
|
16947
|
+
{ question: "What happens if some items in the batch are already approved?", answer: "Already-actioned items are skipped and reported with an error status in the results array. The remaining items are still processed." }
|
|
16948
|
+
],
|
|
16949
|
+
mentions: ["batch", "bulk review", "batch processing"]
|
|
15768
16950
|
},
|
|
15769
16951
|
{
|
|
15770
16952
|
slug: "review-assign",
|
|
@@ -15773,6 +16955,7 @@ var sections39 = [
|
|
|
15773
16955
|
seoTitle: "Assign Review Item Endpoint \u2014 Talonic Docs",
|
|
15774
16956
|
description: "Assign a review item to a team member for review. Assigned items appear in the assignee's personal review queue.",
|
|
15775
16957
|
content: [
|
|
16958
|
+
{ type: "paragraph", text: "Assign a pending review item to a specific team member. Assignments help distribute review workload and track who is responsible for each item. Pass `null` as the `user_id` to unassign an item." },
|
|
15776
16959
|
{
|
|
15777
16960
|
type: "endpoint",
|
|
15778
16961
|
method: "POST",
|
|
@@ -15841,8 +17024,11 @@ var sections39 = [
|
|
|
15841
17024
|
{ label: "Get Review Item", slug: "get-review-item" },
|
|
15842
17025
|
{ label: "Review Action", slug: "review-action" }
|
|
15843
17026
|
],
|
|
15844
|
-
faq: [
|
|
15845
|
-
|
|
17027
|
+
faq: [
|
|
17028
|
+
{ question: "Can I assign an already-reviewed item?", answer: "Assignment is only meaningful for pending items. You can technically assign a reviewed item, but it has no effect on the review workflow since the item has already been actioned." },
|
|
17029
|
+
{ question: "How do I unassign a review item?", answer: "Pass `null` as the `user_id` in the request body. The `assigned_to` field will be set to null." }
|
|
17030
|
+
],
|
|
17031
|
+
mentions: ["assign", "team", "review assignment", "workload distribution"]
|
|
15846
17032
|
}
|
|
15847
17033
|
];
|
|
15848
17034
|
|
|
@@ -15856,6 +17042,11 @@ var sections40 = [
|
|
|
15856
17042
|
description: "List all ground truth datasets used for benchmarking extraction accuracy. Each dataset contains manually verified entries that serve as the gold standard.",
|
|
15857
17043
|
content: [
|
|
15858
17044
|
{ type: "paragraph", text: "Ground truth datasets contain manually verified data entries that serve as the gold standard for measuring extraction accuracy. Create datasets, add entries, then run benchmarks against extraction results." },
|
|
17045
|
+
{ type: "list", ordered: false, items: [
|
|
17046
|
+
"Each dataset contains verified entries mapping documents to expected field values",
|
|
17047
|
+
"Datasets can be scoped to a specific user schema via `user_schema_id`",
|
|
17048
|
+
"Use datasets as inputs to benchmark runs for per-field accuracy measurement"
|
|
17049
|
+
] },
|
|
15859
17050
|
{
|
|
15860
17051
|
type: "endpoint",
|
|
15861
17052
|
method: "GET",
|
|
@@ -15931,8 +17122,11 @@ var sections40 = [
|
|
|
15931
17122
|
{ label: "Create Dataset", slug: "create-quality-dataset" },
|
|
15932
17123
|
{ label: "List Benchmarks", slug: "list-benchmarks" }
|
|
15933
17124
|
],
|
|
15934
|
-
faq: [
|
|
15935
|
-
|
|
17125
|
+
faq: [
|
|
17126
|
+
{ question: "How many ground truth datasets can I create?", answer: "There is no hard limit on the number of datasets. Create separate datasets for different document types or schema versions to track accuracy independently." },
|
|
17127
|
+
{ question: "What is the recommended number of entries per dataset?", answer: "For statistically meaningful accuracy scores, aim for at least 30-50 entries per dataset. Smaller datasets may produce volatile accuracy metrics." }
|
|
17128
|
+
],
|
|
17129
|
+
mentions: ["ground truth", "quality", "benchmarking", "datasets"]
|
|
15936
17130
|
},
|
|
15937
17131
|
{
|
|
15938
17132
|
slug: "create-quality-dataset",
|
|
@@ -15941,6 +17135,7 @@ var sections40 = [
|
|
|
15941
17135
|
seoTitle: "Create Ground Truth Dataset \u2014 Talonic Docs",
|
|
15942
17136
|
description: "Create a new ground truth dataset linked to a schema. The dataset defines the expected extraction output used for accuracy benchmarking.",
|
|
15943
17137
|
content: [
|
|
17138
|
+
{ type: "paragraph", text: "Create an empty ground truth dataset that you can populate with verified entries. Datasets serve as the baseline for benchmark runs that measure extraction accuracy. After creating a dataset, add entries individually or import them in bulk via CSV." },
|
|
15944
17139
|
{
|
|
15945
17140
|
type: "endpoint",
|
|
15946
17141
|
method: "POST",
|
|
@@ -16001,7 +17196,10 @@ var sections40 = [
|
|
|
16001
17196
|
{ label: "List Datasets", slug: "list-quality-datasets" },
|
|
16002
17197
|
{ label: "Quality Entries", slug: "quality-entries" }
|
|
16003
17198
|
],
|
|
16004
|
-
faq: [
|
|
17199
|
+
faq: [
|
|
17200
|
+
{ question: "Do I need to link a dataset to a schema?", answer: "No. The user_schema_id is optional. However, linking to a schema ensures that your ground truth entries use the correct field names and makes benchmark results more meaningful." },
|
|
17201
|
+
{ question: "Can I rename a dataset after creation?", answer: "Dataset metadata (name, description) is set at creation time. To change it, delete the dataset and create a new one with the desired name." }
|
|
17202
|
+
],
|
|
16005
17203
|
mentions: ["create ground truth", "dataset"]
|
|
16006
17204
|
},
|
|
16007
17205
|
{
|
|
@@ -16011,6 +17209,8 @@ var sections40 = [
|
|
|
16011
17209
|
seoTitle: "Get or Delete Ground Truth Dataset \u2014 Talonic Docs",
|
|
16012
17210
|
description: "Retrieve a ground truth dataset by ID with metadata and entry count, or delete it permanently. Deleting a dataset does not remove associated benchmark results.",
|
|
16013
17211
|
content: [
|
|
17212
|
+
{ type: "paragraph", text: "Retrieve a dataset with its metadata and sample entries, or delete it permanently. The GET response includes a `samples` array with the actual ground truth entries, allowing you to inspect the expected values for each document." },
|
|
17213
|
+
{ type: "callout", variant: "warning", text: "Deleting a dataset is permanent. However, benchmark results that used this dataset are retained for historical reference. The benchmark will show the dataset_id but the dataset itself will no longer be retrievable." },
|
|
16014
17214
|
{
|
|
16015
17215
|
type: "endpoint",
|
|
16016
17216
|
method: "GET",
|
|
@@ -16096,8 +17296,11 @@ var sections40 = [
|
|
|
16096
17296
|
{ label: "List Datasets", slug: "list-quality-datasets" },
|
|
16097
17297
|
{ label: "Quality Entries", slug: "quality-entries" }
|
|
16098
17298
|
],
|
|
16099
|
-
faq: [
|
|
16100
|
-
|
|
17299
|
+
faq: [
|
|
17300
|
+
{ question: "Are benchmark results deleted when I delete a dataset?", answer: "No. Benchmark results are retained for historical reference even after the source dataset is deleted." },
|
|
17301
|
+
{ question: "Does the GET response include all entries?", answer: "Yes. The `samples` array contains all ground truth entries in the dataset. For very large datasets, this response may be sizable." }
|
|
17302
|
+
],
|
|
17303
|
+
mentions: ["ground truth detail", "delete dataset", "samples"]
|
|
16101
17304
|
},
|
|
16102
17305
|
{
|
|
16103
17306
|
slug: "quality-entries",
|
|
@@ -16107,6 +17310,7 @@ var sections40 = [
|
|
|
16107
17310
|
description: "List, add, import, or delete entries in a ground truth dataset. Entries represent individual verified data points used for benchmarking extraction accuracy.",
|
|
16108
17311
|
content: [
|
|
16109
17312
|
{ type: "paragraph", text: "Entries are the individual verified data points within a ground truth dataset. Add them manually, or import in bulk via CSV." },
|
|
17313
|
+
{ type: "callout", variant: "info", text: "Each entry maps a `document_id` to an `expected_data` object containing the verified field values. Field keys in `expected_data` should match the field names used in your extraction schema for accurate benchmark comparisons." },
|
|
16110
17314
|
{
|
|
16111
17315
|
type: "endpoint",
|
|
16112
17316
|
method: "GET",
|
|
@@ -16190,6 +17394,7 @@ var sections40 = [
|
|
|
16190
17394
|
"created_at": "2024-09-05T12:00:00.000Z"
|
|
16191
17395
|
}`
|
|
16192
17396
|
},
|
|
17397
|
+
{ type: "paragraph", text: "For bulk entry creation, use the CSV import endpoint. The CSV must have `document_id` as the first column, with remaining columns matching your schema field names." },
|
|
16193
17398
|
{
|
|
16194
17399
|
type: "endpoint",
|
|
16195
17400
|
method: "POST",
|
|
@@ -16256,9 +17461,11 @@ var sections40 = [
|
|
|
16256
17461
|
{ label: "Create Benchmark", slug: "create-benchmark" }
|
|
16257
17462
|
],
|
|
16258
17463
|
faq: [
|
|
16259
|
-
{ question: "What CSV format is expected for import?", answer: "The first column must be document_id. Remaining columns should match the schema field names. Header row is required." }
|
|
17464
|
+
{ question: "What CSV format is expected for import?", answer: "The first column must be document_id. Remaining columns should match the schema field names. Header row is required." },
|
|
17465
|
+
{ question: "Can I add multiple entries for the same document?", answer: "Each document should have one entry per dataset. Duplicate document_id rows in CSV imports are skipped and counted in the `skipped` total." },
|
|
17466
|
+
{ question: "What happens if expected_data field names do not match the schema?", answer: "Unmatched fields are stored but ignored during benchmark comparison. Only fields present in both the ground truth entry and the extraction output are compared." }
|
|
16260
17467
|
],
|
|
16261
|
-
mentions: ["entries", "ground truth values", "CSV import"]
|
|
17468
|
+
mentions: ["entries", "ground truth values", "CSV import", "bulk import"]
|
|
16262
17469
|
},
|
|
16263
17470
|
{
|
|
16264
17471
|
slug: "list-benchmarks",
|
|
@@ -16267,6 +17474,7 @@ var sections40 = [
|
|
|
16267
17474
|
seoTitle: "List Benchmark Runs \u2014 Talonic Docs",
|
|
16268
17475
|
description: "List benchmark runs that compare extraction results against ground truth datasets. Each run produces per-field accuracy metrics.",
|
|
16269
17476
|
content: [
|
|
17477
|
+
{ type: "paragraph", text: "Benchmark runs compare your extraction output against ground truth datasets to produce per-field accuracy scores. Each run evaluates every document in the dataset and produces an `accuracy_overall` score along with per-field breakdowns. Use benchmarks to track extraction quality over time and measure the impact of schema or pipeline changes." },
|
|
16270
17478
|
{
|
|
16271
17479
|
type: "endpoint",
|
|
16272
17480
|
method: "GET",
|
|
@@ -16362,8 +17570,11 @@ var sections40 = [
|
|
|
16362
17570
|
{ label: "Create Benchmark", slug: "create-benchmark" },
|
|
16363
17571
|
{ label: "Benchmark Results", slug: "get-benchmark-results" }
|
|
16364
17572
|
],
|
|
16365
|
-
faq: [
|
|
16366
|
-
|
|
17573
|
+
faq: [
|
|
17574
|
+
{ question: "What benchmark statuses are possible?", answer: "A benchmark run has one of four statuses: `queued` (waiting to start), `running` (evaluating documents), `completed` (results available), or `failed` (an error occurred during evaluation)." },
|
|
17575
|
+
{ question: "Why is accuracy_overall null?", answer: "Accuracy scores are only computed after the benchmark run completes. While the status is `queued` or `running`, accuracy fields are null." }
|
|
17576
|
+
],
|
|
17577
|
+
mentions: ["benchmarks", "accuracy", "quality runs", "per-field accuracy"]
|
|
16367
17578
|
},
|
|
16368
17579
|
{
|
|
16369
17580
|
slug: "create-benchmark",
|
|
@@ -16372,6 +17583,8 @@ var sections40 = [
|
|
|
16372
17583
|
seoTitle: "Create Benchmark Run \u2014 Talonic Docs",
|
|
16373
17584
|
description: "Start a benchmark run that compares a job run output against a ground truth dataset. Produces per-field accuracy scores and overall metrics.",
|
|
16374
17585
|
content: [
|
|
17586
|
+
{ type: "paragraph", text: "Start a new benchmark run that evaluates your current extraction output against a ground truth dataset. The benchmark compares each document in the dataset entry-by-entry and field-by-field, producing an overall accuracy score and per-field breakdowns." },
|
|
17587
|
+
{ type: "callout", variant: "info", text: "Benchmark runs are asynchronous. The endpoint returns immediately with status `queued`. Poll the benchmark detail endpoint or list benchmarks to check when the run completes." },
|
|
16375
17588
|
{
|
|
16376
17589
|
type: "endpoint",
|
|
16377
17590
|
method: "POST",
|
|
@@ -16448,8 +17661,11 @@ var sections40 = [
|
|
|
16448
17661
|
{ label: "List Benchmarks", slug: "list-benchmarks" },
|
|
16449
17662
|
{ label: "Benchmark Results", slug: "get-benchmark-results" }
|
|
16450
17663
|
],
|
|
16451
|
-
faq: [
|
|
16452
|
-
|
|
17664
|
+
faq: [
|
|
17665
|
+
{ question: "Can I run multiple benchmarks simultaneously?", answer: "Yes. Benchmark runs are independent and can execute in parallel against different datasets or the same dataset." },
|
|
17666
|
+
{ question: "How long does a benchmark take to complete?", answer: "Duration depends on the number of entries in the dataset. A 50-entry dataset typically completes in a few seconds. Poll the benchmark detail to track progress." }
|
|
17667
|
+
],
|
|
17668
|
+
mentions: ["create benchmark", "run comparison", "accuracy evaluation"]
|
|
16453
17669
|
},
|
|
16454
17670
|
{
|
|
16455
17671
|
slug: "get-benchmark-results",
|
|
@@ -16458,6 +17674,7 @@ var sections40 = [
|
|
|
16458
17674
|
seoTitle: "Benchmark Results & Comparison \u2014 Talonic Docs",
|
|
16459
17675
|
description: "Get per-field accuracy results for a benchmark run, or compare two benchmark runs side by side to track extraction quality improvements over time.",
|
|
16460
17676
|
content: [
|
|
17677
|
+
{ type: "paragraph", text: "Retrieve per-document accuracy results for a completed benchmark run, showing which fields matched and which diverged from the ground truth. Each result includes the extracted value, expected value, and whether they matched. Use the compare endpoint to track accuracy improvements across runs." },
|
|
16461
17678
|
{
|
|
16462
17679
|
type: "endpoint",
|
|
16463
17680
|
method: "GET",
|
|
@@ -16499,6 +17716,7 @@ var sections40 = [
|
|
|
16499
17716
|
]
|
|
16500
17717
|
}`
|
|
16501
17718
|
},
|
|
17719
|
+
{ type: "paragraph", text: "To track accuracy trends over time, compare two benchmark runs side by side. The `accuracy_delta` shows the difference in overall accuracy between the two runs." },
|
|
16502
17720
|
{
|
|
16503
17721
|
type: "endpoint",
|
|
16504
17722
|
method: "GET",
|
|
@@ -16565,9 +17783,11 @@ var sections40 = [
|
|
|
16565
17783
|
{ label: "List Datasets", slug: "list-quality-datasets" }
|
|
16566
17784
|
],
|
|
16567
17785
|
faq: [
|
|
16568
|
-
{ question: "How is field accuracy calculated?", answer: "Each extracted value is compared to the ground truth entry for the same document and field. Accuracy is the ratio of correct matches to total entries." }
|
|
17786
|
+
{ question: "How is field accuracy calculated?", answer: "Each extracted value is compared to the ground truth entry for the same document and field. Accuracy is the ratio of correct matches to total entries." },
|
|
17787
|
+
{ question: "What does a negative accuracy_delta mean?", answer: "A negative delta means run_a has lower accuracy than run_b. For example, -0.03 means run_a is 3 percentage points less accurate. Pass the newer run as run_a and the older run as run_b to see improvement as a positive delta." },
|
|
17788
|
+
{ question: "Can I compare runs from different datasets?", answer: "Yes, but the comparison only shows overall accuracy differences. Per-field comparisons are most meaningful when both runs use the same ground truth dataset." }
|
|
16569
17789
|
],
|
|
16570
|
-
mentions: ["accuracy", "benchmark results", "comparison", "field-level metrics"]
|
|
17790
|
+
mentions: ["accuracy", "benchmark results", "comparison", "field-level metrics", "accuracy delta"]
|
|
16571
17791
|
}
|
|
16572
17792
|
];
|
|
16573
17793
|
|
|
@@ -16581,6 +17801,16 @@ var sections41 = [
|
|
|
16581
17801
|
description: "List all routing rules ordered by priority. Routing rules match incoming documents by conditions and apply actions like schema assignment, job triggering, or team routing.",
|
|
16582
17802
|
content: [
|
|
16583
17803
|
{ type: "paragraph", text: "Routing rules automate document processing by matching incoming documents against conditions and applying actions. Rules are evaluated in priority order \u2014 the first matching rule wins." },
|
|
17804
|
+
{ type: "paragraph", text: "When a document is classified (after OCR and type detection), the routing engine evaluates each active rule in priority order. The first rule whose conditions match is applied, and subsequent rules are skipped. This lets you build a priority chain: specific rules at the top, catch-all rules at the bottom." },
|
|
17805
|
+
{
|
|
17806
|
+
type: "list",
|
|
17807
|
+
items: [
|
|
17808
|
+
"Rules trigger on `document_classified` events after OCR and type detection.",
|
|
17809
|
+
"Conditions can match `document_type`, `source`, `language`, `sensitivity`, and other metadata.",
|
|
17810
|
+
"Actions include schema assignment, job triggering, and team routing.",
|
|
17811
|
+
"Lower priority numbers are evaluated first."
|
|
17812
|
+
]
|
|
17813
|
+
},
|
|
16584
17814
|
{
|
|
16585
17815
|
type: "endpoint",
|
|
16586
17816
|
method: "GET",
|
|
@@ -16669,7 +17899,9 @@ var sections41 = [
|
|
|
16669
17899
|
{ label: "Reorder Rules", slug: "reorder-routing-rules" }
|
|
16670
17900
|
],
|
|
16671
17901
|
faq: [
|
|
16672
|
-
{ question: "How are routing rules evaluated?", answer: "Rules are evaluated in priority order (lowest number first). The first rule whose conditions match the incoming document is applied." }
|
|
17902
|
+
{ question: "How are routing rules evaluated?", answer: "Rules are evaluated in priority order (lowest number first). The first rule whose conditions match the incoming document is applied." },
|
|
17903
|
+
{ question: "What happens if no rule matches a document?", answer: "If no routing rule matches, the document proceeds through the default pipeline without any schema assignment or special routing. You can add a low-priority catch-all rule to handle unmatched documents." },
|
|
17904
|
+
{ question: "Can I scope a rule to a specific source connection?", answer: "Yes. Set source_connection_id to limit the rule to documents ingested from a specific source (e.g. a particular Google Drive folder or S3 bucket). Documents from other sources will skip the rule." }
|
|
16673
17905
|
],
|
|
16674
17906
|
mentions: ["routing rules", "priority", "document routing"]
|
|
16675
17907
|
},
|
|
@@ -16680,6 +17912,8 @@ var sections41 = [
|
|
|
16680
17912
|
seoTitle: "Create Routing Rule Endpoint \u2014 Talonic Docs",
|
|
16681
17913
|
description: "Create a new routing rule with conditions on document properties and actions to apply when matched. Conditions can match document type, source, and other metadata.",
|
|
16682
17914
|
content: [
|
|
17915
|
+
{ type: "paragraph", text: 'Create a rule that automatically applies actions to incoming documents based on their metadata. Conditions define what to match (e.g. document type equals "invoice"), and actions define what to do (e.g. assign the finance schema). Rules are evaluated on every `document_classified` event.' },
|
|
17916
|
+
{ type: "callout", variant: "info", text: "New rules are created with `is_active: true` by default. If you want to test a rule before activating it, create it, then immediately disable it via `PATCH /v1/routing-rules/:id` with `is_active: false`." },
|
|
16683
17917
|
{
|
|
16684
17918
|
type: "endpoint",
|
|
16685
17919
|
method: "POST",
|
|
@@ -16768,7 +18002,8 @@ var sections41 = [
|
|
|
16768
18002
|
],
|
|
16769
18003
|
faq: [
|
|
16770
18004
|
{ question: "What condition fields are available?", answer: "Conditions can match on `document_type`, `source`, `language`, `sensitivity`, and other document metadata fields." },
|
|
16771
|
-
{ question: "Can a rule have multiple actions?", answer: "Yes. Actions are executed in order. Common combinations include assigning a schema and triggering a job." }
|
|
18005
|
+
{ question: "Can a rule have multiple actions?", answer: "Yes. Actions are executed in order. Common combinations include assigning a schema and triggering a job." },
|
|
18006
|
+
{ question: "What is the default priority?", answer: "If you omit the priority field, it defaults to 100. Use the reorder endpoint to adjust priorities after creation." }
|
|
16772
18007
|
],
|
|
16773
18008
|
mentions: ["create rule", "conditions", "actions", "assign_schema", "trigger_job", "route_to"]
|
|
16774
18009
|
},
|
|
@@ -16779,6 +18014,7 @@ var sections41 = [
|
|
|
16779
18014
|
seoTitle: "Get, Update, Delete Routing Rule \u2014 Talonic Docs",
|
|
16780
18015
|
description: "Retrieve, update, or delete a routing rule by ID. Update conditions, actions, priority, or enabled state. Deleting a rule does not affect previously routed documents.",
|
|
16781
18016
|
content: [
|
|
18017
|
+
{ type: "paragraph", text: "Retrieve, update, or delete a single routing rule. Updates take effect immediately \u2014 the next `document_classified` event will use the updated rule. Deleting a rule does not retroactively affect documents that were already routed by it." },
|
|
16782
18018
|
{
|
|
16783
18019
|
type: "endpoint",
|
|
16784
18020
|
method: "GET",
|
|
@@ -16946,7 +18182,10 @@ var sections41 = [
|
|
|
16946
18182
|
{ label: "List Routing Rules", slug: "list-routing-rules" },
|
|
16947
18183
|
{ label: "Reorder Rules", slug: "reorder-routing-rules" }
|
|
16948
18184
|
],
|
|
16949
|
-
faq: [
|
|
18185
|
+
faq: [
|
|
18186
|
+
{ question: "Do updates affect already-routed documents?", answer: "No. Rule changes only affect future document_classified events. Documents that were already routed by the previous version of the rule are not retroactively updated." },
|
|
18187
|
+
{ question: "Can I temporarily disable a rule without deleting it?", answer: "Yes. Use PATCH with is_active: false to disable the rule. It will be skipped during evaluation but retained for future re-activation." }
|
|
18188
|
+
],
|
|
16950
18189
|
mentions: ["update rule", "delete rule", "manage routing"]
|
|
16951
18190
|
},
|
|
16952
18191
|
{
|
|
@@ -16956,6 +18195,8 @@ var sections41 = [
|
|
|
16956
18195
|
seoTitle: "Reorder Routing Rules Endpoint \u2014 Talonic Docs",
|
|
16957
18196
|
description: "Reorder routing rules by providing an ordered array of rule IDs. Priority values are reassigned sequentially based on the new order.",
|
|
16958
18197
|
content: [
|
|
18198
|
+
{ type: "paragraph", text: "Reassign priority values for all routing rules at once. Pass an ordered array of rule IDs \u2014 the first ID receives priority 1, the second receives priority 2, and so on. This is the recommended way to change evaluation order after initial creation." },
|
|
18199
|
+
{ type: "callout", variant: "warning", text: "All active rule IDs must be included in the `rule_ids` array. Omitting any active rule returns a validation error. Inactive rules should also be included to maintain a consistent priority sequence." },
|
|
16959
18200
|
{
|
|
16960
18201
|
type: "endpoint",
|
|
16961
18202
|
method: "POST",
|
|
@@ -17007,7 +18248,8 @@ var sections41 = [
|
|
|
17007
18248
|
{ label: "Manage Routing Rule", slug: "manage-routing-rule" }
|
|
17008
18249
|
],
|
|
17009
18250
|
faq: [
|
|
17010
|
-
{ question: "Do I need to include all rule IDs?", answer: "Yes. All active rule IDs must be included in the array. Omitting a rule ID will result in an error." }
|
|
18251
|
+
{ question: "Do I need to include all rule IDs?", answer: "Yes. All active rule IDs must be included in the array. Omitting a rule ID will result in an error." },
|
|
18252
|
+
{ question: "Does reordering affect currently processing documents?", answer: "No. Reordering only affects future document_classified events. Documents currently being processed continue with their already-matched rule." }
|
|
17011
18253
|
],
|
|
17012
18254
|
mentions: ["reorder", "priority", "rule ordering"]
|
|
17013
18255
|
}
|
|
@@ -17026,6 +18268,18 @@ var sections42 = [
|
|
|
17026
18268
|
type: "paragraph",
|
|
17027
18269
|
text: "Billing settings control whether AI agents can autonomously top up credits. A human must enable auto top-up and configure the threshold and amount before agents can call the topup endpoint."
|
|
17028
18270
|
},
|
|
18271
|
+
{
|
|
18272
|
+
type: "paragraph",
|
|
18273
|
+
text: "The auto top-up system is designed with a **human-in-the-loop** safety model. An organization admin enables it once with a threshold and amount, and from that point agents can autonomously maintain the credit balance without further human intervention."
|
|
18274
|
+
},
|
|
18275
|
+
{
|
|
18276
|
+
type: "list",
|
|
18277
|
+
items: [
|
|
18278
|
+
"Read current settings with `GET /v1/billing/settings`.",
|
|
18279
|
+
"Enable auto top-up and set threshold/amount with `PATCH /v1/billing/settings`.",
|
|
18280
|
+
"Only users with `write` scope can modify settings \u2014 agents cannot enable auto top-up themselves."
|
|
18281
|
+
]
|
|
18282
|
+
},
|
|
17029
18283
|
{
|
|
17030
18284
|
type: "endpoint",
|
|
17031
18285
|
method: "GET",
|
|
@@ -17118,6 +18372,14 @@ var sections42 = [
|
|
|
17118
18372
|
{
|
|
17119
18373
|
question: "Who can enable auto top-up?",
|
|
17120
18374
|
answer: "Only a human with write access can enable auto top-up via PATCH /v1/billing/settings. Agents cannot enable it themselves."
|
|
18375
|
+
},
|
|
18376
|
+
{
|
|
18377
|
+
question: "What are the limits for auto_topup_amount?",
|
|
18378
|
+
answer: "The minimum is 1,000 credits and the maximum is 500,000 credits per top-up. Values outside this range return a 400 error."
|
|
18379
|
+
},
|
|
18380
|
+
{
|
|
18381
|
+
question: "Does disabling auto top-up affect the current balance?",
|
|
18382
|
+
answer: "No. Disabling auto top-up only prevents future autonomous top-ups. The current credit balance is unchanged."
|
|
17121
18383
|
}
|
|
17122
18384
|
],
|
|
17123
18385
|
mentions: ["billing settings", "auto top-up", "threshold", "credits"]
|
|
@@ -17133,6 +18395,11 @@ var sections42 = [
|
|
|
17133
18395
|
type: "paragraph",
|
|
17134
18396
|
text: "AI agents call this endpoint to autonomously add credits when the balance falls below the configured threshold. **A human must first enable auto top-up** via `PATCH /v1/billing/settings`."
|
|
17135
18397
|
},
|
|
18398
|
+
{
|
|
18399
|
+
type: "paragraph",
|
|
18400
|
+
text: "This endpoint is a safe no-op when the balance is already above the threshold \u2014 it returns `topped_up: false` without adding credits. Agents can safely call it on every extraction cycle without risk of over-provisioning."
|
|
18401
|
+
},
|
|
18402
|
+
{ type: "callout", variant: "info", text: "Combine this endpoint with the `X-Talonic-Balance-Credits` response header from `POST /v1/extract` to build an autonomous credit management loop. Check the balance header after each extraction and call top-up when it drops below your threshold." },
|
|
17136
18403
|
{
|
|
17137
18404
|
type: "endpoint",
|
|
17138
18405
|
method: "POST",
|
|
@@ -17213,6 +18480,10 @@ var sections42 = [
|
|
|
17213
18480
|
{
|
|
17214
18481
|
question: "What scope does the API key need?",
|
|
17215
18482
|
answer: "The billing scope. This must be explicitly granted when creating the API key \u2014 existing keys do not have it by default."
|
|
18483
|
+
},
|
|
18484
|
+
{
|
|
18485
|
+
question: "Is it safe to call top-up on every request?",
|
|
18486
|
+
answer: "Yes. When the balance is above the threshold, the endpoint returns topped_up: false without adding credits. There is no cost or side effect for a no-op call."
|
|
17216
18487
|
}
|
|
17217
18488
|
],
|
|
17218
18489
|
mentions: ["auto top-up", "agent", "billing scope", "credits", "autonomous"]
|
|
@@ -17228,6 +18499,10 @@ var sections42 = [
|
|
|
17228
18499
|
type: "paragraph",
|
|
17229
18500
|
text: "Every successful `POST /v1/extract` response includes cost headers so AI agents can track spending without a separate API call:"
|
|
17230
18501
|
},
|
|
18502
|
+
{
|
|
18503
|
+
type: "paragraph",
|
|
18504
|
+
text: "Cost tracking is built into the extraction response to support autonomous agent workflows. Instead of polling a separate balance endpoint, agents read the cost headers inline and decide whether to trigger a top-up. The `Cells-Resolved-Registry` vs `Cells-Resolved-AI` breakdown shows how many fields were resolved from cached registry data (free) versus AI extraction (metered)."
|
|
18505
|
+
},
|
|
17231
18506
|
{
|
|
17232
18507
|
type: "param-table",
|
|
17233
18508
|
title: "Response headers",
|
|
@@ -17253,7 +18528,8 @@ X-Talonic-Cells-Resolved-AI: 1`
|
|
|
17253
18528
|
{
|
|
17254
18529
|
type: "paragraph",
|
|
17255
18530
|
text: "Agents can read these headers after every extraction to decide whether to call `POST /v1/billing/topup` to replenish credits."
|
|
17256
|
-
}
|
|
18531
|
+
},
|
|
18532
|
+
{ type: "callout", variant: "info", text: "Registry-resolved cells are free because the value was already known from a previous extraction. Over time, as your field registry grows, more cells resolve from the registry and fewer require paid AI extraction." }
|
|
17257
18533
|
],
|
|
17258
18534
|
related: [
|
|
17259
18535
|
{ label: "POST /v1/extract", slug: "post-extract" },
|
|
@@ -17264,6 +18540,14 @@ X-Talonic-Cells-Resolved-AI: 1`
|
|
|
17264
18540
|
{
|
|
17265
18541
|
question: "Are cost headers included on async (202) responses?",
|
|
17266
18542
|
answer: "No. Cost headers are only included on synchronous 200 responses where extraction completes immediately. Async responses return a poll URL instead."
|
|
18543
|
+
},
|
|
18544
|
+
{
|
|
18545
|
+
question: "Are cost headers included on batch extraction responses?",
|
|
18546
|
+
answer: "No. Batch extraction (processing_mode=batch) defers extraction to the provider batch API. Cost is calculated when the batch completes, not at upload time."
|
|
18547
|
+
},
|
|
18548
|
+
{
|
|
18549
|
+
question: "What is the credit-to-EUR conversion rate?",
|
|
18550
|
+
answer: "The rate is configured per organization and visible in the billing settings. One credit typically equals EUR 0.001, but this may vary by plan tier."
|
|
17267
18551
|
}
|
|
17268
18552
|
],
|
|
17269
18553
|
mentions: ["cost headers", "X-Talonic-Cost", "credits", "balance", "cells resolved"]
|