@talonic/docs 0.20.8 → 0.20.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/content.js +1560 -276
  2. package/package.json +1 -1
package/dist/content.js CHANGED
@@ -427,6 +427,26 @@ var sections = [
427
427
  {
428
428
  type: "paragraph",
429
429
  text: "**Supported Formats:** 25+ file types. **Resolution:** 4-phase pipeline. **Instant Matches:** ~30% of cells (free)."
430
+ },
431
+ {
432
+ type: "paragraph",
433
+ text: "Talonic is an **agentic data structuring platform**. It ingests documents of any type, discovers every data point inside them, builds a knowledge graph of canonical fields, and deploys AI agents to fill structured output schemas. Every cell in the output carries provenance metadata \u2014 which pipeline phase filled it, the confidence score, and an AI reasoning trace linking back to the source document."
434
+ },
435
+ {
436
+ type: "list",
437
+ ordered: false,
438
+ items: [
439
+ "**25+ file formats** \u2014 PDF, DOCX, XLSX, images, HTML, JSON, CSV, email formats (EML, MSG), and ZIP archives.",
440
+ "**4-phase extraction pipeline** \u2014 resolve from the knowledge graph, extract with AI agents, re-resolve, then transform and validate.",
441
+ "**~30% instant matches** \u2014 cells filled from graph lookup are free and instant, reducing both cost and latency.",
442
+ "**Per-cell provenance** \u2014 every value traces back to its source with confidence scores and reasoning.",
443
+ "**Batch mode** \u2014 process large backlogs at 50% cost with a 48-hour delivery window."
444
+ ]
445
+ },
446
+ {
447
+ type: "callout",
448
+ variant: "info",
449
+ text: "Talonic uses Anthropic Claude for intelligent extraction and reasoning. The platform handles OCR, classification, field discovery, and schema generation automatically \u2014 you provide documents and define what output you need."
430
450
  }
431
451
  ],
432
452
  related: [
@@ -442,6 +462,14 @@ var sections = [
442
462
  {
443
463
  question: "How many file formats does Talonic support?",
444
464
  answer: "Talonic supports 25+ file types including PDF, DOCX, XLSX, images (PNG, JPG), plain text, HTML, JSON, CSV, email formats (EML, MSG), and ZIP archives."
465
+ },
466
+ {
467
+ question: 'What does "per-cell provenance" mean?',
468
+ answer: "Every cell in the structured output carries metadata about which pipeline phase filled it, a confidence score, an AI reasoning trace, and references back to the source document. This makes every value auditable and explainable."
469
+ },
470
+ {
471
+ question: "How much do instant graph matches cost?",
472
+ answer: "Graph matches (approximately 30% of cells) are free. They are filled from the knowledge graph through deterministic lookup, so no LLM call is needed. Only cells that require AI extraction incur cost."
445
473
  }
446
474
  ],
447
475
  mentions: [
@@ -449,7 +477,9 @@ var sections = [
449
477
  "structured data",
450
478
  "provenance",
451
479
  "AI reasoning traces",
452
- "4-phase pipeline"
480
+ "4-phase pipeline",
481
+ "agentic platform",
482
+ "knowledge graph"
453
483
  ]
454
484
  },
455
485
  {
@@ -463,6 +493,10 @@ var sections = [
463
493
  type: "paragraph",
464
494
  text: "The platform revolves around a small set of interconnected concepts. Understanding these will help you navigate every feature."
465
495
  },
496
+ {
497
+ type: "paragraph",
498
+ text: "Each concept builds on the previous ones. **Sources** produce **Documents**, documents yield **Fields**, fields aggregate into the **Field Registry**, the registry powers **Schemas**, schemas drive **Jobs**, and jobs produce structured output with **Provenance**. **Cases** emerge organically when documents share entities like names, reference numbers, or project codes."
499
+ },
466
500
  {
467
501
  type: "param-table",
468
502
  params: [
@@ -507,6 +541,11 @@ var sections = [
507
541
  description: "Per-cell metadata: which phase filled it, confidence score, reasoning trace, source references."
508
542
  }
509
543
  ]
544
+ },
545
+ {
546
+ type: "callout",
547
+ variant: "info",
548
+ text: "The **Field Registry** is the heart of the platform. As you process more documents, the registry grows \u2014 fields are clustered semantically, promoted through tiers, and enriched with master extraction instructions. This accumulated knowledge makes every subsequent extraction faster and more accurate."
510
549
  }
511
550
  ],
512
551
  related: [
@@ -522,6 +561,14 @@ var sections = [
522
561
  {
523
562
  question: "What is provenance in Talonic?",
524
563
  answer: "Provenance is per-cell metadata that tracks which pipeline phase filled the value, the confidence score, an AI reasoning trace, and source references back to the original document."
564
+ },
565
+ {
566
+ question: "How do Cases form?",
567
+ answer: "Cases form automatically through the linking system. When two or more documents share entities \u2014 like a person's name, a reference number, or a project code \u2014 they are connected into a case via a bipartite document-entity graph."
568
+ },
569
+ {
570
+ question: "What is the difference between a Generated Schema and a Template Schema?",
571
+ answer: "Generated Schemas are created automatically by the platform based on the document types it discovers. Template Schemas are user-defined for specific output needs \u2014 you choose which fields to include and how they map to the Field Registry."
525
572
  }
526
573
  ],
527
574
  mentions: [
@@ -532,7 +579,8 @@ var sections = [
532
579
  "schema",
533
580
  "job",
534
581
  "case",
535
- "provenance"
582
+ "provenance",
583
+ "knowledge graph"
536
584
  ]
537
585
  },
538
586
  {
@@ -564,6 +612,15 @@ var sections = [
564
612
  "**Review & approve** \u2014 Review with confidence indicators, provenance, and validation flags. Correct any values.",
565
613
  "**Deliver** \u2014 Push approved data to webhooks, REST APIs, SFTP, email, or cloud storage."
566
614
  ]
615
+ },
616
+ {
617
+ type: "paragraph",
618
+ text: "The pipeline is designed to be **progressive** \u2014 results appear as each phase completes rather than waiting for the entire job to finish. Phase 1 (graph resolve) fills ~30% of cells instantly and for free. Phase 2 (AI extraction) fills the remaining gaps. Phases 3 and 4 handle re-resolution and transformation. You can start reviewing early results while later phases are still running."
619
+ },
620
+ {
621
+ type: "callout",
622
+ variant: "info",
623
+ text: "The **Dashboard** provides a real-time view of your pipeline progress with telemetry on strategy distribution, tier funnel, capture hit rate, and per-field state distribution. Use it to understand how well the knowledge graph is performing."
567
624
  }
568
625
  ],
569
626
  related: [
@@ -579,6 +636,14 @@ var sections = [
579
636
  {
580
637
  question: "What percentage of cells are filled by graph matches?",
581
638
  answer: "Approximately 30% of cells are filled instantly from graph matches at no AI cost. The remaining ~70% are filled by AI agents in subsequent pipeline phases."
639
+ },
640
+ {
641
+ question: "Can I review results before the full pipeline completes?",
642
+ answer: "Yes. The pipeline is progressive \u2014 Phase 1 graph matches appear instantly, and you can start reviewing while Phase 2 (AI extraction) and later phases are still running."
643
+ },
644
+ {
645
+ question: "What delivery destinations are supported?",
646
+ answer: "Six live connectors: webhook (with HMAC-SHA256 signing), SFTP, Amazon S3, Azure Blob Storage, Google Drive, and OneDrive. Additional integrations for Sheets, SharePoint, Gmail, Outlook, and HubSpot are planned."
582
647
  }
583
648
  ],
584
649
  mentions: [
@@ -588,7 +653,9 @@ var sections = [
588
653
  "field registry",
589
654
  "schema",
590
655
  "job execution",
591
- "delivery"
656
+ "delivery",
657
+ "progressive results",
658
+ "dashboard"
592
659
  ]
593
660
  },
594
661
  {
@@ -602,6 +669,10 @@ var sections = [
602
669
  type: "paragraph",
603
670
  text: "Navigate using the sidebar. The platform is organized into three primary sections: **Sources** (ingest), **Structuring** (process & validate), and **Outputs** (deliver)."
604
671
  },
672
+ {
673
+ type: "paragraph",
674
+ text: "**Sources** is where documents enter the system \u2014 through manual upload, connected cloud storage, email inboxes, or the API. **Structuring** is where you define schemas, run extraction jobs, and review results. **Outputs** is where approved data is delivered to downstream systems through configured bindings."
675
+ },
605
676
  {
606
677
  type: "ui-excerpt",
607
678
  id: "sidebar-navigation",
@@ -611,6 +682,17 @@ var sections = [
611
682
  {
612
683
  type: "callout",
613
684
  text: "The fastest path to results: upload documents in **Sources**, then go to **Structuring → Runs → New** to create your first extraction job."
685
+ },
686
+ {
687
+ type: "list",
688
+ ordered: true,
689
+ items: [
690
+ "Upload a few sample documents in **Sources** (drag and drop or use a connected source).",
691
+ "Wait for extraction to complete \u2014 documents are OCR'd and classified, and fields are extracted automatically.",
692
+ "Navigate to **Structuring** and review the auto-generated schemas or create a custom template.",
693
+ "Create a new **Run** by selecting a schema and the documents to process.",
694
+ "Review results in the run view \u2014 each cell shows confidence, provenance, and reasoning."
695
+ ]
614
696
  }
615
697
  ],
616
698
  related: [
@@ -626,9 +708,17 @@ var sections = [
626
708
  {
627
709
  question: "How is the Talonic platform organized?",
628
710
  answer: "The platform is organized into three primary sections: Sources (document ingest), Structuring (processing & validation), and Outputs (delivery to downstream systems)."
711
+ },
712
+ {
713
+ question: "Do I need to define a schema before processing documents?",
714
+ answer: "No. Talonic auto-generates schemas based on the document types it discovers during extraction. You can use these generated schemas directly or create custom template schemas for specific output needs."
715
+ },
716
+ {
717
+ question: "What source connections are available?",
718
+ answer: "Ten source connectors: Google Drive, Gmail, SharePoint, OneDrive, Outlook, Teams, Notion, SQL databases (MSSQL/PostgreSQL), Amazon S3, and Azure Blob Storage. You can also upload files manually or ingest via the REST API."
629
719
  }
630
720
  ],
631
- mentions: ["sidebar", "sources", "structuring", "outputs", "navigation", "Cmd+K"]
721
+ mentions: ["sidebar", "sources", "structuring", "outputs", "navigation", "Cmd+K", "source connectors"]
632
722
  }
633
723
  ];
634
724
 
@@ -3449,6 +3539,27 @@ var sections11 = [
3449
3539
  {
3450
3540
  type: "paragraph",
3451
3541
  text: "Navigate to **Workspace Settings → Shared Dialects** to manage workspace-level formatting. Individual schemas can override these defaults with inline dialect definitions when needed."
3542
+ },
3543
+ {
3544
+ type: "paragraph",
3545
+ text: "Dialects ensure consistency across all your structured output. When your downstream systems expect dates in `YYYY-MM-DD` format, numbers with `.` as the decimal separator, and CSVs delimited by `;`, you configure this once in the shared dialect rather than repeating it in every schema."
3546
+ },
3547
+ {
3548
+ type: "list",
3549
+ ordered: false,
3550
+ items: [
3551
+ "**Date format** \u2014 control how dates are serialized (e.g., `YYYY-MM-DD`, `DD/MM/YYYY`, `MM-DD-YYYY`).",
3552
+ "**Number locale** \u2014 set the decimal separator and thousands grouping.",
3553
+ "**CSV delimiter** \u2014 choose comma, semicolon, tab, or pipe for CSV exports.",
3554
+ "**Null representation** \u2014 define how missing values appear (empty string, `NULL`, `N/A`, etc.).",
3555
+ "**Boolean format** \u2014 choose between `true/false`, `yes/no`, `1/0`, or custom values.",
3556
+ "**Encoding** \u2014 set the character encoding for file exports (UTF-8, ISO-8859-1, etc.)."
3557
+ ]
3558
+ },
3559
+ {
3560
+ type: "callout",
3561
+ variant: "info",
3562
+ text: "When a schema defines an inline dialect, it takes precedence over the shared dialect for that schema only. All other schemas continue using the workspace defaults. This lets you handle special cases without affecting the rest of your output."
3452
3563
  }
3453
3564
  ],
3454
3565
  related: [
@@ -3464,6 +3575,14 @@ var sections11 = [
3464
3575
  {
3465
3576
  question: "Can individual schemas override shared dialects?",
3466
3577
  answer: "Yes. Individual schemas can override workspace-level shared dialect settings with inline dialect definitions when specific formatting is needed."
3578
+ },
3579
+ {
3580
+ question: "When should I use a shared dialect vs an inline dialect?",
3581
+ answer: "Use shared dialects for workspace-wide defaults that apply to most schemas. Use inline dialects only when a specific schema needs different formatting \u2014 for example, a schema that outputs dates in a different format for a particular downstream system."
3582
+ },
3583
+ {
3584
+ question: "Do shared dialects affect the extraction process?",
3585
+ answer: "No. Dialects only affect output formatting \u2014 how extracted values are serialized in exports and deliveries. The extraction and validation phases work with normalized internal representations regardless of dialect settings."
3467
3586
  }
3468
3587
  ],
3469
3588
  mentions: [
@@ -3471,7 +3590,9 @@ var sections11 = [
3471
3590
  "workspace settings",
3472
3591
  "output formatting",
3473
3592
  "date format",
3474
- "number locale"
3593
+ "number locale",
3594
+ "CSV delimiter",
3595
+ "null representation"
3475
3596
  ]
3476
3597
  },
3477
3598
  {
@@ -3488,6 +3609,25 @@ var sections11 = [
3488
3609
  {
3489
3610
  type: "paragraph",
3490
3611
  text: "Navigate to **Workspace Settings → Reference Primitives** to manage workspace-level lookup tables. Each primitive is versioned independently, and schemas reference a specific version to ensure stable resolution."
3612
+ },
3613
+ {
3614
+ type: "paragraph",
3615
+ text: "The lookup convention follows a `key` / `value` structure where the `key` is the output code and the `value` is the human-readable label. During extraction, the platform maps FROM labels found in documents TO the canonical codes defined in the reference primitive. This ensures consistent, machine-readable output regardless of how values appear in source documents."
3616
+ },
3617
+ {
3618
+ type: "callout",
3619
+ variant: "info",
3620
+ text: "Versioning protects production stability. When you update a reference primitive, existing schemas continue using their pinned version until you explicitly update the version reference. This prevents unexpected changes to live extraction pipelines."
3621
+ },
3622
+ {
3623
+ type: "list",
3624
+ ordered: false,
3625
+ items: [
3626
+ "**Key-value structure** \u2014 `key` is the canonical code, `value` is the label the platform matches against.",
3627
+ "**Independent versioning** \u2014 each update creates a new version; schemas pin to a specific version.",
3628
+ "**Cross-schema sharing** \u2014 one primitive can be referenced by any number of schemas.",
3629
+ "**3-tier lookup cascade** \u2014 string normalization, token fuzzy matching, and Haiku LLM fallback for ambiguous values."
3630
+ ]
3491
3631
  }
3492
3632
  ],
3493
3633
  related: [
@@ -3503,6 +3643,14 @@ var sections11 = [
3503
3643
  {
3504
3644
  question: "How do reference primitives differ from schema reference tables?",
3505
3645
  answer: "Reference primitives are workspace-level and shared across schemas with independent versioning. Schema reference tables are defined inline within a specific schema."
3646
+ },
3647
+ {
3648
+ question: "How does the lookup cascade work?",
3649
+ answer: "The platform tries three tiers in order. First, it attempts an exact match after string normalization (whitespace and case normalization). If that fails, it falls back to token-based fuzzy matching. If the fuzzy match is below the confidence threshold, a Haiku LLM call resolves the ambiguity."
3650
+ },
3651
+ {
3652
+ question: "What happens when I update a reference primitive?",
3653
+ answer: "A new version is created. Existing schemas continue using their pinned version. You must explicitly update the version reference in each schema to use the new data, which protects production pipelines from unexpected changes."
3506
3654
  }
3507
3655
  ],
3508
3656
  mentions: [
@@ -3510,7 +3658,8 @@ var sections11 = [
3510
3658
  "lookup tables",
3511
3659
  "versioned",
3512
3660
  "workspace-level",
3513
- "code mapping"
3661
+ "code mapping",
3662
+ "lookup cascade"
3514
3663
  ]
3515
3664
  },
3516
3665
  {
@@ -3527,6 +3676,24 @@ var sections11 = [
3527
3676
  {
3528
3677
  type: "paragraph",
3529
3678
  text: "Navigate to **Workspace Settings → Change Review** to configure review requirements. When enabled, changes are queued for approval before being applied, ensuring that production data pipelines are not disrupted by unreviewed modifications."
3679
+ },
3680
+ {
3681
+ type: "paragraph",
3682
+ text: "Change review is particularly important for workspaces that feed downstream systems through delivery bindings. A small change to a schema field mapping or a reference primitive value can ripple through to every document processed after that point. The review process creates a checkpoint where a second pair of eyes can verify the change before it goes live."
3683
+ },
3684
+ {
3685
+ type: "list",
3686
+ ordered: false,
3687
+ items: [
3688
+ "**Schema changes** \u2014 field additions, removals, mapping updates, and format constraint modifications.",
3689
+ "**Shared dialect changes** \u2014 date format, number locale, delimiter, and encoding updates.",
3690
+ "**Reference primitive changes** \u2014 new versions of lookup tables and key-value modifications."
3691
+ ]
3692
+ },
3693
+ {
3694
+ type: "callout",
3695
+ variant: "warning",
3696
+ text: "Change review is recommended for any workspace that delivers data to production systems. Without it, a schema modification takes effect immediately and applies to all future extractions \u2014 there is no undo for documents already processed with the new configuration."
3530
3697
  }
3531
3698
  ],
3532
3699
  related: [
@@ -3542,6 +3709,14 @@ var sections11 = [
3542
3709
  {
3543
3710
  question: "When should I enable change review?",
3544
3711
  answer: "Enable change review for production workspaces where unreviewed modifications could disrupt live data pipelines. It ensures all changes are approved before being applied."
3712
+ },
3713
+ {
3714
+ question: "What types of changes require review?",
3715
+ answer: "Three categories: schema changes (field additions, removals, mapping updates, format constraints), shared dialect changes (formatting rules), and reference primitive changes (lookup table updates and new versions)."
3716
+ },
3717
+ {
3718
+ question: "Can I bypass change review for urgent fixes?",
3719
+ answer: "Change review can be disabled temporarily from Workspace Settings if an urgent fix is needed. However, this should be done with caution in production workspaces, and the review requirement should be re-enabled afterward."
3545
3720
  }
3546
3721
  ],
3547
3722
  mentions: [
@@ -3549,7 +3724,8 @@ var sections11 = [
3549
3724
  "governance",
3550
3725
  "production workspace",
3551
3726
  "approval workflow",
3552
- "review process"
3727
+ "review process",
3728
+ "delivery bindings"
3553
3729
  ]
3554
3730
  }
3555
3731
  ];
@@ -3566,6 +3742,26 @@ var sections12 = [
3566
3742
  {
3567
3743
  type: "paragraph",
3568
3744
  text: "Press `Cmd+K` (or `Ctrl+K`) from any page to open global search. Searches across documents, extracted values, field names, schema names, and sources simultaneously."
3745
+ },
3746
+ {
3747
+ type: "paragraph",
3748
+ text: "Omnisearch is designed to be the single entry point for finding anything in the platform. Rather than navigating to specific pages to search within them, Omnisearch queries a **materialized values index** that aggregates data across all your content. Results are grouped by category so you can quickly distinguish between a document match and a field name match."
3749
+ },
3750
+ {
3751
+ type: "callout",
3752
+ variant: "info",
3753
+ text: "Omnisearch results update as you type. The materialized index is rebuilt automatically whenever documents are processed or schemas change, so results are always current."
3754
+ },
3755
+ {
3756
+ type: "list",
3757
+ ordered: false,
3758
+ items: [
3759
+ "**Documents** \u2014 matches against file names, extracted text, and metadata.",
3760
+ "**Extracted values** \u2014 finds specific data points across all processed documents.",
3761
+ "**Field names** \u2014 searches the Field Registry for canonical field definitions.",
3762
+ "**Schema names** \u2014 locates generated and template schemas by title.",
3763
+ "**Sources** \u2014 matches source connection names and configurations."
3764
+ ]
3569
3765
  }
3570
3766
  ],
3571
3767
  related: [
@@ -3581,9 +3777,17 @@ var sections12 = [
3581
3777
  {
3582
3778
  question: "What does Omnisearch search across?",
3583
3779
  answer: "Omnisearch searches across documents, extracted values, field names, schema names, and sources simultaneously, providing results from all categories in a single query."
3780
+ },
3781
+ {
3782
+ question: "Does Omnisearch work on extracted data or just file names?",
3783
+ answer: "Omnisearch queries a materialized values index that includes both file-level metadata and individual extracted data points. You can search for a specific invoice number, date, or name and find the document that contains it."
3784
+ },
3785
+ {
3786
+ question: "How quickly are new documents searchable in Omnisearch?",
3787
+ answer: "Documents become searchable as soon as extraction completes. The materialized index is updated automatically during document processing, so there is no manual reindex step."
3584
3788
  }
3585
3789
  ],
3586
- mentions: ["omnisearch", "global search", "Cmd+K", "Ctrl+K", "document search"]
3790
+ mentions: ["omnisearch", "global search", "Cmd+K", "Ctrl+K", "document search", "materialized values index"]
3587
3791
  },
3588
3792
  {
3589
3793
  slug: "document-filters",
@@ -3595,6 +3799,30 @@ var sections12 = [
3595
3799
  {
3596
3800
  type: "paragraph",
3597
3801
  text: "The Documents page supports advanced filtering by extracted field values. Build conditions with field autocomplete, comparison operators (eq, contains, gt, between, is_empty, etc.), and combine multiple conditions. Filter state is URL-serializable so you can share filtered views. Save frequently-used filters as presets."
3802
+ },
3803
+ {
3804
+ type: "paragraph",
3805
+ text: "Filters operate on the **materialized values index** \u2014 a flattened representation of every extracted field value across your documents. This means filtering is fast even across thousands of documents because queries run against pre-indexed data rather than scanning raw extractions at query time."
3806
+ },
3807
+ {
3808
+ type: "callout",
3809
+ variant: "info",
3810
+ text: 'Combine multiple filter conditions to build precise queries. For example, filter by `document_type eq "Invoice"` AND `total_amount gt 10000` AND `date between "2025-01-01" "2025-12-31"` to find high-value invoices from a specific year.'
3811
+ },
3812
+ {
3813
+ type: "list",
3814
+ ordered: false,
3815
+ items: [
3816
+ "`eq` \u2014 exact match (case-insensitive for strings).",
3817
+ "`contains` \u2014 substring match within field values.",
3818
+ "`gt` / `lt` \u2014 greater than or less than for numeric and date fields.",
3819
+ "`between` \u2014 range match with lower and upper bounds.",
3820
+ "`is_empty` \u2014 finds documents where a specific field has no extracted value."
3821
+ ]
3822
+ },
3823
+ {
3824
+ type: "paragraph",
3825
+ text: "Filter state is encoded in the URL query string, and the backend translates it into a query using dynamic SQL generation. Because the full filter lives in the URL, you can bookmark filtered views, share them with teammates via a link, or save them as **presets** for one-click access to commonly used queries."
3598
3826
  }
3599
3827
  ],
3600
3828
  related: [
@@ -3610,6 +3838,14 @@ var sections12 = [
3610
3838
  {
3611
3839
  question: "Can I share filtered views with my team?",
3612
3840
  answer: "Yes. Filter state is URL-serializable, so you can share filtered views by copying the URL. You can also save frequently-used filters as presets."
3841
+ },
3842
+ {
3843
+ question: "How does field autocomplete work in filters?",
3844
+ answer: "When building a filter condition, the field selector shows all canonical fields from your Field Registry with autocomplete. Start typing a field name and matching fields appear, ensuring you always filter on valid, extracted fields."
3845
+ },
3846
+ {
3847
+ question: "Can I filter on fields that have no value?",
3848
+ answer: "Yes. The is_empty operator lets you find documents where a specific field was not extracted or has no value. This is useful for identifying documents that may need reprocessing or manual review."
3613
3849
  }
3614
3850
  ],
3615
3851
  mentions: [
@@ -3617,7 +3853,9 @@ var sections12 = [
3617
3853
  "field autocomplete",
3618
3854
  "comparison operators",
3619
3855
  "URL-serializable",
3620
- "filter presets"
3856
+ "filter presets",
3857
+ "materialized values index",
3858
+ "dynamic SQL"
3621
3859
  ]
3622
3860
  }
3623
3861
  ];
@@ -3930,6 +4168,10 @@ var sections14 = [
3930
4168
  type: "paragraph",
3931
4169
  text: "Organizations support role-based access control:"
3932
4170
  },
4171
+ {
4172
+ type: "paragraph",
4173
+ text: "Every user in your organization is assigned one of four roles that determine what they can see and do. Roles are hierarchical \u2014 each level includes all permissions of the levels below it. Choose the most restrictive role that still lets a team member do their job."
4174
+ },
3933
4175
  {
3934
4176
  type: "param-table",
3935
4177
  title: "Roles",
@@ -3959,6 +4201,22 @@ var sections14 = [
3959
4201
  {
3960
4202
  type: "paragraph",
3961
4203
  text: "New members are added via domain matching: company email domains auto-match to your org with **pending** status requiring admin approval. Manage from the Team page."
4204
+ },
4205
+ {
4206
+ type: "callout",
4207
+ variant: "info",
4208
+ text: "Domain matching streamlines onboarding for larger teams. When a new user signs up with an email address matching your organization's domain (e.g., `@yourcompany.com`), they are automatically associated with your org in a **pending** state. An admin must approve them before they gain access."
4209
+ },
4210
+ {
4211
+ type: "list",
4212
+ ordered: true,
4213
+ items: [
4214
+ "Navigate to the **Team** page from the sidebar.",
4215
+ "Review any **pending** members waiting for approval.",
4216
+ "Approve or reject each pending member.",
4217
+ "Assign the appropriate role based on their responsibilities.",
4218
+ "Optionally, change roles later from the same Team page."
4219
+ ]
3962
4220
  }
3963
4221
  ],
3964
4222
  related: [
@@ -3974,6 +4232,14 @@ var sections14 = [
3974
4232
  {
3975
4233
  question: "How are new team members added?",
3976
4234
  answer: "New members are added via domain matching: company email domains auto-match to your organization with pending status. Admin approval is required before access is granted."
4235
+ },
4236
+ {
4237
+ question: "Can I change a team member's role after they join?",
4238
+ answer: "Yes. Navigate to the Team page, find the member, and update their role. Changes take effect immediately. Only Admins and Owners can modify roles."
4239
+ },
4240
+ {
4241
+ question: "What happens if I remove a team member?",
4242
+ answer: "Removing a team member revokes their access to the organization immediately. Their past actions (edits, uploads, approvals) remain in the audit trail. They can be re-added later through the same domain matching process."
3977
4243
  }
3978
4244
  ],
3979
4245
  mentions: [
@@ -3984,7 +4250,8 @@ var sections14 = [
3984
4250
  "Member",
3985
4251
  "Admin",
3986
4252
  "Owner",
3987
- "domain matching"
4253
+ "domain matching",
4254
+ "pending approval"
3988
4255
  ]
3989
4256
  },
3990
4257
  {
@@ -3998,6 +4265,10 @@ var sections14 = [
3998
4265
  type: "paragraph",
3999
4266
  text: "The Usage & Registry page replaces the legacy credits view with a comprehensive cost breakdown. It shows per-feature cost (extraction, OCR, batch, matching), a daily cost chart, and a full call log with model, tokens, and cost per request. The **Master view** (admin only) shows per-customer breakdowns and platform-wide statistics."
4000
4267
  },
4268
+ {
4269
+ type: "paragraph",
4270
+ text: "Understanding your usage patterns helps optimize costs. For example, if extraction dominates your spend, consider using **batch mode** for non-urgent documents to cut that cost in half. The daily cost chart makes it easy to spot usage spikes and correlate them with specific ingestion events."
4271
+ },
4001
4272
  {
4002
4273
  type: "param-table",
4003
4274
  title: "Usage views",
@@ -4023,6 +4294,11 @@ var sections14 = [
4023
4294
  description: "Per-customer breakdown and platform-wide aggregates. Accessible only in master (all-tenant) mode."
4024
4295
  }
4025
4296
  ]
4297
+ },
4298
+ {
4299
+ type: "callout",
4300
+ variant: "info",
4301
+ text: "The call log records every LLM and OCR call with full detail \u2014 model name, input/output token counts, latency, and cost. Use it to audit individual extractions or investigate unexpected cost increases."
4026
4302
  }
4027
4303
  ],
4028
4304
  related: [
@@ -4038,6 +4314,10 @@ var sections14 = [
4038
4314
  {
4039
4315
  question: "What is the Master view?",
4040
4316
  answer: "The Master view is an admin-only feature that shows per-customer breakdowns and platform-wide statistics. It is accessible only in master (all-tenant) mode."
4317
+ },
4318
+ {
4319
+ question: "How can I reduce my usage costs?",
4320
+ answer: "Use batch mode for non-urgent documents to cut extraction costs by 50%. Review the per-feature breakdown to identify your highest-cost operations, and use the daily cost chart to spot and investigate usage spikes."
4041
4321
  }
4042
4322
  ],
4043
4323
  mentions: [
@@ -4045,7 +4325,8 @@ var sections14 = [
4045
4325
  "cost breakdown",
4046
4326
  "daily cost chart",
4047
4327
  "call log",
4048
- "master view"
4328
+ "master view",
4329
+ "per-feature costs"
4049
4330
  ]
4050
4331
  },
4051
4332
  {
@@ -4058,6 +4339,26 @@ var sections14 = [
4058
4339
  {
4059
4340
  type: "paragraph",
4060
4341
  text: "Accessible from the user menu for admins and superadmins. Provides: customer management, user management, usage statistics, data clear & rebuild, and cross-tenant master registry view."
4342
+ },
4343
+ {
4344
+ type: "paragraph",
4345
+ text: "The Admin Panel is the central hub for platform-wide operations. **Customer management** lets you create, view, and delete organizations. **User management** provides a cross-tenant view of all platform users with the ability to remove accounts. The **data clear & rebuild** function wipes all data for a specific customer and reprocesses from scratch \u2014 useful during onboarding or after significant schema changes."
4346
+ },
4347
+ {
4348
+ type: "list",
4349
+ ordered: false,
4350
+ items: [
4351
+ "**Customer management** \u2014 create, list, and delete organizations.",
4352
+ "**User management** \u2014 view all users across tenants, remove accounts.",
4353
+ "**Usage statistics** \u2014 platform-wide cost and usage aggregates.",
4354
+ "**Data clear & rebuild** \u2014 wipe and reprocess all data for a customer.",
4355
+ "**Master registry** \u2014 cross-tenant view of the Field Registry and schemas."
4356
+ ]
4357
+ },
4358
+ {
4359
+ type: "callout",
4360
+ variant: "warning",
4361
+ text: "The **data clear** operation is irreversible. It deletes all documents, extractions, jobs, and results for the selected customer. Use with caution and only when a full reprocessing is genuinely needed."
4061
4362
  }
4062
4363
  ],
4063
4364
  related: [
@@ -4072,6 +4373,14 @@ var sections14 = [
4072
4373
  {
4073
4374
  question: "Who can access the Admin Panel?",
4074
4375
  answer: "The Admin Panel is accessible only to users with admin or superadmin roles, via the user menu in the platform navigation."
4376
+ },
4377
+ {
4378
+ question: "What does the data clear operation do?",
4379
+ answer: "Data clear wipes all documents, extractions, jobs, results, and related data for a specific customer. It is irreversible and intended for full reprocessing scenarios during onboarding or after major schema changes."
4380
+ },
4381
+ {
4382
+ question: "Can I view usage across all customers?",
4383
+ answer: "Yes. The Admin Panel's usage statistics show cross-tenant usage, per-customer cost breakdowns, and platform-wide aggregates. The master registry view is separate: it provides a cross-tenant view of the Field Registry and schemas."
4075
4384
  }
4076
4385
  ],
4077
4386
  mentions: [
@@ -4079,7 +4388,8 @@ var sections14 = [
4079
4388
  "customer management",
4080
4389
  "user management",
4081
4390
  "data clear",
4082
- "master registry"
4391
+ "master registry",
4392
+ "superadmin"
4083
4393
  ]
4084
4394
  },
4085
4395
  {
@@ -4089,6 +4399,10 @@ var sections14 = [
4089
4399
  seoTitle: "Keyboard Shortcuts \u2014 Talonic Docs",
4090
4400
  description: "Global keyboard shortcuts: Cmd+K / Ctrl+K for Omnisearch, Cmd+J / Ctrl+J for quick extract, and Escape to close overlays. Speed up your workflow with keyboard navigation.",
4091
4401
  content: [
4402
+ {
4403
+ type: "paragraph",
4404
+ text: "Talonic provides global keyboard shortcuts that work from any page in the platform. These shortcuts let you access common actions without leaving your current context, significantly speeding up daily workflows."
4405
+ },
4092
4406
  {
4093
4407
  type: "param-table",
4094
4408
  title: "Shortcuts",
@@ -4109,6 +4423,11 @@ var sections14 = [
4109
4423
  description: "Close overlays, modals, and search."
4110
4424
  }
4111
4425
  ]
4426
+ },
4427
+ {
4428
+ type: "callout",
4429
+ variant: "info",
4430
+ text: "The **quick extract** shortcut (`Cmd+J` / `Ctrl+J`) is the fastest way to upload a single document. It opens a streamlined upload interface that lets you drag a file and start processing immediately."
4112
4431
  }
4113
4432
  ],
4114
4433
  related: [
@@ -4123,6 +4442,10 @@ var sections14 = [
4123
4442
  {
4124
4443
  question: "What does the quick extract shortcut do?",
4125
4444
  answer: "Cmd+J / Ctrl+J opens the quick extract interface, allowing you to upload and process a document directly from any page."
4445
+ },
4446
+ {
4447
+ question: "Do shortcuts work inside modals or overlays?",
4448
+ answer: "The Escape shortcut works inside any modal or overlay to close it. Omnisearch (Cmd+K) works globally, even when other overlays are open. Quick extract (Cmd+J) is available from the main interface."
4126
4449
  }
4127
4450
  ],
4128
4451
  mentions: ["keyboard shortcuts", "Cmd+K", "Cmd+J", "Escape", "quick extract"]
@@ -4145,6 +4468,21 @@ var sections15 = [
4145
4468
  {
4146
4469
  type: "callout",
4147
4470
  text: "Batch mode cuts extraction cost in half. Stage 1 (OCR + classify) still runs immediately \u2014 only Stage 2 (Claude extraction) is deferred."
4471
+ },
4472
+ {
4473
+ type: "paragraph",
4474
+ text: "Under the hood, batch inference leverages the provider's native batch API (Anthropic Message Batches or AWS Bedrock invocation jobs). Documents accumulate in a queue and are submitted together, allowing the provider to schedule processing during off-peak capacity. This is why the cost reduction is possible without any loss in extraction quality."
4475
+ },
4476
+ {
4477
+ type: "list",
4478
+ ordered: false,
4479
+ items: [
4480
+ "**50% cost reduction** on all Claude extraction calls in Stage 2.",
4481
+ "**48-hour delivery window** \u2014 most batches complete well within this timeframe.",
4482
+ "**No quality difference** \u2014 the same extraction model and prompts are used as in real-time mode.",
4483
+ "**Immediate visibility** \u2014 documents appear in your library right after Stage 1 (OCR + classification).",
4484
+ "**Automatic result application** \u2014 when the batch completes, results are applied and documents transition to their final status."
4485
+ ]
4148
4486
  }
4149
4487
  ],
4150
4488
  related: [
@@ -4160,9 +4498,17 @@ var sections15 = [
4160
4498
  {
4161
4499
  question: "When should I use batch mode?",
4162
4500
  answer: "Batch mode is ideal for large backlog ingestion where real-time results are not required. It cuts extraction cost in half compared to real-time processing."
4501
+ },
4502
+ {
4503
+ question: "Is there a minimum number of documents for batch processing?",
4504
+ answer: "The batch system requires a minimum of 100 items per batch (a Bedrock requirement). If fewer documents are uploaded in batch mode, the system falls back to real-time processing with a warning."
4505
+ },
4506
+ {
4507
+ question: "Does batch mode affect extraction quality?",
4508
+ answer: "No. Batch mode uses the same Claude extraction model and prompts as real-time processing. The only difference is timing \u2014 extraction is deferred to take advantage of provider off-peak pricing."
4163
4509
  }
4164
4510
  ],
4165
- mentions: ["batch inference", "50% cost", "48-hour delivery", "backlog ingestion"]
4511
+ mentions: ["batch inference", "50% cost", "48-hour delivery", "backlog ingestion", "Message Batches API"]
4166
4512
  },
4167
4513
  {
4168
4514
  slug: "batch-processing",
@@ -4175,6 +4521,10 @@ var sections15 = [
4175
4521
  type: "paragraph",
4176
4522
  text: 'Set `processing_mode=batch` on upload (API) or toggle the "Batch" switch in the upload UI. Stage 1 (OCR + classification) runs immediately so documents appear in your library right away. Stage 2 (Claude extraction) is deferred to the provider\'s batch API for asynchronous processing.'
4177
4523
  },
4524
+ {
4525
+ type: "paragraph",
4526
+ text: "The two-stage architecture means you get immediate feedback on what was uploaded. Documents are OCR'd, classified by type, and triaged within seconds. Only the AI extraction step \u2014 where Claude reads the document and fills structured fields \u2014 is deferred to the batch queue for cost savings."
4527
+ },
4178
4528
  {
4179
4529
  type: "param-table",
4180
4530
  title: "Batch stages",
@@ -4190,6 +4540,15 @@ var sections15 = [
4190
4540
  description: "Claude extraction is queued for batch processing. Items accumulate, then submit to the batch API on a timer or threshold."
4191
4541
  }
4192
4542
  ]
4543
+ },
4544
+ {
4545
+ type: "callout",
4546
+ variant: "warning",
4547
+ text: "Image-only documents (PNG, JPG) cannot be batched because the batch payload is text-only. These are automatically routed to real-time processing even when batch mode is enabled."
4548
+ },
4549
+ {
4550
+ type: "paragraph",
4551
+ text: "While waiting for batch results, documents show a status of `batch_queued`. Once the provider returns results, the platform applies them through the same post-processing pipeline as real-time extraction \u2014 including markdown pre-processing, field parsing, quality metrics, and extraction metadata computation."
4193
4552
  }
4194
4553
  ],
4195
4554
  related: [
@@ -4205,6 +4564,14 @@ var sections15 = [
4205
4564
  {
4206
4565
  question: "What runs immediately in batch mode?",
4207
4566
  answer: "Stage 1 (OCR, classification, and triage) runs in real-time. Only Stage 2 (Claude extraction) is deferred to the batch API for cost savings."
4567
+ },
4568
+ {
4569
+ question: "What happens if a batch extraction fails to parse?",
4570
+ answer: "Parse failures in batch mode are retried through the real-time extraction path \u2014 never as a new batch. This ensures the original 48-hour SLA is maintained while still recovering from transient issues."
4571
+ },
4572
+ {
4573
+ question: "Can I enable batch mode per source?",
4574
+ answer: "Yes. Each source connection has a batch processing toggle. When enabled, all documents ingested through that source are automatically processed in batch mode."
4208
4575
  }
4209
4576
  ],
4210
4577
  mentions: [
@@ -4212,7 +4579,8 @@ var sections15 = [
4212
4579
  "processing_mode",
4213
4580
  "Stage 1",
4214
4581
  "Stage 2",
4215
- "deferred extraction"
4582
+ "deferred extraction",
4583
+ "batch_queued"
4216
4584
  ]
4217
4585
  },
4218
4586
  {
@@ -4226,6 +4594,10 @@ var sections15 = [
4226
4594
  type: "paragraph",
4227
4595
  text: "The Batches page at `/sources/batches` shows the status of all batch jobs. Each batch progresses through three states: **accumulating** (items collecting), **submitted** (sent to provider), and **completed** (results applied). The page live-syncs with the provider for real-time status updates."
4228
4596
  },
4597
+ {
4598
+ type: "paragraph",
4599
+ text: "Batches are submitted automatically when the accumulation timer fires (every 15 minutes by default) or when the item count threshold is reached. Once submitted, the platform polls the provider hourly to check for completion. When results arrive, they are applied to the corresponding documents and the batch transitions to **completed** status."
4600
+ },
4229
4601
  {
4230
4602
  type: "param-table",
4231
4603
  title: "Batch statuses",
@@ -4246,6 +4618,11 @@ var sections15 = [
4246
4618
  description: "All results have been received and applied to the corresponding documents."
4247
4619
  }
4248
4620
  ]
4621
+ },
4622
+ {
4623
+ type: "callout",
4624
+ variant: "info",
4625
+ text: 'If a batch gets stuck in "processing" due to an unexpected interruption, the platform automatically recovers it on startup. Batches stuck for more than 15 minutes are reverted to "submitted" so the next poll cycle retries them.'
4249
4626
  }
4250
4627
  ],
4251
4628
  related: [
@@ -4261,6 +4638,14 @@ var sections15 = [
4261
4638
  {
4262
4639
  question: "What are the batch statuses?",
4263
4640
  answer: "Three statuses: Accumulating (items collecting), Submitted (sent to provider, polled hourly), and Completed (results received and applied)."
4641
+ },
4642
+ {
4643
+ question: "How often are batches submitted to the provider?",
4644
+ answer: "Batches are submitted on a 15-minute timer or when the item count threshold is reached, whichever comes first. These intervals are configurable in the pipeline settings."
4645
+ },
4646
+ {
4647
+ question: "What happens if a batch gets stuck?",
4648
+ answer: 'The platform includes crash recovery logic. Batches stuck in "processing" for more than 15 minutes are automatically reverted to "submitted" so the next poll cycle retries them. No manual intervention is needed.'
4264
4649
  }
4265
4650
  ],
4266
4651
  mentions: [
@@ -4269,7 +4654,8 @@ var sections15 = [
4269
4654
  "submitted",
4270
4655
  "completed",
4271
4656
  "live-sync",
4272
- "provider polling"
4657
+ "provider polling",
4658
+ "crash recovery"
4273
4659
  ]
4274
4660
  }
4275
4661
  ];
@@ -4286,6 +4672,25 @@ var sections16 = [
4286
4672
  {
4287
4673
  type: "paragraph",
4288
4674
  text: "Upload CSV or Excel files as lookup tables. These reference datasets are used by the matching engine and by reference strategies in schemas. Each reference dataset is versioned and can be shared across multiple schemas."
4675
+ },
4676
+ {
4677
+ type: "paragraph",
4678
+ text: 'Reference data is the foundation of the matching system. It represents your "ground truth" \u2014 the known records you want to match extracted document data against. Common examples include customer lists, product catalogs, vendor registries, and contract databases.'
4679
+ },
4680
+ {
4681
+ type: "callout",
4682
+ variant: "info",
4683
+ text: "You can also import reference data directly from a SQL database connection. The import runs asynchronously \u2014 rows are streamed in batches of 500, and column headers appear immediately so you can preview the structure while the rows are still loading."
4684
+ },
4685
+ {
4686
+ type: "list",
4687
+ ordered: false,
4688
+ items: [
4689
+ "**CSV and Excel (XLSX)** file uploads for quick one-time imports.",
4690
+ "**SQL database imports** for live reference data from connected sources.",
4691
+ "**Versioning** \u2014 each dataset tracks versions independently.",
4692
+ "**Cross-schema sharing** \u2014 one dataset can be referenced by multiple schemas and matching configurations."
4693
+ ]
4289
4694
  }
4290
4695
  ],
4291
4696
  related: [
@@ -4301,6 +4706,14 @@ var sections16 = [
4301
4706
  {
4302
4707
  question: "How is reference data used?",
4303
4708
  answer: "Reference datasets are used by the matching engine for field-to-field comparisons and by reference strategies in schemas for code mapping and value resolution."
4709
+ },
4710
+ {
4711
+ question: "Can I import reference data from a database?",
4712
+ answer: "Yes. Use the SQL import option to stream rows from a connected SQL database (MSSQL or PostgreSQL). The import runs asynchronously and you can monitor progress while it loads."
4713
+ },
4714
+ {
4715
+ question: "What happens if I delete a source connection that was used for a SQL import?",
4716
+ answer: 'The reference data remains intact. Deleting a source connection does not cascade to reference datasets \u2014 the UI shows a "source disconnected" indicator, but the imported data continues to work for matching.'
4304
4717
  }
4305
4718
  ],
4306
4719
  mentions: [
@@ -4308,7 +4721,8 @@ var sections16 = [
4308
4721
  "CSV upload",
4309
4722
  "Excel upload",
4310
4723
  "versioned datasets",
4311
- "matching engine"
4724
+ "matching engine",
4725
+ "SQL import"
4312
4726
  ]
4313
4727
  },
4314
4728
  {
@@ -4351,6 +4765,15 @@ var sections16 = [
4351
4765
  {
4352
4766
  type: "paragraph",
4353
4767
  text: "You can also use **AI strategy generation** to let the platform suggest field mappings and strategies automatically based on the schema and reference data structure."
4768
+ },
4769
+ {
4770
+ type: "paragraph",
4771
+ text: "Each field comparison carries a **weight** that determines how much it contributes to the overall confidence score. Set high weights on fields that are strong identifiers (like reference numbers or unique IDs) and lower weights on fields that are common or prone to variation (like names or descriptions). The weighted aggregate produces a final score between 0% and 100%."
4772
+ },
4773
+ {
4774
+ type: "callout",
4775
+ variant: "info",
4776
+ text: "Use **AI strategy generation** when setting up matching for the first time. The platform analyzes your schema fields and reference data columns, then suggests which fields to compare and which strategy to use for each. You can review and adjust the suggestions before saving."
4354
4777
  }
4355
4778
  ],
4356
4779
  related: [
@@ -4366,6 +4789,14 @@ var sections16 = [
4366
4789
  {
4367
4790
  question: "Can Talonic suggest matching configurations?",
4368
4791
  answer: "Yes. AI strategy generation can suggest field mappings and strategies automatically based on the schema and reference data structure."
4792
+ },
4793
+ {
4794
+ question: "How do weights affect matching scores?",
4795
+ answer: "Each field comparison carries a weight that determines its contribution to the overall confidence score. Fields with higher weights have more influence on the final score. The weighted aggregate produces a score between 0% and 100%."
4796
+ },
4797
+ {
4798
+ question: "What is the difference between fuzzy and exact matching?",
4799
+ answer: "Exact matching requires the two values to be identical, ignoring case. Fuzzy matching uses token-based comparison with a configurable similarity threshold, making it suitable for fields with minor variations like misspellings, abbreviations, or word reordering."
4369
4800
  }
4370
4801
  ],
4371
4802
  mentions: [
@@ -4374,7 +4805,8 @@ var sections16 = [
4374
4805
  "exact match",
4375
4806
  "fuzzy matching",
4376
4807
  "date_range",
4377
- "numeric_range"
4808
+ "numeric_range",
4809
+ "AI strategy generation"
4378
4810
  ]
4379
4811
  },
4380
4812
  {
@@ -4387,6 +4819,26 @@ var sections16 = [
4387
4819
  {
4388
4820
  type: "paragraph",
4389
4821
  text: "Execute a matching run against a reference dataset. Matching runs are processed asynchronously via BullMQ. You can monitor progress from the matching page and cancel running jobs if needed."
4822
+ },
4823
+ {
4824
+ type: "paragraph",
4825
+ text: "There are two types of runs: **manual runs** use only the deterministic matching strategies (exact, fuzzy, date_range, numeric_range) and complete quickly. **Smart runs** add an AI resolution pass \u2014 after the initial matching, an embedding-based search with a Haiku LLM resolver attempts to improve low-confidence results."
4826
+ },
4827
+ {
4828
+ type: "list",
4829
+ ordered: true,
4830
+ items: [
4831
+ "Navigate to the **Matching** page and select a matching configuration.",
4832
+ "Click **Run** for a manual run (deterministic strategies only) or **Smart Run** for AI-enhanced matching.",
4833
+ "Monitor progress in real-time on the matching page.",
4834
+ "Cancel the run at any time if needed \u2014 partial results are preserved.",
4835
+ "Review results when the run completes."
4836
+ ]
4837
+ },
4838
+ {
4839
+ type: "callout",
4840
+ variant: "info",
4841
+ text: "Smart runs take longer but can significantly improve match quality for ambiguous data. The AI resolver uses document embeddings and a Haiku LLM to evaluate low-confidence candidates that the deterministic strategies could not resolve."
4390
4842
  }
4391
4843
  ],
4392
4844
  related: [
@@ -4402,9 +4854,17 @@ var sections16 = [
4402
4854
  {
4403
4855
  question: "Are matching runs synchronous or asynchronous?",
4404
4856
  answer: "Matching runs are processed asynchronously via a job queue. You can monitor progress from the matching page in real-time."
4857
+ },
4858
+ {
4859
+ question: "What is the difference between a manual run and a smart run?",
4860
+ answer: "A manual run uses only deterministic strategies (exact, fuzzy, date_range, numeric_range). A smart run adds an AI resolution pass using embeddings and a Haiku LLM to improve low-confidence results."
4861
+ },
4862
+ {
4863
+ question: "Can I cancel a matching run in progress?",
4864
+ answer: "Yes. You can cancel a running match job from the matching page. Partial results from documents already processed are preserved and available for review."
4405
4865
  }
4406
4866
  ],
4407
- mentions: ["matching runs", "async execution", "BullMQ", "progress monitoring"]
4867
+ mentions: ["matching runs", "async execution", "BullMQ", "progress monitoring", "smart run", "AI resolution"]
4408
4868
  },
4409
4869
  {
4410
4870
  slug: "matching-results",
@@ -4417,6 +4877,10 @@ var sections16 = [
4417
4877
  type: "paragraph",
4418
4878
  text: "Results are presented per document with the top 5 match candidates. Each candidate includes a confidence score and field-level evidence showing which comparisons contributed to the match and how each field scored."
4419
4879
  },
4880
+ {
4881
+ type: "paragraph",
4882
+ text: "The evidence view is designed to make match decisions transparent. For each candidate, you can see exactly which fields matched, what strategy was used, the individual field score, and the actual values that were compared. This makes it straightforward to verify correct matches and investigate false positives."
4883
+ },
4420
4884
  {
4421
4885
  type: "param-table",
4422
4886
  title: "Result fields",
@@ -4437,6 +4901,11 @@ var sections16 = [
4437
4901
  description: "The five highest-scoring reference records for each document."
4438
4902
  }
4439
4903
  ]
4904
+ },
4905
+ {
4906
+ type: "callout",
4907
+ variant: "info",
4908
+ text: "You can **approve or reject** individual match results. Approved matches can be used downstream in delivery pipelines. Rejected matches are excluded from future consideration for that document."
4440
4909
  }
4441
4910
  ],
4442
4911
  related: [
@@ -4452,6 +4921,14 @@ var sections16 = [
4452
4921
  {
4453
4922
  question: "What evidence is provided for each match?",
4454
4923
  answer: "Per-field evidence shows the strategy used (exact, fuzzy, date_range, numeric_range), the individual score, and the matched values for both the extracted data and the reference record."
4924
+ },
4925
+ {
4926
+ question: "Can I approve or reject match results?",
4927
+ answer: "Yes. Each match result can be individually approved or rejected. Approved matches flow through to downstream delivery pipelines, while rejected matches are excluded from future consideration for that document."
4928
+ },
4929
+ {
4930
+ question: "Why does a match have a low confidence score?",
4931
+ answer: "Low confidence usually means the fields being compared have significant differences or the matching strategies produced weak scores. Check the per-field evidence to identify which comparisons dragged the score down, then consider adjusting weights or strategies in the matching configuration."
4455
4932
  }
4456
4933
  ],
4457
4934
  mentions: [
@@ -4459,7 +4936,9 @@ var sections16 = [
4459
4936
  "top 5 candidates",
4460
4937
  "confidence score",
4461
4938
  "field-level evidence",
4462
- "weighted aggregate"
4939
+ "weighted aggregate",
4940
+ "approve",
4941
+ "reject"
4463
4942
  ]
4464
4943
  }
4465
4944
  ];
@@ -5331,6 +5810,8 @@ var sections19 = [
5331
5810
  description: "List all uploaded and processed documents with filtering by source, status, date range, and full-text search with pagination support.",
5332
5811
  content: [
5333
5812
  { type: "paragraph", text: "Documents represent files that have been uploaded and processed. Each document retains its original file, extracted text, and metadata." },
5813
+ { type: "paragraph", text: "Use query parameters to filter by source connection, processing status, date range, or full-text search across filenames and extracted content. Results are paginated with cursor-based navigation." },
5814
+ { type: "callout", variant: "info", text: "Documents include **triage metadata** (sensitivity, department, jurisdiction, PII detection) when available. Triage is populated automatically during ingestion via Document AI annotations or Haiku LLM calls." },
5334
5815
  {
5335
5816
  type: "endpoint",
5336
5817
  method: "GET",
@@ -5443,9 +5924,11 @@ var sections19 = [
5443
5924
  { label: "List Extractions", slug: "list-extractions" }
5444
5925
  ],
5445
5926
  faq: [
5446
- { question: "How do I list documents in Talonic?", answer: "Send a GET request to /v1/documents with optional filters for source, status, date range, and search." }
5927
+ { question: "How do I list documents in Talonic?", answer: "Send a GET request to /v1/documents with optional filters for source, status, date range, and search." },
5928
+ { question: "What document statuses are possible?", answer: "Documents start as `pending` (uploaded, awaiting processing), move to `processing` (OCR and extraction in progress), and end as either `completed` (ready for use) or `error` (processing failed). Batch-mode documents may also show `batch_queued` while waiting for batch results." },
5929
+ { question: "How does full-text search work?", answer: "The `search` parameter matches against the document filename and extracted text content. It uses full-text search, so partial word matches are supported." }
5447
5930
  ],
5448
- mentions: ["documents", "pagination", "filtering"]
5931
+ mentions: ["documents", "pagination", "filtering", "triage", "document status"]
5449
5932
  },
5450
5933
  {
5451
5934
  slug: "get-document",
@@ -5454,6 +5937,7 @@ var sections19 = [
5454
5937
  seoTitle: "Get Document Endpoint \u2014 Talonic Docs",
5455
5938
  description: "Retrieve a single document by ID with full metadata including file size, extracted text length, extraction count, and timestamps.",
5456
5939
  content: [
5940
+ { type: "paragraph", text: "Retrieve full metadata for a single document, including processing status, detected type and language, triage information, and links to related resources. The response includes a `dashboard` link for viewing the document in the Talonic platform UI." },
5457
5941
  {
5458
5942
  type: "endpoint",
5459
5943
  method: "GET",
@@ -5535,8 +6019,11 @@ var sections19 = [
5535
6019
  { label: "List Documents", slug: "list-documents" },
5536
6020
  { label: "Delete Document", slug: "delete-document" }
5537
6021
  ],
5538
- faq: [],
5539
- mentions: ["document metadata"]
6022
+ faq: [
6023
+ { question: "How do I get the extracted text for a document?", answer: "Use the `GET /v1/documents/:id/markdown` endpoint to retrieve the OCR markdown output. For structured extraction results, use the extractions endpoint linked in the response." },
6024
+ { question: "What does extraction_count indicate?", answer: "The number of times extraction has been performed on this document. Typically 0 (not yet extracted) or 1 (extracted). Re-extraction increments this count." }
6025
+ ],
6026
+ mentions: ["document metadata", "document detail", "triage"]
5540
6027
  },
5541
6028
  {
5542
6029
  slug: "delete-document",
@@ -5545,6 +6032,8 @@ var sections19 = [
5545
6032
  seoTitle: "Delete Document Endpoint \u2014 Talonic Docs",
5546
6033
  description: "Permanently delete a document and all associated extractions. This action is irreversible and removes the original file and all extraction results.",
5547
6034
  content: [
6035
+ { type: "paragraph", text: "Permanently delete a document along with its original file, extracted text, and all associated extraction results. This operation cannot be undone." },
6036
+ { type: "callout", variant: "warning", text: "Deletion is **irreversible**. The original file, OCR output, and all extraction results are permanently removed. If the document is part of a case or entity group, its links are also removed." },
5548
6037
  {
5549
6038
  type: "endpoint",
5550
6039
  method: "DELETE",
@@ -5584,8 +6073,11 @@ var sections19 = [
5584
6073
  related: [
5585
6074
  { label: "Get Document", slug: "get-document" }
5586
6075
  ],
5587
- faq: [],
5588
- mentions: ["delete", "irreversible"]
6076
+ faq: [
6077
+ { question: "Can I recover a deleted document?", answer: "No. Document deletion is permanent and irreversible. There is no soft-delete or trash mechanism. Ensure you have a backup before deleting." },
6078
+ { question: "What happens to linking and cases when a document is deleted?", answer: "Entity links referencing the deleted document are removed. If the document was part of a case, the case may be recomputed with the remaining documents during the next backfill." }
6079
+ ],
6080
+ mentions: ["delete", "irreversible", "permanent deletion"]
5589
6081
  }
5590
6082
  ];
5591
6083
 
@@ -5599,6 +6091,8 @@ var sections20 = [
5599
6091
  description: "List extraction results with optional filters by document, schema, and status. Supports pagination for browsing large extraction sets.",
5600
6092
  content: [
5601
6093
  { type: "paragraph", text: "An extraction is the result of applying a schema to a document. A single document can have multiple extractions if different schemas are applied to it." },
6094
+ { type: "paragraph", text: "Use this endpoint to browse extraction results across your organization. Filter by **document**, **status**, or **time range** to find specific results. Each extraction summary includes an overall confidence score and links to the full result." },
6095
+ { type: "callout", variant: "info", text: "Extractions are returned in descending order by `created_at` by default. Use the `after` and `before` parameters to narrow results to a specific time window." },
5602
6096
  {
5603
6097
  type: "endpoint",
5604
6098
  method: "GET",
@@ -5679,8 +6173,12 @@ var sections20 = [
5679
6173
  { label: "Get Extraction", slug: "get-extraction" },
5680
6174
  { label: "POST /v1/extract", slug: "post-extract" }
5681
6175
  ],
5682
- faq: [],
5683
- mentions: ["extractions", "pagination"]
6176
+ faq: [
6177
+ { question: "Can a single document have multiple extractions?", answer: "Yes. A document can have multiple extractions if different schemas are applied to it, or if it is re-extracted with updated settings." },
6178
+ { question: "What does the confidence_overall score represent?", answer: "It is the average confidence across all extracted fields (0 to 1). A score of 0.94 means the extraction engine is highly confident in the accuracy of the results." },
6179
+ { question: "How do I filter extractions by date range?", answer: "Use the `after` and `before` query parameters with ISO 8601 datetime strings to retrieve extractions within a specific time window." }
6180
+ ],
6181
+ mentions: ["extractions", "pagination", "confidence score"]
5684
6182
  },
5685
6183
  {
5686
6184
  slug: "get-extraction",
@@ -5689,6 +6187,8 @@ var sections20 = [
5689
6187
  seoTitle: "Get Extraction Endpoint \u2014 Talonic Docs",
5690
6188
  description: "Retrieve the full extraction result by ID including extracted data, per-field confidence scores, and document metadata.",
5691
6189
  content: [
6190
+ { type: "paragraph", text: "Retrieve the complete extraction result for a single document, including all **extracted field values**, **per-field confidence scores**, and **processing metadata**. This is the primary endpoint for consuming structured data produced by the extraction pipeline." },
6191
+ { type: "paragraph", text: "The response includes the source document summary, a `data` object with field values, and a `confidence` object with both an overall score and individual scores for each field. Fields that have been manually corrected appear in the `locked_fields` array." },
5692
6192
  {
5693
6193
  type: "endpoint",
5694
6194
  method: "GET",
@@ -5769,8 +6269,11 @@ var sections20 = [
5769
6269
  { label: "List Extractions", slug: "list-extractions" },
5770
6270
  { label: "Get Extraction Data", slug: "get-extraction-fields" }
5771
6271
  ],
5772
- faq: [],
5773
- mentions: ["extraction result", "confidence scores"]
6272
+ faq: [
6273
+ { question: "What does a locked_fields entry mean?", answer: "A locked field has been manually corrected via the Correct Fields endpoint. Locked fields always have a confidence of 1.0 and will not be overwritten by re-extraction." },
6274
+ { question: "What processing metadata is available?", answer: "The `processing` object includes `duration_ms` (total extraction time), `pages_processed` (number of document pages analyzed), and `region` (the datacenter region that handled the request)." }
6275
+ ],
6276
+ mentions: ["extraction result", "confidence scores", "locked fields", "processing metadata"]
5774
6277
  },
5775
6278
  {
5776
6279
  slug: "get-extraction-fields",
@@ -5779,6 +6282,8 @@ var sections20 = [
5779
6282
  seoTitle: "Get Extraction Data Endpoint \u2014 Talonic Docs",
5780
6283
  description: "Retrieve just the extracted data from an extraction result without metadata. Supports CSV export format for download.",
5781
6284
  content: [
6285
+ { type: "paragraph", text: "Retrieve only the extracted field values from an extraction, without metadata, confidence scores, or processing details. This is the lightest-weight endpoint for consuming extraction output and is ideal for downstream integrations that only need the structured data." },
6286
+ { type: "callout", variant: "info", text: "Use `?format=csv` to download the data as a CSV file. The response Content-Type changes to `text/csv` and includes a `Content-Disposition` header for browser downloads." },
5782
6287
  {
5783
6288
  type: "endpoint",
5784
6289
  method: "GET",
@@ -5825,10 +6330,14 @@ var sections20 = [
5825
6330
  }
5826
6331
  ],
5827
6332
  related: [
5828
- { label: "Get Extraction", slug: "get-extraction" }
6333
+ { label: "Get Extraction", slug: "get-extraction" },
6334
+ { label: "Correct Fields", slug: "get-extraction-markdown" }
5829
6335
  ],
5830
- faq: [],
5831
- mentions: ["CSV export", "extraction data"]
6336
+ faq: [
6337
+ { question: "What is the difference between this endpoint and GET /v1/extractions/:id?", answer: "This endpoint returns only the extracted key-value data. The full extraction endpoint also includes confidence scores, processing metadata, document details, and locked fields." },
6338
+ { question: "Does the CSV export include column headers?", answer: "Yes. The CSV format includes field names as the header row and extracted values as the data row." }
6339
+ ],
6340
+ mentions: ["CSV export", "extraction data", "data download"]
5832
6341
  },
5833
6342
  {
5834
6343
  slug: "get-extraction-markdown",
@@ -5837,6 +6346,9 @@ var sections20 = [
5837
6346
  seoTitle: "Correct Extraction Fields \u2014 Talonic Docs",
5838
6347
  description: "Submit corrections for specific fields in an extraction result. Corrections are logged and can be propagated to similar extractions.",
5839
6348
  content: [
6349
+ { type: "paragraph", text: "Submit corrections to specific fields in a completed extraction. Corrected fields are **locked** at confidence 1.0 and will not be overwritten by future re-extractions. Use this to fix extraction errors and optionally propagate corrections to similar documents." },
6350
+ { type: "paragraph", text: "Each correction includes the field name, new value, and an optional reason. The `propagate` parameter controls whether corrections apply only to this document or are applied to all similar extractions." },
6351
+ { type: "callout", variant: "warning", text: "Corrected fields are locked and cannot be overridden by re-extraction. To unlock a field, submit a new correction or contact support." },
5840
6352
  {
5841
6353
  type: "endpoint",
5842
6354
  method: "PATCH",
@@ -5942,10 +6454,15 @@ var sections20 = [
5942
6454
  }
5943
6455
  ],
5944
6456
  related: [
5945
- { label: "Get Extraction", slug: "get-extraction" }
6457
+ { label: "Get Extraction", slug: "get-extraction" },
6458
+ { label: "Get Extraction Data", slug: "get-extraction-fields" }
5946
6459
  ],
5947
- faq: [],
5948
- mentions: ["field corrections", "propagation"]
6460
+ faq: [
6461
+ { question: "What does propagate: all_similar do?", answer: "When set to `all_similar`, the correction is applied to all extractions from documents with the same detected type and matching field values. Use this to fix systematic extraction errors across a batch." },
6462
+ { question: "Can I correct multiple fields in a single request?", answer: "Yes. The `corrections` array accepts multiple entries. Each correction is applied atomically in the same request." },
6463
+ { question: "What happens to the confidence score of a corrected field?", answer: "Corrected fields are automatically set to confidence 1.0 and added to the `locked_fields` array. The overall confidence score is recalculated to reflect the correction." }
6464
+ ],
6465
+ mentions: ["field corrections", "propagation", "locked fields", "confidence override"]
5949
6466
  }
5950
6467
  ];
5951
6468
 
@@ -5959,6 +6476,8 @@ var sections21 = [
5959
6476
  description: "List all saved schemas with field counts and extraction counts. Schemas define the structure you want to extract from documents.",
5960
6477
  content: [
5961
6478
  { type: "paragraph", text: "Schemas define the structure you want to extract from documents. Save schemas to reuse them across extractions and maintain consistency." },
6479
+ { type: "paragraph", text: "Use this endpoint to browse your organization's schemas, search by name, and paginate through large collections. Each schema in the response includes its **field count**, **version number**, and links to related extractions." },
6480
+ { type: "callout", variant: "info", text: "Schemas are organization-scoped. You will only see schemas created by your organization, not other tenants." },
5962
6481
  {
5963
6482
  type: "endpoint",
5964
6483
  method: "GET",
@@ -6052,8 +6571,12 @@ var sections21 = [
6052
6571
  { label: "Create Schema", slug: "create-schema" },
6053
6572
  { label: "Get Schema", slug: "get-schema" }
6054
6573
  ],
6055
- faq: [],
6056
- mentions: ["schemas", "extraction"]
6574
+ faq: [
6575
+ { question: "How do I search for a schema by name?", answer: "Pass the `search` query parameter with a partial name string. The search is case-insensitive and matches any substring of the schema name." },
6576
+ { question: "What is the maximum number of schemas I can retrieve in one request?", answer: "Set `limit` up to 100 per request. Use the `next_cursor` value from the response to paginate through additional results." },
6577
+ { question: "Does listing schemas include the full field definitions?", answer: "Yes. Each schema object in the response includes the complete `definition` with all field properties, types, and required markers." }
6578
+ ],
6579
+ mentions: ["schemas", "extraction", "pagination", "cursor"]
6057
6580
  },
6058
6581
  {
6059
6582
  slug: "get-schema",
@@ -6062,6 +6585,8 @@ var sections21 = [
6062
6585
  seoTitle: "Get Schema Endpoint \u2014 Talonic Docs",
6063
6586
  description: "Retrieve a schema by ID including its full definition with all field names, data types, extraction instructions, and configuration options. Returns version history and extraction count.",
6064
6587
  content: [
6588
+ { type: "paragraph", text: "Retrieve the full details of a single schema, including its **JSON Schema definition**, field count, and version number. Use this to inspect a schema before running extractions or to verify that an update was applied correctly." },
6589
+ { type: "callout", variant: "info", text: "You can pass either a UUID or a `SCH-` prefixed short ID (e.g. `SCH-A1B2C3D4`) as the `:id` path parameter." },
6065
6590
  {
6066
6591
  type: "endpoint",
6067
6592
  method: "GET",
@@ -6132,8 +6657,11 @@ var sections21 = [
6132
6657
  { label: "List Schemas", slug: "list-schemas" },
6133
6658
  { label: "Update Schema", slug: "update-schema" }
6134
6659
  ],
6135
- faq: [],
6136
- mentions: ["schema definition"]
6660
+ faq: [
6661
+ { question: "Can I use the short ID instead of the UUID?", answer: "Yes. Both the full UUID and the `SCH-` prefixed short ID (e.g. `SCH-A1B2C3D4`) are accepted as the `:id` path parameter." },
6662
+ { question: "Does this endpoint return the schema version history?", answer: "The response includes the current `version` number. To see previous versions, compare schemas over time using the `updated_at` timestamp." }
6663
+ ],
6664
+ mentions: ["schema definition", "short ID", "UUID"]
6137
6665
  },
6138
6666
  {
6139
6667
  slug: "create-schema",
@@ -6142,6 +6670,16 @@ var sections21 = [
6142
6670
  seoTitle: "Create Schema Endpoint \u2014 Talonic Docs",
6143
6671
  description: "Create a new schema with a name and definition in any of the three supported formats: JSON Schema, simplified fields, or flat key-type map.",
6144
6672
  content: [
6673
+ { type: "paragraph", text: "Create a new schema to define the fields you want to extract from documents. The schema definition is **normalized** to JSON Schema format on creation, regardless of which input format you use." },
6674
+ { type: "paragraph", text: "Talonic accepts three definition formats to make schema creation flexible. You can pass a full JSON Schema object, a simplified fields array, or a flat key-type map. All three are converted to the same internal representation." },
6675
+ {
6676
+ type: "list",
6677
+ items: [
6678
+ '**JSON Schema** \u2014 Standard `{ "type": "object", "properties": { ... } }` format with full control over types and required fields.',
6679
+ '**Simplified fields** \u2014 An array of `{ "name": "field_name", "type": "string" }` objects for quick creation.',
6680
+ '**Flat key-type map** \u2014 A simple `{ "field_name": "string", "amount": "number" }` object for the fastest path.'
6681
+ ]
6682
+ },
6145
6683
  {
6146
6684
  type: "endpoint",
6147
6685
  method: "POST",
@@ -6219,8 +6757,12 @@ var sections21 = [
6219
6757
  { label: "Schema Formats", slug: "extract-schemas" },
6220
6758
  { label: "List Schemas", slug: "list-schemas" }
6221
6759
  ],
6222
- faq: [],
6223
- mentions: ["create schema"]
6760
+ faq: [
6761
+ { question: "Which schema definition format should I use?", answer: 'Use the flat key-type map (`{ "field": "type" }`) for quick prototyping and JSON Schema for production schemas that need `required` fields or descriptions.' },
6762
+ { question: "What happens if my definition has no valid fields?", answer: "The API returns a `400 validation_error`. Every schema must produce at least one extractable field after normalization." },
6763
+ { question: "Is the version always 1 for new schemas?", answer: "Yes. New schemas always start at version 1. The version increments when you update the schema definition via `PUT /v1/schemas/:id`." }
6764
+ ],
6765
+ mentions: ["create schema", "JSON Schema", "definition format"]
6224
6766
  },
6225
6767
  {
6226
6768
  slug: "update-schema",
@@ -6229,6 +6771,8 @@ var sections21 = [
6229
6771
  seoTitle: "Update Schema Endpoint \u2014 Talonic Docs",
6230
6772
  description: "Replace a schema definition, creating a new version internally. Existing extractions retain their original schema version.",
6231
6773
  content: [
6774
+ { type: "paragraph", text: "Replace a schema's definition, name, or description. Each update creates a **new version** internally, so you can track how a schema evolves over time. Existing extractions are not affected by schema updates \u2014 they retain the schema version that was active when they were created." },
6775
+ { type: "callout", variant: "warning", text: "Updating a schema definition changes the fields used for **future** extractions. Already-completed extractions continue to reference their original schema version." },
6232
6776
  {
6233
6777
  type: "endpoint",
6234
6778
  method: "PUT",
@@ -6307,10 +6851,14 @@ var sections21 = [
6307
6851
  }
6308
6852
  ],
6309
6853
  related: [
6310
- { label: "Get Schema", slug: "get-schema" }
6854
+ { label: "Get Schema", slug: "get-schema" },
6855
+ { label: "Create Schema", slug: "create-schema" }
6311
6856
  ],
6312
- faq: [],
6313
- mentions: ["schema versioning"]
6857
+ faq: [
6858
+ { question: "Do existing extractions change when I update a schema?", answer: "No. Existing extractions retain the schema version that was active when they were created. Only new extractions use the updated definition." },
6859
+ { question: "Can I update just the name without changing the definition?", answer: "Yes. All body parameters are optional. You can send only `name` or only `description` to update those fields without touching the definition." }
6860
+ ],
6861
+ mentions: ["schema versioning", "schema update"]
6314
6862
  },
6315
6863
  {
6316
6864
  slug: "delete-schema",
@@ -6319,6 +6867,8 @@ var sections21 = [
6319
6867
  seoTitle: "Delete Schema Endpoint \u2014 Talonic Docs",
6320
6868
  description: "Delete a schema by ID. Associated extractions are retained and not deleted when the schema is removed.",
6321
6869
  content: [
6870
+ { type: "paragraph", text: "Permanently delete a schema from your organization. This operation is **irreversible**. Associated extractions are retained and remain accessible \u2014 only the schema definition itself is removed." },
6871
+ { type: "callout", variant: "warning", text: "Deleting a schema does not delete its associated extractions. Extraction results remain available via the extractions API." },
6322
6872
  {
6323
6873
  type: "endpoint",
6324
6874
  method: "DELETE",
@@ -6356,10 +6906,14 @@ var sections21 = [
6356
6906
  }
6357
6907
  ],
6358
6908
  related: [
6359
- { label: "List Schemas", slug: "list-schemas" }
6909
+ { label: "List Schemas", slug: "list-schemas" },
6910
+ { label: "Get Schema", slug: "get-schema" }
6360
6911
  ],
6361
- faq: [],
6362
- mentions: ["delete schema"]
6912
+ faq: [
6913
+ { question: "What happens to extractions when I delete a schema?", answer: "Extractions are retained and remain accessible. Only the schema definition is removed. You can still query extraction results via the extractions API." },
6914
+ { question: "Can I recover a deleted schema?", answer: "No. Schema deletion is permanent. If you need the same structure again, create a new schema with the same definition." }
6915
+ ],
6916
+ mentions: ["delete schema", "extraction retention"]
6363
6917
  }
6364
6918
  ];
6365
6919
 
@@ -6373,6 +6927,7 @@ var sections22 = [
6373
6927
  description: "List all extraction jobs with status, progress, and pagination. Filter by status to find queued, processing, completed, failed, or cancelled jobs.",
6374
6928
  content: [
6375
6929
  { type: "paragraph", text: "Jobs track asynchronous extraction work. Create a job with a schema and document set, then poll for progress. Each job runs the full 4-phase extraction pipeline." },
6930
+ { type: "paragraph", text: "The 4-phase pipeline consists of: **Phase 1 (Resolve)** which fills 60-80% of cells using registry transfer, raw extraction mapping, lookup cascades, and deterministic compute; **Phase 2 (Agent)** which uses Claude to extract remaining values; **Phase 3 (Resolve II)** which normalizes LLM-extracted values to canonical codes; and **Phase 4 (Transform)** which applies deterministic transforms, validation, and format constraints." },
6376
6931
  {
6377
6932
  type: "endpoint",
6378
6933
  method: "GET",
@@ -6472,8 +7027,12 @@ var sections22 = [
6472
7027
  { label: "Create Job", slug: "create-job" },
6473
7028
  { label: "Get Job", slug: "get-job" }
6474
7029
  ],
6475
- faq: [],
6476
- mentions: ["jobs", "extraction pipeline"]
7030
+ faq: [
7031
+ { question: "What does fill_rate in grid_stats represent?", answer: "Fill rate is the ratio of filled cells to total cells in the extraction grid (one cell per document per schema field). A fill rate of 0.94 means 94% of expected field values were successfully extracted." },
7032
+ { question: "What does current_phase tell me?", answer: "It indicates which pipeline phase the job is currently executing: `phase_1_resolve`, `phase_2_execute`, `phase_3_resolve`, or `phase_4_transform`. It is `null` when the job is complete or has not started." },
7033
+ { question: "How does cursor-based pagination work?", answer: "Pass the `next_cursor` value from the previous response as the `cursor` query parameter to fetch the next page. When `has_more` is `false`, you have reached the last page." }
7034
+ ],
7035
+ mentions: ["jobs", "extraction pipeline", "4-phase pipeline", "grid stats"]
6477
7036
  },
6478
7037
  {
6479
7038
  slug: "create-job",
@@ -6482,6 +7041,8 @@ var sections22 = [
6482
7041
  seoTitle: "Create Job Endpoint \u2014 Talonic Docs",
6483
7042
  description: "Create and run an extraction job with a schema and optional document set. Returns a job ID for polling progress and retrieving results.",
6484
7043
  content: [
7044
+ { type: "paragraph", text: "Create a new extraction job targeting a specific schema. The job immediately enters `pending` status and begins processing asynchronously. If `document_ids` is omitted, the job processes all completed documents in your organization. Poll the job status endpoint to track progress." },
7045
+ { type: "callout", variant: "info", text: "Jobs are limited to 2,000 documents per run. If you need to process more, split your document set across multiple jobs." },
6485
7046
  {
6486
7047
  type: "endpoint",
6487
7048
  method: "POST",
@@ -6539,8 +7100,11 @@ var sections22 = [
6539
7100
  { label: "List Jobs", slug: "list-jobs" },
6540
7101
  { label: "Get Job", slug: "get-job" }
6541
7102
  ],
6542
- faq: [],
6543
- mentions: ["create job", "extraction"]
7103
+ faq: [
7104
+ { question: "What happens if I omit document_ids?", answer: "The job processes all documents with `completed` status in your organization. This is convenient for initial runs but may include documents you do not want to process. Use `document_ids` for targeted extraction." },
7105
+ { question: "Can I run multiple jobs simultaneously?", answer: "Yes. Jobs are processed independently. However, concurrent jobs compete for the same compute resources, so running many large jobs in parallel may increase processing times." }
7106
+ ],
7107
+ mentions: ["create job", "extraction", "schema_id"]
6544
7108
  },
6545
7109
  {
6546
7110
  slug: "get-job",
@@ -6549,6 +7113,7 @@ var sections22 = [
6549
7113
  seoTitle: "Get Job Endpoint \u2014 Talonic Docs",
6550
7114
  description: "Get job status, progress percentage, current phase, document counts, grid statistics, and estimated completion time for an extraction job.",
6551
7115
  content: [
7116
+ { type: "paragraph", text: "Retrieve the current status and progress of a specific extraction job. Use this endpoint to poll for completion after creating a job. The `grid_stats` field provides real-time cell fill rates, and `current_phase` indicates which pipeline phase is actively executing." },
6552
7117
  {
6553
7118
  type: "endpoint",
6554
7119
  method: "GET",
@@ -6623,8 +7188,11 @@ var sections22 = [
6623
7188
  { label: "List Jobs", slug: "list-jobs" },
6624
7189
  { label: "Create Job", slug: "create-job" }
6625
7190
  ],
6626
- faq: [],
6627
- mentions: ["job status", "grid stats"]
7191
+ faq: [
7192
+ { question: "How often should I poll for job status?", answer: "Every 2-5 seconds is a reasonable polling interval. The `progress` field updates as cells are filled across pipeline phases, giving you real-time feedback." },
7193
+ { question: "What does it mean when grid_stats is null?", answer: "The grid has not been initialized yet. This happens when the job is still in `pending` status or has just started processing before Phase 1 begins." }
7194
+ ],
7195
+ mentions: ["job status", "grid stats", "polling"]
6628
7196
  },
6629
7197
  {
6630
7198
  slug: "cancel-job",
@@ -6633,6 +7201,8 @@ var sections22 = [
6633
7201
  seoTitle: "Cancel Job Endpoint \u2014 Talonic Docs",
6634
7202
  description: "Cancel a pending or processing job. Returns the final job state with status set to failed and an error message indicating cancellation.",
6635
7203
  content: [
7204
+ { type: "paragraph", text: "Cancel a job that is currently `pending` or `processing`. The job is immediately marked as `failed` with an error message indicating it was cancelled via the API. Any results that were already processed are preserved and can still be retrieved via the results endpoint." },
7205
+ { type: "callout", variant: "info", text: "Cancellation is best-effort. If the job completes between your cancel request and the server processing it, you will receive a `409 conflict` error. Already-completed or already-failed jobs cannot be cancelled." },
6636
7206
  {
6637
7207
  type: "endpoint",
6638
7208
  method: "POST",
@@ -6697,8 +7267,11 @@ var sections22 = [
6697
7267
  { label: "Get Job", slug: "get-job" },
6698
7268
  { label: "List Jobs", slug: "list-jobs" }
6699
7269
  ],
6700
- faq: [],
6701
- mentions: ["cancel job"]
7270
+ faq: [
7271
+ { question: "Can I retrieve partial results from a cancelled job?", answer: "Yes. Any results that were written before cancellation are preserved. Use `GET /v1/jobs/:id/results` to retrieve whatever was processed before the job was stopped." },
7272
+ { question: "Does cancelling a job refund credits?", answer: "No. Credits consumed by processing that already occurred are not refunded; you only avoid charges for pipeline phases that had not yet executed." }
7273
+ ],
7274
+ mentions: ["cancel job", "cancellation"]
6702
7275
  },
6703
7276
  {
6704
7277
  slug: "get-job-results",
@@ -6707,6 +7280,8 @@ var sections22 = [
6707
7280
  seoTitle: "Job Results Endpoint \u2014 Talonic Docs",
6708
7281
  description: "Retrieve the extracted result rows for a job. Each row corresponds to one document and contains extracted field values, confidence score, and validation flags.",
6709
7282
  content: [
7283
+ { type: "paragraph", text: "Retrieve the extracted result rows for a completed (or partially completed) job. Each row corresponds to one document and contains the extracted field values keyed by field name, a row-level confidence score, and any validation flags raised during Phase 4. Results are available progressively as each pipeline phase flushes to the database." },
7284
+ { type: "callout", variant: "info", text: "Results are available even while the job is still processing. The grid flushes to the database after each phase, so you can read partial results before the job completes." },
6710
7285
  {
6711
7286
  type: "endpoint",
6712
7287
  method: "GET",
@@ -6795,8 +7370,12 @@ var sections22 = [
6795
7370
  { label: "Get Job", slug: "get-job" },
6796
7371
  { label: "Create Job", slug: "create-job" }
6797
7372
  ],
6798
- faq: [],
6799
- mentions: ["job results", "extracted values", "validation flags"]
7373
+ faq: [
7374
+ { question: "What do validation_flags contain?", answer: "Validation flags are strings indicating data quality issues detected in Phase 4. Common flags include `missing_required_field:<field_name>` and `format_mismatch:<field_name>`. An empty array means all validations passed." },
7375
+ { question: 'What is the difference between status "approved" and "review"?', answer: "Rows with `approved` status passed all validation checks and are ready for delivery. Rows with `review` status have one or more validation flags and may require manual inspection before use." },
7376
+ { question: "Can I get results for a failed job?", answer: "Yes. If the job processed some documents before failing, those partial results are available. Check `job_status` in the response to understand the context." }
7377
+ ],
7378
+ mentions: ["job results", "extracted values", "validation flags", "confidence score"]
6800
7379
  }
6801
7380
  ];
6802
7381
 
@@ -6810,6 +7389,8 @@ var sections23 = [
6810
7389
  description: "List all input sources that group documents by origin. Each source has its own API key for programmatic document ingestion.",
6811
7390
  content: [
6812
7391
  { type: "paragraph", text: "Inputs group documents by origin. Each input source has its own API key for programmatic document ingestion." },
7392
+ { type: "paragraph", text: "Sources are the entry point for document ingestion in Talonic. Each source represents a distinct pipeline or integration \u2014 for example, an invoice processing pipeline or a contract review workflow. Documents ingested through a source inherit its configuration, including any default schema." },
7393
+ { type: "callout", variant: "info", text: "Each source has a unique `endpoint` URL for document ingestion. Use the source-scoped API key (returned at creation) to authenticate uploads to that endpoint." },
6813
7394
  {
6814
7395
  type: "endpoint",
6815
7396
  method: "GET",
@@ -6871,8 +7452,11 @@ var sections23 = [
6871
7452
  { label: "Create Input", slug: "create-source" },
6872
7453
  { label: "Get Source", slug: "get-source" }
6873
7454
  ],
6874
- faq: [],
6875
- mentions: ["sources", "input"]
7455
+ faq: [
7456
+ { question: "What is the difference between a source and a schema?", answer: "A source groups documents by origin (e.g. an API integration or upload pipeline). A schema defines the fields to extract from documents. A source can optionally have a default schema applied to all its documents." },
7457
+ { question: "Can I see the API key for an existing source?", answer: "No. The source-scoped API key is only shown once in the creation response. If lost, delete the source and create a new one." }
7458
+ ],
7459
+ mentions: ["sources", "input", "document ingestion"]
6876
7460
  },
6877
7461
  {
6878
7462
  slug: "create-source",
@@ -6881,6 +7465,8 @@ var sections23 = [
6881
7465
  seoTitle: "Create Source Endpoint \u2014 Talonic Docs",
6882
7466
  description: "Create a new input source and receive a source-scoped API key. The key is only shown once in the creation response \u2014 store it securely.",
6883
7467
  content: [
7468
+ { type: "paragraph", text: "Create a new source to start ingesting documents. The response includes a **source-scoped API key** (`tlnc_sk_*`) that authenticates uploads to this source's endpoint. This key is shown only once \u2014 store it securely immediately after creation." },
7469
+ { type: "callout", variant: "warning", text: "The `api_key` is only returned in the creation response. It cannot be retrieved later. If you lose it, delete the source and create a new one." },
6884
7470
  {
6885
7471
  type: "endpoint",
6886
7472
  method: "POST",
@@ -6948,10 +7534,14 @@ var sections23 = [
6948
7534
  }
6949
7535
  ],
6950
7536
  related: [
6951
- { label: "List Inputs", slug: "list-sources" }
7537
+ { label: "List Inputs", slug: "list-sources" },
7538
+ { label: "Source Documents", slug: "update-source" }
6952
7539
  ],
6953
- faq: [],
6954
- mentions: ["API key", "source creation"]
7540
+ faq: [
7541
+ { question: "What source types are available?", answer: "The `type` field accepts `api` (programmatic ingestion via REST), `upload` (manual file uploads), and `connector` (third-party integrations like Google Drive or SharePoint)." },
7542
+ { question: "Can I set a default schema when creating a source?", answer: "Yes. Pass a `default_schema_id` in the request body to automatically apply a schema to all documents ingested through this source." }
7543
+ ],
7544
+ mentions: ["API key", "source creation", "source-scoped key"]
6955
7545
  },
6956
7546
  {
6957
7547
  slug: "get-source",
@@ -6960,6 +7550,7 @@ var sections23 = [
6960
7550
  seoTitle: "Manage Source Endpoint \u2014 Talonic Docs",
6961
7551
  description: "Get source details, update a source name, or delete a source. Documents are retained but unlinked when a source is deleted.",
6962
7552
  content: [
7553
+ { type: "paragraph", text: "Manage an individual source with GET, PATCH, and DELETE operations on the same path. Retrieve source details, update its name, or permanently delete it. When a source is deleted, its documents are **retained** but unlinked from the source." },
6963
7554
  {
6964
7555
  type: "endpoint",
6965
7556
  method: "GET",
@@ -7108,10 +7699,14 @@ var sections23 = [
7108
7699
  }
7109
7700
  ],
7110
7701
  related: [
7111
- { label: "List Inputs", slug: "list-sources" }
7702
+ { label: "List Inputs", slug: "list-sources" },
7703
+ { label: "Source Documents", slug: "update-source" }
7112
7704
  ],
7113
- faq: [],
7114
- mentions: ["source management"]
7705
+ faq: [
7706
+ { question: "What happens to documents when I delete a source?", answer: "Documents are retained in your organization but unlinked from the deleted source. They remain accessible via the documents API and any existing extractions are preserved." },
7707
+ { question: "Does deleting a source revoke its API key?", answer: "Yes. The source-scoped API key is immediately invalidated when the source is deleted. Any subsequent upload attempts using that key will return 401." }
7708
+ ],
7709
+ mentions: ["source management", "source deletion", "document retention"]
7115
7710
  },
7116
7711
  {
7117
7712
  slug: "update-source",
@@ -7120,6 +7715,9 @@ var sections23 = [
7120
7715
  seoTitle: "Source Documents Endpoint \u2014 Talonic Docs",
7121
7716
  description: "Ingest documents into a specific source or list documents belonging to a source. Supports batch processing mode at 50% cost discount.",
7122
7717
  content: [
7718
+ { type: "paragraph", text: "Ingest documents into a source for processing, or list all documents that belong to a source. The ingestion endpoint accepts a file upload or a URL, processes the document through the extraction pipeline, and returns the document ID for status tracking." },
7719
+ { type: "paragraph", text: "Documents can be processed in **realtime** (default, results in seconds) or **batch** mode (50% cost discount, results within 48 hours). Duplicate files are detected via SHA-256 hash and rejected with a `duplicate` status." },
7720
+ { type: "callout", variant: "info", text: "Batch processing mode reduces cost by 50% but delivers results within 48 hours. Use `processing_mode=batch` for large ingestion jobs where latency is not critical." },
7123
7721
  {
7124
7722
  type: "endpoint",
7125
7723
  method: "POST",
@@ -7254,8 +7852,12 @@ var sections23 = [
7254
7852
  { label: "List Inputs", slug: "list-sources" },
7255
7853
  { label: "List Documents", slug: "list-documents" }
7256
7854
  ],
7257
- faq: [],
7258
- mentions: ["document ingestion", "batch processing"]
7855
+ faq: [
7856
+ { question: "What happens if I upload a duplicate file?", answer: 'Duplicate files are detected via SHA-256 hash comparison. The response returns `status: "duplicate"` with `existing_document_id` pointing to the original document. No new document is created.' },
7857
+ { question: "Can I use both file and file_url in the same request?", answer: "No. Provide either `file` (binary upload) or `file_url` (URL fetch), but not both. The API returns a 400 error if neither is provided." },
7858
+ { question: "What file formats are supported for document ingestion?", answer: "Talonic supports 25+ formats including PDF, DOCX, XLSX, CSV, PPTX, MSG, EML, PNG, JPG, HTML, XML, JSON, and more. See the supported file types documentation for the full list." }
7859
+ ],
7860
+ mentions: ["document ingestion", "batch processing", "duplicate detection", "file upload"]
7259
7861
  }
7260
7862
  ];
7261
7863
 
@@ -7269,6 +7871,7 @@ var sections24 = [
7269
7871
  description: "Autocomplete field names from the registry ranked by relevance and occurrence count. Power field picker UIs with type-ahead search.",
7270
7872
  content: [
7271
7873
  { type: "paragraph", text: "Search and filter documents by their extracted field values. Includes field autocomplete, document filtering with composable conditions, global omnisearch, and saved filter management." },
7874
+ { type: "paragraph", text: "The field autocomplete endpoint enables type-ahead search over the field registry. Results are ranked by a combination of name relevance and occurrence count, making frequently seen fields surface first. Use this to power field picker dropdowns and search-as-you-type UIs." },
7272
7875
  {
7273
7876
  type: "endpoint",
7274
7877
  method: "GET",
@@ -7335,8 +7938,12 @@ var sections24 = [
7335
7938
  { label: "Field Values", slug: "field-values" },
7336
7939
  { label: "Filter Documents", slug: "filter-documents" }
7337
7940
  ],
7338
- faq: [],
7339
- mentions: ["autocomplete", "field registry"]
7941
+ faq: [
7942
+ { question: "How does the ranking work?", answer: "Results are ranked by a combined score of name match relevance (canonical name, display name, and alias matches) and `occurrenceCount`. Fields that appear in more documents rank higher for equivalent relevance." },
7943
+ { question: "What does the tier field represent?", answer: "Tier 1 fields have high confidence and consistent extraction across documents. Higher tiers indicate lower confidence or less frequent occurrence. Use tier to decide which fields to display prominently." },
7944
+ { question: "Can I search by alias names?", answer: "Yes. The autocomplete matches against canonical names, display names, and registered aliases. The `matchSource` field indicates how the match was made." }
7945
+ ],
7946
+ mentions: ["autocomplete", "field registry", "type-ahead search"]
7340
7947
  },
7341
7948
  {
7342
7949
  slug: "field-values",
@@ -7345,6 +7952,7 @@ var sections24 = [
7345
7952
  seoTitle: "Field Values Endpoint \u2014 Talonic Docs",
7346
7953
  description: "List distinct values for a field across documents with counts. Useful for building filter dropdowns and faceted search interfaces.",
7347
7954
  content: [
7955
+ { type: "paragraph", text: "Retrieve the distinct values for a specific field across all documents in your workspace. Results are sorted by count descending, making the most common values appear first. Use this to populate filter dropdowns, build faceted search interfaces, or analyze value distributions for data quality." },
7348
7956
  {
7349
7957
  type: "endpoint",
7350
7958
  method: "GET",
@@ -7400,8 +8008,11 @@ var sections24 = [
7400
8008
  { label: "Field Autocomplete", slug: "field-autocomplete" },
7401
8009
  { label: "Filter Documents", slug: "filter-documents" }
7402
8010
  ],
7403
- faq: [],
7404
- mentions: ["field values", "faceted search"]
8011
+ faq: [
8012
+ { question: "Are values case-sensitive?", answer: 'Values are returned as extracted. The `q` substring filter is case-insensitive, so searching for "acme" will match "Acme Corp".' },
8013
+ { question: "What does totalDistinct represent when a limit is applied?", answer: 'It shows the total number of unique values for this field across all documents, regardless of the `limit` parameter. Use it to indicate "showing 20 of 156 values" in your UI.' }
8014
+ ],
8015
+ mentions: ["field values", "faceted search", "value distribution"]
7405
8016
  },
7406
8017
  {
7407
8018
  slug: "filter-documents",
@@ -7410,6 +8021,8 @@ var sections24 = [
7410
8021
  seoTitle: "Filter Documents Endpoint \u2014 Talonic Docs",
7411
8022
  description: "Filter documents by field value conditions using composable operators including equality, comparison, range, containment, and emptiness checks.",
7412
8023
  content: [
8024
+ { type: "paragraph", text: "Filter documents by composing conditions on extracted field values. Each condition targets a specific field and applies an operator to test its value. Multiple conditions are AND-combined. The endpoint also supports free-text search across document content and sorting by any field." },
8025
+ { type: "callout", variant: "info", text: "Field names in conditions must be valid field registry IDs (e.g. `fld_a1b2c3d4`). Use the field autocomplete endpoint to discover available field IDs for your workspace." },
7413
8026
  {
7414
8027
  type: "endpoint",
7415
8028
  method: "POST",
@@ -7497,8 +8110,12 @@ var sections24 = [
7497
8110
  { label: "Field Autocomplete", slug: "field-autocomplete" },
7498
8111
  { label: "Omnisearch", slug: "omnisearch" }
7499
8112
  ],
7500
- faq: [],
7501
- mentions: ["filter", "conditions", "operators"]
8113
+ faq: [
8114
+ { question: "How do I use the between operator?", answer: 'Provide both `value` (lower bound, inclusive) and `valueTo` (upper bound, inclusive) in the condition. Works with dates, numbers, and strings. Example: `{ "fieldId": "fld_x", "operator": "between", "value": "2024-01-01", "valueTo": "2024-12-31" }`.' },
8115
+ { question: "What happens if a document does not have a value for a filtered field?", answer: "Documents missing the filtered field are excluded from results unless you use the `is_empty` operator, which explicitly matches documents where the field is null or absent." },
8116
+ { question: "Can I combine free-text search with field conditions?", answer: "Yes. Set the `search` parameter alongside `conditions`. Both are AND-combined \u2014 documents must match the search text and all conditions." }
8117
+ ],
8118
+ mentions: ["filter", "conditions", "operators", "composable filters"]
7502
8119
  },
7503
8120
  {
7504
8121
  slug: "filter-documents-export",
@@ -7507,6 +8124,7 @@ var sections24 = [
7507
8124
  seoTitle: "Omnisearch Endpoint \u2014 Talonic Docs",
7508
8125
  description: "Global omnisearch across documents, fields, sources, and schemas. Unified search endpoint that powers the Cmd+K search experience.",
7509
8126
  content: [
8127
+ { type: "paragraph", text: "The omnisearch endpoint provides a unified search across all entity types in your workspace: documents, field values, source connections, schemas, and registry fields. A single query returns categorized results, making it ideal for building global search UIs like Cmd+K palettes." },
7510
8128
  {
7511
8129
  type: "endpoint",
7512
8130
  method: "GET",
@@ -7576,8 +8194,11 @@ var sections24 = [
7576
8194
  { label: "Filter Documents", slug: "filter-documents" },
7577
8195
  { label: "Field Autocomplete", slug: "field-autocomplete" }
7578
8196
  ],
7579
- faq: [],
7580
- mentions: ["omnisearch", "Cmd+K"]
8197
+ faq: [
8198
+ { question: "Does omnisearch return results from all entity types in every call?", answer: "Yes. Every call searches documents, field values, sources, schemas, and registry fields simultaneously. Empty categories are returned as empty arrays." },
8199
+ { question: "How is the limit parameter applied?", answer: "The `limit` applies independently to each entity type. Setting `limit=5` returns up to 5 documents, 5 field values, 5 sources, 5 schemas, and 5 registry fields." }
8200
+ ],
8201
+ mentions: ["omnisearch", "Cmd+K", "unified search"]
7581
8202
  },
7582
8203
  {
7583
8204
  slug: "omnisearch",
@@ -7586,6 +8207,7 @@ var sections24 = [
7586
8207
  seoTitle: "Saved Filters Endpoints \u2014 Talonic Docs",
7587
8208
  description: "Create, list, and delete saved filter configurations for reuse. Persist filter conditions, search terms, and sort settings.",
7588
8209
  content: [
8210
+ { type: "paragraph", text: "Saved filters persist reusable filter configurations so you can apply the same conditions, search terms, and sort settings across multiple sessions. Create a saved filter from any combination of conditions, then reload it later without re-specifying each parameter." },
7589
8211
  {
7590
8212
  type: "endpoint",
7591
8213
  method: "GET",
@@ -7719,8 +8341,11 @@ var sections24 = [
7719
8341
  related: [
7720
8342
  { label: "Filter Documents", slug: "filter-documents" }
7721
8343
  ],
7722
- faq: [],
7723
- mentions: ["saved filters"]
8344
+ faq: [
8345
+ { question: "Can I update a saved filter?", answer: "There is no PUT endpoint for saved filters. To update, delete the existing filter and create a new one with the desired configuration." },
8346
+ { question: "Are saved filters shared across team members?", answer: "Yes. Saved filters are scoped to the organization, so all team members with read access can list and use them." }
8347
+ ],
8348
+ mentions: ["saved filters", "reusable filters"]
7724
8349
  },
7725
8350
  {
7726
8351
  slug: "saved-filters",
@@ -7734,8 +8359,11 @@ var sections24 = [
7734
8359
  related: [
7735
8360
  { label: "Filter Documents", slug: "filter-documents" }
7736
8361
  ],
7737
- faq: [],
7738
- mentions: ["document counts", "faceted navigation"]
8362
+ faq: [
8363
+ { question: "Does the document counts endpoint support the same conditions as the filter endpoint?", answer: "Yes. The conditions format is identical to the filter documents endpoint. You can reuse the same condition arrays to get counts before fetching the actual documents." },
8364
+ { question: "Can I use document counts without any conditions?", answer: "Yes. Omitting conditions returns the total document count across all sources, which is useful for dashboard overview widgets." }
8365
+ ],
8366
+ mentions: ["document counts", "faceted navigation", "aggregate counts"]
7739
8367
  },
7740
8368
  {
7741
8369
  slug: "document-counts",
@@ -7744,6 +8372,8 @@ var sections24 = [
7744
8372
  seoTitle: "Materialize Index Endpoint \u2014 Talonic Docs",
7745
8373
  description: "Trigger materialization backfill for filter indexes. Rebuilds the materialized field value index used by filter queries after bulk ingestion.",
7746
8374
  content: [
8375
+ { type: "paragraph", text: "Trigger a rebuild of the materialized field value index. The materialized index pre-computes extracted field values for every document, enabling sub-second filter queries even on large workspaces. Run this endpoint after bulk document ingestion or schema changes to ensure filter results are current." },
8376
+ { type: "callout", variant: "warning", text: "Materialization can be resource-intensive on large workspaces. Avoid calling this endpoint during peak usage. For incremental ingestion, the index updates automatically per document." },
7747
8377
  {
7748
8378
  type: "endpoint",
7749
8379
  method: "POST",
@@ -7780,8 +8410,11 @@ var sections24 = [
7780
8410
  related: [
7781
8411
  { label: "Filter Documents", slug: "filter-documents" }
7782
8412
  ],
7783
- faq: [],
7784
- mentions: ["materialized index", "backfill"]
8413
+ faq: [
8414
+ { question: "When do I need to manually trigger materialization?", answer: "Only after bulk ingestion (e.g. uploading hundreds of documents at once) or schema changes. For normal single-document uploads, the index is updated automatically during post-extraction processing." },
8415
+ { question: "Is materialization idempotent?", answer: "Yes. Running it multiple times produces the same result. Existing materialized values are upserted, not duplicated." }
8416
+ ],
8417
+ mentions: ["materialized index", "backfill", "index rebuild"]
7785
8418
  },
7786
8419
  {
7787
8420
  slug: "materialized-index",
@@ -7795,8 +8428,11 @@ var sections24 = [
7795
8428
  related: [
7796
8429
  { label: "Materialize", slug: "document-counts" }
7797
8430
  ],
7798
- faq: [],
7799
- mentions: ["materialized index"]
8431
+ faq: [
8432
+ { question: "What happens if the materialized index is out of date?", answer: "Filter queries may return stale results or miss recently ingested documents. Trigger a materialization backfill via `POST /filter/materialize` to rebuild the index." },
8433
+ { question: "Does the materialized index affect omnisearch results?", answer: "Yes. Omnisearch queries field values from the same materialized index. An out-of-date index may cause missing results in both filter and omnisearch endpoints." }
8434
+ ],
8435
+ mentions: ["materialized index", "filter performance"]
7800
8436
  }
7801
8437
  ];
7802
8438
 
@@ -7972,6 +8608,7 @@ var sections26 = [
7972
8608
  description: "List resolution runs that apply field normalization, lookup cascades, and value transforms to extracted data. Supports cursor-based pagination.",
7973
8609
  content: [
7974
8610
  { type: "paragraph", text: "Resolution runs apply field normalization, lookup cascades, and value transforms to extracted data. Create a resolution from a completed job run to standardise field values against reference data." },
8611
+ { type: "paragraph", text: 'The resolution pipeline maps raw extracted values (e.g. "Deutschland") to canonical forms (e.g. "DE") using a 3-tier lookup cascade: string normalization, token-based fuzzy matching, and an LLM fallback. Each resolution run captures a snapshot of the active resolution policy and dialect at run time for reproducibility.' },
7975
8612
  {
7976
8613
  type: "endpoint",
7977
8614
  method: "GET",
@@ -8046,8 +8683,12 @@ var sections26 = [
8046
8683
  { label: "Create Resolution", slug: "create-resolution" },
8047
8684
  { label: "Get Resolution Results", slug: "get-resolution-results" }
8048
8685
  ],
8049
- faq: [],
8050
- mentions: ["resolution", "normalization", "lookup cascade"]
8686
+ faq: [
8687
+ { question: "What is the difference between a job run and a resolution run?", answer: "A job run extracts raw field values from documents. A resolution run takes those raw values and normalizes them to canonical forms using lookup tables, fuzzy matching, and LLM fallbacks." },
8688
+ { question: "What are policy_snapshot and dialect_snapshot?", answer: "These capture the resolution configuration at run time. The policy snapshot records which normalization strategies were active. The dialect snapshot records output format preferences (e.g. date format, number locale). Both ensure runs are reproducible." },
8689
+ { question: "Can I filter resolutions by the originating job?", answer: "Yes. Use the `source_run_id` query parameter to find all resolution runs derived from a specific job run." }
8690
+ ],
8691
+ mentions: ["resolution", "normalization", "lookup cascade", "canonical forms"]
8051
8692
  },
8052
8693
  {
8053
8694
  slug: "create-resolution",
@@ -8056,6 +8697,8 @@ var sections26 = [
8056
8697
  seoTitle: "Create Resolution Endpoint \u2014 Talonic Docs",
8057
8698
  description: "Create a resolution run from a completed job to standardise extracted field values against reference data using lookup cascades and transforms.",
8058
8699
  content: [
8700
+ { type: "paragraph", text: "Create a new resolution run targeting documents from a completed job run. The resolution enters `pending` status immediately. Call the execute endpoint to start processing, or it will be picked up automatically depending on your pipeline configuration." },
8701
+ { type: "callout", variant: "info", text: "The `source_run_id` must reference a completed job run. Creating a resolution against a pending or failed run returns a `404` error." },
8059
8702
  {
8060
8703
  type: "endpoint",
8061
8704
  method: "POST",
@@ -8130,8 +8773,11 @@ var sections26 = [
8130
8773
  { label: "List Resolutions", slug: "list-resolutions" },
8131
8774
  { label: "Execute Resolution", slug: "execute-resolution" }
8132
8775
  ],
8133
- faq: [],
8134
- mentions: ["create resolution"]
8776
+ faq: [
8777
+ { question: "Do I need to call execute after creating a resolution?", answer: "Usually, yes. Creating a resolution only sets it to `pending`. Unless your pipeline configuration picks up pending runs automatically, call `POST /v1/resolutions/{id}/execute` to start the resolution pipeline." },
8778
+ { question: "Can I create multiple resolutions from the same job run?", answer: "Yes. Each resolution run is independent and produces its own set of results. This is useful for testing different resolution policies." }
8779
+ ],
8780
+ mentions: ["create resolution", "source_run_id"]
8135
8781
  },
8136
8782
  {
8137
8783
  slug: "get-resolution",
@@ -8140,6 +8786,7 @@ var sections26 = [
8140
8786
  seoTitle: "Get Resolution Endpoint \u2014 Talonic Docs",
8141
8787
  description: "Retrieve a resolution run by ID with its current status, document count, completion timestamp, and links to results. Requires read scope for the workspace.",
8142
8788
  content: [
8789
+ { type: "paragraph", text: "Retrieve the current status and metadata of a specific resolution run. Use this endpoint to poll for completion after executing a resolution. The response includes links to the results endpoint where you can inspect per-field resolved values." },
8143
8790
  {
8144
8791
  type: "endpoint",
8145
8792
  method: "GET",
@@ -8198,8 +8845,11 @@ var sections26 = [
8198
8845
  { label: "List Resolutions", slug: "list-resolutions" },
8199
8846
  { label: "Get Resolution Results", slug: "get-resolution-results" }
8200
8847
  ],
8201
- faq: [],
8202
- mentions: ["resolution status"]
8848
+ faq: [
8849
+ { question: "How long does a resolution run typically take?", answer: "Resolution runs are fast for purely deterministic lookups (seconds). Runs that require LLM fallback for ambiguous values take longer, typically 1-5 minutes depending on the number of unresolved fields." },
8850
+ { question: "What does a failed resolution status mean?", answer: "A `failed` status indicates the resolution pipeline encountered an unrecoverable error. Check the `error_message` field for details. You can delete the failed run and create a new one." }
8851
+ ],
8852
+ mentions: ["resolution status", "polling"]
8203
8853
  },
8204
8854
  {
8205
8855
  slug: "get-resolution-results",
@@ -8208,6 +8858,8 @@ var sections26 = [
8208
8858
  seoTitle: "Get Resolution Results \u2014 Talonic Docs",
8209
8859
  description: "Get per-field resolution results showing original values, resolved values, resolution step used, and confidence scores for each document.",
8210
8860
  content: [
8861
+ { type: "paragraph", text: "Retrieve the per-field resolution results for a completed run. Each result shows the original extracted value alongside the resolved canonical value, the resolution strategy that produced the match, and a confidence score. Use this to audit how values were normalized and identify fields that may need manual review." },
8862
+ { type: "callout", variant: "info", text: "The `resolution_step` field indicates which strategy produced the resolved value: `lookup` (direct table match), `transfer` (registry transfer), `compute` (deterministic computation), or `null` if no resolution was applied." },
8211
8863
  {
8212
8864
  type: "endpoint",
8213
8865
  method: "GET",
@@ -8266,8 +8918,11 @@ var sections26 = [
8266
8918
  related: [
8267
8919
  { label: "Get Resolution", slug: "get-resolution" }
8268
8920
  ],
8269
- faq: [],
8270
- mentions: ["resolution results", "resolved values"]
8921
+ faq: [
8922
+ { question: "What does it mean when resolved_value is null?", answer: "A null `resolved_value` means no resolution strategy could match the original value to a canonical form. The field retains its raw extracted value. Consider adding the value to a lookup table for future runs." },
8923
+ { question: "What confidence scores should I expect from each resolution step?", answer: "Direct `lookup` matches typically score 0.95-1.0. Fuzzy token matches score 0.7-0.95. LLM fallback matches score 0.5-0.8. Scores below 0.5 usually indicate low-confidence guesses." }
8924
+ ],
8925
+ mentions: ["resolution results", "resolved values", "original values", "resolution step"]
8271
8926
  },
8272
8927
  {
8273
8928
  slug: "execute-resolution",
@@ -8276,6 +8931,8 @@ var sections26 = [
8276
8931
  seoTitle: "Execute Resolution Endpoint \u2014 Talonic Docs",
8277
8932
  description: "Execute the resolution pipeline on all pending fields. Returns immediately \u2014 poll the run for progress. Requires write scope.",
8278
8933
  content: [
8934
+ { type: "paragraph", text: "Start the resolution pipeline on a pending resolution run. The endpoint returns immediately with the updated run status (typically `running`). Poll the get resolution endpoint to track progress and check for completion." },
8935
+ { type: "callout", variant: "warning", text: "Executing a resolution that is already `running` or `completed` has no effect. Only `pending` runs can be executed." },
8279
8936
  {
8280
8937
  type: "endpoint",
8281
8938
  method: "POST",
@@ -8334,8 +8991,11 @@ var sections26 = [
8334
8991
  { label: "Create Resolution", slug: "create-resolution" },
8335
8992
  { label: "Get Resolution Results", slug: "get-resolution-results" }
8336
8993
  ],
8337
- faq: [],
8338
- mentions: ["execute resolution"]
8994
+ faq: [
8995
+ { question: "Is the execute call synchronous?", answer: "No. The endpoint returns immediately with status `running`. Poll `GET /v1/resolutions/{id}` to track progress and detect completion." },
8996
+ { question: "What happens if execution fails?", answer: "The run status transitions to `failed` and the `error_message` field is populated. You can delete the failed run and create a new one to retry." }
8997
+ ],
8998
+ mentions: ["execute resolution", "start resolution"]
8339
8999
  },
8340
9000
  {
8341
9001
  slug: "cancel-resolution",
@@ -8344,6 +9004,8 @@ var sections26 = [
8344
9004
  seoTitle: "Delete Resolution Endpoint \u2014 Talonic Docs",
8345
9005
  description: "Delete a resolution run and its results. Requires write scope. This action permanently removes all resolution data.",
8346
9006
  content: [
9007
+ { type: "paragraph", text: "Permanently delete a resolution run and all its associated results. This action is irreversible. Use this to clean up failed runs, remove outdated resolutions, or free up storage. The originating job run and its results are not affected." },
9008
+ { type: "callout", variant: "warning", text: "Deletion is permanent. All per-field resolution results associated with this run are removed. The source job run and its extracted data are unaffected." },
8347
9009
  {
8348
9010
  type: "endpoint",
8349
9011
  method: "DELETE",
@@ -8381,8 +9043,11 @@ var sections26 = [
8381
9043
  related: [
8382
9044
  { label: "List Resolutions", slug: "list-resolutions" }
8383
9045
  ],
8384
- faq: [],
8385
- mentions: ["delete resolution"]
9046
+ faq: [
9047
+ { question: "Can I delete a resolution that is currently running?", answer: "Yes. The delete operation cancels the running resolution and removes all data. However, it is recommended to wait for completion or failure before deleting to avoid race conditions." },
9048
+ { question: "Does deleting a resolution affect the source job run?", answer: "No. The source job run, its documents, and its extracted values are completely unaffected by resolution deletion." }
9049
+ ],
9050
+ mentions: ["delete resolution", "permanent deletion"]
8386
9051
  }
8387
9052
  ];
8388
9053
 
@@ -8396,6 +9061,8 @@ var sections27 = [
8396
9061
  description: "List all discovered link keys with their classification category and frequency. Link keys are field-level entity identifiers used for document linking.",
8397
9062
  content: [
8398
9063
  { type: "paragraph", text: "The linking graph connects documents through shared entity values \u2014 an invoice and a contract sharing the same customer ID are linked. The API exposes the bipartite document-entity graph: link keys (field-level entity identifiers), document-level links, the full graph, document-centric subgraphs, classification (identity, transaction, reference), backfill, and document-to-case mapping." },
9064
+ { type: "paragraph", text: "Link keys are the fields that the linking engine uses to discover connections between documents. Each link key has a **classification category** \u2014 `identity` (e.g. vendor ID, customer number), `transaction` (e.g. invoice number, PO number), or `reference` (e.g. contract reference). Use this endpoint to inspect which fields are currently recognized as link keys and how frequently they appear across your documents." },
9065
+ { type: "callout", variant: "info", text: "Link keys are discovered automatically during extraction. If a field is not yet classified, use the **Classify** endpoint to trigger AI-based classification on ambiguous fields." },
8399
9066
  {
8400
9067
  type: "endpoint",
8401
9068
  method: "GET",
@@ -8457,9 +9124,11 @@ var sections27 = [
8457
9124
  { label: "List Cases", slug: "list-cases" }
8458
9125
  ],
8459
9126
  faq: [
8460
- { question: "What are link keys in Talonic?", answer: "Link keys are field-level entity identifiers (e.g. customer_id, invoice_number) used to connect documents that share the same value." }
9127
+ { question: "What are link keys in Talonic?", answer: "Link keys are field-level entity identifiers (e.g. customer_id, invoice_number) used to connect documents that share the same value." },
9128
+ { question: "What is the difference between identity, transaction, and reference link keys?", answer: "Identity keys represent stable entity identifiers (e.g. vendor ID, tax number). Transaction keys are document-specific identifiers (e.g. invoice number, PO number). Reference keys are cross-references between documents (e.g. contract reference cited in an invoice)." },
9129
+ { question: "How are link keys discovered?", answer: "Link keys are discovered automatically during document extraction. Fields are classified using rule-based heuristics first, then an LLM call for ambiguous cases. You can also trigger classification manually via the Classify endpoint." }
8461
9130
  ],
8462
- mentions: ["link keys", "bipartite graph", "entity linking"]
9131
+ mentions: ["link keys", "bipartite graph", "entity linking", "field registry", "link key category"]
8463
9132
  },
8464
9133
  {
8465
9134
  slug: "reclassify-link-key",
@@ -8468,6 +9137,8 @@ var sections27 = [
8468
9137
  seoTitle: "Classify Link Keys \u2014 Talonic Docs",
8469
9138
  description: "Classify link keys into categories (identity, transaction, reference) using AI. Runs asynchronously on ambiguous fields.",
8470
9139
  content: [
9140
+ { type: "paragraph", text: "When new fields are extracted, some may not be automatically classified as link keys. The classify endpoint runs AI-powered classification on ambiguous fields to determine whether they are **identity**, **transaction**, or **reference** link keys. This is useful after onboarding new document types or when the field registry grows." },
9141
+ { type: "callout", variant: "info", text: "Classification uses a two-pass approach: rule-based heuristics handle obvious cases (e.g. fields named `invoice_number`), then an LLM call classifies the remaining ambiguous fields. A backfill is automatically triggered when new link keys are identified." },
8471
9142
  {
8472
9143
  type: "endpoint",
8473
9144
  method: "POST",
@@ -8504,10 +9175,14 @@ var sections27 = [
8504
9175
  }
8505
9176
  ],
8506
9177
  related: [
8507
- { label: "Link Keys", slug: "list-link-keys" }
9178
+ { label: "Link Keys", slug: "list-link-keys" },
9179
+ { label: "Backfill", slug: "list-cases" }
8508
9180
  ],
8509
- faq: [],
8510
- mentions: ["AI classification", "link key categories"]
9181
+ faq: [
9182
+ { question: "Does classification run synchronously?", answer: "The endpoint returns immediately with the count of classified fields. If new link keys are found, a backfill is triggered asynchronously to update entity links across all documents." },
9183
+ { question: "Can I reclassify an already-classified link key?", answer: "The classify endpoint targets unclassified or ambiguous fields. Already-classified link keys are not re-evaluated unless their category is null." }
9184
+ ],
9185
+ mentions: ["AI classification", "link key categories", "rule-based classification", "LLM classification"]
8511
9186
  },
8512
9187
  {
8513
9188
  slug: "list-entities",
@@ -8516,6 +9191,8 @@ var sections27 = [
8516
9191
  seoTitle: "Document Links Endpoint \u2014 Talonic Docs",
8517
9192
  description: "Get all entity links for a specific document showing entity values, types, link keys, and linked document IDs.",
8518
9193
  content: [
9194
+ { type: "paragraph", text: "Retrieve all entity links discovered for a specific document. Each link represents a shared field value \u2014 such as a customer ID or PO number \u2014 that connects this document to others in the workspace. Use this endpoint to understand how a document relates to the rest of your corpus." },
9195
+ { type: "callout", variant: "info", text: "The `document_count` field on each entity indicates how many documents share that value. A high count on an identity entity (e.g. a vendor ID appearing in 50+ documents) is expected, while a high count on a transaction entity may indicate a data quality issue." },
8519
9196
  {
8520
9197
  type: "endpoint",
8521
9198
  method: "GET",
@@ -8572,10 +9249,14 @@ var sections27 = [
8572
9249
  ],
8573
9250
  related: [
8574
9251
  { label: "Full Graph", slug: "list-linked-documents" },
8575
- { label: "Link Keys", slug: "list-link-keys" }
9252
+ { label: "Link Keys", slug: "list-link-keys" },
9253
+ { label: "Document-Case Map", slug: "refresh-cases" }
8576
9254
  ],
8577
- faq: [],
8578
- mentions: ["document links"]
9255
+ faq: [
9256
+ { question: "What does document_count represent?", answer: 'The number of documents in your workspace that share the same entity value for that field. For example, if three invoices reference vendor ID "ACME-001", `document_count` is 3.' },
9257
+ { question: "Can a document have zero links?", answer: "Yes. Documents that have no extracted field values matching other documents will return an empty data array. These appear as unlinked in the graph." }
9258
+ ],
9259
+ mentions: ["document links", "entity values", "shared fields"]
8579
9260
  },
8580
9261
  {
8581
9262
  slug: "list-linked-documents",
@@ -8584,6 +9265,15 @@ var sections27 = [
8584
9265
  seoTitle: "Linking Graph Endpoint \u2014 Talonic Docs",
8585
9266
  description: "Get the full document linking graph as nodes and edges, or get the graph neighbourhood for a single document with configurable traversal depth.",
8586
9267
  content: [
9268
+ { type: "paragraph", text: "The linking graph is a **bipartite graph** with two node types: documents and entities. Edges connect documents to the entity values they share. This endpoint returns the complete graph for your workspace, including detected cases (groups of documents linked through transaction or reference entities) and entity groups (documents linked only through identity entities)." },
9269
+ { type: "callout", variant: "warning", text: "The full graph endpoint can return large payloads for workspaces with many documents. For targeted exploration, use the document subgraph endpoint with a configurable `depth` parameter instead." },
9270
+ { type: "list", ordered: false, items: [
9271
+ "**Nodes** represent documents (with filename and detected type) or entities (with value and link key category)",
9272
+ "**Edges** connect a document to an entity through a specific field key",
9273
+ "**Cases** are connected components containing transaction or reference entities (2+ documents)",
9274
+ "**Entity groups** are components linked only through identity entities",
9275
+ "**Excluded entities** are high-frequency or owner entities removed from BFS case detection to avoid merging unrelated cases"
9276
+ ] },
8587
9277
  {
8588
9278
  type: "endpoint",
8589
9279
  method: "GET",
@@ -8654,6 +9344,7 @@ var sections27 = [
8654
9344
  }
8655
9345
  }`
8656
9346
  },
9347
+ { type: "paragraph", text: "To explore the graph from a single document outward, use the document subgraph endpoint below. The `depth` parameter controls how many hops to traverse \u2014 each hop alternates between document and entity nodes." },
8657
9348
  {
8658
9349
  type: "endpoint",
8659
9350
  method: "GET",
@@ -8715,8 +9406,11 @@ var sections27 = [
8715
9406
  { label: "Document Links", slug: "list-entities" },
8716
9407
  { label: "Document-Case Map", slug: "refresh-cases" }
8717
9408
  ],
8718
- faq: [],
8719
- mentions: ["linking graph", "bipartite graph"]
9409
+ faq: [
9410
+ { question: "What does the depth parameter control?", answer: "Depth controls BFS traversal hops from the starting document. Depth 1 returns only the document and its direct entities. Depth 2 (default) also includes other documents sharing those entities. Higher depths expand the neighbourhood further." },
9411
+ { question: "Why are some entities excluded from case detection?", answer: "High-frequency entities (e.g. a company name appearing on every document) and owner entities are excluded from BFS to prevent merging unrelated document groups into a single oversized case." }
9412
+ ],
9413
+ mentions: ["linking graph", "bipartite graph", "BFS traversal", "document subgraph"]
8720
9414
  },
8721
9415
  {
8722
9416
  slug: "list-cases",
@@ -8725,6 +9419,8 @@ var sections27 = [
8725
9419
  seoTitle: "Backfill Linking \u2014 Talonic Docs",
8726
9420
  description: "Trigger a backfill of the linking graph for all documents. Useful after link key configuration changes. Poll progress via the backfill progress endpoint.",
8727
9421
  content: [
9422
+ { type: "paragraph", text: "After changing link key configurations \u2014 such as classifying new fields as link keys or reclassifying existing ones \u2014 the linking graph needs to be rebuilt. The backfill endpoint scans all documents and reconstructs entity links based on the current link key set." },
9423
+ { type: "callout", variant: "warning", text: "Backfill is serialized per organization. If a backfill is already running, the request is silently skipped. Poll the progress endpoint to monitor in-flight backfills." },
8728
9424
  {
8729
9425
  type: "endpoint",
8730
9426
  method: "POST",
@@ -8750,6 +9446,7 @@ var sections27 = [
8750
9446
  "message": "Backfill queued."
8751
9447
  }`
8752
9448
  },
9449
+ { type: "paragraph", text: "Use the progress endpoint to monitor the backfill. The `running` field indicates whether the operation is still in progress, and `processed` / `total` track document-level progress." },
8753
9450
  {
8754
9451
  type: "endpoint",
8755
9452
  method: "GET",
@@ -8792,10 +9489,14 @@ var sections27 = [
8792
9489
  }
8793
9490
  ],
8794
9491
  related: [
8795
- { label: "Link Keys", slug: "list-link-keys" }
9492
+ { label: "Link Keys", slug: "list-link-keys" },
9493
+ { label: "Classify", slug: "reclassify-link-key" }
8796
9494
  ],
8797
- faq: [],
8798
- mentions: ["backfill", "linking graph"]
9495
+ faq: [
9496
+ { question: "How long does a backfill take?", answer: "Duration depends on the number of documents in your workspace. Poll the progress endpoint to track completion. The backfill processes documents in batches." },
9497
+ { question: "Is classification automatically triggered during backfill?", answer: "No. Backfill only rebuilds entity links using the current link key set. To classify new fields as link keys, call the Classify endpoint first, which will trigger its own backfill if new keys are found." }
9498
+ ],
9499
+ mentions: ["backfill", "linking graph", "progress polling"]
8799
9500
  },
8800
9501
  {
8801
9502
  slug: "get-case",
@@ -8804,6 +9505,12 @@ var sections27 = [
8804
9505
  seoTitle: "List Cases Endpoint \u2014 Talonic Docs",
8805
9506
  description: "List and retrieve cases \u2014 automatically created groups of 2+ related documents linked through shared field values with narrative summaries.",
8806
9507
  content: [
9508
+ { type: "paragraph", text: "Cases are automatically created groups of two or more documents that are connected through shared **transaction** or **reference** entity values. For example, an invoice, a purchase order, and a delivery note sharing the same PO number form a case. Cases provide a high-level view of document relationships without needing to navigate the full graph." },
9509
+ { type: "list", ordered: false, items: [
9510
+ "Each case has a deterministic **case key** (hex hash of its document IDs)",
9511
+ "Cases are created by the linking pipeline during backfill or real-time processing",
9512
+ "Documents linked only through **identity** entities (e.g. vendor ID) appear as entity groups, not cases"
9513
+ ] },
8807
9514
  {
8808
9515
  type: "endpoint",
8809
9516
  method: "GET",
@@ -8855,10 +9562,15 @@ var sections27 = [
8855
9562
  }
8856
9563
  ],
8857
9564
  related: [
8858
- { label: "Link Keys", slug: "list-link-keys" }
9565
+ { label: "Link Keys", slug: "list-link-keys" },
9566
+ { label: "Case Graph", slug: "get-case-graph" },
9567
+ { label: "Document-Case Map", slug: "refresh-cases" }
8859
9568
  ],
8860
- faq: [],
8861
- mentions: ["cases"]
9569
+ faq: [
9570
+ { question: "How are cases different from entity groups?", answer: "Cases require at least one transaction or reference entity linking the documents (e.g. shared PO number). Entity groups are documents linked only through identity entities (e.g. same vendor ID) and do not form cases." },
9571
+ { question: "Can a document belong to multiple cases?", answer: "No. Each document belongs to at most one case. The case key is a deterministic hash of the sorted document IDs in the group." }
9572
+ ],
9573
+ mentions: ["cases", "document groups", "case key"]
8862
9574
  },
8863
9575
  {
8864
9576
  slug: "get-case-graph",
@@ -8867,6 +9579,7 @@ var sections27 = [
8867
9579
  seoTitle: "Case Graph \u2014 Talonic Docs",
8868
9580
  description: "Retrieve the D3-compatible graph visualization for a single case, showing document nodes and entity edges within the case boundary.",
8869
9581
  content: [
9582
+ { type: "paragraph", text: "Retrieve the graph structure for a single case, formatted for **D3.js** or similar graph visualization libraries. The response contains only the nodes and edges within the case boundary, making it suitable for rendering focused relationship diagrams." },
8870
9583
  {
8871
9584
  type: "endpoint",
8872
9585
  method: "GET",
@@ -8920,8 +9633,11 @@ var sections27 = [
8920
9633
  { label: "Cases", slug: "get-case" },
8921
9634
  { label: "Full Graph", slug: "list-linked-documents" }
8922
9635
  ],
8923
- faq: [],
8924
- mentions: ["case graph", "D3 visualization"]
9636
+ faq: [
9637
+ { question: "What graph format does the case graph use?", answer: "The response uses a nodes-and-edges structure compatible with D3.js force-directed graphs. Node IDs are stable across requests, so you can maintain layout state between refreshes." },
9638
+ { question: "Does the case graph include excluded entities?", answer: "No. The case graph is scoped to the case boundary and only includes entities that contributed to forming the case. High-frequency entities excluded from the breadth-first search (BFS) traversal are not shown." }
9639
+ ],
9640
+ mentions: ["case graph", "D3 visualization", "graph rendering"]
8925
9641
  },
8926
9642
  {
8927
9643
  slug: "refresh-cases",
@@ -8930,6 +9646,8 @@ var sections27 = [
8930
9646
  seoTitle: "Document-Case Map Endpoint \u2014 Talonic Docs",
8931
9647
  description: "Get the mapping of documents to their resolved cases. Returns a mapping of document IDs to assigned case keys.",
8932
9648
  content: [
9649
+ { type: "paragraph", text: "The document-case map provides a flat lookup from document ID to case assignment. Use it to quickly determine which case a document belongs to, or to identify documents that are not part of any case. Documents in **entity groups** (linked only through identity entities) are included with `is_case: false`." },
9650
+ { type: "callout", variant: "info", text: "Documents with `is_case: false` are linked to other documents only through identity entities (e.g. same vendor). They appear in the map but do not form a case. Documents with no links at all are not included in the map." },
8933
9651
  {
8934
9652
  type: "endpoint",
8935
9653
  method: "GET",
@@ -8983,10 +9701,14 @@ var sections27 = [
8983
9701
  }
8984
9702
  ],
8985
9703
  related: [
8986
- { label: "Full Graph", slug: "list-linked-documents" }
9704
+ { label: "Full Graph", slug: "list-linked-documents" },
9705
+ { label: "Cases", slug: "get-case" }
8987
9706
  ],
8988
- faq: [],
8989
- mentions: ["document-case mapping"]
9707
+ faq: [
9708
+ { question: "What does an empty case_key mean?", answer: "An empty string for case_key indicates the document is in an entity group (linked via identity entities only) but not in a case. The is_case field will be false." },
9709
+ { question: "Are unlinked documents included in the map?", answer: "No. Only documents with at least one entity link appear in the map. Completely unlinked documents are omitted." }
9710
+ ],
9711
+ mentions: ["document-case mapping", "case assignment", "entity groups"]
8990
9712
  }
8991
9713
  ];
8992
9714
 
@@ -9000,6 +9722,15 @@ var sections28 = [
9000
9722
  description: "Get an aggregate N-Shot summary for a run comparing field-level extraction quality across schema versions.",
9001
9723
  content: [
9002
9724
  { type: "paragraph", text: "N-Shot endpoints provide field-level comparisons between job runs \u2014 useful for evaluating extraction quality across schema versions. Submit judge decisions (human or AI) to record which run produced the better result. All routes are nested under `/v1/jobs/runs/{runId}/nshot/...`." },
9725
+ { type: "paragraph", text: "The summary endpoint returns aggregate statistics for all N-Shot comparisons in a run: total comparisons, agreement breakdown (green/yellow/red), override count, and overall agreement rate. Use this to quickly assess whether a schema change improved or degraded extraction quality." },
9726
+ {
9727
+ type: "list",
9728
+ items: [
9729
+ "**Green** \u2014 all shots produced the same value (high confidence)",
9730
+ "**Yellow** \u2014 partial agreement between shots (majority value exists but not unanimous)",
9731
+ "**Red** \u2014 no agreement between shots (each shot produced a different value)"
9732
+ ]
9733
+ },
9003
9734
  {
9004
9735
  type: "endpoint",
9005
9736
  method: "GET",
@@ -9052,8 +9783,12 @@ var sections28 = [
9052
9783
  { label: "Comparisons", slug: "nshot-list-shots" },
9053
9784
  { label: "Judge Decision", slug: "nshot-judge-decision" }
9054
9785
  ],
9055
- faq: [],
9056
- mentions: ["N-Shot", "extraction quality"]
9786
+ faq: [
9787
+ { question: "What is a good agreement_rate?", answer: "An agreement rate above 0.90 indicates stable extraction. Rates between 0.75 and 0.90 suggest the schema needs tuning. Rates below 0.75 typically indicate structural issues with the schema or inconsistent source documents." },
9788
+ { question: "How many shots are typically used?", answer: "Three shots is the default and most common configuration. This provides a reliable majority vote while keeping cost manageable." },
9789
+ { question: "Does the summary update as I submit judge decisions?", answer: "Yes. The `overridden` count increments with each accepted judge decision. The agreement breakdown (green/yellow/red) reflects the original shot outcomes and does not change when overrides are applied." }
9790
+ ],
9791
+ mentions: ["N-Shot", "extraction quality", "agreement rate", "field comparison"]
9057
9792
  },
9058
9793
  {
9059
9794
  slug: "nshot-list-shots",
@@ -9062,6 +9797,7 @@ var sections28 = [
9062
9797
  seoTitle: "N-Shot Comparisons Endpoint \u2014 Talonic Docs",
9063
9798
  description: "List per-document field comparisons for N-Shot evaluation across job runs. Returns all comparisons for a specific job run.",
9064
9799
  content: [
9800
+ { type: "paragraph", text: "Retrieve all per-document field comparisons for a job run. Each comparison shows the values produced by each shot, the agreement status (green/yellow/red), the majority value, and any override or judge decision that has been applied. Use this to drill into specific fields and understand where extraction diverges across shots." },
9065
9801
  {
9066
9802
  type: "endpoint",
9067
9803
  method: "GET",
@@ -9135,8 +9871,11 @@ var sections28 = [
9135
9871
  { label: "Single Comparison", slug: "nshot-compare" },
9136
9872
  { label: "Summary", slug: "nshot-summary" }
9137
9873
  ],
9138
- faq: [],
9139
- mentions: ["N-Shot comparisons"]
9874
+ faq: [
9875
+ { question: "What comparison methods are available?", answer: "Two comparison methods are currently available: `exact` (string equality after normalization) and `semantic` (embedding-based similarity). The method is chosen automatically based on the field data type." },
9876
+ { question: "How do I find comparisons that need attention?", answer: 'Filter for `status: "red"` comparisons first (no agreement), then `status: "yellow"` (partial agreement). Green comparisons are confident and typically need no review.' }
9877
+ ],
9878
+ mentions: ["N-Shot comparisons", "field agreement", "per-document comparison"]
9140
9879
  },
9141
9880
  {
9142
9881
  slug: "nshot-compare",
@@ -9145,6 +9884,7 @@ var sections28 = [
9145
9884
  seoTitle: "N-Shot Single Comparison \u2014 Talonic Docs",
9146
9885
  description: "Get a specific field comparison filtered by document and field name. Returns a single N-Shot comparison for detailed evaluation.",
9147
9886
  content: [
9887
+ { type: "paragraph", text: "Retrieve a single N-Shot comparison for a specific document and field. Use this endpoint when you need detailed information about one particular comparison, including per-shot values, any existing override, and the LLM judge recommendation." },
9148
9888
  {
9149
9889
  type: "endpoint",
9150
9890
  method: "GET",
@@ -9228,8 +9968,11 @@ var sections28 = [
9228
9968
  { label: "Comparisons", slug: "nshot-list-shots" },
9229
9969
  { label: "Override", slug: "nshot-select" }
9230
9970
  ],
9231
- faq: [],
9232
- mentions: ["N-Shot comparison"]
9971
+ faq: [
9972
+ { question: "What does a judgement with accepted: null mean?", answer: "The LLM judge has produced a recommendation but no human or API decision has been submitted yet. Use the judge decision endpoint to accept or decline the recommendation." },
9973
+ { question: "Can a comparison have both an override and a judgement?", answer: "Yes. If a judge decision is accepted, an override is automatically created from the recommended shot. A manual override can also coexist with a pending (not-yet-decided) judgement." }
9974
+ ],
9975
+ mentions: ["N-Shot comparison", "single comparison"]
9233
9976
  },
9234
9977
  {
9235
9978
  slug: "nshot-select",
@@ -9238,6 +9981,8 @@ var sections28 = [
9238
9981
  seoTitle: "N-Shot Override Endpoint \u2014 Talonic Docs",
9239
9982
  description: "Manually override the N-Shot selected value for a specific document-field pair. Requires write scope.",
9240
9983
  content: [
9984
+ { type: "paragraph", text: "Manually override the selected value for a specific document-field comparison by choosing a specific shot number. The override is recorded with an audit trail including the actor, timestamp, original value, and new value. Use this when the majority value is incorrect and you want to select a different shot's extraction." },
9985
+ { type: "callout", variant: "info", text: 'The `selected_shot` must be a valid shot number from the comparison\'s `values` array. The override records `actor_id` as `"api"` for all API-initiated overrides.' },
9241
9986
  {
9242
9987
  type: "endpoint",
9243
9988
  method: "POST",
@@ -9325,8 +10070,11 @@ var sections28 = [
9325
10070
  { label: "Single Comparison", slug: "nshot-compare" },
9326
10071
  { label: "Judge Decision", slug: "nshot-judge-decision" }
9327
10072
  ],
9328
- faq: [],
9329
- mentions: ["N-Shot override"]
10073
+ faq: [
10074
+ { question: "Can I override the same comparison multiple times?", answer: "Yes. Each override replaces the previous one. The `from_value` in the latest override reflects the value before the most recent change, not the original majority value." },
10075
+ { question: "Does an override change the agreement status?", answer: "No. The `status` (green/yellow/red) reflects the original shot agreement and does not change when an override is applied. Overrides are tracked separately." }
10076
+ ],
10077
+ mentions: ["N-Shot override", "manual override"]
9330
10078
  },
9331
10079
  {
9332
10080
  slug: "nshot-judge-decision",
@@ -9335,6 +10083,8 @@ var sections28 = [
9335
10083
  seoTitle: "N-Shot Judge Decision \u2014 Talonic Docs",
9336
10084
  description: "Submit a judge decision (human or AI) for an N-Shot comparison to record which candidate produced the correct extraction result.",
9337
10085
  content: [
10086
+ { type: "paragraph", text: "Submit a decision to accept or decline the LLM judge's recommendation for a specific comparison. When `accepted` is `true`, the recommended shot value is automatically applied as an override. When `false`, the recommendation is recorded as declined and no override is applied. Use this to efficiently review LLM suggestions at scale." },
10087
+ { type: "callout", variant: "info", text: 'Accepting a judge decision automatically creates an override with `actor_id: "judge"`. You can still manually override the value afterwards using the override endpoint.' },
9338
10088
  {
9339
10089
  type: "endpoint",
9340
10090
  method: "POST",
@@ -9426,8 +10176,11 @@ var sections28 = [
9426
10176
  { label: "Summary", slug: "nshot-summary" },
9427
10177
  { label: "Override", slug: "nshot-select" }
9428
10178
  ],
9429
- faq: [],
9430
- mentions: ["judge decision", "N-Shot evaluation"]
10179
+ faq: [
10180
+ { question: "What happens if there is no LLM judge recommendation to accept?", answer: "If the comparison has no `judgement` object (or `recommended_shot` is null), the endpoint returns a 404. Only comparisons with existing LLM judge recommendations can receive decisions." },
10181
+ { question: "Can I change a judge decision after submitting it?", answer: "Yes. Submit a new judge decision with the opposite `accepted` value. If you previously accepted and the override was applied, declining will not remove the override \u2014 use the override endpoint to change it manually." }
10182
+ ],
10183
+ mentions: ["judge decision", "N-Shot evaluation", "LLM judge"]
9431
10184
  }
9432
10185
  ];
9433
10186
 
@@ -9441,6 +10194,7 @@ var sections29 = [
9441
10194
  description: "List all schema graph classes in the versioned ontology of document classes discovered across your workspace with field counts and version info.",
9442
10195
  content: [
9443
10196
  { type: "paragraph", text: "The schema graph is a versioned ontology of document classes discovered across your workspace. Each class captures a document type's canonical fields. The API exposes versioned classes, diffs proposed between versions (with approve/reject workflow), inter-class edges, aliases, and a D3-compatible visualization payload." },
10197
+ { type: "paragraph", text: "Use this endpoint to retrieve all schema graph classes for your organization. Classes are created automatically as the platform processes documents and discovers recurring field patterns. Each class tracks its version history and links to the field registry." },
9444
10198
  {
9445
10199
  type: "endpoint",
9446
10200
  method: "GET",
@@ -9503,8 +10257,12 @@ var sections29 = [
9503
10257
  { label: "Get Class", slug: "get-schema-graph-class" },
9504
10258
  { label: "List Versions", slug: "list-class-versions" }
9505
10259
  ],
9506
- faq: [],
9507
- mentions: ["schema graph", "document classes", "ontology"]
10260
+ faq: [
10261
+ { question: "Are schema graph classes created automatically?", answer: "Yes. Classes are generated automatically as the platform discovers recurring document types and their field patterns during extraction. You do not need to create them manually." },
10262
+ { question: "What does the current_version_id represent?", answer: "It points to the latest approved version of the class. Each time a diff is approved, a new version is created and `current_version_id` is updated to point to it." },
10263
+ { question: "How do schema graph classes relate to user schemas?", answer: "Schema graph classes represent discovered document types in the ontology. User schemas are manually defined output schemas for extraction jobs. The two are linked through the field registry \u2014 both reference the same canonical field definitions." }
10264
+ ],
10265
+ mentions: ["schema graph", "document classes", "ontology", "versioned classes"]
9508
10266
  },
9509
10267
  {
9510
10268
  slug: "get-schema-graph-class",
@@ -9513,6 +10271,7 @@ var sections29 = [
9513
10271
  seoTitle: "Get Schema Graph Class \u2014 Talonic Docs",
9514
10272
  description: "Retrieve a schema graph class by ID with its current field definitions, version number, document count, and links to version history. Requires read scope.",
9515
10273
  content: [
10274
+ { type: "paragraph", text: "Retrieve a single schema graph class by its UUID. The response includes the class metadata, its current active version, and links to the full version history. Use this to inspect a specific document type's canonical field structure." },
9516
10275
  {
9517
10276
  type: "endpoint",
9518
10277
  method: "GET",
@@ -9571,8 +10330,11 @@ var sections29 = [
9571
10330
  { label: "List Classes", slug: "list-schema-graph-classes" },
9572
10331
  { label: "List Versions", slug: "list-class-versions" }
9573
10332
  ],
9574
- faq: [],
9575
- mentions: ["schema graph class"]
10333
+ faq: [
10334
+ { question: "Can a class have no current version?", answer: "Yes. A newly discovered class may have `current_version_id` set to `null` until the first version is published via the diff approval workflow." },
10335
+ { question: "What does the document_type_id field link to?", answer: "It links to a `DocumentType` entity in the extraction system. This association connects the schema graph ontology to the document classification pipeline." }
10336
+ ],
10337
+ mentions: ["schema graph class", "class detail"]
9576
10338
  },
9577
10339
  {
9578
10340
  slug: "list-class-versions",
@@ -9581,6 +10343,7 @@ var sections29 = [
9581
10343
  seoTitle: "Schema Graph Class Versions \u2014 Talonic Docs",
9582
10344
  description: "List all published versions of a schema graph class ordered by version number descending with field definitions for each version.",
9583
10345
  content: [
10346
+ { type: "paragraph", text: "Retrieve the complete version history of a schema graph class. Each version captures a snapshot of the class's JSON Schema definition and its associated field registry IDs at the time the version was published. Versions are created when diffs are approved, and version numbers increment monotonically." },
9584
10347
  {
9585
10348
  type: "endpoint",
9586
10349
  method: "GET",
@@ -9641,8 +10404,11 @@ var sections29 = [
9641
10404
  { label: "Get Version", slug: "get-class-version" },
9642
10405
  { label: "List Diffs", slug: "list-schema-graph-diffs" }
9643
10406
  ],
9644
- faq: [],
9645
- mentions: ["class versions"]
10407
+ faq: [
10408
+ { question: "How are new versions created?", answer: "New versions are created when a pending diff is approved via `POST /v1/schema-graph/diffs/{id}/approve`. The diff's field changes are applied and a new version snapshot is published." },
10409
+ { question: "Can I roll back to a previous version?", answer: "There is no direct rollback endpoint. To revert changes, create a new diff that reverses the unwanted field modifications and approve it to produce a new version." }
10410
+ ],
10411
+ mentions: ["class versions", "version history", "JSON Schema"]
9646
10412
  },
9647
10413
  {
9648
10414
  slug: "get-class-version",
@@ -9651,6 +10417,7 @@ var sections29 = [
9651
10417
  seoTitle: "Get Schema Graph Class Version \u2014 Talonic Docs",
9652
10418
  description: "Retrieve a specific version of a schema graph class by class ID and version number. Requires read scope.",
9653
10419
  content: [
10420
+ { type: "paragraph", text: "Retrieve a specific version of a schema graph class by its class ID and version number. Use this to inspect the exact JSON Schema definition and field composition that was active at a particular point in the class's evolution." },
9654
10421
  {
9655
10422
  type: "endpoint",
9656
10423
  method: "GET",
@@ -9704,8 +10471,11 @@ var sections29 = [
9704
10471
  related: [
9705
10472
  { label: "List Versions", slug: "list-class-versions" }
9706
10473
  ],
9707
- faq: [],
9708
- mentions: ["class version"]
10474
+ faq: [
10475
+ { question: "What is the version number path parameter?", answer: "It is the integer version number (e.g. `1`, `2`, `3`), not the version UUID. Use the list versions endpoint to discover available version numbers." },
10476
+ { question: "Does the json_schema field contain a valid JSON Schema?", answer: "Yes. It is a standard JSON Schema object with `type`, `properties`, and optionally a `required` array. You can use it directly for validation or code generation." }
10477
+ ],
10478
+ mentions: ["class version", "specific version"]
9709
10479
  },
9710
10480
  {
9711
10481
  slug: "list-schema-graph-diffs",
@@ -9714,6 +10484,8 @@ var sections29 = [
9714
10484
  seoTitle: "Schema Graph Diffs \u2014 Talonic Docs",
9715
10485
  description: "List pending, approved, and rejected diffs between schema graph class versions. Shows proposed changes for the approve/reject workflow.",
9716
10486
  content: [
10487
+ { type: "paragraph", text: "Diffs represent proposed changes between schema graph class versions. When the platform discovers new fields or detects field type changes, it creates a diff that can be reviewed and either approved (promoting the changes to a new version) or rejected (discarding them). This endpoint returns all diffs, optionally filtered by class or review status." },
10488
+ { type: "callout", variant: "info", text: "Diffs are classified as `additive` (new fields only) or `breaking` (field removals or type changes). Breaking diffs may affect downstream extraction jobs that depend on the removed fields." },
9717
10489
  {
9718
10490
  type: "endpoint",
9719
10491
  method: "GET",
@@ -9785,8 +10557,11 @@ var sections29 = [
9785
10557
  { label: "Approve Diff", slug: "approve-diff" },
9786
10558
  { label: "Reject Diff", slug: "reject-diff" }
9787
10559
  ],
9788
- faq: [],
9789
- mentions: ["schema diffs", "version comparison"]
10560
+ faq: [
10561
+ { question: "How are diffs generated?", answer: "Diffs are generated automatically when the platform detects field changes during extraction. When new documents introduce fields not present in the current class version, a diff is created and set to `pending` for review." },
10562
+ { question: "What happens to pending diffs if I approve a newer diff first?", answer: "Pending diffs reference specific version numbers. If the class advances past a pending diff's `to_version`, the diff becomes stale. Review and reject stale diffs to keep the queue clean." }
10563
+ ],
10564
+ mentions: ["schema diffs", "version comparison", "additive", "breaking"]
9790
10565
  },
9791
10566
  {
9792
10567
  slug: "approve-diff",
@@ -9795,6 +10570,8 @@ var sections29 = [
9795
10570
  seoTitle: "Approve Schema Graph Diff \u2014 Talonic Docs",
9796
10571
  description: "Approve a pending diff to promote proposed changes to the next live class version. Requires write scope.",
9797
10572
  content: [
10573
+ { type: "paragraph", text: "Approve a pending diff to promote its proposed field changes into a new class version. Approving a diff updates the class's `current_version_id` and publishes a new version snapshot with the diff's added fields, removed fields, and type changes applied." },
10574
+ { type: "callout", variant: "warning", text: "Approving a `breaking` diff (one that removes fields or changes types) may affect downstream extraction jobs. Review the `removed_fields` and `type_changes` arrays before approving." },
9798
10575
  {
9799
10576
  type: "endpoint",
9800
10577
  method: "POST",
@@ -9835,8 +10612,11 @@ var sections29 = [
9835
10612
  { label: "List Diffs", slug: "list-schema-graph-diffs" },
9836
10613
  { label: "Reject Diff", slug: "reject-diff" }
9837
10614
  ],
9838
- faq: [],
9839
- mentions: ["approve diff"]
10615
+ faq: [
10616
+ { question: "Can I approve a diff that is not in pending status?", answer: 'No. Only diffs with `review_status: "pending"` can be approved. Already approved or rejected diffs return a 404 or are ignored.' },
10617
+ { question: "Does approving a diff immediately update the class version?", answer: "Yes. Approval is synchronous. The new version is created and `current_version_id` is updated in the same request." }
10618
+ ],
10619
+ mentions: ["approve diff", "version promotion"]
9840
10620
  },
9841
10621
  {
9842
10622
  slug: "reject-diff",
@@ -9845,6 +10625,7 @@ var sections29 = [
9845
10625
  seoTitle: "Reject Schema Graph Diff \u2014 Talonic Docs",
9846
10626
  description: "Reject a pending diff to discard proposed changes to a schema graph class version. Requires write scope.",
9847
10627
  content: [
10628
+ { type: "paragraph", text: "Reject a pending diff to discard its proposed field changes. The class version remains unchanged, and the diff is marked as `rejected` for audit purposes. Rejected diffs are retained in the history and can be reviewed later but cannot be re-approved." },
9848
10629
  {
9849
10630
  type: "endpoint",
9850
10631
  method: "POST",
@@ -9885,8 +10666,11 @@ var sections29 = [
9885
10666
  { label: "List Diffs", slug: "list-schema-graph-diffs" },
9886
10667
  { label: "Approve Diff", slug: "approve-diff" }
9887
10668
  ],
9888
- faq: [],
9889
- mentions: ["reject diff"]
10669
+ faq: [
10670
+ { question: "Can I re-approve a rejected diff?", answer: "No. Once rejected, a diff cannot be re-approved. If the same field changes are needed later, a new diff will be generated automatically during the next extraction cycle." },
10671
+ { question: "Does rejecting a diff affect the current class version?", answer: "No. The class version remains unchanged. Rejection only marks the proposed changes as discarded." }
10672
+ ],
10673
+ mentions: ["reject diff", "discard changes"]
9890
10674
  },
9891
10675
  {
9892
10676
  slug: "list-schema-graph-edges",
@@ -9895,6 +10679,8 @@ var sections29 = [
9895
10679
  seoTitle: "Schema Graph Edges \u2014 Talonic Docs",
9896
10680
  description: "List inter-class edges (relationships) between schema graph classes with relationship type and weight information.",
9897
10681
  content: [
10682
+ { type: "paragraph", text: "Edges represent relationships between schema graph classes, computed using cosine similarity between field embeddings. Each edge captures a directional relationship (e.g. an Invoice class `references` a Purchase Order class) with a weight indicating the strength of the relationship. Use edges to understand how document types relate to each other across your workspace." },
10683
+ { type: "callout", variant: "info", text: "Edge weights range from 0 to 1. Higher weights indicate stronger field overlap between classes. Edges below a minimum weight threshold are automatically pruned and will not appear in results." },
9898
10684
  {
9899
10685
  type: "endpoint",
9900
10686
  method: "GET",
@@ -9955,8 +10741,11 @@ var sections29 = [
9955
10741
  { label: "List Classes", slug: "list-schema-graph-classes" },
9956
10742
  { label: "Visualize", slug: "visualize-schema-graph" }
9957
10743
  ],
9958
- faq: [],
9959
- mentions: ["schema graph edges", "relationships"]
10744
+ faq: [
10745
+ { question: "How are edge weights computed?", answer: "Edge weights are derived from cosine similarity between field embeddings of the two classes. A weight of 0.87 indicates that the two classes have highly similar fields; the weight is a similarity score, not an exact percentage of shared fields." },
10746
+ { question: "Are edges directional?", answer: "Yes. An edge from class A to class B means A references B. The reverse relationship may also exist as a separate edge with a different weight." }
10747
+ ],
10748
+ mentions: ["schema graph edges", "relationships", "cosine similarity", "field overlap"]
9960
10749
  },
9961
10750
  {
9962
10751
  slug: "list-schema-graph-aliases",
@@ -9965,6 +10754,7 @@ var sections29 = [
9965
10754
  seoTitle: "Schema Graph Aliases \u2014 Talonic Docs",
9966
10755
  description: "List all class aliases \u2014 alternative names that map to canonical class IDs in the schema graph ontology.",
9967
10756
  content: [
10757
+ { type: "paragraph", text: 'Aliases map alternative names to canonical schema graph classes. For example, "Bill" might be an alias for the "Invoice" class. The platform uses aliases during document classification to resolve variant names to their canonical class. Use this endpoint to audit or inspect the alias mappings for your organization.' },
9968
10758
  {
9969
10759
  type: "endpoint",
9970
10760
  method: "GET",
@@ -10014,8 +10804,11 @@ var sections29 = [
10014
10804
  related: [
10015
10805
  { label: "List Classes", slug: "list-schema-graph-classes" }
10016
10806
  ],
10017
- faq: [],
10018
- mentions: ["class aliases"]
10807
+ faq: [
10808
+ { question: "Are aliases case-sensitive?", answer: 'No. Alias matching during classification is case-insensitive. "bill", "Bill", and "BILL" all resolve to the same canonical class.' },
10809
+ { question: "How are aliases created?", answer: "Aliases are generated automatically when the platform encounters documents with variant type labels that resolve to the same canonical class during extraction." }
10810
+ ],
10811
+ mentions: ["class aliases", "alternative names", "name mapping"]
10019
10812
  },
10020
10813
  {
10021
10814
  slug: "visualize-schema-graph",
@@ -10024,6 +10817,7 @@ var sections29 = [
10024
10817
  seoTitle: "Visualize Schema Graph \u2014 Talonic Docs",
10025
10818
  description: "Get D3-compatible visualization data for the schema graph with nodes and edges formatted for graph rendering.",
10026
10819
  content: [
10820
+ { type: "paragraph", text: "Retrieve the entire schema graph as a D3-compatible payload with nodes (classes) and edges (relationships). The response is structured for direct consumption by graph visualization libraries such as D3.js, Cytoscape, or vis.js. Nodes include full class metadata; edges use `source` and `target` fields referencing node IDs." },
10027
10821
  {
10028
10822
  type: "endpoint",
10029
10823
  method: "GET",
@@ -10094,8 +10888,11 @@ var sections29 = [
10094
10888
  { label: "Edges", slug: "list-schema-graph-edges" },
10095
10889
  { label: "List Classes", slug: "list-schema-graph-classes" }
10096
10890
  ],
10097
- faq: [],
10098
- mentions: ["D3 visualization", "graph rendering"]
10891
+ faq: [
10892
+ { question: "Can I use this response directly with D3.js force-directed graphs?", answer: "Yes. The `nodes` and `edges` arrays are structured for direct use with D3 force simulations. Edge `source` and `target` fields reference node `id` values." },
10893
+ { question: "Does the visualization endpoint include archived classes?", answer: "Yes. All classes are returned regardless of status. Filter by `status` on the client side if you want to exclude archived nodes from the visualization." }
10894
+ ],
10895
+ mentions: ["D3 visualization", "graph rendering", "force-directed graph"]
10099
10896
  }
10100
10897
  ];
10101
10898
 
@@ -10109,6 +10906,16 @@ var sections30 = [
10109
10906
  description: "List validation checks in the structuring pipeline. Checks define rules like field_format, value_range, cross_field, and ai_coherence.",
10110
10907
  content: [
10111
10908
  { type: "paragraph", text: "The structuring pipeline validates extracted data through configurable checks and approval gates. Checks define validation rules; gates aggregate checks and determine whether records require manual approval before delivery. Also exposes per-result check outcomes, the pending-approvals queue, approve/reject actions, and the manual delivery trigger for an approved run." },
10909
+ { type: "paragraph", text: "Use this endpoint to retrieve all validation checks configured for your organization. Checks are evaluated against every structuring result to flag data quality issues before delivery. You can scope checks to a specific schema to apply different validation logic per document type." },
10910
+ {
10911
+ type: "list",
10912
+ items: [
10913
+ "**field_format** \u2014 validates that a field value matches an expected pattern (e.g. date format, currency code)",
10914
+ "**value_range** \u2014 ensures numeric or date values fall within defined bounds",
10915
+ "**cross_field** \u2014 validates relationships between two or more fields (e.g. end_date > start_date)",
10916
+ "**ai_coherence** \u2014 uses an LLM to assess whether extracted values are semantically plausible"
10917
+ ]
10918
+ },
10112
10919
  {
10113
10920
  type: "endpoint",
10114
10921
  method: "GET",
@@ -10183,8 +10990,12 @@ var sections30 = [
10183
10990
  { label: "Create Check", slug: "create-structuring-check" },
10184
10991
  { label: "List Gates", slug: "list-structuring-gates" }
10185
10992
  ],
10186
- faq: [],
10187
- mentions: ["structuring checks", "validation"]
10993
+ faq: [
10994
+ { question: "What is the difference between warning and error severity?", answer: "A `warning` check flags an issue but does not block delivery. An `error` check causes the result to fail the gate and require manual approval before it can be delivered." },
10995
+ { question: "Can I create checks that apply to all schemas?", answer: "Yes. Omit the `user_schema_id` field when creating a check and it will apply globally to all structuring results regardless of schema." },
10996
+ { question: "How are checks ordered during evaluation?", answer: "Checks are evaluated in `sort_order` ascending, then by `created_at`. You can control evaluation priority by setting `sort_order` on each check." }
10997
+ ],
10998
+ mentions: ["structuring checks", "validation", "field_format", "value_range", "cross_field", "ai_coherence"]
10188
10999
  },
10189
11000
  {
10190
11001
  slug: "create-structuring-check",
@@ -10193,6 +11004,8 @@ var sections30 = [
10193
11004
  seoTitle: "Create Structuring Check \u2014 Talonic Docs",
10194
11005
  description: "Create a validation check for the structuring pipeline. Supports field_format, value_range, cross_field, and ai_coherence check types.",
10195
11006
  content: [
11007
+ { type: "paragraph", text: "Create a new validation check to enforce data quality rules on structuring results. Each check targets a specific validation type and can be scoped to a single schema or applied globally. Newly created checks are automatically active and will run against all future structuring results." },
11008
+ { type: "callout", variant: "info", text: "The `config` object shape depends on the check `type`. For `value_range`, provide `field`, `min`, and `max`. For `field_format`, provide `field` and `pattern`. For `cross_field`, provide `fields` and a `rule` expression. For `ai_coherence`, no config is required." },
10196
11009
  {
10197
11010
  type: "endpoint",
10198
11011
  method: "POST",
@@ -10281,8 +11094,11 @@ var sections30 = [
10281
11094
  { label: "List Checks", slug: "list-structuring-checks" },
10282
11095
  { label: "Get Check", slug: "get-structuring-check" }
10283
11096
  ],
10284
- faq: [],
10285
- mentions: ["validation check", "value_range"]
11097
+ faq: [
11098
+ { question: "Can I create a check without a config object?", answer: "Yes, for `ai_coherence` checks. For `value_range`, `field_format`, and `cross_field` checks, the `config` object defines the validation logic and is effectively required." },
11099
+ { question: "What happens if I use a master-view API key?", answer: "Create operations require a customer-scoped API key. Using a master-view key returns a `400 bad_request` error because the system cannot determine which organization to associate the check with." }
11100
+ ],
11101
+ mentions: ["validation check", "value_range", "create check", "field_format"]
10286
11102
  },
10287
11103
  {
10288
11104
  slug: "get-structuring-check",
@@ -10291,6 +11107,8 @@ var sections30 = [
10291
11107
  seoTitle: "Manage Structuring Check \u2014 Talonic Docs",
10292
11108
  description: "Get, update, or delete a structuring check. Same path supports GET (detail), PUT (update), and DELETE operations with appropriate scopes.",
10293
11109
  content: [
11110
+ { type: "paragraph", text: "Retrieve, update, or remove a validation check by its UUID. Use **GET** to inspect a check's current configuration, **PUT** to modify its name, severity, config, or active status, and **DELETE** to soft-delete it. Soft-deleting a check sets `is_active` to `false`: the check stops running against new results, but its historical check outcomes remain intact." },
11111
+ { type: "callout", variant: "warning", text: "DELETE is a soft-delete. The check is deactivated (`is_active = false`) rather than permanently removed. Existing result check outcomes referencing this check are preserved." },
10294
11112
  {
10295
11113
  type: "endpoint",
10296
11114
  method: "GET",
@@ -10360,8 +11178,11 @@ var sections30 = [
10360
11178
  { label: "List Checks", slug: "list-structuring-checks" },
10361
11179
  { label: "Create Check", slug: "create-structuring-check" }
10362
11180
  ],
10363
- faq: [],
10364
- mentions: ["manage check"]
11181
+ faq: [
11182
+ { question: "Can I reactivate a deleted check?", answer: "Yes. Since DELETE is a soft-delete, you can use PUT on the same check ID to set `is_active` back to `true`." },
11183
+ { question: "Does updating a check re-evaluate existing results?", answer: "No. Updates only affect future structuring results. Historical check outcomes are immutable." }
11184
+ ],
11185
+ mentions: ["manage check", "soft-delete", "update check"]
10365
11186
  },
10366
11187
  {
10367
11188
  slug: "list-structuring-gates",
@@ -10370,6 +11191,8 @@ var sections30 = [
10370
11191
  seoTitle: "List Structuring Gates \u2014 Talonic Docs",
10371
11192
  description: "List approval gates that aggregate validation checks and control whether records require manual approval before delivery.",
10372
11193
  content: [
11194
+ { type: "paragraph", text: "Approval gates sit between extraction and delivery. Each gate aggregates one or more rules (e.g. minimum confidence threshold, validation pass rate) and decides whether structuring results are auto-approved for delivery or queued for human review. Gates can be scoped to a specific schema and optionally linked to a delivery destination." },
11195
+ { type: "callout", variant: "info", text: "Gates without any rules will auto-approve all results. Add at least one rule to enforce quality thresholds before delivery." },
10373
11196
  {
10374
11197
  type: "endpoint",
10375
11198
  method: "GET",
@@ -10458,8 +11281,11 @@ var sections30 = [
10458
11281
  { label: "Create Gate", slug: "create-structuring-gate" },
10459
11282
  { label: "Gate Rules", slug: "gate-rules" }
10460
11283
  ],
10461
- faq: [],
10462
- mentions: ["approval gates"]
11284
+ faq: [
11285
+ { question: "How does auto_approve_after_hours work?", answer: "When set, results that have been pending for longer than the specified number of hours are automatically approved without manual intervention. This prevents bottlenecks when reviewers are unavailable." },
11286
+ { question: "Can I link a gate to a delivery destination?", answer: "Yes. Set `destination_id` when creating or updating a gate to route approved results directly to a specific delivery destination (webhook, S3, SFTP, etc.)." }
11287
+ ],
11288
+ mentions: ["approval gates", "delivery gate", "auto-approve"]
10463
11289
  },
10464
11290
  {
10465
11291
  slug: "create-structuring-gate",
@@ -10468,6 +11294,8 @@ var sections30 = [
10468
11294
  seoTitle: "Create Structuring Gate \u2014 Talonic Docs",
10469
11295
  description: "Create an approval gate with optional schema scope. Gates aggregate validation checks and control approval workflows.",
10470
11296
  content: [
11297
+ { type: "paragraph", text: "Create an approval gate to control the flow of structuring results to delivery. A gate starts with no rules \u2014 add rules via the gate rules endpoint to define quality thresholds. The `on_approve` and `on_flag` fields control what happens when results pass or fail the gate's rules." },
11298
+ { type: "callout", variant: "warning", text: "A newly created gate has an empty rules array. Results will auto-approve until you add at least one rule via `POST /v1/structuring/gates/{id}/rules`." },
10471
11299
  {
10472
11300
  type: "endpoint",
10473
11301
  method: "POST",
@@ -10545,8 +11373,11 @@ var sections30 = [
10545
11373
  { label: "List Gates", slug: "list-structuring-gates" },
10546
11374
  { label: "Gate Rules", slug: "gate-rules" }
10547
11375
  ],
10548
- faq: [],
10549
- mentions: ["create gate"]
11376
+ faq: [
11377
+ { question: "What is the typical workflow after creating a gate?", answer: "Create the gate, then add rules via `POST /v1/structuring/gates/{id}/rules` to define thresholds like minimum confidence or validation pass rate. Results that fail any rule are queued for manual approval." },
11378
+ { question: "Can I create multiple gates for the same schema?", answer: "Yes. Multiple gates can target the same `user_schema_id`. Each gate evaluates independently, and a result must pass all applicable gates to be auto-approved." }
11379
+ ],
11380
+ mentions: ["create gate", "approval workflow"]
10550
11381
  },
10551
11382
  {
10552
11383
  slug: "get-structuring-gate",
@@ -10555,6 +11386,7 @@ var sections30 = [
10555
11386
  seoTitle: "Manage Structuring Gate \u2014 Talonic Docs",
10556
11387
  description: "Get, update, or delete an approval gate. Same path supports GET (detail with rules), PUT (update), and DELETE operations.",
10557
11388
  content: [
11389
+ { type: "paragraph", text: "Retrieve, update, or remove an approval gate by its UUID. **GET** returns the gate with its active rules embedded. **PUT** updates gate properties (same body shape as create). **DELETE** soft-deletes the gate by setting `is_active` to `false`." },
10558
11390
  {
10559
11391
  type: "endpoint",
10560
11392
  method: "GET",
@@ -10633,8 +11465,11 @@ var sections30 = [
10633
11465
  { label: "List Gates", slug: "list-structuring-gates" },
10634
11466
  { label: "Gate Rules", slug: "gate-rules" }
10635
11467
  ],
10636
- faq: [],
10637
- mentions: ["manage gate"]
11468
+ faq: [
11469
+ { question: "Does deleting a gate affect pending approvals?", answer: "No. Pending approval items that were queued by this gate remain in the queue. They can still be approved or rejected manually. The gate simply stops evaluating new results." },
11470
+ { question: "Are rules returned on PUT responses?", answer: "No. The `rules` array is only populated on GET responses. After a PUT update, re-fetch with GET to see the current rules." }
11471
+ ],
11472
+ mentions: ["manage gate", "soft-delete gate"]
10638
11473
  },
10639
11474
  {
10640
11475
  slug: "gate-rules",
@@ -10643,6 +11478,15 @@ var sections30 = [
10643
11478
  seoTitle: "Structuring Gate Rules \u2014 Talonic Docs",
10644
11479
  description: "Add or remove rules from an approval gate. Rules define thresholds like min_confidence, validation_pass_rate, and field_coverage.",
10645
11480
  content: [
11481
+ { type: "paragraph", text: "Gate rules define the quality thresholds that structuring results must meet to be auto-approved. Each rule has a type and a configuration that specifies the threshold. Rules are evaluated in `sort_order` \u2014 if any rule fails, the result is flagged and queued for manual approval." },
11482
+ {
11483
+ type: "list",
11484
+ items: [
11485
+ "**min_confidence** \u2014 requires the row-level confidence score to exceed a threshold (e.g. 0.85)",
11486
+ "**validation_pass_rate** \u2014 requires a minimum percentage of validation checks to pass",
11487
+ "**field_coverage** \u2014 requires a minimum percentage of schema fields to have non-null values"
11488
+ ]
11489
+ },
10646
11490
  {
10647
11491
  type: "endpoint",
10648
11492
  method: "POST",
@@ -10746,8 +11590,11 @@ var sections30 = [
10746
11590
  { label: "Create Gate", slug: "create-structuring-gate" },
10747
11591
  { label: "Pending Approvals", slug: "pending-approvals" }
10748
11592
  ],
10749
- faq: [],
10750
- mentions: ["gate rules", "min_confidence", "threshold"]
11593
+ faq: [
11594
+ { question: "Can I add multiple rules of the same type to a gate?", answer: "Yes. For example, you could add two `min_confidence` rules with different thresholds for different severity levels, though in practice a single rule per type is typical." },
11595
+ { question: "What happens when I remove all rules from a gate?", answer: "The gate will auto-approve all results since there are no thresholds to fail against. This is equivalent to disabling the gate without deleting it." }
11596
+ ],
11597
+ mentions: ["gate rules", "min_confidence", "threshold", "validation_pass_rate", "field_coverage"]
10751
11598
  },
10752
11599
  {
10753
11600
  slug: "result-checks",
@@ -10756,6 +11603,7 @@ var sections30 = [
10756
11603
  seoTitle: "Structuring Result Checks \u2014 Talonic Docs",
10757
11604
  description: "Get validation check outcomes for a specific structuring result showing check name, pass/fail status, and messages.",
10758
11605
  content: [
11606
+ { type: "paragraph", text: "Retrieve the validation check outcomes for a specific structuring result. Each outcome records whether a configured check passed or failed for that result, along with the check's name and severity. Use this to understand why a result was flagged or to build audit trails for data quality." },
10759
11607
  {
10760
11608
  type: "endpoint",
10761
11609
  method: "GET",
@@ -10816,8 +11664,11 @@ var sections30 = [
10816
11664
  { label: "List Checks", slug: "list-structuring-checks" },
10817
11665
  { label: "Pending Approvals", slug: "pending-approvals" }
10818
11666
  ],
10819
- faq: [],
10820
- mentions: ["result checks", "validation outcomes"]
11667
+ faq: [
11668
+ { question: "Are check outcomes generated automatically?", answer: "Yes. Check outcomes are computed automatically when a structuring result is produced. You do not need to trigger evaluation manually." },
11669
+ { question: "What does the details object contain for a failed check?", answer: "The `details` object contains type-specific failure information. For `value_range`, it includes the field name, actual value, and the configured min/max bounds. For `ai_coherence`, it includes the LLM reasoning." }
11670
+ ],
11671
+ mentions: ["result checks", "validation outcomes", "check results"]
10821
11672
  },
10822
11673
  {
10823
11674
  slug: "pending-approvals",
@@ -10826,6 +11677,8 @@ var sections30 = [
10826
11677
  seoTitle: "Pending Approvals \u2014 Talonic Docs",
10827
11678
  description: "List structuring results awaiting manual approval. These are results that did not pass all gate rules automatically.",
10828
11679
  content: [
11680
+ { type: "paragraph", text: "Retrieve the queue of structuring results that failed one or more gate rules and require manual review. Each item in the response represents a failed check outcome, linking a structuring result to the check that flagged it. Use this endpoint to build approval workflows or monitor data quality issues." },
11681
+ { type: "callout", variant: "info", text: "The pending approvals endpoint returns up to 100 items per call. If you have a high volume of flagged results, implement polling or use the `auto_approve_after_hours` gate setting to prevent queue buildup." },
10829
11682
  {
10830
11683
  type: "endpoint",
10831
11684
  method: "GET",
@@ -10882,8 +11735,11 @@ var sections30 = [
10882
11735
  { label: "Approve / Reject", slug: "approve-reject-result" },
10883
11736
  { label: "Gate Rules", slug: "gate-rules" }
10884
11737
  ],
10885
- faq: [],
10886
- mentions: ["pending approvals"]
11738
+ faq: [
11739
+ { question: "Can a single result appear multiple times in pending approvals?", answer: "Yes. If a result fails multiple checks, each failed check outcome appears as a separate item in the pending approvals list. Approving the result clears all pending items for that result." },
11740
+ { question: "How do I clear the pending approvals queue?", answer: "Approve or reject each pending result via `POST /v1/structuring/approvals/{id}/approve` or `/reject`. Alternatively, configure `auto_approve_after_hours` on the gate to auto-clear items after a timeout." }
11741
+ ],
11742
+ mentions: ["pending approvals", "approval queue", "manual review"]
10887
11743
  },
10888
11744
  {
10889
11745
  slug: "approve-reject-result",
@@ -10892,6 +11748,8 @@ var sections30 = [
10892
11748
  seoTitle: "Approve or Reject Structuring Result \u2014 Talonic Docs",
10893
11749
  description: "Approve or reject a structuring result. POST /approve approves the result; POST /reject rejects it. Both return the updated status.",
10894
11750
  content: [
11751
+ { type: "paragraph", text: "Submit an approval or rejection decision for a structuring result that is pending manual review. Approving a result triggers the gate's `on_approve` action (typically delivery). Rejecting it removes the result from the approval queue without triggering delivery. Both actions require the `gate_id` to record which gate the decision applies to." },
11752
+ { type: "callout", variant: "warning", text: "The `gate_id` parameter is required. Each decision is recorded against a specific gate, allowing multiple gates to independently control the same result." },
10895
11753
  {
10896
11754
  type: "endpoint",
10897
11755
  method: "POST",
@@ -10944,8 +11802,11 @@ var sections30 = [
10944
11802
  { label: "Pending Approvals", slug: "pending-approvals" },
10945
11803
  { label: "Trigger Delivery", slug: "trigger-delivery" }
10946
11804
  ],
10947
- faq: [],
10948
- mentions: ["approve", "reject", "structuring result"]
11805
+ faq: [
11806
+ { question: "What happens after I approve a result?", answer: "The gate's `on_approve` action fires. If set to `export`, a delivery signal is emitted for the result. Use the trigger delivery endpoint to manually control when delivery occurs for an entire run." },
11807
+ { question: "Can I approve a result that was already rejected?", answer: "Yes. Approval and rejection decisions are additive records. A subsequent approval overrides a prior rejection for the same gate." }
11808
+ ],
11809
+ mentions: ["approve", "reject", "structuring result", "approval decision"]
10949
11810
  },
10950
11811
  {
10951
11812
  slug: "trigger-delivery",
@@ -10954,6 +11815,8 @@ var sections30 = [
10954
11815
  seoTitle: "Trigger Delivery \u2014 Talonic Docs",
10955
11816
  description: "Trigger delivery for a structuring run by emitting delivery signals for all approved results. Returns delivered and skipped counts.",
10956
11817
  content: [
11818
+ { type: "paragraph", text: "Manually trigger delivery for an entire structuring run. This emits delivery signals for all approved results in the run, routing them to configured delivery destinations (webhooks, S3, SFTP, etc.). Results that have not been approved are skipped. Use this after batch-approving results or when you want explicit control over when data leaves the platform." },
11819
+ { type: "callout", variant: "warning", text: "This endpoint triggers delivery for all approved results in the run. There is no undo. Ensure all results have been reviewed before calling this endpoint in production workflows." },
10957
11820
  {
10958
11821
  type: "endpoint",
10959
11822
  method: "POST",
@@ -10995,8 +11858,11 @@ var sections30 = [
10995
11858
  related: [
10996
11859
  { label: "Approve / Reject", slug: "approve-reject-result" }
10997
11860
  ],
10998
- faq: [],
10999
- mentions: ["trigger delivery", "delivery signals"]
11861
+ faq: [
11862
+ { question: "What happens to unapproved results when I trigger delivery?", answer: "Unapproved results are silently skipped. Only results with an `approved` status are included in the delivery signals." },
11863
+ { question: "Can I trigger delivery multiple times for the same run?", answer: "Yes. Delivery is idempotent per result \u2014 each result generates a deterministic idempotency key, so duplicate signals are deduplicated by the delivery pipeline." }
11864
+ ],
11865
+ mentions: ["trigger delivery", "delivery signals", "export"]
11000
11866
  }
11001
11867
  ];
11002
11868
 
@@ -11010,6 +11876,16 @@ var sections31 = [
11010
11876
  description: "Get aggregate structuring metrics for a schema's latest run, including capture hit rate, synthesize rate, strategy distribution, and tier funnel.",
11011
11877
  content: [
11012
11878
  { type: "paragraph", text: "Telemetry endpoints aggregate structuring metrics (capture hit rate, synthesize rate, strategy distribution, tier funnel) per schema or per run." },
11879
+ { type: "paragraph", text: "The schema summary returns metrics from the **latest run** for a given schema. Use it to understand how effectively the pipeline fills cells using the field registry versus LLM synthesis, and how fields distribute across resolution tiers." },
11880
+ {
11881
+ type: "list",
11882
+ items: [
11883
+ "**capture_hit_rate** \u2014 Fraction of cells filled from the field registry without LLM calls. Higher is more cost-efficient.",
11884
+ "**synthesize_rate** \u2014 Fraction of cells that required LLM synthesis (Phase 2 agent extraction).",
11885
+ "**strategy_distribution** \u2014 Breakdown by strategy: `transfer`, `extract`, `compute`, `skip`.",
11886
+ "**tier_funnel** \u2014 How cells resolved across registry tiers: `tier1` (core), `tier2` (established), `tier3` (emerging), `unresolved`."
11887
+ ]
11888
+ },
11013
11889
  {
11014
11890
  type: "endpoint",
11015
11891
  method: "GET",
@@ -11068,8 +11944,12 @@ var sections31 = [
11068
11944
  { label: "Schema Trend", slug: "schema-telemetry-trend" },
11069
11945
  { label: "Schema Fields", slug: "schema-telemetry-fields" }
11070
11946
  ],
11071
- faq: [],
11072
- mentions: ["telemetry", "capture hit rate", "synthesize rate"]
11947
+ faq: [
11948
+ { question: "What is the difference between capture_hit_rate and synthesize_rate?", answer: "Capture hit rate measures cells filled deterministically from the field registry (no LLM cost). Synthesize rate measures cells that required an LLM call to fill. Together with the fraction of skipped cells, the two rates sum to approximately 1.0." },
11949
+ { question: "Which run does the summary reflect?", answer: "The summary always reflects the most recent completed run for the schema. To see metrics from a specific run, use the Run Summary endpoint instead." },
11950
+ { question: "What do the strategy_distribution values mean?", answer: "`transfer` means the value was copied from the field registry, `extract` means LLM extraction was used, `compute` means a deterministic formula produced the value, and `skip` means the field was intentionally left empty." }
11951
+ ],
11952
+ mentions: ["telemetry", "capture hit rate", "synthesize rate", "strategy distribution", "tier funnel"]
11073
11953
  },
11074
11954
  {
11075
11955
  slug: "schema-telemetry-trend",
@@ -11078,6 +11958,8 @@ var sections31 = [
11078
11958
  seoTitle: "Schema Telemetry Trend \u2014 Talonic Docs",
11079
11959
  description: "Get metric trends over time for a schema. Returns time-series telemetry data across recent runs for tracking quality changes.",
11080
11960
  content: [
11961
+ { type: "paragraph", text: "Track how structuring metrics evolve over successive runs for a schema. This endpoint returns a **time-series** of telemetry snapshots, allowing you to detect quality improvements, regressions, or shifts in strategy distribution as your field registry matures." },
11962
+ { type: "callout", variant: "info", text: "A rising `capture_hit_rate` over time indicates the field registry is learning from extractions and resolving more fields deterministically, reducing LLM costs." },
11081
11963
  {
11082
11964
  type: "endpoint",
11083
11965
  method: "GET",
@@ -11167,8 +12049,11 @@ var sections31 = [
11167
12049
  { label: "Schema Summary", slug: "schema-telemetry-summary" },
11168
12050
  { label: "Run Summary", slug: "run-telemetry-summary" }
11169
12051
  ],
11170
- faq: [],
11171
- mentions: ["telemetry trend", "time-series"]
12052
+ faq: [
12053
+ { question: "How many runs does the trend include by default?", answer: "The default window is 10 runs. Use the `window` query parameter to request up to 50 recent runs." },
12054
+ { question: "What does a decreasing synthesize_rate indicate?", answer: "A decreasing synthesize rate means more fields are being resolved from the registry without LLM calls. This is the expected trajectory as the field registry accumulates data from successive extractions." }
12055
+ ],
12056
+ mentions: ["telemetry trend", "time-series", "quality tracking"]
11172
12057
  },
11173
12058
  {
11174
12059
  slug: "schema-telemetry-fields",
@@ -11177,6 +12062,8 @@ var sections31 = [
11177
12062
  seoTitle: "Schema Field Telemetry \u2014 Talonic Docs",
11178
12063
  description: "Get per-field structuring metrics for a schema including field-level state distribution, capture rates, and strategy breakdown.",
11179
12064
  content: [
12065
+ { type: "paragraph", text: "Drill down to **individual field performance** within a schema. This endpoint returns per-field capture rates, synthesis rates, the most common strategy used, and the distribution of cell states (filled, empty, skipped). Use it to identify underperforming fields that may need instruction tuning or manual review." },
12066
+ { type: "callout", variant: "info", text: "Fields with a high `synthesize_rate` and low `capture_rate` are candidates for field registry enrichment or instruction refinement to reduce LLM dependency." },
11180
12067
  {
11181
12068
  type: "endpoint",
11182
12069
  method: "GET",
@@ -11240,10 +12127,14 @@ var sections31 = [
11240
12127
  }
11241
12128
  ],
11242
12129
  related: [
11243
- { label: "Schema Summary", slug: "schema-telemetry-summary" }
12130
+ { label: "Schema Summary", slug: "schema-telemetry-summary" },
12131
+ { label: "Schema Trend", slug: "schema-telemetry-trend" }
11244
12132
  ],
11245
- faq: [],
11246
- mentions: ["field telemetry", "capture rates"]
12133
+ faq: [
12134
+ { question: "What does a high empty count in state_distribution mean?", answer: "A high empty count means the field could not be extracted from many documents. This may indicate the field is not present in those document types, or the extraction instructions need refinement." },
12135
+ { question: "How is the strategy field determined?", answer: "The `strategy` field shows the most frequently used resolution strategy for that field across all documents in the latest run. Possible values are `transfer`, `extract`, `compute`, and `skip`." }
12136
+ ],
12137
+ mentions: ["field telemetry", "capture rates", "per-field metrics", "state distribution"]
11247
12138
  },
11248
12139
  {
11249
12140
  slug: "run-telemetry-summary",
@@ -11252,6 +12143,8 @@ var sections31 = [
11252
12143
  seoTitle: "Run Telemetry Summary \u2014 Talonic Docs",
11253
12144
  description: "Get aggregate structuring metrics for a single job run including strategy distribution, tier funnel, and capture hit rate.",
11254
12145
  content: [
12146
+ { type: "paragraph", text: "Retrieve structuring telemetry for a **specific job run** rather than the latest run for a schema. Use this when you need to inspect the performance of a particular execution, compare two runs side by side, or debug a run that produced unexpected results." },
12147
+ { type: "callout", variant: "info", text: "The response shape is identical to the Schema Summary endpoint. The only difference is that this endpoint targets a specific run by ID instead of returning the latest run for a schema." },
11255
12148
  {
11256
12149
  type: "endpoint",
11257
12150
  method: "GET",
@@ -11310,8 +12203,11 @@ var sections31 = [
11310
12203
  { label: "Schema Summary", slug: "schema-telemetry-summary" },
11311
12204
  { label: "Schema Trend", slug: "schema-telemetry-trend" }
11312
12205
  ],
11313
- faq: [],
11314
- mentions: ["run telemetry"]
12206
+ faq: [
12207
+ { question: "How do I compare two runs?", answer: "Call this endpoint twice with different run IDs and compare the `capture_hit_rate`, `synthesize_rate`, and `strategy_distribution` values to identify improvements or regressions." },
12208
+ { question: "Can I get run telemetry for a run that is still in progress?", answer: "No. Telemetry is computed after a run completes. Runs with status `pending` or `running` will return a 404 until they finish." }
12209
+ ],
12210
+ mentions: ["run telemetry", "run comparison"]
11315
12211
  }
11316
12212
  ];
11317
12213
 
@@ -11325,6 +12221,8 @@ var sections32 = [
11325
12221
  description: "List ground-truth datasets used for measuring extraction accuracy. Each dataset contains manually verified document-field value pairs.",
11326
12222
  content: [
11327
12223
  { type: "paragraph", text: "Validation runs measure extraction accuracy against ground-truth datasets. Manage datasets and runs, and retrieve per-document and per-field accuracy results. Create a ground-truth set, then run validations to compare extracted values against expected values." },
12224
+ { type: "paragraph", text: "A **ground-truth dataset** is a collection of manually verified document-field value pairs that serve as the benchmark for accuracy measurement. Each dataset can be scoped to a specific schema via `user_schema_id`, or left unscoped for cross-schema validation." },
12225
+ { type: "callout", variant: "info", text: "Ground-truth datasets are reusable. Create a dataset once and run validations against it repeatedly as you refine your schemas and extraction pipeline." },
11328
12226
  {
11329
12227
  type: "endpoint",
11330
12228
  method: "GET",
@@ -11381,8 +12279,11 @@ var sections32 = [
11381
12279
  { label: "Get Ground-Truth", slug: "get-ground-truth" },
11382
12280
  { label: "Create Validation Run", slug: "create-validation-run" }
11383
12281
  ],
11384
- faq: [],
11385
- mentions: ["ground truth", "validation dataset"]
12282
+ faq: [
12283
+ { question: "Can I create ground-truth datasets without a schema?", answer: "Yes. The `user_schema_id` field is optional. Unscoped datasets can be used for cross-schema validation or when you want to validate specific fields regardless of schema." },
12284
+ { question: "How many entries should a ground-truth dataset have?", answer: "There is no minimum or maximum. For statistically meaningful accuracy results, aim for at least 20-30 document-field pairs covering the fields you care about most." }
12285
+ ],
12286
+ mentions: ["ground truth", "validation dataset", "accuracy benchmark"]
11386
12287
  },
11387
12288
  {
11388
12289
  slug: "get-ground-truth",
@@ -11391,6 +12292,8 @@ var sections32 = [
11391
12292
  seoTitle: "Manage Ground-Truth Dataset \u2014 Talonic Docs",
11392
12293
  description: "Get detail with expected values or delete a ground-truth dataset. Supports GET (read scope) and DELETE (write scope) on the same path.",
11393
12294
  content: [
12295
+ { type: "paragraph", text: "Retrieve the full details of a ground-truth dataset including all expected value entries, or permanently delete the dataset. The GET response includes every document-field pair with the expected value, which you can use to audit the benchmark data before running a validation." },
12296
+ { type: "callout", variant: "warning", text: "Deleting a ground-truth dataset also removes all associated expected value entries. Existing validation runs that used this dataset are retained but can no longer be re-run." },
11394
12297
  {
11395
12298
  type: "endpoint",
11396
12299
  method: "GET",
@@ -11462,10 +12365,14 @@ var sections32 = [
11462
12365
  }
11463
12366
  ],
11464
12367
  related: [
11465
- { label: "List Ground-Truth", slug: "list-ground-truth" }
12368
+ { label: "List Ground-Truth", slug: "list-ground-truth" },
12369
+ { label: "Create Validation Run", slug: "create-validation-run" }
11466
12370
  ],
11467
- faq: [],
11468
- mentions: ["ground truth dataset"]
12371
+ faq: [
12372
+ { question: "Does deleting a ground-truth dataset delete its validation runs?", answer: "No. Validation runs and their results are retained. However, you cannot create new validation runs against a deleted dataset." },
12373
+ { question: "Can I update individual expected values in a dataset?", answer: "Expected values are managed as part of the dataset. To change values, delete the dataset and recreate it with the corrected entries." }
12374
+ ],
12375
+ mentions: ["ground truth dataset", "expected values"]
11469
12376
  },
11470
12377
  {
11471
12378
  slug: "list-validation-runs",
@@ -11474,6 +12381,8 @@ var sections32 = [
11474
12381
  seoTitle: "List Validation Runs \u2014 Talonic Docs",
11475
12382
  description: "List validation runs that compare extraction results against ground-truth datasets. Requires read scope.",
11476
12383
  content: [
12384
+ { type: "paragraph", text: "List all validation runs for your organization, ordered by most recent first. A **validation run** compares the structured output of a job run against a ground-truth dataset to produce per-field and overall accuracy scores." },
12385
+ { type: "paragraph", text: "Each run includes its status (`pending`, `running`, `completed`, `failed`), the overall accuracy score (available once completed), and links to the detailed results. Use this to track validation history and identify accuracy trends." },
11477
12386
  {
11478
12387
  type: "endpoint",
11479
12388
  method: "GET",
@@ -11537,8 +12446,11 @@ var sections32 = [
11537
12446
  { label: "Create Validation Run", slug: "create-validation-run" },
11538
12447
  { label: "Get Validation Run", slug: "get-validation-run" }
11539
12448
  ],
11540
- faq: [],
11541
- mentions: ["validation runs"]
12449
+ faq: [
12450
+ { question: "How many validation runs are returned?", answer: "Up to 100 runs are returned, ordered by `created_at` descending. There is no pagination \u2014 the response is a single page capped at the 100 most recent runs." },
12451
+ { question: "What does an accuracy of null mean?", answer: "A null accuracy indicates the run has not completed yet. The score is populated only when the run reaches `completed` status." }
12452
+ ],
12453
+ mentions: ["validation runs", "accuracy tracking"]
11542
12454
  },
11543
12455
  {
11544
12456
  slug: "create-validation-run",
@@ -11547,6 +12459,9 @@ var sections32 = [
11547
12459
  seoTitle: "Create Validation Run \u2014 Talonic Docs",
11548
12460
  description: "Create a validation run comparing a job against a ground-truth dataset. Measures per-document and per-field extraction accuracy.",
11549
12461
  content: [
12462
+ { type: "paragraph", text: "Start a new validation run that compares the output of a **job run** against a **ground-truth dataset**. The validation engine compares each extracted value to the expected value, computing exact match, fuzzy match, and similarity scores. An optional LLM judge provides a semantic verdict for ambiguous cases." },
12463
+ { type: "paragraph", text: "Validation runs start in `pending` status and move to `running` as comparisons are performed. Once complete, the `accuracy` field contains the overall score and per-field results are available via the Results endpoint." },
12464
+ { type: "callout", variant: "info", text: "Both `golden_sample_id` and `dataspace_run_id` must belong to your organization. The API returns 404 if either resource is not found." },
11550
12465
  {
11551
12466
  type: "endpoint",
11552
12467
  method: "POST",
@@ -11625,8 +12540,11 @@ var sections32 = [
11625
12540
  { label: "List Validation Runs", slug: "list-validation-runs" },
11626
12541
  { label: "Get Validation Results", slug: "get-validation-results" }
11627
12542
  ],
11628
- faq: [],
11629
- mentions: ["create validation run", "accuracy measurement"]
12543
+ faq: [
12544
+ { question: "How long does a validation run take?", answer: "Most validation runs complete within seconds. The duration depends on the number of document-field pairs in the ground-truth dataset and whether the LLM judge is invoked for ambiguous comparisons." },
12545
+ { question: "Can I run validation against the same dataset multiple times?", answer: "Yes. You can create multiple validation runs against the same ground-truth dataset with different job runs to track accuracy improvements over time." }
12546
+ ],
12547
+ mentions: ["create validation run", "accuracy measurement", "LLM judge"]
11630
12548
  },
11631
12549
  {
11632
12550
  slug: "get-validation-run",
@@ -11635,6 +12553,7 @@ var sections32 = [
11635
12553
  seoTitle: "Manage Validation Run \u2014 Talonic Docs",
11636
12554
  description: "Get validation run detail with accuracy summary or delete a run. Supports GET (read scope) and DELETE (write scope) on the same path.",
11637
12555
  content: [
12556
+ { type: "paragraph", text: "Retrieve the full details of a validation run, including its status, accuracy score, and total comparisons, or permanently delete a run and its associated results. Use GET to poll a run's status until it reaches `completed` or `failed`; once it is `completed`, fetch the detailed results." },
11638
12557
  {
11639
12558
  type: "endpoint",
11640
12559
  method: "GET",
@@ -11701,8 +12620,11 @@ var sections32 = [
11701
12620
  { label: "List Validation Runs", slug: "list-validation-runs" },
11702
12621
  { label: "Get Validation Results", slug: "get-validation-results" }
11703
12622
  ],
11704
- faq: [],
11705
- mentions: ["validation run detail"]
12623
+ faq: [
12624
+ { question: "Does deleting a validation run delete the ground-truth dataset?", answer: "No. Deleting a run only removes the run record and its per-field results. The ground-truth dataset and the job run remain intact." },
12625
+ { question: "How do I poll for run completion?", answer: "Call `GET /v1/validation/runs/{id}` and check the `status` field. Poll until it changes from `pending` or `running` to `completed` or `failed`." }
12626
+ ],
12627
+ mentions: ["validation run detail", "run status polling"]
11706
12628
  },
11707
12629
  {
11708
12630
  slug: "get-validation-results",
@@ -11711,6 +12633,16 @@ var sections32 = [
11711
12633
  seoTitle: "Validation Results \u2014 Talonic Docs",
11712
12634
  description: "Get per-field validation results including overall accuracy, per-field accuracy, match type, similarity scores, and judge verdicts.",
11713
12635
  content: [
12636
+ { type: "paragraph", text: "Retrieve the granular, per-field comparison results for a completed validation run. Each result entry shows the **expected value**, **actual extracted value**, **match type** (exact, fuzzy, or no_match), a **similarity score**, and an optional **LLM judge verdict** for ambiguous cases." },
12637
+ { type: "paragraph", text: "Use the `judged_only=true` parameter to filter results to only those where the LLM judge was invoked. This is useful for reviewing cases where simple string comparison was insufficient and semantic judgment was required." },
12638
+ {
12639
+ type: "list",
12640
+ items: [
12641
+ "**exact** \u2014 The extracted value matches the expected value character-for-character.",
12642
+ "**fuzzy** \u2014 The values are similar but not identical (e.g. different formatting, minor typos).",
12643
+ "**no_match** \u2014 The extracted value does not match the expected value."
12644
+ ]
12645
+ },
11714
12646
  {
11715
12647
  type: "endpoint",
11716
12648
  method: "GET",
@@ -11782,8 +12714,12 @@ var sections32 = [
11782
12714
  { label: "Get Validation Run", slug: "get-validation-run" },
11783
12715
  { label: "List Ground-Truth", slug: "list-ground-truth" }
11784
12716
  ],
11785
- faq: [],
11786
- mentions: ["validation results", "accuracy", "judge verdict"]
12717
+ faq: [
12718
+ { question: "What is the difference between match_type and judge_verdict?", answer: "The `match_type` is a deterministic string comparison (exact, fuzzy, no_match). The `judge_verdict` is an LLM-based semantic assessment (`correct`, `incorrect`, `partial`) invoked for ambiguous cases where string comparison alone is insufficient." },
12719
+ { question: "When is the LLM judge invoked?", answer: "The judge is invoked for fuzzy matches and edge cases where the similarity score falls in an ambiguous range. Exact matches and clear no-matches do not trigger the judge." },
12720
+ { question: "Can I filter results by document or field?", answer: "The `judged_only` parameter filters by judge involvement. To filter by document or field, retrieve the full results and filter client-side." }
12721
+ ],
12722
+ mentions: ["validation results", "accuracy", "judge verdict", "similarity score", "match type"]
11787
12723
  }
11788
12724
  ];
11789
12725
 
@@ -11797,6 +12733,16 @@ var sections33 = [
11797
12733
  description: "Get the current credit balance for the authenticated customer including currency and timestamp. Requires read scope.",
11798
12734
  content: [
11799
12735
  { type: "paragraph", text: "Credit endpoints expose the current balance, transaction history, aggregate usage summaries, daily usage, and a per-request usage log with model and token counts. Track credit balance and usage breakdowns by operation type and time period." },
12736
+ { type: "paragraph", text: "The balance endpoint returns an **enriched** view of your credit account, including the EUR-equivalent balance, a 30-day burn rate, projected runway in days, and your current API tier. Use this to monitor consumption and plan capacity." },
12737
+ {
12738
+ type: "list",
12739
+ items: [
12740
+ "**balance_credits** \u2014 Current credit balance as an integer.",
12741
+ "**burn_rate_30d_credits** \u2014 Credits consumed in the last 30 days for trend analysis.",
12742
+ "**projected_runway_days** \u2014 Estimated days remaining at the current burn rate.",
12743
+ "**tier** \u2014 Your current API tier (e.g. `free`, `starter`, `growth`), which determines rate limits and features."
12744
+ ]
12745
+ },
11800
12746
  {
11801
12747
  type: "endpoint",
11802
12748
  method: "GET",
@@ -11844,8 +12790,12 @@ var sections33 = [
11844
12790
  { label: "History", slug: "credits-history" },
11845
12791
  { label: "Usage Summary", slug: "credits-usage" }
11846
12792
  ],
11847
- faq: [],
11848
- mentions: ["credit balance"]
12793
+ faq: [
12794
+ { question: "What does projected_runway_days: -1 mean?", answer: "A value of -1 indicates that your burn rate over the last 30 days is zero, so runway cannot be projected. This typically means no API calls were made during the period." },
12795
+ { question: "When does the API tier reset?", answer: "Tiers reset on the 1st of each month at midnight UTC. The exact reset timestamp is returned in the `tier_resets_at` field." },
12796
+ { question: "How is the EUR balance calculated?", answer: "The `balance_eur` is computed by dividing `balance_credits` by a configured credits-per-EUR rate. This rate is fixed and does not fluctuate." }
12797
+ ],
12798
+ mentions: ["credit balance", "burn rate", "API tier", "runway"]
11849
12799
  },
11850
12800
  {
11851
12801
  slug: "credits-history",
@@ -11854,6 +12804,8 @@ var sections33 = [
11854
12804
  seoTitle: "Credits History Endpoint \u2014 Talonic Docs",
11855
12805
  description: "Get credit transaction history including purchases, deductions, and adjustments with page-based pagination.",
11856
12806
  content: [
12807
+ { type: "paragraph", text: "Retrieve a chronological log of every credit transaction on your account. Transactions include **purchases** (positive amounts), **consumption deductions** (negative amounts), **bonuses**, and **manual adjustments**. Use this to audit spending and reconcile usage." },
12808
+ { type: "callout", variant: "info", text: "Transactions are ordered by most recent first. Each entry includes the `operation_type` that triggered it (e.g. `extraction`, `manual`), making it easy to trace costs back to specific pipeline operations." },
11857
12809
  {
11858
12810
  type: "endpoint",
11859
12811
  method: "GET",
@@ -11921,8 +12873,11 @@ var sections33 = [
11921
12873
  { label: "Balance", slug: "credits-balance" },
11922
12874
  { label: "Usage Summary", slug: "credits-usage" }
11923
12875
  ],
11924
- faq: [],
11925
- mentions: ["credit history", "transactions"]
12876
+ faq: [
12877
+ { question: "How do I distinguish between a purchase and a deduction?", answer: "The `amount` field is positive for purchases and bonuses, and negative for consumption deductions. The `type` field also explicitly labels each transaction." },
12878
+ { question: "What operation types are tracked?", answer: "Common operation types include `extraction`, `document_ai_ocr`, `matching`, and `manual`. The exact set depends on which pipeline operations your account uses." }
12879
+ ],
12880
+ mentions: ["credit history", "transactions", "consumption", "purchase"]
11926
12881
  },
11927
12882
  {
11928
12883
  slug: "credits-usage",
@@ -11931,6 +12886,8 @@ var sections33 = [
11931
12886
  seoTitle: "Credits Usage Summary \u2014 Talonic Docs",
11932
12887
  description: "Get aggregate credit usage summary broken down by operation type and model for a configurable time period.",
11933
12888
  content: [
12889
+ { type: "paragraph", text: "Get a high-level view of your API usage grouped by **operation type** and **model**. This endpoint aggregates call counts, token consumption, and estimated costs over a configurable lookback period. Use it to understand which operations drive your spending." },
12890
+ { type: "callout", variant: "info", text: "Cost estimates include all token classes: input tokens, output tokens, cache creation tokens, and cache read tokens. Each is priced at the model-specific rate." },
11934
12891
  {
11935
12892
  type: "endpoint",
11936
12893
  method: "GET",
@@ -12004,8 +12961,11 @@ var sections33 = [
12004
12961
  { label: "Daily Usage", slug: "credits-usage-daily" },
12005
12962
  { label: "Usage Log", slug: "credits-usage-log" }
12006
12963
  ],
12007
- faq: [],
12008
- mentions: ["usage summary", "credit breakdown"]
12964
+ faq: [
12965
+ { question: "Why does document_ai_ocr show zero tokens?", answer: "Document AI OCR (Mistral) is not token-based. Its cost is calculated from `pages_processed * cost_per_page`. Token fields are 0 because they do not apply to this operation type." },
12966
+ { question: "Can I change the lookback period?", answer: "Yes. Pass the `days` query parameter to set the lookback window. The default is 30 days, and there is no upper limit." }
12967
+ ],
12968
+ mentions: ["usage summary", "credit breakdown", "token consumption", "cost estimation"]
12009
12969
  },
12010
12970
  {
12011
12971
  slug: "credits-usage-daily",
@@ -12014,6 +12974,7 @@ var sections33 = [
12014
12974
  seoTitle: "Credits Daily Usage \u2014 Talonic Docs",
12015
12975
  description: "Get per-day credit usage breakdown for the specified period (default last 30 days) with call counts and token totals per day.",
12016
12976
  content: [
12977
+ { type: "paragraph", text: "Get a per-day breakdown of API usage over a configurable period. Each entry includes the total number of API calls, input/output token counts, and estimated cost for that calendar date. Use this for usage trend analysis and daily cost monitoring." },
12017
12978
  {
12018
12979
  type: "endpoint",
12019
12980
  method: "GET",
@@ -12076,8 +13037,11 @@ var sections33 = [
12076
13037
  { label: "Usage Summary", slug: "credits-usage" },
12077
13038
  { label: "Usage Log", slug: "credits-usage-log" }
12078
13039
  ],
12079
- faq: [],
12080
- mentions: ["daily usage"]
13040
+ faq: [
13041
+ { question: "Are days with zero usage included in the response?", answer: "Days with no API calls are omitted from the response array. Only dates with at least one recorded call are returned." },
13042
+ { question: "What timezone are the daily dates in?", answer: "Dates are in UTC. A call made at 23:59 UTC on September 14, 2024 appears under `2024-09-14`, regardless of the caller's local date." }
13043
+ ],
13044
+ mentions: ["daily usage", "usage trend", "daily cost"]
12081
13045
  },
12082
13046
  {
12083
13047
  slug: "credits-usage-log",
@@ -12086,6 +13050,9 @@ var sections33 = [
12086
13050
  seoTitle: "Credits Usage Log \u2014 Talonic Docs",
12087
13051
  description: "Get a detailed per-request usage log with operation type, model, input/output token counts, and cost estimates for each API call.",
12088
13052
  content: [
13053
+ { type: "paragraph", text: "Retrieve a detailed log of individual API requests with per-request token counts, model information, and cost estimates. This is the most granular usage view available, showing every LLM call and OCR request made by your account." },
13054
+ { type: "paragraph", text: "Each log entry links back to the originating document (when applicable) via the `document_id` field, allowing you to trace costs to specific documents in your pipeline." },
13055
+ { type: "callout", variant: "info", text: "The usage log is ordered by most recent first. Use page-based pagination to browse historical entries." },
12089
13056
  {
12090
13057
  type: "endpoint",
12091
13058
  method: "GET",
@@ -12159,8 +13126,11 @@ var sections33 = [
12159
13126
  { label: "Usage Summary", slug: "credits-usage" },
12160
13127
  { label: "Balance", slug: "credits-balance" }
12161
13128
  ],
12162
- faq: [],
12163
- mentions: ["usage log", "token counts", "cost estimation"]
13129
+ faq: [
13130
+ { question: "What are cache_read_tokens?", answer: "Cache read tokens represent prompt cache hits where previously cached input was reused instead of being reprocessed. These are charged at a significantly lower rate than regular input tokens." },
13131
+ { question: "Why is document_id null for some entries?", answer: "Operations that are not tied to a specific document (e.g. schema generation, field resolution) have a null `document_id`. Document-level operations like extraction always include the document reference." }
13132
+ ],
13133
+ mentions: ["usage log", "token counts", "cost estimation", "cache tokens", "per-request"]
12164
13134
  }
12165
13135
  ];
12166
13136
 
@@ -12174,6 +13144,14 @@ var sections34 = [
12174
13144
  description: "Get a comprehensive workspace overview including document stats, schemas, active runs, field registry summary, and recent activity for AI integrations.",
12175
13145
  content: [
12176
13146
  { type: "paragraph", text: "The Agent API provides programmatic access to the same AI assistant capabilities available in the Talonic platform UI. Use the context endpoint to retrieve a comprehensive workspace snapshot, and the tools endpoint to discover all available agent capabilities." },
13147
+ { type: "paragraph", text: "The workspace context is useful for building external dashboards, feeding context into custom AI integrations, or monitoring workspace health. The response includes document processing stats, schema summaries, active extraction runs, field registry tier distribution, and up to 15 recent activity events." },
13148
+ { type: "list", ordered: false, items: [
13149
+ "**Document stats** \u2014 total count, completed this week/24h, currently processing",
13150
+ "**Schemas** \u2014 user-defined schemas with field counts and versions",
13151
+ "**Active runs** \u2014 in-flight extraction runs with status and document counts",
13152
+ "**Field registry** \u2014 total fields and distribution across Tier 1, 2, and 3",
13153
+ "**Recent activity** \u2014 up to 15 events with type, message, timestamp, and actor"
13154
+ ] },
12177
13155
  {
12178
13156
  type: "endpoint",
12179
13157
  method: "GET",
@@ -12252,9 +13230,11 @@ var sections34 = [
12252
13230
  { label: "List Agent Tools", slug: "agent-tools" }
12253
13231
  ],
12254
13232
  faq: [
12255
- { question: "What does the agent context endpoint return?", answer: "A comprehensive workspace snapshot including document stats, schemas, active runs, field registry summary, and recent activity." }
13233
+ { question: "What does the agent context endpoint return?", answer: "A comprehensive workspace snapshot including document stats, schemas, active runs, field registry summary, and recent activity." },
13234
+ { question: "How often is the workspace context updated?", answer: "The context is computed on each request from live data. There is no caching, so the response always reflects the current workspace state." },
13235
+ { question: "Can I use the agent context to feed an external LLM?", answer: "Yes. The context endpoint is designed for this use case. Pass the response as system context to any LLM to give it awareness of your workspace state, document types, and schemas." }
12256
13236
  ],
12257
- mentions: ["agent context", "workspace overview", "AI integration"]
13237
+ mentions: ["agent context", "workspace overview", "AI integration", "dashboard data"]
12258
13238
  },
12259
13239
  {
12260
13240
  slug: "agent-tools",
@@ -12263,6 +13243,8 @@ var sections34 = [
12263
13243
  seoTitle: "Agent Tools Endpoint \u2014 Talonic Docs",
12264
13244
  description: "List all tools available to the embedded agent including their impact level (read/write) and descriptions for discovering agent capabilities.",
12265
13245
  content: [
13246
+ { type: "paragraph", text: "Discover all tools available to the embedded AI agent. Each tool declares its **impact level** \u2014 whether it performs a read-only operation or a mutation \u2014 so you can build permission-aware integrations. Use this endpoint to dynamically generate tool descriptions for external AI agents or to audit available capabilities." },
13247
+ { type: "callout", variant: "info", text: "Impact levels follow a severity scale: `read` (no side effects), `draft_mutation` (creates drafts only), `live_mutation` (modifies live data), and `irreversible` (permanent changes like deletion). Use these to implement confirmation gates in your integration." },
12266
13248
  {
12267
13249
  type: "endpoint",
12268
13250
  method: "GET",
@@ -12310,8 +13292,11 @@ var sections34 = [
12310
13292
  related: [
12311
13293
  { label: "Get Workspace Context", slug: "agent-context" }
12312
13294
  ],
12313
- faq: [],
12314
- mentions: ["agent tools", "capabilities"]
13295
+ faq: [
13296
+ { question: "Can I invoke agent tools directly via the API?", answer: "The tools endpoint lists available capabilities. Tool invocation happens through the embedded agent chat interface or via the Claude API proxy, not through direct REST calls to individual tools." },
13297
+ { question: "What impact levels are available?", answer: "Four levels: `read` (safe, no side effects), `draft_mutation` (creates drafts), `live_mutation` (modifies live data), and `irreversible` (permanent changes). Use these to build confirmation gates." }
13298
+ ],
13299
+ mentions: ["agent tools", "capabilities", "impact levels", "tool discovery"]
12315
13300
  }
12316
13301
  ];
12317
13302
 
@@ -12325,6 +13310,16 @@ var sections35 = [
12325
13310
  description: "List all matching configurations for the current workspace. Each config defines field mappings, strategies, and weights used to match documents against reference data.",
12326
13311
  content: [
12327
13312
  { type: "paragraph", text: "Matching connects incoming documents to reference datasets using configurable field-level strategies. Each configuration defines which fields to compare, how to compare them (exact, fuzzy, date range, numeric range), and the relative weight of each field in the overall confidence score." },
13313
+ { type: "paragraph", text: "A typical matching workflow is: upload reference data (CSV or XLSX), create a config with field mappings, run matching, and review results. For complex datasets, use the **generate strategy** endpoint to let AI recommend optimal field mappings and weights." },
13314
+ {
13315
+ type: "list",
13316
+ items: [
13317
+ "Each config targets a single reference dataset.",
13318
+ "Field mappings define source-to-target column pairs with a comparison strategy.",
13319
+ "Weights should sum to 1.0 across all field mappings; weights that do not are normalized internally.",
13320
+ "The confidence threshold (default 0.85) controls the minimum score for a match."
13321
+ ]
13322
+ },
12328
13323
  {
12329
13324
  type: "endpoint",
12330
13325
  method: "GET",
@@ -12414,7 +13409,8 @@ var sections35 = [
12414
13409
  { label: "Run Matching", slug: "run-matching" }
12415
13410
  ],
12416
13411
  faq: [
12417
- { question: "What is a matching configuration?", answer: "A matching configuration defines which fields to compare between documents and reference data, what comparison strategy to use per field (exact, fuzzy, date_range, numeric_range), and how much weight each field carries in the overall confidence score." }
13412
+ { question: "What is a matching configuration?", answer: "A matching configuration defines which fields to compare between documents and reference data, what comparison strategy to use per field (exact, fuzzy, date_range, numeric_range), and how much weight each field carries in the overall confidence score." },
13413
+ { question: "Can I have multiple configs for the same reference dataset?", answer: "Yes. You can create multiple configs targeting the same reference dataset with different field mappings, strategies, or thresholds to test different matching approaches." }
12418
13414
  ],
12419
13415
  mentions: ["matching", "reference data", "field mapping"]
12420
13416
  },
@@ -12425,6 +13421,17 @@ var sections35 = [
12425
13421
  seoTitle: "Create Matching Config \u2014 Talonic Docs",
12426
13422
  description: "Create a matching configuration with field mappings, comparison strategies (exact, fuzzy, date_range, numeric_range), and per-field weights that sum to 1.0.",
12427
13423
  content: [
13424
+ { type: "paragraph", text: "Create a matching configuration that defines how documents are compared against a reference dataset. Each field mapping specifies a source field (from extracted documents), a target column (in the reference data), a comparison strategy, and a relative weight." },
13425
+ { type: "callout", variant: "info", text: "Field weights should sum to 1.0. The overall confidence score for a match is the weighted sum of per-field scores. Use the **generate strategy** endpoint to get AI-recommended mappings if you are unsure which fields and weights to use." },
13426
+ {
13427
+ type: "list",
13428
+ items: [
13429
+ "**exact** \u2014 case-insensitive string equality. Best for codes, IDs, and standardized values.",
13430
+ "**fuzzy** \u2014 Levenshtein/token similarity. Handles name variations and minor typos.",
13431
+ "**date_range** \u2014 date proximity within a configurable tolerance window.",
13432
+ "**numeric_range** \u2014 numeric proximity within a configurable tolerance. Handles rounding differences."
13433
+ ]
13434
+ },
12428
13435
  {
12429
13436
  type: "endpoint",
12430
13437
  method: "POST",
@@ -12522,7 +13529,9 @@ var sections35 = [
12522
13529
  { label: "Generate Strategy", slug: "generate-strategy" }
12523
13530
  ],
12524
13531
  faq: [
12525
- { question: "What strategies are available for field matching?", answer: "Four strategies: exact (case-insensitive equality), fuzzy (Levenshtein/token similarity), date_range (date proximity within tolerance), and numeric_range (numeric proximity within tolerance)." }
13532
+ { question: "What strategies are available for field matching?", answer: "Four strategies: exact (case-insensitive equality), fuzzy (Levenshtein/token similarity), date_range (date proximity within tolerance), and numeric_range (numeric proximity within tolerance)." },
13533
+ { question: "Do field weights need to sum to exactly 1.0?", answer: "Weights should sum to 1.0 for meaningful confidence scores. If they do not sum to 1.0, the system normalizes them internally, but explicitly setting weights to sum to 1.0 gives you predictable confidence values." },
13534
+ { question: "Can I use the same reference dataset column in multiple mappings?", answer: "Yes. A single target column can appear in multiple field mappings with different source fields and strategies, which is useful when multiple document fields might correspond to the same reference column." }
12526
13535
  ],
12527
13536
  mentions: ["matching config", "field strategy", "weights"]
12528
13537
  },
@@ -12533,6 +13542,7 @@ var sections35 = [
12533
13542
  seoTitle: "Manage Matching Config \u2014 Talonic Docs",
12534
13543
  description: "Get matching configuration details, update field mappings and weights, or delete a configuration. Deleting a config does not remove past run results.",
12535
13544
  content: [
13545
+ { type: "paragraph", text: "Retrieve, update, or delete a matching configuration. Updates to field mappings and thresholds take effect on the next run \u2014 they do not retroactively change past results. Deleting a config removes the configuration but preserves all historical run results for audit purposes." },
12536
13546
  {
12537
13547
  type: "endpoint",
12538
13548
  method: "GET",
@@ -12673,7 +13683,10 @@ var sections35 = [
12673
13683
  { label: "List Configs", slug: "list-matching-configs" },
12674
13684
  { label: "Run Matching", slug: "run-matching" }
12675
13685
  ],
12676
- faq: [],
13686
+ faq: [
13687
+ { question: "Can I change the reference_data_id on an existing config?", answer: "No. The reference dataset is fixed at creation. To match against a different dataset, create a new configuration." },
13688
+ { question: "Does updating a config affect past run results?", answer: "No. Updates only affect future runs. Past results are immutable and retain the field mappings and thresholds that were active when the run executed." }
13689
+ ],
12677
13690
  mentions: ["matching config management"]
12678
13691
  },
12679
13692
  {
@@ -12683,6 +13696,9 @@ var sections35 = [
12683
13696
  seoTitle: "Run Matching \u2014 Talonic Docs",
12684
13697
  description: "Execute a matching run against reference data asynchronously, or use smart-run for AI-assisted matching that auto-tunes strategies. Poll run status via the run detail endpoint.",
12685
13698
  content: [
13699
+ { type: "paragraph", text: "Execute a matching run to compare documents against the reference dataset defined in the config. Runs are **asynchronous** \u2014 the endpoint returns immediately with a `queued` status. Poll the run detail endpoint to track progress and retrieve results." },
13700
+ { type: "paragraph", text: "The **smart-run** variant uses a pre-generated AI strategy that automatically tunes comparison thresholds and field priorities. This can significantly improve accuracy on datasets with heterogeneous formatting, inconsistent naming, or mixed-language content." },
13701
+ { type: "callout", variant: "info", text: "Matching runs are processed via a BullMQ job queue. Large datasets may take several minutes. Use `GET /v1/matching/runs/:id/progress` to poll for live progress updates while the run is active." },
12686
13702
  {
12687
13703
  type: "endpoint",
12688
13704
  method: "POST",
@@ -12791,7 +13807,9 @@ var sections35 = [
12791
13807
  { label: "Results", slug: "get-matching-results" }
12792
13808
  ],
12793
13809
  faq: [
12794
- { question: "What is the difference between run and smart-run?", answer: "A standard run uses the strategies and weights defined in the config. A smart-run uses a pre-generated strategy with AI-tuned comparison thresholds, which can improve accuracy on heterogeneous datasets." }
13810
+ { question: "What is the difference between run and smart-run?", answer: "A standard run uses the strategies and weights defined in the config. A smart-run uses a pre-generated strategy with AI-tuned comparison thresholds, which can improve accuracy on heterogeneous datasets." },
13811
+ { question: "How long does a matching run take?", answer: "Runtime depends on the number of documents and reference rows. For a typical dataset of a few hundred documents against a few thousand reference rows, expect 1-5 minutes. Large datasets may take longer." },
13812
+ { question: "Can I run matching on a subset of documents?", answer: "Currently, runs match all unmatched documents in the workspace. Document-level filtering is not yet supported \u2014 use the results endpoint to filter outcomes after the run completes." }
12795
13813
  ],
12796
13814
  mentions: ["matching run", "smart matching", "async"]
12797
13815
  },
@@ -12802,6 +13820,8 @@ var sections35 = [
12802
13820
  seoTitle: "Matching Run Detail \u2014 Talonic Docs",
12803
13821
  description: "Get the status, progress, and summary of a matching run. Status progresses from queued to running to completed or failed.",
12804
13822
  content: [
13823
+ { type: "paragraph", text: "Retrieve the current state of a matching run. Poll this endpoint while `status` is `queued`, `running`, or `ai_resolving` to track progress. Once `completed`, the response includes the top 50 results by confidence. Use the results endpoint for full paginated access." },
13824
+ { type: "callout", variant: "info", text: "The `ai_resolving` status indicates that the run has finished standard matching and is now running an AI resolution pass on low-confidence rows. This pass uses Claude Haiku to disambiguate borderline matches." },
12805
13825
  {
12806
13826
  type: "endpoint",
12807
13827
  method: "GET",
@@ -12889,7 +13909,10 @@ var sections35 = [
12889
13909
  { label: "Results", slug: "get-matching-results" },
12890
13910
  { label: "Run Matching", slug: "run-matching" }
12891
13911
  ],
12892
- faq: [],
13912
+ faq: [
13913
+ { question: "Why does the run detail only show 50 results?", answer: "The run detail endpoint includes the top 50 results by confidence for quick inspection. Use GET /v1/matching/runs/:id/results with pagination for the full result set." },
13914
+ { question: "What does the ai_resolving status mean?", answer: "The run has completed standard field-level matching and is now running an AI resolution pass (using Claude Haiku) on rows with low confidence scores. This can upgrade borderline matches or confirm non-matches." }
13915
+ ],
12893
13916
  mentions: ["matching run status", "progress"]
12894
13917
  },
12895
13918
  {
@@ -12899,6 +13922,8 @@ var sections35 = [
12899
13922
  seoTitle: "Matching Results \u2014 Talonic Docs",
12900
13923
  description: "Retrieve matching results for a completed run. Returns the top 5 candidates per document with weighted confidence scores and per-field evidence breakdowns.",
12901
13924
  content: [
13925
+ { type: "paragraph", text: "Retrieve the full paginated results for a completed matching run. Each result represents a document matched (or unmatched) against the reference dataset, with a weighted confidence score and per-field evidence breakdown showing how each field contributed to the overall score." },
13926
+ { type: "callout", variant: "info", text: "Results with `status: pending` have not been reviewed. Use `POST /v1/matching/runs/:runId/results/:resultId/review` to approve or reject individual matches. Approved matches can be used downstream for data enrichment and reconciliation workflows." },
12902
13927
  {
12903
13928
  type: "endpoint",
12904
13929
  method: "GET",
@@ -12977,7 +14002,9 @@ var sections35 = [
12977
14002
  { label: "List Configs", slug: "list-matching-configs" }
12978
14003
  ],
12979
14004
  faq: [
12980
- { question: "How is the confidence score calculated?", answer: "Each field produces a strategy-specific score (0-1). The overall confidence is the weighted sum of per-field scores, using the weights defined in the matching config." }
14005
+ { question: "How is the confidence score calculated?", answer: "Each field produces a strategy-specific score (0-1). The overall confidence is the weighted sum of per-field scores, using the weights defined in the matching config." },
14006
+ { question: "What does null matched_reference_row_id mean?", answer: "A null matched_reference_row_id means no reference row scored above the configured threshold for this document. The document is effectively unmatched." },
14007
+ { question: "Can I review results programmatically?", answer: "Yes. Use POST /v1/matching/runs/:runId/results/:resultId/review with a status of approved or rejected to programmatically review match results." }
12981
14008
  ],
12982
14009
  mentions: ["matching results", "confidence score", "evidence"]
12983
14010
  },
@@ -12988,6 +14015,9 @@ var sections35 = [
12988
14015
  seoTitle: "Generate Matching Strategy \u2014 Talonic Docs",
12989
14016
  description: "Use AI to auto-suggest field mappings and strategies for a reference dataset. Analyses column names, data types, and sample values to recommend optimal matching configurations.",
12990
14017
  content: [
14018
+ { type: "paragraph", text: "Let AI analyse your reference dataset and recommend field mappings, comparison strategies, and weights. The strategy generator examines column names, data types, and sample values to produce an optimal matching configuration. Use the generated strategy as a starting point or pass it directly to a smart-run." },
14019
+ { type: "paragraph", text: 'Each generated strategy includes a `rationale` per field mapping explaining why the AI chose that strategy and weight. You can optionally provide a `user_prompt` with natural language guidance (e.g. "prioritise date matching" or "vendor name is the most important field") to steer the generation.' },
14020
+ { type: "callout", variant: "info", text: "Strategy generation does not modify any existing configs. The generated strategy is saved independently and can be applied to a config via the smart-run endpoint, or used as a reference when manually creating a config." },
12991
14021
  {
12992
14022
  type: "endpoint",
12993
14023
  method: "POST",
@@ -13060,9 +14090,14 @@ var sections35 = [
13060
14090
  }
13061
14091
  ],
13062
14092
  related: [
13063
- { label: "Create Config", slug: "create-matching-config" }
14093
+ { label: "Create Config", slug: "create-matching-config" },
14094
+ { label: "Run Smart Matching", slug: "run-matching" }
14095
+ ],
14096
+ faq: [
14097
+ { question: "Does strategy generation cost credits?", answer: "Yes. Strategy generation uses AI (Claude) to analyse columns and sample data. The cost is minimal \u2014 typically a single Haiku call \u2014 and is reflected in your credit usage." },
14098
+ { question: "Can I regenerate a strategy with different guidance?", answer: "Yes. Call the generate endpoint again with a different user_prompt. Each call creates a new strategy version. Previous versions are retained." },
14099
+ { question: "How do I use a generated strategy?", answer: "Pass the strategy ID to POST /v1/matching/configs/:id/smart-run to execute a run using the AI-generated thresholds and weights. Alternatively, copy the field_mappings from the strategy into a new config via POST /v1/matching/configs." }
13064
14100
  ],
13065
- faq: [],
13066
14101
  mentions: ["AI strategy", "auto-suggest", "field mapping"]
13067
14102
  }
13068
14103
  ];
@@ -13077,6 +14112,18 @@ var sections36 = [
13077
14112
  description: "List all configured delivery destinations for the workspace. Destinations define where processed data is sent \u2014 webhook, SFTP, S3, Azure Blob, Google Drive, or OneDrive.",
13078
14113
  content: [
13079
14114
  { type: "paragraph", text: "Delivery routes processed data to external systems through a four-part pipeline: **signals** (domain events) are matched to **bindings** (routing rules), resolved into **deliverables** (payloads), serialized, and sent to **destinations** (connectors). Destinations define the transport protocol and credentials." },
14115
+ { type: "paragraph", text: "Each destination is an instance of one of the six supported connector types. A single destination can serve multiple bindings \u2014 for example, you might have one S3 destination with separate bindings for extraction results and run outcomes. Auth credentials are stored securely and never returned in API responses." },
14116
+ {
14117
+ type: "list",
14118
+ items: [
14119
+ "**webhook** \u2014 HTTP POST with optional HMAC-SHA256 signing and idempotency headers.",
14120
+ "**sftp** \u2014 File upload via SSH with password or private key auth.",
14121
+ "**s3** \u2014 Object upload to AWS S3 with access key auth.",
14122
+ "**azure_blob** \u2014 Blob upload to Azure Storage with connection string or account key.",
14123
+ "**google_drive** \u2014 File upload via OAuth (`drive.file` scope).",
14124
+ "**onedrive** \u2014 File upload via OAuth (`Files.ReadWrite.All` scope)."
14125
+ ]
14126
+ },
13080
14127
  {
13081
14128
  type: "endpoint",
13082
14129
  method: "GET",
@@ -13156,7 +14203,9 @@ var sections36 = [
13156
14203
  { label: "List Bindings", slug: "list-delivery-bindings" }
13157
14204
  ],
13158
14205
  faq: [
13159
- { question: "What destination types are supported?", answer: "Six connector types: webhook (HTTP POST with HMAC-SHA256 signing), sftp, s3, azure_blob, google_drive (OAuth), and onedrive (OAuth)." }
14206
+ { question: "What destination types are supported?", answer: "Six connector types: webhook (HTTP POST with HMAC-SHA256 signing), sftp, s3, azure_blob, google_drive (OAuth), and onedrive (OAuth)." },
14207
+ { question: "Why is my destination marked as inactive?", answer: "Destinations are automatically deactivated when an auth failure or SSRF block occurs during delivery. Fix the credentials or URL, then re-enable via PUT /v1/delivery/destinations/:id." },
14208
+ { question: "Are auth credentials returned in the response?", answer: "No. Auth credentials are never returned in API responses. The has_auth_config and has_signing_secret boolean fields indicate whether credentials are configured." }
13160
14209
  ],
13161
14210
  mentions: ["delivery", "destinations", "connectors"]
13162
14211
  },
@@ -13167,6 +14216,8 @@ var sections36 = [
13167
14216
  seoTitle: "Create Delivery Destination \u2014 Talonic Docs",
13168
14217
  description: "Create a delivery destination with connector type, transport config, and authentication. Supported types: webhook, sftp, s3, azure_blob, google_drive, onedrive.",
13169
14218
  content: [
14219
+ { type: "paragraph", text: "Create a new delivery destination by specifying the connector type, transport configuration, and optional authentication. The `config` and `auth_config` schemas vary by destination type \u2014 see the catalog endpoint for connector capabilities." },
14220
+ { type: "callout", variant: "info", text: "OAuth-based destinations (google_drive, onedrive) require completing an OAuth flow before creating the destination. Use the OAuth start endpoint to initiate the flow and obtain tokens." },
13170
14221
  {
13171
14222
  type: "endpoint",
13172
14223
  method: "POST",
@@ -13254,8 +14305,11 @@ var sections36 = [
13254
14305
  { label: "List Destinations", slug: "list-delivery-destinations" },
13255
14306
  { label: "Test Destination", slug: "manage-delivery-destination" }
13256
14307
  ],
13257
- faq: [],
13258
- mentions: ["destination creation", "webhook", "HMAC"]
14308
+ faq: [
14309
+ { question: "How does webhook signing work?", answer: 'When a signing_secret is configured, every delivery includes an X-Talonic-Signature header with format t=<timestamp>,v1=<hex>. The signature is the hex-encoded HMAC-SHA256 of "<timestamp>.<body>", keyed with the signing secret. Verify this on your server to confirm authenticity.' },
14310
+ { question: "What is the payload cap?", answer: "The global payload cap is 5 MiB per delivery. You can override this per destination with payload_cap_bytes. Payloads exceeding the cap are rejected with a non-retryable payload_too_large error." }
14311
+ ],
14312
+ mentions: ["destination creation", "webhook", "HMAC", "signing"]
13259
14313
  },
13260
14314
  {
13261
14315
  slug: "manage-delivery-destination",
@@ -13264,6 +14318,8 @@ var sections36 = [
13264
14318
  seoTitle: "Manage Delivery Destination \u2014 Talonic Docs",
13265
14319
  description: "Get destination details, update config, delete a destination, or send a test payload to verify connectivity. Auth credentials are always redacted in responses.",
13266
14320
  content: [
14321
+ { type: "paragraph", text: "Manage a single destination: retrieve its current config, update transport settings or credentials, delete it, or test connectivity. The **test** endpoint probes the destination without delivering real data \u2014 file-drop connectors (S3, SFTP, Azure Blob) verify bucket/container reachability without writing any objects." },
14322
+ { type: "callout", variant: "warning", text: "Deleting a destination cascades to all its bindings, delivery items, and DLQ entries. This is irreversible. Disable the destination (`is_active: false`) instead if you want to preserve history." },
13267
14323
  {
13268
14324
  type: "endpoint",
13269
14325
  method: "GET",
@@ -13453,10 +14509,14 @@ var sections36 = [
13453
14509
  }
13454
14510
  ],
13455
14511
  related: [
13456
- { label: "List Destinations", slug: "list-delivery-destinations" }
14512
+ { label: "List Destinations", slug: "list-delivery-destinations" },
14513
+ { label: "Create Destination", slug: "create-delivery-destination" }
13457
14514
  ],
13458
- faq: [],
13459
- mentions: ["destination management", "test delivery"]
14515
+ faq: [
14516
+ { question: "Does the test endpoint write data to the destination?", answer: "No. File-drop connectors (S3, SFTP, Azure Blob, Google Drive, OneDrive) use lightweight probes (HeadBucket, list, getProperties) that verify reachability without creating any objects. Webhook destinations receive a small synthetic payload." },
14517
+ { question: "Can I re-enable a destination that was auto-disabled?", answer: "Yes. Fix the underlying issue (expired credentials, unreachable URL), then update the destination with the corrected config. The destination will be re-enabled automatically, or you can explicitly set is_active: true." }
14518
+ ],
14519
+ mentions: ["destination management", "test delivery", "cascade delete"]
13460
14520
  },
13461
14521
  {
13462
14522
  slug: "list-delivery-bindings",
@@ -13466,6 +14526,7 @@ var sections36 = [
13466
14526
  description: "List all delivery bindings that route signals to destinations. Each binding maps an event type to a deliverable resolver, serializer format, and destination.",
13467
14527
  content: [
13468
14528
  { type: "paragraph", text: "A binding connects the four parts of the delivery pipeline: a **signal filter** (which events to listen for), a **deliverable type** (what payload to build), a **serializer format** (how to encode it), and a **destination** (where to send it). The compatibility triangle is enforced on creation." },
14529
+ { type: "paragraph", text: "Bindings are the core routing configuration for delivery. When a domain event (e.g. `document.extracted`) fires, the poller matches it against all active bindings. Each matching binding produces a separate delivery attempt \u2014 so a single event can fan out to multiple destinations simultaneously." },
13469
14530
  {
13470
14531
  type: "endpoint",
13471
14532
  method: "GET",
@@ -13533,7 +14594,8 @@ var sections36 = [
13533
14594
  { label: "Catalog", slug: "delivery-catalog" }
13534
14595
  ],
13535
14596
  faq: [
13536
- { question: "What is the compatibility triangle?", answer: "When creating a binding, the system verifies that the signal event type, deliverable resolver, and serializer format are mutually compatible. For example, a CSV serializer cannot serialize a graph deliverable." }
14597
+ { question: "What is the compatibility triangle?", answer: "When creating a binding, the system verifies that the signal event type, deliverable resolver, and serializer format are mutually compatible. For example, a CSV serializer cannot serialize a graph deliverable." },
14598
+ { question: "Can multiple bindings target the same destination?", answer: "Yes. A single destination can serve many bindings with different signal filters, deliverable types, and serializer formats. Each binding produces independent delivery attempts." }
13537
14599
  ],
13538
14600
  mentions: ["bindings", "signal filter", "compatibility triangle"]
13539
14601
  },
@@ -13544,6 +14606,8 @@ var sections36 = [
13544
14606
  seoTitle: "Create Delivery Binding \u2014 Talonic Docs",
13545
14607
  description: "Create a delivery binding that routes domain signals through a deliverable resolver and serializer to a destination. Includes field mapping and retry policy configuration.",
13546
14608
  content: [
14609
+ { type: "paragraph", text: "Create a binding that wires a domain event to a destination. The **compatibility triangle** is validated on creation: the signal event type must be compatible with the deliverable resolver, the serializer must support the deliverable shape, and the connector must support the serializer format." },
14610
+ { type: "callout", variant: "info", text: "Use the catalog endpoints (`/v1/delivery/catalog/*`) to discover valid combinations before creating a binding. The catalog lists all available signals, deliverables, serializers, and connectors with their compatibility constraints." },
13547
14611
  {
13548
14612
  type: "endpoint",
13549
14613
  method: "POST",
@@ -13631,8 +14695,11 @@ var sections36 = [
13631
14695
  { label: "List Bindings", slug: "list-delivery-bindings" },
13632
14696
  { label: "Catalog", slug: "delivery-catalog" }
13633
14697
  ],
13634
- faq: [],
13635
- mentions: ["binding creation", "field map", "delivery policy"]
14698
+ faq: [
14699
+ { question: "What is the default retry policy?", answer: "By default, each delivery is attempted up to 7 times (the initial attempt plus 6 retries) with an exponential backoff schedule: 0s, 30s, 2m, 8m, 30m, 2h, 8h. Override this with the delivery_policy field." },
14700
+ { question: "What is the field_map for?", answer: "The field_map applies a JSONPath projection to the resolved payload before serialization. Use it to rename fields, drop internal fields, or add static values. If omitted, the full payload is delivered as-is." }
14701
+ ],
14702
+ mentions: ["binding creation", "field map", "delivery policy", "retry"]
13636
14703
  },
13637
14704
  {
13638
14705
  slug: "manage-delivery-binding",
@@ -13641,6 +14708,7 @@ var sections36 = [
13641
14708
  seoTitle: "Manage Delivery Binding \u2014 Talonic Docs",
13642
14709
  description: "Get binding details, update signal filters or field maps, delete a binding, or preview the resolved payload for a binding without sending it.",
13643
14710
  content: [
14711
+ { type: "paragraph", text: "Manage a single delivery binding: retrieve its configuration, update the signal filter or field map, delete it, or preview the payload it would produce. Updates re-validate the compatibility triangle. Deleting a binding stops future routing but allows in-flight deliveries to complete." },
13644
14712
  {
13645
14713
  type: "endpoint",
13646
14714
  method: "GET",
@@ -13832,9 +14900,13 @@ var sections36 = [
13832
14900
  }
13833
14901
  ],
13834
14902
  related: [
13835
- { label: "List Bindings", slug: "list-delivery-bindings" }
14903
+ { label: "List Bindings", slug: "list-delivery-bindings" },
14904
+ { label: "Catalog", slug: "delivery-catalog" }
14905
+ ],
14906
+ faq: [
14907
+ { question: "Does deleting a binding affect in-flight deliveries?", answer: "No. In-flight deliveries (already queued in the delivery job queue) will complete normally. Deletion only prevents new events from being routed to this binding." },
14908
+ { question: "Is the preview endpoint live?", answer: "The public API preview endpoint currently returns a stub response. The internal preview endpoint is fully functional and walks the full resolve -> project -> serialize pipeline." }
13836
14909
  ],
13837
- faq: [],
13838
14910
  mentions: ["binding management", "preview"]
13839
14911
  },
13840
14912
  {
@@ -13844,6 +14916,8 @@ var sections36 = [
13844
14916
  seoTitle: "Delivery History \u2014 Talonic Docs",
13845
14917
  description: "View delivery attempt history with status, HTTP codes, and timing. Get detail for a single item or replay a failed delivery attempt.",
13846
14918
  content: [
14919
+ { type: "paragraph", text: "The delivery history tracks every attempt to deliver a payload to a destination. Each attempt is recorded as a **delivery item** with status, timing, HTTP response code, and optional request/response bodies. Use this endpoint to audit delivery performance and debug failures." },
14920
+ { type: "callout", variant: "info", text: "Request and response bodies are truncated to 10 KB and retained for a configurable period (default 30 days). After the retention period, bodies are nulled but metadata (status, HTTP code, duration, error code) is preserved indefinitely." },
13847
14921
  {
13848
14922
  type: "endpoint",
13849
14923
  method: "GET",
@@ -14015,8 +15089,11 @@ var sections36 = [
14015
15089
  { label: "Dead Letter Queue", slug: "delivery-dlq" },
14016
15090
  { label: "List Bindings", slug: "list-delivery-bindings" }
14017
15091
  ],
14018
- faq: [],
14019
- mentions: ["delivery history", "replay", "attempt log"]
15092
+ faq: [
15093
+ { question: "What is the idempotency key?", answer: "The idempotency key is a deterministic SHA-256 hash of the binding ID and event ID. It is sent on the wire (as an HTTP header, object metadata, or filename token depending on the connector) so receivers can deduplicate repeated deliveries." },
15094
+ { question: "How does replay differ from DLQ replay?", answer: "Item replay re-enqueues a specific (binding, event) pair with a new attempt number. DLQ replay deletes the dead-letter row and re-enqueues with attempt=1. Both preserve the same idempotency key so receivers can deduplicate." }
15095
+ ],
15096
+ mentions: ["delivery history", "replay", "attempt log", "idempotency"]
14020
15097
  },
14021
15098
  {
14022
15099
  slug: "delivery-dlq",
@@ -14026,6 +15103,8 @@ var sections36 = [
14026
15103
  description: "Inspect and manage permanently failed deliveries in the dead letter queue. Replay individual items or discard them.",
14027
15104
  content: [
14028
15105
  { type: "paragraph", text: "Deliveries that exhaust all retry attempts are moved to the dead letter queue (DLQ). Items in the DLQ can be inspected, replayed (enqueues a fresh attempt), or deleted." },
15106
+ { type: "paragraph", text: "The DLQ is the terminal state for failed deliveries. Common error codes include `connector_5xx` (destination returned a server error), `auth_failed` (invalid credentials), `ssrf_blocked` (destination URL resolves to a private network), and `payload_too_large` (payload exceeds the cap). Non-retryable errors (`auth_failed`, `ssrf_blocked`, `payload_too_large`) skip the retry ladder and go directly to the DLQ." },
15107
+ { type: "callout", variant: "warning", text: "DLQ replay **deletes** the dead-letter row before enqueuing the new attempt. If the enqueue fails, the DLQ row is lost. This is an intentional trade-off: the system prefers losing a DLQ row over duplicating a delivery." },
14029
15108
  {
14030
15109
  type: "endpoint",
14031
15110
  method: "GET",
@@ -14158,9 +15237,11 @@ var sections36 = [
14158
15237
  { label: "List Destinations", slug: "list-delivery-destinations" }
14159
15238
  ],
14160
15239
  faq: [
14161
- { question: "When does a delivery end up in the DLQ?", answer: "A delivery enters the dead letter queue after exhausting all retry attempts defined in the binding delivery_policy (default 7 attempts with exponential backoff over ~10 hours)." }
15240
+ { question: "When does a delivery end up in the DLQ?", answer: "A delivery enters the dead letter queue after exhausting all retry attempts defined in the binding delivery_policy (default 7 attempts with exponential backoff over ~10 hours)." },
15241
+ { question: "What error codes indicate non-retryable failures?", answer: "auth_failed (invalid credentials), ssrf_blocked (destination URL resolves to private network), and payload_too_large (exceeds payload cap) skip the retry ladder and go directly to the DLQ after a single attempt." },
15242
+ { question: "Can I set up alerts for DLQ entries?", answer: "Yes. Create a binding with signal_filter event_type: delivery.item.failed targeting your alerting webhook. The delivery system emits meta-signals for failed deliveries, with built-in loop prevention to avoid infinite cascades." }
14162
15243
  ],
14163
- mentions: ["dead letter queue", "DLQ", "failed delivery"]
15244
+ mentions: ["dead letter queue", "DLQ", "failed delivery", "error codes"]
14164
15245
  },
14165
15246
  {
14166
15247
  slug: "delivery-catalog",
@@ -14170,6 +15251,7 @@ var sections36 = [
14170
15251
  description: "Discover available signals, deliverable types, serializer formats, and connector types from the delivery registry. Use the catalog to build valid bindings.",
14171
15252
  content: [
14172
15253
  { type: "paragraph", text: "The catalog endpoints expose the four delivery registries. Use them to discover which event types, deliverable resolvers, serializer formats, and connectors are available before creating bindings." },
15254
+ { type: "paragraph", text: "Walk the catalog **top-down** to build valid binding configurations: start with signals to pick an event type, then check which deliverables are compatible with that signal, which serializers support the deliverable shape, and which connectors accept the serializer format. This ensures every combination passes the compatibility triangle." },
14173
15255
  {
14174
15256
  type: "endpoint",
14175
15257
  method: "GET",
@@ -14362,9 +15444,11 @@ var sections36 = [
14362
15444
  { label: "List Destinations", slug: "list-delivery-destinations" }
14363
15445
  ],
14364
15446
  faq: [
14365
- { question: "How do I know which combinations are valid for a binding?", answer: "Use the catalog endpoints to check compatibility. A valid binding requires: the signal event_type exists, the deliverable type lists that signal in compatible_signals, the serializer supports the deliverable shape, and the connector supports the serializer format." }
15447
+ { question: "How do I know which combinations are valid for a binding?", answer: "Use the catalog endpoints to check compatibility. A valid binding requires: the signal event_type exists, the deliverable type lists that signal in compatible_signals, the serializer supports the deliverable shape, and the connector supports the serializer format." },
15448
+ { question: "What is the difference between record and file delivery semantics?", answer: "Record semantics (webhook) deliver one event per HTTP request. File semantics (S3, SFTP, Azure Blob, Google Drive, OneDrive) write each delivery as a separate file/object, using a configurable filename template with tokens like {event_id} and {timestamp_iso}." },
15449
+ { question: "Are all catalog entries available for use?", answer: "Most entries are live. Some deliverable types (case_snapshot, graph_relations) are registered as stubs with empty compatible_signals arrays \u2014 they appear in the catalog but cannot be used in bindings until their resolvers are implemented." }
14366
15450
  ],
14367
- mentions: ["delivery catalog", "registry discovery"]
15451
+ mentions: ["delivery catalog", "registry discovery", "compatibility"]
14368
15452
  }
14369
15453
  ];
14370
15454
 
@@ -14378,6 +15462,17 @@ var sections37 = [
14378
15462
  description: "List all extraction batches with status, item counts, and provider information. Batches group documents submitted for deferred extraction at 50% cost.",
14379
15463
  content: [
14380
15464
  { type: "paragraph", text: "Batch inference defers Claude extraction to the provider batch API at **50% cost**. Documents uploaded with `processing_mode=batch` run OCR and classification immediately but queue extraction. Batches accumulate items, submit to the provider on a timer or threshold, and poll for results." },
15465
+ { type: "paragraph", text: "Talonic supports two batch providers: the **Anthropic Message Batches API** (direct) and **AWS Bedrock** (`CreateModelInvocationJob`). The provider is auto-detected from your workspace configuration. Results are typically delivered within 24 hours, with a maximum SLA of 48 hours." },
15466
+ {
15467
+ type: "list",
15468
+ items: [
15469
+ "Documents run Stage 1 (OCR + classification + triage) immediately on upload.",
15470
+ "Stage 2 (Claude extraction) is deferred to the batch API at 50% of standard cost.",
15471
+ "Batches require a minimum of 100 items (Bedrock requirement). Smaller uploads fall back to realtime extraction.",
15472
+ "Results are polled hourly. Use the sync endpoint to check sooner."
15473
+ ]
15474
+ },
15475
+ { type: "callout", variant: "info", text: "Image-only documents (no text content) cannot enter the batch pipeline because the text-only batch payload cannot carry image bytes. These are automatically routed to realtime extraction." },
14381
15476
  {
14382
15477
  type: "endpoint",
14383
15478
  method: "GET",
@@ -14467,7 +15562,8 @@ var sections37 = [
14467
15562
  ],
14468
15563
  faq: [
14469
15564
  { question: "What are the batch statuses?", answer: "Batches progress through: accumulating (collecting items), submitted (sent to provider), completed (all results received), failed (provider error), or cancelled (manually cancelled)." },
14470
- { question: "How much does batch processing cost?", answer: "Batch processing runs at 50% of the standard extraction cost. Results are delivered within 48 hours." }
15565
+ { question: "How much does batch processing cost?", answer: "Batch processing runs at 50% of the standard extraction cost. Results are delivered within 48 hours." },
15566
+ { question: "What is the minimum batch size?", answer: "Batches require a minimum of 100 items (a Bedrock requirement enforced for both providers). Uploads below this threshold fall back to realtime extraction with a warning." }
14471
15567
  ],
14472
15568
  mentions: ["batch inference", "extraction batches", "cost reduction"]
14473
15569
  },
@@ -14478,6 +15574,8 @@ var sections37 = [
14478
15574
  seoTitle: "Get Batch Detail \u2014 Talonic Docs",
14479
15575
  description: "Get detailed information for a single extraction batch including item counts, provider, status, and timing. Shows per-item breakdown when the batch is completed.",
14480
15576
  content: [
15577
+ { type: "paragraph", text: "Retrieve the full batch record including per-item status. Poll this endpoint while `status` is `submitted` to track progress. Once `completed`, each item shows its individual outcome and processing timestamp." },
15578
+ { type: "callout", variant: "info", text: "Items that fail extraction in the batch are retried via **realtime** extraction (never re-batched) to preserve the original 48-hour SLA. Check `items[].status` for per-document outcomes." },
14481
15579
  {
14482
15580
  type: "endpoint",
14483
15581
  method: "GET",
@@ -14561,8 +15659,11 @@ var sections37 = [
14561
15659
  { label: "List Batches", slug: "list-batches" },
14562
15660
  { label: "Sync Batch", slug: "sync-batch" }
14563
15661
  ],
14564
- faq: [],
14565
- mentions: ["batch detail", "provider metadata"]
15662
+ faq: [
15663
+ { question: "What happens to failed batch items?", answer: "Items that fail during batch processing are retried via realtime extraction (not re-batched). The document status transitions from batch_queued to the standard extraction pipeline." },
15664
+ { question: "How long do batch results take?", answer: "Results are typically delivered within 24 hours. The maximum SLA is 48 hours. Use the sync endpoint to poll the provider for results before the automatic hourly check." }
15665
+ ],
15666
+ mentions: ["batch detail", "provider metadata", "per-item status"]
14566
15667
  },
14567
15668
  {
14568
15669
  slug: "sync-batch",
@@ -14571,6 +15672,7 @@ var sections37 = [
14571
15672
  seoTitle: "Sync Batch with Provider \u2014 Talonic Docs",
14572
15673
  description: "Force a sync with the provider to check for batch results. Useful when you do not want to wait for the hourly automatic poll.",
14573
15674
  content: [
15675
+ { type: "paragraph", text: "Force an immediate check with the batch provider (Anthropic or Bedrock) for results. By default, batches are polled automatically every hour. Use this endpoint when you need results sooner or want to verify the current provider-side status." },
14574
15676
  {
14575
15677
  type: "endpoint",
14576
15678
  method: "POST",
@@ -14635,7 +15737,8 @@ var sections37 = [
14635
15737
  { label: "Cancel Batch", slug: "cancel-batch" }
14636
15738
  ],
14637
15739
  faq: [
14638
- { question: "How often are batches polled automatically?", answer: "The system polls the provider every hour for results. Use the sync endpoint to trigger an immediate check." }
15740
+ { question: "How often are batches polled automatically?", answer: "The system polls the provider every hour for results. Use the sync endpoint to trigger an immediate check." },
15741
+ { question: "Can I sync a batch that is still accumulating?", answer: "Yes, but it will have no effect since the batch has not been submitted to the provider yet. The status will remain accumulating." }
14639
15742
  ],
14640
15743
  mentions: ["batch sync", "provider poll"]
14641
15744
  },
@@ -14646,6 +15749,7 @@ var sections37 = [
14646
15749
  seoTitle: "Cancel Extraction Batch \u2014 Talonic Docs",
14647
15750
  description: "Cancel an in-progress extraction batch. Only batches in accumulating or submitted status can be cancelled. Completed batches cannot be rolled back.",
14648
15751
  content: [
15752
+ { type: "paragraph", text: "Cancel a batch that is still `accumulating` or `submitted`. Cancellation sends a stop request to the provider if the batch was already submitted. Documents in the cancelled batch revert to `batch_queued` status and can be resubmitted or processed via realtime extraction." },
14649
15753
  {
14650
15754
  type: "endpoint",
14651
15755
  method: "POST",
@@ -14710,7 +15814,10 @@ var sections37 = [
14710
15814
  { label: "List Batches", slug: "list-batches" },
14711
15815
  { label: "Batch Detail", slug: "get-batch" }
14712
15816
  ],
14713
- faq: [],
15817
+ faq: [
15818
+ { question: "Can I cancel a completed batch?", answer: "No. Only batches in accumulating or submitted status can be cancelled. Completed, failed, and already-cancelled batches return a 400 error." },
15819
+ { question: "What happens to documents after cancellation?", answer: "Documents revert to batch_queued status. You can resubmit them in a new batch or process them via realtime extraction by updating their processing_mode." }
15820
+ ],
14714
15821
  mentions: ["batch cancellation"]
14715
15822
  }
14716
15823
  ];
@@ -14725,6 +15832,15 @@ var sections38 = [
14725
15832
  description: "List cases with optional status and search filters. Supports pagination for large result sets. Cases group related documents via shared entities.",
14726
15833
  content: [
14727
15834
  { type: "paragraph", text: "Cases group documents that share entities discovered through linking. Use filters to narrow results by status or search term." },
15835
+ { type: "paragraph", text: "A **case** is a cluster of documents connected by shared field values in the linking graph. The linking engine discovers entity overlaps \u2014 such as matching vendor IDs, policy numbers, or account references \u2014 and groups the connected documents into a single case for review." },
15836
+ {
15837
+ type: "list",
15838
+ items: [
15839
+ "Filter by `search` to find cases by label or document content.",
15840
+ "Use `min_documents` to exclude trivial single-document cases.",
15841
+ "Cases are created automatically \u2014 there is no manual create endpoint."
15842
+ ]
15843
+ },
14728
15844
  {
14729
15845
  type: "endpoint",
14730
15846
  method: "GET",
@@ -14788,7 +15904,9 @@ var sections38 = [
14788
15904
  { label: "Case Anomalies", slug: "case-anomalies" }
14789
15905
  ],
14790
15906
  faq: [
14791
- { question: "How are cases created?", answer: "Cases are created automatically when the linking engine discovers shared entities across two or more documents." }
15907
+ { question: "How are cases created?", answer: "Cases are created automatically when the linking engine discovers shared entities across two or more documents." },
15908
+ { question: "Can I create a case manually?", answer: "No. Cases are discovered automatically through the linking graph. You can manually adjust case membership using the merge, split, pin, and remove document endpoints." },
15909
+ { question: "What is a case key?", answer: "A case key is a hex hash (8-64 characters) that uniquely identifies a case within your workspace. It is derived from the linked entity values that form the case." }
14792
15910
  ],
14793
15911
  mentions: ["cases", "linking", "documents"]
14794
15912
  },
@@ -14799,6 +15917,8 @@ var sections38 = [
14799
15917
  seoTitle: "Get Case Endpoint \u2014 Talonic Docs",
14800
15918
  description: "Retrieve a case by its key (a hex hash, e.g. 3fa9c2d1) including linked documents, shared entities, AI-generated narration, label, and anomaly count.",
14801
15919
  content: [
15920
+ { type: "paragraph", text: "Retrieve the full detail of a case including its documents, AI-generated narrative summary, and anomaly count. The narrative is generated by Claude and summarizes the relationships between documents in the case." },
15921
+ { type: "callout", variant: "info", text: "The `narrative` field is generated on demand via `POST /v1/cases/:key/narrate`. It will be `null` until narration is triggered for this case." },
14802
15922
  {
14803
15923
  type: "endpoint",
14804
15924
  method: "GET",
@@ -14881,7 +16001,10 @@ var sections38 = [
14881
16001
  { label: "Case Operations", slug: "case-operations" },
14882
16002
  { label: "Case Evidence", slug: "case-evidence" }
14883
16003
  ],
14884
- faq: [],
16004
+ faq: [
16005
+ { question: "What is the case narrative?", answer: "The narrative is an AI-generated summary produced by Claude that describes the relationships, patterns, and key facts across the documents in the case. It is generated on demand via POST /v1/cases/:key/narrate." },
16006
+ { question: "Why is the narrative field null?", answer: "Narration is generated on demand. Call POST /v1/cases/:key/narrate to generate a narrative, then retrieve it with this endpoint." }
16007
+ ],
14885
16008
  mentions: ["case detail", "narration", "shared entities"]
14886
16009
  },
14887
16010
  {
@@ -14892,6 +16015,17 @@ var sections38 = [
14892
16015
  description: "List anomalies detected within a case with severity filtering and dismissed toggle. Dismiss individual anomalies after review.",
14893
16016
  content: [
14894
16017
  { type: "paragraph", text: "Anomalies are structural issues detected across the documents in a case \u2014 field conflicts, duplicate key divergence, value reuse patterns, and more." },
16018
+ { type: "paragraph", text: "Five **structural detectors** run automatically when cases are materialized. They surface inconsistencies that may indicate data quality issues, duplicate submissions, or missing documents within a case." },
16019
+ {
16020
+ type: "list",
16021
+ items: [
16022
+ "**Validation cluster** \u2014 groups of fields that consistently fail validation together.",
16023
+ "**Field conflict** \u2014 contradictory values for the same field across documents.",
16024
+ "**Duplicate key divergence** \u2014 documents share a key but differ on other fields.",
16025
+ "**Missing document type** \u2014 a case is missing an expected document type based on templates.",
16026
+ "**Value reuse** \u2014 the same value appears in unrelated fields across documents."
16027
+ ]
16028
+ },
14895
16029
  {
14896
16030
  type: "endpoint",
14897
16031
  method: "GET",
@@ -14975,9 +16109,11 @@ var sections38 = [
14975
16109
  { label: "Case Evidence", slug: "case-evidence" }
14976
16110
  ],
14977
16111
  faq: [
14978
- { question: "What types of anomalies are detected?", answer: "Five structural detectors: validation cluster, field conflict, duplicate key divergence, missing document type, and value reuse." }
16112
+ { question: "What types of anomalies are detected?", answer: "Five structural detectors: validation cluster, field conflict, duplicate key divergence, missing document type, and value reuse." },
16113
+ { question: "Can I un-dismiss an anomaly?", answer: "Dismissal is a one-way operation. If a similar anomaly is detected again during case recomputation, it will appear as a new anomaly entry." },
16114
+ { question: "How are anomaly severities assigned?", answer: "Severity is assigned by each detector based on the scope of the issue. For example, a field conflict across many documents is rated higher than a conflict in just two." }
14979
16115
  ],
14980
- mentions: ["anomalies", "severity", "dismiss"]
16116
+ mentions: ["anomalies", "severity", "dismiss", "structural detectors"]
14981
16117
  },
14982
16118
  {
14983
16119
  slug: "case-evidence",
@@ -14986,6 +16122,8 @@ var sections38 = [
14986
16122
  seoTitle: "Case Evidence Endpoint \u2014 Talonic Docs",
14987
16123
  description: "List evidence items within a case. Filter by validation status, source document, category, or free-text search across evidence fields.",
14988
16124
  content: [
16125
+ { type: "paragraph", text: "Evidence items are the extracted field values from documents in a case, annotated with validation status and confidence scores. Use evidence to audit the data quality within a case and understand which fields link documents together." },
16126
+ { type: "callout", variant: "info", text: "Evidence is produced by the evidence validation engine, which runs rule-based validators (structural checks, checksum validation, domain packs) against extracted values. Each evidence item records the validation outcome for a specific field on a specific document." },
14989
16127
  {
14990
16128
  type: "endpoint",
14991
16129
  method: "GET",
@@ -15052,8 +16190,11 @@ var sections38 = [
15052
16190
  { label: "Get Case", slug: "get-case" },
15053
16191
  { label: "Case Anomalies", slug: "case-anomalies" }
15054
16192
  ],
15055
- faq: [],
15056
- mentions: ["evidence", "validation", "case evidence"]
16193
+ faq: [
16194
+ { question: "What is the difference between evidence and anomalies?", answer: "Evidence items are individual field values with validation status and confidence. Anomalies are higher-level structural issues detected across multiple evidence items, such as field conflicts or duplicate key divergence." },
16195
+ { question: "How is the confidence score computed?", answer: "Confidence is assigned during extraction and reflects how certain the AI is about the extracted value. It ranges from 0 (low confidence) to 1 (high confidence) and is independent of the validation status." }
16196
+ ],
16197
+ mentions: ["evidence", "validation", "case evidence", "confidence"]
15057
16198
  },
15058
16199
  {
15059
16200
  slug: "case-operations",
@@ -15063,6 +16204,8 @@ var sections38 = [
15063
16204
  description: "Update the status of a case. These operations modify case metadata.",
15064
16205
  content: [
15065
16206
  { type: "paragraph", text: "Operations that modify a case: update the case status (lifecycle management)." },
16207
+ { type: "paragraph", text: "Cases follow a lifecycle: `discovered` \u2192 `confirmed` \u2192 `active` \u2192 `resolved`. Transition the status as your team reviews and processes the case. Resolved cases can include optional notes documenting the resolution." },
16208
+ { type: "callout", variant: "info", text: "Status transitions are not strictly enforced \u2014 you can move a case to any valid status. However, setting `resolved` records a `resolved_at` timestamp that cannot be cleared by reverting to an earlier status." },
15066
16209
  {
15067
16210
  type: "endpoint",
15068
16211
  method: "PATCH",
@@ -15116,8 +16259,11 @@ var sections38 = [
15116
16259
  { label: "Get Case", slug: "get-case" },
15117
16260
  { label: "Case Merge & Split", slug: "case-merge-split" }
15118
16261
  ],
15119
- faq: [],
15120
- mentions: ["status", "case operations"]
16262
+ faq: [
16263
+ { question: "What are the valid case statuses?", answer: "Four lifecycle statuses: discovered (initial), confirmed (verified as real), active (being worked), and resolved (complete). The typical flow is discovered -> confirmed -> active -> resolved." },
16264
+ { question: "Can I reopen a resolved case?", answer: "Yes, you can set the status back to active or confirmed. However, the resolved_at timestamp will remain set from the original resolution." }
16265
+ ],
16266
+ mentions: ["status", "case operations", "lifecycle"]
15121
16267
  },
15122
16268
  {
15123
16269
  slug: "case-edges",
@@ -15127,6 +16273,8 @@ var sections38 = [
15127
16273
  description: "List, confirm, or reject linking edges within a case. Edges represent shared-value connections between documents in the linking graph.",
15128
16274
  content: [
15129
16275
  { type: "paragraph", text: "Edges are the connections between documents in a case, created by the linking engine when documents share field values. Confirm or reject edges to refine case membership." },
16276
+ { type: "paragraph", text: "Each edge connects two documents through a shared field value \u2014 for example, both documents might contain the same `vendor_id`. The `final_score` reflects how strong the connection is, based on learned field-pair weights. Confirming or rejecting edges feeds back into the weight-learning system to improve future case discovery." },
16277
+ { type: "callout", variant: "warning", text: "Rejecting an edge may split the case if the rejected edge was the only connection between a subset of documents. The affected documents will form a separate case." },
15130
16278
  {
15131
16279
  type: "endpoint",
15132
16280
  method: "GET",
@@ -15225,9 +16373,10 @@ var sections38 = [
15225
16373
  { label: "Linking", slug: "linking-overview" }
15226
16374
  ],
15227
16375
  faq: [
15228
- { question: "What happens when I reject an edge?", answer: "The connection between the two documents is removed. If this disconnects a document from all others in the case, it may be removed from the case." }
16376
+ { question: "What happens when I reject an edge?", answer: "The connection between the two documents is removed. If this disconnects a document from all others in the case, it may be removed from the case." },
16377
+ { question: "Do edge confirmations and rejections affect future cases?", answer: "Yes. Confirmations and rejections update the learned field-pair weights (FieldBindingWeight), which adjusts edge scores in future case discovery runs. This creates a feedback loop that improves case quality over time." }
15229
16378
  ],
15230
- mentions: ["edges", "linking", "confirm", "reject"]
16379
+ mentions: ["edges", "linking", "confirm", "reject", "field-pair weights"]
15231
16380
  },
15232
16381
  {
15233
16382
  slug: "case-documents",
@@ -15236,6 +16385,8 @@ var sections38 = [
15236
16385
  seoTitle: "Case Documents Endpoints \u2014 Talonic Docs",
15237
16386
  description: "Pin or remove documents within a case. Pinned documents are highlighted in the case view and preserved during case operations.",
15238
16387
  content: [
16388
+ { type: "paragraph", text: "Manage document membership within a case. **Pin** a document to mark it as important \u2014 pinned documents are highlighted in the UI and preserved during split operations. **Remove** a document to detach it from the case entirely." },
16389
+ { type: "callout", variant: "info", text: "Removing a document from a case does not delete the document itself. The document remains in your workspace and may be re-linked into a case during the next recompute cycle if linking edges still exist." },
15239
16390
  {
15240
16391
  type: "endpoint",
15241
16392
  method: "POST",
@@ -15292,7 +16443,10 @@ var sections38 = [
15292
16443
  { label: "Get Case", slug: "get-case" },
15293
16444
  { label: "Case Edges", slug: "case-edges" }
15294
16445
  ],
15295
- faq: [],
16446
+ faq: [
16447
+ { question: "Does removing a document delete it?", answer: "No. Removing a document only detaches it from the case. The document remains in your workspace and can still appear in other cases or be re-linked in a future recompute." },
16448
+ { question: "What does pinning a document do?", answer: "Pinning highlights the document in the case view and ensures it is preserved in the original partition during split operations. It does not affect linking or anomaly detection." }
16449
+ ],
15296
16450
  mentions: ["pin", "documents", "case documents"]
15297
16451
  },
15298
16452
  {
@@ -15303,6 +16457,8 @@ var sections38 = [
15303
16457
  description: "Split a case into two cases or merge two cases into one. Restructure case boundaries when automatic grouping needs adjustment.",
15304
16458
  content: [
15305
16459
  { type: "paragraph", text: "When the automatic case grouping does not match your needs, split a case into two partitions or merge two cases together." },
16460
+ { type: "paragraph", text: "These operations let you manually adjust case boundaries. **Splitting** divides a case by assigning each document to one of two partitions. **Merging** combines all documents from two cases into a single case. Both operations preserve linking edges and re-evaluate anomalies on the resulting cases." },
16461
+ { type: "callout", variant: "warning", text: "When merging, `case_key_b` is resolved (set to `resolved` status) and its documents are moved into `case_key_a`. The merged case key is derived from case A." },
15306
16462
  {
15307
16463
  type: "endpoint",
15308
16464
  method: "POST",
@@ -15386,7 +16542,9 @@ var sections38 = [
15386
16542
  { label: "Case Operations", slug: "case-operations" }
15387
16543
  ],
15388
16544
  faq: [
15389
- { question: "What happens to anomalies when cases are merged?", answer: "Anomalies from all source cases are carried over to the merged case and re-evaluated." }
16545
+ { question: "What happens to anomalies when cases are merged?", answer: "Anomalies from all source cases are carried over to the merged case and re-evaluated." },
16546
+ { question: "Do I need to include every document when splitting?", answer: "Yes. Every document in the case must appear in exactly one of partition_a or partition_b. Omitting a document or including it in both partitions will return a 400 error." },
16547
+ { question: "Can I merge more than two cases at once?", answer: "No. The merge endpoint accepts exactly two case keys. To merge multiple cases, chain merge calls \u2014 merge A and B first, then merge the result with C." }
15390
16548
  ],
15391
16549
  mentions: ["merge", "split", "case restructuring"]
15392
16550
  }
@@ -15402,6 +16560,11 @@ var sections39 = [
15402
16560
  description: "List pending review items in the review queue with pagination. Review items are validation records awaiting human approval, rejection, or flagging.",
15403
16561
  content: [
15404
16562
  { type: "paragraph", text: "The review queue surfaces validation records that require human judgment before delivery. Items appear when extraction confidence is below the auto-approval threshold or when anomalies are detected." },
16563
+ { type: "list", ordered: false, items: [
16564
+ "Filter by `status` to see only `pending`, `approved`, or `rejected` records",
16565
+ "Use cursor-based pagination to iterate through large queues",
16566
+ "Sort by `created_at` in ascending or descending order"
16567
+ ] },
15405
16568
  {
15406
16569
  type: "endpoint",
15407
16570
  method: "GET",
@@ -15488,9 +16651,11 @@ var sections39 = [
15488
16651
  { label: "Get Review Item", slug: "get-review-item" }
15489
16652
  ],
15490
16653
  faq: [
15491
- { question: "When do items appear in the review queue?", answer: "Items are queued when extraction confidence falls below the auto-approval threshold or when anomalies require human verification." }
16654
+ { question: "When do items appear in the review queue?", answer: "Items are queued when extraction confidence falls below the auto-approval threshold or when anomalies require human verification." },
16655
+ { question: "How do I paginate through all review items?", answer: "Pass the `next_cursor` value from the response as the `cursor` query parameter in your next request. Continue until `has_more` is false." },
16656
+ { question: "Can I filter review items by document or schema?", answer: "Not directly. The list endpoint supports filtering by `status` only. To find review items for a specific document, retrieve all pending items and filter client-side by `document_id`." }
15492
16657
  ],
15493
- mentions: ["review", "queue", "validation"]
16658
+ mentions: ["review", "queue", "validation", "pagination"]
15494
16659
  },
15495
16660
  {
15496
16661
  slug: "review-stats",
@@ -15499,6 +16664,7 @@ var sections39 = [
15499
16664
  seoTitle: "Review Queue Statistics \u2014 Talonic Docs",
15500
16665
  description: "Get statistics for the review queue including total pending items, items by status, and average time in queue. Useful for monitoring review backlog.",
15501
16666
  content: [
16667
+ { type: "paragraph", text: "Get a summary of the review queue broken down by status. Use this endpoint to monitor backlog size, track review throughput, and trigger alerts when pending items exceed a threshold." },
15502
16668
  {
15503
16669
  type: "endpoint",
15504
16670
  method: "GET",
@@ -15541,8 +16707,11 @@ var sections39 = [
15541
16707
  { label: "List Review Items", slug: "list-review-items" },
15542
16708
  { label: "Review Batch", slug: "review-batch" }
15543
16709
  ],
15544
- faq: [],
15545
- mentions: ["review statistics", "queue metrics"]
16710
+ faq: [
16711
+ { question: "Does the stats endpoint count all-time or only active items?", answer: "It counts all review records across all statuses, including already-approved and rejected items. Use the `by_status.pending` value to see only the active backlog." },
16712
+ { question: "How often should I poll review stats?", answer: "Stats are computed on each request. For dashboard polling, an interval of 30-60 seconds is reasonable. For high-throughput pipelines, consider using webhooks for real-time notifications." }
16713
+ ],
16714
+ mentions: ["review statistics", "queue metrics", "backlog monitoring"]
15546
16715
  },
15547
16716
  {
15548
16717
  slug: "get-review-item",
@@ -15551,6 +16720,8 @@ var sections39 = [
15551
16720
  seoTitle: "Get Review Item Endpoint \u2014 Talonic Docs",
15552
16721
  description: "Retrieve a single review item by ID with full detail including extracted data, confidence scores, and validation flags for human review.",
15553
16722
  content: [
16723
+ { type: "paragraph", text: "Retrieve full details for a single review item, including per-field review decisions, low-confidence fields that triggered the review, and any reviewer comments. This endpoint provides the data needed to build custom review interfaces." },
16724
+ { type: "callout", variant: "info", text: "The `low_confidence_fields` array lists the specific field keys that fell below the confidence threshold and triggered this item for review. Use this to highlight problematic fields in your review UI." },
15554
16725
  {
15555
16726
  type: "endpoint",
15556
16727
  method: "GET",
@@ -15617,8 +16788,11 @@ var sections39 = [
15617
16788
  { label: "Review Action", slug: "review-action" },
15618
16789
  { label: "Review Assign", slug: "review-assign" }
15619
16790
  ],
15620
- faq: [],
15621
- mentions: ["review detail", "confidence", "flagged fields"]
16791
+ faq: [
16792
+ { question: "What are low_confidence_fields?", answer: "An array of field keys where the extraction confidence fell below the auto-approval threshold. These are the fields that caused the record to be queued for human review." },
16793
+ { question: "Can I see the extracted data for a review item?", answer: "The review item includes the document_id and run_id. Use these to retrieve the full extraction output from the extraction or structuring endpoints." }
16794
+ ],
16795
+ mentions: ["review detail", "confidence", "flagged fields", "field decisions"]
15622
16796
  },
15623
16797
  {
15624
16798
  slug: "review-action",
@@ -15627,6 +16801,8 @@ var sections39 = [
15627
16801
  seoTitle: "Review Action Endpoint \u2014 Talonic Docs",
15628
16802
  description: "Take an action on a review item: approve, reject, or flag. Optionally include a comment explaining the decision for audit purposes.",
15629
16803
  content: [
16804
+ { type: "paragraph", text: "Approve or reject a review item to advance it through the delivery pipeline. Approved items proceed to delivery. Rejected items are returned for re-extraction or manual correction. Include an optional `reason` for audit trail purposes." },
16805
+ { type: "callout", variant: "warning", text: "Review actions are final. Once a record is approved or rejected, it cannot be reverted to pending status through the API." },
15630
16806
  {
15631
16807
  type: "endpoint",
15632
16808
  method: "POST",
@@ -15697,9 +16873,10 @@ var sections39 = [
15697
16873
  { label: "Review Batch", slug: "review-batch" }
15698
16874
  ],
15699
16875
  faq: [
15700
- { question: "What happens after approval?", answer: "Approved items proceed to delivery. Rejected items are returned for re-extraction or manual correction." }
16876
+ { question: "What happens after approval?", answer: "Approved items proceed to delivery. Rejected items are returned for re-extraction or manual correction." },
16877
+ { question: "Is the reason field stored for audit purposes?", answer: "Yes. The reason is stored as the review_comment on the record and is visible when retrieving the review item detail." }
15701
16878
  ],
15702
- mentions: ["approve", "reject", "flag", "review action"]
16879
+ mentions: ["approve", "reject", "flag", "review action", "audit trail"]
15703
16880
  },
15704
16881
  {
15705
16882
  slug: "review-batch",
@@ -15708,6 +16885,8 @@ var sections39 = [
15708
16885
  seoTitle: "Batch Review Endpoint \u2014 Talonic Docs",
15709
16886
  description: "Batch approve or reject multiple review items in a single request. Useful for clearing the review queue when items share similar characteristics.",
15710
16887
  content: [
16888
+ { type: "paragraph", text: "Process multiple review items in a single API call. This is useful for clearing backlogs when you have high-confidence items that can be bulk-approved, or when rejecting a batch of items from a failed extraction run." },
16889
+ { type: "callout", variant: "info", text: "The batch endpoint processes items independently. If some items fail (e.g. not found), the remaining items are still processed. Check the `results` array for per-item outcomes." },
15711
16890
  {
15712
16891
  type: "endpoint",
15713
16892
  method: "POST",
@@ -15763,8 +16942,11 @@ var sections39 = [
15763
16942
  { label: "Review Action", slug: "review-action" },
15764
16943
  { label: "Review Stats", slug: "review-stats" }
15765
16944
  ],
15766
- faq: [],
15767
- mentions: ["batch", "bulk review"]
16945
+ faq: [
16946
+ { question: "Is there a limit on how many items I can batch?", answer: "There is no hard limit on array size, but very large batches may take longer to process. For best performance, batch in groups of 50-100 items." },
16947
+ { question: "What happens if some items in the batch are already approved?", answer: "Already-actioned items are skipped and reported with an error status in the results array. The remaining items are still processed." }
16948
+ ],
16949
+ mentions: ["batch", "bulk review", "batch processing"]
15768
16950
  },
15769
16951
  {
15770
16952
  slug: "review-assign",
@@ -15773,6 +16955,7 @@ var sections39 = [
15773
16955
  seoTitle: "Assign Review Item Endpoint \u2014 Talonic Docs",
15774
16956
  description: "Assign a review item to a team member for review. Assigned items appear in the assignee's personal review queue.",
15775
16957
  content: [
16958
+ { type: "paragraph", text: "Assign a pending review item to a specific team member. Assignments help distribute review workload and track who is responsible for each item. Pass `null` as the `user_id` to unassign an item." },
15776
16959
  {
15777
16960
  type: "endpoint",
15778
16961
  method: "POST",
@@ -15841,8 +17024,11 @@ var sections39 = [
15841
17024
  { label: "Get Review Item", slug: "get-review-item" },
15842
17025
  { label: "Review Action", slug: "review-action" }
15843
17026
  ],
15844
- faq: [],
15845
- mentions: ["assign", "team", "review assignment"]
17027
+ faq: [
17028
+ { question: "Can I assign an already-reviewed item?", answer: "Assignment is only meaningful for pending items. You can technically assign a reviewed item, but it has no effect on the review workflow since the item has already been actioned." },
17029
+ { question: "How do I unassign a review item?", answer: "Pass `null` as the `user_id` in the request body. The `assigned_to` field will be set to null." }
17030
+ ],
17031
+ mentions: ["assign", "team", "review assignment", "workload distribution"]
15846
17032
  }
15847
17033
  ];
15848
17034
 
@@ -15856,6 +17042,11 @@ var sections40 = [
15856
17042
  description: "List all ground truth datasets used for benchmarking extraction accuracy. Each dataset contains manually verified entries that serve as the gold standard.",
15857
17043
  content: [
15858
17044
  { type: "paragraph", text: "Ground truth datasets contain manually verified data entries that serve as the gold standard for measuring extraction accuracy. Create datasets, add entries, then run benchmarks against extraction results." },
17045
+ { type: "list", ordered: false, items: [
17046
+ "Each dataset contains verified entries mapping documents to expected field values",
17047
+ "Datasets can be scoped to a specific user schema via `user_schema_id`",
17048
+ "Use datasets as inputs to benchmark runs for per-field accuracy measurement"
17049
+ ] },
15859
17050
  {
15860
17051
  type: "endpoint",
15861
17052
  method: "GET",
@@ -15931,8 +17122,11 @@ var sections40 = [
15931
17122
  { label: "Create Dataset", slug: "create-quality-dataset" },
15932
17123
  { label: "List Benchmarks", slug: "list-benchmarks" }
15933
17124
  ],
15934
- faq: [],
15935
- mentions: ["ground truth", "quality", "benchmarking"]
17125
+ faq: [
17126
+ { question: "How many ground truth datasets can I create?", answer: "There is no hard limit on the number of datasets. Create separate datasets for different document types or schema versions to track accuracy independently." },
17127
+ { question: "What is the recommended number of entries per dataset?", answer: "For statistically meaningful accuracy scores, aim for at least 30-50 entries per dataset. Smaller datasets may produce volatile accuracy metrics." }
17128
+ ],
17129
+ mentions: ["ground truth", "quality", "benchmarking", "datasets"]
15936
17130
  },
15937
17131
  {
15938
17132
  slug: "create-quality-dataset",
@@ -15941,6 +17135,7 @@ var sections40 = [
15941
17135
  seoTitle: "Create Ground Truth Dataset \u2014 Talonic Docs",
15942
17136
  description: "Create a new ground truth dataset linked to a schema. The dataset defines the expected extraction output used for accuracy benchmarking.",
15943
17137
  content: [
17138
+ { type: "paragraph", text: "Create an empty ground truth dataset that you can populate with verified entries. Datasets serve as the baseline for benchmark runs that measure extraction accuracy. After creating a dataset, add entries individually or import them in bulk via CSV." },
15944
17139
  {
15945
17140
  type: "endpoint",
15946
17141
  method: "POST",
@@ -16001,7 +17196,10 @@ var sections40 = [
16001
17196
  { label: "List Datasets", slug: "list-quality-datasets" },
16002
17197
  { label: "Quality Entries", slug: "quality-entries" }
16003
17198
  ],
16004
- faq: [],
17199
+ faq: [
17200
+ { question: "Do I need to link a dataset to a schema?", answer: "No. The user_schema_id is optional. However, linking to a schema ensures that your ground truth entries use the correct field names and makes benchmark results more meaningful." },
17201
+ { question: "Can I rename a dataset after creation?", answer: "No. Dataset metadata (name, description) is set at creation time. To change it, delete the dataset and create a new one with the desired name." }
17202
+ ],
16005
17203
  mentions: ["create ground truth", "dataset"]
16006
17204
  },
16007
17205
  {
@@ -16011,6 +17209,8 @@ var sections40 = [
16011
17209
  seoTitle: "Get or Delete Ground Truth Dataset \u2014 Talonic Docs",
16012
17210
  description: "Retrieve a ground truth dataset by ID with metadata and entry count, or delete it permanently. Deleting a dataset does not remove associated benchmark results.",
16013
17211
  content: [
17212
+ { type: "paragraph", text: "Retrieve a dataset with its metadata and ground truth entries, or delete it permanently. The GET response includes a `samples` array with the actual ground truth entries, allowing you to inspect the expected values for each document." },
17213
+ { type: "callout", variant: "warning", text: "Deleting a dataset is permanent. However, benchmark results that used this dataset are retained for historical reference. The benchmark will show the dataset_id but the dataset itself will no longer be retrievable." },
16014
17214
  {
16015
17215
  type: "endpoint",
16016
17216
  method: "GET",
@@ -16096,8 +17296,11 @@ var sections40 = [
16096
17296
  { label: "List Datasets", slug: "list-quality-datasets" },
16097
17297
  { label: "Quality Entries", slug: "quality-entries" }
16098
17298
  ],
16099
- faq: [],
16100
- mentions: ["ground truth detail", "delete dataset"]
17299
+ faq: [
17300
+ { question: "Are benchmark results deleted when I delete a dataset?", answer: "No. Benchmark results are retained for historical reference even after the source dataset is deleted." },
17301
+ { question: "Does the GET response include all entries?", answer: "Yes. The `samples` array contains all ground truth entries in the dataset. For very large datasets, this response may be sizable." }
17302
+ ],
17303
+ mentions: ["ground truth detail", "delete dataset", "samples"]
16101
17304
  },
16102
17305
  {
16103
17306
  slug: "quality-entries",
@@ -16107,6 +17310,7 @@ var sections40 = [
16107
17310
  description: "List, add, import, or delete entries in a ground truth dataset. Entries represent individual verified data points used for benchmarking extraction accuracy.",
16108
17311
  content: [
16109
17312
  { type: "paragraph", text: "Entries are the individual verified data points within a ground truth dataset. Add them manually, or import in bulk via CSV." },
17313
+ { type: "callout", variant: "info", text: "Each entry maps a `document_id` to an `expected_data` object containing the verified field values. Field keys in `expected_data` should match the field names used in your extraction schema for accurate benchmark comparisons." },
16110
17314
  {
16111
17315
  type: "endpoint",
16112
17316
  method: "GET",
@@ -16190,6 +17394,7 @@ var sections40 = [
16190
17394
  "created_at": "2024-09-05T12:00:00.000Z"
16191
17395
  }`
16192
17396
  },
17397
+ { type: "paragraph", text: "For bulk entry creation, use the CSV import endpoint. The CSV must have `document_id` as the first column, with remaining columns matching your schema field names." },
16193
17398
  {
16194
17399
  type: "endpoint",
16195
17400
  method: "POST",
@@ -16256,9 +17461,11 @@ var sections40 = [
16256
17461
  { label: "Create Benchmark", slug: "create-benchmark" }
16257
17462
  ],
16258
17463
  faq: [
16259
- { question: "What CSV format is expected for import?", answer: "The first column must be document_id. Remaining columns should match the schema field names. Header row is required." }
17464
+ { question: "What CSV format is expected for import?", answer: "The first column must be document_id. Remaining columns should match the schema field names. Header row is required." },
17465
+ { question: "Can I add multiple entries for the same document?", answer: "Each document should have one entry per dataset. Duplicate document_id rows in CSV imports are skipped and counted in the `skipped` total." },
17466
+ { question: "What happens if expected_data field names do not match the schema?", answer: "Unmatched fields are stored but ignored during benchmark comparison. Only fields present in both the ground truth entry and the extraction output are compared." }
16260
17467
  ],
16261
- mentions: ["entries", "ground truth values", "CSV import"]
17468
+ mentions: ["entries", "ground truth values", "CSV import", "bulk import"]
16262
17469
  },
16263
17470
  {
16264
17471
  slug: "list-benchmarks",
@@ -16267,6 +17474,7 @@ var sections40 = [
16267
17474
  seoTitle: "List Benchmark Runs \u2014 Talonic Docs",
16268
17475
  description: "List benchmark runs that compare extraction results against ground truth datasets. Each run produces per-field accuracy metrics.",
16269
17476
  content: [
17477
+ { type: "paragraph", text: "Benchmark runs compare your extraction output against ground truth datasets to produce per-field accuracy scores. Each run evaluates every document in the dataset and produces an `accuracy_overall` score along with per-field breakdowns. Use benchmarks to track extraction quality over time and measure the impact of schema or pipeline changes." },
16270
17478
  {
16271
17479
  type: "endpoint",
16272
17480
  method: "GET",
@@ -16362,8 +17570,11 @@ var sections40 = [
16362
17570
  { label: "Create Benchmark", slug: "create-benchmark" },
16363
17571
  { label: "Benchmark Results", slug: "get-benchmark-results" }
16364
17572
  ],
16365
- faq: [],
16366
- mentions: ["benchmarks", "accuracy", "quality runs"]
17573
+ faq: [
17574
+ { question: "What benchmark statuses are possible?", answer: "A benchmark starts as `queued` (waiting to start), moves to `running` (evaluating documents), and ends as either `completed` (results available) or `failed` (an error occurred during evaluation)." },
17575
+ { question: "Why is accuracy_overall null?", answer: "Accuracy scores are only computed after the benchmark run completes. While the status is `queued` or `running`, accuracy fields are null." }
17576
+ ],
17577
+ mentions: ["benchmarks", "accuracy", "quality runs", "per-field accuracy"]
16367
17578
  },
16368
17579
  {
16369
17580
  slug: "create-benchmark",
@@ -16372,6 +17583,8 @@ var sections40 = [
16372
17583
  seoTitle: "Create Benchmark Run \u2014 Talonic Docs",
16373
17584
  description: "Start a benchmark run that compares a job run output against a ground truth dataset. Produces per-field accuracy scores and overall metrics.",
16374
17585
  content: [
17586
+ { type: "paragraph", text: "Start a new benchmark run that evaluates your current extraction output against a ground truth dataset. The benchmark compares each document in the dataset entry-by-entry and field-by-field, producing an overall accuracy score and per-field breakdowns." },
17587
+ { type: "callout", variant: "info", text: "Benchmark runs are asynchronous. The endpoint returns immediately with status `queued`. Poll the benchmark detail endpoint or list benchmarks to check when the run completes." },
16375
17588
  {
16376
17589
  type: "endpoint",
16377
17590
  method: "POST",
@@ -16448,8 +17661,11 @@ var sections40 = [
16448
17661
  { label: "List Benchmarks", slug: "list-benchmarks" },
16449
17662
  { label: "Benchmark Results", slug: "get-benchmark-results" }
16450
17663
  ],
16451
- faq: [],
16452
- mentions: ["create benchmark", "run comparison"]
17664
+ faq: [
17665
+ { question: "Can I run multiple benchmarks simultaneously?", answer: "Yes. Benchmark runs are independent and can execute in parallel against different datasets or the same dataset." },
17666
+ { question: "How long does a benchmark take to complete?", answer: "Duration depends on the number of entries in the dataset. A 50-entry dataset typically completes in a few seconds. Poll the benchmark detail to track progress." }
17667
+ ],
17668
+ mentions: ["create benchmark", "run comparison", "accuracy evaluation"]
16453
17669
  },
16454
17670
  {
16455
17671
  slug: "get-benchmark-results",
@@ -16458,6 +17674,7 @@ var sections40 = [
16458
17674
  seoTitle: "Benchmark Results & Comparison \u2014 Talonic Docs",
16459
17675
  description: "Get per-field accuracy results for a benchmark run, or compare two benchmark runs side by side to track extraction quality improvements over time.",
16460
17676
  content: [
17677
+ { type: "paragraph", text: "Retrieve per-document accuracy results for a completed benchmark run, showing which fields matched and which diverged from the ground truth. Each result includes the extracted value, expected value, and whether they matched. Use the compare endpoint to track accuracy improvements across runs." },
16461
17678
  {
16462
17679
  type: "endpoint",
16463
17680
  method: "GET",
@@ -16499,6 +17716,7 @@ var sections40 = [
16499
17716
  ]
16500
17717
  }`
16501
17718
  },
17719
+ { type: "paragraph", text: "To track accuracy trends over time, compare two benchmark runs side by side. The `accuracy_delta` shows the difference in overall accuracy between the two runs." },
16502
17720
  {
16503
17721
  type: "endpoint",
16504
17722
  method: "GET",
@@ -16565,9 +17783,11 @@ var sections40 = [
16565
17783
  { label: "List Datasets", slug: "list-quality-datasets" }
16566
17784
  ],
16567
17785
  faq: [
16568
- { question: "How is field accuracy calculated?", answer: "Each extracted value is compared to the ground truth entry for the same document and field. Accuracy is the ratio of correct matches to total entries." }
17786
+ { question: "How is field accuracy calculated?", answer: "Each extracted value is compared to the ground truth entry for the same document and field. Accuracy is the ratio of correct matches to total entries." },
17787
+ { question: "What does a negative accuracy_delta mean?", answer: "A negative delta means run_a has lower accuracy than run_b. For example, -0.03 means run_a is 3 percentage points less accurate. Pass the newer run as run_a and the older run as run_b to see improvement as a positive delta." },
17788
+ { question: "Can I compare runs from different datasets?", answer: "Yes, but the comparison only shows overall accuracy differences. Per-field comparisons are most meaningful when both runs use the same ground truth dataset." }
16569
17789
  ],
16570
- mentions: ["accuracy", "benchmark results", "comparison", "field-level metrics"]
17790
+ mentions: ["accuracy", "benchmark results", "comparison", "field-level metrics", "accuracy delta"]
16571
17791
  }
16572
17792
  ];
16573
17793
 
@@ -16581,6 +17801,16 @@ var sections41 = [
16581
17801
  description: "List all routing rules ordered by priority. Routing rules match incoming documents by conditions and apply actions like schema assignment, job triggering, or team routing.",
16582
17802
  content: [
16583
17803
  { type: "paragraph", text: "Routing rules automate document processing by matching incoming documents against conditions and applying actions. Rules are evaluated in priority order \u2014 the first matching rule wins." },
17804
+ { type: "paragraph", text: "When a document is classified (after OCR and type detection), the routing engine evaluates each active rule in priority order. The first rule whose conditions match is applied, and subsequent rules are skipped. This lets you build a priority chain: specific rules at the top, catch-all rules at the bottom." },
17805
+ {
17806
+ type: "list",
17807
+ items: [
17808
+ "Rules trigger on `document_classified` events after OCR and type detection.",
17809
+ "Conditions can match `document_type`, `source`, `language`, `sensitivity`, and other metadata.",
17810
+ "Actions include schema assignment, job triggering, and team routing.",
17811
+ "Lower priority numbers are evaluated first."
17812
+ ]
17813
+ },
16584
17814
  {
16585
17815
  type: "endpoint",
16586
17816
  method: "GET",
@@ -16669,7 +17899,9 @@ var sections41 = [
16669
17899
  { label: "Reorder Rules", slug: "reorder-routing-rules" }
16670
17900
  ],
16671
17901
  faq: [
16672
- { question: "How are routing rules evaluated?", answer: "Rules are evaluated in priority order (lowest number first). The first rule whose conditions match the incoming document is applied." }
17902
+ { question: "How are routing rules evaluated?", answer: "Rules are evaluated in priority order (lowest number first). The first rule whose conditions match the incoming document is applied." },
17903
+ { question: "What happens if no rule matches a document?", answer: "If no routing rule matches, the document proceeds through the default pipeline without any schema assignment or special routing. You can add a low-priority catch-all rule to handle unmatched documents." },
17904
+ { question: "Can I scope a rule to a specific source connection?", answer: "Yes. Set source_connection_id to limit the rule to documents ingested from a specific source (e.g. a particular Google Drive folder or S3 bucket). Documents from other sources will skip the rule." }
16673
17905
  ],
16674
17906
  mentions: ["routing rules", "priority", "document routing"]
16675
17907
  },
@@ -16680,6 +17912,8 @@ var sections41 = [
16680
17912
  seoTitle: "Create Routing Rule Endpoint \u2014 Talonic Docs",
16681
17913
  description: "Create a new routing rule with conditions on document properties and actions to apply when matched. Conditions can match document type, source, and other metadata.",
16682
17914
  content: [
17915
+ { type: "paragraph", text: 'Create a rule that automatically applies actions to incoming documents based on their metadata. Conditions define what to match (e.g. document type equals "invoice"), and actions define what to do (e.g. assign the finance schema). Rules are evaluated on every `document_classified` event.' },
17916
+ { type: "callout", variant: "info", text: "New rules are created with `is_active: true` by default. If you want to test a rule before activating it, create it, then immediately disable it via `PATCH /v1/routing-rules/:id` with `is_active: false`." },
16683
17917
  {
16684
17918
  type: "endpoint",
16685
17919
  method: "POST",
@@ -16768,7 +18002,8 @@ var sections41 = [
16768
18002
  ],
16769
18003
  faq: [
16770
18004
  { question: "What condition fields are available?", answer: "Conditions can match on `document_type`, `source`, `language`, `sensitivity`, and other document metadata fields." },
16771
- { question: "Can a rule have multiple actions?", answer: "Yes. Actions are executed in order. Common combinations include assigning a schema and triggering a job." }
18005
+ { question: "Can a rule have multiple actions?", answer: "Yes. Actions are executed in order. Common combinations include assigning a schema and triggering a job." },
18006
+ { question: "What is the default priority?", answer: "If you omit the priority field, it defaults to 100. Use the reorder endpoint to adjust priorities after creation." }
16772
18007
  ],
16773
18008
  mentions: ["create rule", "conditions", "actions", "assign_schema", "trigger_job", "route_to"]
16774
18009
  },
@@ -16779,6 +18014,7 @@ var sections41 = [
16779
18014
  seoTitle: "Get, Update, Delete Routing Rule \u2014 Talonic Docs",
16780
18015
  description: "Retrieve, update, or delete a routing rule by ID. Update conditions, actions, priority, or enabled state. Deleting a rule does not affect previously routed documents.",
16781
18016
  content: [
18017
+ { type: "paragraph", text: "Retrieve, update, or delete a single routing rule. Updates take effect immediately \u2014 the next `document_classified` event will use the updated rule. Deleting a rule does not retroactively affect documents that were already routed by it." },
16782
18018
  {
16783
18019
  type: "endpoint",
16784
18020
  method: "GET",
@@ -16946,7 +18182,10 @@ var sections41 = [
16946
18182
  { label: "List Routing Rules", slug: "list-routing-rules" },
16947
18183
  { label: "Reorder Rules", slug: "reorder-routing-rules" }
16948
18184
  ],
16949
- faq: [],
18185
+ faq: [
18186
+ { question: "Do updates affect already-routed documents?", answer: "No. Rule changes only affect future document_classified events. Documents that were already routed by the previous version of the rule are not retroactively updated." },
18187
+ { question: "Can I temporarily disable a rule without deleting it?", answer: "Yes. Use PATCH with is_active: false to disable the rule. It will be skipped during evaluation but retained for future re-activation." }
18188
+ ],
16950
18189
  mentions: ["update rule", "delete rule", "manage routing"]
16951
18190
  },
16952
18191
  {
@@ -16956,6 +18195,8 @@ var sections41 = [
16956
18195
  seoTitle: "Reorder Routing Rules Endpoint \u2014 Talonic Docs",
16957
18196
  description: "Reorder routing rules by providing an ordered array of rule IDs. Priority values are reassigned sequentially based on the new order.",
16958
18197
  content: [
18198
+ { type: "paragraph", text: "Reassign priority values for all routing rules at once. Pass an ordered array of rule IDs \u2014 the first ID receives priority 1, the second receives priority 2, and so on. This is the recommended way to change evaluation order after initial creation." },
18199
+ { type: "callout", variant: "warning", text: "All active rule IDs must be included in the `rule_ids` array. Omitting any rule returns a validation error. Inactive rules should also be included to maintain a consistent priority sequence." },
16959
18200
  {
16960
18201
  type: "endpoint",
16961
18202
  method: "POST",
@@ -17007,7 +18248,8 @@ var sections41 = [
17007
18248
  { label: "Manage Routing Rule", slug: "manage-routing-rule" }
17008
18249
  ],
17009
18250
  faq: [
17010
- { question: "Do I need to include all rule IDs?", answer: "Yes. All active rule IDs must be included in the array. Omitting a rule ID will result in an error." }
18251
+ { question: "Do I need to include all rule IDs?", answer: "Yes. All active rule IDs must be included in the array. Omitting a rule ID will result in an error." },
18252
+ { question: "Does reordering affect currently processing documents?", answer: "No. Reordering only affects future document_classified events. Documents currently being processed continue with their already-matched rule." }
17011
18253
  ],
17012
18254
  mentions: ["reorder", "priority", "rule ordering"]
17013
18255
  }
@@ -17026,6 +18268,18 @@ var sections42 = [
17026
18268
  type: "paragraph",
17027
18269
  text: "Billing settings control whether AI agents can autonomously top up credits. A human must enable auto top-up and configure the threshold and amount before agents can call the topup endpoint."
17028
18270
  },
18271
+ {
18272
+ type: "paragraph",
18273
+ text: "The auto top-up system is designed with a **human-in-the-loop** safety model. An organization admin enables it once with a threshold and amount, and from that point agents can autonomously maintain the credit balance without further human intervention."
18274
+ },
18275
+ {
18276
+ type: "list",
18277
+ items: [
18278
+ "Read current settings with `GET /v1/billing/settings`.",
18279
+ "Enable auto top-up and set threshold/amount with `PATCH /v1/billing/settings`.",
18280
+ "Only users with `write` scope can modify settings \u2014 agents cannot enable auto top-up themselves."
18281
+ ]
18282
+ },
17029
18283
  {
17030
18284
  type: "endpoint",
17031
18285
  method: "GET",
@@ -17118,6 +18372,14 @@ var sections42 = [
17118
18372
  {
17119
18373
  question: "Who can enable auto top-up?",
17120
18374
  answer: "Only a human with write access can enable auto top-up via PATCH /v1/billing/settings. Agents cannot enable it themselves."
18375
+ },
18376
+ {
18377
+ question: "What are the limits for auto_topup_amount?",
18378
+ answer: "The minimum is 1,000 credits and the maximum is 500,000 credits per top-up. Values outside this range return a 400 error."
18379
+ },
18380
+ {
18381
+ question: "Does disabling auto top-up affect the current balance?",
18382
+ answer: "No. Disabling auto top-up only prevents future autonomous top-ups. The current credit balance is unchanged."
17121
18383
  }
17122
18384
  ],
17123
18385
  mentions: ["billing settings", "auto top-up", "threshold", "credits"]
@@ -17133,6 +18395,11 @@ var sections42 = [
17133
18395
  type: "paragraph",
17134
18396
  text: "AI agents call this endpoint to autonomously add credits when the balance falls below the configured threshold. **A human must first enable auto top-up** via `PATCH /v1/billing/settings`."
17135
18397
  },
18398
+ {
18399
+ type: "paragraph",
18400
+ text: "This endpoint is idempotent when the balance is already above the threshold \u2014 it returns `topped_up: false` without adding credits. Agents can safely call it on every extraction cycle without risk of over-provisioning."
18401
+ },
18402
+ { type: "callout", variant: "info", text: "Combine this endpoint with the `X-Talonic-Balance-Credits` response header from `POST /v1/extract` to build an autonomous credit management loop. Check the balance header after each extraction and call top-up when it drops below your threshold." },
17136
18403
  {
17137
18404
  type: "endpoint",
17138
18405
  method: "POST",
@@ -17213,6 +18480,10 @@ var sections42 = [
17213
18480
  {
17214
18481
  question: "What scope does the API key need?",
17215
18482
  answer: "The billing scope. This must be explicitly granted when creating the API key \u2014 existing keys do not have it by default."
18483
+ },
18484
+ {
18485
+ question: "Is it safe to call top-up on every request?",
18486
+ answer: "Yes. When the balance is above the threshold, the endpoint returns topped_up: false without adding credits. There is no cost or side effect for a no-op call."
17216
18487
  }
17217
18488
  ],
17218
18489
  mentions: ["auto top-up", "agent", "billing scope", "credits", "autonomous"]
@@ -17228,6 +18499,10 @@ var sections42 = [
17228
18499
  type: "paragraph",
17229
18500
  text: "Every successful `POST /v1/extract` response includes cost headers so AI agents can track spending without a separate API call:"
17230
18501
  },
18502
+ {
18503
+ type: "paragraph",
18504
+ text: "Cost tracking is built into the extraction response to support autonomous agent workflows. Instead of polling a separate balance endpoint, agents read the cost headers inline and decide whether to trigger a top-up. The `Cells-Resolved-Registry` vs `Cells-Resolved-AI` breakdown shows how many fields were resolved from cached registry data (free) versus AI extraction (metered)."
18505
+ },
17231
18506
  {
17232
18507
  type: "param-table",
17233
18508
  title: "Response headers",
@@ -17253,7 +18528,8 @@ X-Talonic-Cells-Resolved-AI: 1`
17253
18528
  {
17254
18529
  type: "paragraph",
17255
18530
  text: "Agents can read these headers after every extraction to decide whether to call `POST /v1/billing/topup` to replenish credits."
17256
- }
18531
+ },
18532
+ { type: "callout", variant: "info", text: "Registry-resolved cells are free because the value was already known from a previous extraction. Over time, as your field registry grows, more cells resolve from the registry and fewer require paid AI extraction." }
17257
18533
  ],
17258
18534
  related: [
17259
18535
  { label: "POST /v1/extract", slug: "post-extract" },
@@ -17264,6 +18540,14 @@ X-Talonic-Cells-Resolved-AI: 1`
17264
18540
  {
17265
18541
  question: "Are cost headers included on async (202) responses?",
17266
18542
  answer: "No. Cost headers are only included on synchronous 200 responses where extraction completes immediately. Async responses return a poll URL instead."
18543
+ },
18544
+ {
18545
+ question: "Are cost headers included on batch extraction responses?",
18546
+ answer: "No. Batch extraction (processing_mode=batch) defers extraction to the provider batch API. Cost is calculated when the batch completes, not at upload time."
18547
+ },
18548
+ {
18549
+ question: "What is the credit-to-EUR conversion rate?",
18550
+ answer: "The rate is configured per organization and visible in the billing settings. One credit typically equals EUR 0.001, but this may vary by plan tier."
17267
18551
  }
17268
18552
  ],
17269
18553
  mentions: ["cost headers", "X-Talonic-Cost", "credits", "balance", "cells resolved"]