@talonic/docs 0.20.13 → 0.20.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/content.js +149 -41
  2. package/package.json +1 -1
package/dist/content.js CHANGED
@@ -574,11 +574,11 @@ var sections = [
574
574
  faq: [
575
575
  {
576
576
  question: "What is the Field Registry?",
577
- answer: "The Field Registry is a unified knowledge graph of all canonical fields discovered across your documents, organized by tier, clustered semantically, and enriched with master extraction instructions."
577
+ answer: "The Field Registry is a unified knowledge graph of all canonical fields discovered across your documents, organized by tier, clustered semantically, and enriched with master extraction instructions. Fields progress through three tiers as they mature: Tier 3 (emerging, newly discovered), Tier 2 (established, promoted after repeated occurrence), and Tier 1 (universal, core fields present across most document types). Each tier transition triggers instruction synthesis so the platform learns the optimal way to extract that field."
578
578
  },
579
579
  {
580
580
  question: "What is provenance in Talonic?",
581
- answer: "Provenance is per-cell metadata that tracks which pipeline phase filled the value, the confidence score, an AI reasoning trace, and source references back to the original document."
581
+ answer: "Provenance is per-cell metadata that tracks which pipeline phase filled the value, the confidence score, an AI reasoning trace, and source references back to the original document. You can inspect provenance by hovering any cell in the job results grid to see its confidence score, then clicking to expand the full provenance panel. The panel shows which strategy resolved the value, the raw source text it was derived from, and the AI reasoning chain when applicable."
582
582
  },
583
583
  {
584
584
  question: "How do Cases form?",
@@ -737,11 +737,11 @@ var sections = [
737
737
  faq: [
738
738
  {
739
739
  question: "What is the fastest way to get started with Talonic?",
740
- answer: "Upload documents in Sources, then go to Structuring > Runs > New to create your first extraction job. Results appear progressively as each phase completes."
740
+ answer: "Upload documents in Sources, then go to Structuring > Runs > New to create your first extraction job. Results appear progressively as each phase completes. For a single document, use the quick extract shortcut (Cmd+J / Ctrl+J) to upload and process from any page without navigating to Sources first. Most users see their first structured output within two to three minutes of uploading."
741
741
  },
742
742
  {
743
743
  question: "How is the Talonic platform organized?",
744
- answer: "The platform is organized into three primary sections: Sources (document ingest), Structuring (processing & validation), and Outputs (delivery to downstream systems)."
744
+ answer: "The platform is organized into three primary sections: Sources (document ingest), Structuring (processing & validation), and Outputs (delivery to downstream systems). Sources handles all document ingestion \u2014 manual uploads, cloud connectors, email inboxes, and API ingestion. Structuring is where you define schemas, run extraction jobs, review results, and approve output. Outputs manages delivery bindings that push approved data to webhooks, SFTP, cloud storage, and other downstream systems."
745
745
  },
746
746
  {
747
747
  question: "Do I need to define a schema before processing documents?",
@@ -1182,7 +1182,7 @@ var sections3 = [
1182
1182
  },
1183
1183
  {
1184
1184
  question: "How does Talonic handle image files?",
1185
- answer: "Image files (PNG, JPG, JPEG, GIF, WEBP) are sent to AI for multimodal visual extraction."
1185
+ answer: "Image files (PNG, JPG, JPEG, GIF, WEBP) are sent to AI for multimodal visual extraction. The AI model sees the image directly and extracts data visually, which is useful for photos of receipts, scanned handwritten notes, or diagrams. If an image was previously OCR'd and produced meaningful Markdown (more than 100 characters), the system uses the Markdown extraction path instead, which enables richer quality metrics and confidence scoring."
1186
1186
  },
1187
1187
  {
1188
1188
  question: "How does Talonic handle large PDF files?",
@@ -1511,7 +1511,7 @@ var sections3 = [
1511
1511
  },
1512
1512
  {
1513
1513
  question: "Can routing rules fully automate my document processing pipeline?",
1514
- answer: "Yes. By combining routing rules with source connectors and delivery bindings, you can create a fully automated pipeline: documents arrive from a connected source, routing rules assign schemas and trigger extraction jobs, and delivery bindings push approved results to downstream systems."
1514
+ answer: "Yes. By combining routing rules with source connectors and delivery bindings, you can create a fully automated pipeline: documents arrive from a connected source, routing rules assign schemas and trigger extraction jobs, and delivery bindings push approved results to downstream systems. For example, a Google Drive folder receiving weekly invoices can be connected as a source with a routing rule that auto-assigns your Invoice schema and triggers extraction. A delivery binding then pushes approved results to your ERP via webhook \u2014 zero manual steps required."
1515
1515
  }
1516
1516
  ],
1517
1517
  mentions: ["routing rules", "auto-assign", "schema assignment", "document workflows"]
@@ -2212,6 +2212,20 @@ var sections5 = [
2212
2212
  type: "paragraph",
2213
2213
  text: "When configuring a field, start with the basics \u2014 name, type, and registry mapping \u2014 then layer on advanced features as needed. For example, add a **format constraint** to enforce a date pattern, attach a **reference table** for code lookups, or define **capture submoves** to control the exact extraction sequence. Features compose independently, so you can mix and match without conflicts."
2214
2214
  },
2215
+ {
2216
+ type: "list",
2217
+ ordered: false,
2218
+ items: [
2219
+ "**Format constraint** \u2014 Regex validation with configurable mismatch behavior (clear, flag, or replace).",
2220
+ "**Modifiers** \u2014 Post-processing pipeline: format (date/number conversion), alias (value mapping), max_length (truncation).",
2221
+ "**Constraints** \u2014 Validation rules: required, enum, date-format, length, cross-field expressions.",
2222
+ "**Bypass strategy** \u2014 Skip AI extraction: constant value, deterministic ID generator, or reference table lookup.",
2223
+ "**Reference table** \u2014 Key-value pairs for code mapping with a 3-tier lookup cascade (normalization, fuzzy, AI).",
2224
+ "**Manual instruction** \u2014 User-written extraction directive that overrides the AI-synthesized master instruction.",
2225
+ "**Capture submoves** \u2014 Ordered extraction sequence: match (field matching), compute (calculation), reason (LLM inference).",
2226
+ "**Output name** \u2014 Remap the field name in delivery and export output without changing the internal schema name."
2227
+ ]
2228
+ },
2215
2229
  {
2216
2230
  type: "paragraph",
2217
2231
  text: "The **modifier pipeline** runs in a fixed order during Phase 4 of the extraction pipeline: format transforms first (converting dates or numbers to your target format), then alias mapping (replacing values using a lookup), and finally max_length truncation. Constraint evaluation happens after all modifiers have been applied, so constraints validate the final transformed value, not the raw extraction."
@@ -2369,6 +2383,10 @@ var sections5 = [
2369
2383
  type: "paragraph",
2370
2384
  text: "Reference tables are used in two pipeline stages. In **Phase 1**, the lookup cascade runs as part of the resolve step, mapping extracted labels to codes without any AI calls (Tier 1 and Tier 2). In **Phase 3**, the cascade runs again on values produced by Phase 2's AI extraction, normalizing free-text AI output to your canonical codes. This two-pass approach ensures maximum code coverage across the entire pipeline."
2371
2385
  },
2386
+ {
2387
+ type: "paragraph",
2388
+ text: 'For example, consider a "Contract Type" field with a reference table mapping codes to labels: `std_master` = "Master Agreement", `std_service` = "Service Agreement", `std_nda` = "Non-Disclosure Agreement". When the AI extracts "Frame Agreement" from a document, the Phase 3 lookup cascade normalizes it: Tier 1 finds no exact match, Tier 2 fuzzy matching scores "Frame Agreement" against "Master Agreement" at ~0.65 (below the threshold), so Tier 3 AI fallback maps it to `std_master` at 0.50 confidence. Adding "Frame Agreement" as a synonym pointing to `std_master` would promote this to a Tier 1 match (0.95 confidence) in future runs.'
2389
+ },
2372
2390
  {
2373
2391
  type: "paragraph",
2374
2392
  text: 'For best results, include common variations and abbreviations as separate value entries all pointing to the same key. For example, if your code is `US`, add values for "United States", "USA", "U.S.A.", and "United States of America". The more variations you cover, the more values resolve at Tier 1 (highest confidence) without falling through to fuzzy or AI matching.'
@@ -2441,15 +2459,15 @@ var sections5 = [
2441
2459
  faq: [
2442
2460
  {
2443
2461
  question: "How does schema versioning work?",
2444
- answer: "Templates use a workshop system with three states: Live (published, read-only), Workshop (mutable draft), and Version History (timeline with diffs). Breaking changes like field removals or type changes are detected on promotion. Every published version is immutable, creating a complete audit trail of how your schema evolved over time."
2462
+ answer: "Templates use a workshop system with three states: Live (published, read-only), Workshop (mutable draft), and Version History (timeline with diffs). Breaking changes like field removals or type changes are detected on promotion. Every published version is immutable, creating a complete audit trail of how your schema evolved over time. The diff view highlights added fields, removed fields, type changes, and updated instructions between any two versions."
2445
2463
  },
2446
2464
  {
2447
2465
  question: "What are breaking changes in a schema?",
2448
- answer: "Breaking changes include field removals and data type changes. The system detects and warns about these when promoting a draft to live, helping you avoid unintended downstream impacts. If a downstream delivery binding depends on a specific field, the warning helps you assess the impact before committing the change."
2466
+ answer: "Breaking changes include field removals and data type changes. The system detects and warns about these when promoting a draft to live, helping you avoid unintended downstream impacts. If a downstream delivery binding depends on a specific field, the warning helps you assess the impact before committing the change. Always run a Test Extraction on representative documents before publishing a draft that includes breaking changes."
2449
2467
  },
2450
2468
  {
2451
2469
  question: "Can I revert to a previous schema version?",
2452
- answer: "Version history is append-only, so you cannot revert directly. However, you can review any previous version in the timeline, compare it with the current live version using the diff view, and manually re-add fields or settings that were changed. This design ensures that every historical job result always references the exact schema version that produced it."
2470
+ answer: "Version history is append-only, so you cannot revert directly. However, you can review any previous version in the timeline, compare it with the current live version using the diff view, and manually re-add fields or settings that were changed. This design ensures that every historical job result always references the exact schema version that produced it. For safe iteration, always use the Workshop draft to test changes via Test Extraction before publishing a new version."
2453
2471
  }
2454
2472
  ],
2455
2473
  mentions: ["versioning", "drafts", "workshop", "live version", "breaking changes"]
@@ -2554,6 +2572,10 @@ var sections5 = [
2554
2572
  }
2555
2573
  ]
2556
2574
  },
2575
+ {
2576
+ type: "paragraph",
2577
+ text: 'For example, to configure date formatting for a European accounting system: set `date_format` to `DD.MM.YYYY` so dates render as `15.03.2025` instead of the default `YYYY/MM/DD`. Pair this with `number_locale: "de-DE"` for comma-decimal formatting (`1.234,56`) and `delimiter: ";"` so CSV files open correctly in Excel on European locale machines. Save this configuration as a shared dialect named "EU Accounting" and attach it to every schema that feeds into that system \u2014 all future exports and deliveries will use consistent formatting without per-schema configuration.'
2578
+ },
2557
2579
  {
2558
2580
  type: "paragraph",
2559
2581
  text: "When working with international data, configure the dialect to match your downstream system requirements. For example, set **number_locale** to `fr-FR` for European comma-decimal formatting, switch the **delimiter** to semicolon for CSV compatibility, and choose **UTF-8-BOM** encoding if your data will be opened in Excel. Creating a shared dialect and reusing it across schemas ensures consistent formatting across all your exports."
@@ -2640,6 +2662,17 @@ var sections5 = [
2640
2662
  type: "paragraph",
2641
2663
  text: 'Use bypass strategies for fields whose values are known ahead of time or can be derived without reading the document. For example, set a **constant** of `"USD"` for a currency field that is always the same, or use a **generator** to produce a deterministic ID for each row. Fields with bypass strategies skip the AI extraction phase entirely, reducing processing time and credit usage.'
2642
2664
  },
2665
+ {
2666
+ type: "list",
2667
+ ordered: false,
2668
+ items: [
2669
+ "**none** \u2014 Use when a field should always be blank. Useful for placeholder columns in your output that will be populated by a downstream system.",
2670
+ '**constant** \u2014 Use when the value never varies across documents (e.g., currency `"USD"`, data source `"talonic"`, processing status `"pending"`).',
2671
+ "**generator (deterministic-id)** \u2014 Use when you need a unique, reproducible identifier for each row. Produces a hash-based ID from entity attributes.",
2672
+ "**generator (context-fallback)** \u2014 Use when the value can be derived from other fields in the schema without reading the document.",
2673
+ "**reference** \u2014 Use when the value should be looked up from a reference table using a `key_expression` that references another schema field (e.g., map supplier name to ERP vendor code)."
2674
+ ]
2675
+ },
2643
2676
  {
2644
2677
  type: "paragraph",
2645
2678
  text: "The **reference** bypass strategy is particularly powerful for enrichment fields. Define a `key_expression` that references another field in the schema (e.g., the supplier name), and the system will automatically look up the corresponding code from your reference table without any AI involvement. This is ideal for mapping extracted entity names to internal system identifiers, ERP codes, or classification labels."
@@ -2737,15 +2770,15 @@ var sections5 = [
2737
2770
  faq: [
2738
2771
  {
2739
2772
  question: "What are format constraints?",
2740
- answer: "Format constraints apply regex-based validation to schema fields, evaluated post-extraction in Phase 4. Mismatch behaviors: empty (clear), flag (amber dot), or constant (replace with a fixed value)."
2773
+ answer: 'Format constraints apply regex-based validation to schema fields, evaluated post-extraction in Phase 4 after all transforms have been applied. Mismatch behaviors: empty (clear the cell, the default), flag (keep the value but show an amber dot in the results grid), or constant (replace with a fixed value like "INVALID" or "N/A"). The constraint validates the final transformed value, not the raw extraction.'
2741
2774
  },
2742
2775
  {
2743
2776
  question: "Are original values preserved when format constraints clear a cell?",
2744
- answer: "Yes. Original values are always preserved for audit in the original_extractions table, regardless of the mismatch behavior applied."
2777
+ answer: "Yes. Original values are always preserved for audit in the original_extractions table, regardless of the mismatch behavior applied. This means you can always review what the AI originally extracted before the constraint was applied, giving you full visibility into the extraction pipeline."
2745
2778
  },
2746
2779
  {
2747
2780
  question: "Can I use case-insensitive regex patterns?",
2748
- answer: "Yes. Use the (?i) inline flag at the start of your pattern for case-insensitive matching. The evaluator supports standard JavaScript regex syntax with inline flags."
2781
+ answer: "Yes. Use the (?i) inline flag at the start of your pattern for case-insensitive matching. The evaluator supports standard JavaScript regex syntax including character classes, alternation, and lookahead assertions. ReDoS protection is built in \u2014 nested quantifiers are rejected and input is capped at 1,000 characters."
2749
2782
  }
2750
2783
  ],
2751
2784
  mentions: [
@@ -2855,11 +2888,11 @@ var sections6 = [
2855
2888
  faq: [
2856
2889
  {
2857
2890
  question: "What are the four phases of the extraction pipeline?",
2858
- answer: "Phase 1: Resolve (graph matches, ~30% of cells), Phase 2: Agent (AI strategies), Phase 3: Validation (cross-field checks), and Phase 4: Re-read (targeted gap filling)."
2891
+ answer: "Phase 1: Resolve (graph matches and deterministic lookups, fills 30-80% of cells depending on registry maturity). Phase 2: Agent (AI extraction for remaining gaps, grouped into batches of 10 fields per call). Phase 3: Validation (cross-field checks and reference table re-normalization of AI output). Phase 4: Re-read (targeted gap filling with full grid context, plus deterministic transforms and format constraint evaluation)."
2859
2892
  },
2860
2893
  {
2861
2894
  question: "Can I see results before all phases complete?",
2862
- answer: "Yes. Results are visible as each phase completes. The fill rate increases progressively through the pipeline."
2895
+ answer: "Yes. The grid is flushed to the database after each phase, enabling progressive rendering in the UI. You can watch cells fill in real time and begin reviewing Phase 1 results while Phase 2 is still running. The phase timeline on the job detail page shows which phase is active and the cumulative fill rate at each stage."
2863
2896
  },
2864
2897
  {
2865
2898
  question: "Why does the pipeline use multiple phases instead of a single AI call?",
@@ -2922,6 +2955,10 @@ var sections6 = [
2922
2955
  type: "paragraph",
2923
2956
  text: "The resolution strategies execute in a fixed order: registry transfer first, then raw extraction mapping, then the 3-tier lookup cascade, and finally deterministic compute (formulas like `Total = Unit Price x Quantity`). Each strategy only attempts to fill cells that are still empty after the previous strategy ran. This ordering ensures that the highest-confidence method always gets priority."
2924
2957
  },
2958
+ {
2959
+ type: "paragraph",
2960
+ text: `For example, consider an invoice with a "Vendor Name" field. The system first checks the Field Registry for a direct transfer \u2014 if "Vendor Name" was extracted from a previous document and promoted to Tier 1, it resolves instantly at 0.85+ confidence. If no registry match exists, the raw extraction mapping looks for a semantically equivalent field in the document's extracted data (e.g., "supplier_name"). If that also misses, the 3-tier lookup cascade checks the reference table: exact normalization first (0.95), then fuzzy token overlap (~0.70), then AI fallback (0.50). Only if all four strategies fail does the cell pass to Phase 2 for AI extraction.`
2961
+ },
2925
2962
  {
2926
2963
  type: "callout",
2927
2964
  text: "Phase 1 fill rates improve over time as your Field Registry grows. The more documents you process, the richer the registry becomes, and the more cells Phase 1 can resolve without AI \u2014 reducing both cost and latency for every subsequent job."
@@ -3004,7 +3041,7 @@ var sections6 = [
3004
3041
  },
3005
3042
  {
3006
3043
  type: "paragraph",
3007
- text: "Phase 2 processes documents with grouped extraction calls \u2014 schema fields are divided into batches of up to 10 fields per call to balance extraction quality with throughput. For each document, the agent sends the document text along with the schema field definitions and any already-resolved values from Phase 1 as context. This context-aware approach means the AI can use related values (like a contract start date) to more accurately extract dependent values (like the end date)."
3044
+ text: 'Phase 2 processes documents with grouped extraction calls \u2014 schema fields are divided into batches of up to 10 fields per call to balance extraction quality with throughput. For each document, the agent sends the document text along with the schema field definitions and any already-resolved values from Phase 1 as context. This context-aware approach means the AI can use related values (like a contract start date) to more accurately extract dependent values (like the end date). For example, if Phase 1 resolved "Contract Start Date" to 2025-01-15 via a registry transfer, and the "Contract End Date" cell is still empty, the agent receives the start date as context and can search the document for a corresponding end date with higher precision \u2014 producing a more accurate result than extracting the end date in isolation.'
3008
3045
  },
3009
3046
  {
3010
3047
  type: "paragraph",
@@ -3098,6 +3135,10 @@ var sections6 = [
3098
3135
  type: "paragraph",
3099
3136
  text: "Validation flags are designed to surface the most impactful issues first. The **low_confidence_outlier** flag is particularly useful \u2014 it highlights cells where the system is uncertain in an otherwise high-confidence row, pointing you to the exact cells most likely to contain errors. For large runs with hundreds of documents, filtering by flags and reviewing those cells first can reduce your review time by 80% or more."
3100
3137
  },
3138
+ {
3139
+ type: "paragraph",
3140
+ text: "What gets flagged and why depends on cross-field relationships, not just individual values. A **date_sanity** flag fires when temporal fields contradict each other \u2014 for example, a contract end date that falls before the start date, or a signature date after the effective date. An **amount_mismatch** flag fires when a computed total deviates more than 20% from the product of its component values (e.g., monthly rent times term length versus total contract value). The **unexpected_empty** flag fires when a field that appears in over 80% of documents in your registry is missing from this particular document, suggesting the AI may have missed it rather than it being genuinely absent."
3141
+ },
3101
3142
  {
3102
3143
  type: "callout",
3103
3144
  text: "Validation flags never modify cell values. They are purely informational annotations that help you prioritize review. The actual cell value and confidence score remain unchanged by Phase 3 flagging."
@@ -3166,15 +3207,15 @@ var sections6 = [
3166
3207
  faq: [
3167
3208
  {
3168
3209
  question: "What does Phase 4 Re-read do?",
3169
- answer: "Phase 4 performs context-aware gap filling by re-reading the original document with field instructions and full grid context for each empty or low-confidence cell."
3210
+ answer: "Phase 4 performs context-aware gap filling by re-reading the original document with field instructions and full grid context for each empty or low-confidence cell. Because it has access to all values resolved in earlier phases, it can use surrounding data as clues \u2014 for example, using a resolved start date to locate the corresponding end date more accurately."
3170
3211
  },
3171
3212
  {
3172
3213
  question: "Can Phase 4 overwrite high-confidence values?",
3173
- answer: "No. Phase 4 respects the confidence gate \u2014 it can only fill empty cells or upgrade cells below the confidence threshold. High-confidence values from earlier phases are permanently protected."
3214
+ answer: "No. Phase 4 respects the confidence gate \u2014 it can only fill empty cells or upgrade cells below the confidence threshold. High-confidence values from earlier phases are permanently protected. This is the single most important pipeline rule, ensuring that reliable lookup results are never replaced by lower-confidence AI extractions."
3174
3215
  },
3175
3216
  {
3176
3217
  question: "What else happens in Phase 4 besides gap filling?",
3177
- answer: "Phase 4 also applies deterministic transforms (ISO codes, dates, units), evaluates format constraints (regex validation), and runs the modifier pipeline (format, alias, max_length). Original values are preserved for audit."
3218
+ answer: "Phase 4 also applies deterministic transforms (ISO codes, dates, units), evaluates format constraints (regex validation), and runs the modifier pipeline in a fixed order: format transforms first, then alias mapping, then max_length truncation. Constraint evaluation happens after all modifiers. Original values are always preserved in the original_extractions table for audit, regardless of whether constraints clear, flag, or replace them."
3178
3219
  }
3179
3220
  ],
3180
3221
  mentions: ["Phase 4", "re-read", "gap filling", "confidence gate", "targeted extraction"]
@@ -3221,15 +3262,15 @@ var sections6 = [
3221
3262
  faq: [
3222
3263
  {
3223
3264
  question: "What do the colored dots in the results grid mean?",
3224
- answer: "Each dot indicates how a cell was resolved: blue = graph match, purple = computed, teal = agent transfer, indigo = agent extract, amber = lookup."
3265
+ answer: "Each dot indicates how a cell was resolved: blue = graph match (Phase 1 registry transfer, highest reliability), purple = computed (deterministic formula), teal = agent transfer (copy from equivalent field), indigo = agent extract (AI read from document), amber = lookup result or format flag. A grid dominated by blue and purple dots typically requires minimal review."
3225
3266
  },
3226
3267
  {
3227
3268
  question: "Can I export extraction results?",
3228
- answer: "Yes. Use CSV export from the job detail page. You can export clean data only or full data with metadata including confidence scores and resolution types."
3269
+ answer: "Yes. Use CSV export from the job detail page. The clean export includes only extracted values, ready for direct import into downstream systems. The full export includes metadata columns for each field: confidence score, resolution type, phase number, and reasoning trace \u2014 useful for audit trails or analyzing extraction performance across your document corpus."
3229
3270
  },
3230
3271
  {
3231
3272
  question: "What is the most efficient way to review a large extraction run?",
3232
- answer: "Start with the Flagged filter to address cells with validation warnings, low confidence, or format mismatches. Then spot-check a random sample of Clean rows. Focus corrections on recurring field-level patterns rather than individual cells."
3273
+ answer: "Start with the Flagged filter to address cells with validation warnings, low confidence, or format mismatches. Then spot-check a random sample of Clean rows. Focus corrections on recurring field-level patterns rather than individual cells. If you find a field that is consistently wrong, update its manual instruction or reference table in the schema rather than correcting cells one by one \u2014 this improves future runs as well."
3233
3274
  }
3234
3275
  ],
3235
3276
  mentions: [
@@ -3290,6 +3331,17 @@ var sections6 = [
3290
3331
  type: "paragraph",
3291
3332
  text: "Confidence scores follow predictable patterns by resolution type. Graph matches from Phase 1 typically score 0.7-0.95 because they are derived from verified registry data. Reference table lookups score 0.95 for exact normalization matches, ~0.70 for fuzzy matches, and 0.50 for AI fallback. Agent-derived values from Phase 2 generally score 0.5-0.9 depending on the clarity of the source document and the specificity of the extraction instruction."
3292
3333
  },
3334
+ {
3335
+ type: "list",
3336
+ ordered: false,
3337
+ items: [
3338
+ "**0.90-0.95** \u2014 Tier 1 lookup or exact registry transfer. Highest reliability; safe to trust without review in most workflows.",
3339
+ "**0.70-0.89** \u2014 Strong graph match or fuzzy registry transfer. Generally reliable; spot-check a sample to validate.",
3340
+ "**0.50-0.69** \u2014 AI extraction or fuzzy lookup result. Review recommended; the system found a plausible value but certainty is moderate.",
3341
+ "**0.30-0.49** \u2014 Low-confidence AI extraction. The source document was ambiguous or the field instruction was vague. Always review manually.",
3342
+ "**Below 0.30** \u2014 Very low confidence. The value is likely a best guess. Consider updating the schema instruction or adding a reference table to improve future runs."
3343
+ ]
3344
+ },
3293
3345
  {
3294
3346
  type: "paragraph",
3295
3347
  text: "Use confidence scores to set your review threshold. Cells above 0.8 are generally reliable and can be trusted without manual verification for most use cases. Cells between 0.5 and 0.8 warrant a quick check. Cells below 0.5 should always be reviewed manually. You can use the full CSV export to filter and sort by confidence, making it easy to batch-review low-confidence cells efficiently."
@@ -3363,15 +3415,15 @@ var sections6 = [
3363
3415
  faq: [
3364
3416
  {
3365
3417
  question: "How do I correct an extracted value?",
3366
- answer: "Click any cell in the results grid to edit its value. Choose propagation scope: this_document_only (single cell) or all_similar (same field + method across all documents)."
3418
+ answer: "Click any cell in the results grid to edit its value. Choose propagation scope: this_document_only (single cell) or all_similar (same field + method across all documents). When using all_similar, the system shows a preview count of how many cells will be affected before you confirm \u2014 always verify this count to avoid unintended bulk changes."
3367
3419
  },
3368
3420
  {
3369
3421
  question: "Do corrections improve future extractions?",
3370
- answer: "Yes. Corrections feed back as training signals for future runs, helping the system learn from your corrections and improve accuracy over time."
3422
+ answer: "Yes. Corrections feed back as training signals for future runs, helping the system learn from your corrections and improve accuracy over time. For maximum impact, correct the root cause rather than individual symptoms \u2014 update the schema field instruction or reference table so that future runs resolve correctly without manual intervention."
3371
3423
  },
3372
3424
  {
3373
3425
  question: "Is there an audit trail for corrections?",
3374
- answer: "Yes. Every correction logs the original value, the corrected value, the user who made the change, and the timestamp. This audit history is preserved and included in full metadata CSV exports."
3426
+ answer: "Yes. Every correction logs the original value, the corrected value, the user who made the change, and the timestamp. This audit history is preserved even after subsequent jobs run and is included in full metadata CSV exports. Downstream systems can use this data to distinguish between AI-extracted and human-corrected values."
3375
3427
  }
3376
3428
  ],
3377
3429
  mentions: [
@@ -3786,11 +3838,11 @@ var sections7 = [
3786
3838
  faq: [
3787
3839
  {
3788
3840
  question: "What anomalies does Talonic detect?",
3789
- answer: "Five structural patterns: validation clusters, field conflicts, duplicate key divergence, missing document types, and value reuse. Each is surfaced as a dismissable card on the case detail page."
3841
+ answer: "Five structural patterns: validation clusters (D1), field conflicts (D2), duplicate key divergence (D3), missing document types (D4), and value reuse (D5). Each is surfaced as a dismissable card on the case detail page. D2 and D3 are the highest-value detectors for procurement and financial workflows \u2014 they catch contradictory values across related documents, such as mismatched amounts between an invoice and its corresponding purchase order."
3790
3842
  },
3791
3843
  {
3792
3844
  question: "Do anomalies update automatically when cases change?",
3793
- answer: "Yes. The detection engine re-runs whenever case membership changes \u2014 documents added or removed, cases merged or split. Anomaly badges in the case header update in real time."
3845
+ answer: "Yes. The detection engine re-runs whenever case membership changes \u2014 documents added or removed, cases merged or split. Anomaly badges in the case header update in real time. Each detector operates independently, so a single case can trigger multiple anomaly types simultaneously. This continuous re-evaluation ensures that anomalies stay current as your document corpus evolves."
3794
3846
  },
3795
3847
  {
3796
3848
  question: "Can I dismiss anomalies?",
@@ -4011,11 +4063,11 @@ var sections8 = [
4011
4063
  },
4012
4064
  {
4013
4065
  question: "Why should I use assemblies for production data?",
4014
- answer: "Assemblies provide a single audit trail from source documents through extraction, resolution, and validation to the final output, making them the recommended approach for production datasets."
4066
+ answer: "Assemblies provide a single audit trail from source documents through extraction, resolution, and validation to the final output, making them the recommended approach for production datasets. Unlike ad-hoc exports, assemblies are versioned and reproducible \u2014 you can regenerate the same output shape from different document sets without reconfiguring columns or transforms. Previous versions are retained automatically, so you can compare outputs across time periods and demonstrate compliance with audit requirements."
4015
4067
  },
4016
4068
  {
4017
4069
  question: "Can an assembly pull from multiple sources?",
4018
- answer: "Yes. An assembly can combine documents from any number of sources \u2014 uploaded files, connected drives, email attachments, and more \u2014 into a single structured dataset."
4070
+ answer: "Yes. An assembly can combine documents from any number of sources \u2014 uploaded files, connected drives, email attachments, and more \u2014 into a single structured dataset. This is particularly useful for cross-functional reporting where data arrives through different channels. For example, you can combine invoices from a Google Drive connector, purchase orders uploaded manually, and contracts ingested via the API into a single unified procurement dataset."
4019
4071
  }
4020
4072
  ],
4021
4073
  mentions: [
@@ -4499,7 +4551,7 @@ var sections10 = [
4499
4551
  },
4500
4552
  {
4501
4553
  type: "paragraph",
4502
- text: "Every delivery flows through a five-stage pipeline. Producers are stateless \u2014 they only publish typed events into an outbox and never interact with destinations or bindings directly. A background poller drains the outbox every 5 seconds, matches events against active bindings, and enqueues delivery jobs for processing:"
4554
+ text: "Every delivery flows through a five-stage pipeline. Producers are stateless \u2014 they only publish typed events into an outbox and never interact with destinations or bindings directly. A background poller drains the outbox every 5 seconds (configurable via `delivery.poll_interval_ms`), claiming up to 50 rows per tick using `FOR UPDATE SKIP LOCKED` for safe multi-instance operation. When the BullMQ queue depth exceeds the backpressure threshold (default 10,000), the poller pauses until the queue drains, preventing memory exhaustion under burst load. Matched events are enqueued as delivery jobs processed by workers (default concurrency: 10):"
4503
4555
  },
4504
4556
  {
4505
4557
  type: "param-table",
@@ -4633,6 +4685,10 @@ var sections10 = [
4633
4685
  type: "paragraph",
4634
4686
  text: "A single destination can back multiple bindings. For example, one S3 bucket destination can receive both `document.extracted` and `result.approved` events through separate bindings, each with its own serializer and field map. This keeps your destination inventory small while supporting diverse routing requirements."
4635
4687
  },
4688
+ {
4689
+ type: "paragraph",
4690
+ text: 'For example, to set up a webhook destination via the API, send `POST /v1/delivery/destinations` with a body containing `name`, `type: "webhook"`, `config: { url: "https://ops.example.com/talonic" }`, and optionally `auth_config`, `signing_secret`, and `payload_cap_bytes`. The response returns the destination ID, which you then reference when creating a binding. After creation, call `POST /v1/delivery/destinations/:id/test` to verify the connection end-to-end before routing live events to it.'
4691
+ },
4636
4692
  {
4637
4693
  type: "paragraph",
4638
4694
  text: "For best results, always run a live-ping test after creating a destination. The test exercises the full transport envelope \u2014 SSRF validation, payload cap, and authentication \u2014 with a tiny test payload, so you catch configuration errors before real events start flowing. OAuth-based destinations (Google Drive, Google Sheets) require connecting your account first via the OAuth flow in the dashboard."
@@ -4688,7 +4744,7 @@ var sections10 = [
4688
4744
  },
4689
4745
  {
4690
4746
  type: "paragraph",
4691
- text: "The compatibility triangle is enforced on every create and update. The backend checks that your chosen serializer supports the deliverable resolver's output shape, and that the connector accepts the serializer's format. If any predicate fails, the binding is rejected with a descriptive error \u2014 you never end up with a binding that cannot deliver."
4747
+ text: "The compatibility triangle is enforced on every create and update via six predicates. The backend checks that: (1) the `signal_filter` is well-formed with a known event type and valid match values, (2) the `deliverable_type` resolves to a registered resolver, (3) the `serializer_format` resolves to a registered serializer, (4) the serializer supports the resolver's output shape, (5) the connector's supported serializer list includes the chosen format, and (6) the resolver's compatible signals include the signal filter's event type. If any predicate fails, the binding is rejected with a descriptive error \u2014 you never end up with a binding that cannot deliver."
4692
4748
  },
4693
4749
  {
4694
4750
  type: "paragraph",
@@ -4805,7 +4861,7 @@ var sections10 = [
4805
4861
  },
4806
4862
  {
4807
4863
  type: "paragraph",
4808
- text: "Signals are typed events emitted by the platform when meaningful state changes occur. Document-level signals fire on extraction success or failure. Run-level signals fire when a job completes across dataspace, structuring, resolution, or extraction runs. Result-level signals fire when a reviewer approves, rejects, or flags a record."
4864
+ text: "Signals are typed events emitted by the platform when meaningful state changes occur. They fall into four categories. **Document signals** (`document.extracted`, `document.extraction_failed`) fire on extraction success or failure for individual documents. **Run signals** (`run.dataspace.completed`, `run.structuring.completed`, `run.resolution.completed`, `run.extraction.completed`) fire when a job run completes across the four pipeline domains. **Result signals** (`result.approved`, `result.rejected`, `result.flagged`) fire when a reviewer takes action on a record. **Meta-signals** (`delivery.item.completed`, `delivery.item.failed`) fire when a delivery attempt itself succeeds or fails, enabling self-monitoring workflows."
4809
4865
  },
4810
4866
  {
4811
4867
  type: "paragraph",
@@ -4883,15 +4939,15 @@ var sections10 = [
4883
4939
  faq: [
4884
4940
  {
4885
4941
  question: "How is delivery history tracked?",
4886
- answer: "Every delivery attempt writes a row to /v1/delivery/items with status, HTTP code, error code, and request/response bodies. The log is strictly append-only \u2014 nothing is ever mutated."
4942
+ answer: "Every delivery attempt writes a row to /v1/delivery/items with status, HTTP code, error code, and request/response bodies (truncated to 10 KB each). The log is strictly append-only \u2014 nothing is ever mutated. You can filter items by binding_id, destination_id, or status to narrow results when debugging a specific integration."
4887
4943
  },
4888
4944
  {
4889
4945
  question: "What is the dead letter queue (DLQ)?",
4890
- answer: "Terminal failures (retry ladder exhausted or permanent 4xx) escalate to /v1/delivery/dlq. DLQ entries are fully replayable \u2014 replay enqueues a fresh attempt with a new idempotency key."
4946
+ answer: "Terminal failures (retry ladder exhausted or permanent 4xx) escalate to /v1/delivery/dlq. DLQ entries are fully replayable \u2014 replay enqueues a fresh attempt while preserving the deterministic idempotency key, so receivers that deduplicate on the key will not process the same delivery twice. Destinations returning authentication errors are automatically disabled to prevent further failed attempts."
4891
4947
  },
4892
4948
  {
4893
4949
  question: "How long are request and response bodies retained?",
4894
- answer: "Request and response bodies are cleaned up after the configured retention period (default 30 days). Row metadata \u2014 status, HTTP code, error code, and duration \u2014 is retained indefinitely for audit purposes."
4950
+ answer: "Request and response bodies are cleaned up after the configured retention period (default 30 days) by a daily cleanup job that runs at 03:00 server time. Row metadata \u2014 status, HTTP code, error code, and duration \u2014 is retained indefinitely for audit purposes. Configure the retention period via the delivery.item_body_retention_days setting in pipeline.yaml."
4895
4951
  }
4896
4952
  ],
4897
4953
  mentions: [
@@ -4964,7 +5020,7 @@ var sections11 = [
4964
5020
  },
4965
5021
  {
4966
5022
  question: "When should I use a shared dialect vs an inline dialect?",
4967
- answer: "Use shared dialects for workspace-wide defaults that apply to most schemas. Use inline dialects only when a specific schema needs different formatting \u2014 for example, a schema that outputs dates in a different format for a particular downstream system."
5023
+ answer: "Use shared dialects for workspace-wide defaults that apply to most schemas. Use inline dialects only when a specific schema needs different formatting \u2014 for example, a schema that outputs dates in DD/MM/YYYY for a European ERP while the rest of your workspace uses YYYY-MM-DD. Inline overrides apply only to that one schema, so they do not affect any other output. If you find yourself overriding the same setting in multiple schemas, consider updating the shared dialect instead."
4968
5024
  },
4969
5025
  {
4970
5026
  question: "Do shared dialects affect the extraction process?",
@@ -5036,7 +5092,7 @@ var sections11 = [
5036
5092
  },
5037
5093
  {
5038
5094
  question: "How does the lookup cascade work?",
5039
- answer: "The platform tries three tiers: first, exact string normalization (whitespace and case normalization). If that fails, token-based fuzzy matching. If the fuzzy match is below the confidence threshold, a Haiku LLM call resolves the ambiguity."
5095
+ answer: "The platform tries three tiers in sequence. First, exact string normalization strips whitespace and normalizes casing to find a direct match. If no exact match is found, token-based fuzzy matching compares individual tokens against all reference values and scores similarity. If the best fuzzy match falls below the confidence threshold, a Haiku LLM call evaluates the ambiguous value in context against the top candidates and selects the most likely match. This three-tier approach balances speed and accuracy \u2014 most lookups resolve in the first two tiers without any LLM cost."
5040
5096
  },
5041
5097
  {
5042
5098
  question: "What happens when I update a reference primitive?",
@@ -5081,7 +5137,10 @@ var sections11 = [
5081
5137
  items: [
5082
5138
  "**Schema changes** \u2014 field additions, removals, mapping updates, and format constraint modifications.",
5083
5139
  "**Shared dialect changes** \u2014 date format, number locale, delimiter, and encoding updates.",
5084
- "**Reference primitive changes** \u2014 new versions of lookup tables and key-value modifications."
5140
+ "**Reference primitive changes** \u2014 new versions of lookup tables and key-value modifications.",
5141
+ "**Delivery binding changes** \u2014 modifications to outbound delivery destinations, field maps, or signal filters.",
5142
+ "**Routing rule changes** \u2014 additions or modifications to document routing rules that assign schemas automatically.",
5143
+ "**Format constraint changes** \u2014 regex pattern updates or fallback behavior modifications on schema fields."
5085
5144
  ]
5086
5145
  },
5087
5146
  {
@@ -5162,7 +5221,9 @@ var sections12 = [
5162
5221
  "**Extracted values** \u2014 finds specific data points across all processed documents.",
5163
5222
  "**Field names** \u2014 searches the Field Registry for canonical field definitions.",
5164
5223
  "**Schema names** \u2014 locates generated and template schemas by title.",
5165
- "**Sources** \u2014 matches source connection names and configurations."
5224
+ "**Sources** \u2014 matches source connection names and configurations.",
5225
+ "**Matching configurations** \u2014 finds matching configs and reference datasets by name.",
5226
+ "**Delivery bindings** \u2014 locates delivery pipeline bindings and destination configurations."
5166
5227
  ]
5167
5228
  }
5168
5229
  ],
@@ -5229,6 +5290,10 @@ var sections12 = [
5229
5290
  {
5230
5291
  type: "paragraph",
5231
5292
  text: 'For best results, save your most common filter combinations as presets. Most teams create presets for categories like "high-value invoices this quarter," "documents missing key fields," or "recently failed extractions." Presets appear as one-click buttons on the Documents page, eliminating the need to rebuild complex filter conditions from scratch each time.'
5293
+ },
5294
+ {
5295
+ type: "paragraph",
5296
+ text: 'For example, to find all high-value invoices from a specific vendor, build a filter with `vendor_name eq "Acme Corp"` AND `document_type eq "Invoice"` AND `total_amount gt 5000`. The field autocomplete ensures you are filtering on valid extracted fields, and the materialized index returns results instantly even across thousands of documents. Save this as a preset called "Acme high-value invoices" for one-click access when you need to review that vendor\'s billing history.'
5232
5297
  }
5233
5298
  ],
5234
5299
  related: [
@@ -5350,6 +5415,10 @@ var sections13 = [
5350
5415
  {
5351
5416
  question: "Can I have multiple API keys?",
5352
5417
  answer: "Yes. You can create as many API keys as needed. Best practice is to create separate keys for each integration so you can rotate or revoke them independently without disrupting other services."
5418
+ },
5419
+ {
5420
+ question: "What are best practices for API key management?",
5421
+ answer: "Store keys in a secrets manager rather than source code or environment files checked into version control. Create one key per integration so each can be rotated independently. Use the narrowest scope possible \u2014 a read-only dashboard needs only the read scope, not extract or write. Rotate keys on a regular schedule and immediately revoke any key that may have been exposed. Monitor API usage per key to detect anomalies early."
5353
5422
  }
5354
5423
  ],
5355
5424
  mentions: ["API keys", "tlnc_", "SHA-256", "Bearer token", "scopes"]
@@ -5727,6 +5796,16 @@ var sections14 = [
5727
5796
  variant: "info",
5728
5797
  text: "Domain matching streamlines onboarding for larger teams. When a new user signs up with an email address matching your organization's domain (e.g., `@yourcompany.com`), they are automatically associated with your org in a **pending** state. An admin must approve them before they gain access."
5729
5798
  },
5799
+ {
5800
+ type: "list",
5801
+ ordered: false,
5802
+ items: [
5803
+ "**Viewer** \u2014 read-only access to documents, extraction results, schemas, and reports. Cannot create, edit, or delete any resources.",
5804
+ "**Member** \u2014 full CRUD access to documents, schemas, jobs, matching configurations, and delivery bindings. Cannot manage team members or workspace settings.",
5805
+ "**Admin** \u2014 all Member permissions plus team management (approve/reject members, change roles), workspace settings (shared dialects, reference primitives, change review), and routing rules.",
5806
+ "**Owner** \u2014 all Admin permissions plus billing management, API key generation and revocation, organization-level settings, and the ability to transfer ownership."
5807
+ ]
5808
+ },
5730
5809
  {
5731
5810
  type: "list",
5732
5811
  ordered: true,
@@ -5861,7 +5940,7 @@ var sections14 = [
5861
5940
  },
5862
5941
  {
5863
5942
  question: "How can I reduce my usage costs?",
5864
- answer: "Use batch mode for non-urgent documents to cut extraction costs by 50%. Review the per-feature breakdown to identify your highest-cost operations, and use the daily cost chart to spot and investigate usage spikes."
5943
+ answer: "Use batch mode for non-urgent documents to cut extraction costs by 50%. Review the per-feature breakdown to identify your highest-cost operations, and use the daily cost chart to spot and investigate usage spikes. Additionally, invest in building your Field Registry \u2014 as more fields reach Tier 1 and Tier 2, values are resolved via deterministic lookup instead of LLM calls, which reduces per-document extraction cost over time. Leverage routing rules to assign schemas automatically, which avoids manual re-extractions and wasted processing."
5865
5944
  }
5866
5945
  ],
5867
5946
  mentions: [
@@ -6014,7 +6093,7 @@ var sections14 = [
6014
6093
  },
6015
6094
  {
6016
6095
  question: "What does the quick extract shortcut do?",
6017
- answer: "Cmd+J / Ctrl+J opens the quick extract interface, allowing you to upload and process a document directly from any page. It provides a streamlined drag-and-drop area that immediately processes the uploaded file and displays extraction results."
6096
+ answer: "Cmd+J / Ctrl+J opens the quick extract interface, allowing you to upload and process a document directly from any page. It provides a streamlined drag-and-drop area that immediately processes the uploaded file and displays extraction results. This is the fastest path from receiving a document to seeing structured data \u2014 ideal for one-off documents that arrive via email or chat and need immediate attention without navigating to the upload page."
6018
6097
  },
6019
6098
  {
6020
6099
  question: "Do shortcuts work inside modals or overlays?",
@@ -6134,6 +6213,17 @@ var sections15 = [
6134
6213
  {
6135
6214
  type: "paragraph",
6136
6215
  text: "You can also enable batch mode on a per-source basis. When a source connection has the batch processing toggle enabled, all documents ingested through that source are automatically routed to the batch queue. This is ideal for source connections that handle non-urgent, high-volume ingestion \u2014 such as a shared drive that collects documents overnight."
6216
+ },
6217
+ {
6218
+ type: "list",
6219
+ ordered: false,
6220
+ items: [
6221
+ "**Included in batch:** Stage 2 Claude extraction, markdown pre-processing, field parsing, quality metrics computation, extraction metadata, and all post-processing that does not require LLM calls.",
6222
+ "**Excluded from batch:** LLM-based quality passes (field estimation, verification, cross-reference enrichment) are skipped to preserve cost savings.",
6223
+ "**Excluded from batch:** Image-only documents (PNG, JPG) are automatically routed to real-time processing because the batch payload is text-only.",
6224
+ "**Fallback behavior:** Parse failures in batch mode are retried through the real-time extraction path \u2014 never as a new batch \u2014 to maintain the 48-hour SLA.",
6225
+ "**Minimum threshold:** Batches require at least 100 items (a provider requirement). Uploads below this threshold fall back to real-time processing with a warning."
6226
+ ]
6137
6227
  }
6138
6228
  ],
6139
6229
  related: [
@@ -6187,6 +6277,10 @@ var sections15 = [
6187
6277
  type: "paragraph",
6188
6278
  text: "The batch detail view shows individual items within a batch, including which documents are included, their current processing state, and any errors that occurred. Use this view to verify that a specific document was included in the expected batch and to troubleshoot items that failed to parse."
6189
6279
  },
6280
+ {
6281
+ type: "paragraph",
6282
+ text: "For example, after uploading 500 invoices in batch mode, navigate to `/sources/batches` to check progress. You will see a batch in **accumulating** status collecting items until the 15-minute timer fires. Once submitted, the status changes to **submitted** and the platform polls the provider hourly. Click the batch row to see each document's individual state \u2014 if 3 items show parse errors, those documents were automatically retried via the real-time path while the remaining 497 completed normally. When the batch transitions to **completed**, all results have been applied and documents are ready for review."
6283
+ },
6190
6284
  {
6191
6285
  type: "paragraph",
6192
6286
  text: "The platform includes built-in crash recovery for batch processing. If the application restarts while a batch is in a transient `processing` state, the recovery logic automatically reverts it to `submitted` so the next polling cycle can retry. This means batch jobs are resilient to infrastructure disruptions without requiring manual intervention."
@@ -6306,7 +6400,7 @@ var sections16 = [
6306
6400
  },
6307
6401
  {
6308
6402
  question: "How is reference data used?",
6309
- answer: "Reference datasets are used by the matching engine for field-to-field comparisons and by reference strategies in schemas for code mapping and value resolution."
6403
+ answer: "Reference datasets serve two purposes. First, the matching engine uses them for field-to-field comparisons \u2014 comparing extracted document values against reference rows using weighted strategies (exact, fuzzy, date_range, numeric_range). Second, reference strategies in schemas use them for code mapping and value resolution, translating labels found in documents into canonical codes defined in the reference dataset."
6310
6404
  },
6311
6405
  {
6312
6406
  question: "Can I import reference data from a database?",
@@ -6379,6 +6473,16 @@ var sections16 = [
6379
6473
  type: "callout",
6380
6474
  variant: "info",
6381
6475
  text: "Use **AI strategy generation** when setting up matching for the first time. The platform analyzes your schema fields and reference data columns, then suggests which fields to compare and which strategy to use for each. You can review and adjust the suggestions before saving."
6476
+ },
6477
+ {
6478
+ type: "list",
6479
+ ordered: false,
6480
+ items: [
6481
+ "**exact** \u2014 case-insensitive string comparison. Best for unique identifiers like PO numbers, invoice IDs, and reference codes where values should match character for character, ignoring letter case.",
6482
+ "**fuzzy** \u2014 token-based similarity with a configurable threshold (0-100%). Handles misspellings, abbreviations, and word reordering. Ideal for company names, addresses, and descriptions.",
6483
+ "**date_range** \u2014 matches dates within a configurable tolerance window (e.g., +/- 7 days). Useful when documents report dates with slight offsets, such as invoice date vs. received date.",
6484
+ "**numeric_range** \u2014 matches numbers within a percentage or absolute tolerance. Handles rounding differences in amounts, quantities, and prices across systems."
6485
+ ]
6382
6486
  }
6383
6487
  ],
6384
6488
  related: [
@@ -6527,6 +6631,10 @@ var sections16 = [
6527
6631
  type: "callout",
6528
6632
  variant: "info",
6529
6633
  text: "You can **approve or reject** individual match results. Approved matches can be used downstream in delivery pipelines. Rejected matches are excluded from future consideration for that document."
6634
+ },
6635
+ {
6636
+ type: "paragraph",
6637
+ text: 'Consider a practical example: you receive an invoice from "Acme Corp" with a total of $12,450 dated 2025-03-15. The matching engine returns the top candidate as "ACME Corporation" in your reference data with a confidence score of 87%. The evidence view shows the vendor name scored 92% via fuzzy match (handling "Corp" vs "Corporation"), the amount scored 100% via exact match, and the date scored 78% via date_range because the reference shows a PO date of 2025-03-10 \u2014 within the 7-day tolerance. You can quickly verify the match is correct and approve it, sending the linked record downstream.'
6530
6638
  }
6531
6639
  ],
6532
6640
  related: [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@talonic/docs",
3
- "version": "0.20.13",
3
+ "version": "0.20.14",
4
4
  "description": "Talonic documentation components — API Reference & Platform Guide",
5
5
  "license": "UNLICENSED",
6
6
  "private": false,