@talonic/docs 0.20.11 → 0.20.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/content.js CHANGED
@@ -6028,7 +6028,11 @@ var sections17 = [
6028
6028
  description: "Overview of the Talonic API for extracting structured, schema-validated data from any document with a single API call using HTTPS and JSON.",
6029
6029
  content: [
6030
6030
  { type: "paragraph", text: "Extract any document into schema-validated data with a single API call." },
6031
- { type: "paragraph", text: "**Base URL:** `https://api.talonic.com` | **Protocol:** HTTPS + JSON | **Auth:** `Bearer tlnc_...`" }
6031
+ { type: "paragraph", text: "**Base URL:** `https://api.talonic.com` | **Protocol:** HTTPS + JSON | **Auth:** `Bearer tlnc_...`" },
6032
+ { type: "paragraph", text: "Most integrations start with `POST /v1/extract` to submit a document and receive structured fields back. A typical workflow is: create an API key, upload a file with an optional schema, and consume the JSON response with per-field confidence scores and cost headers." },
6033
+ { type: "paragraph", text: "The API supports three extraction modes: **auto-detect** (no schema, discovers all fields), **schema-driven** (returns exactly the fields you define), and **query** (filter previously extracted data without re-processing). Every response includes a `request_id` for tracing and support." },
6034
+ { type: "paragraph", text: "Pair the extract endpoint with `GET /v1/documents` and `GET /v1/extractions` to manage your document library and retrieve results later. Webhook callbacks via `extraction.complete` events eliminate the need for polling on async extractions." },
6035
+ { type: "callout", text: "All API keys use the `tlnc_` prefix. Create and rotate keys from **Settings \u2192 API Keys** in the dashboard. Keys carry scopes (`extract`, `read`, `write`, `billing`) that control endpoint access." }
6032
6036
  ],
6033
6037
  related: [
6034
6038
  { label: "Authentication", slug: "authentication" },
@@ -6084,7 +6088,11 @@ var sections17 = [
6084
6088
  description: "The base URL for all Talonic API endpoints. All requests must use HTTPS and are relative to the v1 base path.",
6085
6089
  content: [
6086
6090
  { type: "paragraph", text: "All endpoints are relative to the base URL below. All requests must use HTTPS." },
6087
- { type: "code", language: "bash", code: "https://api.talonic.com/v1" }
6091
+ { type: "code", language: "bash", code: "https://api.talonic.com/v1" },
6092
+ { type: "paragraph", text: "Most integrations set this as a constant in their HTTP client configuration. A typical request URL looks like `https://api.talonic.com/v1/extract` or `https://api.talonic.com/v1/documents`. All paths in this reference are relative to the `/v1` prefix." },
6093
+ { type: "paragraph", text: "The API uses standard JSON request and response bodies with `Content-Type: application/json`, except for file uploads which use `multipart/form-data`. Responses include standard HTTP status codes and rate limit headers on every call." },
6094
+ { type: "paragraph", text: "There is no versioning in the URL beyond `/v1`. Breaking changes will be communicated in advance and introduced under a new version prefix. Non-breaking additions (new fields, new endpoints) are shipped continuously." },
6095
+ { type: "callout", text: "Plain HTTP requests are rejected. Always use `https://` in your base URL configuration to ensure encrypted transport." }
6088
6096
  ],
6089
6097
  related: [
6090
6098
  { label: "Authentication", slug: "authentication" }
@@ -6204,6 +6212,9 @@ X-Talonic-Cells-Resolved-AI: 5` },
6204
6212
  description: "All list endpoints use cursor-based pagination with cursor, limit, and order parameters. Responses include next_cursor and has_more for iteration.",
6205
6213
  content: [
6206
6214
  { type: "paragraph", text: "All list endpoints use cursor-based pagination. Pass a `cursor` token from the previous response to fetch the next page." },
6215
+ { type: "paragraph", text: "Most integrations call list endpoints after bulk ingestion to iterate through results. A typical workflow is to fetch the first page with a `limit`, then loop using `pagination.next_cursor` until `has_more` is `false`." },
6216
+ { type: "paragraph", text: "The response always includes a `pagination` object with `total`, `limit`, `has_more`, and `next_cursor`. The `total` field reflects the full count of matching items, not just the current page. Use `order` to control sort direction by `created_at`." },
6217
+ { type: "paragraph", text: "Pair pagination with query filters (e.g. `status`, `after`, `before`, `search`) on endpoints like `GET /v1/documents` and `GET /v1/extractions` to narrow results before paginating. Note that cursors are opaque and short-lived \u2014 do not persist or parse them." },
6207
6218
  {
6208
6219
  type: "param-table",
6209
6220
  title: "Request parameters",
@@ -6291,6 +6302,9 @@ print(f"Fetched {len(all_documents)} documents")`
6291
6302
  description: "Use the Idempotency-Key header to safely retry POST requests without creating duplicate extractions. Keys are valid for 24 hours.",
6292
6303
  content: [
6293
6304
  { type: "paragraph", text: "Pass an `Idempotency-Key` header on POST requests to safely retry without creating duplicate work. If a request with the same key has already been processed, the API returns the cached response." },
6305
+ { type: "paragraph", text: "Most integrations use idempotency keys when calling `POST /v1/extract` to guard against network timeouts or duplicate submissions. A typical workflow is to generate a UUID per logical operation, attach it as the `Idempotency-Key` header, and retry the same request on failure without risk of double-processing." },
6306
+ { type: "paragraph", text: "The cached response is stored for **24 hours** and is scoped to your API key. A duplicate request within that window returns the original response body and HTTP status immediately, with no additional credit cost. After 24 hours the key expires and can be reused for a new request." },
6307
+ { type: "paragraph", text: "Pair idempotency with webhook callbacks (`webhook_url` option) for robust async workflows. Note that reusing a key with different request parameters will still return the first request's cached result \u2014 always generate a fresh key for each distinct operation." },
6294
6308
  {
6295
6309
  type: "param-table",
6296
6310
  title: "Idempotency details",
@@ -6774,6 +6788,7 @@ X-Talonic-Cells-Resolved-AI: 5`
6774
6788
  seoTitle: "Extract Options \u2014 Talonic Docs",
6775
6789
  description: "Configure extraction options including output format, strict mode, async processing, webhook callbacks, raw text inclusion, page ranges, and language hints.",
6776
6790
  content: [
6791
+ { type: "paragraph", text: "Pass these options as fields in the `options` JSON object on `POST /v1/extract` to control extraction behavior. Options let you switch between sync and async mode, include raw text, restrict page ranges, and configure webhook delivery." },
6777
6792
  {
6778
6793
  type: "param-table",
6779
6794
  params: [
@@ -6785,7 +6800,11 @@ X-Talonic-Cells-Resolved-AI: 5`
6785
6800
  { name: "page_range", type: "string", description: 'Pages to extract from. E.g. "1-5", "1,3,7-10". PDF only.' },
6786
6801
  { name: "language_hint", type: "string", description: "ISO 639-1 language code hint. Improves extraction for non-English documents." }
6787
6802
  ]
6788
- }
6803
+ },
6804
+ { type: "paragraph", text: "Most integrations use `strict: true` (default) to receive only the schema-defined fields. Set `strict: false` when you want the AI to also return additional fields it discovers beyond your schema. The `async` and `webhook_url` options complement each other \u2014 set `webhook_url` to avoid polling entirely." },
6805
+ { type: "paragraph", text: 'The `page_range` option accepts comma-separated page numbers and ranges (e.g. `"1-5"`, `"1,3,7-10"`) and applies only to PDF files. Use `language_hint` with an ISO 639-1 code (e.g. `"de"`, `"ja"`) to improve extraction accuracy for non-English documents, especially when the OCR needs guidance on character sets.' },
6806
+ { type: "paragraph", text: "Pair `include_raw_text: true` with schema-driven extraction when your downstream system needs both structured data and the original text for audit or display purposes. Note that setting `webhook_url` implicitly enables async behavior \u2014 the response will be `202 Accepted` regardless of the `async` flag." },
6807
+ { type: "callout", text: 'The `format` option controls the output shape of the `data` field. Use `"json"` (default) for programmatic consumption. CSV format is available on the `GET /v1/extractions/:id/data` endpoint instead.' }
6789
6808
  ],
6790
6809
  related: [
6791
6810
  { label: "POST /v1/extract", slug: "post-extract" },
@@ -7079,6 +7098,10 @@ var sections19 = [
7079
7098
  }
7080
7099
  }`
7081
7100
  },
7101
+ { type: "paragraph", text: "Most integrations call this endpoint after receiving an `extraction.complete` webhook or after polling a document's status until it reaches `completed`. A typical workflow is to extract a document via `POST /v1/extract`, store the returned `document.id`, then fetch full metadata here when needed." },
7102
+ { type: "paragraph", text: "The response includes the current `status` field, which is `completed` when extraction has finished, `processing` while in progress, or `error` if something went wrong. Use `latest_extraction_id` to navigate directly to the extraction result via `GET /v1/extractions/:id`." },
7103
+ { type: "paragraph", text: "Pair this with `GET /v1/documents/:id/markdown` to retrieve the raw OCR text, or with `GET /v1/extractions/:id/data` for just the structured field values. Note that the `triage` object is only populated after ingestion completes and may be `null` for documents still in processing." },
7104
+ { type: "callout", variant: "info", text: "The `links.dashboard` URL opens the document directly in the Talonic platform UI, which is useful for sharing with team members who need to review or correct extractions." },
7082
7105
  { type: "heading", level: 2, id: "get-document-errors", text: "Errors" },
7083
7106
  {
7084
7107
  type: "param-table",
@@ -7134,6 +7157,9 @@ var sections19 = [
7134
7157
  "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
7135
7158
  }`
7136
7159
  },
7160
+ { type: "paragraph", text: "Most integrations call this endpoint as part of a cleanup workflow after data has been exported or when a document was uploaded in error. A typical pattern is to list documents with `GET /v1/documents`, identify candidates for deletion, then call this endpoint for each one." },
7161
+ { type: "paragraph", text: "The response includes a `deleted` field set to `true` and the `id` of the removed document. There is no soft-delete mechanism \u2014 the original file, OCR markdown, and all extraction results are permanently purged from storage." },
7162
+ { type: "paragraph", text: "Pair this with `GET /v1/documents/:id` beforehand to verify you are deleting the correct resource. Note that if the document participated in entity linking or cases, those links are removed and affected cases may be recomputed during the next backfill cycle." },
7137
7163
  { type: "heading", level: 2, id: "delete-document-errors", text: "Errors" },
7138
7164
  {
7139
7165
  type: "param-table",
@@ -7393,6 +7419,9 @@ var sections20 = [
7393
7419
  "due_date": "2024-03-15"
7394
7420
  }`
7395
7421
  },
7422
+ { type: "paragraph", text: "Most integrations call this endpoint to feed extraction output into downstream systems (CRMs, ERPs, data warehouses) that only need the raw key-value data. A typical workflow is to extract a document, then call this endpoint with the `extraction_id` from the response to get a clean data payload without metadata overhead." },
7423
+ { type: "paragraph", text: "The response is a flat JSON object where each key is a field name and each value is the extracted value, typed according to the schema (strings, numbers, dates, arrays). Use `?format=csv` to download the same data as a CSV file with field names as headers \u2014 the `Content-Disposition` header provides a suggested filename." },
7424
+ { type: "paragraph", text: "Pair this with `GET /v1/extractions/:id` when you also need confidence scores, locked field status, or processing metadata. Note that the response shape matches the schema used during extraction \u2014 if no schema was provided, auto-discovered field names are used as keys." },
7396
7425
  { type: "heading", level: 2, id: "get-extraction-fields-errors", text: "Errors" },
7397
7426
  {
7398
7427
  type: "param-table",
@@ -7716,6 +7745,9 @@ var sections21 = [
7716
7745
  }
7717
7746
  }`
7718
7747
  },
7748
+ { type: "paragraph", text: "Most integrations call this endpoint before running an extraction to verify the schema definition is correct, or after an update to confirm the new version was applied. A typical workflow is to create a schema with `POST /v1/schemas`, store the returned `id`, then fetch it here whenever you need the current definition." },
7749
+ { type: "paragraph", text: "The response includes the full `definition` object in normalized JSON Schema format, along with the `version` number and `field_count`. Use the `links.extractions` URL to list all extractions that used this schema, and `links.dashboard` to open it in the platform UI." },
7750
+ { type: "paragraph", text: "Pair this with `PUT /v1/schemas/:id` to update the definition, or pass the `id` as `schema_id` on `POST /v1/extract` to run schema-driven extraction. Note that both UUID and `SCH-` prefixed short IDs are accepted as the `:id` parameter." },
7719
7751
  { type: "heading", level: 2, id: "get-schema-errors", text: "Errors" },
7720
7752
  {
7721
7753
  type: "param-table",
@@ -7913,6 +7945,10 @@ var sections21 = [
7913
7945
  }
7914
7946
  }`
7915
7947
  },
7948
+ { type: "paragraph", text: "Most integrations call this endpoint when extraction requirements evolve \u2014 for example, adding a new field to an invoice schema or renaming an existing one. A typical workflow is to fetch the current schema with `GET /v1/schemas/:id`, modify the `definition`, then send the updated payload here." },
7949
+ { type: "paragraph", text: "The response includes the updated `definition`, `field_count`, and `version` number. The `updated_at` timestamp reflects when the change was applied. All body parameters are optional \u2014 send only the fields you want to change (`name`, `definition`, or `description`) and the others are left untouched." },
7950
+ { type: "paragraph", text: "Pair this with `GET /v1/extractions?schema_id=:id` to review historical extractions that used previous versions. Note that schema versioning is append-only internally, so you can always compare before-and-after definitions through the dashboard." },
7951
+ { type: "callout", variant: "info", text: "Schema updates do not retroactively change existing extractions. If you need to re-extract documents with the new schema, call `POST /v1/extract` with `document_id` and the updated `schema_id`." },
7916
7952
  { type: "heading", level: 2, id: "update-schema-errors", text: "Errors" },
7917
7953
  {
7918
7954
  type: "param-table",
@@ -7968,6 +8004,9 @@ var sections21 = [
7968
8004
  "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
7969
8005
  }`
7970
8006
  },
8007
+ { type: "paragraph", text: "Most integrations call this endpoint during cleanup when a schema is no longer needed, or when consolidating duplicate schemas. A typical workflow is to list schemas with `GET /v1/schemas`, identify obsolete ones, then delete them individually by `id`." },
8008
+ { type: "paragraph", text: "The response confirms deletion with `deleted: true` and the `id` of the removed schema. All extraction results that used this schema remain intact and queryable via `GET /v1/extractions` \u2014 only the schema definition itself is removed from the system." },
8009
+ { type: "paragraph", text: "Pair this with `GET /v1/schemas/:id` beforehand to review the schema before removing it. Note that deletion is permanent with no undo \u2014 if you need the same structure later, you must recreate it with `POST /v1/schemas`." },
7971
8010
  { type: "heading", level: 2, id: "delete-schema-errors", text: "Errors" },
7972
8011
  {
7973
8012
  type: "param-table",
@@ -8545,6 +8584,9 @@ var sections23 = [
8545
8584
  description: "Create a new input source and receive a source-scoped API key. The key is only shown once in the creation response \u2014 store it securely.",
8546
8585
  content: [
8547
8586
  { type: "paragraph", text: "Create a new source to start ingesting documents. The response includes a **source-scoped API key** (`tlnc_sk_*`) that authenticates uploads to this source's endpoint. This key is shown only once \u2014 store it securely immediately after creation." },
8587
+ { type: "paragraph", text: "The typical workflow is: create a source, store the returned `api_key` securely, then use it to authenticate document uploads to the source's `endpoint` URL. Optionally pass a `default_schema_id` to automatically apply an extraction schema to all documents ingested through this source." },
8588
+ { type: "paragraph", text: "The response returns the source with `status: active`, `document_count: 0`, and the one-time `api_key` field. The `endpoint` URL is the path for `POST` document uploads. The `links` object includes URLs for the source detail, document list, and dashboard view." },
8589
+ { type: "paragraph", text: "Store the `api_key` immediately \u2014 it cannot be retrieved again. If lost, delete the source and create a new one. The source type defaults to `api` (programmatic ingestion); use `upload` for manual file uploads or `connector` for third-party integrations like Google Drive or SharePoint." },
8548
8590
  { type: "callout", variant: "warning", text: "The `api_key` is only returned in the creation response. It cannot be retrieved later. If you lose it, delete the source and create a new one." },
8549
8591
  {
8550
8592
  type: "endpoint",
@@ -8630,6 +8672,10 @@ var sections23 = [
8630
8672
  description: "Get source details, update a source name, or delete a source. Documents are retained but unlinked when a source is deleted.",
8631
8673
  content: [
8632
8674
  { type: "paragraph", text: "Manage an individual source with GET, PATCH, and DELETE operations on the same path. Retrieve source details, update its name, or permanently delete it. When a source is deleted, its documents are **retained** but unlinked from the source." },
8675
+ { type: "paragraph", text: "Use `GET` to inspect a source's current status, document count, and default schema assignment. Use `PATCH` to rename a source. Use `DELETE` when a source is no longer needed \u2014 this immediately invalidates the source-scoped API key, so any integration using it will start receiving `401` errors." },
8676
+ { type: "paragraph", text: "The `GET` response includes `document_count`, `default_schema` (with its `id` if set), and the `endpoint` URL for document ingestion. The `status` field shows the current state \u2014 `active` for API sources, or sync status values for connector-based sources (Google Drive, SharePoint, etc.)." },
8677
+ { type: "paragraph", text: "Deleting a source retains all its documents in your workspace \u2014 they remain accessible via the documents API and any existing extractions are preserved. Only the source-to-document link is removed. Pair `GET /v1/sources/:id` with `GET /v1/sources/:id/documents` to see documents belonging to a specific source." },
8678
+ { type: "callout", variant: "info", text: "Deleting a source immediately invalidates its API key. Any integration using that key will receive `401` errors. Documents are retained but unlinked from the source." },
8633
8679
  {
8634
8680
  type: "endpoint",
8635
8681
  method: "GET",
@@ -9537,7 +9583,11 @@ var sections25 = [
9537
9583
  "`extraction.complete` \u2014 Extraction finished successfully. Payload includes the full extraction result.",
9538
9584
  "`extraction.failed` \u2014 Extraction failed. Payload includes the error details.",
9539
9585
  "`document.ingested` \u2014 A new document has been processed and is ready for extraction."
9540
- ] }
9586
+ ] },
9587
+ { type: "paragraph", text: "Most integrations subscribe to `extraction.complete` to trigger downstream processing (e.g. writing structured data to a database or notifying a user). A typical workflow is to pass `webhook_url` on `POST /v1/extract`, then handle the callback payload in your server without polling." },
9588
+ { type: "paragraph", text: "The `extraction.complete` payload includes the `extraction_id`, `document_id`, `schema_id`, `status`, and `confidence` score. Use the `extraction_id` to fetch the full result via `GET /v1/extractions/:id` if the payload does not contain all the fields you need." },
9589
+ { type: "paragraph", text: "Pair event handling with [Signature Verification](webhook-security) to ensure payloads are authentic. Note that `extraction.failed` events include an `error` field with a machine-readable code and human-readable message \u2014 use this to decide whether to retry via `POST /v1/extract` with `document_id`." },
9590
+ { type: "callout", text: "Webhook URLs must be HTTPS endpoints. HTTP URLs are rejected at configuration time to ensure payload confidentiality in transit." }
9541
9591
  ],
9542
9592
  related: [
9543
9593
  { label: "Signature Verification", slug: "webhook-security" },
@@ -9655,7 +9705,11 @@ echo -n '{"event":"extraction.complete","delivery_id":"dlv_test123","timestamp":
9655
9705
  "3rd retry \u2014 30 minutes",
9656
9706
  "4th retry (final) \u2014 4 hours"
9657
9707
  ] },
9658
- { type: "paragraph", text: "After 4 failed attempts, the delivery is marked as failed. You can check delivery status and replay events from the dashboard." }
9708
+ { type: "paragraph", text: "After 4 failed attempts, the delivery is marked as failed. You can check delivery status and replay events from the dashboard." },
9709
+ { type: "paragraph", text: "Most integrations rely on the default retry schedule and only intervene when a delivery reaches the failed state. A typical debugging workflow is to check the delivery history in the dashboard, identify the HTTP status or timeout that caused the failure, then fix the endpoint and replay the event." },
9710
+ { type: "paragraph", text: "Your endpoint must return a `2xx` status code within **30 seconds** to be considered successful. Non-`2xx` responses (including `3xx` redirects) and timeouts trigger retries. The `X-Talonic-Delivery-Id` header remains the same across retries, so use it for idempotent processing on your end." },
9711
+ { type: "paragraph", text: "Pair retry awareness with [Signature Verification](webhook-security) to reject spoofed payloads early. Note that the total retry window spans approximately **4.5 hours** from the initial attempt \u2014 if your endpoint is down longer than that, use the dashboard replay feature to re-send missed events." },
9712
+ { type: "callout", text: "If your endpoint consistently fails, check for firewall rules blocking Talonic IPs, TLS certificate issues, or response timeouts exceeding 30 seconds. The dashboard delivery log shows the HTTP status and error for each attempt." }
9659
9713
  ],
9660
9714
  related: [
9661
9715
  { label: "Webhook Events", slug: "webhook-events" },
@@ -9673,7 +9727,11 @@ echo -n '{"event":"extraction.complete","delivery_id":"dlv_test123","timestamp":
9673
9727
  seoTitle: "Webhook Delivery Format \u2014 Talonic Docs",
9674
9728
  description: "Webhook delivery format details including POST request structure, JSON body format, and standard headers for event type, signature, delivery ID, and timestamp.",
9675
9729
  content: [
9676
- { type: "paragraph", text: "Webhooks are delivered as `POST` requests with a JSON body. Configure webhook URLs per-source or per-extraction via the `webhook_url` option on the extract endpoint." }
9730
+ { type: "paragraph", text: "Webhooks are delivered as `POST` requests with a JSON body. Configure webhook URLs per-source or per-extraction via the `webhook_url` option on the extract endpoint." },
9731
+ { type: "paragraph", text: "Most integrations configure a single webhook endpoint that handles all event types, using the `X-Talonic-Event` header to route internally. A typical setup is to pass `webhook_url` on `POST /v1/extract` calls, or configure a default URL in the dashboard for all extractions from a specific source." },
9732
+ { type: "paragraph", text: "Each delivery includes four standard headers: `X-Talonic-Event` (event type), `X-Talonic-Signature` (HMAC-SHA256 for verification), `X-Talonic-Delivery-Id` (unique ID for idempotency), and `X-Talonic-Timestamp` (Unix timestamp). Your endpoint must return a `2xx` status within **30 seconds** or the delivery is considered failed." },
9733
+ { type: "paragraph", text: "Pair webhook delivery with the [Signature Verification](webhook-security) guide to authenticate incoming payloads. Note that failed deliveries are retried with exponential backoff up to 4 times \u2014 see [Retry Policy](webhook-retry) for the schedule." },
9734
+ { type: "callout", text: "Use the `X-Talonic-Delivery-Id` header to deduplicate webhook deliveries on your end. Retries reuse the same delivery ID, so you can safely discard duplicates." }
9677
9735
  ],
9678
9736
  related: [
9679
9737
  { label: "Webhook Events", slug: "webhook-events" },
@@ -10229,6 +10287,9 @@ var sections27 = [
10229
10287
  description: "Classify link keys into categories (identity, transaction, reference) using AI. Runs asynchronously on ambiguous fields.",
10230
10288
  content: [
10231
10289
  { type: "paragraph", text: "When new fields are extracted, some may not be automatically classified as link keys. The classify endpoint runs AI-powered classification on ambiguous fields to determine whether they are **identity**, **transaction**, or **reference** link keys. This is useful after onboarding new document types or when the field registry grows." },
10290
+ { type: "paragraph", text: "Call this endpoint after uploading a new batch of documents or after adding a new document type to your workspace. The endpoint returns immediately with the count of fields that were classified \u2014 any graph rebuilding happens asynchronously via a triggered backfill." },
10291
+ { type: "paragraph", text: "The response includes a `classified` count (number of fields newly assigned a category) and a `backfillTriggered` boolean. When `backfillTriggered` is `true`, entity links across all documents are being rebuilt in the background. Poll the **Backfill** progress endpoint to monitor completion." },
10292
+ { type: "paragraph", text: "Only fields with a `null` category are evaluated \u2014 already-classified link keys are not re-assessed. To verify which fields were classified, call the **Link Keys** endpoint before and after. If no ambiguous fields remain, `classified` returns `0` and no backfill is triggered." },
10232
10293
  { type: "callout", variant: "info", text: "Classification uses a two-pass approach: rule-based heuristics handle obvious cases (e.g. fields named `invoice_number`), then an LLM call classifies the remaining ambiguous fields. A backfill is automatically triggered when new link keys are identified." },
10233
10294
  {
10234
10295
  type: "endpoint",
@@ -10283,6 +10344,9 @@ var sections27 = [
10283
10344
  description: "Get all entity links for a specific document showing entity values, types, link keys, and linked document IDs.",
10284
10345
  content: [
10285
10346
  { type: "paragraph", text: "Retrieve all entity links discovered for a specific document. Each link represents a shared field value \u2014 such as a customer ID or PO number \u2014 that connects this document to others in the workspace. Use this endpoint to understand how a document relates to the rest of your corpus." },
10347
+ { type: "paragraph", text: "Call this endpoint when building a document detail view or when you need to trace the relationships of a single document before exploring the broader graph. Pass the document UUID as a path parameter \u2014 the endpoint returns all entity links regardless of link key category." },
10348
+ { type: "paragraph", text: "Each entry in the response includes the **entity_value** (the raw shared value), the **field_key** (which field it was extracted from), and the **link_key_category** (`identity`, `transaction`, or `reference`). Documents with no extracted field values matching other documents return an empty `data` array." },
10349
+ { type: "paragraph", text: "Use this alongside the **Full Graph** subgraph endpoint to progressively explore the linking graph. Start here for a flat list of connections, then call the subgraph endpoint with `depth=2` to expand outward from the document and discover second-degree relationships." },
10286
10350
  { type: "callout", variant: "info", text: "The `document_count` field on each entity indicates how many documents share that value. A high count on an identity entity (e.g. a vendor ID appearing in 50+ documents) is expected, while a high count on a transaction entity may indicate a data quality issue." },
10287
10351
  {
10288
10352
  type: "endpoint",
@@ -10597,6 +10661,10 @@ var sections27 = [
10597
10661
  description: "List and retrieve cases \u2014 automatically created groups of 2+ related documents linked through shared field values with narrative summaries.",
10598
10662
  content: [
10599
10663
  { type: "paragraph", text: "Cases are automatically created groups of two or more documents that are connected through shared **transaction** or **reference** entity values. For example, an invoice, a purchase order, and a delivery note sharing the same PO number form a case. Cases provide a high-level view of document relationships without needing to navigate the full graph." },
10664
+ { type: "paragraph", text: "Use this endpoint to retrieve all cases in your workspace for building case lists, dashboards, or approval queues. Cases are returned most recent first, ordered by the earliest document timestamp in each case. Each case includes a `document_count` and a stable `case_key` that you can use for subsequent detail lookups." },
10665
+ { type: "paragraph", text: "The response includes a `links.self` URL for each case that points to the case detail endpoint. The `label` field contains an auto-generated human-readable name when available, or `null` for cases that have not yet been labelled. The `created_at` field reflects the timestamp of the earliest document in the group." },
10666
+ { type: "callout", variant: "info", text: "Each document belongs to at most one case. Documents linked only through identity entities (e.g. shared vendor ID) appear as entity groups in the full graph but are not returned by this endpoint." },
10667
+ { type: "paragraph", text: "Pair this endpoint with **Case Graph** to visualize individual cases, or with **Document-Case Map** for a flat document-to-case lookup. Cases are rebuilt automatically during backfill \u2014 if you have recently reclassified link keys, trigger a backfill first to ensure case assignments are up to date." },
10600
10668
  { type: "list", ordered: false, items: [
10601
10669
  "Each case has a deterministic **case key** (hex hash of its document IDs)",
10602
10670
  "Cases are created by the linking pipeline during backfill or real-time processing",
@@ -10671,6 +10739,10 @@ var sections27 = [
10671
10739
  description: "Retrieve the D3-compatible graph visualization for a single case, showing document nodes and entity edges within the case boundary.",
10672
10740
  content: [
10673
10741
  { type: "paragraph", text: "Retrieve the graph structure for a single case, formatted for **D3.js** or similar graph visualization libraries. The response contains only the nodes and edges within the case boundary, making it suitable for rendering focused relationship diagrams." },
10742
+ { type: "paragraph", text: "The typical workflow is to first list cases via the **Cases** endpoint, then call this endpoint with a specific `case_key` to fetch the renderable graph. This is the primary endpoint for building case-level visualizations in custom UIs or embedded dashboards." },
10743
+ { type: "paragraph", text: "The response includes both **document nodes** (with filename and inferred document type) and **entity nodes** (with the shared value and link key category). Edges always connect a document to an entity \u2014 never document-to-document directly. Node IDs are stable across requests, so you can preserve force-layout positions between refreshes." },
10744
+ { type: "callout", variant: "info", text: "The case graph is a strict subset of the full workspace graph. Only entities that contributed to forming the case are included \u2014 high-frequency entities excluded from BFS do not appear." },
10745
+ { type: "paragraph", text: "Pair this endpoint with **Document Links** to enrich each node with additional entity metadata, or with **Full Graph** when you need cross-case visibility. The graph structure mirrors the full graph format, so the same rendering code works for both." },
10674
10746
  {
10675
10747
  type: "endpoint",
10676
10748
  method: "GET",
@@ -10738,6 +10810,9 @@ var sections27 = [
10738
10810
  description: "Get the mapping of documents to their resolved cases. Returns a mapping of document IDs to assigned case keys.",
10739
10811
  content: [
10740
10812
  { type: "paragraph", text: "The document-case map provides a flat lookup from document ID to case assignment. Use it to quickly determine which case a document belongs to, or to identify documents that are not part of any case. Documents in **entity groups** (linked only through identity entities) are included with `is_case: false`." },
10813
+ { type: "paragraph", text: "Call this endpoint when you need to enrich a document list with case membership \u2014 for example, to display a case badge next to each document in a table view. The response is a flat object keyed by document UUID, so lookups are O(1) without client-side joins." },
10814
+ { type: "paragraph", text: "Each entry includes a `case_key` (the deterministic hex hash identifying the case), a `document_count` (total documents in that case or entity group), and an `is_case` boolean. When `is_case` is `false`, the `case_key` is an empty string \u2014 the document is linked via identity entities only." },
10815
+ { type: "paragraph", text: "This endpoint pairs well with the **Cases** list endpoint. Use the map for bulk lookups across your document set, and the Cases endpoint when you need case-level metadata like labels or timestamps. Documents with no entity links at all are omitted from the map entirely." },
10741
10816
  { type: "callout", variant: "info", text: "Documents with `is_case: false` are linked to other documents only through identity entities (e.g. same vendor). They appear in the map but do not form a case. Documents with no links at all are not included in the map." },
10742
10817
  {
10743
10818
  type: "endpoint",
@@ -13073,6 +13148,9 @@ var sections31 = [
13073
13148
  description: "Get metric trends over time for a schema. Returns time-series telemetry data across recent runs for tracking quality changes.",
13074
13149
  content: [
13075
13150
  { type: "paragraph", text: "Track how structuring metrics evolve over successive runs for a schema. This endpoint returns a **time-series** of telemetry snapshots, allowing you to detect quality improvements, regressions, or shifts in strategy distribution as your field registry matures." },
13151
+ { type: "paragraph", text: "Call this endpoint after several extraction runs to build trend charts or to detect regressions. The default window returns the 10 most recent runs \u2014 use the `window` query parameter to expand up to 50 runs for longer-term analysis." },
13152
+ { type: "paragraph", text: "Each snapshot in the `data` array contains the same metrics as the **Schema Summary** \u2014 `capture_hit_rate`, `synthesize_rate`, `strategy_distribution`, and `tier_funnel` \u2014 plus a `created_at` timestamp and `run_id`. The array is ordered by most recent run first." },
13153
+ { type: "paragraph", text: "Compare the trend data with the **Schema Fields** endpoint to pinpoint which specific fields are driving changes. A sudden spike in `synthesize_rate` across runs may indicate a new document type that the field registry has not yet learned, while a steady decrease signals healthy registry maturation." },
13076
13154
  { type: "callout", variant: "info", text: "A rising `capture_hit_rate` over time indicates the field registry is learning from extractions and resolving more fields deterministically, reducing LLM costs." },
13077
13155
  {
13078
13156
  type: "endpoint",
@@ -13177,6 +13255,9 @@ var sections31 = [
13177
13255
  description: "Get per-field structuring metrics for a schema including field-level state distribution, capture rates, and strategy breakdown.",
13178
13256
  content: [
13179
13257
  { type: "paragraph", text: "Drill down to **individual field performance** within a schema. This endpoint returns per-field capture rates, synthesis rates, the most common strategy used, and the distribution of cell states (filled, empty, skipped). Use it to identify underperforming fields that may need instruction tuning or manual review." },
13258
+ { type: "paragraph", text: "Call this endpoint after reviewing the **Schema Summary** to investigate which fields are driving low capture rates or high synthesis costs. The field-level breakdown reveals whether issues are concentrated in a few problematic fields or spread evenly across the schema." },
13259
+ { type: "paragraph", text: "Each entry in the `data` array includes the `field_name`, `capture_rate` and `synthesize_rate` (both 0-1 fractions), the dominant `strategy` (one of `transfer`, `extract`, `compute`, `skip`), and a `state_distribution` object with `filled`, `empty`, and `skipped` counts. Fields with a `strategy` of `extract` are LLM-dependent and contribute most to cost." },
13260
+ { type: "paragraph", text: "Pair this with the **Schema Trend** endpoint to track how individual field performance changes across runs. Fields that remain stuck on `extract` strategy after multiple runs are strong candidates for adding explicit instructions or seeding the field registry with example values." },
13180
13261
  { type: "callout", variant: "info", text: "Fields with a high `synthesize_rate` and low `capture_rate` are candidates for field registry enrichment or instruction refinement to reduce LLM dependency." },
13181
13262
  {
13182
13263
  type: "endpoint",
@@ -13258,6 +13339,9 @@ var sections31 = [
13258
13339
  description: "Get aggregate structuring metrics for a single job run including strategy distribution, tier funnel, and capture hit rate.",
13259
13340
  content: [
13260
13341
  { type: "paragraph", text: "Retrieve structuring telemetry for a **specific job run** rather than the latest run for a schema. Use this when you need to inspect the performance of a particular execution, compare two runs side by side, or debug a run that produced unexpected results." },
13342
+ { type: "paragraph", text: "The typical workflow is to list runs from your jobs pipeline, then call this endpoint with the run UUID to inspect its metrics. This is especially useful when a run produces unexpected accuracy \u2014 the telemetry reveals whether the issue is in capture (registry gaps), synthesis (LLM errors), or strategy selection." },
13343
+ { type: "paragraph", text: "The response includes `capture_hit_rate`, `synthesize_rate`, `strategy_distribution`, and `tier_funnel` \u2014 identical in shape to the **Schema Summary**. The `schema_id` field identifies which schema was used, allowing you to cross-reference with field-level telemetry. Runs that are still `pending` or `running` return a `404` until they complete." },
13344
+ { type: "paragraph", text: "To compare two runs, call this endpoint twice with different run IDs and diff the `strategy_distribution` and `tier_funnel` values. Pair with the **Schema Trend** endpoint when you need the full historical view rather than a point-in-time comparison." },
13261
13345
  { type: "callout", variant: "info", text: "The response shape is identical to the Schema Summary endpoint. The only difference is that this endpoint targets a specific run by ID instead of returning the latest run for a schema." },
13262
13346
  {
13263
13347
  type: "endpoint",
@@ -13407,6 +13491,9 @@ var sections32 = [
13407
13491
  description: "Get detail with expected values or delete a ground-truth dataset. Supports GET (read scope) and DELETE (write scope) on the same path.",
13408
13492
  content: [
13409
13493
  { type: "paragraph", text: "Retrieve the full details of a ground-truth dataset including all expected value entries, or permanently delete the dataset. The GET response includes every document-field pair with the expected value, which you can use to audit the benchmark data before running a validation." },
13494
+ { type: "paragraph", text: "Call GET before starting a validation run to verify that expected values are correct and complete. The `values` array contains every document-field pair with its `expected_value`, `document_id`, and `field_name` \u2014 review these to ensure the benchmark data reflects your current extraction requirements." },
13495
+ { type: "paragraph", text: "The response includes `entry_count` for a quick size check and `user_schema_id` to confirm schema scope. The `values` array entries each have their own UUID (`id`) and `created_at` timestamp. If the dataset is unscoped (`user_schema_id: null`), it can validate fields across any schema." },
13496
+ { type: "paragraph", text: "Use DELETE only when the dataset is no longer relevant. Existing validation runs that referenced this dataset are retained with their results intact, but you cannot create new runs against a deleted dataset. To update individual entries, delete and recreate the dataset with corrected values." },
13410
13497
  { type: "callout", variant: "warning", text: "Deleting a ground-truth dataset also removes all associated expected value entries. Existing validation runs that used this dataset are retained but can no longer be re-run." },
13411
13498
  {
13412
13499
  type: "endpoint",
@@ -13668,6 +13755,10 @@ var sections32 = [
13668
13755
  description: "Get validation run detail with accuracy summary or delete a run. Supports GET (read scope) and DELETE (write scope) on the same path.",
13669
13756
  content: [
13670
13757
  { type: "paragraph", text: "Retrieve the full details of a validation run, including its status, accuracy score, and total comparisons, or permanently delete a run and its associated results. Use GET to poll a run's status until it reaches `completed` or `failed`; once it is `completed`, fetch the detailed results." },
13758
+ { type: "paragraph", text: "After creating a validation run, poll this endpoint until the `status` field transitions from `pending` or `running` to `completed` or `failed`. Once completed, the `accuracy` field contains the overall score (0-1) and `total_comparisons` shows how many field-level comparisons were made." },
13759
+ { type: "paragraph", text: "The response includes `links.results` which points directly to the per-field results endpoint. Once the run reaches `completed` status, follow this link to retrieve the granular comparison data including match types, similarity scores, and LLM judge verdicts." },
13760
+ { type: "callout", variant: "warning", text: "Deleting a validation run permanently removes all per-field results. The ground-truth dataset and the original job run are not affected. Use DELETE only when you want to clean up outdated or erroneous runs." },
13761
+ { type: "paragraph", text: "Pair this endpoint with **Create Validation Run** for the create-then-poll workflow, or with **List Validation Runs** to find specific runs by recency. Comparing the `accuracy` values of multiple runs against the same ground-truth dataset is the primary way to track extraction quality over time." },
13671
13762
  {
13672
13763
  type: "endpoint",
13673
13764
  method: "GET",
@@ -13919,6 +14010,9 @@ var sections33 = [
13919
14010
  description: "Get credit transaction history including purchases, deductions, and adjustments with page-based pagination.",
13920
14011
  content: [
13921
14012
  { type: "paragraph", text: "Retrieve a log of every credit transaction on your account, ordered most recent first. Transactions include **purchases** (positive amounts), **consumption deductions** (negative amounts), **bonuses**, and **manual adjustments**. Use this to audit spending and reconcile usage." },
14013
+ { type: "paragraph", text: "Call this endpoint to build a transaction ledger view or to reconcile credit changes over a billing period. The response uses page-based pagination \u2014 pass `page` and `limit` query parameters to navigate through large transaction histories. The default page size is 20 with a maximum of 100." },
14014
+ { type: "paragraph", text: "Each transaction includes an `amount` (negative for deductions, positive for purchases), a `type` field (`consumption`, `purchase`, `bonus`, or `adjustment`), and an `operation_type` that identifies the pipeline operation responsible. The `total` field in the response gives the full count for pagination math." },
14015
+ { type: "paragraph", text: "Use this alongside the **Balance** endpoint to understand how your balance arrived at its current value. For aggregate cost analysis by operation type and model, the **Usage Summary** endpoint provides a more efficient grouped view without per-transaction detail." },
13922
14016
  { type: "callout", variant: "info", text: "Transactions are ordered by most recent first. Each entry includes the `operation_type` that triggered it (e.g. `extraction`, `manual`), making it easy to trace costs back to specific pipeline operations." },
13923
14017
  {
13924
14018
  type: "endpoint",
@@ -14001,6 +14095,9 @@ var sections33 = [
14001
14095
  description: "Get aggregate credit usage summary broken down by operation type and model for a configurable time period.",
14002
14096
  content: [
14003
14097
  { type: "paragraph", text: "Get a high-level view of your API usage grouped by **operation type** and **model**. This endpoint aggregates call counts, token consumption, and estimated costs over a configurable lookback period. Use it to understand which operations drive your spending." },
14098
+ { type: "paragraph", text: "Call this endpoint to build cost dashboards or to identify which pipeline operations consume the most credits. The default lookback is 30 days \u2014 pass the `days` query parameter to adjust. Each row in the `stats` array represents a unique combination of `operation_type` and `model`." },
14099
+ { type: "paragraph", text: "The response includes `call_count`, `total_input_tokens`, `total_output_tokens`, `total_cache_read_tokens`, and `total_cost_usd` per grouping. Note that token-based operations (e.g. `extraction` via Claude) report full token breakdowns, while page-based operations (e.g. `document_ai_ocr`) report zero tokens since cost is calculated from pages processed." },
14100
+ { type: "paragraph", text: "Pair with **Daily Usage** for time-series analysis of the same period, or with **Usage Log** to drill into individual requests behind a high-cost grouping. The `period_days` field in the response confirms the actual lookback window applied." },
14004
14101
  { type: "callout", variant: "info", text: "Cost estimates include all token classes: input tokens, output tokens, cache creation tokens, and cache read tokens. Each is priced at the model-specific rate." },
14005
14102
  {
14006
14103
  type: "endpoint",
@@ -14089,6 +14186,10 @@ var sections33 = [
14089
14186
  description: "Get per-day credit usage breakdown for the specified period (default last 30 days) with call counts and token totals per day.",
14090
14187
  content: [
14091
14188
  { type: "paragraph", text: "Get a per-day breakdown of API usage over a configurable period. Each entry includes the total number of API calls, input/output token counts, and estimated cost for that calendar date. Use this for usage trend analysis and daily cost monitoring." },
14189
+ { type: "paragraph", text: "Call this endpoint to populate daily usage charts or to set up alerting on cost spikes. The default lookback is 30 days \u2014 use the `days` query parameter to widen or narrow the window. Days with zero API calls are omitted from the response array." },
14190
+ { type: "paragraph", text: "Each entry contains a `date` (YYYY-MM-DD in UTC), `calls` (total API calls), `input_tokens`, `output_tokens`, and `cost_usd`. All timestamps are UTC \u2014 a call made at 23:59 UTC on a given date appears under that UTC date, not the caller's local date." },
14191
+ { type: "callout", variant: "info", text: "Daily usage is ordered by date ascending, making it ready for time-series charting without client-side sorting. Pair with the **Usage Summary** endpoint for operation-level breakdowns within the same period." },
14192
+ { type: "paragraph", text: "Combine this endpoint with **Balance** to correlate daily burn against remaining runway. If you notice a cost spike on a specific date, drill into the **Usage Log** to identify the individual requests responsible." },
14092
14193
  {
14093
14194
  type: "endpoint",
14094
14195
  method: "GET",
@@ -14358,6 +14459,9 @@ var sections34 = [
14358
14459
  description: "List all tools available to the embedded agent including their impact level (read/write) and descriptions for discovering agent capabilities.",
14359
14460
  content: [
14360
14461
  { type: "paragraph", text: "Discover all tools available to the embedded AI agent. Each tool declares its **impact level** \u2014 whether it performs a read-only operation or a mutation \u2014 so you can build permission-aware integrations. Use this endpoint to dynamically generate tool descriptions for external AI agents or to audit available capabilities." },
14462
+ { type: "paragraph", text: "Call this endpoint at startup to populate your integration's tool registry, or periodically to detect newly added capabilities. The response includes every tool the agent can invoke, with a stable `name` identifier, a human-readable `description`, and the `impact` classification." },
14463
+ { type: "paragraph", text: "The `totalCount` field gives the total number of tools available. Each tool's `impact` field follows a four-level severity scale: `read`, `draft_mutation`, `live_mutation`, and `irreversible`. Use these levels to build confirmation gates \u2014 for example, auto-approve `read` tools but require user confirmation for `live_mutation` and above." },
14464
+ { type: "paragraph", text: "Pair this with the **Workspace Context** endpoint to give your external AI agent both situational awareness (context) and available actions (tools). The tool names returned here are stable identifiers that can be referenced in custom orchestration logic or permission policies." },
14361
14465
  { type: "callout", variant: "info", text: "Impact levels follow a severity scale: `read` (no side effects), `draft_mutation` (creates drafts only), `live_mutation` (modifies live data), and `irreversible` (permanent changes like deletion). Use these to implement confirmation gates in your integration." },
14362
14466
  {
14363
14467
  type: "endpoint",
@@ -14536,6 +14640,9 @@ var sections35 = [
14536
14640
  description: "Create a matching configuration with field mappings, comparison strategies (exact, fuzzy, date_range, numeric_range), and per-field weights that sum to 1.0.",
14537
14641
  content: [
14538
14642
  { type: "paragraph", text: "Create a matching configuration that defines how documents are compared against a reference dataset. Each field mapping specifies a source field (from extracted documents), a target column (in the reference data), a comparison strategy, and a relative weight." },
14643
+ { type: "paragraph", text: "The typical workflow is: upload reference data via `POST /v1/matching/reference-data`, create a config with field mappings, then trigger a run via `POST /v1/matching/configs/:id/run`. For complex datasets, use `POST /v1/matching/strategies/generate` first to get AI-recommended mappings and weights." },
14644
+ { type: "paragraph", text: "The response returns the config with the saved `field_mappings`, `threshold` (defaults to 0.85), and `links.runs` URL for triggering runs. The `reference_data_id` is fixed at creation \u2014 to match against a different dataset, create a new config." },
14645
+ { type: "paragraph", text: "Choose strategies carefully: use `exact` for standardized codes and IDs, `fuzzy` for names with potential typos, `date_range` for dates with tolerance, and `numeric_range` for amounts with rounding differences. Weights must sum to 1.0 \u2014 fields with higher weights have more influence on the overall confidence score." },
14539
14646
  { type: "callout", variant: "info", text: "Field weights should sum to 1.0. The overall confidence score for a match is the weighted sum of per-field scores. Use the **generate strategy** endpoint to get AI-recommended mappings if you are unsure which fields and weights to use." },
14540
14647
  {
14541
14648
  type: "list",
@@ -14657,6 +14764,10 @@ var sections35 = [
14657
14764
  description: "Get matching configuration details, update field mappings and weights, or delete a configuration. Deleting a config does not remove past run results.",
14658
14765
  content: [
14659
14766
  { type: "paragraph", text: "Retrieve, update, or delete a matching configuration. Updates to field mappings and thresholds take effect on the next run \u2014 they do not retroactively change past results. Deleting a config removes the configuration but preserves all historical run results for audit purposes." },
14767
+ { type: "paragraph", text: "Use `GET` to inspect the current field mappings, threshold, and targeting mode before running a match. Use `PUT` to adjust weights, swap strategies, or change the threshold \u2014 a common pattern is to lower the threshold after reviewing low-confidence results, then re-run to capture more matches." },
14768
+ { type: "paragraph", text: "The `PUT` response returns the full updated config. The `reference_data_id` cannot be changed after creation \u2014 to match against a different dataset, create a new config. The `links.runs` URL provides a convenient shortcut to trigger a new run with the updated config." },
14769
+ { type: "paragraph", text: "Deleting a config is safe for audit \u2014 all historical run results, including per-document evidence and confidence scores, are preserved. Pair config updates with the generate strategy endpoint to get AI-recommended adjustments based on your reference dataset." },
14770
+ { type: "callout", variant: "info", text: "Past run results are immutable. Updating field mappings or thresholds only affects future runs \u2014 re-run matching after config changes to see the updated results." },
14660
14771
  {
14661
14772
  type: "endpoint",
14662
14773
  method: "GET",
@@ -14935,6 +15046,9 @@ var sections35 = [
14935
15046
  description: "Get the status, progress, and summary of a matching run. Status progresses from queued to running to completed or failed.",
14936
15047
  content: [
14937
15048
  { type: "paragraph", text: "Retrieve the current state of a matching run. Poll this endpoint while `status` is `queued`, `running`, or `ai_resolving` to track progress. Once `completed`, the response includes the top 50 results by confidence. Use the results endpoint for full paginated access." },
15049
+ { type: "paragraph", text: "Poll this endpoint after triggering a run via `POST /v1/matching/configs/:id/run`. A typical polling pattern is to check every 5-10 seconds while `status` is `queued`, `running`, or `ai_resolving`. Use `GET /v1/matching/runs/:id/progress` for lighter-weight progress updates during long runs." },
15050
+ { type: "paragraph", text: "Once completed, the response includes `rows_processed`, `rows_matched`, and `avg_confidence` at the run level, plus a `results` array with the top 50 matches by confidence. Each result includes `document_id`, `matched_reference_row_id`, `confidence` score, review `status` (`pending`, `approved`, `rejected`), and per-field `evidence` breakdown." },
15051
+ { type: "paragraph", text: "For the full result set beyond the top 50, use `GET /v1/matching/runs/:id/results` with pagination. Use `POST /v1/matching/runs/:runId/results/:resultId/review` to approve or reject individual matches. If `status` is `ai_resolving`, the run is using Claude Haiku to disambiguate borderline matches \u2014 this phase adds latency but can significantly improve accuracy on ambiguous rows." },
14938
15052
  { type: "callout", variant: "info", text: "The `ai_resolving` status indicates that the run has finished standard matching and is now running an AI resolution pass on low-confidence rows. This pass uses Claude Haiku to disambiguate borderline matches." },
14939
15053
  {
14940
15054
  type: "endpoint",
@@ -15037,6 +15151,9 @@ var sections35 = [
15037
15151
  description: "Retrieve matching results for a completed run. Returns the top 5 candidates per document with weighted confidence scores and per-field evidence breakdowns.",
15038
15152
  content: [
15039
15153
  { type: "paragraph", text: "Retrieve the full paginated results for a completed matching run. Each result represents a document matched (or unmatched) against the reference dataset, with a weighted confidence score and per-field evidence breakdown showing how each field contributed to the overall score." },
15154
+ { type: "paragraph", text: "Use this endpoint after a run completes to review all matches. Filter by `status=pending` to see matches awaiting review, or `status=approved` to see confirmed matches. Paginate with `page` and `limit` \u2014 the run detail endpoint only shows the top 50 results, while this endpoint provides full access." },
15155
+ { type: "paragraph", text: "Each result includes a per-field `evidence` object showing the strategy used and individual score for each field mapping. A `null` `matched_reference_row_id` means no reference row scored above the configured threshold for that document. The `confidence` score is the weighted sum of per-field scores using the weights from the matching config." },
15156
+ { type: "paragraph", text: "Use `POST /v1/matching/runs/:runId/results/:resultId/review` to approve or reject individual matches programmatically. Pair with the config detail endpoint to understand which field mappings and thresholds produced these results. Re-run matching with adjusted weights or a lower threshold to capture more matches." },
15040
15157
  { type: "callout", variant: "info", text: "Results with `status: pending` have not been reviewed. Use `POST /v1/matching/runs/:runId/results/:resultId/review` to approve or reject individual matches. Approved matches can be used downstream for data enrichment and reconciliation workflows." },
15041
15158
  {
15042
15159
  type: "endpoint",
@@ -15331,6 +15448,9 @@ var sections36 = [
15331
15448
  description: "Create a delivery destination with connector type, transport config, and authentication. Supported types: webhook, sftp, s3, azure_blob, google_drive, onedrive.",
15332
15449
  content: [
15333
15450
  { type: "paragraph", text: "Create a new delivery destination by specifying the connector type, transport configuration, and optional authentication. The `config` and `auth_config` schemas vary by destination type \u2014 see the catalog endpoint for connector capabilities." },
15451
+ { type: "paragraph", text: "The typical workflow is: create a destination first, then create one or more bindings that route signals to it. Call `GET /v1/delivery/catalog/connectors` to see which connector types are available and what `config` and `auth_config` schemas each expects." },
15452
+ { type: "paragraph", text: "The response returns the created destination with `is_active: true` and `last_delivery_at: null`. Auth credentials are never echoed back \u2014 use the `has_auth_config` and `has_signing_secret` booleans to confirm they were stored. After creation, use `POST /v1/delivery/destinations/:id/test` to verify connectivity before setting up bindings." },
15453
+ { type: "paragraph", text: "For webhook destinations, include a `signing_secret` in `auth_config` to enable HMAC-SHA256 request signing. For file-drop destinations (S3, SFTP, Azure Blob), set `payload_cap_bytes` if you need to override the global 5 MiB cap. OAuth destinations (Google Drive, OneDrive) require completing the OAuth flow first." },
15334
15454
  { type: "callout", variant: "info", text: "OAuth-based destinations (google_drive, onedrive) require completing an OAuth flow before creating the destination. Use the OAuth start endpoint to initiate the flow and obtain tokens." },
15335
15455
  {
15336
15456
  type: "endpoint",
@@ -15433,6 +15553,9 @@ var sections36 = [
15433
15553
  description: "Get destination details, update config, delete a destination, or send a test payload to verify connectivity. Auth credentials are always redacted in responses.",
15434
15554
  content: [
15435
15555
  { type: "paragraph", text: "Manage a single destination: retrieve its current config, update transport settings or credentials, delete it, or test connectivity. The **test** endpoint probes the destination without delivering real data \u2014 file-drop connectors (S3, SFTP, Azure Blob) verify bucket/container reachability without writing any objects." },
15556
+ { type: "paragraph", text: "Use `GET` to inspect current config and delivery status. Use `PUT` to rotate credentials or change the target URL/bucket. Use `POST /test` after updating credentials to verify the new config works before live traffic flows through it. Use `DELETE` only when permanently removing a destination." },
15557
+ { type: "paragraph", text: "The `GET` response includes `last_delivery_at` and `last_delivery_status` to show the most recent delivery attempt. The `is_active` flag indicates whether the destination is enabled \u2014 destinations are automatically disabled on `auth_failed` or `ssrf_blocked` errors. The test endpoint returns `success`, `durationMs`, and an optional `message` describing what was probed." },
15558
+ { type: "paragraph", text: "If a destination becomes inactive due to auth failure, fix the credentials via `PUT`, then call the test endpoint to verify. The destination will be re-enabled automatically on a successful update. Prefer disabling (`is_active: false` via `PUT`) over deleting when you want to pause delivery but keep the history." },
15436
15559
  { type: "callout", variant: "warning", text: "Deleting a destination cascades to all its bindings, delivery items, and DLQ entries. This is irreversible. Disable the destination (`is_active: false`) instead if you want to preserve history." },
15437
15560
  {
15438
15561
  type: "endpoint",
@@ -15721,6 +15844,9 @@ var sections36 = [
15721
15844
  description: "Create a delivery binding that routes domain signals through a deliverable resolver and serializer to a destination. Includes field mapping and retry policy configuration.",
15722
15845
  content: [
15723
15846
  { type: "paragraph", text: "Create a binding that wires a domain event to a destination. The **compatibility triangle** is validated on creation: the signal event type must be compatible with the deliverable resolver, the serializer must support the deliverable shape, and the connector must support the serializer format." },
15847
+ { type: "paragraph", text: "The typical workflow is: query the catalog endpoints top-down (signals, then deliverables, then serializers, then connectors), pick compatible values, and create the binding. A single event can fan out to multiple bindings \u2014 create separate bindings for each destination or output format you need." },
15848
+ { type: "paragraph", text: "The response returns the binding with `is_active: true` and `last_status: null`. The `field_map` controls payload projection: use `static` to inject fixed values, `drop` to remove fields, and key-value pairs to rename fields. The `delivery_policy` defaults to 7 attempts with exponential backoff over ~10 hours if omitted." },
15849
+ { type: "paragraph", text: "After creation, the binding is immediately live \u2014 the next matching signal will trigger delivery. Use `POST /v1/delivery/bindings/:id/preview` (internal) to dry-run the resolve-project-serialize pipeline. Monitor delivery health via the history and DLQ endpoints." },
15724
15850
  { type: "callout", variant: "info", text: "Use the catalog endpoints (`/v1/delivery/catalog/*`) to discover valid combinations before creating a binding. The catalog lists all available signals, deliverables, serializers, and connectors with their compatibility constraints." },
15725
15851
  {
15726
15852
  type: "endpoint",
@@ -15823,6 +15949,10 @@ var sections36 = [
15823
15949
  description: "Get binding details, update signal filters or field maps, delete a binding, or preview the resolved payload for a binding without sending it.",
15824
15950
  content: [
15825
15951
  { type: "paragraph", text: "Manage a single delivery binding: retrieve its configuration, update the signal filter or field map, delete it, or preview the payload it would produce. Updates re-validate the compatibility triangle. Deleting a binding stops future routing but allows in-flight deliveries to complete." },
15952
+ { type: "paragraph", text: "Use `GET` to inspect the current binding config and `last_status`. Use `PUT` to adjust the signal filter, field map, or retry policy \u2014 changes take effect on the next matching event. Use `DELETE` when the binding is no longer needed; in-flight deliveries already in the job queue will still complete." },
15953
+ { type: "paragraph", text: "The `PUT` response returns the full updated binding. The compatibility triangle is re-validated on every update \u2014 if you change the `signal_filter.event_type` or `serializer_format`, the system verifies the new combination is still valid. The preview endpoint (`POST /preview`) walks the resolve-project-serialize pipeline with a synthetic signal and returns the wire output without delivering; note that the public API version of this endpoint currently returns a stub response." },
15954
+ { type: "paragraph", text: "Pair updates with the delivery history endpoint to verify the binding is producing expected results. If `last_status` shows `failed`, check the DLQ for error details before adjusting the binding config." },
15955
+ { type: "callout", variant: "info", text: "The public API preview endpoint currently returns a stub response. The internal preview endpoint is fully functional and walks the full resolve, project, and serialize pipeline with structural fallback." },
15826
15956
  {
15827
15957
  type: "endpoint",
15828
15958
  method: "GET",
@@ -16031,6 +16161,9 @@ var sections36 = [
16031
16161
  description: "View delivery attempt history with status, HTTP codes, and timing. Get detail for a single item or replay a failed delivery attempt.",
16032
16162
  content: [
16033
16163
  { type: "paragraph", text: "The delivery history tracks every attempt to deliver a payload to a destination. Each attempt is recorded as a **delivery item** with status, timing, HTTP response code, and optional request/response bodies. Use this endpoint to audit delivery performance and debug failures." },
16164
+ { type: "paragraph", text: "Query items by `binding_id` or `destination_id` to narrow results to a specific delivery path. Filter by `status` to find failures (`failed`) or in-progress attempts (`in_flight`). Use `GET /v1/delivery/items/:id` to inspect the full request and response bodies for a single attempt." },
16165
+ { type: "paragraph", text: "Each item includes an `idempotency_key` (deterministic SHA-256 of binding ID and event ID) that is sent on the wire so receivers can deduplicate. The `attempt` field is 1-indexed \u2014 multiple items with the same `event_id` and `binding_id` represent retries of the same delivery. Status values are `in_flight`, `succeeded`, or `failed`." },
16166
+ { type: "paragraph", text: "Use `POST /v1/delivery/items/:id/replay` to re-enqueue a specific attempt with a fresh attempt number but the same idempotency key. For terminal failures, check the DLQ endpoint instead \u2014 items that exhausted all retries are moved there automatically. Pair history inspection with binding and destination detail to diagnose delivery issues end-to-end." },
16034
16167
  { type: "callout", variant: "info", text: "Request and response bodies are truncated to 10 KB and retained for a configurable period (default 30 days). After the retention period, bodies are nulled but metadata (status, HTTP code, duration, error code) is preserved indefinitely." },
16035
16168
  {
16036
16169
  type: "endpoint",
@@ -16689,6 +16822,9 @@ var sections37 = [
16689
16822
  description: "Get detailed information for a single extraction batch including item counts, provider, status, and timing. Shows per-item breakdown when the batch is completed.",
16690
16823
  content: [
16691
16824
  { type: "paragraph", text: "Retrieve the full batch record including per-item status. Poll this endpoint while `status` is `submitted` to track progress. Once `completed`, each item shows its individual outcome and processing timestamp." },
16825
+ { type: "paragraph", text: "Use this endpoint to monitor a batch after submission. Poll periodically while `status` is `submitted` \u2014 results typically arrive within 24 hours. Once `status` changes to `completed`, `failed`, or `cancelled`, polling can stop. Use the sync endpoint to force an immediate provider check instead of waiting for the hourly poll." },
16826
+ { type: "paragraph", text: "The response includes `items` \u2014 an array of per-document results. Each item has a `status` (`pending`, `processing`, `completed`, or `failed`), the associated `document_id` and `document_filename`, and a `processed_at` timestamp. The `custom_id` field shows the provider-assigned identifier used when submitting to Anthropic or Bedrock." },
16827
+ { type: "paragraph", text: "Failed items are automatically retried via **realtime** extraction, never re-batched, to preserve the 48-hour SLA. Check the `errored_count` and `expired_count` fields at the batch level, and individual `items[].error_message` for per-document failure details. Pair with `GET /v1/documents/:id` to check the final extraction status of any document in the batch." },
16692
16828
  { type: "callout", variant: "info", text: "Items that fail extraction in the batch are retried via **realtime** extraction (never re-batched) to preserve the original 48-hour SLA. Check `items[].status` for per-document outcomes." },
16693
16829
  {
16694
16830
  type: "endpoint",
@@ -16787,6 +16923,9 @@ var sections37 = [
16787
16923
  description: "Force a sync with the provider to check for batch results. Useful when you do not want to wait for the hourly automatic poll.",
16788
16924
  content: [
16789
16925
  { type: "paragraph", text: "Force an immediate check with the batch provider (Anthropic or Bedrock) for results. By default, batches are polled automatically every hour. Use this endpoint when you need results sooner or want to verify the current provider-side status." },
16926
+ { type: "paragraph", text: "Call sync when you need results before the next hourly poll. A typical pattern is to submit documents in batch mode, wait a few hours, then call sync to check if results are ready. If the batch is still processing, the response reflects the current provider-side status without changing anything." },
16927
+ { type: "paragraph", text: "The response returns the full batch object with updated counts. If results are ready, `status` transitions to `completed` and `succeeded_count`, `errored_count`, and `expired_count` are populated. If the batch is still processing on the provider side, `status` remains `submitted` and counts stay at zero." },
16928
+ { type: "paragraph", text: "Syncing an `accumulating` batch has no effect since it has not been submitted to the provider yet. Syncing a `completed` or `cancelled` batch is safe but returns the same data. Pair with `GET /v1/batches/:id` to inspect per-item results after the sync completes." },
16790
16929
  {
16791
16930
  type: "endpoint",
16792
16931
  method: "POST",
@@ -16864,6 +17003,9 @@ var sections37 = [
16864
17003
  description: "Cancel an in-progress extraction batch. Only batches in accumulating or submitted status can be cancelled. Completed batches cannot be rolled back.",
16865
17004
  content: [
16866
17005
  { type: "paragraph", text: "Cancel a batch that is still `accumulating` or `submitted`. Cancellation sends a stop request to the provider if the batch was already submitted. Documents in the cancelled batch revert to `batch_queued` status and can be resubmitted or processed via realtime extraction." },
17006
+ { type: "paragraph", text: "Use cancellation when you need to abort a batch \u2014 for example, if documents were submitted with an incorrect schema or you need results faster via realtime extraction. Cancel as early as possible; items already processed by the provider before the cancellation lands may still have their results applied." },
17007
+ { type: "paragraph", text: "The response returns the batch with `status: cancelled`. The `succeeded_count` may be non-zero if some items were processed before cancellation took effect. Documents revert to `batch_queued` status and can be re-processed by updating their `processing_mode` to `realtime` or by including them in a new batch." },
17008
+ { type: "paragraph", text: "Only batches in `accumulating` or `submitted` status can be cancelled \u2014 calling cancel on a `completed`, `failed`, or already `cancelled` batch returns `400`. Pair with `GET /v1/batches/:id` after cancellation to inspect which items were processed before the stop request landed." },
16867
17009
  {
16868
17010
  type: "endpoint",
16869
17011
  method: "POST",
@@ -17032,6 +17174,9 @@ var sections38 = [
17032
17174
  description: "Retrieve a case by its key (e.g. CASE-001) including linked documents, shared entities, AI-generated narration, label, and anomaly count.",
17033
17175
  content: [
17034
17176
  { type: "paragraph", text: "Retrieve the full detail of a case including its documents, AI-generated narrative summary, and anomaly count. The narrative is generated by Claude and summarizes the relationships between documents in the case." },
17177
+ { type: "paragraph", text: "Call this endpoint after listing cases to drill into a specific case. The typical workflow is to list cases with filters, then fetch detail for cases that need review. The response includes the full document list and anomaly count, so you can assess case health in a single call." },
17178
+ { type: "paragraph", text: "The response includes `documents` (array of document objects with `id`, `filename`, `document_type`, and `created_at`), a `narrative` string (or `null` if narration has not been triggered), and `anomaly_count`. The `links` object provides convenience URLs for the case itself and its documents list." },
17179
+ { type: "paragraph", text: "Pair with `POST /v1/cases/:key/narrate` to generate narratives, and `GET /v1/cases/:key/evidence` to inspect the field-level linking data. If `anomaly_count` is non-zero, fetch the anomalies endpoint to see which structural issues were detected." },
17035
17180
  { type: "callout", variant: "info", text: "The `narrative` field is generated on demand via `POST /v1/cases/:key/narrate`. It will be `null` until narration is triggered for this case." },
17036
17181
  {
17037
17182
  type: "endpoint",
@@ -17237,6 +17382,9 @@ var sections38 = [
17237
17382
  description: "List evidence items within a case. Filter by validation status, source document, category, or free-text search across evidence fields.",
17238
17383
  content: [
17239
17384
  { type: "paragraph", text: "Evidence items are the extracted field values from documents in a case, annotated with validation status and confidence scores. Use evidence to audit the data quality within a case and understand which fields link documents together." },
17385
+ { type: "paragraph", text: "Use this endpoint after fetching case detail to inspect the field-level data that forms the case. A typical workflow is to filter by `status=invalid` to surface extraction issues, or by `document_id` to audit a specific document's contribution to the case." },
17386
+ { type: "paragraph", text: "Each evidence item includes a `field_key`, extracted `value`, validation `status` (`valid`, `invalid`, or `pending`), the source `document_id`, an optional `category` (e.g. `identity`, `financial`), and a `confidence` score between 0 and 1. The confidence score reflects extraction certainty and is independent of the validation outcome." },
17387
+ { type: "paragraph", text: "Combine evidence with the anomalies endpoint to get a complete quality picture. Evidence shows individual field values; anomalies show structural patterns across multiple evidence items (e.g. conflicting values for the same field). Use the `search` parameter for free-text queries across all evidence fields." },
17240
17388
  { type: "callout", variant: "info", text: "Evidence is produced by the evidence validation engine, which runs rule-based validators (structural checks, checksum validation, domain packs) against extracted values. Each evidence item records the validation outcome for a specific field on a specific document." },
17241
17389
  {
17242
17390
  type: "endpoint",
@@ -17500,6 +17648,9 @@ var sections38 = [
17500
17648
  description: "Pin or remove documents within a case. Pinned documents are highlighted in the case view and preserved during case operations.",
17501
17649
  content: [
17502
17650
  { type: "paragraph", text: "Manage document membership within a case. **Pin** a document to mark it as important \u2014 pinned documents are highlighted in the UI and preserved during split operations. **Remove** a document to detach it from the case entirely." },
17651
+ { type: "paragraph", text: "Use pinning to flag key documents during case review \u2014 for example, pin the primary invoice in a multi-document case so it stays visible. Use removal when a document was incorrectly linked and should not belong to this case. Both operations are immediate and do not require a recompute." },
17652
+ { type: "paragraph", text: 'Pin returns `{ "success": true }` on success. Remove also returns `{ "success": true }`. Both endpoints return `404` if the case or document is not found. The pin status is reflected in the case detail response from `GET /v1/cases/:key`.' },
17653
+ { type: "paragraph", text: "Pinned documents are preserved in the original partition during split operations \u2014 they always stay with the case they are pinned to. If you plan to split a case, pin the anchor documents first. Removed documents may reappear in the case after a recompute if linking edges still connect them." },
17503
17654
  { type: "callout", variant: "info", text: "Removing a document from a case does not delete the document itself. The document remains in your workspace and may be re-linked into a case during the next recompute cycle if linking edges still exist." },
17504
17655
  {
17505
17656
  type: "endpoint",
@@ -18162,6 +18313,9 @@ var sections40 = [
18162
18313
  description: "List all ground truth datasets used for benchmarking extraction accuracy. Each dataset contains manually verified entries that serve as the gold standard.",
18163
18314
  content: [
18164
18315
  { type: "paragraph", text: "Ground truth datasets contain manually verified data entries that serve as the gold standard for measuring extraction accuracy. Create datasets, add entries, then run benchmarks against extraction results." },
18316
+ { type: "paragraph", text: "Use this endpoint to see all available datasets before creating a benchmark run. A typical workflow is to list datasets, select the one covering the document type you want to evaluate, then pass its `id` to `POST /v1/quality/benchmarks` to start a run." },
18317
+ { type: "paragraph", text: "Each dataset includes a `name`, optional `description`, `user_schema_id` (if scoped to a schema), `document_count` (number of verified entries), and a `links.self` URL for the detail endpoint. Datasets are returned in descending creation order with cursor-based pagination." },
18318
+ { type: "paragraph", text: "Create separate datasets for different document types or schema versions to track accuracy independently. Pair with the benchmark endpoints to measure extraction quality over time \u2014 run benchmarks after schema changes or pipeline updates to detect regressions." },
18165
18319
  { type: "list", ordered: false, items: [
18166
18320
  "Each dataset contains verified entries mapping documents to expected field values",
18167
18321
  "Datasets can be scoped to a specific user schema via `user_schema_id`",
@@ -18256,6 +18410,10 @@ var sections40 = [
18256
18410
  description: "Create a new ground truth dataset linked to a schema. The dataset defines the expected extraction output used for accuracy benchmarking.",
18257
18411
  content: [
18258
18412
  { type: "paragraph", text: "Create an empty ground truth dataset that you can populate with verified entries. Datasets serve as the baseline for benchmark runs that measure extraction accuracy. After creating a dataset, add entries individually or import them in bulk via CSV." },
18413
+ { type: "paragraph", text: "The typical workflow is: create the dataset, then populate it using `POST /v1/quality/ground-truth/:id/entries` for individual entries or `POST /v1/quality/ground-truth/:id/entries/import-csv` for bulk import. Once populated, create a benchmark run with `POST /v1/quality/benchmarks`." },
18414
+ { type: "paragraph", text: "The response returns the dataset with `document_count: 0` since it is initially empty. The `user_schema_id` is `null` unless you associate it with a schema. The `links.self` URL points to the detail endpoint where you can retrieve entries or delete the dataset." },
18415
+ { type: "paragraph", text: "For best results, aim for at least 30-50 entries per dataset. Linking a dataset to a `user_schema_id` ensures ground truth field names align with your extraction schema, producing more meaningful benchmark comparisons." },
18416
+ { type: "callout", variant: "info", text: "Field keys in `expected_data` entries should match the field names used in your extraction schema. Unmatched fields are stored but ignored during benchmark comparison." },
18259
18417
  {
18260
18418
  type: "endpoint",
18261
18419
  method: "POST",
@@ -18330,6 +18488,9 @@ var sections40 = [
18330
18488
  description: "Retrieve a ground truth dataset by ID with metadata and entry count, or delete it permanently. Deleting a dataset does not remove associated benchmark results.",
18331
18489
  content: [
18332
18490
  { type: "paragraph", text: "Retrieve a dataset with its metadata and sample entries, or delete it permanently. The GET response includes a `samples` array with the actual ground truth entries, allowing you to inspect the expected values for each document." },
18491
+ { type: "paragraph", text: "Use `GET` to inspect the dataset contents before running a benchmark. The `samples` array contains all ground truth entries with their `document_id`, `expected_data` (key-value map of verified field values), and optional `notes`. This lets you verify the dataset is correctly populated." },
18492
+ { type: "paragraph", text: "The `document_count` field shows how many entries exist. For large datasets, the `samples` array may produce a sizable response. The `user_schema_id` indicates whether the dataset is scoped to a specific extraction schema, which improves benchmark accuracy by ensuring field name alignment." },
18493
+ { type: "paragraph", text: "Use `DELETE` when a dataset is outdated or no longer needed. Benchmark results that referenced this dataset are preserved for historical tracking \u2014 the benchmark retains the `dataset_id` even after the dataset itself is removed. Create a new dataset with updated entries rather than modifying existing ones." },
18333
18494
  { type: "callout", variant: "warning", text: "Deleting a dataset is permanent. However, benchmark results that used this dataset are retained for historical reference. The benchmark will show the `dataset_id`, but the dataset itself will no longer be retrievable." },
18334
18495
  {
18335
18496
  type: "endpoint",
@@ -18595,6 +18756,9 @@ var sections40 = [
18595
18756
  description: "List benchmark runs that compare extraction results against ground truth datasets. Each run produces per-field accuracy metrics.",
18596
18757
  content: [
18597
18758
  { type: "paragraph", text: "Benchmark runs compare your extraction output against ground truth datasets to produce per-field accuracy scores. Each run evaluates every document in the dataset and produces an `accuracy_overall` score along with per-field breakdowns. Use benchmarks to track extraction quality over time and measure the impact of schema or pipeline changes." },
18759
+ { type: "paragraph", text: "Use this endpoint to see all benchmark runs and their accuracy scores. A typical workflow is to list benchmarks after making schema or pipeline changes, then compare the latest run against previous ones using `GET /v1/quality/benchmarks/compare` to measure improvement or detect regressions." },
18760
+ { type: "paragraph", text: "Each benchmark includes `status` (`queued`, `running`, `completed`, or `failed`), `accuracy_overall` (score from 0 to 1, `null` until the run completes), `accuracy_by_field` (per-field breakdown), and `documents_processed`/`documents_total` for progress tracking. The `accuracy_delta` and `compared_to_run_id` fields support cross-run comparisons." },
18761
+ { type: "paragraph", text: "Run benchmarks regularly after extraction pipeline changes. Pair with `GET /v1/quality/benchmarks/:id/results` for per-document drill-down showing which fields matched and which diverged. Use the compare endpoint to track accuracy trends across multiple runs." },
18598
18762
  {
18599
18763
  type: "endpoint",
18600
18764
  method: "GET",
@@ -18704,6 +18868,9 @@ var sections40 = [
18704
18868
  description: "Start a benchmark run that compares a job run output against a ground truth dataset. Produces per-field accuracy scores and overall metrics.",
18705
18869
  content: [
18706
18870
  { type: "paragraph", text: "Start a new benchmark run that evaluates your current extraction output against a ground truth dataset. The benchmark compares each document in the dataset entry-by-entry and field-by-field, producing an overall accuracy score and per-field breakdowns." },
18871
+ { type: "paragraph", text: "The typical workflow is: create a benchmark after making extraction pipeline changes, poll `GET /v1/quality/benchmarks/:id` until `status` is `completed`, then inspect results. Run multiple benchmarks against the same dataset over time to track accuracy trends." },
18872
+ { type: "paragraph", text: "The response returns the benchmark with `status: queued`, `accuracy_overall: null`, and `documents_processed: 0`. The `documents_total` field reflects how many entries are in the dataset. Poll the detail endpoint to check `status` and `documents_processed` for progress. Once completed, `accuracy_overall` and `accuracy_by_field` are populated." },
18873
+ { type: "paragraph", text: "Multiple benchmarks can run in parallel against different datasets. Use `GET /v1/quality/benchmarks/compare` after completion to compare two runs side by side. The `dataset_id` is fixed at creation \u2014 to benchmark against a different dataset, create a new run." },
18707
18874
  { type: "callout", variant: "info", text: "Benchmark runs are asynchronous. The endpoint returns immediately with status `queued`. Poll the benchmark detail endpoint or list benchmarks to check when the run completes." },
18708
18875
  {
18709
18876
  type: "endpoint",
@@ -19033,6 +19200,9 @@ var sections41 = [
19033
19200
  description: "Create a new routing rule with conditions on document properties and actions to apply when matched. Conditions can match document type, source, and other metadata.",
19034
19201
  content: [
19035
19202
  { type: "paragraph", text: 'Create a rule that automatically applies actions to incoming documents based on their metadata. Conditions define what to match (e.g. document type equals "invoice"), and actions define what to do (e.g. assign the finance schema). Rules are evaluated on every `document_classified` event.' },
19203
+ { type: "paragraph", text: 'The typical workflow is: create rules ordered by specificity \u2014 put narrow, high-priority rules first (e.g. "contracts from vendor X") and broader catch-all rules last. New rules are active immediately upon creation, so the next classified document will be evaluated against them.' },
19204
+ { type: "paragraph", text: "The response returns the rule with `is_active: true`, a `trigger_type` of `document_classified`, and the assigned `priority` (defaults to 100 if omitted). The `action_type` is resolved from the `actions` object. Use the reorder endpoint after creation to adjust the priority relative to existing rules." },
19205
+ { type: "paragraph", text: "Pair with `GET /v1/routing-rules` to verify the full priority chain after creating a rule. Use `source_connection_id` to scope rules to documents from a specific source \u2014 documents from other sources will skip the rule entirely. To test a rule before going live, create it and immediately disable it via `PATCH` with `is_active: false`." },
19036
19206
  { type: "callout", variant: "info", text: "New rules are created with `is_active: true` by default. If you want to test a rule before activating it, create it, then immediately disable it via `PATCH /v1/routing-rules/:id` with `is_active: false`." },
19037
19207
  {
19038
19208
  type: "endpoint",
@@ -19135,6 +19305,10 @@ var sections41 = [
19135
19305
  description: "Retrieve, update, or delete a routing rule by ID. Update conditions, actions, priority, or enabled state. Deleting a rule does not affect previously routed documents.",
19136
19306
  content: [
19137
19307
  { type: "paragraph", text: "Retrieve, update, or delete a single routing rule. Updates take effect immediately \u2014 the next `document_classified` event will use the updated rule. Deleting a rule does not retroactively affect documents that were already routed by it." },
19308
+ { type: "paragraph", text: "Use `GET` to inspect a rule's conditions, actions, and priority. Use `PATCH` to adjust conditions, change the schema assignment, toggle `is_active`, or update the priority. Use `DELETE` when a rule is no longer needed \u2014 previously routed documents are not affected." },
19309
+ { type: "paragraph", text: "The `PATCH` response returns the full updated rule including the new `updated_at` timestamp. All fields are optional \u2014 only include fields you want to change. The `is_active` toggle lets you temporarily disable a rule without deleting it, which is useful for testing or during maintenance windows." },
19310
+ { type: "paragraph", text: "After updating priority via `PATCH`, use `GET /v1/routing-rules` to verify the full evaluation order. For bulk priority changes, prefer the `POST /v1/routing-rules/reorder` endpoint instead of patching individual rules. Pair deletion with rule creation to replace a rule atomically." },
19311
+ { type: "callout", variant: "info", text: "Rule changes only affect future `document_classified` events. Documents already routed by a previous version of the rule retain their assigned schema and routing actions." },
19138
19312
  {
19139
19313
  type: "endpoint",
19140
19314
  method: "GET",
@@ -19316,6 +19490,9 @@ var sections41 = [
19316
19490
  description: "Reorder routing rules by providing an ordered array of rule IDs. Priority values are reassigned sequentially based on the new order.",
19317
19491
  content: [
19318
19492
  { type: "paragraph", text: "Reassign priority values for all routing rules at once. Pass an ordered array of rule IDs \u2014 the first ID receives priority 1, the second receives priority 2, and so on. This is the recommended way to change evaluation order after initial creation." },
19493
+ { type: "paragraph", text: "Use this endpoint when you need to rearrange the evaluation order of multiple rules at once \u2014 for example, when promoting a new rule to the top of the chain or inserting a rule between two existing ones. This is more reliable than patching individual rule priorities, which can create gaps or collisions." },
19494
+ { type: "paragraph", text: "The response returns a `reordered` array with each rule's `id` and new `priority` value. Priority 1 is evaluated first. The reorder takes effect immediately \u2014 the next `document_classified` event uses the new priority sequence." },
19495
+ { type: "paragraph", text: "List all rules first via `GET /v1/routing-rules` to get the current IDs and order, then construct the reordered array. Include both active and inactive rules in the array to maintain a consistent priority sequence. Omitting any active rule ID results in a validation error." },
19319
19496
  { type: "callout", variant: "warning", text: "All active rule IDs must be included in the `rule_ids` array. Omitting any rule returns a validation error. Inactive rules should also be included to maintain a consistent priority sequence." },
19320
19497
  {
19321
19498
  type: "endpoint",
@@ -19821,6 +19998,10 @@ var sections44 = [
19821
19998
  description: "All Talonic API errors return a consistent JSON envelope with a machine-readable code, human-readable message, HTTP status, retryable flag, request ID, and timestamp.",
19822
19999
  content: [
19823
20000
  { type: "paragraph", text: "All errors return a consistent JSON envelope. The `retryable` field tells you whether the request can be retried with the same parameters." },
20001
+ { type: "paragraph", text: "Most integrations parse the `code` field for programmatic error handling and display the `message` field to users. A typical error handler checks `retryable` first \u2014 if `true`, queue the request for retry with exponential backoff; if `false`, surface the `message` to the caller and stop." },
20002
+ { type: "paragraph", text: "The `request_id` field (prefixed with `req_`) uniquely identifies the failed request and is essential for debugging with Talonic support. The `path` field confirms which endpoint produced the error, and `timestamp` records when it occurred in ISO 8601 format." },
20003
+ { type: "paragraph", text: "Pair error handling with the [Error Codes](error-codes) reference to map each `code` value to the correct remediation action. Note that `statusCode` always matches the HTTP response status, so you can use either for branching logic in your client." },
20004
+ { type: "callout", text: "Always log the `request_id` from error responses. When contacting support, include it for faster resolution \u2014 it links directly to the server-side request trace." },
19824
20005
  {
19825
20006
  type: "code",
19826
20007
  title: "Error response envelope",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@talonic/docs",
3
- "version": "0.20.11",
3
+ "version": "0.20.12",
4
4
  "description": "Talonic documentation components — API Reference & Platform Guide",
5
5
  "license": "UNLICENSED",
6
6
  "private": false,
@@ -1,45 +0,0 @@
1
- /**
2
- * Tailwind CSS preset for @talonic/docs consumers.
3
- * Adds the Void design system color tokens and font families
4
- * so doc components render correctly in any host app.
5
- */
6
- declare const voidDocsPreset: {
7
- darkMode: "class";
8
- theme: {
9
- extend: {
10
- colors: {
11
- 'void-bg': string;
12
- 'void-bg-elevated': string;
13
- 'void-surface': string;
14
- 'void-surface-2': string;
15
- 'void-surface-3': string;
16
- 'void-border': string;
17
- 'void-border-hover': string;
18
- 'void-text-primary': string;
19
- 'void-text-secondary': string;
20
- 'void-text-muted': string;
21
- 'void-text-tertiary': string;
22
- 'void-accent': string;
23
- 'void-accent-hover': string;
24
- 'void-accent-dim': string;
25
- 'void-accent-tint': string;
26
- 'void-danger': string;
27
- 'void-danger-solid': string;
28
- 'void-divider': string;
29
- 'void-warning': string;
30
- 'void-warning-dim': string;
31
- 'void-tier-1': string;
32
- 'void-tier-2': string;
33
- 'void-tier-3': string;
34
- };
35
- fontFamily: {
36
- space: string[];
37
- body: string[];
38
- mono: string[];
39
- };
40
- };
41
- };
42
- plugins: never[];
43
- };
44
-
45
- export { voidDocsPreset as default };
@@ -1,45 +0,0 @@
1
- /**
2
- * Tailwind CSS preset for @talonic/docs consumers.
3
- * Adds the Void design system color tokens and font families
4
- * so doc components render correctly in any host app.
5
- */
6
- declare const voidDocsPreset: {
7
- darkMode: "class";
8
- theme: {
9
- extend: {
10
- colors: {
11
- 'void-bg': string;
12
- 'void-bg-elevated': string;
13
- 'void-surface': string;
14
- 'void-surface-2': string;
15
- 'void-surface-3': string;
16
- 'void-border': string;
17
- 'void-border-hover': string;
18
- 'void-text-primary': string;
19
- 'void-text-secondary': string;
20
- 'void-text-muted': string;
21
- 'void-text-tertiary': string;
22
- 'void-accent': string;
23
- 'void-accent-hover': string;
24
- 'void-accent-dim': string;
25
- 'void-accent-tint': string;
26
- 'void-danger': string;
27
- 'void-danger-solid': string;
28
- 'void-divider': string;
29
- 'void-warning': string;
30
- 'void-warning-dim': string;
31
- 'void-tier-1': string;
32
- 'void-tier-2': string;
33
- 'void-tier-3': string;
34
- };
35
- fontFamily: {
36
- space: string[];
37
- body: string[];
38
- mono: string[];
39
- };
40
- };
41
- };
42
- plugins: never[];
43
- };
44
-
45
- export { voidDocsPreset as default };