@talonic/docs 0.20.18 → 0.20.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/content.js CHANGED
@@ -1,5 +1,5 @@
1
1
  // src/content/helpers.ts
2
- function deriveBreadcrumbs(sections52, leafId, domain) {
2
+ function deriveBreadcrumbs(sections55, leafId, domain) {
3
3
  const domainLabels = {
4
4
  api: "API Reference",
5
5
  platform: "Platform Guide",
@@ -10,20 +10,20 @@ function deriveBreadcrumbs(sections52, leafId, domain) {
10
10
  label: domainLabels[domain] ?? domain,
11
11
  slug: domain
12
12
  };
13
- for (const group of sections52) {
13
+ for (const group of sections55) {
14
14
  const child = group.children?.find((c) => c.id === leafId);
15
15
  if (child) {
16
16
  return [root, { label: group.label, slug: group.id }, { label: child.label, slug: child.id }];
17
17
  }
18
18
  }
19
- const topLevel = sections52.find((s) => s.id === leafId);
19
+ const topLevel = sections55.find((s) => s.id === leafId);
20
20
  if (topLevel) {
21
21
  return [root, { label: topLevel.label, slug: topLevel.id }];
22
22
  }
23
23
  return [root];
24
24
  }
25
- function derivePrevNext(sections52, leafId) {
26
- const flat = sections52.flatMap(
25
+ function derivePrevNext(sections55, leafId) {
26
+ const flat = sections55.flatMap(
27
27
  (s) => s.children ?? [{ id: s.id, label: s.label }]
28
28
  );
29
29
  const idx = flat.findIndex((c) => c.id === leafId);
@@ -250,6 +250,29 @@ var API_NAV_SECTIONS = [
250
250
  { id: "registry", label: "Registry", children: [
251
251
  { id: "registry-query", label: "Query" }
252
252
  ] },
253
+ { id: "data-products", label: "Data Products", children: [
254
+ { id: "list-data-products", label: "List Data Products" },
255
+ { id: "get-data-product", label: "Get Data Product" },
256
+ { id: "delete-data-product", label: "Delete Data Product" },
257
+ { id: "get-data-product-results", label: "Get Results" }
258
+ ] },
259
+ { id: "data-policies", label: "Data Policies", children: [
260
+ { id: "list-data-policies", label: "List Policies" },
261
+ { id: "create-data-policy", label: "Create Policy" },
262
+ { id: "get-data-policy", label: "Get Policy" },
263
+ { id: "update-data-policy", label: "Update Policy" },
264
+ { id: "delete-data-policy", label: "Delete Policy" },
265
+ { id: "list-data-policy-versions", label: "List Versions" },
266
+ { id: "list-data-policy-fields", label: "List Fields" },
267
+ { id: "list-data-policy-rules", label: "List Rules" }
268
+ ] },
269
+ { id: "record-sets", label: "Record Sets", children: [
270
+ { id: "list-record-sets", label: "List Record Sets" },
271
+ { id: "get-record-set", label: "Get Record Set" },
272
+ { id: "list-record-set-fields", label: "List Fields" },
273
+ { id: "list-record-set-records", label: "List Records" },
274
+ { id: "export-record-set", label: "Export" }
275
+ ] },
253
276
  { id: "billing", label: "Billing", children: [
254
277
  { id: "billing-settings", label: "Settings" },
255
278
  { id: "billing-topup", label: "Auto Top-Up" },
@@ -25906,99 +25929,1593 @@ var sections49 = [
25906
25929
  }
25907
25930
  ];
25908
25931
 
25909
- // src/content/sdk/sections.json
25910
- var sections_default = [
25932
+ // src/content/api/data-products.ts
25933
+ var sections50 = [
25911
25934
  {
25912
- slug: "sdk-introduction",
25913
- parentSlug: "sdk-overview",
25914
- title: "Introduction",
25915
- seoTitle: "Node SDK Introduction \u2014 Talonic Docs",
25916
- description: "Official Talonic SDK for Node.js and TypeScript. Extract structured, schema-validated data from any document with a single function call.",
25935
+ slug: "list-data-products",
25936
+ parentSlug: "data-products",
25937
+ title: "List Data Products",
25938
+ seoTitle: "List Data Products Endpoint \u2014 Talonic Docs",
25939
+ description: "List assembled data products with cursor-based pagination. Filter by status to find active, draft, or archived products across your workspace.",
25917
25940
  content: [
25918
- { type: "paragraph", text: "The `@talonic/node` SDK is the official Node.js and TypeScript client for the Talonic API. Extract structured, schema-validated data from any document with a single function call." },
25919
- { type: "paragraph", text: "Zero runtime dependencies. Requires Node.js 18 or newer." },
25920
- { type: "callout", text: "Looking for the AI agent path? [`@talonic/mcp`](https://github.com/talonicdev/talonic-mcp) wraps this SDK as a Model Context Protocol server. Install it into Claude Desktop, Cursor, Cline, Continue, or Cowork and any MCP-aware agent can extract documents directly." }
25941
+ { type: "paragraph", text: "Data products are the final assembled output datasets produced by the Talonic pipeline. Each data product represents a structured, validated, and optionally resolved collection of records ready for downstream consumption. Data products are created automatically when a job run completes validation, or manually via the platform UI." },
25942
+ { type: "paragraph", text: "Use this endpoint to list all data products in your workspace. Results are returned in reverse chronological order by default and support cursor-based pagination. You can filter by status to find only active products, or locate draft and archived products for lifecycle management." },
25943
+ {
25944
+ type: "endpoint",
25945
+ method: "GET",
25946
+ path: "/v1/data-products",
25947
+ summary: "List data products with optional status filter and cursor-based pagination.",
25948
+ description: "Requires read scope.",
25949
+ blocks: [
25950
+ {
25951
+ type: "param-table",
25952
+ title: "Query parameters",
25953
+ params: [
25954
+ { name: "status", type: "string", description: "Filter by product status (e.g. active, draft, archived)." },
25955
+ { name: "limit", type: "integer", default: "20", description: "Maximum number of items to return (1-100)." },
25956
+ { name: "cursor", type: "string", description: "Opaque pagination cursor from a previous response." },
25957
+ { name: "order", type: "string", default: "desc", description: "Sort order by creation date (asc | desc)." }
25958
+ ]
25959
+ }
25960
+ ]
25961
+ },
25962
+ { type: "heading", level: 2, id: "list-data-products-response", text: "Response" },
25963
+ {
25964
+ type: "param-table",
25965
+ title: "Response fields",
25966
+ params: [
25967
+ { name: "data", type: "array", description: "Array of data product objects." },
25968
+ { name: "data[].id", type: "string", description: "Data product UUID." },
25969
+ { name: "data[].name", type: "string", description: "Human-readable product name." },
25970
+ { name: "data[].description", type: "string | null", description: "Optional description of the data product." },
25971
+ { name: "data[].schema_id", type: "string", description: "UUID of the user schema that defines the output columns." },
25972
+ { name: "data[].run_id", type: "string", description: "UUID of the source job run that produced the data." },
25973
+ { name: "data[].status", type: "string", description: "Product status: active, draft, or archived." },
25974
+ { name: "data[].created_at", type: "string", description: "ISO 8601 creation timestamp." },
25975
+ { name: "data[].updated_at", type: "string", description: "ISO 8601 last update timestamp." }
25976
+ ]
25977
+ },
25978
+ {
25979
+ type: "code",
25980
+ title: "curl",
25981
+ language: "bash",
25982
+ code: `curl -s "https://api.talonic.ai/v1/data-products?status=active&limit=10" \\
25983
+ -H "Authorization: Bearer tlnc_your_api_key"`
25984
+ },
25985
+ {
25986
+ type: "code",
25987
+ title: "Response",
25988
+ code: `{
25989
+ "data": [
25990
+ {
25991
+ "id": "d1a2b3c4-e5f6-7890-abcd-ef1234567890",
25992
+ "name": "Q4 Invoice Extract",
25993
+ "description": "Structured invoice data from Q4 batch",
25994
+ "schema_id": "s1a2b3c4-e5f6-7890-abcd-ef1234567890",
25995
+ "run_id": "r1a2b3c4-e5f6-7890-abcd-ef1234567890",
25996
+ "status": "active",
25997
+ "created_at": "2024-11-01T14:20:00.000Z",
25998
+ "updated_at": "2024-11-01T14:25:30.000Z"
25999
+ }
26000
+ ],
26001
+ "cursor": "eyJpZCI6ImQxYTJiM2M0In0="
26002
+ }`
26003
+ },
26004
+ { type: "heading", level: 2, id: "list-data-products-errors", text: "Errors" },
26005
+ {
26006
+ type: "param-table",
26007
+ title: "Error responses",
26008
+ params: [
26009
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26010
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26011
+ ]
26012
+ }
25921
26013
  ],
25922
26014
  related: [
25923
- { label: "Installation", slug: "sdk-installation" },
25924
- { label: "Quick Start", slug: "sdk-quickstart" },
25925
- { label: "MCP Server", slug: "mcp-introduction" }
26015
+ { label: "Resolutions", slug: "list-resolutions" },
26016
+ { label: "Jobs", slug: "list-jobs" },
26017
+ { label: "Schemas", slug: "list-schemas" }
25926
26018
  ],
25927
26019
  faq: [
25928
- { question: "What is the Talonic Node SDK?", answer: "The official Node.js and TypeScript client for extracting structured, schema-validated data from documents via the Talonic API." },
25929
- { question: "What Node.js version is required?", answer: "Node.js 18 or newer. The SDK has zero runtime dependencies." }
26020
+ { question: "What is a data product?", answer: "A data product is the final assembled dataset from the Talonic pipeline. It contains structured, validated, and optionally resolved records ready for export or delivery. Each product is tied to a schema and a source job run." },
26021
+ { question: "How do data product statuses work?", answer: "Data products start as `draft` during assembly, move to `active` once validated and ready for consumption, and can be `archived` when no longer needed. Only `active` products are included in delivery bindings by default." },
26022
+ { question: "Can I filter data products by schema?", answer: "The list endpoint currently supports filtering by `status`. To find products for a specific schema, list all products and filter client-side by `schema_id`, or use the filter/search endpoints for more advanced queries." }
25930
26023
  ],
25931
- mentions: ["Node.js", "TypeScript", "SDK", "npm", "document extraction"]
26024
+ mentions: ["data product", "export", "structured output"]
25932
26025
  },
25933
26026
  {
25934
- slug: "sdk-installation",
25935
- parentSlug: "sdk-overview",
25936
- title: "Installation",
25937
- seoTitle: "Install Talonic Node SDK \u2014 Talonic Docs",
25938
- description: "Install the @talonic/node package via npm. Zero runtime dependencies, requires Node.js 18+.",
26027
+ slug: "get-data-product",
26028
+ parentSlug: "data-products",
26029
+ title: "Get Data Product",
26030
+ seoTitle: "Get Data Product Endpoint \u2014 Talonic Docs",
26031
+ description: "Retrieve a single data product by ID with its schema reference, source run, status, and metadata. Requires read scope for the workspace.",
25939
26032
  content: [
25940
- { type: "code", language: "bash", title: "Install via npm", code: "npm install @talonic/node" },
25941
- { type: "paragraph", text: "Requires Node.js 18 or newer. Zero runtime dependencies." }
26033
+ { type: "paragraph", text: "Retrieve the full details of a specific data product by its UUID. The response includes the product name, description, associated schema and source run references, current status, and timestamps. Use this endpoint to inspect a product before fetching its result rows." },
26034
+ { type: "paragraph", text: "The `schema_id` field references the user schema that defines the output columns. The `run_id` field references the job run that produced the underlying extracted data. Together these let you trace the full lineage of the data product back to the original documents." },
26035
+ {
26036
+ type: "endpoint",
26037
+ method: "GET",
26038
+ path: "/v1/data-products/{id}",
26039
+ summary: "Get a single data product by ID.",
26040
+ description: "Requires read scope.",
26041
+ blocks: [
26042
+ {
26043
+ type: "param-table",
26044
+ title: "Path parameters",
26045
+ params: [
26046
+ { name: "id", type: "uuid", required: true, description: "Data product UUID." }
26047
+ ]
26048
+ }
26049
+ ]
26050
+ },
26051
+ { type: "heading", level: 2, id: "get-data-product-response", text: "Response" },
26052
+ {
26053
+ type: "param-table",
26054
+ title: "Response fields",
26055
+ params: [
26056
+ { name: "id", type: "string", description: "Data product UUID." },
26057
+ { name: "name", type: "string", description: "Human-readable product name." },
26058
+ { name: "description", type: "string | null", description: "Optional description." },
26059
+ { name: "schema_id", type: "string", description: "UUID of the user schema." },
26060
+ { name: "run_id", type: "string", description: "UUID of the source job run." },
26061
+ { name: "status", type: "string", description: "Product status: active, draft, or archived." },
26062
+ { name: "created_at", type: "string", description: "ISO 8601 creation timestamp." },
26063
+ { name: "updated_at", type: "string", description: "ISO 8601 last update timestamp." }
26064
+ ]
26065
+ },
26066
+ {
26067
+ type: "code",
26068
+ title: "curl",
26069
+ language: "bash",
26070
+ code: `curl -s https://api.talonic.ai/v1/data-products/d1a2b3c4-e5f6-7890-abcd-ef1234567890 \\
26071
+ -H "Authorization: Bearer tlnc_your_api_key"`
26072
+ },
26073
+ {
26074
+ type: "code",
26075
+ title: "Response",
26076
+ code: `{
26077
+ "id": "d1a2b3c4-e5f6-7890-abcd-ef1234567890",
26078
+ "name": "Q4 Invoice Extract",
26079
+ "description": "Structured invoice data from Q4 batch",
26080
+ "schema_id": "s1a2b3c4-e5f6-7890-abcd-ef1234567890",
26081
+ "run_id": "r1a2b3c4-e5f6-7890-abcd-ef1234567890",
26082
+ "status": "active",
26083
+ "created_at": "2024-11-01T14:20:00.000Z",
26084
+ "updated_at": "2024-11-01T14:25:30.000Z"
26085
+ }`
26086
+ },
26087
+ { type: "heading", level: 2, id: "get-data-product-errors", text: "Errors" },
26088
+ {
26089
+ type: "param-table",
26090
+ title: "Error responses",
26091
+ params: [
26092
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26093
+ { name: "404", type: "not_found", description: "Data product not found or does not belong to your organization." },
26094
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26095
+ ]
26096
+ }
25942
26097
  ],
25943
26098
  related: [
25944
- { label: "Authentication", slug: "sdk-authentication" },
25945
- { label: "Quick Start", slug: "sdk-quickstart" }
26099
+ { label: "List Data Products", slug: "list-data-products" },
26100
+ { label: "Get Data Product Results", slug: "get-data-product-results" },
26101
+ { label: "Jobs", slug: "list-jobs" }
25946
26102
  ],
25947
26103
  faq: [
25948
- { question: "How do I install the Talonic Node SDK?", answer: "Run npm install @talonic/node. Requires Node.js 18 or newer. Zero runtime dependencies." }
26104
+ { question: "How do I trace a data product back to its source documents?", answer: "The `run_id` field references the job run. Use `GET /v1/jobs/{run_id}` to find the job details, including the list of documents that were processed. The `schema_id` field tells you which output schema was used." },
26105
+ { question: "What does the schema_id represent?", answer: "The `schema_id` references the user schema that defines the output columns and field mappings for this data product. It determines which fields appear in the result rows and how they are named." }
25949
26106
  ],
25950
- mentions: ["npm", "Node.js"]
26107
+ mentions: ["data product", "export", "structured output", "lineage"]
25951
26108
  },
25952
26109
  {
25953
- slug: "sdk-authentication",
25954
- parentSlug: "sdk-overview",
25955
- title: "Authentication",
25956
- seoTitle: "SDK Authentication \u2014 Talonic Docs",
25957
- description: "Get a Talonic API key and configure it for the Node SDK. Each workspace is isolated with private documents and schemas.",
26110
+ slug: "delete-data-product",
26111
+ parentSlug: "data-products",
26112
+ title: "Delete Data Product",
26113
+ seoTitle: "Delete Data Product Endpoint \u2014 Talonic Docs",
26114
+ description: "Permanently delete a data product and its result rows. Requires write scope. The source job run and extracted data are not affected.",
25958
26115
  content: [
25959
- { type: "paragraph", text: "Every user runs against their own Talonic workspace, so each user needs their own key. Workspaces are isolated; your documents and schemas are private to you." },
25960
- { type: "list", ordered: true, items: [
25961
- "Sign up at [https://app.talonic.com](https://app.talonic.com). Free tier: 50 extractions per day, no credit card.",
25962
- "Settings → API Keys → Create New Key.",
25963
- "Copy the `tlnc_` value.",
25964
- "Set it as the `TALONIC_API_KEY` environment variable, or pass it directly to the client constructor."
25965
- ] },
25966
- { type: "callout", text: "Keep your API key secret. Do not expose it in client-side code or version control." }
26116
+ { type: "paragraph", text: "Permanently delete a data product and all associated result rows. This action is irreversible. The source job run, its documents, and extracted data are not affected by this operation. Use this to remove products that are no longer needed or to clean up test data." },
26117
+ { type: "callout", variant: "warning", text: "Deletion is permanent. All result rows associated with this data product are removed. Any active delivery bindings referencing this product will stop delivering data." },
26118
+ {
26119
+ type: "endpoint",
26120
+ method: "DELETE",
26121
+ path: "/v1/data-products/{id}",
26122
+ summary: "Delete a data product and its results.",
26123
+ description: "Requires write scope.",
26124
+ blocks: [
26125
+ {
26126
+ type: "param-table",
26127
+ title: "Path parameters",
26128
+ params: [
26129
+ { name: "id", type: "uuid", required: true, description: "Data product UUID." }
26130
+ ]
26131
+ }
26132
+ ]
26133
+ },
26134
+ { type: "heading", level: 2, id: "delete-data-product-response", text: "Response" },
26135
+ {
26136
+ type: "param-table",
26137
+ title: "Response fields",
26138
+ params: [
26139
+ { name: "deleted", type: "boolean", description: "Always true on success." }
26140
+ ]
26141
+ },
26142
+ {
26143
+ type: "code",
26144
+ title: "curl",
26145
+ language: "bash",
26146
+ code: `curl -s -X DELETE https://api.talonic.ai/v1/data-products/d1a2b3c4-e5f6-7890-abcd-ef1234567890 \\
26147
+ -H "Authorization: Bearer tlnc_your_api_key"`
26148
+ },
26149
+ {
26150
+ type: "code",
26151
+ title: "Response",
26152
+ code: `{
26153
+ "deleted": true
26154
+ }`
26155
+ },
26156
+ { type: "heading", level: 2, id: "delete-data-product-errors", text: "Errors" },
26157
+ {
26158
+ type: "param-table",
26159
+ title: "Error responses",
26160
+ params: [
26161
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26162
+ { name: "404", type: "not_found", description: "Data product not found or does not belong to your organization." },
26163
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26164
+ ]
26165
+ }
25967
26166
  ],
25968
26167
  related: [
25969
- { label: "Configuration", slug: "sdk-configuration" },
25970
- { label: "API Authentication", slug: "authentication" }
26168
+ { label: "List Data Products", slug: "list-data-products" },
26169
+ { label: "Resolutions", slug: "list-resolutions" },
26170
+ { label: "Schemas", slug: "list-schemas" }
25971
26171
  ],
25972
26172
  faq: [
25973
- { question: "Where do I get a Talonic API key?", answer: "Sign up at app.talonic.com, go to Settings \u2192 API Keys \u2192 Create New Key. Free tier includes 50 extractions per day." }
26173
+ { question: "Does deleting a data product affect the source job run?", answer: "No. The source job run, its documents, and all extracted data are completely unaffected. Only the data product and its assembled result rows are removed." },
26174
+ { question: "What happens to delivery bindings when a product is deleted?", answer: "Any delivery bindings that reference the deleted product will stop delivering data. The bindings themselves are not deleted, but they will produce errors on the next delivery attempt. Update or remove affected bindings after deleting a product." }
25974
26175
  ],
25975
- mentions: ["API key", "authentication", "workspace"]
26176
+ mentions: ["data product", "export", "structured output"]
25976
26177
  },
25977
26178
  {
25978
- slug: "sdk-quickstart",
25979
- parentSlug: "sdk-overview",
25980
- title: "Quick Start",
25981
- seoTitle: "Node SDK Quick Start \u2014 Talonic Docs",
25982
- description: "Extract structured data from a document in five lines of TypeScript using the Talonic Node SDK.",
26179
+ slug: "get-data-product-results",
26180
+ parentSlug: "data-products",
26181
+ title: "Get Data Product Results",
26182
+ seoTitle: "Get Data Product Results \u2014 Talonic Docs",
26183
+ description: "Retrieve the structured result rows of a data product with cursor-based pagination. Each row contains field values aligned to the product schema.",
25983
26184
  content: [
25984
- { type: "code", language: "typescript", title: "Extract an invoice", code: 'import { Talonic } from "@talonic/node"\n\nconst talonic = new Talonic({ apiKey: process.env.TALONIC_API_KEY! })\n\nconst result = await talonic.extract({\n file_path: "./invoice.pdf",\n schema: {\n vendor_name: "string",\n invoice_number: "string",\n total_amount: "number",\n due_date: "date",\n },\n})\n\nconsole.log(result.data)\n// { vendor_name: "Acme Corp", invoice_number: "INV-2024-0847", total_amount: 14250, due_date: "2024-03-15" }' }
26185
+ { type: "paragraph", text: "Retrieve the assembled result rows for a data product. Each row contains the structured field values aligned to the product's schema, with one row per document or entity. Results are returned with cursor-based pagination for efficient traversal of large datasets." },
26186
+ { type: "paragraph", text: "The result rows represent the final output of the Talonic pipeline: extracted, validated, and optionally resolved field values. Each row maps field names (from the associated user schema) to their values. Use this endpoint to programmatically consume the structured data for downstream integration, analytics, or export." },
26187
+ {
26188
+ type: "endpoint",
26189
+ method: "GET",
26190
+ path: "/v1/data-products/{id}/results",
26191
+ summary: "Get paginated result rows for a data product.",
26192
+ description: "Requires read scope.",
26193
+ blocks: [
26194
+ {
26195
+ type: "param-table",
26196
+ title: "Path parameters",
26197
+ params: [
26198
+ { name: "id", type: "uuid", required: true, description: "Data product UUID." }
26199
+ ]
26200
+ },
26201
+ {
26202
+ type: "param-table",
26203
+ title: "Query parameters",
26204
+ params: [
26205
+ { name: "limit", type: "integer", default: "20", description: "Maximum number of rows to return (1-100)." },
26206
+ { name: "cursor", type: "string", description: "Opaque pagination cursor from a previous response." }
26207
+ ]
26208
+ }
26209
+ ]
26210
+ },
26211
+ { type: "heading", level: 2, id: "get-data-product-results-response", text: "Response" },
26212
+ {
26213
+ type: "param-table",
26214
+ title: "Response fields",
26215
+ params: [
26216
+ { name: "data", type: "array", description: "Array of result row objects." },
26217
+ { name: "data[].id", type: "string", description: "Result row UUID." },
26218
+ { name: "data[].document_id", type: "string", description: "Source document UUID." },
26219
+ { name: "data[].fields", type: "object", description: "Key-value map of field names to extracted values." },
26220
+ { name: "data[].created_at", type: "string", description: "ISO 8601 timestamp." },
26221
+ { name: "cursor", type: "string | null", description: "Pagination cursor for the next page, or null if no more results." }
26222
+ ]
26223
+ },
26224
+ {
26225
+ type: "code",
26226
+ title: "curl",
26227
+ language: "bash",
26228
+ code: `curl -s "https://api.talonic.ai/v1/data-products/d1a2b3c4-e5f6-7890-abcd-ef1234567890/results?limit=10" \\
26229
+ -H "Authorization: Bearer tlnc_your_api_key"`
26230
+ },
26231
+ {
26232
+ type: "code",
26233
+ title: "Response",
26234
+ code: `{
26235
+ "data": [
26236
+ {
26237
+ "id": "row-1a2b3c4d-e5f6-7890-abcd-ef1234567890",
26238
+ "document_id": "doc-a1b2c3d4-e5f6-7890-abcd-ef1234567890",
26239
+ "fields": {
26240
+ "invoice_number": "INV-2024-0042",
26241
+ "vendor_name": "Acme Corp",
26242
+ "total_amount": "1250.00",
26243
+ "currency": "USD",
26244
+ "invoice_date": "2024-10-15"
26245
+ },
26246
+ "created_at": "2024-11-01T14:25:00.000Z"
26247
+ }
26248
+ ],
26249
+ "cursor": "eyJpZCI6InJvdy0xYTJiIn0="
26250
+ }`
26251
+ },
26252
+ { type: "heading", level: 2, id: "get-data-product-results-errors", text: "Errors" },
26253
+ {
26254
+ type: "param-table",
26255
+ title: "Error responses",
26256
+ params: [
26257
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26258
+ { name: "404", type: "not_found", description: "Data product not found or does not belong to your organization." },
26259
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26260
+ ]
26261
+ }
25985
26262
  ],
25986
26263
  related: [
25987
- { label: "Extract", slug: "sdk-extract" },
25988
- { label: "Configuration", slug: "sdk-configuration" },
25989
- { label: "Schemas", slug: "sdk-schemas" }
26264
+ { label: "Get Data Product", slug: "get-data-product" },
26265
+ { label: "Resolutions", slug: "list-resolutions" },
26266
+ { label: "Jobs", slug: "list-jobs" },
26267
+ { label: "Schemas", slug: "list-schemas" }
25990
26268
  ],
25991
26269
  faq: [
25992
- { question: "How do I extract data from a document with the SDK?", answer: "Call talonic.extract() with a file_path and a schema defining the fields you want. Returns structured JSON with confidence scores." }
26270
+ { question: "What format are the result row fields in?", answer: "The `fields` object is a flat key-value map whose keys are field names from the associated schema. All values are returned as strings, regardless of the field type defined in the schema." },
26271
+ { question: "How do I export all results at once?", answer: "Paginate through all results by passing the `cursor` value from each response as the `cursor` query parameter of the next request, until the response returns a null `cursor`. Alternatively, use the platform UI to export the data product as CSV or XLSX, which handles pagination automatically." },
26272
+ { question: "Are resolved values included in the results?", answer: "Yes. If a resolution run was executed against the source job, the result rows contain the resolved canonical values rather than the raw extracted values. The data product always reflects the latest resolved state." }
25993
26273
  ],
25994
- mentions: ["extract", "schema", "TypeScript", "quickstart"]
25995
- },
26274
+ mentions: ["data product", "export", "structured output", "result rows"]
26275
+ }
26276
+ ];
26277
+
26278
+ // src/content/api/data-policies.ts
26279
+ var sections51 = [
25996
26280
  {
25997
- slug: "sdk-configuration",
25998
- parentSlug: "sdk-client",
25999
- title: "Configuration",
26000
- seoTitle: "SDK Configuration \u2014 Talonic Docs",
26001
- description: "Configure the Talonic client with API key, base URL, timeout, max retries, and custom fetch function.",
26281
+ slug: "list-data-policies",
26282
+ parentSlug: "data-policies",
26283
+ title: "List Data Policies",
26284
+ seoTitle: "List Data Policies Endpoint \u2014 Talonic Docs",
26285
+ description: "List data policies that define field transformations, normalization rules, and lookup cascades applied during resolution runs. Supports cursor-based pagination.",
26286
+ content: [
26287
+ { type: "paragraph", text: "Data policies are first-class, versioned configuration objects that define how field values are transformed, normalized, and validated during resolution runs. Each policy contains a set of fields (output contract) and rules (transformation logic) that are compiled into an executable pipeline. Policies support 14 rule types including direct mapping, lookup cascades, Lua scripting, and deterministic computation." },
26288
+ { type: "paragraph", text: "Use this endpoint to list all data policies in your workspace. Policies are returned in reverse chronological order by default. Each policy can have multiple versions, allowing you to iterate on transformation logic without affecting in-flight resolution runs that have already captured a policy snapshot." },
26289
+ {
26290
+ type: "endpoint",
26291
+ method: "GET",
26292
+ path: "/v1/data-policies",
26293
+ summary: "List all data policies in the workspace.",
26294
+ description: "Requires read scope.",
26295
+ blocks: [
26296
+ {
26297
+ type: "param-table",
26298
+ title: "Query parameters",
26299
+ params: [
26300
+ { name: "limit", type: "integer", default: "20", description: "Maximum number of items to return (1-100)." },
26301
+ { name: "cursor", type: "string", description: "Opaque pagination cursor from a previous response." },
26302
+ { name: "order", type: "string", default: "desc", description: "Sort order by creation date (asc | desc)." }
26303
+ ]
26304
+ }
26305
+ ]
26306
+ },
26307
+ { type: "heading", level: 2, id: "list-data-policies-response", text: "Response" },
26308
+ {
26309
+ type: "param-table",
26310
+ title: "Response fields",
26311
+ params: [
26312
+ { name: "data", type: "array", description: "Array of data policy objects." },
26313
+ { name: "data[].id", type: "string", description: "Data policy UUID." },
26314
+ { name: "data[].name", type: "string", description: "Human-readable policy name." },
26315
+ { name: "data[].description", type: "string | null", description: "Optional description of the policy purpose." },
26316
+ { name: "data[].version", type: "integer", description: "Current version number." },
26317
+ { name: "data[].created_at", type: "string", description: "ISO 8601 creation timestamp." },
26318
+ { name: "data[].updated_at", type: "string", description: "ISO 8601 last update timestamp." }
26319
+ ]
26320
+ },
26321
+ {
26322
+ type: "code",
26323
+ title: "curl",
26324
+ language: "bash",
26325
+ code: `curl -s https://api.talonic.ai/v1/data-policies \\
26326
+ -H "Authorization: Bearer tlnc_your_api_key"`
26327
+ },
26328
+ {
26329
+ type: "code",
26330
+ title: "Response",
26331
+ code: `{
26332
+ "data": [
26333
+ {
26334
+ "id": "p1a2b3c4-e5f6-7890-abcd-ef1234567890",
26335
+ "name": "Invoice Normalization",
26336
+ "description": "Standardize currency codes, country names, and date formats for invoice processing",
26337
+ "version": 3,
26338
+ "created_at": "2024-10-01T09:00:00.000Z",
26339
+ "updated_at": "2024-10-15T11:30:00.000Z"
26340
+ }
26341
+ ],
26342
+ "cursor": "eyJpZCI6InAxYTJiM2M0In0="
26343
+ }`
26344
+ },
26345
+ { type: "heading", level: 2, id: "list-data-policies-errors", text: "Errors" },
26346
+ {
26347
+ type: "param-table",
26348
+ title: "Error responses",
26349
+ params: [
26350
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26351
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26352
+ ]
26353
+ }
26354
+ ],
26355
+ related: [
26356
+ { label: "Create Data Policy", slug: "create-data-policy" },
26357
+ { label: "Resolutions", slug: "list-resolutions" },
26358
+ { label: "Schemas", slug: "list-schemas" }
26359
+ ],
26360
+ faq: [
26361
+ { question: "What do data policies do?", answer: "Data policies define how field values are transformed and normalized during resolution runs. They support lookup cascades (mapping raw values to canonical codes via reference tables), Lua scripting for custom logic, deterministic computation, and direct field-to-field mappings." },
26362
+ { question: "How do data policy versions work?", answer: "Each time you update a policy, a new version is created. Resolution runs capture a snapshot of the active version at run time, so changes to a policy do not retroactively affect completed runs. You can inspect which version was used via the `policy_snapshot` field on a resolution run." },
26363
+ { question: "Can I have multiple data policies per workspace?", answer: "Yes. You can create multiple policies for different transformation scenarios. Each resolution run uses the active policy configured for the workspace, but you can switch between policies as needed." }
26364
+ ],
26365
+ mentions: ["data policy", "transformation", "normalization", "lookup", "Lua"]
26366
+ },
26367
+ {
26368
+ slug: "create-data-policy",
26369
+ parentSlug: "data-policies",
26370
+ title: "Create Data Policy",
26371
+ seoTitle: "Create Data Policy Endpoint \u2014 Talonic Docs",
26372
+ description: "Create a new data policy to define field transformation rules, lookup cascades, and normalization logic for resolution runs.",
26373
+ content: [
26374
+ { type: "paragraph", text: "Create a new data policy in your workspace. The policy starts at version 1 with no fields or rules. After creation, use the fields and rules endpoints to define the output contract and transformation logic. Policies are not active until they contain at least one field and one rule." },
26375
+ { type: "paragraph", text: "The policy name should be descriptive enough to distinguish it from other policies in the workspace. Use the optional description field to document the policy purpose, the types of documents it targets, and the normalization strategies it employs." },
26376
+ {
26377
+ type: "endpoint",
26378
+ method: "POST",
26379
+ path: "/v1/data-policies",
26380
+ summary: "Create a new data policy.",
26381
+ description: "Requires write scope.",
26382
+ blocks: [
26383
+ {
26384
+ type: "param-table",
26385
+ title: "Body parameters",
26386
+ params: [
26387
+ { name: "name", type: "string", required: true, description: "Human-readable policy name." },
26388
+ { name: "description", type: "string", description: "Optional description of the policy purpose." }
26389
+ ]
26390
+ },
26391
+ {
26392
+ type: "code",
26393
+ title: "Request body",
26394
+ code: `{
26395
+ "name": "Invoice Normalization",
26396
+ "description": "Standardize currency codes, country names, and date formats"
26397
+ }`
26398
+ }
26399
+ ]
26400
+ },
26401
+ { type: "heading", level: 2, id: "create-data-policy-response", text: "Response" },
26402
+ {
26403
+ type: "param-table",
26404
+ title: "Response fields (201 Created)",
26405
+ params: [
26406
+ { name: "id", type: "string", description: "Data policy UUID." },
26407
+ { name: "name", type: "string", description: "Policy name." },
26408
+ { name: "description", type: "string | null", description: "Policy description." },
26409
+ { name: "version", type: "integer", description: "Initial version, always 1." },
26410
+ { name: "created_at", type: "string", description: "ISO 8601 creation timestamp." },
26411
+ { name: "updated_at", type: "string", description: "ISO 8601 last update timestamp." }
26412
+ ]
26413
+ },
26414
+ {
26415
+ type: "code",
26416
+ title: "curl",
26417
+ language: "bash",
26418
+ code: `curl -s -X POST https://api.talonic.ai/v1/data-policies \\
26419
+ -H "Authorization: Bearer tlnc_your_api_key" \\
26420
+ -H "Content-Type: application/json" \\
26421
+ -d '{"name":"Invoice Normalization","description":"Standardize currency codes, country names, and date formats"}'`
26422
+ },
26423
+ {
26424
+ type: "code",
26425
+ title: "Response (201 Created)",
26426
+ code: `{
26427
+ "id": "p1a2b3c4-e5f6-7890-abcd-ef1234567890",
26428
+ "name": "Invoice Normalization",
26429
+ "description": "Standardize currency codes, country names, and date formats",
26430
+ "version": 1,
26431
+ "created_at": "2024-10-01T09:00:00.000Z",
26432
+ "updated_at": "2024-10-01T09:00:00.000Z"
26433
+ }`
26434
+ },
26435
+ { type: "heading", level: 2, id: "create-data-policy-errors", text: "Errors" },
26436
+ {
26437
+ type: "param-table",
26438
+ title: "Error responses",
26439
+ params: [
26440
+ { name: "400", type: "bad_request", description: "Invalid request body or missing required fields." },
26441
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26442
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26443
+ ]
26444
+ }
26445
+ ],
26446
+ related: [
26447
+ { label: "List Data Policies", slug: "list-data-policies" },
26448
+ { label: "List Data Policy Fields", slug: "list-data-policy-fields" },
26449
+ { label: "List Data Policy Rules", slug: "list-data-policy-rules" }
26450
+ ],
26451
+ faq: [
26452
+ { question: "What should I do after creating a policy?", answer: "After creating a policy, define its output fields via the fields endpoint and add transformation rules via the rules endpoint. The policy is not active until it has at least one field and one rule configured." },
26453
+ { question: "Can I rename a policy after creation?", answer: "Yes. Use the `PATCH /v1/data-policies/{id}` endpoint to update the name or description. Renaming does not affect the policy version or any existing resolution runs that captured a snapshot of this policy." }
26454
+ ],
26455
+ mentions: ["data policy", "transformation", "normalization"]
26456
+ },
26457
+ {
26458
+ slug: "get-data-policy",
26459
+ parentSlug: "data-policies",
26460
+ title: "Get Data Policy",
26461
+ seoTitle: "Get Data Policy Endpoint \u2014 Talonic Docs",
26462
+ description: "Retrieve a data policy by ID with its fields and rules inlined. Returns the full policy configuration needed to understand transformation behavior.",
26463
+ content: [
26464
+ { type: "paragraph", text: "Retrieve the complete details of a specific data policy, including its fields and rules inlined in the response. This provides a single-call view of the entire policy configuration, which is useful for auditing transformation logic or debugging resolution results." },
26465
+ { type: "paragraph", text: "The inlined fields define the output contract: which field keys the policy declares and their expected types. The inlined rules define the transformation logic: lookup tables, direct mappings, Lua scripts, and computed fields. Rules are executed in topological order based on their declared dependencies." },
26466
+ {
26467
+ type: "endpoint",
26468
+ method: "GET",
26469
+ path: "/v1/data-policies/{id}",
26470
+ summary: "Get a data policy with fields and rules inlined.",
26471
+ description: "Requires read scope.",
26472
+ blocks: [
26473
+ {
26474
+ type: "param-table",
26475
+ title: "Path parameters",
26476
+ params: [
26477
+ { name: "id", type: "uuid", required: true, description: "Data policy UUID." }
26478
+ ]
26479
+ }
26480
+ ]
26481
+ },
26482
+ { type: "heading", level: 2, id: "get-data-policy-response", text: "Response" },
26483
+ {
26484
+ type: "param-table",
26485
+ title: "Response fields",
26486
+ params: [
26487
+ { name: "id", type: "string", description: "Data policy UUID." },
26488
+ { name: "name", type: "string", description: "Policy name." },
26489
+ { name: "description", type: "string | null", description: "Policy description." },
26490
+ { name: "version", type: "integer", description: "Current version number." },
26491
+ { name: "fields", type: "array", description: "Array of declared output field objects." },
26492
+ { name: "rules", type: "array", description: "Array of transformation rule objects." },
26493
+ { name: "created_at", type: "string", description: "ISO 8601 creation timestamp." },
26494
+ { name: "updated_at", type: "string", description: "ISO 8601 last update timestamp." }
26495
+ ]
26496
+ },
26497
+ {
26498
+ type: "code",
26499
+ title: "curl",
26500
+ language: "bash",
26501
+ code: `curl -s https://api.talonic.ai/v1/data-policies/p1a2b3c4-e5f6-7890-abcd-ef1234567890 \\
26502
+ -H "Authorization: Bearer tlnc_your_api_key"`
26503
+ },
26504
+ {
26505
+ type: "code",
26506
+ title: "Response",
26507
+ code: `{
26508
+ "id": "p1a2b3c4-e5f6-7890-abcd-ef1234567890",
26509
+ "name": "Invoice Normalization",
26510
+ "description": "Standardize currency codes, country names, and date formats",
26511
+ "version": 3,
26512
+ "fields": [
26513
+ { "field_key": "country_code", "type": "string" },
26514
+ { "field_key": "currency", "type": "string" }
26515
+ ],
26516
+ "rules": [
26517
+ {
26518
+ "id": "r1a2b3c4-0001",
26519
+ "field_key": "country_code",
26520
+ "rule_type": "lookup",
26521
+ "config": { "table": "country_codes", "source_field": "country" }
26522
+ }
26523
+ ],
26524
+ "created_at": "2024-10-01T09:00:00.000Z",
26525
+ "updated_at": "2024-10-15T11:30:00.000Z"
26526
+ }`
26527
+ },
26528
+ { type: "heading", level: 2, id: "get-data-policy-errors", text: "Errors" },
26529
+ {
26530
+ type: "param-table",
26531
+ title: "Error responses",
26532
+ params: [
26533
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26534
+ { name: "404", type: "not_found", description: "Data policy not found or does not belong to your organization." },
26535
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26536
+ ]
26537
+ }
26538
+ ],
26539
+ related: [
26540
+ { label: "List Data Policies", slug: "list-data-policies" },
26541
+ { label: "Resolutions", slug: "list-resolutions" },
26542
+ { label: "Schemas", slug: "list-schemas" }
26543
+ ],
26544
+ faq: [
26545
+ { question: "Why are fields and rules inlined in the response?", answer: "Inlining fields and rules gives you a complete view of the policy in a single API call. This is useful for auditing the full transformation pipeline, debugging resolution results, or exporting the policy configuration for version control." },
26546
+ { question: "What are the supported rule types?", answer: "Data policies support 14 rule types including `lookup` (reference table matching), `direct` (field-to-field mapping), `lua` (custom Lua scripting), `compute` (deterministic formulas), `format` (string formatting), and more. Each rule type has its own configuration schema." },
26547
+ { question: "How does rule execution order work?", answer: "Rules are compiled into a topological order based on their declared field dependencies. A rule that reads from `country` and writes to `country_code` will always execute after the rule that produces `country`. Circular dependencies are detected at compile time." }
26548
+ ],
26549
+ mentions: ["data policy", "transformation", "normalization", "lookup", "Lua"]
26550
+ },
26551
+ {
26552
+ slug: "update-data-policy",
26553
+ parentSlug: "data-policies",
26554
+ title: "Update Data Policy",
26555
+ seoTitle: "Update Data Policy Endpoint \u2014 Talonic Docs",
26556
+ description: "Update the name or description of a data policy. Creates a new version. Does not affect in-flight resolution runs that captured a previous snapshot.",
26557
+ content: [
26558
+ { type: "paragraph", text: "Update the metadata of an existing data policy. You can change the name, description, or both. Each update increments the policy version number. Resolution runs that have already captured a policy snapshot are not affected by this change." },
26559
+ { type: "paragraph", text: "To modify the transformation logic (fields and rules), use the dedicated fields and rules endpoints. This endpoint only updates the policy-level metadata. Version increments from metadata changes are tracked separately from version increments caused by field or rule modifications." },
26560
+ {
26561
+ type: "endpoint",
26562
+ method: "PATCH",
26563
+ path: "/v1/data-policies/{id}",
26564
+ summary: "Update data policy name or description.",
26565
+ description: "Requires write scope.",
26566
+ blocks: [
26567
+ {
26568
+ type: "param-table",
26569
+ title: "Path parameters",
26570
+ params: [
26571
+ { name: "id", type: "uuid", required: true, description: "Data policy UUID." }
26572
+ ]
26573
+ },
26574
+ {
26575
+ type: "param-table",
26576
+ title: "Body parameters",
26577
+ params: [
26578
+ { name: "name", type: "string", description: "Updated policy name." },
26579
+ { name: "description", type: "string", description: "Updated policy description." }
26580
+ ]
26581
+ },
26582
+ {
26583
+ type: "code",
26584
+ title: "Request body",
26585
+ code: `{
26586
+ "name": "Invoice Normalization v2",
26587
+ "description": "Updated: added VAT normalization rules"
26588
+ }`
26589
+ }
26590
+ ]
26591
+ },
26592
+ { type: "heading", level: 2, id: "update-data-policy-response", text: "Response" },
26593
+ {
26594
+ type: "param-table",
26595
+ title: "Response fields",
26596
+ params: [
26597
+ { name: "id", type: "string", description: "Data policy UUID." },
26598
+ { name: "name", type: "string", description: "Updated policy name." },
26599
+ { name: "description", type: "string | null", description: "Updated policy description." },
26600
+ { name: "version", type: "integer", description: "Incremented version number." },
26601
+ { name: "created_at", type: "string", description: "ISO 8601 creation timestamp." },
26602
+ { name: "updated_at", type: "string", description: "ISO 8601 last update timestamp." }
26603
+ ]
26604
+ },
26605
+ {
26606
+ type: "code",
26607
+ title: "curl",
26608
+ language: "bash",
26609
+ code: `curl -s -X PATCH https://api.talonic.ai/v1/data-policies/p1a2b3c4-e5f6-7890-abcd-ef1234567890 \\
26610
+ -H "Authorization: Bearer tlnc_your_api_key" \\
26611
+ -H "Content-Type: application/json" \\
26612
+ -d '{"name":"Invoice Normalization v2"}'`
26613
+ },
26614
+ {
26615
+ type: "code",
26616
+ title: "Response",
26617
+ code: `{
26618
+ "id": "p1a2b3c4-e5f6-7890-abcd-ef1234567890",
26619
+ "name": "Invoice Normalization v2",
26620
+ "description": "Updated: added VAT normalization rules",
26621
+ "version": 4,
26622
+ "created_at": "2024-10-01T09:00:00.000Z",
26623
+ "updated_at": "2024-10-20T08:15:00.000Z"
26624
+ }`
26625
+ },
26626
+ { type: "heading", level: 2, id: "update-data-policy-errors", text: "Errors" },
26627
+ {
26628
+ type: "param-table",
26629
+ title: "Error responses",
26630
+ params: [
26631
+ { name: "400", type: "bad_request", description: "Invalid request body." },
26632
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26633
+ { name: "404", type: "not_found", description: "Data policy not found or does not belong to your organization." },
26634
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26635
+ ]
26636
+ }
26637
+ ],
26638
+ related: [
26639
+ { label: "Get Data Policy", slug: "get-data-policy" },
26640
+ { label: "List Data Policy Versions", slug: "list-data-policy-versions" }
26641
+ ],
26642
+ faq: [
26643
+ { question: "Does updating a policy affect running resolutions?", answer: "No. Resolution runs capture a snapshot of the policy at creation time. Updates to the policy only affect future resolution runs. Completed and in-flight runs are unaffected." },
26644
+ { question: "Do I need to provide all fields in the update?", answer: "No. This is a PATCH endpoint, so you only need to include the fields you want to change. Omitted fields retain their current values." }
26645
+ ],
26646
+ mentions: ["data policy", "transformation", "normalization"]
26647
+ },
26648
+ {
26649
+ slug: "delete-data-policy",
26650
+ parentSlug: "data-policies",
26651
+ title: "Delete Data Policy",
26652
+ seoTitle: "Delete Data Policy Endpoint \u2014 Talonic Docs",
26653
+ description: "Permanently delete a data policy and all its versions, fields, and rules. Requires write scope. Does not affect resolution runs that captured a snapshot.",
26654
+ content: [
26655
+ { type: "paragraph", text: "Permanently delete a data policy and all associated versions, fields, and rules. This action is irreversible. Resolution runs that previously captured a snapshot of this policy are not affected, as they retain their own copy of the policy configuration." },
26656
+ { type: "callout", variant: "warning", text: "Deletion is permanent. All versions, fields, and rules of this policy are removed. Future resolution runs will not be able to reference this policy. Existing resolution runs with captured snapshots are unaffected." },
26657
+ {
26658
+ type: "endpoint",
26659
+ method: "DELETE",
26660
+ path: "/v1/data-policies/{id}",
26661
+ summary: "Delete a data policy and all its versions.",
26662
+ description: "Requires write scope.",
26663
+ blocks: [
26664
+ {
26665
+ type: "param-table",
26666
+ title: "Path parameters",
26667
+ params: [
26668
+ { name: "id", type: "uuid", required: true, description: "Data policy UUID." }
26669
+ ]
26670
+ }
26671
+ ]
26672
+ },
26673
+ { type: "heading", level: 2, id: "delete-data-policy-response", text: "Response" },
26674
+ {
26675
+ type: "param-table",
26676
+ title: "Response fields",
26677
+ params: [
26678
+ { name: "deleted", type: "boolean", description: "Always true on success." }
26679
+ ]
26680
+ },
26681
+ {
26682
+ type: "code",
26683
+ title: "curl",
26684
+ language: "bash",
26685
+ code: `curl -s -X DELETE https://api.talonic.ai/v1/data-policies/p1a2b3c4-e5f6-7890-abcd-ef1234567890 \\
26686
+ -H "Authorization: Bearer tlnc_your_api_key"`
26687
+ },
26688
+ {
26689
+ type: "code",
26690
+ title: "Response",
26691
+ code: `{
26692
+ "deleted": true
26693
+ }`
26694
+ },
26695
+ { type: "heading", level: 2, id: "delete-data-policy-errors", text: "Errors" },
26696
+ {
26697
+ type: "param-table",
26698
+ title: "Error responses",
26699
+ params: [
26700
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26701
+ { name: "404", type: "not_found", description: "Data policy not found or does not belong to your organization." },
26702
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26703
+ ]
26704
+ }
26705
+ ],
26706
+ related: [
26707
+ { label: "List Data Policies", slug: "list-data-policies" },
26708
+ { label: "Resolutions", slug: "list-resolutions" }
26709
+ ],
26710
+ faq: [
26711
+ { question: "Does deleting a policy break existing resolution runs?", answer: "No. Resolution runs capture a complete policy snapshot at creation time. Deleting the policy only prevents future resolution runs from using it. All historical results are preserved." },
26712
+ { question: "Can I recover a deleted policy?", answer: "No. Deletion is permanent. If you need to preserve a policy configuration, export it via the GET endpoint before deleting. You can inspect historical snapshots in completed resolution runs." }
26713
+ ],
26714
+ mentions: ["data policy", "transformation", "normalization"]
26715
+ },
26716
+ {
26717
+ slug: "list-data-policy-versions",
26718
+ parentSlug: "data-policies",
26719
+ title: "List Data Policy Versions",
26720
+ seoTitle: "List Data Policy Versions \u2014 Talonic Docs",
26721
+ description: "List all versions of a data policy showing the change history. Each version captures the policy state at a point in time for auditability.",
26722
+ content: [
26723
+ { type: "paragraph", text: "Retrieve the version history of a data policy. Each version represents a snapshot of the policy configuration at a point in time. Versions are created automatically when the policy, its fields, or its rules are modified. This endpoint is useful for auditing changes and understanding how the policy evolved." },
26724
+ { type: "paragraph", text: "Version numbers are monotonically increasing integers starting at 1. When a resolution run captures a policy snapshot, it records the version number, allowing you to correlate resolution results with the specific policy configuration that produced them." },
26725
+ {
26726
+ type: "endpoint",
26727
+ method: "GET",
26728
+ path: "/v1/data-policies/{id}/versions",
26729
+ summary: "List all versions of a data policy.",
26730
+ description: "Requires read scope.",
26731
+ blocks: [
26732
+ {
26733
+ type: "param-table",
26734
+ title: "Path parameters",
26735
+ params: [
26736
+ { name: "id", type: "uuid", required: true, description: "Data policy UUID." }
26737
+ ]
26738
+ }
26739
+ ]
26740
+ },
26741
+ { type: "heading", level: 2, id: "list-data-policy-versions-response", text: "Response" },
26742
+ {
26743
+ type: "param-table",
26744
+ title: "Response fields",
26745
+ params: [
26746
+ { name: "data", type: "array", description: "Array of version objects." },
26747
+ { name: "data[].version", type: "integer", description: "Version number." },
26748
+ { name: "data[].created_at", type: "string", description: "ISO 8601 timestamp when this version was created." },
26749
+ { name: "data[].fields_count", type: "integer", description: "Number of declared fields in this version." },
26750
+ { name: "data[].rules_count", type: "integer", description: "Number of rules in this version." }
26751
+ ]
26752
+ },
26753
+ {
26754
+ type: "code",
26755
+ title: "curl",
26756
+ language: "bash",
26757
+ code: `curl -s https://api.talonic.ai/v1/data-policies/p1a2b3c4-e5f6-7890-abcd-ef1234567890/versions \\
26758
+ -H "Authorization: Bearer tlnc_your_api_key"`
26759
+ },
26760
+ {
26761
+ type: "code",
26762
+ title: "Response",
26763
+ code: `{
26764
+ "data": [
26765
+ { "version": 3, "created_at": "2024-10-15T11:30:00.000Z", "fields_count": 5, "rules_count": 8 },
26766
+ { "version": 2, "created_at": "2024-10-10T14:00:00.000Z", "fields_count": 4, "rules_count": 6 },
26767
+ { "version": 1, "created_at": "2024-10-01T09:00:00.000Z", "fields_count": 2, "rules_count": 3 }
26768
+ ]
26769
+ }`
26770
+ },
26771
+ { type: "heading", level: 2, id: "list-data-policy-versions-errors", text: "Errors" },
26772
+ {
26773
+ type: "param-table",
26774
+ title: "Error responses",
26775
+ params: [
26776
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26777
+ { name: "404", type: "not_found", description: "Data policy not found or does not belong to your organization." },
26778
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26779
+ ]
26780
+ }
26781
+ ],
26782
+ related: [
26783
+ { label: "Get Data Policy", slug: "get-data-policy" },
26784
+ { label: "Update Data Policy", slug: "update-data-policy" }
26785
+ ],
26786
+ faq: [
26787
+ { question: "How do I see which version a resolution used?", answer: "The `policy_snapshot` field on a resolution run includes the version number. Compare it with the version list to understand which configuration was active when the resolution executed." },
26788
+ { question: "Can I roll back to a previous version?", answer: "The API does not support direct rollback. To revert to a previous configuration, inspect the version history and manually re-apply the desired fields and rules, which creates a new version with the old configuration." }
26789
+ ],
26790
+ mentions: ["data policy", "transformation", "normalization", "version history"]
26791
+ },
26792
+ {
26793
+ slug: "list-data-policy-fields",
26794
+ parentSlug: "data-policies",
26795
+ title: "List Data Policy Fields",
26796
+ seoTitle: "List Data Policy Fields \u2014 Talonic Docs",
26797
+ description: "List the declared output fields of a data policy. Fields define the output contract \u2014 which field keys the policy produces and their expected types.",
26798
+ content: [
26799
+ { type: "paragraph", text: "Retrieve the list of declared output fields for a data policy. Fields define the output contract: the set of field keys that the policy executor will emit in resolution results. Only fields declared here appear in the final output; any intermediate values computed by rules but not declared as fields are discarded." },
26800
+ { type: "paragraph", text: "Each field has a `field_key` (the output column name) and a `type` (the expected value type). The field list is used by the resolution pipeline to validate output schema compliance and by the results UI to determine which columns to display. Adding or removing fields creates a new policy version." },
26801
+ {
26802
+ type: "endpoint",
26803
+ method: "GET",
26804
+ path: "/v1/data-policies/{id}/fields",
26805
+ summary: "List declared output fields for a data policy.",
26806
+ description: "Requires read scope.",
26807
+ blocks: [
26808
+ {
26809
+ type: "param-table",
26810
+ title: "Path parameters",
26811
+ params: [
26812
+ { name: "id", type: "uuid", required: true, description: "Data policy UUID." }
26813
+ ]
26814
+ }
26815
+ ]
26816
+ },
26817
+ { type: "heading", level: 2, id: "list-data-policy-fields-response", text: "Response" },
26818
+ {
26819
+ type: "param-table",
26820
+ title: "Response fields",
26821
+ params: [
26822
+ { name: "data", type: "array", description: "Array of field definition objects." },
26823
+ { name: "data[].field_key", type: "string", description: "Output field key name." },
26824
+ { name: "data[].type", type: "string", description: "Expected value type (e.g. string, number, date)." },
26825
+ { name: "data[].description", type: "string | null", description: "Optional field description." }
26826
+ ]
26827
+ },
26828
+ {
26829
+ type: "code",
26830
+ title: "curl",
26831
+ language: "bash",
26832
+ code: `curl -s https://api.talonic.ai/v1/data-policies/p1a2b3c4-e5f6-7890-abcd-ef1234567890/fields \\
26833
+ -H "Authorization: Bearer tlnc_your_api_key"`
26834
+ },
26835
+ {
26836
+ type: "code",
26837
+ title: "Response",
26838
+ code: `{
26839
+ "data": [
26840
+ { "field_key": "country_code", "type": "string", "description": "ISO 3166-1 alpha-2 country code" },
26841
+ { "field_key": "currency", "type": "string", "description": "ISO 4217 currency code" },
26842
+ { "field_key": "invoice_date", "type": "date", "description": "Normalized invoice date in ISO 8601 format" }
26843
+ ]
26844
+ }`
26845
+ },
26846
+ { type: "heading", level: 2, id: "list-data-policy-fields-errors", text: "Errors" },
26847
+ {
26848
+ type: "param-table",
26849
+ title: "Error responses",
26850
+ params: [
26851
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26852
+ { name: "404", type: "not_found", description: "Data policy not found or does not belong to your organization." },
26853
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26854
+ ]
26855
+ }
26856
+ ],
26857
+ related: [
26858
+ { label: "Get Data Policy", slug: "get-data-policy" },
26859
+ { label: "List Data Policy Rules", slug: "list-data-policy-rules" },
26860
+ { label: "Schemas", slug: "list-schemas" }
26861
+ ],
26862
+ faq: [
26863
+ { question: "What happens to values not declared as fields?", answer: "Values computed by rules but not declared as policy fields are treated as intermediate variables and discarded from the final output. Only declared fields appear in resolution results. You can enable `emit_undeclared_cells` in pipeline config for debugging." },
26864
+ { question: "Can Lua scripts write to undeclared field keys?", answer: "Yes. Lua scripts can write to any cell name as a temporary variable for intermediate computation. However, only values written to declared field keys will appear in the resolution output. Undeclared keys are silently dropped unless debugging is enabled." }
26865
+ ],
26866
+ mentions: ["data policy", "transformation", "normalization", "Lua"]
26867
+ },
26868
+ {
26869
+ slug: "list-data-policy-rules",
26870
+ parentSlug: "data-policies",
26871
+ title: "List Data Policy Rules",
26872
+ seoTitle: "List Data Policy Rules \u2014 Talonic Docs",
26873
+ description: "List the transformation rules of a data policy. Rules define lookup cascades, Lua scripts, direct mappings, and computed fields executed during resolution.",
26874
+ content: [
26875
+ { type: "paragraph", text: "Retrieve the list of transformation rules for a data policy. Rules define the actual transformation logic executed during resolution: lookup cascades against reference tables, Lua scripting for custom logic, direct field-to-field mappings, deterministic computations, and format transformations. Rules are compiled into topological execution order based on field dependencies." },
26876
+ { type: "paragraph", text: "Each rule targets a specific output field and declares its rule type and configuration. The policy compiler validates that all rule dependencies are satisfiable and detects circular references at compile time. During execution, rules are processed in dependency order so that upstream fields are resolved before downstream rules that read from them." },
26877
+ {
26878
+ type: "endpoint",
26879
+ method: "GET",
26880
+ path: "/v1/data-policies/{id}/rules",
26881
+ summary: "List transformation rules for a data policy.",
26882
+ description: "Requires read scope.",
26883
+ blocks: [
26884
+ {
26885
+ type: "param-table",
26886
+ title: "Path parameters",
26887
+ params: [
26888
+ { name: "id", type: "uuid", required: true, description: "Data policy UUID." }
26889
+ ]
26890
+ }
26891
+ ]
26892
+ },
26893
+ { type: "heading", level: 2, id: "list-data-policy-rules-response", text: "Response" },
26894
+ {
26895
+ type: "param-table",
26896
+ title: "Response fields",
26897
+ params: [
26898
+ { name: "data", type: "array", description: "Array of rule objects." },
26899
+ { name: "data[].id", type: "string", description: "Rule UUID." },
26900
+ { name: "data[].field_key", type: "string", description: "Target output field key." },
26901
+ { name: "data[].rule_type", type: "string", description: "Rule type (e.g. lookup, direct, lua, compute, format)." },
26902
+ { name: "data[].config", type: "object", description: "Rule-type-specific configuration." },
26903
+ { name: "data[].order", type: "integer", description: "Execution order within the compiled pipeline." }
26904
+ ]
26905
+ },
26906
+ {
26907
+ type: "code",
26908
+ title: "curl",
26909
+ language: "bash",
26910
+ code: `curl -s https://api.talonic.ai/v1/data-policies/p1a2b3c4-e5f6-7890-abcd-ef1234567890/rules \\
26911
+ -H "Authorization: Bearer tlnc_your_api_key"`
26912
+ },
26913
+ {
26914
+ type: "code",
26915
+ title: "Response",
26916
+ code: `{
26917
+ "data": [
26918
+ {
26919
+ "id": "r1a2b3c4-0001",
26920
+ "field_key": "country_code",
26921
+ "rule_type": "lookup",
26922
+ "config": {
26923
+ "table": "country_codes",
26924
+ "source_field": "country",
26925
+ "fallback": "llm"
26926
+ },
26927
+ "order": 1
26928
+ },
26929
+ {
26930
+ "id": "r1a2b3c4-0002",
26931
+ "field_key": "currency",
26932
+ "rule_type": "lua",
26933
+ "config": {
26934
+ "script": "if cell.country_code == 'US' then return 'USD' end"
26935
+ },
26936
+ "order": 2
26937
+ }
26938
+ ]
26939
+ }`
26940
+ },
26941
+ { type: "heading", level: 2, id: "list-data-policy-rules-errors", text: "Errors" },
26942
+ {
26943
+ type: "param-table",
26944
+ title: "Error responses",
26945
+ params: [
26946
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
26947
+ { name: "404", type: "not_found", description: "Data policy not found or does not belong to your organization." },
26948
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
26949
+ ]
26950
+ }
26951
+ ],
26952
+ related: [
26953
+ { label: "Get Data Policy", slug: "get-data-policy" },
26954
+ { label: "List Data Policy Fields", slug: "list-data-policy-fields" },
26955
+ { label: "Resolutions", slug: "list-resolutions" }
26956
+ ],
26957
+ faq: [
26958
+ { question: "What rule types are available?", answer: "Data policies support 14 rule types: `lookup` (reference table matching with optional LLM fallback), `direct` (field-to-field mapping), `lua` (custom Lua scripting), `compute` (deterministic formulas), `format` (string formatting), `coalesce`, `constant`, `concat`, `split`, `regex`, `date_format`, `number_format`, `conditional`, and `aggregate`." },
26959
+ { question: "How does the Lua scripting work?", answer: "Lua rules execute a sandboxed Lua 5.4 chunk for each record. The script receives a `cell` table with the current record values and can call chain methods like `:matches()` for reference table matching and `:llm_match()` for LLM-powered fuzzy matching. Scripts return the resolved value for the target field." },
26960
+ { question: "What happens when a lookup fails to find a match?", answer: "Lookup rules support a 3-tier cascade: string normalization (exact match after lowercasing and trimming), token-based fuzzy matching (Jaccard similarity), and an optional LLM fallback (Haiku). If all tiers fail, the field retains its original value. Configure the `fallback` option in the rule config to control cascade behavior." }
26961
+ ],
26962
+ mentions: ["data policy", "transformation", "normalization", "lookup", "Lua"]
26963
+ }
26964
+ ];
26965
+
26966
+ // src/content/api/record-sets.ts
26967
+ var sections52 = [
26968
+ {
26969
+ slug: "list-record-sets",
26970
+ parentSlug: "record-sets",
26971
+ title: "List Record Sets",
26972
+ seoTitle: "List Record Sets Endpoint \u2014 Talonic Docs",
26973
+ description: "List record sets across the value plane with cursor-based pagination. Filter by layer (capture, structured, resolved, product) to find sets at specific pipeline stages.",
26974
+ content: [
26975
+ { type: "paragraph", text: "Record sets are the core storage abstraction in the Talonic value plane. Each record set is a table-like collection of records at a specific layer of the pipeline. The value plane organizes data into four layers: **capture** (raw OCR output), **structured** (extracted field values), **resolved** (normalized canonical values), and **product** (final assembled output). Record sets at each layer share the same underlying cell storage model but represent progressively refined data." },
26976
+ { type: "paragraph", text: "Use this endpoint to list all record sets in your workspace. Filter by `layer` to find sets at a specific pipeline stage, or by `source_type` to locate sets from a particular origin. Results support cursor-based pagination and can be sorted by creation date. Each record set contains typed cells with confidence scores and provenance metadata." },
26977
+ {
26978
+ type: "endpoint",
26979
+ method: "GET",
26980
+ path: "/v1/record-sets",
26981
+ summary: "List record sets with optional layer and source type filters.",
26982
+ description: "Requires read scope.",
26983
+ blocks: [
26984
+ {
26985
+ type: "param-table",
26986
+ title: "Query parameters",
26987
+ params: [
26988
+ { name: "layer", type: "string", description: "Filter by value plane layer: capture, structured, resolved, or product." },
26989
+ { name: "source_type", type: "string", description: "Filter by source type (e.g. extraction, resolution, job)." },
26990
+ { name: "limit", type: "integer", default: "20", description: "Maximum number of items to return (1-100)." },
26991
+ { name: "cursor", type: "string", description: "Opaque pagination cursor from a previous response." },
26992
+ { name: "order", type: "string", default: "desc", description: "Sort order by creation date (asc | desc)." }
26993
+ ]
26994
+ }
26995
+ ]
26996
+ },
26997
+ { type: "heading", level: 2, id: "list-record-sets-response", text: "Response" },
26998
+ {
26999
+ type: "param-table",
27000
+ title: "Response fields",
27001
+ params: [
27002
+ { name: "data", type: "array", description: "Array of record set objects." },
27003
+ { name: "data[].id", type: "string", description: "Record set UUID." },
27004
+ { name: "data[].name", type: "string", description: "Human-readable record set name." },
27005
+ { name: "data[].layer", type: "string", description: "Value plane layer: capture, structured, resolved, or product." },
27006
+ { name: "data[].source_type", type: "string", description: "Origin type that created this record set." },
27007
+ { name: "data[].source_id", type: "string", description: "UUID of the source entity (e.g. extraction run, job run)." },
27008
+ { name: "data[].record_count", type: "integer", description: "Total number of records in this set." },
27009
+ { name: "data[].field_count", type: "integer", description: "Number of fields defined on this set." },
27010
+ { name: "data[].created_at", type: "string", description: "ISO 8601 creation timestamp." },
27011
+ { name: "data[].updated_at", type: "string", description: "ISO 8601 last update timestamp." }
27012
+ ]
27013
+ },
27014
+ {
27015
+ type: "code",
27016
+ title: "curl",
27017
+ language: "bash",
27018
+ code: `curl -s "https://api.talonic.ai/v1/record-sets?layer=resolved&limit=10" \\
27019
+ -H "Authorization: Bearer tlnc_your_api_key"`
27020
+ },
27021
+ {
27022
+ type: "code",
27023
+ title: "Response",
27024
+ code: `{
27025
+ "data": [
27026
+ {
27027
+ "id": "rs-a1b2c3d4-e5f6-7890-abcd-ef1234567890",
27028
+ "name": "Resolution Run 2024-10-15",
27029
+ "layer": "resolved",
27030
+ "source_type": "resolution",
27031
+ "source_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
27032
+ "record_count": 142,
27033
+ "field_count": 12,
27034
+ "created_at": "2024-10-15T11:30:00.000Z",
27035
+ "updated_at": "2024-10-15T11:35:42.000Z"
27036
+ }
27037
+ ],
27038
+ "cursor": "eyJpZCI6InJzLWExYjJjM2Q0In0="
27039
+ }`
27040
+ },
27041
+ { type: "heading", level: 2, id: "list-record-sets-errors", text: "Errors" },
27042
+ {
27043
+ type: "param-table",
27044
+ title: "Error responses",
27045
+ params: [
27046
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
27047
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
27048
+ ]
27049
+ }
27050
+ ],
27051
+ related: [
27052
+ { label: "Extractions", slug: "list-extractions" },
27053
+ { label: "Jobs", slug: "list-jobs" },
27054
+ { label: "Resolutions", slug: "list-resolutions" }
27055
+ ],
27056
+ faq: [
27057
+ { question: "What are the value plane layers?", answer: "The value plane has four layers: **capture** (raw OCR/parsed text), **structured** (extracted field values from Claude), **resolved** (normalized canonical values from resolution), and **product** (final assembled output ready for delivery). Each layer represents a progressive refinement of the data." },
27058
+ { question: "What is a record set?", answer: "A record set is a table-like collection of records at a specific value plane layer. It provides typed cell storage where each cell holds a value, confidence score, status, and provenance trace. Record sets are the primary read model for hot table operations in the platform." },
27059
+ { question: "How do record sets relate to jobs and resolutions?", answer: "Job runs produce record sets at the **structured** layer (extracted values). Resolution runs consume structured record sets and produce new record sets at the **resolved** layer (normalized values). The `source_type` and `source_id` fields let you trace each record set back to its origin." }
27060
+ ],
27061
+ mentions: ["record set", "value plane", "cell", "provenance", "confidence"]
27062
+ },
27063
+ {
27064
+ slug: "get-record-set",
27065
+ parentSlug: "record-sets",
27066
+ title: "Get Record Set",
27067
+ seoTitle: "Get Record Set Endpoint \u2014 Talonic Docs",
27068
+ description: "Retrieve a single record set by ID with its layer, source reference, record count, and field count. Requires read scope for the workspace.",
27069
+ content: [
27070
+ { type: "paragraph", text: "Retrieve the full metadata of a specific record set by its UUID. The response includes the value plane layer, source entity reference, record and field counts, and timestamps. Use this endpoint to inspect a record set before fetching its fields or records." },
27071
+ { type: "paragraph", text: "The `layer` field tells you where this record set sits in the pipeline progression. The `source_type` and `source_id` fields let you trace the record set back to the extraction, job, or resolution run that created it. The `record_count` and `field_count` give you a quick summary of the dataset size without fetching the actual records." },
27072
+ {
27073
+ type: "endpoint",
27074
+ method: "GET",
27075
+ path: "/v1/record-sets/{id}",
27076
+ summary: "Get a single record set by ID.",
27077
+ description: "Requires read scope.",
27078
+ blocks: [
27079
+ {
27080
+ type: "param-table",
27081
+ title: "Path parameters",
27082
+ params: [
27083
+ { name: "id", type: "uuid", required: true, description: "Record set UUID." }
27084
+ ]
27085
+ }
27086
+ ]
27087
+ },
27088
+ { type: "heading", level: 2, id: "get-record-set-response", text: "Response" },
27089
+ {
27090
+ type: "param-table",
27091
+ title: "Response fields",
27092
+ params: [
27093
+ { name: "id", type: "string", description: "Record set UUID." },
27094
+ { name: "name", type: "string", description: "Human-readable name." },
27095
+ { name: "layer", type: "string", description: "Value plane layer: capture, structured, resolved, or product." },
27096
+ { name: "source_type", type: "string", description: "Origin type that created this record set." },
27097
+ { name: "source_id", type: "string", description: "UUID of the source entity." },
27098
+ { name: "record_count", type: "integer", description: "Total number of records." },
27099
+ { name: "field_count", type: "integer", description: "Number of defined fields." },
27100
+ { name: "created_at", type: "string", description: "ISO 8601 creation timestamp." },
27101
+ { name: "updated_at", type: "string", description: "ISO 8601 last update timestamp." }
27102
+ ]
27103
+ },
27104
+ {
27105
+ type: "code",
27106
+ title: "curl",
27107
+ language: "bash",
27108
+ code: `curl -s https://api.talonic.ai/v1/record-sets/rs-a1b2c3d4-e5f6-7890-abcd-ef1234567890 \\
27109
+ -H "Authorization: Bearer tlnc_your_api_key"`
27110
+ },
27111
+ {
27112
+ type: "code",
27113
+ title: "Response",
27114
+ code: `{
27115
+ "id": "rs-a1b2c3d4-e5f6-7890-abcd-ef1234567890",
27116
+ "name": "Resolution Run 2024-10-15",
27117
+ "layer": "resolved",
27118
+ "source_type": "resolution",
27119
+ "source_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
27120
+ "record_count": 142,
27121
+ "field_count": 12,
27122
+ "created_at": "2024-10-15T11:30:00.000Z",
27123
+ "updated_at": "2024-10-15T11:35:42.000Z"
27124
+ }`
27125
+ },
27126
+ { type: "heading", level: 2, id: "get-record-set-errors", text: "Errors" },
27127
+ {
27128
+ type: "param-table",
27129
+ title: "Error responses",
27130
+ params: [
27131
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
27132
+ { name: "404", type: "not_found", description: "Record set not found or does not belong to your organization." },
27133
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
27134
+ ]
27135
+ }
27136
+ ],
27137
+ related: [
27138
+ { label: "List Record Sets", slug: "list-record-sets" },
27139
+ { label: "List Record Set Fields", slug: "list-record-set-fields" },
27140
+ { label: "List Record Set Records", slug: "list-record-set-records" }
27141
+ ],
27142
+ faq: [
27143
+ { question: "How do I find which job or resolution produced a record set?", answer: 'The `source_type` field indicates the origin (e.g. "extraction", "job", "resolution") and the `source_id` is the UUID of that entity. Use the corresponding GET endpoint to retrieve the full source details.' },
27144
+ { question: "What does record_count represent?", answer: "The `record_count` is the total number of records (rows) in the set. For structured and resolved layers, each record typically corresponds to one document. For the product layer, records correspond to assembled output rows." }
27145
+ ],
27146
+ mentions: ["record set", "value plane", "cell", "provenance", "confidence"]
27147
+ },
27148
+ {
27149
+ slug: "list-record-set-fields",
27150
+ parentSlug: "record-sets",
27151
+ title: "List Record Set Fields",
27152
+ seoTitle: "List Record Set Fields \u2014 Talonic Docs",
27153
+ description: "List the field definitions of a record set including field keys, types, and display metadata. Fields define the columns available in the record set.",
27154
+ content: [
27155
+ { type: "paragraph", text: "Retrieve the field definitions for a record set. Fields define the columns available in the set \u2014 each field has a key (column name), a type, and optional display metadata. The field list determines which cell values can appear in each record and how they should be interpreted by downstream consumers." },
27156
+ { type: "paragraph", text: "Field definitions are derived from the schema used during extraction or the policy used during resolution. They include the field key, value type, and optional attributes like display name and description. Use this endpoint to understand the shape of the data before fetching records, or to build dynamic table UIs that adapt to the field list." },
27157
+ {
27158
+ type: "endpoint",
27159
+ method: "GET",
27160
+ path: "/v1/record-sets/{id}/fields",
27161
+ summary: "List field definitions for a record set.",
27162
+ description: "Requires read scope.",
27163
+ blocks: [
27164
+ {
27165
+ type: "param-table",
27166
+ title: "Path parameters",
27167
+ params: [
27168
+ { name: "id", type: "uuid", required: true, description: "Record set UUID." }
27169
+ ]
27170
+ }
27171
+ ]
27172
+ },
27173
+ { type: "heading", level: 2, id: "list-record-set-fields-response", text: "Response" },
27174
+ {
27175
+ type: "param-table",
27176
+ title: "Response fields",
27177
+ params: [
27178
+ { name: "data", type: "array", description: "Array of field definition objects." },
27179
+ { name: "data[].field_key", type: "string", description: "Field key (column name)." },
27180
+ { name: "data[].type", type: "string", description: "Value type (e.g. string, number, date, boolean)." },
27181
+ { name: "data[].display_name", type: "string | null", description: "Human-readable display name." },
27182
+ { name: "data[].description", type: "string | null", description: "Optional field description." },
27183
+ { name: "data[].ordinal", type: "integer", description: "Display order position." }
27184
+ ]
27185
+ },
27186
+ {
27187
+ type: "code",
27188
+ title: "curl",
27189
+ language: "bash",
27190
+ code: `curl -s https://api.talonic.ai/v1/record-sets/rs-a1b2c3d4-e5f6-7890-abcd-ef1234567890/fields \\
27191
+ -H "Authorization: Bearer tlnc_your_api_key"`
27192
+ },
27193
+ {
27194
+ type: "code",
27195
+ title: "Response",
27196
+ code: `{
27197
+ "data": [
27198
+ { "field_key": "invoice_number", "type": "string", "display_name": "Invoice Number", "description": null, "ordinal": 0 },
27199
+ { "field_key": "vendor_name", "type": "string", "display_name": "Vendor Name", "description": null, "ordinal": 1 },
27200
+ { "field_key": "total_amount", "type": "number", "display_name": "Total Amount", "description": "Invoice total in local currency", "ordinal": 2 },
27201
+ { "field_key": "invoice_date", "type": "date", "display_name": "Invoice Date", "description": null, "ordinal": 3 }
27202
+ ]
27203
+ }`
27204
+ },
27205
+ { type: "heading", level: 2, id: "list-record-set-fields-errors", text: "Errors" },
27206
+ {
27207
+ type: "param-table",
27208
+ title: "Error responses",
27209
+ params: [
27210
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
27211
+ { name: "404", type: "not_found", description: "Record set not found or does not belong to your organization." },
27212
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
27213
+ ]
27214
+ }
27215
+ ],
27216
+ related: [
27217
+ { label: "Get Record Set", slug: "get-record-set" },
27218
+ { label: "List Record Set Records", slug: "list-record-set-records" },
27219
+ { label: "Extractions", slug: "list-extractions" }
27220
+ ],
27221
+ faq: [
27222
+ { question: "Where do the field definitions come from?", answer: "Field definitions are derived from the user schema (for structured layer sets) or the data policy output contract (for resolved layer sets). They represent the declared columns that the extraction or resolution pipeline was configured to produce." },
27223
+ { question: "Can different record sets have different fields?", answer: "Yes. Each record set has its own independent field definitions. A structured record set may have different fields than a resolved record set, even if they originate from the same documents, because the resolution policy may add, rename, or transform fields." }
27224
+ ],
27225
+ mentions: ["record set", "value plane", "cell", "field definition"]
27226
+ },
27227
+ {
27228
+ slug: "list-record-set-records",
27229
+ parentSlug: "record-sets",
27230
+ title: "List Record Set Records",
27231
+ seoTitle: "List Record Set Records \u2014 Talonic Docs",
27232
+ description: "List records in a record set with offset-based pagination. Each record contains typed cells with values, confidence scores, and provenance metadata.",
27233
+ content: [
27234
+ { type: "paragraph", text: "Retrieve the records (rows) in a record set with offset-based pagination. Each record contains a set of typed cells keyed by field name. Every cell carries a value, a confidence score (0-1), a status indicator, and provenance metadata tracing the value back to its source. This endpoint is the primary way to read structured data from the value plane." },
27235
+ { type: "paragraph", text: "Unlike the cursor-based pagination used by most list endpoints, record set records use offset-based pagination with `page` and `limit` parameters. This is intentional: record sets are table-like structures where random access by page number is a common use case for building paginated table UIs. The total record count is available on the parent record set object." },
27236
+ {
27237
+ type: "endpoint",
27238
+ method: "GET",
27239
+ path: "/v1/record-sets/{id}/records",
27240
+ summary: "List records with typed cells, confidence, and provenance.",
27241
+ description: "Requires read scope. Uses offset-based pagination.",
27242
+ blocks: [
27243
+ {
27244
+ type: "param-table",
27245
+ title: "Path parameters",
27246
+ params: [
27247
+ { name: "id", type: "uuid", required: true, description: "Record set UUID." }
27248
+ ]
27249
+ },
27250
+ {
27251
+ type: "param-table",
27252
+ title: "Query parameters",
27253
+ params: [
27254
+ { name: "page", type: "integer", default: "1", description: "Page number (1-indexed)." },
27255
+ { name: "limit", type: "integer", default: "20", description: "Number of records per page (1-100)." }
27256
+ ]
27257
+ }
27258
+ ]
27259
+ },
27260
+ { type: "heading", level: 2, id: "list-record-set-records-response", text: "Response" },
27261
+ {
27262
+ type: "param-table",
27263
+ title: "Response fields",
27264
+ params: [
27265
+ { name: "data", type: "array", description: "Array of record objects." },
27266
+ { name: "data[].id", type: "string", description: "Record UUID." },
27267
+ { name: "data[].document_id", type: "string | null", description: "Associated document UUID, if applicable." },
27268
+ { name: "data[].cells", type: "object", description: "Map of field_key to cell objects." },
27269
+ { name: "data[].cells[key].value", type: "string | null", description: "The cell value." },
27270
+ { name: "data[].cells[key].confidence", type: "number", description: "Confidence score (0-1)." },
27271
+ { name: "data[].cells[key].status", type: "string", description: "Cell status (e.g. extracted, resolved, manual)." },
27272
+ { name: "data[].cells[key].source", type: "string | null", description: "Provenance trace indicating how the value was produced." },
27273
+ { name: "page", type: "integer", description: "Current page number." },
27274
+ { name: "total", type: "integer", description: "Total number of records." }
27275
+ ]
27276
+ },
27277
+ {
27278
+ type: "code",
27279
+ title: "curl",
27280
+ language: "bash",
27281
+ code: `curl -s "https://api.talonic.ai/v1/record-sets/rs-a1b2c3d4-e5f6-7890-abcd-ef1234567890/records?page=1&limit=10" \\
27282
+ -H "Authorization: Bearer tlnc_your_api_key"`
27283
+ },
27284
+ {
27285
+ type: "code",
27286
+ title: "Response",
27287
+ code: `{
27288
+ "data": [
27289
+ {
27290
+ "id": "rec-f1e2d3c4-b5a6-7890-fedc-ba0987654321",
27291
+ "document_id": "doc-a1b2c3d4-e5f6-7890-abcd-ef1234567890",
27292
+ "cells": {
27293
+ "invoice_number": {
27294
+ "value": "INV-2024-0042",
27295
+ "confidence": 0.97,
27296
+ "status": "extracted",
27297
+ "source": "chunk:3/line:12"
27298
+ },
27299
+ "country_code": {
27300
+ "value": "DE",
27301
+ "confidence": 0.95,
27302
+ "status": "resolved",
27303
+ "source": "lookup:country_codes"
27304
+ }
27305
+ }
27306
+ }
27307
+ ],
27308
+ "page": 1,
27309
+ "total": 142
27310
+ }`
27311
+ },
27312
+ { type: "heading", level: 2, id: "list-record-set-records-errors", text: "Errors" },
27313
+ {
27314
+ type: "param-table",
27315
+ title: "Error responses",
27316
+ params: [
27317
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
27318
+ { name: "404", type: "not_found", description: "Record set not found or does not belong to your organization." },
27319
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
27320
+ ]
27321
+ }
27322
+ ],
27323
+ related: [
27324
+ { label: "Get Record Set", slug: "get-record-set" },
27325
+ { label: "List Record Set Fields", slug: "list-record-set-fields" },
27326
+ { label: "Export Record Set", slug: "export-record-set" }
27327
+ ],
27328
+ faq: [
27329
+ { question: "What does the confidence score mean?", answer: "The confidence score (0-1) reflects how certain the system is about the cell value. Values extracted directly from documents with high textual clarity score 0.9+. Values resolved via fuzzy matching or LLM fallback score lower (0.5-0.9). Manually entered values always have confidence 1.0." },
27330
+ { question: "What does the source field contain?", answer: "The `source` field is a provenance trace that indicates how the value was produced. For extracted values, it references the document chunk and line. For resolved values, it references the lookup table or resolution strategy. For computed values, it names the computation formula." },
27331
+ { question: "Why does this endpoint use page/limit instead of cursor?", answer: "Record sets are table-like structures where random access by page number is common in table UIs. Offset pagination supports jumping to arbitrary pages, which cursor pagination does not. For sequential traversal of very large sets, use the export endpoint instead." }
27332
+ ],
27333
+ mentions: ["record set", "value plane", "cell", "provenance", "confidence"]
27334
+ },
27335
+ {
27336
+ slug: "export-record-set",
27337
+ parentSlug: "record-sets",
27338
+ title: "Export Record Set",
27339
+ seoTitle: "Export Record Set Endpoint \u2014 Talonic Docs",
27340
+ description: "Export a complete record set as a downloadable file. Returns all records with their cell values in a single response for bulk consumption.",
27341
+ content: [
27342
+ { type: "paragraph", text: "Export the complete contents of a record set as a downloadable payload. Unlike the paginated records endpoint, this returns all records in a single response, making it suitable for bulk data consumption, ETL pipelines, and offline analysis. The export includes all cell values for every record in the set." },
27343
+ { type: "paragraph", text: "The export endpoint streams the full record set content without pagination. For large record sets, this may result in significant response payloads. Consider using the paginated records endpoint for interactive use cases, and reserve this endpoint for batch export workflows where you need the complete dataset in one call." },
27344
+ {
27345
+ type: "endpoint",
27346
+ method: "GET",
27347
+ path: "/v1/record-sets/{id}/export",
27348
+ summary: "Export a complete record set.",
27349
+ description: "Requires read scope. Returns all records without pagination.",
27350
+ blocks: [
27351
+ {
27352
+ type: "param-table",
27353
+ title: "Path parameters",
27354
+ params: [
27355
+ { name: "id", type: "uuid", required: true, description: "Record set UUID." }
27356
+ ]
27357
+ }
27358
+ ]
27359
+ },
27360
+ { type: "heading", level: 2, id: "export-record-set-response", text: "Response" },
27361
+ {
27362
+ type: "param-table",
27363
+ title: "Response fields",
27364
+ params: [
27365
+ { name: "data", type: "array", description: "Complete array of record objects with cells." },
27366
+ { name: "fields", type: "array", description: "Array of field definitions for column ordering." },
27367
+ { name: "record_count", type: "integer", description: "Total number of exported records." },
27368
+ { name: "exported_at", type: "string", description: "ISO 8601 timestamp of the export." }
27369
+ ]
27370
+ },
27371
+ {
27372
+ type: "code",
27373
+ title: "curl",
27374
+ language: "bash",
27375
+ code: `curl -s https://api.talonic.ai/v1/record-sets/rs-a1b2c3d4-e5f6-7890-abcd-ef1234567890/export \\
27376
+ -H "Authorization: Bearer tlnc_your_api_key"`
27377
+ },
27378
+ {
27379
+ type: "code",
27380
+ title: "Response",
27381
+ code: `{
27382
+ "data": [
27383
+ {
27384
+ "id": "rec-f1e2d3c4-b5a6-7890-fedc-ba0987654321",
27385
+ "document_id": "doc-a1b2c3d4-e5f6-7890-abcd-ef1234567890",
27386
+ "cells": {
27387
+ "invoice_number": { "value": "INV-2024-0042", "confidence": 0.97 },
27388
+ "country_code": { "value": "DE", "confidence": 0.95 }
27389
+ }
27390
+ }
27391
+ ],
27392
+ "fields": [
27393
+ { "field_key": "invoice_number", "type": "string", "ordinal": 0 },
27394
+ { "field_key": "country_code", "type": "string", "ordinal": 1 }
27395
+ ],
27396
+ "record_count": 142,
27397
+ "exported_at": "2024-10-16T09:00:00.000Z"
27398
+ }`
27399
+ },
27400
+ { type: "heading", level: 2, id: "export-record-set-errors", text: "Errors" },
27401
+ {
27402
+ type: "param-table",
27403
+ title: "Error responses",
27404
+ params: [
27405
+ { name: "401", type: "unauthorized", description: "Missing or invalid API key." },
27406
+ { name: "404", type: "not_found", description: "Record set not found or does not belong to your organization." },
27407
+ { name: "429", type: "rate_limited", description: "Too many requests. Retry after the period indicated in the Retry-After header." }
27408
+ ]
27409
+ }
27410
+ ],
27411
+ related: [
27412
+ { label: "List Record Set Records", slug: "list-record-set-records" },
27413
+ { label: "Extractions", slug: "list-extractions" },
27414
+ { label: "Jobs", slug: "list-jobs" },
27415
+ { label: "Resolutions", slug: "list-resolutions" }
27416
+ ],
27417
+ faq: [
27418
+ { question: "When should I use export vs the paginated records endpoint?", answer: "Use the export endpoint for batch workflows, ETL pipelines, and offline analysis where you need the complete dataset in one call. Use the paginated records endpoint for interactive UIs, incremental processing, or when working with very large record sets where memory is a concern." },
27419
+ { question: "Does the export include confidence and provenance?", answer: "The export includes cell values and confidence scores. Full provenance traces are available through the paginated records endpoint. The export is optimized for bulk data consumption rather than detailed audit trails." },
27420
+ { question: "Is there a size limit on exports?", answer: "There is no hard limit, but very large record sets (10,000+ records) may result in large response payloads and slower response times. For extremely large datasets, consider using the delivery pipeline to push data to an S3 bucket or SFTP server instead." }
27421
+ ],
27422
+ mentions: ["record set", "value plane", "cell", "export", "bulk"]
27423
+ }
27424
+ ];
27425
+
27426
+ // src/content/sdk/sections.json
27427
+ var sections_default = [
27428
+ {
27429
+ slug: "sdk-introduction",
27430
+ parentSlug: "sdk-overview",
27431
+ title: "Introduction",
27432
+ seoTitle: "Node SDK Introduction \u2014 Talonic Docs",
27433
+ description: "Official Talonic SDK for Node.js and TypeScript. Extract structured, schema-validated data from any document with a single function call.",
27434
+ content: [
27435
+ { type: "paragraph", text: "The `@talonic/node` SDK is the official Node.js and TypeScript client for the Talonic API. Extract structured, schema-validated data from any document with a single function call." },
27436
+ { type: "paragraph", text: "Zero runtime dependencies. Requires Node.js 18 or newer." },
27437
+ { type: "callout", text: "Looking for the AI agent path? [`@talonic/mcp`](https://github.com/talonicdev/talonic-mcp) wraps this SDK as a Model Context Protocol server. Install it into Claude Desktop, Cursor, Cline, Continue, or Cowork and any MCP-aware agent can extract documents directly." }
27438
+ ],
27439
+ related: [
27440
+ { label: "Installation", slug: "sdk-installation" },
27441
+ { label: "Quick Start", slug: "sdk-quickstart" },
27442
+ { label: "MCP Server", slug: "mcp-introduction" }
27443
+ ],
27444
+ faq: [
27445
+ { question: "What is the Talonic Node SDK?", answer: "The official Node.js and TypeScript client for extracting structured, schema-validated data from documents via the Talonic API." },
27446
+ { question: "What Node.js version is required?", answer: "Node.js 18 or newer. The SDK has zero runtime dependencies." }
27447
+ ],
27448
+ mentions: ["Node.js", "TypeScript", "SDK", "npm", "document extraction"]
27449
+ },
27450
+ {
27451
+ slug: "sdk-installation",
27452
+ parentSlug: "sdk-overview",
27453
+ title: "Installation",
27454
+ seoTitle: "Install Talonic Node SDK \u2014 Talonic Docs",
27455
+ description: "Install the @talonic/node package via npm. Zero runtime dependencies, requires Node.js 18+.",
27456
+ content: [
27457
+ { type: "code", language: "bash", title: "Install via npm", code: "npm install @talonic/node" },
27458
+ { type: "paragraph", text: "Requires Node.js 18 or newer. Zero runtime dependencies." }
27459
+ ],
27460
+ related: [
27461
+ { label: "Authentication", slug: "sdk-authentication" },
27462
+ { label: "Quick Start", slug: "sdk-quickstart" }
27463
+ ],
27464
+ faq: [
27465
+ { question: "How do I install the Talonic Node SDK?", answer: "Run npm install @talonic/node. Requires Node.js 18 or newer. Zero runtime dependencies." }
27466
+ ],
27467
+ mentions: ["npm", "Node.js"]
27468
+ },
27469
+ {
27470
+ slug: "sdk-authentication",
27471
+ parentSlug: "sdk-overview",
27472
+ title: "Authentication",
27473
+ seoTitle: "SDK Authentication \u2014 Talonic Docs",
27474
+ description: "Get a Talonic API key and configure it for the Node SDK. Each workspace is isolated with private documents and schemas.",
27475
+ content: [
27476
+ { type: "paragraph", text: "Every user runs against their own Talonic workspace, so each user needs their own key. Workspaces are isolated; your documents and schemas are private to you." },
27477
+ { type: "list", ordered: true, items: [
27478
+ "Sign up at [https://app.talonic.com](https://app.talonic.com). Free tier: 50 extractions per day, no credit card.",
27479
+ "Settings → API Keys → Create New Key.",
27480
+ "Copy the `tlnc_` value.",
27481
+ "Set it as the `TALONIC_API_KEY` environment variable, or pass it directly to the client constructor."
27482
+ ] },
27483
+ { type: "callout", text: "Keep your API key secret. Do not expose it in client-side code or version control." }
27484
+ ],
27485
+ related: [
27486
+ { label: "Configuration", slug: "sdk-configuration" },
27487
+ { label: "API Authentication", slug: "authentication" }
27488
+ ],
27489
+ faq: [
27490
+ { question: "Where do I get a Talonic API key?", answer: "Sign up at app.talonic.com, go to Settings \u2192 API Keys \u2192 Create New Key. Free tier includes 50 extractions per day." }
27491
+ ],
27492
+ mentions: ["API key", "authentication", "workspace"]
27493
+ },
27494
+ {
27495
+ slug: "sdk-quickstart",
27496
+ parentSlug: "sdk-overview",
27497
+ title: "Quick Start",
27498
+ seoTitle: "Node SDK Quick Start \u2014 Talonic Docs",
27499
+ description: "Extract structured data from a document in five lines of TypeScript using the Talonic Node SDK.",
27500
+ content: [
27501
+ { type: "code", language: "typescript", title: "Extract an invoice", code: 'import { Talonic } from "@talonic/node"\n\nconst talonic = new Talonic({ apiKey: process.env.TALONIC_API_KEY! })\n\nconst result = await talonic.extract({\n file_path: "./invoice.pdf",\n schema: {\n vendor_name: "string",\n invoice_number: "string",\n total_amount: "number",\n due_date: "date",\n },\n})\n\nconsole.log(result.data)\n// { vendor_name: "Acme Corp", invoice_number: "INV-2024-0847", total_amount: 14250, due_date: "2024-03-15" }' }
27502
+ ],
27503
+ related: [
27504
+ { label: "Extract", slug: "sdk-extract" },
27505
+ { label: "Configuration", slug: "sdk-configuration" },
27506
+ { label: "Schemas", slug: "sdk-schemas" }
27507
+ ],
27508
+ faq: [
27509
+ { question: "How do I extract data from a document with the SDK?", answer: "Call talonic.extract() with a file_path and a schema defining the fields you want. Returns structured JSON with confidence scores." }
27510
+ ],
27511
+ mentions: ["extract", "schema", "TypeScript", "quickstart"]
27512
+ },
27513
+ {
27514
+ slug: "sdk-configuration",
27515
+ parentSlug: "sdk-client",
27516
+ title: "Configuration",
27517
+ seoTitle: "SDK Configuration \u2014 Talonic Docs",
27518
+ description: "Configure the Talonic client with API key, base URL, timeout, max retries, and custom fetch function.",
26002
27519
  content: [
26003
27520
  { type: "code", language: "typescript", title: "Client configuration", code: 'const talonic = new Talonic({\n apiKey: process.env.TALONIC_API_KEY!,\n baseUrl: "https://api.talonic.com", // default\n timeout: 60_000, // ms; default 60s\n maxRetries: 3, // 429, 500, 502, 503, 504, network, timeout\n fetch: customFetch, // optional override (e.g. for testing)\n})' },
26004
27521
  { type: "param-table", title: "Constructor options", params: [
@@ -26174,7 +27691,7 @@ talonic --help` }
26174
27691
  ];
26175
27692
 
26176
27693
  // src/content/sdk/index.ts
26177
- var sections50 = sections_default;
27694
+ var sections53 = sections_default;
26178
27695
 
26179
27696
  // src/content/mcp/sections.json
26180
27697
  var sections_default2 = [
@@ -26183,11 +27700,11 @@ var sections_default2 = [
26183
27700
  parentSlug: "mcp-overview",
26184
27701
  title: "Introduction",
26185
27702
  seoTitle: "MCP Server Introduction \u2014 Talonic Docs",
26186
- description: "Official Talonic MCP server for AI agents. Eight tools and two resources for structured document extraction via the Model Context Protocol.",
27703
+ description: "Official Talonic MCP server for AI agents. Nine tools and two resources for structured document extraction via the Model Context Protocol.",
26187
27704
  content: [
26188
27705
  {
26189
27706
  type: "paragraph",
26190
- text: "The `@talonic/mcp` package is the official Talonic MCP server. It gives AI agents eight tools and two resources for extracting structured, schema-validated data from any document via the [Model Context Protocol](https://modelcontextprotocol.io)."
27707
+ text: "The `@talonic/mcp` package is the official Talonic MCP server. It gives AI agents nine tools and two resources for extracting structured, schema-validated data from any document via the [Model Context Protocol](https://modelcontextprotocol.io)."
26191
27708
  },
26192
27709
  {
26193
27710
  type: "paragraph",
@@ -26195,7 +27712,7 @@ var sections_default2 = [
26195
27712
  },
26196
27713
  {
26197
27714
  type: "paragraph",
26198
- text: "With this MCP server installed, the agent has a `talonic_extract` tool that returns schema-validated JSON with per-field confidence scores, a detected document type, and stable IDs for follow-up calls. The other seven tools cover the rest of the workflow: searching the workspace, filtering by extracted field values, fetching document metadata, getting OCR markdown, listing saved schemas, saving new ones, and reading the workspace credit balance for budget-aware behaviour."
27715
+ text: "With this MCP server installed, the agent has a `talonic_extract` tool that returns schema-validated JSON with per-field confidence scores, a detected document type, and stable IDs for follow-up calls. The other eight tools cover the rest of the workflow: searching the workspace, filtering by extracted field values, fetching document metadata, getting OCR markdown, listing saved schemas, saving new ones, and reading the workspace credit balance for budget-aware behaviour."
26199
27716
  },
26200
27717
  {
26201
27718
  type: "callout",
@@ -26203,17 +27720,31 @@ var sections_default2 = [
26203
27720
  }
26204
27721
  ],
26205
27722
  related: [
26206
- { label: "Installation", slug: "mcp-installation" },
26207
- { label: "Tools", slug: "mcp-talonic-extract" },
26208
- { label: "Node SDK", slug: "sdk-introduction" }
27723
+ {
27724
+ label: "Installation",
27725
+ slug: "mcp-installation"
27726
+ },
27727
+ {
27728
+ label: "Tools",
27729
+ slug: "mcp-talonic-extract"
27730
+ },
27731
+ {
27732
+ label: "Node SDK",
27733
+ slug: "sdk-introduction"
27734
+ }
26209
27735
  ],
26210
27736
  faq: [
26211
27737
  {
26212
27738
  question: "What is the Talonic MCP server?",
26213
- answer: "An official Model Context Protocol server that gives AI agents eight tools for document extraction, search, filtering, schema management, and credit-balance lookup via the Talonic API."
27739
+ answer: "An official Model Context Protocol server that gives AI agents nine tools for document extraction, search, filtering, schema management, and credit-balance lookup via the Talonic API."
26214
27740
  }
26215
27741
  ],
26216
- mentions: ["MCP", "Model Context Protocol", "AI agents", "document extraction"]
27742
+ mentions: [
27743
+ "MCP",
27744
+ "Model Context Protocol",
27745
+ "AI agents",
27746
+ "document extraction"
27747
+ ]
26217
27748
  },
26218
27749
  {
26219
27750
  slug: "mcp-installation",
@@ -26226,6 +27757,10 @@ var sections_default2 = [
26226
27757
  type: "paragraph",
26227
27758
  text: "Three install paths. Pick the one that matches your client."
26228
27759
  },
27760
+ {
27761
+ type: "callout",
27762
+ text: "The hosted MCP endpoint accepts both `https://mcp.talonic.com/mcp` and the bare origin `https://mcp.talonic.com` as connector URLs \u2014 POST/DELETE/SSE traffic at either path routes through the same Streamable HTTP transport. Plain `GET /` still returns the discovery JSON. Either URL works in any of the install paths below."
27763
+ },
26229
27764
  {
26230
27765
  type: "heading",
26231
27766
  level: 3,
@@ -26252,7 +27787,11 @@ var sections_default2 = [
26252
27787
  type: "callout",
26253
27788
  text: "The connector does not need an API key in its config. Token rotation is handled by the OAuth flow."
26254
27789
  },
26255
- { type: "heading", level: 3, text: "Local stdio (npx)" },
27790
+ {
27791
+ type: "heading",
27792
+ level: 3,
27793
+ text: "Local stdio (npx)"
27794
+ },
26256
27795
  {
26257
27796
  type: "paragraph",
26258
27797
  text: "Recommended for IDE-style clients (Claude Desktop, Cursor, Cline, Continue, Cowork). Runs on your machine via stdio; requires Node.js 18 or later. Uses a `TALONIC_API_KEY` from `app.talonic.com`."
@@ -26288,9 +27827,18 @@ var sections_default2 = [
26288
27827
  }
26289
27828
  ],
26290
27829
  related: [
26291
- { label: "Claude Desktop", slug: "mcp-claude-desktop" },
26292
- { label: "Cursor", slug: "mcp-cursor" },
26293
- { label: "Authentication", slug: "mcp-authentication" }
27830
+ {
27831
+ label: "Claude Desktop",
27832
+ slug: "mcp-claude-desktop"
27833
+ },
27834
+ {
27835
+ label: "Cursor",
27836
+ slug: "mcp-cursor"
27837
+ },
27838
+ {
27839
+ label: "Authentication",
27840
+ slug: "mcp-authentication"
27841
+ }
26294
27842
  ],
26295
27843
  faq: [
26296
27844
  {
@@ -26324,7 +27872,11 @@ var sections_default2 = [
26324
27872
  type: "paragraph",
26325
27873
  text: "Each user runs against their own isolated Talonic workspace. Your documents and schemas are private to you. There are three authentication paths depending on how you connect."
26326
27874
  },
26327
- { type: "heading", level: 3, text: "OAuth 2.1 (Claude.ai connector, recommended)" },
27875
+ {
27876
+ type: "heading",
27877
+ level: 3,
27878
+ text: "OAuth 2.1 (Claude.ai connector, recommended)"
27879
+ },
26328
27880
  {
26329
27881
  type: "paragraph",
26330
27882
  text: "When you connect via Claude.ai's custom-connector flow, the connector launches an OAuth 2.1 sign-in to `app.talonic.com` (PKCE + Dynamic Client Registration). After consent, the connector exchanges a short-lived bearer token that is rotated automatically; no API key sits in the connector config. The Talonic MCP server validates the token on each request against the API, so revocation propagates immediately."
@@ -26355,7 +27907,11 @@ var sections_default2 = [
26355
27907
  "`?apiKey=tlnc_...` query parameter (only for clients that cannot set custom headers)."
26356
27908
  ]
26357
27909
  },
26358
- { type: "heading", level: 3, text: "Environment variable (local stdio)" },
27910
+ {
27911
+ type: "heading",
27912
+ level: 3,
27913
+ text: "Environment variable (local stdio)"
27914
+ },
26359
27915
  {
26360
27916
  type: "paragraph",
26361
27917
  text: "Set `TALONIC_API_KEY` in the `env` block of your MCP client config. The local server reads it at startup."
@@ -26371,8 +27927,14 @@ var sections_default2 = [
26371
27927
  }
26372
27928
  ],
26373
27929
  related: [
26374
- { label: "Installation", slug: "mcp-installation" },
26375
- { label: "API Authentication", slug: "authentication" }
27930
+ {
27931
+ label: "Installation",
27932
+ slug: "mcp-installation"
27933
+ },
27934
+ {
27935
+ label: "API Authentication",
27936
+ slug: "authentication"
27937
+ }
26376
27938
  ],
26377
27939
  faq: [
26378
27940
  {
@@ -26401,14 +27963,22 @@ var sections_default2 = [
26401
27963
  type: "paragraph",
26402
27964
  text: "Edit `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\\Claude\\claude_desktop_config.json` (Windows)."
26403
27965
  },
26404
- { type: "heading", level: 3, text: "Hosted (recommended)" },
27966
+ {
27967
+ type: "heading",
27968
+ level: 3,
27969
+ text: "Hosted (recommended)"
27970
+ },
26405
27971
  {
26406
27972
  type: "code",
26407
27973
  language: "json",
26408
27974
  title: "claude_desktop_config.json",
26409
27975
  code: '{\n "mcpServers": {\n "talonic": {\n "url": "https://mcp.talonic.com/mcp",\n "headers": {\n "Authorization": "Bearer tlnc_your_key_here"\n }\n }\n }\n}'
26410
27976
  },
26411
- { type: "heading", level: 3, text: "Local (npx)" },
27977
+ {
27978
+ type: "heading",
27979
+ level: 3,
27980
+ text: "Local (npx)"
27981
+ },
26412
27982
  {
26413
27983
  type: "code",
26414
27984
  language: "json",
@@ -26421,8 +27991,14 @@ var sections_default2 = [
26421
27991
  }
26422
27992
  ],
26423
27993
  related: [
26424
- { label: "Cursor", slug: "mcp-cursor" },
26425
- { label: "Tool Reference", slug: "mcp-talonic-extract" }
27994
+ {
27995
+ label: "Cursor",
27996
+ slug: "mcp-cursor"
27997
+ },
27998
+ {
27999
+ label: "Tool Reference",
28000
+ slug: "mcp-talonic-extract"
28001
+ }
26426
28002
  ],
26427
28003
  faq: [
26428
28004
  {
@@ -26430,7 +28006,11 @@ var sections_default2 = [
26430
28006
  answer: "Edit claude_desktop_config.json, add the Talonic MCP server config (hosted URL or local npx) with your API key, and fully restart Claude Desktop (Cmd+Q on macOS)."
26431
28007
  }
26432
28008
  ],
26433
- mentions: ["Claude Desktop", "macOS", "Windows"]
28009
+ mentions: [
28010
+ "Claude Desktop",
28011
+ "macOS",
28012
+ "Windows"
28013
+ ]
26434
28014
  },
26435
28015
  {
26436
28016
  slug: "mcp-cursor",
@@ -26443,14 +28023,22 @@ var sections_default2 = [
26443
28023
  type: "paragraph",
26444
28024
  text: "Edit `~/.cursor/mcp.json` (or open Cursor settings \u2192 MCP \u2192 edit config):"
26445
28025
  },
26446
- { type: "heading", level: 3, text: "Hosted (recommended)" },
28026
+ {
28027
+ type: "heading",
28028
+ level: 3,
28029
+ text: "Hosted (recommended)"
28030
+ },
26447
28031
  {
26448
28032
  type: "code",
26449
28033
  language: "json",
26450
28034
  title: "~/.cursor/mcp.json",
26451
28035
  code: '{\n "mcpServers": {\n "talonic": {\n "url": "https://mcp.talonic.com/mcp",\n "headers": {\n "Authorization": "Bearer tlnc_your_key_here"\n }\n }\n }\n}'
26452
28036
  },
26453
- { type: "heading", level: 3, text: "Local (npx)" },
28037
+ {
28038
+ type: "heading",
28039
+ level: 3,
28040
+ text: "Local (npx)"
28041
+ },
26454
28042
  {
26455
28043
  type: "code",
26456
28044
  language: "json",
@@ -26459,8 +28047,14 @@ var sections_default2 = [
26459
28047
  }
26460
28048
  ],
26461
28049
  related: [
26462
- { label: "Claude Desktop", slug: "mcp-claude-desktop" },
26463
- { label: "Cline", slug: "mcp-cline" }
28050
+ {
28051
+ label: "Claude Desktop",
28052
+ slug: "mcp-claude-desktop"
28053
+ },
28054
+ {
28055
+ label: "Cline",
28056
+ slug: "mcp-cline"
28057
+ }
26464
28058
  ],
26465
28059
  faq: [
26466
28060
  {
@@ -26468,7 +28062,10 @@ var sections_default2 = [
26468
28062
  answer: "Edit ~/.cursor/mcp.json and add the Talonic MCP server config with your API key. Hosted or local."
26469
28063
  }
26470
28064
  ],
26471
- mentions: ["Cursor", "IDE"]
28065
+ mentions: [
28066
+ "Cursor",
28067
+ "IDE"
28068
+ ]
26472
28069
  },
26473
28070
  {
26474
28071
  slug: "mcp-cline",
@@ -26481,23 +28078,40 @@ var sections_default2 = [
26481
28078
  type: "paragraph",
26482
28079
  text: "Open the Cline panel \u2192 settings (gear icon) \u2192 MCP Servers \u2192 Edit."
26483
28080
  },
26484
- { type: "heading", level: 3, text: "Hosted (recommended)" },
28081
+ {
28082
+ type: "heading",
28083
+ level: 3,
28084
+ text: "Hosted (recommended)"
28085
+ },
26485
28086
  {
26486
28087
  type: "code",
26487
28088
  language: "json",
26488
28089
  code: '{\n "mcpServers": {\n "talonic": {\n "url": "https://mcp.talonic.com/mcp",\n "headers": {\n "Authorization": "Bearer tlnc_your_key_here"\n }\n }\n }\n}'
26489
28090
  },
26490
- { type: "heading", level: 3, text: "Local (npx)" },
28091
+ {
28092
+ type: "heading",
28093
+ level: 3,
28094
+ text: "Local (npx)"
28095
+ },
26491
28096
  {
26492
28097
  type: "code",
26493
28098
  language: "json",
26494
28099
  code: '{\n "mcpServers": {\n "talonic": {\n "command": "npx",\n "args": ["-y", "@talonic/mcp@latest"],\n "env": {\n "TALONIC_API_KEY": "tlnc_your_key_here"\n }\n }\n }\n}'
26495
28100
  },
26496
- { type: "paragraph", text: "Save and restart the panel." }
28101
+ {
28102
+ type: "paragraph",
28103
+ text: "Save and restart the panel."
28104
+ }
26497
28105
  ],
26498
28106
  related: [
26499
- { label: "Continue", slug: "mcp-continue" },
26500
- { label: "Cursor", slug: "mcp-cursor" }
28107
+ {
28108
+ label: "Continue",
28109
+ slug: "mcp-continue"
28110
+ },
28111
+ {
28112
+ label: "Cursor",
28113
+ slug: "mcp-cursor"
28114
+ }
26501
28115
  ],
26502
28116
  faq: [
26503
28117
  {
@@ -26505,7 +28119,10 @@ var sections_default2 = [
26505
28119
  answer: "Open the Cline panel settings, go to MCP Servers, click Edit, and add the Talonic config entry."
26506
28120
  }
26507
28121
  ],
26508
- mentions: ["Cline", "VS Code"]
28122
+ mentions: [
28123
+ "Cline",
28124
+ "VS Code"
28125
+ ]
26509
28126
  },
26510
28127
  {
26511
28128
  slug: "mcp-continue",
@@ -26518,14 +28135,22 @@ var sections_default2 = [
26518
28135
  type: "paragraph",
26519
28136
  text: "Edit `~/.continue/config.json`. Add to the `mcpServers` array:"
26520
28137
  },
26521
- { type: "heading", level: 3, text: "Hosted (recommended)" },
28138
+ {
28139
+ type: "heading",
28140
+ level: 3,
28141
+ text: "Hosted (recommended)"
28142
+ },
26522
28143
  {
26523
28144
  type: "code",
26524
28145
  language: "json",
26525
28146
  title: "~/.continue/config.json",
26526
28147
  code: '{\n "name": "talonic",\n "url": "https://mcp.talonic.com/mcp",\n "headers": {\n "Authorization": "Bearer tlnc_your_key_here"\n }\n}'
26527
28148
  },
26528
- { type: "heading", level: 3, text: "Local (npx)" },
28149
+ {
28150
+ type: "heading",
28151
+ level: 3,
28152
+ text: "Local (npx)"
28153
+ },
26529
28154
  {
26530
28155
  type: "code",
26531
28156
  language: "json",
@@ -26534,8 +28159,14 @@ var sections_default2 = [
26534
28159
  }
26535
28160
  ],
26536
28161
  related: [
26537
- { label: "Cowork", slug: "mcp-cowork" },
26538
- { label: "Cline", slug: "mcp-cline" }
28162
+ {
28163
+ label: "Cowork",
28164
+ slug: "mcp-cowork"
28165
+ },
28166
+ {
28167
+ label: "Cline",
28168
+ slug: "mcp-cline"
28169
+ }
26539
28170
  ],
26540
28171
  faq: [
26541
28172
  {
@@ -26543,7 +28174,11 @@ var sections_default2 = [
26543
28174
  answer: "Edit ~/.continue/config.json and add a Talonic entry to the mcpServers array with your API key."
26544
28175
  }
26545
28176
  ],
26546
- mentions: ["Continue", "VS Code", "JetBrains"]
28177
+ mentions: [
28178
+ "Continue",
28179
+ "VS Code",
28180
+ "JetBrains"
28181
+ ]
26547
28182
  },
26548
28183
  {
26549
28184
  slug: "mcp-cowork",
@@ -26552,14 +28187,25 @@ var sections_default2 = [
26552
28187
  seoTitle: "MCP Setup for Cowork \u2014 Talonic Docs",
26553
28188
  description: "Configure the Talonic MCP server in Cowork. Hosted and local configs.",
26554
28189
  content: [
26555
- { type: "paragraph", text: "Open Cowork settings \u2192 MCP Servers \u2192 Add." },
26556
- { type: "heading", level: 3, text: "Hosted (recommended)" },
28190
+ {
28191
+ type: "paragraph",
28192
+ text: "Open Cowork settings \u2192 MCP Servers \u2192 Add."
28193
+ },
28194
+ {
28195
+ type: "heading",
28196
+ level: 3,
28197
+ text: "Hosted (recommended)"
28198
+ },
26557
28199
  {
26558
28200
  type: "code",
26559
28201
  language: "json",
26560
28202
  code: '{\n "mcpServers": {\n "talonic": {\n "url": "https://mcp.talonic.com/mcp",\n "headers": {\n "Authorization": "Bearer tlnc_your_key_here"\n }\n }\n }\n}'
26561
28203
  },
26562
- { type: "heading", level: 3, text: "Local (npx)" },
28204
+ {
28205
+ type: "heading",
28206
+ level: 3,
28207
+ text: "Local (npx)"
28208
+ },
26563
28209
  {
26564
28210
  type: "code",
26565
28211
  language: "json",
@@ -26567,8 +28213,14 @@ var sections_default2 = [
26567
28213
  }
26568
28214
  ],
26569
28215
  related: [
26570
- { label: "Claude Desktop", slug: "mcp-claude-desktop" },
26571
- { label: "Tool Reference", slug: "mcp-talonic-extract" }
28216
+ {
28217
+ label: "Claude Desktop",
28218
+ slug: "mcp-claude-desktop"
28219
+ },
28220
+ {
28221
+ label: "Tool Reference",
28222
+ slug: "mcp-talonic-extract"
28223
+ }
26572
28224
  ],
26573
28225
  faq: [
26574
28226
  {
@@ -26576,7 +28228,9 @@ var sections_default2 = [
26576
28228
  answer: "Open Cowork settings, go to MCP Servers, click Add, and paste the standard Talonic config with your API key."
26577
28229
  }
26578
28230
  ],
26579
- mentions: ["Cowork"]
28231
+ mentions: [
28232
+ "Cowork"
28233
+ ]
26580
28234
  },
26581
28235
  {
26582
28236
  slug: "mcp-talonic-extract",
@@ -26589,7 +28243,11 @@ var sections_default2 = [
26589
28243
  type: "paragraph",
26590
28244
  text: "Extract structured, schema-validated data from a document. Returns clean JSON matching the schema, with per-field confidence scores and document metadata."
26591
28245
  },
26592
- { type: "heading", level: 3, text: "When to use" },
28246
+ {
28247
+ type: "heading",
28248
+ level: 3,
28249
+ text: "When to use"
28250
+ },
26593
28251
  {
26594
28252
  type: "list",
26595
28253
  ordered: false,
@@ -26600,7 +28258,11 @@ var sections_default2 = [
26600
28258
  "You want validated JSON instead of trying to OCR + parse with raw LLM calls."
26601
28259
  ]
26602
28260
  },
26603
- { type: "heading", level: 3, text: "When NOT to use" },
28261
+ {
28262
+ type: "heading",
28263
+ level: 3,
28264
+ text: "When NOT to use"
28265
+ },
26604
28266
  {
26605
28267
  type: "list",
26606
28268
  ordered: false,
@@ -26609,7 +28271,11 @@ var sections_default2 = [
26609
28271
  "The user wants to find documents matching a query \u2192 use `talonic_search` or `talonic_filter`."
26610
28272
  ]
26611
28273
  },
26612
- { type: "heading", level: 3, text: "Input schema" },
28274
+ {
28275
+ type: "heading",
28276
+ level: 3,
28277
+ text: "Input schema"
28278
+ },
26613
28279
  {
26614
28280
  type: "param-table",
26615
28281
  title: "File source (provide exactly one)",
@@ -26678,14 +28344,22 @@ var sections_default2 = [
26678
28344
  }
26679
28345
  ]
26680
28346
  },
26681
- { type: "heading", level: 3, text: "Response shape" },
28347
+ {
28348
+ type: "heading",
28349
+ level: 3,
28350
+ text: "Response shape"
28351
+ },
26682
28352
  {
26683
28353
  type: "code",
26684
28354
  language: "json",
26685
28355
  title: "Example response",
26686
28356
  code: '{\n "data": {\n "vendor_name": "Acme Corp",\n "invoice_number": "INV-2024-0847",\n "total_amount": 14250.00,\n "due_date": "2024-03-15"\n },\n "confidence": {\n "vendor_name": 0.97,\n "invoice_number": 0.99,\n "total_amount": 0.94,\n "due_date": 0.91\n },\n "document": {\n "id": "d_abc123",\n "filename": "invoice.pdf",\n "documentType": "invoice",\n "language": "en",\n "pageCount": 2\n },\n "extraction": {\n "id": "ext_xyz789",\n "schemaId": "sch_def456"\n },\n "cost": {\n "costCredits": 1,\n "costEur": 0.05,\n "balanceCredits": 999,\n "cellsResolvedRegistry": 3,\n "cellsResolvedAi": 1\n }\n}'
26687
28357
  },
26688
- { type: "heading", level: 3, text: "Confidence scores and human escalation" },
28358
+ {
28359
+ type: "heading",
28360
+ level: 3,
28361
+ text: "Confidence scores and human escalation"
28362
+ },
26689
28363
  {
26690
28364
  type: "paragraph",
26691
28365
  text: "Each field in the `confidence` object is a float from 0.0 to 1.0. Values above **0.90** are high confidence. Values between **0.70\u20130.90** should be treated with caution \u2014 flag them to the user for verification. Values below **0.70** indicate low confidence \u2014 the agent should ask the user to verify the value or re-extract with more specific instructions."
@@ -26695,17 +28369,29 @@ var sections_default2 = [
26695
28369
  variant: "warning",
26696
28370
  text: "Always provide either a `schema` or `schema_id`. The MCP layer rejects schema-less calls with a validation error before they reach the API."
26697
28371
  },
26698
- { type: "heading", level: 3, text: "Cost" },
28372
+ {
28373
+ type: "heading",
28374
+ level: 3,
28375
+ text: "Cost"
28376
+ },
26699
28377
  {
26700
28378
  type: "paragraph",
26701
28379
  text: "Each `talonic_extract` call with a new file consumes **one extraction credit**. Re-extracting the same `document_id` with a different schema also consumes one credit. The per-call cost is surfaced on the response under `cost` (`costCredits`, `costEur`, `balanceCredits`, plus a breakdown of how many cells were resolved by the registry vs the AI), parsed from the `X-Talonic-Cost-*` and `X-Talonic-Balance-*` response headers. To avoid unnecessary cost, check if a document has already been extracted before calling again \u2014 use `talonic_search` or `talonic_filter` to find existing results, and `talonic_get_balance` to check your runway before kicking off a large batch."
26702
28380
  },
26703
- { type: "heading", level: 3, text: "Errors" },
28381
+ {
28382
+ type: "heading",
28383
+ level: 3,
28384
+ text: "Errors"
28385
+ },
26704
28386
  {
26705
28387
  type: "param-table",
26706
28388
  title: "Common errors",
26707
28389
  params: [
26708
- { name: "unauthorized", type: "401", description: "Invalid or missing API key." },
28390
+ {
28391
+ name: "unauthorized",
28392
+ type: "401",
28393
+ description: "Invalid or missing API key."
28394
+ },
26709
28395
  {
26710
28396
  name: "validation_error",
26711
28397
  type: "422",
@@ -26730,9 +28416,18 @@ var sections_default2 = [
26730
28416
  }
26731
28417
  ],
26732
28418
  related: [
26733
- { label: "SDK Extract", slug: "sdk-extract" },
26734
- { label: "POST /v1/extract", slug: "post-extract" },
26735
- { label: "Cost & Rate Limits", slug: "mcp-cost-and-limits" }
28419
+ {
28420
+ label: "SDK Extract",
28421
+ slug: "sdk-extract"
28422
+ },
28423
+ {
28424
+ label: "POST /v1/extract",
28425
+ slug: "post-extract"
28426
+ },
28427
+ {
28428
+ label: "Cost & Rate Limits",
28429
+ slug: "mcp-cost-and-limits"
28430
+ }
26736
28431
  ],
26737
28432
  faq: [
26738
28433
  {
@@ -26744,7 +28439,13 @@ var sections_default2 = [
26744
28439
  answer: "Scores above 0.90 are reliable. Between 0.70\u20130.90, flag to the user for verification. Below 0.70, ask the user to verify or re-extract with more specific instructions."
26745
28440
  }
26746
28441
  ],
26747
- mentions: ["talonic_extract", "file_data", "schema", "confidence", "extraction"]
28442
+ mentions: [
28443
+ "talonic_extract",
28444
+ "file_data",
28445
+ "schema",
28446
+ "confidence",
28447
+ "extraction"
28448
+ ]
26748
28449
  },
26749
28450
  {
26750
28451
  slug: "mcp-talonic-search",
@@ -26757,7 +28458,11 @@ var sections_default2 = [
26757
28458
  type: "paragraph",
26758
28459
  text: "Omnisearch across documents, extracted field values, field names, sources, and schemas in the workspace. Returns ranked results across all entity types in one call."
26759
28460
  },
26760
- { type: "heading", level: 3, text: "When to use" },
28461
+ {
28462
+ type: "heading",
28463
+ level: 3,
28464
+ text: "When to use"
28465
+ },
26761
28466
  {
26762
28467
  type: "list",
26763
28468
  ordered: false,
@@ -26769,7 +28474,11 @@ var sections_default2 = [
26769
28474
  "You need to discover canonical field names before using `talonic_filter`."
26770
28475
  ]
26771
28476
  },
26772
- { type: "heading", level: 3, text: "When NOT to use" },
28477
+ {
28478
+ type: "heading",
28479
+ level: 3,
28480
+ text: "When NOT to use"
28481
+ },
26773
28482
  {
26774
28483
  type: "list",
26775
28484
  ordered: false,
@@ -26779,7 +28488,11 @@ var sections_default2 = [
26779
28488
  "The user wants to extract data from a new document \u2192 use `talonic_extract`."
26780
28489
  ]
26781
28490
  },
26782
- { type: "heading", level: 3, text: "Input schema" },
28491
+ {
28492
+ type: "heading",
28493
+ level: 3,
28494
+ text: "Input schema"
28495
+ },
26783
28496
  {
26784
28497
  type: "param-table",
26785
28498
  title: "Parameters",
@@ -26797,28 +28510,48 @@ var sections_default2 = [
26797
28510
  }
26798
28511
  ]
26799
28512
  },
26800
- { type: "heading", level: 3, text: "Response shape" },
28513
+ {
28514
+ type: "heading",
28515
+ level: 3,
28516
+ text: "Response shape"
28517
+ },
26801
28518
  {
26802
28519
  type: "code",
26803
28520
  language: "json",
26804
28521
  title: "Example response",
26805
- code: '{\n "documents": [\n {\n "id": "d_abc123",\n "filename": "acme-invoice-q4.pdf",\n "documentType": "invoice",\n "score": 0.92\n }\n ],\n "fieldMatches": [\n {\n "resolvedFieldId": "f_ghi789",\n "displayName": "Vendor Name",\n "matchedValue": "Acme Corp",\n "documentCount": 3,\n "filterable": true\n }\n ],\n "sources": [],\n "schemas": [\n {\n "id": "sch_def456",\n "name": "Standard Invoice"\n }\n ],\n "fields": [\n {\n "id": "f_ghi789",\n "canonicalName": "vendor.name",\n "displayName": "Vendor Name",\n "documentCount": 12,\n "filterable": true\n }\n ]\n}'
28522
+ code: '{\n "documents": [\n {\n "id": "d_abc123",\n "filename": "acme-invoice-q4.pdf",\n "documentType": "invoice",\n "score": 0.92\n }\n ],\n "fieldMatches": [\n {\n "resolvedFieldId": "f_ghi789",\n "displayName": "Vendor Name",\n "matchedValue": "Acme Corp",\n "documentCount": 3,\n "filterable": true,\n "dataType": "string"\n }\n ],\n "sources": [],\n "schemas": [\n {\n "id": "sch_def456",\n "name": "Standard Invoice"\n }\n ],\n "fields": [\n {\n "id": "f_total",\n "canonicalName": "total_amount",\n "displayName": "Total Amount",\n "documentCount": 14,\n "filterable": true,\n "dataType": "number"\n }\n ]\n}'
26806
28523
  },
26807
28524
  {
26808
28525
  type: "callout",
26809
28526
  text: "Only `fields[]` entries with `filterable: true` can be used with `talonic_filter`. These have extracted data in the workspace. Fields with `filterable: false` exist in a schema definition but have no extracted data yet \u2014 they become filterable after documents are processed against their schema."
26810
28527
  },
26811
- { type: "heading", level: 3, text: "Cost" },
28528
+ {
28529
+ type: "callout",
28530
+ text: 'Every `fieldMatches[]` and `fields[]` entry carries a `dataType` (`"string"`, `"number"`, `"array"`, etc.). Use it to pick the right `talonic_filter` operator on the first call \u2014 numeric operators (`gt`, `gte`, `lt`, `lte`, `between`) only resolve correctly when `dataType === "number"`. See the *Schema typing* section under `talonic_filter` for the full preventive / reactive pattern.'
28531
+ },
28532
+ {
28533
+ type: "heading",
28534
+ level: 3,
28535
+ text: "Cost"
28536
+ },
26812
28537
  {
26813
28538
  type: "paragraph",
26814
28539
  text: "Search calls are **free** \u2014 they do not consume extraction credits. Use search liberally to explore before extracting."
26815
28540
  },
26816
- { type: "heading", level: 3, text: "Errors" },
28541
+ {
28542
+ type: "heading",
28543
+ level: 3,
28544
+ text: "Errors"
28545
+ },
26817
28546
  {
26818
28547
  type: "param-table",
26819
28548
  title: "Common errors",
26820
28549
  params: [
26821
- { name: "unauthorized", type: "401", description: "Invalid or missing API key." },
28550
+ {
28551
+ name: "unauthorized",
28552
+ type: "401",
28553
+ description: "Invalid or missing API key."
28554
+ },
26822
28555
  {
26823
28556
  name: "validation_error",
26824
28557
  type: "422",
@@ -26828,8 +28561,14 @@ var sections_default2 = [
26828
28561
  }
26829
28562
  ],
26830
28563
  related: [
26831
- { label: "talonic_filter", slug: "mcp-talonic-filter" },
26832
- { label: "Omnisearch", slug: "omnisearch" }
28564
+ {
28565
+ label: "talonic_filter",
28566
+ slug: "mcp-talonic-filter"
28567
+ },
28568
+ {
28569
+ label: "Omnisearch",
28570
+ slug: "omnisearch"
28571
+ }
26833
28572
  ],
26834
28573
  faq: [
26835
28574
  {
@@ -26839,22 +28578,38 @@ var sections_default2 = [
26839
28578
  {
26840
28579
  question: "What entities does talonic_search return?",
26841
28580
  answer: "Documents, field matches (with canonical names and values), sources, schemas, and field definitions \u2014 all ranked by relevance score."
28581
+ },
28582
+ {
28583
+ question: "How do I avoid a `talonic_filter` numeric query returning zero matches?",
28584
+ answer: 'Check `dataType` on the field entry in the search response before constructing the filter. Numeric operators (`gt`, `gte`, `lt`, `lte`, `between`) only resolve against fields where `dataType === "number"`. If the type is `string` (common for monetary or formatted-number fields), suggest the user change the field\'s data type in the schema before filtering.'
26842
28585
  }
26843
28586
  ],
26844
- mentions: ["talonic_search", "omnisearch", "canonicalName", "field discovery"]
28587
+ mentions: [
28588
+ "canonicalName",
28589
+ "dataType",
28590
+ "field discovery",
28591
+ "omnisearch",
28592
+ "preventive guard",
28593
+ "schema typing",
28594
+ "talonic_search"
28595
+ ]
26845
28596
  },
26846
28597
  {
26847
28598
  slug: "mcp-talonic-filter",
26848
28599
  parentSlug: "mcp-tools",
26849
28600
  title: "talonic_filter",
26850
28601
  seoTitle: "talonic_filter MCP Tool \u2014 Talonic Docs",
26851
- description: "Filter documents by extracted field values. Full operator reference, input/output schema, and composable condition examples.",
28602
+ description: "Filter documents by extracted field values. Full operator reference, input/output schema, composable condition examples, and the preventive + reactive pattern for guarding numeric operators against string-typed fields.",
26852
28603
  content: [
26853
28604
  {
26854
28605
  type: "paragraph",
26855
28606
  text: "Filter documents by extracted field values using composable conditions. Conditions accept canonical field names (e.g. `vendor.name`, `policy.0_coverage_type`) or field UUIDs. The Talonic API resolves names to IDs server-side."
26856
28607
  },
26857
- { type: "heading", level: 3, text: "When to use" },
28608
+ {
28609
+ type: "heading",
28610
+ level: 3,
28611
+ text: "When to use"
28612
+ },
26858
28613
  {
26859
28614
  type: "list",
26860
28615
  ordered: false,
@@ -26864,7 +28619,11 @@ var sections_default2 = [
26864
28619
  "You need a sortable, paginated list filtered by field conditions."
26865
28620
  ]
26866
28621
  },
26867
- { type: "heading", level: 3, text: "When NOT to use" },
28622
+ {
28623
+ type: "heading",
28624
+ level: 3,
28625
+ text: "When NOT to use"
28626
+ },
26868
28627
  {
26869
28628
  type: "list",
26870
28629
  ordered: false,
@@ -26874,7 +28633,11 @@ var sections_default2 = [
26874
28633
  "The user wants to extract from a new document \u2192 use `talonic_extract`."
26875
28634
  ]
26876
28635
  },
26877
- { type: "heading", level: 3, text: "Input schema" },
28636
+ {
28637
+ type: "heading",
28638
+ level: 3,
28639
+ text: "Input schema"
28640
+ },
26878
28641
  {
26879
28642
  type: "param-table",
26880
28643
  title: "Top-level parameters",
@@ -26900,7 +28663,11 @@ var sections_default2 = [
26900
28663
  type: "number",
26901
28664
  description: "Page number for pagination (1-based)."
26902
28665
  },
26903
- { name: "limit", type: "number", description: "Results per page. Default: 50." },
28666
+ {
28667
+ name: "limit",
28668
+ type: "number",
28669
+ description: "Results per page. Default: 50."
28670
+ },
26904
28671
  {
26905
28672
  name: "source_connection_id",
26906
28673
  type: "string",
@@ -26940,13 +28707,25 @@ var sections_default2 = [
26940
28707
  }
26941
28708
  ]
26942
28709
  },
26943
- { type: "heading", level: 3, text: "Operator reference" },
28710
+ {
28711
+ type: "heading",
28712
+ level: 3,
28713
+ text: "Operator reference"
28714
+ },
26944
28715
  {
26945
28716
  type: "param-table",
26946
28717
  title: "Operators",
26947
28718
  params: [
26948
- { name: "eq", type: "=", description: "Exact equality." },
26949
- { name: "neq", type: "!=", description: "Not equal." },
28719
+ {
28720
+ name: "eq",
28721
+ type: "=",
28722
+ description: "Exact equality."
28723
+ },
28724
+ {
28725
+ name: "neq",
28726
+ type: "!=",
28727
+ description: "Not equal."
28728
+ },
26950
28729
  {
26951
28730
  name: "gt / gte",
26952
28731
  type: "> / >=",
@@ -26967,7 +28746,11 @@ var sections_default2 = [
26967
28746
  type: "substring",
26968
28747
  description: "Case-insensitive substring match on string fields."
26969
28748
  },
26970
- { name: "is_empty", type: "null check", description: "Field has no value." },
28749
+ {
28750
+ name: "is_empty",
28751
+ type: "null check",
28752
+ description: "Field has no value."
28753
+ },
26971
28754
  {
26972
28755
  name: "is_not_empty",
26973
28756
  type: "presence",
@@ -26975,14 +28758,22 @@ var sections_default2 = [
26975
28758
  }
26976
28759
  ]
26977
28760
  },
26978
- { type: "heading", level: 3, text: "Example" },
28761
+ {
28762
+ type: "heading",
28763
+ level: 3,
28764
+ text: "Example"
28765
+ },
26979
28766
  {
26980
28767
  type: "code",
26981
28768
  language: "json",
26982
28769
  title: "Find invoices over 1000 from Acme",
26983
28770
  code: '{\n "conditions": [\n { "field": "vendor.name", "operator": "contains", "value": "Acme" },\n { "field": "total_amount", "operator": "gt", "value": 1000 }\n ],\n "sort": { "field": "total_amount", "direction": "desc" },\n "limit": 10\n}'
26984
28771
  },
26985
- { type: "heading", level: 3, text: "Response shape" },
28772
+ {
28773
+ type: "heading",
28774
+ level: 3,
28775
+ text: "Response shape"
28776
+ },
26986
28777
  {
26987
28778
  type: "code",
26988
28779
  language: "json",
@@ -26990,20 +28781,61 @@ var sections_default2 = [
26990
28781
  code: '{\n "documents": [\n {\n "id": "d_abc123",\n "filename": "acme-invoice-q4.pdf",\n "documentType": "invoice",\n "extractedFields": {\n "vendor.name": "Acme Corp",\n "total_amount": 14250.00\n }\n }\n ],\n "total": 1,\n "page": 1,\n "perPage": 10\n}'
26991
28782
  },
26992
28783
  {
26993
- type: "callout",
26994
- text: "Numeric operators (`gt`, `gte`, `lt`, `lte`, `between`) only resolve correctly when the schema field is typed as `number`. A field typed as `string` that holds numeric content (e.g. `\u20AC1,500.00`) will silently return zero matches. The API now returns a `warnings[]` array on the filter response when a numeric operator is applied to a string-typed field, explaining the lexicographic-comparison issue and suggesting a `data_type` change."
28784
+ type: "heading",
28785
+ level: 3,
28786
+ text: "Schema typing (preventive + reactive)"
28787
+ },
28788
+ {
28789
+ type: "paragraph",
28790
+ text: "Numeric operators (`gt`, `gte`, `lt`, `lte`, `between`) only resolve correctly when the schema field is typed as `number`. A field typed as `string` that holds numeric content (e.g. `\u20AC1,500.00`) will silently return zero matches even after extraction. There are two ways to handle this \u2014 pick the right one before constructing the call."
28791
+ },
28792
+ {
28793
+ type: "heading",
28794
+ level: 4,
28795
+ text: "Preventive \u2014 gate on `dataType`"
28796
+ },
28797
+ {
28798
+ type: "paragraph",
28799
+ text: 'Call `talonic_search` first and read `dataType` on the field entry. If `dataType !== "number"`, do **not** issue a numeric operator on that field. Pick a string-friendly operator (`eq`, `contains`) or warn the user that the field needs a `data_type` change in the schema before the query can succeed. This avoids the silent-zero-matches outcome entirely.'
28800
+ },
28801
+ {
28802
+ type: "heading",
28803
+ level: 4,
28804
+ text: "Reactive \u2014 handle `warnings[]`"
28805
+ },
28806
+ {
28807
+ type: "paragraph",
28808
+ text: "When a numeric operator is applied to a string-typed field, the API attaches a `warnings[]` array to the filter response. Each entry has `code`, `message`, `field`/`field_id`, and a `suggestion`. The MCP tool surfaces this in `structuredContent` \u2014 agents should relay the `message` (and `suggestion`, when present) to the user rather than silently retrying."
28809
+ },
28810
+ {
28811
+ type: "code",
28812
+ language: "json",
28813
+ title: "Response with a warning",
28814
+ code: '{\n "data": [],\n "total": 0,\n "warnings": [\n {\n "code": "numeric_operator_on_string_field",\n "message": "Operator `gt` was applied to field `invoice_total` typed as string. Numeric comparisons against string-typed fields use lexicographic ordering and may return zero matches.",\n "field": "invoice_total",\n "field_id": "fld_inv_total",\n "suggestion": "Change the field\'s data_type to `number` in the schema definition."\n }\n ]\n}'
28815
+ },
28816
+ {
28817
+ type: "heading",
28818
+ level: 3,
28819
+ text: "Cost"
26995
28820
  },
26996
- { type: "heading", level: 3, text: "Cost" },
26997
28821
  {
26998
28822
  type: "paragraph",
26999
28823
  text: "Filter calls are **free** \u2014 they query already-extracted data and do not consume extraction credits."
27000
28824
  },
27001
- { type: "heading", level: 3, text: "Errors" },
28825
+ {
28826
+ type: "heading",
28827
+ level: 3,
28828
+ text: "Errors"
28829
+ },
27002
28830
  {
27003
28831
  type: "param-table",
27004
28832
  title: "Common errors",
27005
28833
  params: [
27006
- { name: "unauthorized", type: "401", description: "Invalid or missing API key." },
28834
+ {
28835
+ name: "unauthorized",
28836
+ type: "401",
28837
+ description: "Invalid or missing API key."
28838
+ },
27007
28839
  {
27008
28840
  name: "no_field_match",
27009
28841
  type: "422",
@@ -27018,8 +28850,14 @@ var sections_default2 = [
27018
28850
  }
27019
28851
  ],
27020
28852
  related: [
27021
- { label: "talonic_search", slug: "mcp-talonic-search" },
27022
- { label: "Filter & Search API", slug: "field-autocomplete" }
28853
+ {
28854
+ label: "talonic_search",
28855
+ slug: "mcp-talonic-search"
28856
+ },
28857
+ {
28858
+ label: "Filter & Search API",
28859
+ slug: "field-autocomplete"
28860
+ }
27023
28861
  ],
27024
28862
  faq: [
27025
28863
  {
@@ -27029,16 +28867,23 @@ var sections_default2 = [
27029
28867
  {
27030
28868
  question: "How do I find field names for filtering?",
27031
28869
  answer: "Call talonic_search first. Use fields[] entries where filterable is true \u2014 their canonicalName values are what you pass as the field parameter in filter conditions. Fields with filterable: false have no extracted data yet and cannot be filtered."
28870
+ },
28871
+ {
28872
+ question: "Why does my `talonic_filter` query with `gt` return zero matches on a numeric-looking field?",
28873
+ answer: "The schema field is almost certainly typed as `string`, not `number`. Numeric operators against string-typed fields fall back to lexicographic comparison and silently return zero. Prevention: call `talonic_search` first and check `dataType` before issuing the filter. Recovery: the response's `warnings[]` array explains the issue and suggests a `data_type` change in the schema definition."
27032
28874
  }
27033
28875
  ],
27034
28876
  mentions: [
27035
- "talonic_filter",
27036
- "filter",
28877
+ "canonical field name",
27037
28878
  "conditions",
28879
+ "dataType",
28880
+ "filter",
28881
+ "is_not_empty",
27038
28882
  "operators",
27039
- "canonical field name",
27040
- "warnings",
27041
- "is_not_empty"
28883
+ "preventive guard",
28884
+ "schema typing",
28885
+ "talonic_filter",
28886
+ "warnings"
27042
28887
  ]
27043
28888
  },
27044
28889
  {
@@ -27052,7 +28897,11 @@ var sections_default2 = [
27052
28897
  type: "paragraph",
27053
28898
  text: "Fetch full metadata for a single document by ID. Returns filename, page count, detected document type, language, processing log, and link URLs."
27054
28899
  },
27055
- { type: "heading", level: 3, text: "When to use" },
28900
+ {
28901
+ type: "heading",
28902
+ level: 3,
28903
+ text: "When to use"
28904
+ },
27056
28905
  {
27057
28906
  type: "list",
27058
28907
  ordered: false,
@@ -27062,7 +28911,11 @@ var sections_default2 = [
27062
28911
  "The user asks 'tell me about document X'."
27063
28912
  ]
27064
28913
  },
27065
- { type: "heading", level: 3, text: "When NOT to use" },
28914
+ {
28915
+ type: "heading",
28916
+ level: 3,
28917
+ text: "When NOT to use"
28918
+ },
27066
28919
  {
27067
28920
  type: "list",
27068
28921
  ordered: false,
@@ -27072,7 +28925,11 @@ var sections_default2 = [
27072
28925
  "The user has a file but no `document_id` yet \u2192 call `talonic_extract` first."
27073
28926
  ]
27074
28927
  },
27075
- { type: "heading", level: 3, text: "Input schema" },
28928
+ {
28929
+ type: "heading",
28930
+ level: 3,
28931
+ text: "Input schema"
28932
+ },
27076
28933
  {
27077
28934
  type: "param-table",
27078
28935
  title: "Parameters",
@@ -27085,22 +28942,43 @@ var sections_default2 = [
27085
28942
  }
27086
28943
  ]
27087
28944
  },
27088
- { type: "heading", level: 3, text: "Response shape" },
28945
+ {
28946
+ type: "heading",
28947
+ level: 3,
28948
+ text: "Response shape"
28949
+ },
27089
28950
  {
27090
28951
  type: "code",
27091
28952
  language: "json",
27092
28953
  title: "Example response",
27093
28954
  code: '{\n "id": "d_abc123",\n "filename": "invoice.pdf",\n "documentType": "invoice",\n "language": "en",\n "pageCount": 2,\n "processingLog": [...],\n "links": {\n "self": "https://api.talonic.com/v1/documents/d_abc123",\n "extractions": "https://api.talonic.com/v1/documents/d_abc123/extractions",\n "dashboard": "https://app.talonic.com/documents/d_abc123"\n }\n}'
27094
28955
  },
27095
- { type: "heading", level: 3, text: "Cost" },
27096
- { type: "paragraph", text: "Free \u2014 metadata lookups do not consume extraction credits." }
28956
+ {
28957
+ type: "heading",
28958
+ level: 3,
28959
+ text: "Cost"
28960
+ },
28961
+ {
28962
+ type: "paragraph",
28963
+ text: "Free \u2014 metadata lookups do not consume extraction credits."
28964
+ }
27097
28965
  ],
27098
28966
  related: [
27099
- { label: "SDK Documents", slug: "sdk-documents" },
27100
- { label: "Get Document", slug: "get-document" }
28967
+ {
28968
+ label: "SDK Documents",
28969
+ slug: "sdk-documents"
28970
+ },
28971
+ {
28972
+ label: "Get Document",
28973
+ slug: "get-document"
28974
+ }
27101
28975
  ],
27102
28976
  faq: [],
27103
- mentions: ["talonic_get_document", "metadata", "document_id"]
28977
+ mentions: [
28978
+ "talonic_get_document",
28979
+ "metadata",
28980
+ "document_id"
28981
+ ]
27104
28982
  },
27105
28983
  {
27106
28984
  slug: "mcp-talonic-to-markdown",
@@ -27113,7 +28991,11 @@ var sections_default2 = [
27113
28991
  type: "paragraph",
27114
28992
  text: "Get OCR-converted markdown for a document. Accepts an existing `document_id` (cheapest \u2014 one API call, no re-processing), or raw file bytes, a local path, or a URL."
27115
28993
  },
27116
- { type: "heading", level: 3, text: "When to use" },
28994
+ {
28995
+ type: "heading",
28996
+ level: 3,
28997
+ text: "When to use"
28998
+ },
27117
28999
  {
27118
29000
  type: "list",
27119
29001
  ordered: false,
@@ -27124,7 +29006,11 @@ var sections_default2 = [
27124
29006
  "The user has a raw PDF / scan / image and wants markdown directly without designing a schema."
27125
29007
  ]
27126
29008
  },
27127
- { type: "heading", level: 3, text: "When NOT to use" },
29009
+ {
29010
+ type: "heading",
29011
+ level: 3,
29012
+ text: "When NOT to use"
29013
+ },
27128
29014
  {
27129
29015
  type: "list",
27130
29016
  ordered: false,
@@ -27132,8 +29018,15 @@ var sections_default2 = [
27132
29018
  "The user wants specific structured fields \u2192 use `talonic_extract` with a schema."
27133
29019
  ]
27134
29020
  },
27135
- { type: "heading", level: 3, text: "Input schema" },
27136
- { type: "paragraph", text: "Provide **exactly one** of the following:" },
29021
+ {
29022
+ type: "heading",
29023
+ level: 3,
29024
+ text: "Input schema"
29025
+ },
29026
+ {
29027
+ type: "paragraph",
29028
+ text: "Provide **exactly one** of the following:"
29029
+ },
27137
29030
  {
27138
29031
  type: "param-table",
27139
29032
  title: "Parameters",
@@ -27165,19 +29058,31 @@ var sections_default2 = [
27165
29058
  }
27166
29059
  ]
27167
29060
  },
27168
- { type: "heading", level: 3, text: "Response shape" },
29061
+ {
29062
+ type: "heading",
29063
+ level: 3,
29064
+ text: "Response shape"
29065
+ },
27169
29066
  {
27170
29067
  type: "code",
27171
29068
  language: "json",
27172
29069
  title: "Example response",
27173
29070
  code: '{\n "documentId": "d_abc123",\n "markdown": "# Invoice INV-2024-0847\\n\\n**Vendor:** Acme Corp\\n**Date:** 2024-01-15\\n\\n| Item | Qty | Unit Price | Total |\\n|------|-----|------------|-------|\\n| Widget A | 100 | 42.50 | 4,250.00 |\\n| Widget B | 200 | 50.00 | 10,000.00 |\\n\\n**Total: 14,250.00 EUR**"\n}'
27174
29071
  },
27175
- { type: "heading", level: 3, text: "Cost" },
29072
+ {
29073
+ type: "heading",
29074
+ level: 3,
29075
+ text: "Cost"
29076
+ },
27176
29077
  {
27177
29078
  type: "paragraph",
27178
29079
  text: "**Free when using `document_id`** \u2014 the document is already ingested. When passing a raw file (`file_data`, `file_path`, `file_url`), the tool auto-ingests via extract first, consuming **one extraction credit**. To avoid unnecessary cost: if you've already extracted a document, reuse the `document_id` from that response."
27179
29080
  },
27180
- { type: "heading", level: 3, text: "Errors" },
29081
+ {
29082
+ type: "heading",
29083
+ level: 3,
29084
+ text: "Errors"
29085
+ },
27181
29086
  {
27182
29087
  type: "param-table",
27183
29088
  title: "Common errors",
@@ -27206,8 +29111,14 @@ var sections_default2 = [
27206
29111
  }
27207
29112
  ],
27208
29113
  related: [
27209
- { label: "talonic_extract", slug: "mcp-talonic-extract" },
27210
- { label: "SDK getMarkdown", slug: "sdk-documents" }
29114
+ {
29115
+ label: "talonic_extract",
29116
+ slug: "mcp-talonic-extract"
29117
+ },
29118
+ {
29119
+ label: "SDK getMarkdown",
29120
+ slug: "sdk-documents"
29121
+ }
27211
29122
  ],
27212
29123
  faq: [
27213
29124
  {
@@ -27219,7 +29130,12 @@ var sections_default2 = [
27219
29130
  answer: "If you already called talonic_extract, reuse the document_id from that response to call talonic_to_markdown for free."
27220
29131
  }
27221
29132
  ],
27222
- mentions: ["talonic_to_markdown", "OCR", "markdown", "document_id"]
29133
+ mentions: [
29134
+ "talonic_to_markdown",
29135
+ "OCR",
29136
+ "markdown",
29137
+ "document_id"
29138
+ ]
27223
29139
  },
27224
29140
  {
27225
29141
  slug: "mcp-talonic-list-schemas",
@@ -27232,7 +29148,11 @@ var sections_default2 = [
27232
29148
  type: "paragraph",
27233
29149
  text: "List all saved schemas in the workspace. Returns each schema with its ID, name, description, version, field count, and full JSON Schema definition."
27234
29150
  },
27235
- { type: "heading", level: 3, text: "When to use" },
29151
+ {
29152
+ type: "heading",
29153
+ level: 3,
29154
+ text: "When to use"
29155
+ },
27236
29156
  {
27237
29157
  type: "list",
27238
29158
  ordered: false,
@@ -27243,7 +29163,11 @@ var sections_default2 = [
27243
29163
  "You need a `schema_id` for `talonic_extract`."
27244
29164
  ]
27245
29165
  },
27246
- { type: "heading", level: 3, text: "When NOT to use" },
29166
+ {
29167
+ type: "heading",
29168
+ level: 3,
29169
+ text: "When NOT to use"
29170
+ },
27247
29171
  {
27248
29172
  type: "list",
27249
29173
  ordered: false,
@@ -27251,17 +29175,41 @@ var sections_default2 = [
27251
29175
  "The user wants to extract data and provides an inline schema \u2192 call `talonic_extract` directly."
27252
29176
  ]
27253
29177
  },
27254
- { type: "heading", level: 3, text: "Input schema" },
27255
- { type: "paragraph", text: "No parameters required." },
27256
- { type: "heading", level: 3, text: "Cost" },
27257
- { type: "paragraph", text: "Free \u2014 listing schemas does not consume extraction credits." }
29178
+ {
29179
+ type: "heading",
29180
+ level: 3,
29181
+ text: "Input schema"
29182
+ },
29183
+ {
29184
+ type: "paragraph",
29185
+ text: "No parameters required."
29186
+ },
29187
+ {
29188
+ type: "heading",
29189
+ level: 3,
29190
+ text: "Cost"
29191
+ },
29192
+ {
29193
+ type: "paragraph",
29194
+ text: "Free \u2014 listing schemas does not consume extraction credits."
29195
+ }
27258
29196
  ],
27259
29197
  related: [
27260
- { label: "talonic_save_schema", slug: "mcp-talonic-save-schema" },
27261
- { label: "SDK Schemas", slug: "sdk-schemas" }
29198
+ {
29199
+ label: "talonic_save_schema",
29200
+ slug: "mcp-talonic-save-schema"
29201
+ },
29202
+ {
29203
+ label: "SDK Schemas",
29204
+ slug: "sdk-schemas"
29205
+ }
27262
29206
  ],
27263
29207
  faq: [],
27264
- mentions: ["talonic_list_schemas", "schemas", "schema_id"]
29208
+ mentions: [
29209
+ "talonic_list_schemas",
29210
+ "schemas",
29211
+ "schema_id"
29212
+ ]
27265
29213
  },
27266
29214
  {
27267
29215
  slug: "mcp-talonic-save-schema",
@@ -27274,7 +29222,11 @@ var sections_default2 = [
27274
29222
  type: "paragraph",
27275
29223
  text: "Save a schema definition to the workspace for reuse across future extractions. Returns the saved schema with its assigned `id` (UUID) and `short_id` (`SCH-XXXXXXXX`)."
27276
29224
  },
27277
- { type: "heading", level: 3, text: "When to use" },
29225
+ {
29226
+ type: "heading",
29227
+ level: 3,
29228
+ text: "When to use"
29229
+ },
27278
29230
  {
27279
29231
  type: "list",
27280
29232
  ordered: false,
@@ -27284,7 +29236,11 @@ var sections_default2 = [
27284
29236
  "The user wants to standardise extraction across many documents of the same type."
27285
29237
  ]
27286
29238
  },
27287
- { type: "heading", level: 3, text: "When NOT to use" },
29239
+ {
29240
+ type: "heading",
29241
+ level: 3,
29242
+ text: "When NOT to use"
29243
+ },
27288
29244
  {
27289
29245
  type: "list",
27290
29246
  ordered: false,
@@ -27293,7 +29249,11 @@ var sections_default2 = [
27293
29249
  "The user has not confirmed the schema design \u2014 avoid creating clutter."
27294
29250
  ]
27295
29251
  },
27296
- { type: "heading", level: 3, text: "Input schema" },
29252
+ {
29253
+ type: "heading",
29254
+ level: 3,
29255
+ text: "Input schema"
29256
+ },
27297
29257
  {
27298
29258
  type: "param-table",
27299
29259
  title: "Parameters",
@@ -27317,8 +29277,15 @@ var sections_default2 = [
27317
29277
  }
27318
29278
  ]
27319
29279
  },
27320
- { type: "heading", level: 3, text: "Schema format guidance" },
27321
- { type: "paragraph", text: "**Full JSON Schema (recommended):**" },
29280
+ {
29281
+ type: "heading",
29282
+ level: 3,
29283
+ text: "Schema format guidance"
29284
+ },
29285
+ {
29286
+ type: "paragraph",
29287
+ text: "**Full JSON Schema (recommended):**"
29288
+ },
27322
29289
  {
27323
29290
  type: "code",
27324
29291
  language: "json",
@@ -27344,12 +29311,25 @@ var sections_default2 = [
27344
29311
  type: "callout",
27345
29312
  text: "When you call `talonic_save_schema` (or update an existing schema), the API samples the field's prior extracted values. If 80% or more of a string-typed field's values parse as numbers (with at least 5 samples), the response includes a `warnings[]` suggesting `data_type: \"number\"`. Heed the warning if you plan to filter on that field with numeric operators."
27346
29313
  },
27347
- { type: "heading", level: 3, text: "Cost" },
27348
- { type: "paragraph", text: "Free \u2014 saving a schema does not consume extraction credits." }
29314
+ {
29315
+ type: "heading",
29316
+ level: 3,
29317
+ text: "Cost"
29318
+ },
29319
+ {
29320
+ type: "paragraph",
29321
+ text: "Free \u2014 saving a schema does not consume extraction credits."
29322
+ }
27349
29323
  ],
27350
29324
  related: [
27351
- { label: "talonic_list_schemas", slug: "mcp-talonic-list-schemas" },
27352
- { label: "Schemas API", slug: "create-schema" }
29325
+ {
29326
+ label: "talonic_list_schemas",
29327
+ slug: "mcp-talonic-list-schemas"
29328
+ },
29329
+ {
29330
+ label: "Schemas API",
29331
+ slug: "create-schema"
29332
+ }
27353
29333
  ],
27354
29334
  faq: [
27355
29335
  {
@@ -27357,7 +29337,12 @@ var sections_default2 = [
27357
29337
  answer: "Full JSON Schema ({type: 'object', properties: {...}}) is most reliable. Flat key-type maps ({field: 'type'}) work for simple schemas but are normalized server-side and may produce errors with complex structures."
27358
29338
  }
27359
29339
  ],
27360
- mentions: ["talonic_save_schema", "schema", "JSON Schema", "flat key-type"]
29340
+ mentions: [
29341
+ "talonic_save_schema",
29342
+ "schema",
29343
+ "JSON Schema",
29344
+ "flat key-type"
29345
+ ]
27361
29346
  },
27362
29347
  {
27363
29348
  slug: "mcp-talonic-get-balance",
@@ -27370,7 +29355,11 @@ var sections_default2 = [
27370
29355
  type: "paragraph",
27371
29356
  text: "Read the user's current Talonic credit balance, EUR value, 30-day burn rate, projected runway, tier, and next-tier-reset timestamp. Use this to make budget-aware decisions before kicking off large batches or re-extractions."
27372
29357
  },
27373
- { type: "heading", level: 3, text: "When to use" },
29358
+ {
29359
+ type: "heading",
29360
+ level: 3,
29361
+ text: "When to use"
29362
+ },
27374
29363
  {
27375
29364
  type: "list",
27376
29365
  ordered: false,
@@ -27380,7 +29369,11 @@ var sections_default2 = [
27380
29369
  "The user asks how long their balance will last at the current rate."
27381
29370
  ]
27382
29371
  },
27383
- { type: "heading", level: 3, text: "When NOT to use" },
29372
+ {
29373
+ type: "heading",
29374
+ level: 3,
29375
+ text: "When NOT to use"
29376
+ },
27384
29377
  {
27385
29378
  type: "list",
27386
29379
  ordered: false,
@@ -27389,14 +29382,29 @@ var sections_default2 = [
27389
29382
  "The user wants to top up credits \u2014 route them to the dashboard at `https://app.talonic.com`."
27390
29383
  ]
27391
29384
  },
27392
- { type: "heading", level: 3, text: "Input schema" },
27393
- { type: "paragraph", text: "No parameters required." },
27394
- { type: "heading", level: 3, text: "Response shape" },
29385
+ {
29386
+ type: "heading",
29387
+ level: 3,
29388
+ text: "Input schema"
29389
+ },
29390
+ {
29391
+ type: "paragraph",
29392
+ text: "No parameters required."
29393
+ },
29394
+ {
29395
+ type: "heading",
29396
+ level: 3,
29397
+ text: "Response shape"
29398
+ },
27395
29399
  {
27396
29400
  type: "param-table",
27397
29401
  title: "Fields",
27398
29402
  params: [
27399
- { name: "balance_credits", type: "number", description: "Current credit balance." },
29403
+ {
29404
+ name: "balance_credits",
29405
+ type: "number",
29406
+ description: "Current credit balance."
29407
+ },
27400
29408
  {
27401
29409
  name: "balance_eur",
27402
29410
  type: "number",
@@ -27430,12 +29438,25 @@ var sections_default2 = [
27430
29438
  title: "Example response",
27431
29439
  code: '{\n "balance_credits": 1000,\n "balance_eur": 50.00,\n "burn_rate_30d_credits": 240,\n "projected_runway_days": 125,\n "tier": "pro",\n "tier_resets_at": "2026-06-01T00:00:00.000Z"\n}'
27432
29440
  },
27433
- { type: "heading", level: 3, text: "Cost" },
27434
- { type: "paragraph", text: "Free \u2014 balance lookups do not consume extraction credits." }
29441
+ {
29442
+ type: "heading",
29443
+ level: 3,
29444
+ text: "Cost"
29445
+ },
29446
+ {
29447
+ type: "paragraph",
29448
+ text: "Free \u2014 balance lookups do not consume extraction credits."
29449
+ }
27435
29450
  ],
27436
29451
  related: [
27437
- { label: "talonic_extract", slug: "mcp-talonic-extract" },
27438
- { label: "Cost & Rate Limits", slug: "mcp-cost-and-limits" }
29452
+ {
29453
+ label: "talonic_extract",
29454
+ slug: "mcp-talonic-extract"
29455
+ },
29456
+ {
29457
+ label: "Cost & Rate Limits",
29458
+ slug: "mcp-cost-and-limits"
29459
+ }
27439
29460
  ],
27440
29461
  faq: [
27441
29462
  {
@@ -27447,7 +29468,157 @@ var sections_default2 = [
27447
29468
  answer: "Days of runway at the trailing 30-day average burn rate. The value -1 means no consumption in the trailing window, so runway cannot be computed."
27448
29469
  }
27449
29470
  ],
27450
- mentions: ["talonic_get_balance", "credits", "balance", "tier", "burn rate", "runway"]
29471
+ mentions: [
29472
+ "talonic_get_balance",
29473
+ "credits",
29474
+ "balance",
29475
+ "tier",
29476
+ "burn rate",
29477
+ "runway"
29478
+ ]
29479
+ },
29480
+ {
29481
+ slug: "mcp-talonic-request-upload",
29482
+ parentSlug: "mcp-tools",
29483
+ title: "talonic_request_upload",
29484
+ seoTitle: "talonic_request_upload MCP Tool \u2014 Talonic Docs",
29485
+ description: "Request a browser upload link for files too large for tool-call arguments or when running in a sandboxed hosted environment.",
29486
+ content: [
29487
+ {
29488
+ type: "paragraph",
29489
+ text: "Request a browser upload link for the user. Use this when the user wants to extract a file but you cannot deliver it directly \u2014 the file is too large for tool-call arguments (~32 KB cap on hosted connectors), or you're running in a sandboxed environment (Claude.ai, ChatGPT) that blocks outbound file transfers."
29490
+ },
29491
+ {
29492
+ type: "heading",
29493
+ level: 3,
29494
+ text: "When to use"
29495
+ },
29496
+ {
29497
+ type: "list",
29498
+ ordered: false,
29499
+ items: [
29500
+ "The user has a file to extract but you cannot send it via `file_data` (file larger than ~32 KB, or the environment blocks outbound data).",
29501
+ "You are running in a hosted/sandboxed environment (Claude.ai, ChatGPT) where `file_data` cannot be used reliably.",
29502
+ "The user explicitly asks for an upload link."
29503
+ ]
29504
+ },
29505
+ {
29506
+ type: "heading",
29507
+ level: 3,
29508
+ text: "When NOT to use"
29509
+ },
29510
+ {
29511
+ type: "list",
29512
+ ordered: false,
29513
+ items: [
29514
+ "You can deliver the file directly via `file_data` (local stdio installs with small files).",
29515
+ "The file is already accessible via a public URL \u2192 use `file_url` on `talonic_extract`.",
29516
+ "The document is already in the workspace \u2192 use `document_id` on `talonic_extract`."
29517
+ ]
29518
+ },
29519
+ {
29520
+ type: "heading",
29521
+ level: 3,
29522
+ text: "How the flow works"
29523
+ },
29524
+ {
29525
+ type: "list",
29526
+ ordered: true,
29527
+ items: [
29528
+ "Call `talonic_request_upload` with the filename. You receive a `document_id`, an `upload_url`, and an `expires_at` timestamp.",
29529
+ "Show the `upload_url` to the user and ask them to open it in their browser.",
29530
+ "The user drops the file on the upload page. The browser uploads directly to Talonic \u2014 no tool-call size cap, no sandbox restriction.",
29531
+ "Poll with `talonic_get_document` using the `document_id` until `status` is `uploaded`.",
29532
+ "Call `talonic_extract` with the `document_id` and a schema to extract structured data."
29533
+ ]
29534
+ },
29535
+ {
29536
+ type: "heading",
29537
+ level: 3,
29538
+ text: "Input schema"
29539
+ },
29540
+ {
29541
+ type: "param-table",
29542
+ title: "Parameters",
29543
+ params: [
29544
+ {
29545
+ name: "filename",
29546
+ type: "string",
29547
+ required: true,
29548
+ description: "The name of the file being uploaded, including extension (e.g. `invoice.pdf`). Used to pre-allocate the document and infer MIME type."
29549
+ }
29550
+ ]
29551
+ },
29552
+ {
29553
+ type: "heading",
29554
+ level: 3,
29555
+ text: "Response shape"
29556
+ },
29557
+ {
29558
+ type: "code",
29559
+ language: "json",
29560
+ code: '{\n "document_id": "d8f3a1b2-...",\n "upload_url": "https://app.talonic.com/u/abc12345-...",\n "expires_at": "2026-05-27T22:15:00.000Z"\n}'
29561
+ },
29562
+ {
29563
+ type: "param-table",
29564
+ title: "Response fields",
29565
+ params: [
29566
+ {
29567
+ name: "document_id",
29568
+ type: "string",
29569
+ description: "The pre-allocated document ID. Use with `talonic_get_document` to poll status, and with `talonic_extract` once uploaded."
29570
+ },
29571
+ {
29572
+ name: "upload_url",
29573
+ type: "string",
29574
+ description: "URL the user should open in their browser to drop the file. Expires after 15 minutes."
29575
+ },
29576
+ {
29577
+ name: "expires_at",
29578
+ type: "string",
29579
+ description: "ISO 8601 timestamp when the upload link expires."
29580
+ }
29581
+ ]
29582
+ },
29583
+ {
29584
+ type: "callout",
29585
+ text: "Upload links are single-use and expire after 15 minutes. If the user doesn't upload in time, call `talonic_request_upload` again to get a fresh link."
29586
+ }
29587
+ ],
29588
+ related: [
29589
+ {
29590
+ label: "talonic_extract",
29591
+ slug: "mcp-talonic-extract"
29592
+ },
29593
+ {
29594
+ label: "talonic_get_document",
29595
+ slug: "mcp-talonic-get-document"
29596
+ },
29597
+ {
29598
+ label: "Drag & Drop in Chat",
29599
+ slug: "mcp-drag-drop"
29600
+ }
29601
+ ],
29602
+ faq: [
29603
+ {
29604
+ question: "How do I upload a file through Claude.ai to Talonic?",
29605
+ answer: "Call talonic_request_upload with the filename. Show the returned upload_url to the user. They open it in their browser and drop the file. Poll talonic_get_document until status is 'uploaded', then call talonic_extract with the document_id."
29606
+ },
29607
+ {
29608
+ question: "Why can't I just send the file through file_data on Claude.ai?",
29609
+ answer: "Claude.ai's hosted connector caps tool-call arguments at ~32 KB (decoded). Real documents are typically 100 KB to several MB. The browser-handoff upload bypasses this limit entirely by moving the file transfer to the user's browser."
29610
+ }
29611
+ ],
29612
+ mentions: [
29613
+ "upload",
29614
+ "browser handoff",
29615
+ "hosted connector",
29616
+ "Claude.ai",
29617
+ "ChatGPT",
29618
+ "file size limit",
29619
+ "sandbox",
29620
+ "upload link"
29621
+ ]
27451
29622
  },
27452
29623
  {
27453
29624
  slug: "mcp-schemas-resource",
@@ -27456,25 +29627,43 @@ var sections_default2 = [
27456
29627
  seoTitle: "MCP Resources \u2014 Talonic Docs",
27457
29628
  description: "Two MCP resources exposed by the Talonic server: talonic://schemas (saved schemas) and talonic://webhooks/reference (webhook event reference).",
27458
29629
  content: [
27459
- { type: "heading", level: 3, text: "talonic://schemas" },
29630
+ {
29631
+ type: "heading",
29632
+ level: 3,
29633
+ text: "talonic://schemas"
29634
+ },
27460
29635
  {
27461
29636
  type: "paragraph",
27462
29637
  text: "Exposes the saved-schemas list to clients that browse MCP resources separately. Claude Desktop and Cowork render these in the UI. The contents mirror `talonic_list_schemas` but in a browseable form."
27463
29638
  },
27464
- { type: "heading", level: 3, text: "talonic://webhooks/reference" },
29639
+ {
29640
+ type: "heading",
29641
+ level: 3,
29642
+ text: "talonic://webhooks/reference"
29643
+ },
27465
29644
  {
27466
29645
  type: "paragraph",
27467
29646
  text: "Static reference documenting the webhook events the Talonic API can fire (extraction lifecycle, document classification, etc.), their payload shapes, and how to subscribe. Useful when an agent is helping the user wire Talonic into a backend that needs to react to extraction events."
27468
29647
  }
27469
29648
  ],
27470
- related: [{ label: "talonic_list_schemas", slug: "mcp-talonic-list-schemas" }],
29649
+ related: [
29650
+ {
29651
+ label: "talonic_list_schemas",
29652
+ slug: "mcp-talonic-list-schemas"
29653
+ }
29654
+ ],
27471
29655
  faq: [
27472
29656
  {
27473
29657
  question: "What resources does the Talonic MCP server expose?",
27474
29658
  answer: "Two resources: talonic://schemas (browseable list of saved schemas) and talonic://webhooks/reference (static reference for the API's webhook events and payloads)."
27475
29659
  }
27476
29660
  ],
27477
- mentions: ["MCP resource", "talonic://schemas", "talonic://webhooks/reference", "webhooks"]
29661
+ mentions: [
29662
+ "MCP resource",
29663
+ "talonic://schemas",
29664
+ "talonic://webhooks/reference",
29665
+ "webhooks"
29666
+ ]
27478
29667
  },
27479
29668
  {
27480
29669
  slug: "mcp-cost-and-limits",
@@ -27483,7 +29672,11 @@ var sections_default2 = [
27483
29672
  seoTitle: "MCP Cost and Rate Limits \u2014 Talonic Docs",
27484
29673
  description: "Which MCP tool calls cost extraction credits, rate limit behavior, insufficient-credit handling, and how to avoid re-extraction.",
27485
29674
  content: [
27486
- { type: "heading", level: 3, text: "What costs credits" },
29675
+ {
29676
+ type: "heading",
29677
+ level: 3,
29678
+ text: "What costs credits"
29679
+ },
27487
29680
  {
27488
29681
  type: "paragraph",
27489
29682
  text: "Only extraction operations consume credits. Everything else is free:"
@@ -27507,14 +29700,26 @@ var sections_default2 = [
27507
29700
  type: "free",
27508
29701
  description: "Document already ingested \u2014 just fetches stored markdown."
27509
29702
  },
27510
- { name: "talonic_search", type: "free", description: "Queries indexed data." },
29703
+ {
29704
+ name: "talonic_search",
29705
+ type: "free",
29706
+ description: "Queries indexed data."
29707
+ },
27511
29708
  {
27512
29709
  name: "talonic_filter",
27513
29710
  type: "free",
27514
29711
  description: "Queries extracted field values."
27515
29712
  },
27516
- { name: "talonic_get_document", type: "free", description: "Metadata lookup." },
27517
- { name: "talonic_list_schemas", type: "free", description: "Lists saved schemas." },
29713
+ {
29714
+ name: "talonic_get_document",
29715
+ type: "free",
29716
+ description: "Metadata lookup."
29717
+ },
29718
+ {
29719
+ name: "talonic_list_schemas",
29720
+ type: "free",
29721
+ description: "Lists saved schemas."
29722
+ },
27518
29723
  {
27519
29724
  name: "talonic_save_schema",
27520
29725
  type: "free",
@@ -27531,7 +29736,11 @@ var sections_default2 = [
27531
29736
  type: "callout",
27532
29737
  text: "The per-call cost of `talonic_extract` (and `talonic_to_markdown` with a raw file) is also surfaced on the response under `cost` (`costCredits`, `costEur`, `balanceCredits`, plus a breakdown of `cellsResolvedRegistry` and `cellsResolvedAi`). Agents can read this immediately after the call rather than calling `talonic_get_balance` again."
27533
29738
  },
27534
- { type: "heading", level: 3, text: "Avoiding re-extraction" },
29739
+ {
29740
+ type: "heading",
29741
+ level: 3,
29742
+ text: "Avoiding re-extraction"
29743
+ },
27535
29744
  {
27536
29745
  type: "paragraph",
27537
29746
  text: "Agents should avoid extracting the same document twice. Best practices:"
@@ -27545,26 +29754,47 @@ var sections_default2 = [
27545
29754
  "Use `talonic_filter` to query already-extracted data instead of re-extracting with a different schema when the fields you need are already captured."
27546
29755
  ]
27547
29756
  },
27548
- { type: "heading", level: 3, text: "Rate limits" },
29757
+ {
29758
+ type: "heading",
29759
+ level: 3,
29760
+ text: "Rate limits"
29761
+ },
27549
29762
  {
27550
29763
  type: "paragraph",
27551
29764
  text: "The Talonic API enforces per-key rate limits. When exceeded, the server returns `429 Too Many Requests` with a `X-RateLimit-Reset` header. The MCP server (and the underlying Node SDK) retries automatically with exponential backoff up to `maxRetries` (default: 3). If retries are exhausted, the tool returns an error with the reset timestamp."
27552
29765
  },
27553
- { type: "heading", level: 3, text: "Insufficient credits" },
29766
+ {
29767
+ type: "heading",
29768
+ level: 3,
29769
+ text: "Insufficient credits"
29770
+ },
27554
29771
  {
27555
29772
  type: "paragraph",
27556
29773
  text: "When extraction credits are exhausted, the API returns `402 Payment Required`. The tool surfaces this as an error. The agent should inform the user that their credit balance is depleted and suggest upgrading their plan or waiting for the daily reset (free tier: 50 extractions/day, resets at midnight UTC)."
27557
29774
  },
27558
- { type: "heading", level: 3, text: "Free tier limits" },
29775
+ {
29776
+ type: "heading",
29777
+ level: 3,
29778
+ text: "Free tier limits"
29779
+ },
27559
29780
  {
27560
29781
  type: "paragraph",
27561
29782
  text: "The free tier includes 50 extractions per day (resets at midnight UTC). Search, filter, metadata, and schema operations are unlimited. No credit card required."
27562
29783
  }
27563
29784
  ],
27564
29785
  related: [
27565
- { label: "talonic_extract", slug: "mcp-talonic-extract" },
27566
- { label: "Authentication", slug: "mcp-authentication" },
27567
- { label: "SDK Retries", slug: "sdk-retries" }
29786
+ {
29787
+ label: "talonic_extract",
29788
+ slug: "mcp-talonic-extract"
29789
+ },
29790
+ {
29791
+ label: "Authentication",
29792
+ slug: "mcp-authentication"
29793
+ },
29794
+ {
29795
+ label: "SDK Retries",
29796
+ slug: "sdk-retries"
29797
+ }
27568
29798
  ],
27569
29799
  faq: [
27570
29800
  {
@@ -27580,7 +29810,14 @@ var sections_default2 = [
27580
29810
  answer: "The API returns 429 with a reset timestamp. The SDK retries automatically with exponential backoff (up to 3 retries by default)."
27581
29811
  }
27582
29812
  ],
27583
- mentions: ["credits", "rate limits", "429", "402", "free tier", "cost"]
29813
+ mentions: [
29814
+ "credits",
29815
+ "rate limits",
29816
+ "429",
29817
+ "402",
29818
+ "free tier",
29819
+ "cost"
29820
+ ]
27584
29821
  },
27585
29822
  {
27586
29823
  slug: "mcp-drag-drop",
@@ -27602,30 +29839,49 @@ var sections_default2 = [
27602
29839
  type: "paragraph",
27603
29840
  text: "From `@talonic/mcp@0.1.4`, agents can pass **`file_data`** (base64-encoded file bytes) and **`filename`** on `talonic_extract` and `talonic_to_markdown`. The agent reads the file bytes from the conversation, base64-encodes them, and passes them through the MCP tool call. The MCP server decodes, infers MIME type from the filename, and uploads to the Talonic API as a normal multipart request. Tool descriptions advertise `file_data` as the recommended input here, so well-trained agents reach for it automatically. No client-side configuration required."
27604
29841
  },
27605
- { type: "heading", level: 3, text: "Claude.ai hosted connector" },
29842
+ {
29843
+ type: "heading",
29844
+ level: 3,
29845
+ text: "Claude.ai hosted connector"
29846
+ },
27606
29847
  {
27607
29848
  type: "callout",
27608
29849
  variant: "warning",
27609
- text: "Claude.ai's hosted-connector pipeline imposes a hard size limit on tool-call arguments (effectively under ~1 KB). A base64-encoded real PDF (typically hundreds of KB at minimum) gets truncated before reaching the MCP server. The Talonic API receives a few hundred bytes, registers an empty document, and the response comes back with `null` extracted fields. This is a Claude.ai platform limit on connectors, not a Talonic MCP server bug."
29850
+ text: "Claude.ai's hosted-connector pipeline imposes a hard size limit on tool-call arguments (effectively ~32 KB decoded). A base64-encoded real PDF (typically hundreds of KB at minimum) gets truncated before reaching the MCP server. The Talonic API receives ~32 KB, registers an empty document, and the response comes back with `null` extracted fields. This is a Claude.ai platform limit on connectors, not a Talonic MCP server bug."
29851
+ },
29852
+ {
29853
+ type: "paragraph",
29854
+ text: "**Workarounds for Claude.ai users:**"
27610
29855
  },
27611
- { type: "paragraph", text: "**Workarounds for Claude.ai users:**" },
27612
29856
  {
27613
29857
  type: "list",
27614
29858
  ordered: false,
27615
29859
  items: [
29860
+ "`talonic_request_upload`: the recommended path. The agent gets an upload link, the user drops the file in their browser, and the agent continues with the `document_id`. Works with any file size the API accepts.",
27616
29861
  "`file_url`: pass a publicly reachable URL; the Talonic API fetches it server-side. Best for files already on the public web.",
27617
29862
  "`document_id`: upload the file once via `app.talonic.com`, then reference the returned id. Best for sensitive files you don't want to expose publicly.",
27618
29863
  "Switch to a local stdio install (`npx -y @talonic/mcp@latest` in Claude Desktop, Cursor, Cline, etc.) \u2014 local stdio has no parameter-size cap and `file_data` works for any file size the API accepts."
27619
- ]
29864
+ ],
29865
+ text: ""
27620
29866
  },
27621
29867
  {
27622
29868
  type: "paragraph",
27623
- text: "The architectural fix that would unblock drag-and-drop through the Claude.ai connector is a pre-signed upload URL flow: a new MCP tool returns a one-time URL plus a reserved `document_id`, the user uploads from their browser directly to Talonic's storage, and the agent then calls `talonic_extract` with the `document_id`. This bypasses the connector's argument-size pipe entirely."
29869
+ text: "From `@talonic/mcp@0.1.7`, this is solved by the **`talonic_request_upload`** tool. The agent calls it to get a one-time upload URL plus a reserved `document_id`. The user opens the link in their browser and drops the file \u2014 no tool-call size cap, no sandbox restriction. The agent then polls `talonic_get_document` until the file is ready and proceeds with `talonic_extract` using the `document_id`."
27624
29870
  }
27625
29871
  ],
27626
29872
  related: [
27627
- { label: "talonic_extract", slug: "mcp-talonic-extract" },
27628
- { label: "Installation", slug: "mcp-installation" }
29873
+ {
29874
+ label: "talonic_extract",
29875
+ slug: "mcp-talonic-extract"
29876
+ },
29877
+ {
29878
+ label: "Installation",
29879
+ slug: "mcp-installation"
29880
+ },
29881
+ {
29882
+ label: "talonic_request_upload",
29883
+ slug: "mcp-talonic-request-upload"
29884
+ }
27629
29885
  ],
27630
29886
  faq: [
27631
29887
  {
@@ -27634,7 +29890,7 @@ var sections_default2 = [
27634
29890
  },
27635
29891
  {
27636
29892
  question: "Why does drag-and-drop fail on the Claude.ai connector?",
27637
- answer: "Claude.ai imposes a hard ~1 KB cap on tool-call argument values. A base64-encoded real PDF cannot fit, so file_data is truncated to a few hundred bytes before the MCP server receives it. Workarounds: file_url (public URL), document_id (upload via app.talonic.com first), or switch to a local stdio install."
29893
+ answer: "Claude.ai imposes a ~32 KB cap on tool-call argument values. A base64-encoded real PDF cannot fit. Use talonic_request_upload to get a browser upload link \u2014 the user drops the file in their browser, bypassing the cap entirely. Alternatives: file_url (public URL), document_id (upload via app.talonic.com first), or switch to a local stdio install."
27638
29894
  }
27639
29895
  ],
27640
29896
  mentions: [
@@ -27680,11 +29936,23 @@ var sections_default2 = [
27680
29936
  }
27681
29937
  ],
27682
29938
  related: [
27683
- { label: "Configuration", slug: "mcp-configuration" },
27684
- { label: "Introduction", slug: "mcp-introduction" }
29939
+ {
29940
+ label: "Configuration",
29941
+ slug: "mcp-configuration"
29942
+ },
29943
+ {
29944
+ label: "Introduction",
29945
+ slug: "mcp-introduction"
29946
+ }
27685
29947
  ],
27686
29948
  faq: [],
27687
- mentions: ["architecture", "stdio", "HTTP", "Streamable HTTP", "session"]
29949
+ mentions: [
29950
+ "architecture",
29951
+ "stdio",
29952
+ "HTTP",
29953
+ "Streamable HTTP",
29954
+ "session"
29955
+ ]
27688
29956
  },
27689
29957
  {
27690
29958
  slug: "mcp-configuration",
@@ -27693,8 +29961,15 @@ var sections_default2 = [
27693
29961
  seoTitle: "MCP Server Configuration \u2014 Talonic Docs",
27694
29962
  description: "Environment variables for the local MCP server and header options for the hosted server.",
27695
29963
  content: [
27696
- { type: "heading", level: 3, text: "Local server (env vars)" },
27697
- { type: "paragraph", text: "Set via the `env` block in your MCP client config:" },
29964
+ {
29965
+ type: "heading",
29966
+ level: 3,
29967
+ text: "Local server (env vars)"
29968
+ },
29969
+ {
29970
+ type: "paragraph",
29971
+ text: "Set via the `env` block in your MCP client config:"
29972
+ },
27698
29973
  {
27699
29974
  type: "param-table",
27700
29975
  title: "Environment variables",
@@ -27712,7 +29987,11 @@ var sections_default2 = [
27712
29987
  }
27713
29988
  ]
27714
29989
  },
27715
- { type: "heading", level: 3, text: "Hosted server (headers)" },
29990
+ {
29991
+ type: "heading",
29992
+ level: 3,
29993
+ text: "Hosted server (headers)"
29994
+ },
27716
29995
  {
27717
29996
  type: "paragraph",
27718
29997
  text: "The hosted server at `mcp.talonic.com` is configured entirely via the MCP client config:"
@@ -27728,11 +30007,22 @@ var sections_default2 = [
27728
30007
  }
27729
30008
  ],
27730
30009
  related: [
27731
- { label: "Installation", slug: "mcp-installation" },
27732
- { label: "Authentication", slug: "mcp-authentication" }
30010
+ {
30011
+ label: "Installation",
30012
+ slug: "mcp-installation"
30013
+ },
30014
+ {
30015
+ label: "Authentication",
30016
+ slug: "mcp-authentication"
30017
+ }
27733
30018
  ],
27734
30019
  faq: [],
27735
- mentions: ["TALONIC_API_KEY", "TALONIC_BASE_URL", "configuration", "headers"]
30020
+ mentions: [
30021
+ "TALONIC_API_KEY",
30022
+ "TALONIC_BASE_URL",
30023
+ "configuration",
30024
+ "headers"
30025
+ ]
27736
30026
  },
27737
30027
  {
27738
30028
  slug: "mcp-troubleshooting",
@@ -27821,7 +30111,12 @@ var sections_default2 = [
27821
30111
  type: "paragraph",
27822
30112
  text: "Extraction credit balance is exhausted. Free tier: 50 extractions/day, resets at midnight UTC. Upgrade plan or wait for reset."
27823
30113
  },
27824
- { type: "heading", level: 3, id: "ts-cached", text: "Tool descriptions look wrong" },
30114
+ {
30115
+ type: "heading",
30116
+ level: 3,
30117
+ id: "ts-cached",
30118
+ text: "Tool descriptions look wrong"
30119
+ },
27825
30120
  {
27826
30121
  type: "paragraph",
27827
30122
  text: "Some MCP clients cache tool descriptions. Restart the client after a server update."
@@ -27838,17 +30133,33 @@ var sections_default2 = [
27838
30133
  }
27839
30134
  ],
27840
30135
  related: [
27841
- { label: "Installation", slug: "mcp-installation" },
27842
- { label: "Configuration", slug: "mcp-configuration" },
27843
- { label: "Cost & Rate Limits", slug: "mcp-cost-and-limits" }
30136
+ {
30137
+ label: "Installation",
30138
+ slug: "mcp-installation"
30139
+ },
30140
+ {
30141
+ label: "Configuration",
30142
+ slug: "mcp-configuration"
30143
+ },
30144
+ {
30145
+ label: "Cost & Rate Limits",
30146
+ slug: "mcp-cost-and-limits"
30147
+ }
27844
30148
  ],
27845
30149
  faq: [],
27846
- mentions: ["troubleshooting", "debugging", "errors", "401", "402", "500"]
30150
+ mentions: [
30151
+ "troubleshooting",
30152
+ "debugging",
30153
+ "errors",
30154
+ "401",
30155
+ "402",
30156
+ "500"
30157
+ ]
27847
30158
  }
27848
30159
  ];
27849
30160
 
27850
30161
  // src/content/mcp/index.ts
27851
- var sections51 = sections_default2;
30162
+ var sections54 = sections_default2;
27852
30163
 
27853
30164
  // src/content/index.ts
27854
30165
  var ALL_PLATFORM_RAW = [
@@ -27902,10 +30213,13 @@ var ALL_API_RAW = [
27902
30213
  ...sections47,
27903
30214
  ...sections48,
27904
30215
  ...sections49,
30216
+ ...sections50,
30217
+ ...sections51,
30218
+ ...sections52,
27905
30219
  ...sections45
27906
30220
  ];
27907
- var ALL_SDK_RAW = [...sections50];
27908
- var ALL_MCP_RAW = [...sections51];
30221
+ var ALL_SDK_RAW = [...sections53];
30222
+ var ALL_MCP_RAW = [...sections54];
27909
30223
  function enrich(raw, navSections, domain) {
27910
30224
  return raw.map((r) => {
27911
30225
  const { prev, next } = derivePrevNext(navSections, r.slug);