npm - pulse-ts-sdk - Versions diffs - 1.0.7 → 1.0.9 - Mend

pulse-ts-sdk 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (150)

package/dist/esm/api/types/ExtractOptions.d.mts CHANGED Viewed

@@ -10,6 +10,8 @@ export interface ExtractOptions {
     figureProcessing?: ExtractOptions.FigureProcessing;
     /** Settings that enable additional processing passes or alternate output formats. Each enabled extension produces a corresponding output field under `response.extensions.*`. */
     extensions?: ExtractOptions.Extensions;
+    /** Settings for Excel/spreadsheet extraction. Controls handling of hidden rows, columns, and sheets. Only applies to `.xlsx` and `.xls` files. Accepts both camelCase and snake_case field names. */
+    spreadsheet?: ExtractOptions.Spreadsheet;
     /** Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created. */
     storage?: ExtractOptions.Storage;
     /** If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously. */
@@ -56,8 +58,6 @@ export declare namespace ExtractOptions {
      * Settings that enable additional processing passes or alternate output formats. Each enabled extension produces a corresponding output field under `response.extensions.*`.
      */
     interface Extensions {
-        /** Merge tables that span multiple pages into a single table. */
-        mergeTables?: boolean;
         /** Link footnote markers to their corresponding footnote text. */
         footnoteReferences?: boolean;
         /** Chunking configuration. When provided, the document is split into chunks using the specified strategies. Results appear in `response.extensions.chunking`. */
@@ -99,6 +99,17 @@ export declare namespace ExtractOptions {
             returnXml?: boolean;
         }
     }
+    /**
+     * Settings for Excel/spreadsheet extraction. Controls handling of hidden rows, columns, and sheets. Only applies to `.xlsx` and `.xls` files. Accepts both camelCase and snake_case field names.
+     */
+    interface Spreadsheet {
+        /** Include rows that are hidden in the Excel workbook. */
+        includeHiddenRows?: boolean;
+        /** Include columns that are hidden in the Excel workbook. */
+        includeHiddenCols?: boolean;
+        /** Include sheets that are hidden in the Excel workbook. */
+        includeHiddenSheets?: boolean;
+    }
     /**
      * Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.
      */

package/dist/esm/api/types/ExtractResponse.d.mts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type * as Pulse from "../index.mjs";
 /**
- * Full extraction result returned by the synchronous `/extract` endpoint. Contains the extracted markdown, optional extensions output, bounding boxes, and storage metadata.
+ * Full extraction result returned by the synchronous `/extract` endpoint. Inherits all core fields and adds deprecated backward-compatibility fields.
  */
 export interface ExtractResponse {
     /** Primary markdown content extracted from the document. Always present in the new format. */
@@ -19,6 +19,8 @@ export interface ExtractResponse {
     plan_info?: ExtractResponse.PlanInfo;
     /** Non-fatal warnings generated during extraction. Includes deprecation notices when legacy input parameters are used, as well as processing warnings (e.g. word-level bounding box limitations). */
     warnings?: string[];
+    /** Number of credits consumed by this request. Only present when the organization has the credit billing system enabled. */
+    credits_used?: number | null;
     /** **Deprecated** — Use `extensions.altOutputs.html` instead. HTML representation of the extracted content. Present when the legacy `returnHtml` input was used. */
     html?: string;
     /** **Deprecated** — Use `extensions.chunking` instead. Document content split into chunks. Present when the legacy `chunking` input was used. */
@@ -45,8 +47,6 @@ export declare namespace ExtractResponse {
     interface Extensions {
         /** Chunk results by strategy. Present when `extensions.chunking` was provided in the request. */
         chunking?: Extensions.Chunking;
-        /** Merge tables result/metadata. Present when `extensions.mergeTables` was enabled. */
-        mergeTables?: Record<string, unknown>;
         /** List of detected footnotes with their in-text references. Present when `extensions.footnoteReferences` was enabled. Each item links a footnote paragraph to the body-text paragraphs that reference it, using bounding-box text IDs. */
         footnoteReferences?: Extensions.FootnoteReferences.Item[];
         /** Alternate output formats. Each key corresponds to an enabled alt output. */

package/dist/esm/api/types/ExtractResultCore.d.mts ADDED Viewed

@@ -0,0 +1,112 @@
+/**
+ * Core extraction result fields shared by the synchronous `/extract` endpoint and the pipeline extract step.
+ */
+export interface ExtractResultCore {
+    /** Primary markdown content extracted from the document. Always present in the new format. */
+    markdown?: string;
+    /** Output from enabled extensions. Each key corresponds to an extension that was enabled in the request under `extensions.*`. Only keys for enabled extensions are present. */
+    extensions?: ExtractResultCore.Extensions;
+    /** Positional bounding-box data for text, titles, headers, footers, images, and tables. Used by the frontend for annotation overlays. */
+    bounding_boxes?: Record<string, unknown>;
+    /** Persisted extraction ID. Present when storage is enabled (default). Use this ID with `/split` and `/schema` endpoints. */
+    extraction_id?: string;
+    /** URL to view the extraction on the Pulse platform. Present when storage is enabled. */
+    extraction_url?: string;
+    /** Number of pages processed. */
+    page_count?: number;
+    /** Billing tier and usage information. */
+    plan_info?: ExtractResultCore.PlanInfo;
+    /** Non-fatal warnings generated during extraction. Includes deprecation notices when legacy input parameters are used, as well as processing warnings (e.g. word-level bounding box limitations). */
+    warnings?: string[];
+    /** Number of credits consumed by this request. Only present when the organization has the credit billing system enabled. */
+    credits_used?: number | null;
+}
+export declare namespace ExtractResultCore {
+    /**
+     * Output from enabled extensions. Each key corresponds to an extension that was enabled in the request under `extensions.*`. Only keys for enabled extensions are present.
+     */
+    interface Extensions {
+        /** Chunk results by strategy. Present when `extensions.chunking` was provided in the request. */
+        chunking?: Extensions.Chunking;
+        /** List of detected footnotes with their in-text references. Present when `extensions.footnoteReferences` was enabled. Each item links a footnote paragraph to the body-text paragraphs that reference it, using bounding-box text IDs. */
+        footnoteReferences?: Extensions.FootnoteReferences.Item[];
+        /** Alternate output formats. Each key corresponds to an enabled alt output. */
+        altOutputs?: Extensions.AltOutputs;
+    }
+    namespace Extensions {
+        /**
+         * Chunk results by strategy. Present when `extensions.chunking` was provided in the request.
+         */
+        interface Chunking {
+            /** Semantically-segmented chunks. */
+            semantic?: string[];
+            /** Chunks split by document headers/headings. */
+            header?: string[];
+            /** One chunk per page. */
+            page?: string[];
+            /** Recursively-split chunks respecting size limits. */
+            recursive?: string[];
+        }
+        type FootnoteReferences = FootnoteReferences.Item[];
+        namespace FootnoteReferences {
+            interface Item {
+                /** The footnote marker symbol (e.g. "*", "†", "1", "#"). */
+                symbol?: string;
+                /** The bounding-box text ID (e.g. "txt-15") of the footnote explanation paragraph. */
+                footnoteTextId?: string;
+                /** Bounding-box text IDs of body-text paragraphs that contain a reference to this footnote marker. */
+                referenceTextIds?: string[];
+            }
+        }
+        /**
+         * Alternate output formats. Each key corresponds to an enabled alt output.
+         */
+        interface AltOutputs {
+            /** Word-level bounding box data. Present when `extensions.altOutputs.wlbb` was enabled and input is a PDF. */
+            wlbb?: AltOutputs.Wlbb;
+            /** HTML representation of the document. Present when `extensions.altOutputs.returnHtml` was enabled. */
+            html?: string;
+            /** XML representation of the document. Present when `extensions.altOutputs.returnXml` was enabled. (WIP) */
+            xml?: string;
+        }
+        namespace AltOutputs {
+            /**
+             * Word-level bounding box data. Present when `extensions.altOutputs.wlbb` was enabled and input is a PDF.
+             */
+            interface Wlbb {
+                /** List of detected words with their positions. */
+                words?: Wlbb.Words.Item[];
+                /** Error message if word-level extraction failed. */
+                error?: string;
+            }
+            namespace Wlbb {
+                type Words = Words.Item[];
+                namespace Words {
+                    interface Item {
+                        /** Unique identifier for the word (e.g. "w-1", "w-2", …). */
+                        id?: string;
+                        /** The recognised word text. */
+                        text?: string;
+                        /** 1-indexed page number in the original document. */
+                        page_number?: number;
+                        /** Flat 4-corner polygon: [x1,y1, x2,y2, x3,y3, x4,y4]. All coordinates normalised to 0–1 range. */
+                        bounding_box?: number[];
+                        /** Recognition confidence score (0–1). */
+                        average_word_confidence?: number;
+                    }
+                }
+            }
+        }
+    }
+    /**
+     * Billing tier and usage information.
+     */
+    interface PlanInfo {
+        /** Current plan tier name. */
+        tier?: string;
+        /** Cumulative pages used after this extraction. */
+        pages_used?: number;
+        /** Human-readable plan note. */
+        note?: string;
+    }
+}

package/dist/esm/api/types/ExtractResultCore.mjs ADDED Viewed

@@ -0,0 +1,2 @@
+// This file was auto-generated by Fern from our API Definition.
+export {};

package/dist/esm/api/types/GoneErrorBody.d.mts ADDED Viewed

@@ -0,0 +1,9 @@
+export interface GoneErrorBody {
+    error?: GoneErrorBody.Error_;
+}
+export declare namespace GoneErrorBody {
+    interface Error_ {
+        code?: string;
+        message?: string;
+    }
+}

package/dist/esm/api/types/GoneErrorBody.mjs ADDED Viewed

@@ -0,0 +1,2 @@
+// This file was auto-generated by Fern from our API Definition.
+export {};

package/dist/esm/api/types/JobStatusResponse.d.mts CHANGED Viewed

@@ -10,7 +10,7 @@ export interface JobStatusResponse {
     created_at: string;
     /** Timestamp of the last status update, if available. */
     updated_at?: string;
-    /** Structured payload that contains output when the job is completed. */
+    /** Structured payload returned when the job completes.  For large extractions (70+ pages) this object contains `is_url: true` and a single-use `url` to download the full result via `GET /large_results/{jobId}`. */
     result?: Record<string, unknown>;
     /** Error message describing why the job failed, if applicable. */
     error?: string;

package/dist/esm/api/types/PipelineBatchExtractResult.d.mts ADDED Viewed

@@ -0,0 +1,13 @@
+/**
+ * Result from the batch_extract step in a pipeline.
+ */
+export interface PipelineBatchExtractResult {
+    /** List of extraction IDs for each processed file. */
+    extraction_ids?: string[];
+    /** Number of files processed. */
+    total_files?: number;
+    /** Step execution time in seconds. */
+    latency?: number;
+    /** Accepts any additional properties */
+    [key: string]: any;
+}

package/dist/esm/api/types/PipelineBatchExtractResult.mjs ADDED Viewed

@@ -0,0 +1,2 @@
+// This file was auto-generated by Fern from our API Definition.
+export {};

package/dist/esm/api/types/PipelineExecuteMultipartInput.d.mts ADDED Viewed

@@ -0,0 +1,13 @@
+/**
+ * Multipart form-data request body for ad-hoc pipeline execution. Upload file(s) directly and provide step configs as a JSON string.
+ */
+export interface PipelineExecuteMultipartInput {
+    /** Document file to upload. For batch_extract, multiple `file` fields can be provided. */
+    file?: string;
+    /** JSON string containing the step configurations (same structure as `PipelineSteps`). */
+    steps: string;
+    /** Set to `"true"` for async execution. */
+    async?: string;
+    /** Set to `"true"` for zero-retention mode. */
+    auto_delete?: string;
+}

package/dist/esm/api/types/PipelineExecuteMultipartInput.mjs ADDED Viewed

@@ -0,0 +1,2 @@
+// This file was auto-generated by Fern from our API Definition.
+export {};

package/dist/esm/api/types/PipelineExecuteResponse.d.mts ADDED Viewed

@@ -0,0 +1,21 @@
+import type * as Pulse from "../index.mjs";
+/**
+ * Synchronous pipeline execution result. Contains the status, total latency, and results from each step.
+ */
+export interface PipelineExecuteResponse {
+    /** Pipeline execution status. */
+    status: PipelineExecuteResponse.Status;
+    /** Total pipeline execution time in seconds. */
+    total_latency?: number;
+    /** Per-step results. */
+    results: Pulse.PipelineResults;
+    /** Present and true when auto-delete mode was used. Confirms that artifacts have been purged. */
+    auto_delete?: boolean;
+}
+export declare namespace PipelineExecuteResponse {
+    /** Pipeline execution status. */
+    const Status: {
+        readonly Completed: "completed";
+    };
+    type Status = (typeof Status)[keyof typeof Status];
+}

package/dist/esm/api/types/PipelineExecuteResponse.mjs ADDED Viewed

@@ -0,0 +1,8 @@
+// This file was auto-generated by Fern from our API Definition.
+export var PipelineExecuteResponse;
+(function (PipelineExecuteResponse) {
+    /** Pipeline execution status. */
+    PipelineExecuteResponse.Status = {
+        Completed: "completed",
+    };
+})(PipelineExecuteResponse || (PipelineExecuteResponse = {}));

package/dist/esm/api/types/PipelineExtractResult.d.mts ADDED Viewed

@@ -0,0 +1,118 @@
+/**
+ * Result from the extract step in a pipeline. Inherits all core extraction fields and adds pipeline-specific timing.
+ */
+export interface PipelineExtractResult {
+    /** Primary markdown content extracted from the document. Always present in the new format. */
+    markdown?: string;
+    /** Output from enabled extensions. Each key corresponds to an extension that was enabled in the request under `extensions.*`. Only keys for enabled extensions are present. */
+    extensions?: PipelineExtractResult.Extensions;
+    /** Positional bounding-box data for text, titles, headers, footers, images, and tables. Used by the frontend for annotation overlays. */
+    bounding_boxes?: Record<string, unknown>;
+    /** Persisted extraction ID. Present when storage is enabled (default). Use this ID with `/split` and `/schema` endpoints. */
+    extraction_id?: string;
+    /** URL to view the extraction on the Pulse platform. Present when storage is enabled. */
+    extraction_url?: string;
+    /** Number of pages processed. */
+    page_count?: number;
+    /** Billing tier and usage information. */
+    plan_info?: PipelineExtractResult.PlanInfo;
+    /** Non-fatal warnings generated during extraction. Includes deprecation notices when legacy input parameters are used, as well as processing warnings (e.g. word-level bounding box limitations). */
+    warnings?: string[];
+    /** Number of credits consumed by this request. Only present when the organization has the credit billing system enabled. */
+    credits_used?: number | null;
+    /** Number of pages processed. Convenience alias for `page_count`. */
+    pages?: number;
+    /** Step execution time in seconds. */
+    latency?: number;
+    /** Accepts any additional properties */
+    [key: string]: any;
+}
+export declare namespace PipelineExtractResult {
+    /**
+     * Output from enabled extensions. Each key corresponds to an extension that was enabled in the request under `extensions.*`. Only keys for enabled extensions are present.
+     */
+    interface Extensions {
+        /** Chunk results by strategy. Present when `extensions.chunking` was provided in the request. */
+        chunking?: Extensions.Chunking;
+        /** List of detected footnotes with their in-text references. Present when `extensions.footnoteReferences` was enabled. Each item links a footnote paragraph to the body-text paragraphs that reference it, using bounding-box text IDs. */
+        footnoteReferences?: Extensions.FootnoteReferences.Item[];
+        /** Alternate output formats. Each key corresponds to an enabled alt output. */
+        altOutputs?: Extensions.AltOutputs;
+    }
+    namespace Extensions {
+        /**
+         * Chunk results by strategy. Present when `extensions.chunking` was provided in the request.
+         */
+        interface Chunking {
+            /** Semantically-segmented chunks. */
+            semantic?: string[];
+            /** Chunks split by document headers/headings. */
+            header?: string[];
+            /** One chunk per page. */
+            page?: string[];
+            /** Recursively-split chunks respecting size limits. */
+            recursive?: string[];
+        }
+        type FootnoteReferences = FootnoteReferences.Item[];
+        namespace FootnoteReferences {
+            interface Item {
+                /** The footnote marker symbol (e.g. "*", "†", "1", "#"). */
+                symbol?: string;
+                /** The bounding-box text ID (e.g. "txt-15") of the footnote explanation paragraph. */
+                footnoteTextId?: string;
+                /** Bounding-box text IDs of body-text paragraphs that contain a reference to this footnote marker. */
+                referenceTextIds?: string[];
+            }
+        }
+        /**
+         * Alternate output formats. Each key corresponds to an enabled alt output.
+         */
+        interface AltOutputs {
+            /** Word-level bounding box data. Present when `extensions.altOutputs.wlbb` was enabled and input is a PDF. */
+            wlbb?: AltOutputs.Wlbb;
+            /** HTML representation of the document. Present when `extensions.altOutputs.returnHtml` was enabled. */
+            html?: string;
+            /** XML representation of the document. Present when `extensions.altOutputs.returnXml` was enabled. (WIP) */
+            xml?: string;
+        }
+        namespace AltOutputs {
+            /**
+             * Word-level bounding box data. Present when `extensions.altOutputs.wlbb` was enabled and input is a PDF.
+             */
+            interface Wlbb {
+                /** List of detected words with their positions. */
+                words?: Wlbb.Words.Item[];
+                /** Error message if word-level extraction failed. */
+                error?: string;
+            }
+            namespace Wlbb {
+                type Words = Words.Item[];
+                namespace Words {
+                    interface Item {
+                        /** Unique identifier for the word (e.g. "w-1", "w-2", …). */
+                        id?: string;
+                        /** The recognised word text. */
+                        text?: string;
+                        /** 1-indexed page number in the original document. */
+                        page_number?: number;
+                        /** Flat 4-corner polygon: [x1,y1, x2,y2, x3,y3, x4,y4]. All coordinates normalised to 0–1 range. */
+                        bounding_box?: number[];
+                        /** Recognition confidence score (0–1). */
+                        average_word_confidence?: number;
+                    }
+                }
+            }
+        }
+    }
+    /**
+     * Billing tier and usage information.
+     */
+    interface PlanInfo {
+        /** Current plan tier name. */
+        tier?: string;
+        /** Cumulative pages used after this extraction. */
+        pages_used?: number;
+        /** Human-readable plan note. */
+        note?: string;
+    }
+}

package/dist/esm/api/types/PipelineExtractResult.mjs ADDED Viewed

@@ -0,0 +1,2 @@
+// This file was auto-generated by Fern from our API Definition.
+export {};

package/dist/esm/api/types/PipelineResults.d.mts ADDED Viewed

@@ -0,0 +1,14 @@
+import type * as Pulse from "../index.mjs";
+/**
+ * Results from each pipeline step, keyed by step name. Only steps that were executed are present.
+ */
+export interface PipelineResults {
+    /** Extract step result. */
+    extract?: Pulse.PipelineExtractResult;
+    /** Batch extract step result. */
+    batch_extract?: Pulse.PipelineBatchExtractResult;
+    /** Schema step result. */
+    schema?: Pulse.PipelineSchemaResult;
+    /** Split step result. */
+    split?: Pulse.PipelineSplitResult;
+}

package/dist/esm/api/types/PipelineResults.mjs ADDED Viewed

@@ -0,0 +1,2 @@
+// This file was auto-generated by Fern from our API Definition.
+export {};

package/dist/esm/api/types/PipelineSchemaResult.d.mts ADDED Viewed

@@ -0,0 +1,14 @@
+import type * as Pulse from "../index.mjs";
+/**
+ * Result from the schema step in a pipeline.
+ */
+export interface PipelineSchemaResult {
+    /** Persisted schema version ID. */
+    schema_id?: string;
+    /** Extracted values and citations. */
+    schema_output?: Pulse.StructuredOutputResult;
+    /** Step execution time in seconds. */
+    latency?: number;
+    /** Accepts any additional properties */
+    [key: string]: any;
+}

package/dist/esm/api/types/PipelineSchemaResult.mjs ADDED Viewed

@@ -0,0 +1,2 @@
+// This file was auto-generated by Fern from our API Definition.
+export {};

package/dist/esm/api/types/PipelineSplitResult.d.mts ADDED Viewed

@@ -0,0 +1,18 @@
+import type * as Pulse from "../index.mjs";
+/**
+ * Result from the split step in a pipeline. Inherits all core split fields and adds pipeline-specific timing and input echo.
+ */
+export interface PipelineSplitResult {
+    /** Unique identifier for this split result. Use this ID with the `/schema` endpoint (split mode) to apply schemas to specific page groups. */
+    split_id?: string;
+    /** Page assignments per topic. */
+    split_output?: Pulse.SplitOutput;
+    /** Number of credits consumed by this request. Only present when the organization has the credit billing system enabled. */
+    credits_used?: number | null;
+    /** Echo of the topic definitions used. */
+    split_input?: Pulse.TopicDefinition[];
+    /** Step execution time in seconds. */
+    latency?: number;
+    /** Accepts any additional properties */
+    [key: string]: any;
+}

package/dist/esm/api/types/PipelineSplitResult.mjs ADDED Viewed

@@ -0,0 +1,2 @@
+// This file was auto-generated by Fern from our API Definition.
+export {};

package/dist/esm/api/types/PipelineStepBatchExtractConfig.d.mts ADDED Viewed

@@ -0,0 +1,146 @@
+import type * as Pulse from "../index.mjs";
+/**
+ * Configuration for the batch_extract step. Same extract options as `POST /extract` (applied to every file) plus file inputs and a `workers` field for parallelism. Provide files via `file_urls` (URL list) or `files` (base64 inline uploads).
+ */
+export interface PipelineStepBatchExtractConfig {
+    /** Extraction model to use. When set to `enterprise-preview`, routes the request through Pulse's self-hosted VPC extraction model instead of the default cloud-based service. If omitted or set to any other value, the default model is used. */
+    model?: PipelineStepBatchExtractConfig.Model;
+    /** Page range filter supporting segments such as `1-2` or mixed ranges like `1-2,5`. */
+    pages?: string;
+    /** Settings that control how figures in the document are processed. These affect the markdown output directly (e.g. figure descriptions, chart-to-table conversion, image embedding) and do not produce additional output fields in the response. */
+    figureProcessing?: PipelineStepBatchExtractConfig.FigureProcessing;
+    /** Settings that enable additional processing passes or alternate output formats. Each enabled extension produces a corresponding output field under `response.extensions.*`. */
+    extensions?: PipelineStepBatchExtractConfig.Extensions;
+    /** Settings for Excel/spreadsheet extraction. Controls handling of hidden rows, columns, and sheets. Only applies to `.xlsx` and `.xls` files. Accepts both camelCase and snake_case field names. */
+    spreadsheet?: PipelineStepBatchExtractConfig.Spreadsheet;
+    /** Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created. */
+    storage?: PipelineStepBatchExtractConfig.Storage;
+    /** If true, returns immediately with a job_id for polling via GET /job/{jobId}. Otherwise processes synchronously. */
+    async?: boolean;
+    /** **⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version. */
+    structuredOutput?: PipelineStepBatchExtractConfig.StructuredOutput;
+    /** (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation. */
+    schema?: PipelineStepBatchExtractConfig.Schema;
+    /** (Deprecated) Natural language prompt for schema-guided extraction. Use structuredOutput.schemaPrompt instead. */
+    schemaPrompt?: string;
+    /** (Deprecated) Custom instructions that augment the default extraction behaviour. Use `figureProcessing` or `extensions` instead. */
+    customPrompt?: string;
+    /** **⚠️ DEPRECATED** — Use `extensions.chunking.chunkTypes` instead. Comma-separated list of chunking strategies to apply (for example `semantic,header,page,recursive`). Still accepted for backward compatibility. */
+    chunking?: string;
+    /** **⚠️ DEPRECATED** — Use `extensions.chunking.chunkSize` instead. Override for maximum characters per chunk when chunking is enabled. */
+    chunkSize?: number;
+    /** **⚠️ DEPRECATED** — Toggle to enable figure extraction in results. */
+    extractFigure?: boolean;
+    /** **⚠️ DEPRECATED** — Use `figureProcessing.description` instead. Toggle to generate descriptive captions for extracted figures. */
+    figureDescription?: boolean;
+    /** **⚠️ DEPRECATED** — Use `figureProcessing.showImages` instead. Embed base64-encoded images inline in figure tags in the output. Increases response size. */
+    showImages?: boolean;
+    /** **⚠️ DEPRECATED** — Use `extensions.altOutputs.returnHtml` instead. Whether to include HTML representation alongside markdown in the response. */
+    returnHtml?: boolean;
+    /** (Deprecated) Enables expanded rationale output for debugging. */
+    thinking?: boolean;
+    /** List of document URLs to process in parallel. */
+    file_urls?: string[];
+    /** Inline file uploads. Each entry contains a filename and base64-encoded file content. Use instead of `file_urls` when you have local files. */
+    files?: Pulse.BatchFileUpload[];
+    /** Number of parallel workers. */
+    workers?: number;
+}
+export declare namespace PipelineStepBatchExtractConfig {
+    /** Extraction model to use. When set to `enterprise-preview`, routes the request through Pulse's self-hosted VPC extraction model instead of the default cloud-based service. If omitted or set to any other value, the default model is used. */
+    const Model: {
+        readonly EnterprisePreview: "enterprise-preview";
+    };
+    type Model = (typeof Model)[keyof typeof Model];
+    /**
+     * Settings that control how figures in the document are processed. These affect the markdown output directly (e.g. figure descriptions, chart-to-table conversion, image embedding) and do not produce additional output fields in the response.
+     */
+    interface FigureProcessing {
+        /** Generate descriptive captions for extracted figures. */
+        description?: boolean;
+        /** Embed base64-encoded images inline in figure tags in the output. Increases response size. */
+        showImages?: boolean;
+    }
+    /**
+     * Settings that enable additional processing passes or alternate output formats. Each enabled extension produces a corresponding output field under `response.extensions.*`.
+     */
+    interface Extensions {
+        /** Link footnote markers to their corresponding footnote text. */
+        footnoteReferences?: boolean;
+        /** Chunking configuration. When provided, the document is split into chunks using the specified strategies. Results appear in `response.extensions.chunking`. */
+        chunking?: Extensions.Chunking;
+        /** Alternate output format options. Each enabled format produces a corresponding field under `response.extensions.altOutputs`. */
+        altOutputs?: Extensions.AltOutputs;
+    }
+    namespace Extensions {
+        /**
+         * Chunking configuration. When provided, the document is split into chunks using the specified strategies. Results appear in `response.extensions.chunking`.
+         */
+        interface Chunking {
+            /** List of chunking strategies to apply (e.g. `["semantic", "header", "page", "recursive"]`). */
+            chunkTypes?: Chunking.ChunkTypes.Item[];
+            /** Maximum characters per chunk. */
+            chunkSize?: number;
+        }
+        namespace Chunking {
+            type ChunkTypes = ChunkTypes.Item[];
+            namespace ChunkTypes {
+                const Item: {
+                    readonly Semantic: "semantic";
+                    readonly Header: "header";
+                    readonly Page: "page";
+                    readonly Recursive: "recursive";
+                };
+                type Item = (typeof Item)[keyof typeof Item];
+            }
+        }
+        /**
+         * Alternate output format options. Each enabled format produces a corresponding field under `response.extensions.altOutputs`.
+         */
+        interface AltOutputs {
+            /** Enable word-level bounding boxes. Runs an additional OCR model to derive bounding boxes for each word. Only applies to PDFs. Results in `response.extensions.altOutputs.wlbb`. */
+            wlbb?: boolean;
+            /** Include an HTML representation of the document. When enabled, `response.markdown` is still present and the HTML is available at `response.extensions.altOutputs.html`. */
+            returnHtml?: boolean;
+            /** Include an XML representation of the document. Results in `response.extensions.altOutputs.xml`. (Work in progress.) */
+            returnXml?: boolean;
+        }
+    }
+    /**
+     * Settings for Excel/spreadsheet extraction. Controls handling of hidden rows, columns, and sheets. Only applies to `.xlsx` and `.xls` files. Accepts both camelCase and snake_case field names.
+     */
+    interface Spreadsheet {
+        /** Include rows that are hidden in the Excel workbook. */
+        includeHiddenRows?: boolean;
+        /** Include columns that are hidden in the Excel workbook. */
+        includeHiddenCols?: boolean;
+        /** Include sheets that are hidden in the Excel workbook. */
+        includeHiddenSheets?: boolean;
+    }
+    /**
+     * Options for persisting extraction artifacts. When enabled (default), artifacts are saved to storage and a database record is created.
+     */
+    interface Storage {
+        /** Whether to persist extraction artifacts. Set to false for temporary extractions with no storage or database record. */
+        enabled?: boolean;
+        /** Target folder name to save the extraction to. Creates the folder if it doesn't exist. */
+        folderName?: string;
+        /** Target folder ID to save the extraction to. Takes precedence over folderName if both are provided. */
+        folderId?: string;
+    }
+    /**
+     * **⚠️ DEPRECATED** — Use the `/schema` endpoint after extraction instead. Pass the `extraction_id` from the extract response to `/schema` with your `schema_config`. This parameter still works for backward compatibility but will be removed in a future version.
+     */
+    interface StructuredOutput {
+        /** JSON schema describing the structured data to extract. */
+        schema?: Record<string, unknown>;
+        /** Natural language prompt with additional extraction instructions. */
+        schemaPrompt?: string;
+        /** Use higher quality model for better results. When true, uses a more capable model at the cost of higher latency. */
+        effort?: boolean;
+    }
+    /**
+     * (Deprecated) JSON schema describing structured data to extract. Use structuredOutput instead. Accepts either a JSON object or a stringified JSON representation.
+     */
+    type Schema = Record<string, unknown> | string;
+}

package/dist/esm/api/types/PipelineStepBatchExtractConfig.mjs ADDED Viewed

@@ -0,0 +1,23 @@
+// This file was auto-generated by Fern from our API Definition.
+export var PipelineStepBatchExtractConfig;
+(function (PipelineStepBatchExtractConfig) {
+    /** Extraction model to use. When set to `enterprise-preview`, routes the request through Pulse's self-hosted VPC extraction model instead of the default cloud-based service. If omitted or set to any other value, the default model is used. */
+    PipelineStepBatchExtractConfig.Model = {
+        EnterprisePreview: "enterprise-preview",
+    };
+    let Extensions;
+    (function (Extensions) {
+        let Chunking;
+        (function (Chunking) {
+            let ChunkTypes;
+            (function (ChunkTypes) {
+                ChunkTypes.Item = {
+                    Semantic: "semantic",
+                    Header: "header",
+                    Page: "page",
+                    Recursive: "recursive",
+                };
+            })(ChunkTypes = Chunking.ChunkTypes || (Chunking.ChunkTypes = {}));
+        })(Chunking = Extensions.Chunking || (Extensions.Chunking = {}));
+    })(Extensions = PipelineStepBatchExtractConfig.Extensions || (PipelineStepBatchExtractConfig.Extensions = {}));
+})(PipelineStepBatchExtractConfig || (PipelineStepBatchExtractConfig = {}));