npm - @heripo/model - Versions diffs - 0.1.0 - Mend

@heripo/model 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/index.d.cts ADDED Viewed

@@ -0,0 +1,700 @@
+interface DoclingReference { // Pointer to another node, carried as a single `$ref` string.
+    $ref: string; // presumably equals the `self_ref` of the target node — TODO confirm against the Docling schema
+}
+interface DoclingBBox { // Bounding box: four numeric coordinates plus the origin convention they are measured from.
+    l: number; // presumably the left edge — TODO confirm
+    t: number; // presumably the top edge — TODO confirm
+    r: number; // presumably the right edge — TODO confirm
+    b: number; // presumably the bottom edge — TODO confirm
+    coord_origin: 'BOTTOMLEFT' | 'TOPLEFT' | string; // NOTE(review): `| string` widens this to any string, so the two literals only document known values
+}
+interface DoclingProv { // Location record: a page number, a bounding box and a character span.
+    page_no: number; // NOTE(review): not visible here whether this is 0- or 1-based — confirm
+    bbox: DoclingBBox;
+    charspan: [number, number]; // two-element tuple; presumably [start, end] character offsets — TODO confirm
+}
+interface DoclingOrigin { // Source-file metadata: MIME type, a numeric hash and a file name.
+    mimetype: string;
+    binary_hash: number; // NOTE(review): if upstream emits a 64-bit hash it can exceed Number.MAX_SAFE_INTEGER and lose precision — confirm
+    filename: string;
+}
+interface DoclingBaseNode { // Fields shared by every node interface that extends this one.
+    self_ref: string; // this node's own reference string
+    parent?: DoclingReference; // optional link to a parent node
+    children: DoclingReference[]; // links to child nodes; required, so presumably an empty array when there are none — confirm
+    content_layer: string;
+    label?: string; // optional here; each extending interface visible in this file redeclares it as required
+}
+interface DoclingTextItem extends DoclingBaseNode { // Text-bearing node; `label` names the kind of text.
+    label: 'text' | 'section_header' | 'list_item' | 'footnote' | 'caption' | 'page_footer' | 'page_header' | string; // NOTE(review): `| string` accepts any string; the literals only document known labels
+    prov: DoclingProv[];
+    orig: string; // NOTE(review): presumably the text as extracted, with `text` a cleaned form — confirm
+    text: string;
+    level?: number; // optional; presumably set for 'section_header' items — confirm
+    enumerated?: boolean; // optional; presumably set for 'list_item' items — confirm
+    marker?: string; // optional; presumably the bullet or number marker of a list item — confirm
+}
+interface DoclingGroupItem extends DoclingBaseNode { // Grouping node; adds only `name` and a required `label` to the base node fields.
+    name: 'list' | 'group' | string; // NOTE(review): `| string` accepts any string; the literals only document known names
+    label: 'list' | 'key_value_area' | string; // same widening applies: any string is accepted
+}
+interface DoclingPictureItem extends DoclingBaseNode { // Picture node: locations plus caption, reference, footnote and annotation lists.
+    label: 'picture' | string; // NOTE(review): `| string` accepts any string; 'picture' only documents the known label
+    prov: DoclingProv[];
+    captions: DoclingReference[]; // references to caption nodes; presumably entries of DoclingDocument.texts — confirm
+    references: any[]; // element shape is not typed here
+    footnotes: any[]; // NOTE(review): untyped here, while DoclingTableItem.footnotes is DoclingReference[] — confirm whether they should match
+    annotations: any[]; // element shape is not typed here
+}
+interface DoclingTableCell { // One table cell: bounding box, spans, row/column offset range, text and four boolean flags.
+    bbox: DoclingBBox;
+    row_span: number;
+    col_span: number;
+    start_row_offset_idx: number; // NOTE(review): whether the matching end_* offsets are inclusive or exclusive is not visible here — confirm
+    end_row_offset_idx: number;
+    start_col_offset_idx: number;
+    end_col_offset_idx: number;
+    text: string;
+    column_header: boolean;
+    row_header: boolean;
+    row_section: boolean;
+    fillable: boolean;
+}
+interface DoclingTableData { // Table contents as both a flat cell list and a 2D grid, with declared row/column counts.
+    table_cells: DoclingTableCell[];
+    num_rows: number;
+    num_cols: number;
+    grid: DoclingTableCell[][]; // presumably indexed grid[row][col] with num_rows x num_cols entries — TODO confirm
+}
+interface DoclingTableItem extends DoclingBaseNode { // Table node: locations, caption/reference/footnote lists and the cell data.
+    label: 'table' | 'document_index' | string; // NOTE(review): `| string` accepts any string; the literals only document known labels
+    prov: DoclingProv[];
+    captions: DoclingReference[];
+    references: any[]; // element shape is not typed here
+    footnotes: DoclingReference[];
+    data: DoclingTableData; // cell list, dimensions and grid for this table
+}
+interface DoclingBody extends DoclingBaseNode { // Node type used for both the `furniture` and `body` fields of DoclingDocument.
+    name: '_root_' | string; // NOTE(review): `| string` accepts any string; '_root_' only documents the known name
+    label: 'unspecified' | string; // same widening applies: any string is accepted
+}
+interface DoclingPageImage { // Image attached to a page: MIME type, DPI, size and a URI string.
+    mimetype: string;
+    dpi: number;
+    size: { // NOTE(review): units are not stated here (presumably pixels) — confirm
+        width: number;
+        height: number;
+    };
+    uri: string; // NOTE(review): may be a file path or a data URI; not visible from this declaration — confirm
+}
+interface DoclingPage { // One page entry: its size, its image and its page number.
+    size: { // NOTE(review): units are not stated here — confirm
+        width: number;
+        height: number;
+    };
+    image: DoclingPageImage; // required: every page entry carries an image descriptor
+    page_no: number;
+}
+interface DoclingDocument { // Top-level document: metadata, two root nodes, flat item arrays and a page map.
+    schema_name: 'DoclingDocument' | string; // NOTE(review): `| string` accepts any string; the literal only documents the known value
+    version: string;
+    name: string;
+    origin: DoclingOrigin;
+    furniture: DoclingBody; // same node type as `body`
+    body: DoclingBody;
+    groups: DoclingGroupItem[];
+    texts: DoclingTextItem[];
+    pictures: DoclingPictureItem[];
+    tables: DoclingTableItem[];
+    pages: Record<string, DoclingPage>; // string-keyed map; presumably keyed by the page number as a string — TODO confirm
+}
+/**
+ * Caption information
+ *
+ * Represents the caption of an image or a table.
+ * Holds the full caption text and, when one could be extracted, its numbered prefix.
+ *
+ * @interface Caption
+ */
+interface Caption {
+    /**
+     * Caption prefix with number (optional)
+     *
+     * Prefix with number extracted from the caption text, preserving the original spacing.
+     * Example: "도판 1" (Korean, "Plate 1") from "도판 1 유적 전경",
+     *          "Figure 2" from "Figure 2: Site overview"
+     * Optional so that captions which do not start with a number can be represented.
+     *
+     * @type {string}
+     */
+    num?: string;
+    /**
+     * Full text of the caption
+     *
+     * Complete caption text including number and description.
+     * Example: "도판 1 유적 전경" (Korean, "Plate 1 Panoramic view of the site"),
+     *          "Figure 2: Site overview",
+     *          "Table 3-2. 유물 목록" ("유물 목록" is Korean for "artifact list")
+     *
+     * @type {string}
+     */
+    fullText: string;
+}
+/**
+ * Page range of the actual document contained in one PDF page
+ *
+ * In scanned PDFs, a single PDF page may contain several pages of the actual
+ * document. (Example: A double-sided document scanned on one page)
+ *
+ * Both bounds are inclusive; when a PDF page holds exactly one document page,
+ * startPageNo and endPageNo are equal.
+ *
+ * @interface PageRange
+ */
+interface PageRange {
+    /**
+     * Starting page number in the actual document (inclusive)
+     * @type {number}
+     */
+    startPageNo: number;
+    /**
+     * Ending page number in the actual document (inclusive)
+     * @type {number}
+     */
+    endPageNo: number;
+}
+/**
+ * Text block (paragraph, sentence, etc.)
+ *
+ * Represents a piece of actual text content inside a chapter, together with
+ * the PDF page it was found on.
+ *
+ * @interface TextBlock
+ */
+interface TextBlock {
+    /**
+     * Content of the text block
+     * @type {string}
+     */
+    text: string;
+    /**
+     * Page number in the PDF file
+     *
+     * This is the PDF page, not a page number of the actual document
+     * (compare Chapter.pageNo, which is documented as the actual-document page).
+     *
+     * @type {number}
+     */
+    pdfPageNo: number;
+}
+/**
+ * Chapter (section) of the document
+ *
+ * Represents the hierarchical structure of the document. Each item carries an
+ * identifier, the original and cleaned titles, the actual page number, the
+ * hierarchy depth, its text content, the IDs of the images, tables and
+ * footnotes it contains, and optionally its child sections.
+ *
+ * @interface Chapter
+ */
+interface Chapter {
+    /**
+     * Unique identifier of the chapter
+     *
+     * Used when referencing the chapter in images, tables, etc.
+     *
+     * @type {string}
+     */
+    id: string;
+    /**
+     * Title as it appears in the original report (not cleaned)
+     * @type {string}
+     */
+    originTitle: string;
+    /**
+     * Chapter title (cleaned title)
+     * @type {string}
+     */
+    title: string;
+    /**
+     * Page number in the actual document (page where this chapter starts)
+     *
+     * Note that text blocks, images, tables and footnotes use PDF page
+     * numbers (`pdfPageNo`) instead.
+     *
+     * @type {number}
+     */
+    pageNo: number;
+    /**
+     * Hierarchy depth of the section (1 = top-level, 2 = subsection, etc.)
+     * @type {number}
+     */
+    level: number;
+    /**
+     * Text blocks inside the chapter
+     *
+     * Stores all text content included in this chapter as an array.
+     * Each text block includes a PDF page number.
+     *
+     * @type {TextBlock[]}
+     */
+    textBlocks: TextBlock[];
+    /**
+     * List of image IDs included in the chapter
+     *
+     * Images can be found by ID in ProcessedDocument.images.
+     *
+     * @type {string[]}
+     */
+    imageIds: string[];
+    /**
+     * List of table IDs included in the chapter
+     *
+     * Tables can be found by ID in ProcessedDocument.tables.
+     *
+     * @type {string[]}
+     */
+    tableIds: string[];
+    /**
+     * List of footnote IDs included in the chapter
+     *
+     * Footnotes can be found by ID in ProcessedDocument.footnotes.
+     *
+     * @type {string[]}
+     */
+    footnoteIds: string[];
+    /**
+     * Child chapters (recursive structure)
+     *
+     * Optional: the property may be absent.
+     *
+     * @type {Chapter[]}
+     */
+    children?: Chapter[];
+}
+/**
+ * Image information included in the processed PDF document
+ *
+ * Represents an image extracted from the document together with its metadata.
+ *
+ * @interface ProcessedImage
+ */
+interface ProcessedImage {
+    /**
+     * Unique identifier of the image
+     *
+     * Used when referencing the image in chapters (see Chapter.imageIds).
+     *
+     * @type {string}
+     */
+    id: string;
+    /**
+     * Caption information for the image (if available)
+     * @type {Caption}
+     */
+    caption?: Caption;
+    /**
+     * Page number in the PDF file (page where this image is located)
+     * @type {number}
+     */
+    pdfPageNo: number;
+    /**
+     * Path of the extracted image file
+     *
+     * Location of the saved image file, given as an absolute or a relative path.
+     * NOTE(review): what a relative path is resolved against is not stated here — confirm.
+     *
+     * @type {string}
+     */
+    path: string;
+}
+/**
+ * Cell information of a table
+ *
+ * @interface ProcessedTableCell
+ */
+interface ProcessedTableCell {
+    /**
+     * Text content of the cell
+     * @type {string}
+     */
+    text: string;
+    /**
+     * Number of rows to span (default: 1)
+     *
+     * The property is required, so "default" describes the usual value
+     * rather than a value applied when the property is omitted.
+     *
+     * @type {number}
+     */
+    rowSpan: number;
+    /**
+     * Number of columns to span (default: 1)
+     *
+     * The property is required, so "default" describes the usual value
+     * rather than a value applied when the property is omitted.
+     *
+     * @type {number}
+     */
+    colSpan: number;
+    /**
+     * Whether the cell is a header cell (column or row header)
+     * @type {boolean}
+     */
+    isHeader: boolean;
+}
+/**
+ * Table information included in the processed PDF document
+ *
+ * Represents a table extracted from the document together with its metadata.
+ * Structured data such as artifact lists, stratigraphy information, etc., are mainly provided in table form.
+ *
+ * @interface ProcessedTable
+ */
+interface ProcessedTable {
+    /**
+     * Unique identifier of the table
+     *
+     * Used when referencing the table in chapters (see Chapter.tableIds).
+     *
+     * @type {string}
+     */
+    id: string;
+    /**
+     * Caption information for the table (if available)
+     * @type {Caption}
+     */
+    caption?: Caption;
+    /**
+     * Page number in the PDF file (page where this table is located)
+     * @type {number}
+     */
+    pdfPageNo: number;
+    /**
+     * Number of rows in the table
+     * @type {number}
+     */
+    numRows: number;
+    /**
+     * Number of columns in the table
+     * @type {number}
+     */
+    numCols: number;
+    /**
+     * Table data (2D array)
+     *
+     * Access using grid[row][col].
+     * NOTE(review): presumably numRows rows of numCols cells each; how spanned
+     * cells are represented in the grid is not stated here — confirm.
+     *
+     * @type {ProcessedTableCell[][]}
+     */
+    grid: ProcessedTableCell[][];
+}
+/**
+ * Footnote information included in the processed PDF document
+ *
+ * Represents a footnote extracted from the document together with its metadata.
+ * Footnotes provide supplementary information referenced in the main text.
+ *
+ * @interface ProcessedFootnote
+ */
+interface ProcessedFootnote {
+    /**
+     * Unique identifier of the footnote
+     *
+     * Used when referencing the footnote in chapters (see Chapter.footnoteIds).
+     *
+     * @type {string}
+     */
+    id: string;
+    /**
+     * Text content of the footnote
+     *
+     * @type {string}
+     */
+    text: string;
+    /**
+     * Page number in the PDF file (page where this footnote is located)
+     * @type {number}
+     */
+    pdfPageNo: number;
+}
+/**
+ * Processed PDF document model
+ *
+ * An intermediate model that has been cleaned and structured to efficiently deliver
+ * the original document extracted from Docling for LLM analysis.
+ *
+ * Images, tables and footnotes are stored in flat lists; chapters refer to
+ * them by ID through imageIds, tableIds and footnoteIds.
+ *
+ * @interface ProcessedDocument
+ */
+interface ProcessedDocument {
+    /**
+     * Unique identifier of the report
+     * @type {string}
+     */
+    reportId: string;
+    /**
+     * Mapping of page ranges for actual document pages per PDF page
+     *
+     * Keys are PDF page numbers; each value is the inclusive range of actual
+     * document pages found on that PDF page. When multiple pages of the actual
+     * document are contained in a single PDF page, this map tracks which actual
+     * pages are included in each PDF page.
+     *
+     * @type {Record<number, PageRange>}
+     *
+     * @example
+     * ```typescript
+     * {
+     *   1: { startPageNo: 1, endPageNo: 1 },     // PDF 1 = actual 1
+     *   2: { startPageNo: 2, endPageNo: 3 },     // PDF 2 = actual 2~3 (double-sided)
+     *   3: { startPageNo: 4, endPageNo: 4 },     // PDF 3 = actual 4
+     * }
+     * ```
+     */
+    pageRangeMap: Record<number, PageRange>;
+    /**
+     * Chapter structure of the document (hierarchical)
+     *
+     * Represents all chapters of the document in a hierarchical structure, where each chapter
+     * contains title, page information, text content, and child chapters.
+     *
+     * @type {Chapter[]}
+     *
+     * @example
+     * The `id` values below are illustrative placeholders; the real ID format
+     * is not specified by this declaration.
+     * ```typescript
+     * [
+     *   {
+     *     id: 'ch-1',
+     *     originTitle: '  Chapter 1  Introduction  ',
+     *     title: 'Chapter 1 Introduction',
+     *     pageNo: 1,
+     *     level: 1,
+     *     textBlocks: [
+     *       {
+     *         text: 'This chapter describes the background of the excavation project.',
+     *         pdfPageNo: 1
+     *       },
+     *       {
+     *         text: 'The site is located in the central region of the peninsula.',
+     *         pdfPageNo: 2
+     *       }
+     *     ],
+     *     imageIds: [],
+     *     tableIds: [],
+     *     footnoteIds: [],
+     *     children: [
+     *       {
+     *         id: 'ch-1-1',
+     *         originTitle: '1.1 Background',
+     *         title: '1.1 Background',
+     *         pageNo: 1,
+     *         level: 2,
+     *         textBlocks: [
+     *           {
+     *             text: 'The archaeological significance of the region...',
+     *             pdfPageNo: 1
+     *           }
+     *         ],
+     *         imageIds: [],
+     *         tableIds: [],
+     *         footnoteIds: []
+     *       },
+     *       {
+     *         id: 'ch-1-2',
+     *         originTitle: '1.2 Objectives',
+     *         title: '1.2 Objectives',
+     *         pageNo: 3,
+     *         level: 2,
+     *         textBlocks: [
+     *           {
+     *             text: 'The main objectives of this survey are...',
+     *             pdfPageNo: 3
+     *           }
+     *         ],
+     *         imageIds: [],
+     *         tableIds: [],
+     *         footnoteIds: []
+     *       }
+     *     ]
+     *   },
+     *   {
+     *     id: 'ch-2',
+     *     originTitle: 'Chapter 2 Methodology',
+     *     title: 'Chapter 2 Methodology',
+     *     pageNo: 5,
+     *     level: 1,
+     *     textBlocks: [
+     *       {
+     *         text: 'This chapter describes the survey methodology.',
+     *         pdfPageNo: 5
+     *       }
+     *     ],
+     *     imageIds: [],
+     *     tableIds: [],
+     *     footnoteIds: []
+     *   }
+     * ]
+     * ```
+     */
+    chapters: Chapter[];
+    /**
+     * Images included in the document
+     *
+     * A list of extracted images, where each image includes unique ID, caption, PDF page number,
+     * and file path. Referenced through imageIds in chapters.
+     *
+     * @type {ProcessedImage[]}
+     */
+    images: ProcessedImage[];
+    /**
+     * Tables included in the document
+     *
+     * A list of extracted tables containing structured data such as artifact lists, stratigraphy information, etc.
+     * Referenced through tableIds in chapters.
+     *
+     * @type {ProcessedTable[]}
+     */
+    tables: ProcessedTable[];
+    /**
+     * Footnotes included in the document
+     *
+     * A list of extracted footnotes providing supplementary information.
+     * Referenced through footnoteIds in chapters.
+     *
+     * @type {ProcessedFootnote[]}
+     */
+    footnotes: ProcessedFootnote[];
+}
+/**
+ * Token usage report types for document processing
+ *
+ * Provides structured types for tracking and reporting LLM token consumption
+ * across the document processing pipeline, with a detailed breakdown by component,
+ * phase, and model type (primary vs fallback).
+ */
+/**
+ * Detailed token usage report for document processing
+ *
+ * Contains a comprehensive breakdown of token usage across all components
+ * and phases of the processing pipeline, plus a grand total.
+ */
+interface TokenUsageReport {
+    /**
+     * Breakdown by component
+     *
+     * Array of ComponentUsageReport, one for each component that performed LLM calls.
+     * Components are listed in the order they appear in the processing pipeline.
+     */
+    components: ComponentUsageReport[];
+    /**
+     * Grand total across all components and phases
+     *
+     * Sum of all input tokens, output tokens, and total tokens from all components.
+     */
+    total: TokenUsageSummary;
+}
+/**
+ * Token usage for a specific component
+ *
+ * Examples: PageRangeParser, TocExtractor, CaptionParser, CaptionValidator, etc.
+ */
+interface ComponentUsageReport {
+    /**
+     * Component name
+     *
+     * Typed as a free-form string; the names below are examples, not an exhaustive list.
+     *
+     * Examples: 'PageRangeParser', 'TocExtractor', 'TocContentValidator',
+     *           'CaptionParser', 'CaptionValidator', 'VisionTocExtractor'
+     */
+    component: string;
+    /**
+     * Breakdown by phase within this component
+     *
+     * Array of PhaseUsageReport, one for each phase executed by this component.
+     * A component may have multiple phases (e.g., extraction, validation, sampling).
+     */
+    phases: PhaseUsageReport[];
+    /**
+     * Total usage for this component
+     *
+     * Sum of all phases within this component.
+     */
+    total: TokenUsageSummary;
+}
+/**
+ * Token usage for a specific phase
+ *
+ * Examples: extraction, validation, sampling, caption-extraction
+ *
+ * A phase may use both primary and fallback models if primary fails and fallback retry is configured.
+ */
+interface PhaseUsageReport {
+    /**
+     * Phase name
+     *
+     * Examples: 'extraction', 'validation', 'sampling', 'caption-extraction'
+     *
+     * Phase names are set by the component performing the LLM call.
+     */
+    phase: string;
+    /**
+     * Usage by primary model (if any)
+     *
+     * Present if the primary model was attempted and succeeded.
+     * Absent if primary model was never attempted or failed.
+     *
+     * When fallback retry is enabled and primary fails, primary usage data
+     * is not recorded (only the successful fallback attempt is recorded).
+     */
+    primary?: ModelUsageDetail;
+    /**
+     * Usage by fallback model (if any)
+     *
+     * Present if the fallback model was used after primary failure.
+     * Only present when the primary model failed and a fallback model was available.
+     */
+    fallback?: ModelUsageDetail;
+    /**
+     * Total usage for this phase
+     *
+     * Sum of primary usage and fallback usage if both are present.
+     * If only primary or only fallback is present, equals that model's usage.
+     *
+     * NOTE(review): the `primary` description says a failed primary attempt is
+     * not recorded, while `fallback` is only used after a primary failure; it is
+     * therefore unclear when both would be present in one phase — confirm
+     * (possibly when a phase aggregates several LLM calls).
+     */
+    total: TokenUsageSummary;
+}
+/**
+ * Detailed usage for a specific model
+ *
+ * Contains the exact token counts for a model used in a specific phase.
+ * Same three counters as TokenUsageSummary, plus the model name.
+ */
+interface ModelUsageDetail {
+    /**
+     * Model identifier
+     *
+     * Typed as a free-form string; the identifiers below are examples only.
+     *
+     * Examples: 'gpt-5', 'gpt-5-mini', 'claude-opus-4-5-20251101',
+     *           'claude-opus-4-5', 'claude-sonnet-4-20250514'
+     */
+    modelName: string;
+    /**
+     * Number of input tokens consumed
+     *
+     * Tokens in the prompt (system + user input).
+     */
+    inputTokens: number;
+    /**
+     * Number of output tokens consumed
+     *
+     * Tokens in the model's response.
+     */
+    outputTokens: number;
+    /**
+     * Total tokens
+     *
+     * Always equals inputTokens + outputTokens.
+     * This is a contract on the producer; the type itself cannot enforce it.
+     */
+    totalTokens: number;
+}
+/**
+ * Summary of token usage
+ *
+ * Minimal representation of token counts for aggregation and reporting.
+ * Used as the type of the `total` property on TokenUsageReport,
+ * ComponentUsageReport and PhaseUsageReport.
+ */
+interface TokenUsageSummary {
+    /**
+     * Total input tokens
+     */
+    inputTokens: number;
+    /**
+     * Total output tokens
+     */
+    outputTokens: number;
+    /**
+     * Total tokens (input + output)
+     */
+    totalTokens: number;
+}
+/**
+ * Result type for a document processing operation
+ *
+ * Contains both the processed document and detailed token usage information.
+ */
+/**
+ * Complete result of document processing
+ *
+ * Combines the processed document output with comprehensive token usage tracking.
+ */
+interface DocumentProcessResult {
+    /**
+     * The processed document
+     *
+     * Contains the structured document with chapters (and their text blocks), images,
+     * tables, footnotes, and page range mapping, optimized for LLM analysis.
+     */
+    document: ProcessedDocument;
+    /**
+     * Token usage report for the processing operation
+     *
+     * Detailed breakdown of LLM token consumption by component, phase, and model type.
+     * Includes information about fallback model usage when primary models fail.
+     */
+    usage: TokenUsageReport;
+}
+export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary }; // type-only export list (alphabetical); covers every interface declared in the visible part of this file