@heripo/model 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,700 @@
1
/**
 * Pointer from one Docling node to another, carried as a `$ref` string.
 *
 * NOTE(review): `$ref` presumably holds the same kind of path that nodes
 * expose as `self_ref` — confirm against the Docling schema.
 */
interface DoclingReference {
  /** Reference string identifying the target node. */
  $ref: string;
}
4
/**
 * Bounding box described by four numeric edges plus a coordinate origin.
 *
 * NOTE(review): `l` / `t` / `r` / `b` presumably stand for left / top /
 * right / bottom — confirm units and orientation against the Docling schema.
 */
interface DoclingBBox {
  l: number;
  t: number;
  r: number;
  b: number;
  /**
   * Origin of the coordinate system the edges are expressed in.
   *
   * `'BOTTOMLEFT'` and `'TOPLEFT'` are the known values; any other string is
   * still accepted. `(string & {})` is used rather than a bare `string` so the
   * literals are not absorbed into `string` and remain visible to tooling.
   */
  coord_origin: 'BOTTOMLEFT' | 'TOPLEFT' | (string & {});
}
11
/**
 * Location record for an item: a page number, a bounding box and a
 * character span.
 *
 * NOTE(review): "prov" is presumably short for provenance — confirm.
 */
interface DoclingProv {
  /** Page number the record refers to. */
  page_no: number;
  /** Bounding box associated with the item on that page. */
  bbox: DoclingBBox;
  /**
   * Pair of character offsets.
   *
   * NOTE(review): presumably `[start, end]` into the item's text; whether
   * the end is inclusive is not visible here — confirm.
   */
  charspan: [number, number];
}
16
/**
 * Origin metadata: MIME type, binary hash and file name.
 */
interface DoclingOrigin {
  mimetype: string;
  /**
   * Hash value, typed as a plain `number`.
   *
   * NOTE(review): if the producer emits 64-bit hashes, values above
   * `Number.MAX_SAFE_INTEGER` lose precision once parsed from JSON — confirm
   * the value range before relying on this for equality checks.
   */
  binary_hash: number;
  filename: string;
}
21
/**
 * Fields common to the node interfaces that extend this one.
 */
interface DoclingBaseNode {
  /** Reference string identifying this node itself. */
  self_ref: string;
  /** Reference to the parent node; may be omitted. */
  parent?: DoclingReference;
  /** References to this node's child nodes. */
  children: DoclingReference[];
  /**
   * Name of the content layer the node belongs to.
   * NOTE(review): the set of valid layer names is not visible here — confirm.
   */
  content_layer: string;
  /** Optional label; extending interfaces declare it as required. */
  label?: string;
}
28
/**
 * Text node: a labelled piece of text with its location records.
 */
interface DoclingTextItem extends DoclingBaseNode {
  /**
   * Kind of text. The listed literals are the known values; any other string
   * is still accepted. `(string & {})` keeps the literals from being absorbed
   * into `string`.
   */
  label:
    | 'text'
    | 'section_header'
    | 'list_item'
    | 'footnote'
    | 'caption'
    | 'page_footer'
    | 'page_header'
    | (string & {});
  /** Location records for this text. */
  prov: DoclingProv[];
  /**
   * Original text.
   * NOTE(review): presumably the raw form of `text` before clean-up — confirm.
   */
  orig: string;
  /** Text content. */
  text: string;
  /** NOTE(review): presumably the heading depth for section headers — confirm. */
  level?: number;
  /** NOTE(review): presumably whether a list item is numbered — confirm. */
  enumerated?: boolean;
  /** NOTE(review): presumably the bullet or number shown before a list item — confirm. */
  marker?: string;
}
37
/**
 * Group node: a named, labelled container in the node tree.
 *
 * Both fields list their known values and still accept any other string;
 * `(string & {})` keeps the literals from being absorbed into `string`.
 */
interface DoclingGroupItem extends DoclingBaseNode {
  name: 'list' | 'group' | (string & {});
  label: 'list' | 'key_value_area' | (string & {});
}
41
/**
 * Picture node with its location records and associated references.
 */
interface DoclingPictureItem extends DoclingBaseNode {
  /**
   * `'picture'` is the known value; any other string is still accepted.
   * `(string & {})` keeps the literal from being absorbed into `string`.
   */
  label: 'picture' | (string & {});
  /** Location records for this picture. */
  prov: DoclingProv[];
  /** References to the picture's caption nodes. */
  captions: DoclingReference[];
  // NOTE(review): the three arrays below are untyped (`any[]`). The table
  // item types `footnotes` as DoclingReference[]; tightening these would be
  // a breaking change for consumers, so they are left as-is — confirm the
  // element shapes against the Docling schema before narrowing.
  references: any[];
  footnotes: any[];
  annotations: any[];
}
49
/**
 * One table cell: bounding box, spans, offset indices, text and role flags.
 *
 * NOTE(review): the `*_offset_idx` fields presumably give the cell's row and
 * column index range within the table — whether the end index is exclusive is
 * not visible here; confirm against the Docling schema.
 */
interface DoclingTableCell {
  bbox: DoclingBBox;
  /** Number of rows the cell spans. */
  row_span: number;
  /** Number of columns the cell spans. */
  col_span: number;
  start_row_offset_idx: number;
  end_row_offset_idx: number;
  start_col_offset_idx: number;
  end_col_offset_idx: number;
  /** Text content of the cell. */
  text: string;
  /** Whether the cell is flagged as a column header. */
  column_header: boolean;
  /** Whether the cell is flagged as a row header. */
  row_header: boolean;
  /** Whether the cell is flagged as a row section. */
  row_section: boolean;
  /** NOTE(review): meaning of `fillable` is not visible here — confirm. */
  fillable: boolean;
}
63
/**
 * Table contents: a flat cell list, the table dimensions and a 2D cell grid.
 */
interface DoclingTableData {
  /** Cells as a flat list. */
  table_cells: DoclingTableCell[];
  /** Number of rows. */
  num_rows: number;
  /** Number of columns. */
  num_cols: number;
  /**
   * Cells as a two-dimensional array.
   * NOTE(review): presumably indexed `grid[row][col]` — confirm.
   */
  grid: DoclingTableCell[][];
}
69
/**
 * Table node with its location records, related references and cell data.
 */
interface DoclingTableItem extends DoclingBaseNode {
  /**
   * `'table'` and `'document_index'` are the known values; any other string
   * is still accepted. `(string & {})` keeps the literals from being absorbed
   * into `string`.
   */
  label: 'table' | 'document_index' | (string & {});
  /** Location records for this table. */
  prov: DoclingProv[];
  /** References to the table's caption nodes. */
  captions: DoclingReference[];
  // NOTE(review): left untyped (`any[]`) for compatibility; confirm the
  // element shape against the Docling schema before narrowing.
  references: any[];
  /** References to the table's footnote nodes. */
  footnotes: DoclingReference[];
  /** Cell contents and dimensions. */
  data: DoclingTableData;
}
77
/**
 * Root-style node used for both the `body` and `furniture` trees of a
 * document.
 *
 * Both fields list their known values and still accept any other string;
 * `(string & {})` keeps the literals from being absorbed into `string`.
 */
interface DoclingBody extends DoclingBaseNode {
  name: '_root_' | (string & {});
  label: 'unspecified' | (string & {});
}
81
/**
 * Image associated with a page: MIME type, resolution, pixel size and URI.
 */
interface DoclingPageImage {
  mimetype: string;
  /** Resolution in dots per inch. */
  dpi: number;
  /** Image dimensions. */
  size: {
    width: number;
    height: number;
  };
  /**
   * Location of the image data.
   * NOTE(review): may be a file path or a data URI — confirm with the producer.
   */
  uri: string;
}
90
/**
 * A single page: its dimensions, its image and its page number.
 */
interface DoclingPage {
  /** Page dimensions. */
  size: {
    width: number;
    height: number;
  };
  /** Image associated with the page. */
  image: DoclingPageImage;
  /** Page number. */
  page_no: number;
}
98
/**
 * Top-level Docling document: identification fields, origin metadata, the
 * two root trees, the flat node collections and the page table.
 */
interface DoclingDocument {
  /**
   * `'DoclingDocument'` is the known value; any other string is still
   * accepted. `(string & {})` keeps the literal from being absorbed into
   * `string`.
   */
  schema_name: 'DoclingDocument' | (string & {});
  /** Version string. */
  version: string;
  /** Document name. */
  name: string;
  /** Origin metadata (MIME type, hash, file name). */
  origin: DoclingOrigin;
  /**
   * Root of the furniture tree.
   * NOTE(review): presumably page headers/footers and similar — confirm.
   */
  furniture: DoclingBody;
  /** Root of the body tree. */
  body: DoclingBody;
  /** All group nodes. */
  groups: DoclingGroupItem[];
  /** All text nodes. */
  texts: DoclingTextItem[];
  /** All picture nodes. */
  pictures: DoclingPictureItem[];
  /** All table nodes. */
  tables: DoclingTableItem[];
  /**
   * Pages keyed by string.
   * NOTE(review): keys are presumably page numbers as strings — confirm.
   */
  pages: Record<string, DoclingPage>;
}
111
+
112
/**
 * Caption attached to an image, table, or similar item.
 *
 * Carries the caption's full text and, when one can be extracted, its
 * numbered prefix.
 */
interface Caption {
  /**
   * Numbered prefix extracted from the caption text, original spacing
   * preserved.
   *
   * For example `"Figure 2"` from `"Figure 2: Site overview"`, or `"도판 1"`
   * (Korean, "Plate 1") from `"도판 1 유적 전경"`. Optional because some
   * captions do not start with a number.
   */
  num?: string;
  /**
   * Complete caption text, number and description included.
   *
   * Examples: `"Figure 2: Site overview"`, `"도판 1 유적 전경"`,
   * `"Table 3-2. 유물 목록"`.
   */
  fullText: string;
}
141
/**
 * Range of actual-document pages contained in one PDF page.
 *
 * Scanned PDFs can hold several pages of the actual document on a single PDF
 * page, for example when a double-sided document is scanned onto one page.
 */
interface PageRange {
  /** First page number in the actual document (inclusive). */
  startPageNo: number;
  /** Last page number in the actual document (inclusive). */
  endPageNo: number;
}
161
/**
 * Text block (paragraph, sentence, etc.) holding actual text content inside
 * a chapter.
 */
interface TextBlock {
  /** Content of the text block. */
  text: string;
  /** Page number in the PDF file. */
  pdfPageNo: number;
}
180
/**
 * Chapter (section) of the document.
 *
 * Chapters form the document's hierarchy. Each one carries its original and
 * cleaned titles, its page number in the actual document, its depth, its text
 * content, the IDs of its images, tables and footnotes, and any child
 * chapters.
 */
interface Chapter {
  /**
   * Unique identifier of the chapter.
   *
   * Used when referencing the chapter in images, tables, etc.
   */
  id: string;
  /** Title as it appears in the original report. */
  originTitle: string;
  /** Cleaned chapter title. */
  title: string;
  /** Page number in the actual document where this chapter starts. */
  pageNo: number;
  /** Hierarchy depth of the section (1 = top-level, 2 = subsection, etc.). */
  level: number;
  /**
   * All text content of this chapter, as an array of text blocks.
   *
   * Each text block includes a PDF page number.
   */
  textBlocks: TextBlock[];
  /**
   * IDs of the images included in the chapter.
   *
   * Images can be found by ID in `ProcessedDocument.images`.
   */
  imageIds: string[];
  /**
   * IDs of the tables included in the chapter.
   *
   * Tables can be found by ID in `ProcessedDocument.tables`.
   */
  tableIds: string[];
  /**
   * IDs of the footnotes included in the chapter.
   *
   * Footnotes can be found by ID in `ProcessedDocument.footnotes`.
   */
  footnoteIds: string[];
  /** Child chapters (recursive structure); may be omitted. */
  children?: Chapter[];
}
257
/**
 * Image extracted from the processed PDF document, with its metadata.
 */
interface ProcessedImage {
  /**
   * Unique identifier of the image.
   *
   * Used when referencing the image in chapters.
   */
  id: string;
  /** Caption information for the image, if available. */
  caption?: Caption;
  /** Page number in the PDF file where this image is located. */
  pdfPageNo: number;
  /**
   * Path of the extracted image file.
   *
   * May be an absolute or a relative path.
   */
  path: string;
}
292
/**
 * A single cell of a processed table.
 */
interface ProcessedTableCell {
  /** Text content of the cell. */
  text: string;
  /** Number of rows to span (default: 1). */
  rowSpan: number;
  /** Number of columns to span (default: 1). */
  colSpan: number;
  /** Whether the cell is a header cell (column or row header). */
  isHeader: boolean;
}
319
/**
 * Table extracted from the processed PDF document, with its metadata.
 *
 * Structured data such as artifact lists and stratigraphy information is
 * mainly provided in table form.
 */
interface ProcessedTable {
  /**
   * Unique identifier of the table.
   *
   * Used when referencing the table in chapters.
   */
  id: string;
  /** Caption information for the table, if available. */
  caption?: Caption;
  /** Page number in the PDF file where this table is located. */
  pdfPageNo: number;
  /** Number of rows in the table. */
  numRows: number;
  /** Number of columns in the table. */
  numCols: number;
  /**
   * Table data as a 2D array.
   *
   * Access a cell with `grid[row][col]`.
   */
  grid: ProcessedTableCell[][];
}
365
/**
 * Footnote extracted from the processed PDF document, with its metadata.
 *
 * Footnotes provide supplementary information referenced in the main text.
 */
interface ProcessedFootnote {
  /**
   * Unique identifier of the footnote.
   *
   * Used when referencing the footnote in chapters.
   */
  id: string;
  /** Text content of the footnote. */
  text: string;
  /** Page number in the PDF file where this footnote is located. */
  pdfPageNo: number;
}
394
/**
 * Processed PDF document model.
 *
 * An intermediate model, cleaned and structured so that the original document
 * extracted from Docling can be delivered efficiently for LLM analysis.
 */
interface ProcessedDocument {
  /** Unique identifier of the report. */
  reportId: string;
  /**
   * Actual-document page range for each PDF page, keyed by PDF page number.
   *
   * When several pages of the actual document sit on a single PDF page, this
   * map records which actual pages each PDF page contains.
   *
   * @example
   * ```typescript
   * {
   *   1: { startPageNo: 1, endPageNo: 1 }, // PDF 1 = actual 1
   *   2: { startPageNo: 2, endPageNo: 3 }, // PDF 2 = actual 2~3 (double-sided)
   *   3: { startPageNo: 4, endPageNo: 4 }, // PDF 3 = actual 4
   * }
   * ```
   */
  pageRangeMap: Record<number, PageRange>;
  /**
   * Chapter structure of the document (hierarchical).
   *
   * Holds every chapter of the document as a tree, where each chapter contains
   * its title, page information, text content and child chapters.
   *
   * @example
   * The `id` values below are illustrative placeholders only.
   * ```typescript
   * [
   *   {
   *     id: 'chapter-1',
   *     originTitle: ' Chapter 1 Introduction ',
   *     title: 'Chapter 1 Introduction',
   *     pageNo: 1,
   *     level: 1,
   *     textBlocks: [
   *       {
   *         text: 'This chapter describes the background of the excavation project.',
   *         pdfPageNo: 1,
   *       },
   *       {
   *         text: 'The site is located in the central region of the peninsula.',
   *         pdfPageNo: 2,
   *       },
   *     ],
   *     imageIds: [],
   *     tableIds: [],
   *     footnoteIds: [],
   *     children: [
   *       {
   *         id: 'chapter-1-1',
   *         originTitle: '1.1 Background',
   *         title: '1.1 Background',
   *         pageNo: 1,
   *         level: 2,
   *         textBlocks: [
   *           { text: 'The archaeological significance of the region...', pdfPageNo: 1 },
   *         ],
   *         imageIds: [],
   *         tableIds: [],
   *         footnoteIds: [],
   *       },
   *       {
   *         id: 'chapter-1-2',
   *         originTitle: '1.2 Objectives',
   *         title: '1.2 Objectives',
   *         pageNo: 3,
   *         level: 2,
   *         textBlocks: [
   *           { text: 'The main objectives of this survey are...', pdfPageNo: 3 },
   *         ],
   *         imageIds: [],
   *         tableIds: [],
   *         footnoteIds: [],
   *       },
   *     ],
   *   },
   *   {
   *     id: 'chapter-2',
   *     originTitle: 'Chapter 2 Methodology',
   *     title: 'Chapter 2 Methodology',
   *     pageNo: 5,
   *     level: 1,
   *     textBlocks: [
   *       { text: 'This chapter describes the survey methodology.', pdfPageNo: 5 },
   *     ],
   *     imageIds: [],
   *     tableIds: [],
   *     footnoteIds: [],
   *   },
   * ]
   * ```
   */
  chapters: Chapter[];
  /**
   * Images included in the document.
   *
   * Each extracted image carries a unique ID, an optional caption, a PDF page
   * number and a file path. Chapters refer to these through `imageIds`.
   */
  images: ProcessedImage[];
  /**
   * Tables included in the document.
   *
   * Extracted tables contain structured data such as artifact lists and
   * stratigraphy information. Chapters refer to these through `tableIds`.
   */
  tables: ProcessedTable[];
  /**
   * Footnotes included in the document.
   *
   * Extracted footnotes provide supplementary information. Chapters refer to
   * these through `footnoteIds`.
   */
  footnotes: ProcessedFootnote[];
}
523
+
524
+ /**
525
+ * Token usage report types for document processing
526
+ *
527
+ * Provides structured types for tracking and reporting LLM token consumption
528
+ * across document processing pipeline, with detailed breakdown by component,
529
+ * phase, and model type (primary vs fallback).
530
+ */
531
/**
 * Detailed token usage report for document processing.
 *
 * Breaks token usage down across all components and phases of the processing
 * pipeline.
 */
interface TokenUsageReport {
  /**
   * Breakdown by component.
   *
   * One `ComponentUsageReport` per component that performed LLM calls, in the
   * order the components appear in the processing pipeline.
   */
  components: ComponentUsageReport[];
  /**
   * Grand total across all components and phases.
   *
   * Sum of the input, output and total tokens of every component.
   */
  total: TokenUsageSummary;
}
552
/**
 * Token usage for a specific component.
 *
 * Examples: PageRangeParser, TocExtractor, CaptionParser, CaptionValidator, etc.
 */
interface ComponentUsageReport {
  /**
   * Component name.
   *
   * Examples: 'PageRangeParser', 'TocExtractor', 'TocContentValidator',
   * 'CaptionParser', 'CaptionValidator', 'VisionTocExtractor'
   */
  component: string;
  /**
   * Breakdown by phase within this component.
   *
   * One `PhaseUsageReport` per phase the component executed. A component may
   * have several phases (e.g. extraction, validation, sampling).
   */
  phases: PhaseUsageReport[];
  /**
   * Total usage for this component: the sum of all its phases.
   */
  total: TokenUsageSummary;
}
579
/**
 * Token usage for a specific phase.
 *
 * Examples: extraction, validation, sampling, caption-extraction
 *
 * A phase may involve both the primary and the fallback model when the
 * primary fails and fallback retry is configured.
 *
 * NOTE(review): the field descriptions below say usage of a failed primary
 * attempt is not recorded and that `fallback` is only present after a primary
 * failure, which would make `primary` and `fallback` mutually exclusive, yet
 * `total` describes a case where both are present — confirm which holds.
 */
interface PhaseUsageReport {
  /**
   * Phase name, set by the component performing the LLM call.
   *
   * Examples: 'extraction', 'validation', 'sampling', 'caption-extraction'
   */
  phase: string;
  /**
   * Usage by the primary model, if any.
   *
   * Present when the primary model was attempted and succeeded; absent when
   * it was never attempted or failed. When fallback retry is enabled and the
   * primary fails, primary usage is not recorded (only the successful
   * fallback attempt is).
   */
  primary?: ModelUsageDetail;
  /**
   * Usage by the fallback model, if any.
   *
   * Present only when the primary model failed and a fallback model was
   * available and used.
   */
  fallback?: ModelUsageDetail;
  /**
   * Total usage for this phase.
   *
   * Sum of primary and fallback usage if both are present; otherwise equal to
   * the usage of whichever model is present.
   */
  total: TokenUsageSummary;
}
620
/**
 * Detailed usage for a specific model.
 *
 * Holds the exact token counts for a model used in a specific phase.
 */
interface ModelUsageDetail {
  /**
   * Model identifier.
   *
   * Examples: 'gpt-5', 'gpt-5-mini', 'claude-opus-4-5-20251101',
   * 'claude-opus-4-5', 'claude-sonnet-4-20250514'
   */
  modelName: string;
  /**
   * Number of input tokens consumed: the tokens in the prompt
   * (system + user input).
   */
  inputTokens: number;
  /**
   * Number of output tokens consumed: the tokens in the model's response.
   */
  outputTokens: number;
  /**
   * Total tokens. Always equals `inputTokens + outputTokens`.
   */
  totalTokens: number;
}
652
/**
 * Summary of token usage.
 *
 * Minimal representation of token counts for aggregation and reporting.
 */
interface TokenUsageSummary {
  /** Total input tokens. */
  inputTokens: number;
  /** Total output tokens. */
  outputTokens: number;
  /** Total tokens (input + output). */
  totalTokens: number;
}
671
+
672
+ /**
673
+ * Result type for document processing operation
674
+ *
675
+ * Contains both the processed document and detailed token usage information.
676
+ */
677
+
678
/**
 * Complete result of document processing.
 *
 * Pairs the processed document output with comprehensive token usage
 * tracking.
 */
interface DocumentProcessResult {
  /**
   * The processed document.
   *
   * The structured document — chapters with their text blocks, images,
   * tables, footnotes and the page range mapping — optimized for LLM analysis.
   */
  document: ProcessedDocument;
  /**
   * Token usage report for the processing operation.
   *
   * Detailed breakdown of LLM token consumption by component, phase and model
   * type, including fallback model usage when primary models fail.
   */
  usage: TokenUsageReport;
}
699
+
700
// Public, type-only surface of the package: every interface declared in this
// file, re-exported by name.
export type {
  Caption,
  Chapter,
  ComponentUsageReport,
  DoclingBBox,
  DoclingBaseNode,
  DoclingBody,
  DoclingDocument,
  DoclingGroupItem,
  DoclingOrigin,
  DoclingPage,
  DoclingPageImage,
  DoclingPictureItem,
  DoclingProv,
  DoclingReference,
  DoclingTableCell,
  DoclingTableData,
  DoclingTableItem,
  DoclingTextItem,
  DocumentProcessResult,
  ModelUsageDetail,
  PageRange,
  PhaseUsageReport,
  ProcessedDocument,
  ProcessedFootnote,
  ProcessedImage,
  ProcessedTable,
  ProcessedTableCell,
  TextBlock,
  TokenUsageReport,
  TokenUsageSummary,
};
package/dist/index.js ADDED
@@ -0,0 +1 @@
1
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}