@heripo/model 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["export type * from './docling-document';\nexport type * from './processed-document';\nexport type * from './token-usage-report';\nexport type * from './document-process-result';\n"],"mappings":";;;;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["export type * from './docling-document';\nexport type * from './hanja-assessment';\nexport type * from './processed-document';\nexport type * from './token-usage-report';\nexport type * from './document-process-result';\n"],"mappings":";;;;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
package/dist/index.d.cts CHANGED
@@ -109,6 +109,43 @@ interface DoclingDocument {
109
109
  pages: Record<string, DoclingPage>;
110
110
  }
111
111
 
112
+ /**
113
+ * Result of Hanja (KCJ) quality assessment
114
+ *
115
+ * Evaluates OCR quality of Korean-Chinese-Japanese (KCJ) characters
116
+ * in the document by sampling pages and comparing with Vision LLM.
117
+ */
118
+ interface HanjaAssessment {
119
+ /**
120
+ * Whether the document should be re-parsed using VLM pipeline
121
+ * due to significant KCJ character corruption
122
+ */
123
+ needsVlmReparse: boolean;
124
+ /**
125
+ * Severity of KCJ character corruption
126
+ * - 'none': No KCJ characters found or no corruption detected
127
+ * - 'minor': Some corruption but still usable
128
+ * - 'severe': Significant corruption requiring VLM re-parse
129
+ */
130
+ severity: 'none' | 'minor' | 'severe';
131
+ /**
132
+ * Total number of text pages considered as candidates for assessment
133
+ */
134
+ kcjPageCount: number;
135
+ /**
136
+ * Number of pages actually sampled for quality assessment
137
+ */
138
+ sampledPageCount: number;
139
+ /**
140
+ * Ratio of corrupted characters (0.0 ~ 1.0)
141
+ */
142
+ corruptedRatio: number;
143
+ /**
144
+ * Human-readable reason for the assessment result
145
+ */
146
+ reason: string;
147
+ }
148
+
112
149
  /**
113
150
  * Caption information
114
151
  *
@@ -697,4 +734,4 @@ interface DocumentProcessResult {
697
734
  usage: TokenUsageReport;
698
735
  }
699
736
 
700
- export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary };
737
+ export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, HanjaAssessment, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary };
package/dist/index.d.ts CHANGED
@@ -109,6 +109,43 @@ interface DoclingDocument {
109
109
  pages: Record<string, DoclingPage>;
110
110
  }
111
111
 
112
+ /**
113
+ * Result of Hanja (KCJ) quality assessment
114
+ *
115
+ * Evaluates OCR quality of Korean-Chinese-Japanese (KCJ) characters
116
+ * in the document by sampling pages and comparing with Vision LLM.
117
+ */
118
+ interface HanjaAssessment {
119
+ /**
120
+ * Whether the document should be re-parsed using VLM pipeline
121
+ * due to significant KCJ character corruption
122
+ */
123
+ needsVlmReparse: boolean;
124
+ /**
125
+ * Severity of KCJ character corruption
126
+ * - 'none': No KCJ characters found or no corruption detected
127
+ * - 'minor': Some corruption but still usable
128
+ * - 'severe': Significant corruption requiring VLM re-parse
129
+ */
130
+ severity: 'none' | 'minor' | 'severe';
131
+ /**
132
+ * Total number of text pages considered as candidates for assessment
133
+ */
134
+ kcjPageCount: number;
135
+ /**
136
+ * Number of pages actually sampled for quality assessment
137
+ */
138
+ sampledPageCount: number;
139
+ /**
140
+ * Ratio of corrupted characters (0.0 ~ 1.0)
141
+ */
142
+ corruptedRatio: number;
143
+ /**
144
+ * Human-readable reason for the assessment result
145
+ */
146
+ reason: string;
147
+ }
148
+
112
149
  /**
113
150
  * Caption information
114
151
  *
@@ -697,4 +734,4 @@ interface DocumentProcessResult {
697
734
  usage: TokenUsageReport;
698
735
  }
699
736
 
700
- export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary };
737
+ export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, HanjaAssessment, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary };
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@heripo/model",
3
3
  "private": false,
4
4
  "type": "module",
5
- "version": "0.1.4",
5
+ "version": "0.1.6",
6
6
  "description": "Document models and type definitions for heripo engine",
7
7
  "main": "dist/index.cjs",
8
8
  "module": "dist/index.js",
@@ -45,7 +45,7 @@
45
45
  "archaeology"
46
46
  ],
47
47
  "engines": {
48
- "node": ">=22"
48
+ "node": ">=24"
49
49
  },
50
50
  "publishConfig": {
51
51
  "access": "public"