@heripo/model 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +38 -1
- package/dist/index.d.ts +38 -1
- package/package.json +2 -2
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["export type * from './docling-document';\nexport type * from './processed-document';\nexport type * from './token-usage-report';\nexport type * from './document-process-result';\n"],"mappings":";;;;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["export type * from './docling-document';\nexport type * from './hanja-assessment';\nexport type * from './processed-document';\nexport type * from './token-usage-report';\nexport type * from './document-process-result';\n"],"mappings":";;;;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
|
package/dist/index.d.cts
CHANGED
|
@@ -109,6 +109,43 @@ interface DoclingDocument {
|
|
|
109
109
|
pages: Record<string, DoclingPage>;
|
|
110
110
|
}
|
|
111
111
|
|
|
112
|
+
/**
|
|
113
|
+
* Result of Hanja (KCJ) quality assessment
|
|
114
|
+
*
|
|
115
|
+
* Evaluates OCR quality of Korean-Chinese-Japanese (KCJ) characters
|
|
116
|
+
* in the document by sampling pages and comparing with Vision LLM.
|
|
117
|
+
*/
|
|
118
|
+
interface HanjaAssessment {
|
|
119
|
+
/**
|
|
120
|
+
* Whether the document should be re-parsed using VLM pipeline
|
|
121
|
+
* due to significant KCJ character corruption
|
|
122
|
+
*/
|
|
123
|
+
needsVlmReparse: boolean;
|
|
124
|
+
/**
|
|
125
|
+
* Severity of KCJ character corruption
|
|
126
|
+
* - 'none': No KCJ characters found or no corruption detected
|
|
127
|
+
* - 'minor': Some corruption but still usable
|
|
128
|
+
* - 'severe': Significant corruption requiring VLM re-parse
|
|
129
|
+
*/
|
|
130
|
+
severity: 'none' | 'minor' | 'severe';
|
|
131
|
+
/**
|
|
132
|
+
* Total number of text pages considered as candidates for assessment
|
|
133
|
+
*/
|
|
134
|
+
kcjPageCount: number;
|
|
135
|
+
/**
|
|
136
|
+
* Number of pages actually sampled for quality assessment
|
|
137
|
+
*/
|
|
138
|
+
sampledPageCount: number;
|
|
139
|
+
/**
|
|
140
|
+
* Ratio of corrupted characters (0.0 ~ 1.0)
|
|
141
|
+
*/
|
|
142
|
+
corruptedRatio: number;
|
|
143
|
+
/**
|
|
144
|
+
* Human-readable reason for the assessment result
|
|
145
|
+
*/
|
|
146
|
+
reason: string;
|
|
147
|
+
}
|
|
148
|
+
|
|
112
149
|
/**
|
|
113
150
|
* Caption information
|
|
114
151
|
*
|
|
@@ -697,4 +734,4 @@ interface DocumentProcessResult {
|
|
|
697
734
|
usage: TokenUsageReport;
|
|
698
735
|
}
|
|
699
736
|
|
|
700
|
-
export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary };
|
|
737
|
+
export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, HanjaAssessment, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary };
|
package/dist/index.d.ts
CHANGED
|
@@ -109,6 +109,43 @@ interface DoclingDocument {
|
|
|
109
109
|
pages: Record<string, DoclingPage>;
|
|
110
110
|
}
|
|
111
111
|
|
|
112
|
+
/**
|
|
113
|
+
* Result of Hanja (KCJ) quality assessment
|
|
114
|
+
*
|
|
115
|
+
* Evaluates OCR quality of Korean-Chinese-Japanese (KCJ) characters
|
|
116
|
+
* in the document by sampling pages and comparing with Vision LLM.
|
|
117
|
+
*/
|
|
118
|
+
interface HanjaAssessment {
|
|
119
|
+
/**
|
|
120
|
+
* Whether the document should be re-parsed using VLM pipeline
|
|
121
|
+
* due to significant KCJ character corruption
|
|
122
|
+
*/
|
|
123
|
+
needsVlmReparse: boolean;
|
|
124
|
+
/**
|
|
125
|
+
* Severity of KCJ character corruption
|
|
126
|
+
* - 'none': No KCJ characters found or no corruption detected
|
|
127
|
+
* - 'minor': Some corruption but still usable
|
|
128
|
+
* - 'severe': Significant corruption requiring VLM re-parse
|
|
129
|
+
*/
|
|
130
|
+
severity: 'none' | 'minor' | 'severe';
|
|
131
|
+
/**
|
|
132
|
+
* Total number of text pages considered as candidates for assessment
|
|
133
|
+
*/
|
|
134
|
+
kcjPageCount: number;
|
|
135
|
+
/**
|
|
136
|
+
* Number of pages actually sampled for quality assessment
|
|
137
|
+
*/
|
|
138
|
+
sampledPageCount: number;
|
|
139
|
+
/**
|
|
140
|
+
* Ratio of corrupted characters (0.0 ~ 1.0)
|
|
141
|
+
*/
|
|
142
|
+
corruptedRatio: number;
|
|
143
|
+
/**
|
|
144
|
+
* Human-readable reason for the assessment result
|
|
145
|
+
*/
|
|
146
|
+
reason: string;
|
|
147
|
+
}
|
|
148
|
+
|
|
112
149
|
/**
|
|
113
150
|
* Caption information
|
|
114
151
|
*
|
|
@@ -697,4 +734,4 @@ interface DocumentProcessResult {
|
|
|
697
734
|
usage: TokenUsageReport;
|
|
698
735
|
}
|
|
699
736
|
|
|
700
|
-
export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary };
|
|
737
|
+
export type { Caption, Chapter, ComponentUsageReport, DoclingBBox, DoclingBaseNode, DoclingBody, DoclingDocument, DoclingGroupItem, DoclingOrigin, DoclingPage, DoclingPageImage, DoclingPictureItem, DoclingProv, DoclingReference, DoclingTableCell, DoclingTableData, DoclingTableItem, DoclingTextItem, DocumentProcessResult, HanjaAssessment, ModelUsageDetail, PageRange, PhaseUsageReport, ProcessedDocument, ProcessedFootnote, ProcessedImage, ProcessedTable, ProcessedTableCell, TextBlock, TokenUsageReport, TokenUsageSummary };
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@heripo/model",
|
|
3
3
|
"private": false,
|
|
4
4
|
"type": "module",
|
|
5
|
-
"version": "0.1.4",
|
|
5
|
+
"version": "0.1.6",
|
|
6
6
|
"description": "Document models and type definitions for heripo engine",
|
|
7
7
|
"main": "dist/index.cjs",
|
|
8
8
|
"module": "dist/index.js",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"archaeology"
|
|
46
46
|
],
|
|
47
47
|
"engines": {
|
|
48
|
-
"node": ">=
|
|
48
|
+
"node": ">=24"
|
|
49
49
|
},
|
|
50
50
|
"publishConfig": {
|
|
51
51
|
"access": "public"
|