@clazic/kordoc 2.4.11 → 2.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -0
- package/dist/{chunk-PJSXZBZB.js → chunk-5R37N6KE.js} +19 -4
- package/dist/chunk-5R37N6KE.js.map +1 -0
- package/dist/chunk-I6YC6ZGK.js +219 -0
- package/dist/chunk-I6YC6ZGK.js.map +1 -0
- package/dist/{chunk-JGMLDBW5.js → chunk-UX75CBUO.js} +680 -301
- package/dist/chunk-UX75CBUO.js.map +1 -0
- package/dist/cli.js +68 -8
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1700 -329
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +122 -1
- package/dist/index.d.ts +122 -1
- package/dist/index.js +1678 -310
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +11 -2
- package/dist/mcp.js.map +1 -1
- package/dist/{provider-PYZL2VNN.js → provider-T2D5XRTI.js} +30 -2
- package/dist/provider-T2D5XRTI.js.map +1 -0
- package/dist/{resolve-4I65IGMM.js → resolve-673XFZQ6.js} +18 -1
- package/dist/resolve-673XFZQ6.js.map +1 -0
- package/dist/{utils-HKVOS2O3.js → utils-XLLXVB7V.js} +4 -2
- package/dist/{watch-EYOGF3HY.js → watch-3MTAXFEA.js} +4 -3
- package/dist/{watch-EYOGF3HY.js.map → watch-3MTAXFEA.js.map} +1 -1
- package/package.json +2 -1
- package/dist/chunk-JGMLDBW5.js.map +0 -1
- package/dist/chunk-PJSXZBZB.js.map +0 -1
- package/dist/provider-PYZL2VNN.js.map +0 -1
- package/dist/resolve-4I65IGMM.js.map +0 -1
- package/dist/{utils-HKVOS2O3.js.map → utils-XLLXVB7V.js.map} +0 -0
package/dist/index.d.cts
CHANGED
|
@@ -125,6 +125,18 @@ interface ParseOptions {
|
|
|
125
125
|
ocrBatchSize?: number;
|
|
126
126
|
/** PDF 머리글/바닥글 자동 제거 */
|
|
127
127
|
removeHeaderFooter?: boolean;
|
|
128
|
+
/**
|
|
129
|
+
* 부분 파싱 실패 허용 비율 (0~1).
|
|
130
|
+
* - undefined: 실패 페이지가 있어도 경고만 기록하고 계속 진행
|
|
131
|
+
* - 예: 0.3이면 대상 페이지의 30%를 초과해 실패 시 오류로 중단
|
|
132
|
+
*/
|
|
133
|
+
maxPartialFailureRatio?: number;
|
|
134
|
+
/**
|
|
135
|
+
* PDF 텍스트 정규화 모드.
|
|
136
|
+
* - "default": 줄 병합/균등배분 공백 보정 적용 (기본값)
|
|
137
|
+
* - "strict-preserve": 원문 줄바꿈/공백 보존 우선 (페이지 번호 제거만 수행)
|
|
138
|
+
*/
|
|
139
|
+
pdfTextNormalization?: "default" | "strict-preserve";
|
|
128
140
|
}
|
|
129
141
|
/** 파싱 중 스킵/실패한 요소 보고 */
|
|
130
142
|
interface ParseWarning {
|
|
@@ -320,9 +332,118 @@ declare function detectZipFormat(buffer: ArrayBuffer): Promise<{
|
|
|
320
332
|
|
|
321
333
|
declare function blocksToMarkdown(blocks: IRBlock[]): string;
|
|
322
334
|
|
|
335
|
+
type LogLevel = "error" | "warn" | "info" | "debug" | "trace";
|
|
336
|
+
type LogEventType = "start" | "progress" | "done" | "error" | "message";
|
|
337
|
+
type LogStage = "detect" | "convert" | "render" | "probe" | "ocr" | "proofread" | "merge" | "finalize" | "unknown";
|
|
338
|
+
interface LogEvent {
|
|
339
|
+
ts?: string;
|
|
340
|
+
level: LogLevel;
|
|
341
|
+
runId?: string;
|
|
342
|
+
stage?: LogStage;
|
|
343
|
+
event?: LogEventType;
|
|
344
|
+
component?: string;
|
|
345
|
+
message: string;
|
|
346
|
+
meta?: Record<string, unknown>;
|
|
347
|
+
error?: {
|
|
348
|
+
code?: string;
|
|
349
|
+
name?: string;
|
|
350
|
+
message?: string;
|
|
351
|
+
stack?: string;
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
interface Logger {
|
|
355
|
+
log(event: LogEvent): void;
|
|
356
|
+
child(context: Partial<LogEvent>): Logger;
|
|
357
|
+
withRun(runId: string): Logger;
|
|
358
|
+
}
|
|
359
|
+
|
|
323
360
|
/** kordoc 공용 유틸리티 */
|
|
361
|
+
|
|
324
362
|
declare const VERSION: string;
|
|
325
363
|
|
|
364
|
+
/**
|
|
365
|
+
* 다중 API 키 로테이션 + rate-limit 완화 유틸.
|
|
366
|
+
*
|
|
367
|
+
* 정책:
|
|
368
|
+
* - 기본 선택은 라운드 로빈
|
|
369
|
+
* - 429/5xx/timeout 발생 키는 cooldown 적용
|
|
370
|
+
* - cooldown 중 키는 제외
|
|
371
|
+
* - 모든 키가 cooldown이면 가장 빠른 복구 시각을 계산해 안내
|
|
372
|
+
*/
|
|
373
|
+
interface ApiKeyRotationOptions {
|
|
374
|
+
baseCooldownMs?: number;
|
|
375
|
+
maxCooldownMs?: number;
|
|
376
|
+
}
|
|
377
|
+
interface KeyHealthSnapshot {
|
|
378
|
+
keyId: string;
|
|
379
|
+
totalRequests: number;
|
|
380
|
+
successCount: number;
|
|
381
|
+
failureCount: number;
|
|
382
|
+
consecutiveFailures: number;
|
|
383
|
+
lastUsedAt?: number;
|
|
384
|
+
cooldownUntil?: number;
|
|
385
|
+
}
|
|
386
|
+
interface AcquireResult {
|
|
387
|
+
key: string;
|
|
388
|
+
keyId: string;
|
|
389
|
+
}
|
|
390
|
+
declare class AllKeysCoolingDownError extends Error {
|
|
391
|
+
waitMs: number;
|
|
392
|
+
constructor(waitMs: number);
|
|
393
|
+
}
|
|
394
|
+
declare class ApiKeyRotationPool {
|
|
395
|
+
private readonly states;
|
|
396
|
+
private readonly baseCooldownMs;
|
|
397
|
+
private readonly maxCooldownMs;
|
|
398
|
+
private cursor;
|
|
399
|
+
constructor(keys: string[], options?: ApiKeyRotationOptions);
|
|
400
|
+
static fromEnv(env?: NodeJS.ProcessEnv): ApiKeyRotationPool;
|
|
401
|
+
acquire(now?: number): AcquireResult;
|
|
402
|
+
markSuccess(keyId: string): void;
|
|
403
|
+
markFailure(keyId: string, opts?: {
|
|
404
|
+
status?: number;
|
|
405
|
+
retryAfterMs?: number;
|
|
406
|
+
timeout?: boolean;
|
|
407
|
+
}, now?: number): void;
|
|
408
|
+
snapshot(): KeyHealthSnapshot[];
|
|
409
|
+
private isRetryableFailure;
|
|
410
|
+
private find;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
type UnifiedStage = "convert" | "render" | "probe" | "ocr" | "proofread" | "merge";
|
|
414
|
+
type UnifiedOcrErrorCode = "UNSUPPORTED_INPUT" | "SOFFICE_NOT_FOUND" | "CONVERT_FAILED" | "RENDER_FAILED" | "PROBE_FAILED" | "OCR_FAILED" | "PROOFREAD_FAILED" | "MERGE_FAILED" | "UNKNOWN";
|
|
415
|
+
interface UnifiedOcrProgressEvent {
|
|
416
|
+
type: "stage_start" | "stage_progress" | "stage_done" | "error";
|
|
417
|
+
stage: UnifiedStage;
|
|
418
|
+
stagePercent: number;
|
|
419
|
+
overallPercent: number;
|
|
420
|
+
current?: number;
|
|
421
|
+
total?: number;
|
|
422
|
+
code?: UnifiedOcrErrorCode;
|
|
423
|
+
message?: string;
|
|
424
|
+
}
|
|
425
|
+
interface UnifiedOcrOptions {
|
|
426
|
+
workspaceDir?: string;
|
|
427
|
+
outputPath?: string;
|
|
428
|
+
dpi?: number;
|
|
429
|
+
baseUrl?: string;
|
|
430
|
+
onEvent?: (event: UnifiedOcrProgressEvent) => void;
|
|
431
|
+
modelCandidates?: string[];
|
|
432
|
+
modelMaxTokens?: Record<string, number>;
|
|
433
|
+
stageWeights?: Partial<Record<UnifiedStage, number>>;
|
|
434
|
+
timeoutMs?: number;
|
|
435
|
+
maxRetriesPerPage?: number;
|
|
436
|
+
probeConcurrency?: number;
|
|
437
|
+
logger?: Logger;
|
|
438
|
+
runId?: string;
|
|
439
|
+
}
|
|
440
|
+
interface UnifiedOcrResult {
|
|
441
|
+
outputPath: string;
|
|
442
|
+
reportPath: string;
|
|
443
|
+
selectedModel: string;
|
|
444
|
+
}
|
|
445
|
+
declare function runUnifiedOcrPipeline(inputPath: string, options?: UnifiedOcrOptions): Promise<UnifiedOcrResult>;
|
|
446
|
+
|
|
326
447
|
/**
|
|
327
448
|
* kordoc — 모두 파싱해버리겠다
|
|
328
449
|
*
|
|
@@ -353,4 +474,4 @@ declare function parseXlsx(buffer: ArrayBuffer, options?: ParseOptions, zip?: JS
|
|
|
353
474
|
/** DOCX 파일을 Markdown으로 변환 */
|
|
354
475
|
declare function parseDocx(buffer: ArrayBuffer, options?: ParseOptions, zip?: JSZip): Promise<ParseResult>;
|
|
355
476
|
|
|
356
|
-
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx };
|
|
477
|
+
export { type AcquireResult, AllKeysCoolingDownError, type ApiKeyRotationOptions, ApiKeyRotationPool, type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type KeyHealthSnapshot, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, type UnifiedOcrOptions, type UnifiedOcrProgressEvent, type UnifiedOcrResult, type UnifiedStage, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx, runUnifiedOcrPipeline };
|
package/dist/index.d.ts
CHANGED
|
@@ -125,6 +125,18 @@ interface ParseOptions {
|
|
|
125
125
|
ocrBatchSize?: number;
|
|
126
126
|
/** PDF 머리글/바닥글 자동 제거 */
|
|
127
127
|
removeHeaderFooter?: boolean;
|
|
128
|
+
/**
|
|
129
|
+
* 부분 파싱 실패 허용 비율 (0~1).
|
|
130
|
+
* - undefined: 실패 페이지가 있어도 경고만 기록하고 계속 진행
|
|
131
|
+
* - 예: 0.3이면 대상 페이지의 30%를 초과해 실패 시 오류로 중단
|
|
132
|
+
*/
|
|
133
|
+
maxPartialFailureRatio?: number;
|
|
134
|
+
/**
|
|
135
|
+
* PDF 텍스트 정규화 모드.
|
|
136
|
+
* - "default": 줄 병합/균등배분 공백 보정 적용 (기본값)
|
|
137
|
+
* - "strict-preserve": 원문 줄바꿈/공백 보존 우선 (페이지 번호 제거만 수행)
|
|
138
|
+
*/
|
|
139
|
+
pdfTextNormalization?: "default" | "strict-preserve";
|
|
128
140
|
}
|
|
129
141
|
/** 파싱 중 스킵/실패한 요소 보고 */
|
|
130
142
|
interface ParseWarning {
|
|
@@ -320,9 +332,118 @@ declare function detectZipFormat(buffer: ArrayBuffer): Promise<{
|
|
|
320
332
|
|
|
321
333
|
declare function blocksToMarkdown(blocks: IRBlock[]): string;
|
|
322
334
|
|
|
335
|
+
type LogLevel = "error" | "warn" | "info" | "debug" | "trace";
|
|
336
|
+
type LogEventType = "start" | "progress" | "done" | "error" | "message";
|
|
337
|
+
type LogStage = "detect" | "convert" | "render" | "probe" | "ocr" | "proofread" | "merge" | "finalize" | "unknown";
|
|
338
|
+
interface LogEvent {
|
|
339
|
+
ts?: string;
|
|
340
|
+
level: LogLevel;
|
|
341
|
+
runId?: string;
|
|
342
|
+
stage?: LogStage;
|
|
343
|
+
event?: LogEventType;
|
|
344
|
+
component?: string;
|
|
345
|
+
message: string;
|
|
346
|
+
meta?: Record<string, unknown>;
|
|
347
|
+
error?: {
|
|
348
|
+
code?: string;
|
|
349
|
+
name?: string;
|
|
350
|
+
message?: string;
|
|
351
|
+
stack?: string;
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
interface Logger {
|
|
355
|
+
log(event: LogEvent): void;
|
|
356
|
+
child(context: Partial<LogEvent>): Logger;
|
|
357
|
+
withRun(runId: string): Logger;
|
|
358
|
+
}
|
|
359
|
+
|
|
323
360
|
/** kordoc 공용 유틸리티 */
|
|
361
|
+
|
|
324
362
|
declare const VERSION: string;
|
|
325
363
|
|
|
364
|
+
/**
|
|
365
|
+
* 다중 API 키 로테이션 + rate-limit 완화 유틸.
|
|
366
|
+
*
|
|
367
|
+
* 정책:
|
|
368
|
+
* - 기본 선택은 라운드 로빈
|
|
369
|
+
* - 429/5xx/timeout 발생 키는 cooldown 적용
|
|
370
|
+
* - cooldown 중 키는 제외
|
|
371
|
+
* - 모든 키가 cooldown이면 가장 빠른 복구 시각을 계산해 안내
|
|
372
|
+
*/
|
|
373
|
+
interface ApiKeyRotationOptions {
|
|
374
|
+
baseCooldownMs?: number;
|
|
375
|
+
maxCooldownMs?: number;
|
|
376
|
+
}
|
|
377
|
+
interface KeyHealthSnapshot {
|
|
378
|
+
keyId: string;
|
|
379
|
+
totalRequests: number;
|
|
380
|
+
successCount: number;
|
|
381
|
+
failureCount: number;
|
|
382
|
+
consecutiveFailures: number;
|
|
383
|
+
lastUsedAt?: number;
|
|
384
|
+
cooldownUntil?: number;
|
|
385
|
+
}
|
|
386
|
+
interface AcquireResult {
|
|
387
|
+
key: string;
|
|
388
|
+
keyId: string;
|
|
389
|
+
}
|
|
390
|
+
declare class AllKeysCoolingDownError extends Error {
|
|
391
|
+
waitMs: number;
|
|
392
|
+
constructor(waitMs: number);
|
|
393
|
+
}
|
|
394
|
+
declare class ApiKeyRotationPool {
|
|
395
|
+
private readonly states;
|
|
396
|
+
private readonly baseCooldownMs;
|
|
397
|
+
private readonly maxCooldownMs;
|
|
398
|
+
private cursor;
|
|
399
|
+
constructor(keys: string[], options?: ApiKeyRotationOptions);
|
|
400
|
+
static fromEnv(env?: NodeJS.ProcessEnv): ApiKeyRotationPool;
|
|
401
|
+
acquire(now?: number): AcquireResult;
|
|
402
|
+
markSuccess(keyId: string): void;
|
|
403
|
+
markFailure(keyId: string, opts?: {
|
|
404
|
+
status?: number;
|
|
405
|
+
retryAfterMs?: number;
|
|
406
|
+
timeout?: boolean;
|
|
407
|
+
}, now?: number): void;
|
|
408
|
+
snapshot(): KeyHealthSnapshot[];
|
|
409
|
+
private isRetryableFailure;
|
|
410
|
+
private find;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
type UnifiedStage = "convert" | "render" | "probe" | "ocr" | "proofread" | "merge";
|
|
414
|
+
type UnifiedOcrErrorCode = "UNSUPPORTED_INPUT" | "SOFFICE_NOT_FOUND" | "CONVERT_FAILED" | "RENDER_FAILED" | "PROBE_FAILED" | "OCR_FAILED" | "PROOFREAD_FAILED" | "MERGE_FAILED" | "UNKNOWN";
|
|
415
|
+
interface UnifiedOcrProgressEvent {
|
|
416
|
+
type: "stage_start" | "stage_progress" | "stage_done" | "error";
|
|
417
|
+
stage: UnifiedStage;
|
|
418
|
+
stagePercent: number;
|
|
419
|
+
overallPercent: number;
|
|
420
|
+
current?: number;
|
|
421
|
+
total?: number;
|
|
422
|
+
code?: UnifiedOcrErrorCode;
|
|
423
|
+
message?: string;
|
|
424
|
+
}
|
|
425
|
+
interface UnifiedOcrOptions {
|
|
426
|
+
workspaceDir?: string;
|
|
427
|
+
outputPath?: string;
|
|
428
|
+
dpi?: number;
|
|
429
|
+
baseUrl?: string;
|
|
430
|
+
onEvent?: (event: UnifiedOcrProgressEvent) => void;
|
|
431
|
+
modelCandidates?: string[];
|
|
432
|
+
modelMaxTokens?: Record<string, number>;
|
|
433
|
+
stageWeights?: Partial<Record<UnifiedStage, number>>;
|
|
434
|
+
timeoutMs?: number;
|
|
435
|
+
maxRetriesPerPage?: number;
|
|
436
|
+
probeConcurrency?: number;
|
|
437
|
+
logger?: Logger;
|
|
438
|
+
runId?: string;
|
|
439
|
+
}
|
|
440
|
+
interface UnifiedOcrResult {
|
|
441
|
+
outputPath: string;
|
|
442
|
+
reportPath: string;
|
|
443
|
+
selectedModel: string;
|
|
444
|
+
}
|
|
445
|
+
declare function runUnifiedOcrPipeline(inputPath: string, options?: UnifiedOcrOptions): Promise<UnifiedOcrResult>;
|
|
446
|
+
|
|
326
447
|
/**
|
|
327
448
|
* kordoc — 모두 파싱해버리겠다
|
|
328
449
|
*
|
|
@@ -353,4 +474,4 @@ declare function parseXlsx(buffer: ArrayBuffer, options?: ParseOptions, zip?: JS
|
|
|
353
474
|
/** DOCX 파일을 Markdown으로 변환 */
|
|
354
475
|
declare function parseDocx(buffer: ArrayBuffer, options?: ParseOptions, zip?: JSZip): Promise<ParseResult>;
|
|
355
476
|
|
|
356
|
-
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx };
|
|
477
|
+
export { type AcquireResult, AllKeysCoolingDownError, type ApiKeyRotationOptions, ApiKeyRotationPool, type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type KeyHealthSnapshot, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, type UnifiedOcrOptions, type UnifiedOcrProgressEvent, type UnifiedOcrResult, type UnifiedStage, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx, runUnifiedOcrPipeline };
|