@clazic/kordoc 2.4.11 → 2.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -125,6 +125,18 @@ interface ParseOptions {
125
125
  ocrBatchSize?: number;
126
126
  /** PDF 머리글/바닥글 자동 제거 */
127
127
  removeHeaderFooter?: boolean;
128
+ /**
129
+ * 부분 파싱 실패 허용 비율 (0~1).
130
+ * - undefined: 실패 페이지가 있어도 경고만 기록하고 계속 진행
131
+ * - 예: 0.3이면 대상 페이지의 30%를 초과해 실패 시 오류로 중단
132
+ */
133
+ maxPartialFailureRatio?: number;
134
+ /**
135
+ * PDF 텍스트 정규화 모드.
136
+ * - "default": 줄 병합/균등배분 공백 보정 적용 (기본값)
137
+ * - "strict-preserve": 원문 줄바꿈/공백 보존 우선 (페이지 번호 제거만 수행)
138
+ */
139
+ pdfTextNormalization?: "default" | "strict-preserve";
128
140
  }
129
141
  /** 파싱 중 스킵/실패한 요소 보고 */
130
142
  interface ParseWarning {
@@ -320,9 +332,117 @@ declare function detectZipFormat(buffer: ArrayBuffer): Promise<{
320
332
 
321
333
  declare function blocksToMarkdown(blocks: IRBlock[]): string;
322
334
 
335
+ type LogLevel = "error" | "warn" | "info" | "debug" | "trace";
336
+ type LogEventType = "start" | "progress" | "done" | "error" | "message";
337
+ type LogStage = "detect" | "convert" | "render" | "probe" | "ocr" | "proofread" | "merge" | "finalize" | "unknown";
338
+ interface LogEvent {
339
+ ts?: string;
340
+ level: LogLevel;
341
+ runId?: string;
342
+ stage?: LogStage;
343
+ event?: LogEventType;
344
+ component?: string;
345
+ message: string;
346
+ meta?: Record<string, unknown>;
347
+ error?: {
348
+ code?: string;
349
+ name?: string;
350
+ message?: string;
351
+ stack?: string;
352
+ };
353
+ }
354
+ interface Logger {
355
+ log(event: LogEvent): void;
356
+ child(context: Partial<LogEvent>): Logger;
357
+ withRun(runId: string): Logger;
358
+ }
359
+
323
360
  /** kordoc 공용 유틸리티 */
361
+
324
362
  declare const VERSION: string;
325
363
 
364
+ /**
365
+ * 다중 API 키 로테이션 + rate-limit 완화 유틸.
366
+ *
367
+ * 정책:
368
+ * - 기본 선택은 라운드 로빈
369
+ * - 429/5xx/timeout 발생 키는 cooldown 적용
370
+ * - cooldown 중 키는 제외
371
+ * - 모든 키가 cooldown이면 가장 빠른 복구 시각을 계산해 안내
372
+ */
373
+ interface ApiKeyRotationOptions {
374
+ baseCooldownMs?: number;
375
+ maxCooldownMs?: number;
376
+ }
377
+ interface KeyHealthSnapshot {
378
+ keyId: string;
379
+ totalRequests: number;
380
+ successCount: number;
381
+ failureCount: number;
382
+ consecutiveFailures: number;
383
+ lastUsedAt?: number;
384
+ cooldownUntil?: number;
385
+ }
386
+ interface AcquireResult {
387
+ key: string;
388
+ keyId: string;
389
+ }
390
+ declare class AllKeysCoolingDownError extends Error {
391
+ waitMs: number;
392
+ constructor(waitMs: number);
393
+ }
394
+ declare class ApiKeyRotationPool {
395
+ private readonly states;
396
+ private readonly baseCooldownMs;
397
+ private readonly maxCooldownMs;
398
+ private cursor;
399
+ constructor(keys: string[], options?: ApiKeyRotationOptions);
400
+ static fromEnv(env?: NodeJS.ProcessEnv): ApiKeyRotationPool;
401
+ acquire(now?: number): AcquireResult;
402
+ markSuccess(keyId: string): void;
403
+ markFailure(keyId: string, opts?: {
404
+ status?: number;
405
+ retryAfterMs?: number;
406
+ timeout?: boolean;
407
+ }, now?: number): void;
408
+ snapshot(): KeyHealthSnapshot[];
409
+ private isRetryableFailure;
410
+ private find;
411
+ }
412
+
413
+ type UnifiedStage = "convert" | "render" | "probe" | "ocr" | "proofread" | "merge";
414
+ type UnifiedOcrErrorCode = "UNSUPPORTED_INPUT" | "SOFFICE_NOT_FOUND" | "CONVERT_FAILED" | "RENDER_FAILED" | "PROBE_FAILED" | "OCR_FAILED" | "PROOFREAD_FAILED" | "MERGE_FAILED" | "UNKNOWN";
415
+ interface UnifiedOcrProgressEvent {
416
+ type: "stage_start" | "stage_progress" | "stage_done" | "error";
417
+ stage: UnifiedStage;
418
+ stagePercent: number;
419
+ overallPercent: number;
420
+ current?: number;
421
+ total?: number;
422
+ code?: UnifiedOcrErrorCode;
423
+ message?: string;
424
+ }
425
+ interface UnifiedOcrOptions {
426
+ workspaceDir?: string;
427
+ outputPath?: string;
428
+ dpi?: number;
429
+ baseUrl?: string;
430
+ onEvent?: (event: UnifiedOcrProgressEvent) => void;
431
+ modelCandidates?: string[];
432
+ modelMaxTokens?: Record<string, number>;
433
+ stageWeights?: Partial<Record<UnifiedStage, number>>;
434
+ timeoutMs?: number;
435
+ maxRetriesPerPage?: number;
436
+ logger?: Logger;
437
+ runId?: string;
438
+ }
439
+ interface UnifiedOcrResult {
440
+ outputPath: string;
441
+ reportPath: string;
442
+ selectedModel: string;
443
+ }
444
+ declare function runUnifiedOcrPipeline(inputPath: string, options?: UnifiedOcrOptions): Promise<UnifiedOcrResult>;
445
+
326
446
  /**
327
447
  * kordoc — 모두 파싱해버리겠다
328
448
  *
@@ -353,4 +473,4 @@ declare function parseXlsx(buffer: ArrayBuffer, options?: ParseOptions, zip?: JS
353
473
  /** DOCX 파일을 Markdown으로 변환 */
354
474
  declare function parseDocx(buffer: ArrayBuffer, options?: ParseOptions, zip?: JSZip): Promise<ParseResult>;
355
475
 
356
- export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx };
476
+ export { type AcquireResult, AllKeysCoolingDownError, type ApiKeyRotationOptions, ApiKeyRotationPool, type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type KeyHealthSnapshot, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, type UnifiedOcrOptions, type UnifiedOcrProgressEvent, type UnifiedOcrResult, type UnifiedStage, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx, runUnifiedOcrPipeline };
package/dist/index.d.ts CHANGED
@@ -125,6 +125,18 @@ interface ParseOptions {
125
125
  ocrBatchSize?: number;
126
126
  /** PDF 머리글/바닥글 자동 제거 */
127
127
  removeHeaderFooter?: boolean;
128
+ /**
129
+ * 부분 파싱 실패 허용 비율 (0~1).
130
+ * - undefined: 실패 페이지가 있어도 경고만 기록하고 계속 진행
131
+ * - 예: 0.3이면 대상 페이지의 30%를 초과해 실패 시 오류로 중단
132
+ */
133
+ maxPartialFailureRatio?: number;
134
+ /**
135
+ * PDF 텍스트 정규화 모드.
136
+ * - "default": 줄 병합/균등배분 공백 보정 적용 (기본값)
137
+ * - "strict-preserve": 원문 줄바꿈/공백 보존 우선 (페이지 번호 제거만 수행)
138
+ */
139
+ pdfTextNormalization?: "default" | "strict-preserve";
128
140
  }
129
141
  /** 파싱 중 스킵/실패한 요소 보고 */
130
142
  interface ParseWarning {
@@ -320,9 +332,117 @@ declare function detectZipFormat(buffer: ArrayBuffer): Promise<{
320
332
 
321
333
  declare function blocksToMarkdown(blocks: IRBlock[]): string;
322
334
 
335
+ type LogLevel = "error" | "warn" | "info" | "debug" | "trace";
336
+ type LogEventType = "start" | "progress" | "done" | "error" | "message";
337
+ type LogStage = "detect" | "convert" | "render" | "probe" | "ocr" | "proofread" | "merge" | "finalize" | "unknown";
338
+ interface LogEvent {
339
+ ts?: string;
340
+ level: LogLevel;
341
+ runId?: string;
342
+ stage?: LogStage;
343
+ event?: LogEventType;
344
+ component?: string;
345
+ message: string;
346
+ meta?: Record<string, unknown>;
347
+ error?: {
348
+ code?: string;
349
+ name?: string;
350
+ message?: string;
351
+ stack?: string;
352
+ };
353
+ }
354
+ interface Logger {
355
+ log(event: LogEvent): void;
356
+ child(context: Partial<LogEvent>): Logger;
357
+ withRun(runId: string): Logger;
358
+ }
359
+
323
360
  /** kordoc 공용 유틸리티 */
361
+
324
362
  declare const VERSION: string;
325
363
 
364
+ /**
365
+ * 다중 API 키 로테이션 + rate-limit 완화 유틸.
366
+ *
367
+ * 정책:
368
+ * - 기본 선택은 라운드 로빈
369
+ * - 429/5xx/timeout 발생 키는 cooldown 적용
370
+ * - cooldown 중 키는 제외
371
+ * - 모든 키가 cooldown이면 가장 빠른 복구 시각을 계산해 안내
372
+ */
373
+ interface ApiKeyRotationOptions {
374
+ baseCooldownMs?: number;
375
+ maxCooldownMs?: number;
376
+ }
377
+ interface KeyHealthSnapshot {
378
+ keyId: string;
379
+ totalRequests: number;
380
+ successCount: number;
381
+ failureCount: number;
382
+ consecutiveFailures: number;
383
+ lastUsedAt?: number;
384
+ cooldownUntil?: number;
385
+ }
386
+ interface AcquireResult {
387
+ key: string;
388
+ keyId: string;
389
+ }
390
+ declare class AllKeysCoolingDownError extends Error {
391
+ waitMs: number;
392
+ constructor(waitMs: number);
393
+ }
394
+ declare class ApiKeyRotationPool {
395
+ private readonly states;
396
+ private readonly baseCooldownMs;
397
+ private readonly maxCooldownMs;
398
+ private cursor;
399
+ constructor(keys: string[], options?: ApiKeyRotationOptions);
400
+ static fromEnv(env?: NodeJS.ProcessEnv): ApiKeyRotationPool;
401
+ acquire(now?: number): AcquireResult;
402
+ markSuccess(keyId: string): void;
403
+ markFailure(keyId: string, opts?: {
404
+ status?: number;
405
+ retryAfterMs?: number;
406
+ timeout?: boolean;
407
+ }, now?: number): void;
408
+ snapshot(): KeyHealthSnapshot[];
409
+ private isRetryableFailure;
410
+ private find;
411
+ }
412
+
413
+ type UnifiedStage = "convert" | "render" | "probe" | "ocr" | "proofread" | "merge";
414
+ type UnifiedOcrErrorCode = "UNSUPPORTED_INPUT" | "SOFFICE_NOT_FOUND" | "CONVERT_FAILED" | "RENDER_FAILED" | "PROBE_FAILED" | "OCR_FAILED" | "PROOFREAD_FAILED" | "MERGE_FAILED" | "UNKNOWN";
415
+ interface UnifiedOcrProgressEvent {
416
+ type: "stage_start" | "stage_progress" | "stage_done" | "error";
417
+ stage: UnifiedStage;
418
+ stagePercent: number;
419
+ overallPercent: number;
420
+ current?: number;
421
+ total?: number;
422
+ code?: UnifiedOcrErrorCode;
423
+ message?: string;
424
+ }
425
+ interface UnifiedOcrOptions {
426
+ workspaceDir?: string;
427
+ outputPath?: string;
428
+ dpi?: number;
429
+ baseUrl?: string;
430
+ onEvent?: (event: UnifiedOcrProgressEvent) => void;
431
+ modelCandidates?: string[];
432
+ modelMaxTokens?: Record<string, number>;
433
+ stageWeights?: Partial<Record<UnifiedStage, number>>;
434
+ timeoutMs?: number;
435
+ maxRetriesPerPage?: number;
436
+ logger?: Logger;
437
+ runId?: string;
438
+ }
439
+ interface UnifiedOcrResult {
440
+ outputPath: string;
441
+ reportPath: string;
442
+ selectedModel: string;
443
+ }
444
+ declare function runUnifiedOcrPipeline(inputPath: string, options?: UnifiedOcrOptions): Promise<UnifiedOcrResult>;
445
+
326
446
  /**
327
447
  * kordoc — 모두 파싱해버리겠다
328
448
  *
@@ -353,4 +473,4 @@ declare function parseXlsx(buffer: ArrayBuffer, options?: ParseOptions, zip?: JS
353
473
  /** DOCX 파일을 Markdown으로 변환 */
354
474
  declare function parseDocx(buffer: ArrayBuffer, options?: ParseOptions, zip?: JSZip): Promise<ParseResult>;
355
475
 
356
- export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx };
476
+ export { type AcquireResult, AllKeysCoolingDownError, type ApiKeyRotationOptions, ApiKeyRotationPool, type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type KeyHealthSnapshot, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, type UnifiedOcrOptions, type UnifiedOcrProgressEvent, type UnifiedOcrResult, type UnifiedStage, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx, runUnifiedOcrPipeline };