@clazic/kordoc 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -190,7 +190,7 @@ kordoc scan.pdf --ocr gemini # gemini CLI 강제 지정
190
190
  kordoc scan.pdf --ocr claude # claude CLI 강제 지정
191
191
  kordoc scan.pdf --ocr codex # codex CLI 강제 지정
192
192
  kordoc scan.pdf --ocr ollama # Ollama REST API 사용 (기본 모델: gemma4:27b)
193
- kordoc scan.pdf --ocr tesseract # tesseract.js 사용 (npm install tesseract.js 필요)
193
+ kordoc scan.pdf --ocr tesseract # 내장 tesseract.js 사용 (별도 설치 불필요)
194
194
  kordoc scan.pdf --ocr off # OCR 비활성화
195
195
  ```
196
196
 
@@ -238,7 +238,7 @@ kordoc scan.pdf --ocr gemini # gemini CLI 지정
238
238
  kordoc scan.pdf --ocr claude # claude CLI 지정
239
239
  kordoc scan.pdf --ocr codex # codex CLI 지정
240
240
  kordoc scan.pdf --ocr ollama # Ollama (KORDOC_OLLAMA_MODEL 환경변수로 모델 지정)
241
- kordoc scan.pdf --ocr tesseract # tesseract.js (npm install tesseract.js 필요)
241
+ kordoc scan.pdf --ocr tesseract # 내장 tesseract.js (별도 설치 불필요)
242
242
 
243
243
  # Markdown → 문서 변환
244
244
  kordoc convert 보고서.md # → 보고서.hwpx (기본)
@@ -6,7 +6,7 @@ import {
6
6
  precheckZipSize,
7
7
  sanitizeHref,
8
8
  toArrayBuffer
9
- } from "./chunk-XWET7ONC.js";
9
+ } from "./chunk-7MXQWWUW.js";
10
10
  import {
11
11
  parsePageRange
12
12
  } from "./chunk-MOL7MDBG.js";
@@ -5406,7 +5406,7 @@ async function parsePdfDocument(buffer, options) {
5406
5406
  const ocrMode = options?.ocrMode;
5407
5407
  if (!ocrProvider && ocrMode && ocrMode !== "off") {
5408
5408
  try {
5409
- const { resolveOcrProvider } = await import("./resolve-Y3KMGD3R.js");
5409
+ const { resolveOcrProvider } = await import("./resolve-Z4DEPDUS.js");
5410
5410
  ocrProvider = await resolveOcrProvider(ocrMode, warnings);
5411
5411
  } catch (resolveErr) {
5412
5412
  if (ocrMode !== "auto") {
@@ -9614,4 +9614,4 @@ export {
9614
9614
  cfb/cfb.js:
9615
9615
  (*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
9616
9616
  */
9617
- //# sourceMappingURL=chunk-JJMA5HGQ.js.map
9617
+ //# sourceMappingURL=chunk-3NF22UFF.js.map
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/utils.ts
4
- var VERSION = true ? "2.2.0" : "0.0.0-dev";
4
+ var VERSION = true ? "2.2.1" : "0.0.0-dev";
5
5
  function toArrayBuffer(buf) {
6
6
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
7
7
  return buf.buffer;
@@ -90,4 +90,4 @@ export {
90
90
  sanitizeHref,
91
91
  classifyError
92
92
  };
93
- //# sourceMappingURL=chunk-XWET7ONC.js.map
93
+ //# sourceMappingURL=chunk-7MXQWWUW.js.map
package/dist/cli.js CHANGED
@@ -4,11 +4,11 @@ import {
4
4
  markdownToHwpx,
5
5
  markdownToXlsx,
6
6
  parse
7
- } from "./chunk-JJMA5HGQ.js";
7
+ } from "./chunk-3NF22UFF.js";
8
8
  import {
9
9
  VERSION,
10
10
  toArrayBuffer
11
- } from "./chunk-XWET7ONC.js";
11
+ } from "./chunk-7MXQWWUW.js";
12
12
  import "./chunk-MOL7MDBG.js";
13
13
  import "./chunk-ZWE3DS7E.js";
14
14
 
@@ -121,7 +121,7 @@ program.enablePositionalOptions().name("kordoc").description("\uBAA8\uB450 \uD30
121
121
  saveImages(absPath);
122
122
  }
123
123
  } catch (err) {
124
- const { sanitizeError } = await import("./utils-4NP2VUFW.js");
124
+ const { sanitizeError } = await import("./utils-I4UIMOH7.js");
125
125
  process.stderr.write(`
126
126
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
127
127
  `);
@@ -195,7 +195,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
195
195
  `));
196
196
  }
197
197
  } catch (err) {
198
- const { sanitizeError } = await import("./utils-4NP2VUFW.js");
198
+ const { sanitizeError } = await import("./utils-I4UIMOH7.js");
199
199
  process.stderr.write(` FAIL
200
200
  `);
201
201
  process.stderr.write(` \u2192 ${sanitizeError(err)}
@@ -204,7 +204,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
204
204
  }
205
205
  });
206
206
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
207
- const { watchDirectory } = await import("./watch-4VVWG2WC.js");
207
+ const { watchDirectory } = await import("./watch-XPLMUIZB.js");
208
208
  await watchDirectory({
209
209
  dir,
210
210
  outDir: opts.outDir,
package/dist/index.cjs CHANGED
@@ -1961,8 +1961,7 @@ function detectAvailableOcr() {
1961
1961
  for (const cli of CLI_PRIORITY) {
1962
1962
  if (isCliInstalled(cli)) return cli;
1963
1963
  }
1964
- if (isTesseractAvailable()) return "tesseract";
1965
- return null;
1964
+ return "tesseract";
1966
1965
  }
1967
1966
  function isCliInstalled(name) {
1968
1967
  try {
@@ -1973,25 +1972,8 @@ function isCliInstalled(name) {
1973
1972
  return false;
1974
1973
  }
1975
1974
  }
1976
- function isTesseractAvailable() {
1977
- try {
1978
- const require2 = (0, import_module.createRequire)(import_meta.url);
1979
- require2.resolve("tesseract.js");
1980
- return true;
1981
- } catch {
1982
- return false;
1983
- }
1984
- }
1985
1975
  function validateOcrMode(mode) {
1986
- if (mode === "auto" || mode === "off") return;
1987
- if (mode === "tesseract") {
1988
- if (!isTesseractAvailable()) {
1989
- throw new Error(
1990
- "tesseract.js\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.\n\uC124\uCE58: npm install tesseract.js"
1991
- );
1992
- }
1993
- return;
1994
- }
1976
+ if (mode === "auto" || mode === "off" || mode === "tesseract") return;
1995
1977
  if (!isCliInstalled(mode)) {
1996
1978
  throw new Error(`'${mode}' CLI\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.
1997
1979
  ${getInstallGuide(mode)}`);
@@ -2006,13 +1988,22 @@ function getInstallGuide(mode) {
2006
1988
  };
2007
1989
  return guides[mode] || `'${mode}'\uC744(\uB97C) \uC124\uCE58\uD574\uC8FC\uC138\uC694.`;
2008
1990
  }
2009
- var import_child_process, import_module, import_meta, CLI_PRIORITY;
1991
+ function getTesseractFallbackMessage() {
1992
+ return [
1993
+ "\uC124\uCE58\uB41C AI CLI\uAC00 \uC5C6\uC5B4 \uB0B4\uC7A5 tesseract.js\uB85C OCR\uC744 \uC218\uD589\uD569\uB2C8\uB2E4.",
1994
+ "\uB354 \uB098\uC740 \uD488\uC9C8(\uD14C\uC774\uBE14/\uD5E4\uB529 \uAD6C\uC870 \uBCF4\uC874)\uC744 \uC704\uD574 AI CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4:",
1995
+ "",
1996
+ " [\uAD8C\uC7A5] Gemini CLI: https://ai.google.dev/gemini-api/docs/cli",
1997
+ " Claude CLI: npm install -g @anthropic-ai/claude-code",
1998
+ " Codex CLI: npm install -g @openai/codex",
1999
+ " Ollama: brew install ollama (+ ollama pull gemma4:27b)"
2000
+ ].join("\n");
2001
+ }
2002
+ var import_child_process, CLI_PRIORITY;
2010
2003
  var init_auto_detect = __esm({
2011
2004
  "src/ocr/auto-detect.ts"() {
2012
2005
  "use strict";
2013
2006
  import_child_process = require("child_process");
2014
- import_module = require("module");
2015
- import_meta = {};
2016
2007
  CLI_PRIORITY = ["gemini", "claude", "codex", "ollama"];
2017
2008
  }
2018
2009
  });
@@ -2122,15 +2113,7 @@ __export(tesseract_provider_exports, {
2122
2113
  createTesseractProvider: () => createTesseractProvider
2123
2114
  });
2124
2115
  async function createTesseractProvider() {
2125
- let tesseract;
2126
- try {
2127
- tesseract = await import("tesseract.js");
2128
- } catch {
2129
- throw new Error(
2130
- "tesseract.js\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.\n\uC124\uCE58: npm install tesseract.js"
2131
- );
2132
- }
2133
- const worker = await tesseract.createWorker("kor+eng");
2116
+ const worker = await (0, import_tesseract.createWorker)("kor+eng");
2134
2117
  let terminated = false;
2135
2118
  const provider = async (pageImage, _pageNumber, _mimeType) => {
2136
2119
  const { data } = await worker.recognize(pageImage);
@@ -2144,9 +2127,11 @@ async function createTesseractProvider() {
2144
2127
  };
2145
2128
  return provider;
2146
2129
  }
2130
+ var import_tesseract;
2147
2131
  var init_tesseract_provider = __esm({
2148
2132
  "src/ocr/tesseract-provider.ts"() {
2149
2133
  "use strict";
2134
+ import_tesseract = require("tesseract.js");
2150
2135
  }
2151
2136
  });
2152
2137
 
@@ -2168,20 +2153,20 @@ async function resolveOcrProvider(mode, warnings) {
2168
2153
  return createCliOcrProvider(mode);
2169
2154
  }
2170
2155
  const detected = detectAvailableOcr();
2171
- if (!detected) {
2172
- throw new Error("\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uB3C4\uAD6C\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
2173
- }
2174
2156
  if (detected !== "gemini") {
2175
- warnings?.push({
2176
- message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (gemini CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 gemini CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
2177
- code: "OCR_CLI_FALLBACK"
2178
- });
2157
+ if (detected === "tesseract") {
2158
+ warnings?.push({
2159
+ message: getTesseractFallbackMessage(),
2160
+ code: "OCR_CLI_FALLBACK"
2161
+ });
2162
+ } else {
2163
+ warnings?.push({
2164
+ message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (gemini CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 gemini CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
2165
+ code: "OCR_CLI_FALLBACK"
2166
+ });
2167
+ }
2179
2168
  }
2180
2169
  if (detected === "tesseract") {
2181
- warnings?.push({
2182
- message: "tesseract.js\uB294 \uD14C\uC774\uBE14 \uAD6C\uC870\uB97C \uBCF5\uC6D0\uD558\uC9C0 \uBABB\uD569\uB2C8\uB2E4. Vision LLM CLI(gemini/claude/codex) \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.",
2183
- code: "OCR_CLI_FALLBACK"
2184
- });
2185
2170
  const { createTesseractProvider: createTesseractProvider2 } = await Promise.resolve().then(() => (init_tesseract_provider(), tesseract_provider_exports));
2186
2171
  return createTesseractProvider2();
2187
2172
  }
@@ -2443,7 +2428,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
2443
2428
  var import_xmldom = require("@xmldom/xmldom");
2444
2429
 
2445
2430
  // src/utils.ts
2446
- var VERSION = true ? "2.2.0" : "0.0.0-dev";
2431
+ var VERSION = true ? "2.2.1" : "0.0.0-dev";
2447
2432
  function toArrayBuffer(buf) {
2448
2433
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
2449
2434
  return buf.buffer;