kordoc 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/utils.ts"],"sourcesContent":["/** kordoc 공용 유틸리티 */\r\n\r\n/** 빌드 타임에 tsup define으로 주입되는 버전 */\r\ndeclare const __KORDOC_VERSION__: string\r\nexport const VERSION: string = typeof __KORDOC_VERSION__ !== \"undefined\" ? __KORDOC_VERSION__ : \"0.0.0-dev\"\r\n\r\n/**\r\n * Node.js Buffer → ArrayBuffer 변환\r\n * pool Buffer의 공유 ArrayBuffer 문제를 안전하게 처리.\r\n * offset=0이고 전체 ArrayBuffer를 차지하면 복사 없이 직접 반환.\r\n */\r\nexport function toArrayBuffer(buf: Buffer): ArrayBuffer {\r\n if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {\r\n return buf.buffer as ArrayBuffer\r\n }\r\n return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength) as ArrayBuffer\r\n}\r\n\r\n/**\r\n * kordoc 내부 에러 클래스 — 사용자에게 노출해도 안전한 메시지만 포함.\r\n * MCP 에러 정제에서 instanceof로 판별하여 allowlist 패턴 매칭 없이 안전하게 통과.\r\n */\r\nexport class KordocError extends Error {\r\n constructor(message: string) {\r\n super(message)\r\n this.name = \"KordocError\"\r\n }\r\n}\r\n\r\n/**\r\n * 에러 메시지 정제 — KordocError는 그대로, 나머지는 일반 메시지로 대체.\r\n * 파일시스템 경로, 스택 트레이스 등 내부 정보 노출 방지.\r\n */\r\nexport function sanitizeError(err: unknown): string {\r\n if (err instanceof KordocError) return err.message\r\n return \"문서 처리 중 오류가 발생했습니다\"\r\n}\r\n\r\n/**\r\n * ZIP 엔트리 경로의 경로 순회 여부 판별.\r\n * 백슬래시 정규화, .., 절대경로, Windows 드라이브 문자 모두 차단.\r\n */\r\nexport function isPathTraversal(name: string): boolean {\r\n if (name.includes(\"\\x00\")) return true\r\n const normalized = name.replace(/\\\\/g, \"/\")\r\n return normalized.includes(\"..\") || normalized.startsWith(\"/\") || /^[A-Za-z]:/.test(normalized)\r\n}\r\n\r\n// ─── ZIP 안전 로딩 (ZIP bomb 방지) ────────────────────\r\n\r\n/**\r\n * ZIP bomb 사전 검사 — Central Directory에서 비압축 합계와 엔트리 수 확인.\r\n * HWPX/XLSX/DOCX 등 모든 ZIP 기반 포맷에서 공통 사용.\r\n */\r\nexport function precheckZipSize(\r\n buffer: ArrayBuffer,\r\n maxUncompressedSize = 100 * 1024 * 1024,\r\n maxEntries = 500,\r\n): { totalUncompressed: number; entryCount: number } {\r\n try {\r\n 
const data = new DataView(buffer)\r\n const len = buffer.byteLength\r\n // EOCD 시그니처 역방향 스캔\r\n let eocdOffset = -1\r\n for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {\r\n if (data.getUint32(i, true) === 0x06054b50) { eocdOffset = i; break }\r\n }\r\n if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 }\r\n\r\n const entryCount = data.getUint16(eocdOffset + 10, true)\r\n if (entryCount > maxEntries) {\r\n throw new KordocError(`ZIP 엔트리 수 초과: ${entryCount} (최대 ${maxEntries})`)\r\n }\r\n\r\n const cdSize = data.getUint32(eocdOffset + 12, true)\r\n const cdOffset = data.getUint32(eocdOffset + 16, true)\r\n if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount }\r\n\r\n let totalUncompressed = 0\r\n let pos = cdOffset\r\n for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {\r\n if (data.getUint32(pos, true) !== 0x02014b50) break\r\n totalUncompressed += data.getUint32(pos + 24, true)\r\n const nameLen = data.getUint16(pos + 28, true)\r\n const extraLen = data.getUint16(pos + 30, true)\r\n const commentLen = data.getUint16(pos + 32, true)\r\n pos += 46 + nameLen + extraLen + commentLen\r\n }\r\n\r\n if (totalUncompressed > maxUncompressedSize) {\r\n throw new KordocError(`ZIP 비압축 크기 초과: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (최대 ${maxUncompressedSize / 1024 / 1024}MB)`)\r\n }\r\n\r\n return { totalUncompressed, entryCount }\r\n } catch (err) {\r\n if (err instanceof KordocError) throw err\r\n return { totalUncompressed: 0, entryCount: 0 }\r\n }\r\n}\r\n\r\n/** 하이퍼링크 URL 살균 — javascript: 등 XSS 위험 스킴 차단 */\r\nconst SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i\r\nexport function sanitizeHref(href: string): string | null {\r\n const trimmed = href.trim()\r\n if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null\r\n return trimmed\r\n}\r\n\r\n// ─── 에러 분류 ──────────────────────────────────────\r\n\r\nimport type { ErrorCode } from \"./types.js\"\r\n\r\n/** 에러를 구조화된 ErrorCode로 분류 — KordocError 메시지 
패턴 매칭 */\r\nexport function classifyError(err: unknown): ErrorCode {\r\n if (!(err instanceof Error)) return \"PARSE_ERROR\"\r\n const msg = err.message\r\n if (msg.includes(\"암호화\")) return \"ENCRYPTED\"\r\n if (msg.includes(\"DRM\")) return \"DRM_PROTECTED\"\r\n if (msg.includes(\"ZIP bomb\") || msg.includes(\"ZIP 비압축 크기 초과\") || msg.includes(\"ZIP 엔트리 수 초과\")) return \"ZIP_BOMB\"\r\n if (msg.includes(\"bomb\") || msg.includes(\"크기 초과\") || msg.includes(\"압축 해제\")) return \"DECOMPRESSION_BOMB\"\r\n if (msg.includes(\"이미지 기반\")) return \"IMAGE_BASED_PDF\"\r\n if (msg.includes(\"섹션\") && (msg.includes(\"찾을 수 없\") || msg.includes(\"없음\"))) return \"NO_SECTIONS\"\r\n if (msg.includes(\"시그니처\") || msg.includes(\"복구할 수 없\")) return \"CORRUPTED\"\r\n return \"PARSE_ERROR\"\r\n}\r\n"],"mappings":";;;AAIO,IAAM,UAAkB,OAA4C,UAAqB;AAOzF,SAAS,cAAc,KAA0B;AACtD,MAAI,IAAI,eAAe,KAAK,IAAI,eAAe,IAAI,OAAO,YAAY;AACpE,WAAO,IAAI;AAAA,EACb;AACA,SAAO,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AACzE;AAMO,IAAM,cAAN,cAA0B,MAAM;AAAA,EACrC,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAMO,SAAS,cAAc,KAAsB;AAClD,MAAI,eAAe,YAAa,QAAO,IAAI;AAC3C,SAAO;AACT;AAMO,SAAS,gBAAgB,MAAuB;AACrD,MAAI,KAAK,SAAS,IAAM,EAAG,QAAO;AAClC,QAAM,aAAa,KAAK,QAAQ,OAAO,GAAG;AAC1C,SAAO,WAAW,SAAS,IAAI,KAAK,WAAW,WAAW,GAAG,KAAK,aAAa,KAAK,UAAU;AAChG;AAQO,SAAS,gBACd,QACA,sBAAsB,MAAM,OAAO,MACnC,aAAa,KACsC;AACnD,MAAI;AACF,UAAM,OAAO,IAAI,SAAS,MAAM;AAChC,UAAM,MAAM,OAAO;AAEnB,QAAI,aAAa;AACjB,aAAS,IAAI,MAAM,IAAI,KAAK,KAAK,IAAI,GAAG,MAAM,KAAK,GAAG,KAAK;AACzD,UAAI,KAAK,UAAU,GAAG,IAAI,MAAM,WAAY;AAAE,qBAAa;AAAG;AAAA,MAAM;AAAA,IACtE;AACA,QAAI,aAAa,EAAG,QAAO,EAAE,mBAAmB,GAAG,YAAY,EAAE;AAEjE,UAAM,aAAa,KAAK,UAAU,aAAa,IAAI,IAAI;AACvD,QAAI,aAAa,YAAY;AAC3B,YAAM,IAAI,YAAY,+CAAiB,UAAU,kBAAQ,UAAU,GAAG;AAAA,IACxE;AAEA,UAAM,SAAS,KAAK,UAAU,aAAa,IAAI,IAAI;AACnD,UAAM,WAAW,KAAK,UAAU,aAAa,IAAI,IAAI;AACrD,QAAI,WAAW,SAAS,IAAK,QAAO,EAAE,mBAAmB,GAAG,WAAW;AAEvE,QAAI,oBAAoB;AACxB,QAAI,MAAM;AACV,aAAS,IAAI,GAAG,IAAI,cAAc,MAAM,MAAM,WAAW,QAAQ,KAAK;AACp
E,UAAI,KAAK,UAAU,KAAK,IAAI,MAAM,SAAY;AAC9C,2BAAqB,KAAK,UAAU,MAAM,IAAI,IAAI;AAClD,YAAM,UAAU,KAAK,UAAU,MAAM,IAAI,IAAI;AAC7C,YAAM,WAAW,KAAK,UAAU,MAAM,IAAI,IAAI;AAC9C,YAAM,aAAa,KAAK,UAAU,MAAM,IAAI,IAAI;AAChD,aAAO,KAAK,UAAU,WAAW;AAAA,IACnC;AAEA,QAAI,oBAAoB,qBAAqB;AAC3C,YAAM,IAAI,YAAY,sDAAmB,oBAAoB,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,sBAAsB,OAAO,IAAI,KAAK;AAAA,IACtI;AAEA,WAAO,EAAE,mBAAmB,WAAW;AAAA,EACzC,SAAS,KAAK;AACZ,QAAI,eAAe,YAAa,OAAM;AACtC,WAAO,EAAE,mBAAmB,GAAG,YAAY,EAAE;AAAA,EAC/C;AACF;AAGA,IAAM,eAAe;AACd,SAAS,aAAa,MAA6B;AACxD,QAAM,UAAU,KAAK,KAAK;AAC1B,MAAI,CAAC,WAAW,CAAC,aAAa,KAAK,OAAO,EAAG,QAAO;AACpD,SAAO;AACT;AAOO,SAAS,cAAc,KAAyB;AACrD,MAAI,EAAE,eAAe,OAAQ,QAAO;AACpC,QAAM,MAAM,IAAI;AAChB,MAAI,IAAI,SAAS,oBAAK,EAAG,QAAO;AAChC,MAAI,IAAI,SAAS,KAAK,EAAG,QAAO;AAChC,MAAI,IAAI,SAAS,UAAU,KAAK,IAAI,SAAS,kDAAe,KAAK,IAAI,SAAS,4CAAc,EAAG,QAAO;AACtG,MAAI,IAAI,SAAS,MAAM,KAAK,IAAI,SAAS,2BAAO,KAAK,IAAI,SAAS,2BAAO,EAAG,QAAO;AACnF,MAAI,IAAI,SAAS,iCAAQ,EAAG,QAAO;AACnC,MAAI,IAAI,SAAS,cAAI,MAAM,IAAI,SAAS,4BAAQ,KAAK,IAAI,SAAS,cAAI,GAAI,QAAO;AACjF,MAAI,IAAI,SAAS,0BAAM,KAAK,IAAI,SAAS,kCAAS,EAAG,QAAO;AAC5D,SAAO;AACT;","names":[]}
@@ -32,4 +32,4 @@ function parsePageRange(spec, maxPages) {
32
32
  export {
33
33
  parsePageRange
34
34
  };
35
- //# sourceMappingURL=chunk-MOL7MDBG.js.map
35
+ //# sourceMappingURL=chunk-3TBUDJDE.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/page-range.ts"],"sourcesContent":["/** 페이지/섹션 범위 파싱 유틸리티 */\r\n\r\n/**\r\n * 페이지 범위 지정을 1-based Set<number>로 변환.\r\n *\r\n * @param spec - [1,2,3] 또는 \"1-3\" 또는 \"1,3,5-7\"\r\n * @param maxPages - 최대 페이지 수 (클램핑 상한)\r\n * @returns 1-based 페이지 번호 Set\r\n */\r\nexport function parsePageRange(spec: number[] | string, maxPages: number): Set<number> {\r\n const result = new Set<number>()\r\n if (maxPages <= 0) return result\r\n\r\n if (Array.isArray(spec)) {\r\n for (const n of spec) {\r\n const page = Math.round(n)\r\n if (page >= 1 && page <= maxPages) result.add(page)\r\n }\r\n return result\r\n }\r\n\r\n if (typeof spec !== \"string\" || spec.trim() === \"\") return result\r\n\r\n const parts = spec.split(\",\")\r\n for (const part of parts) {\r\n const trimmed = part.trim()\r\n if (!trimmed) continue\r\n\r\n const rangeMatch = trimmed.match(/^(\\d+)\\s*-\\s*(\\d+)$/)\r\n if (rangeMatch) {\r\n const start = Math.max(1, parseInt(rangeMatch[1], 10))\r\n const end = Math.min(maxPages, parseInt(rangeMatch[2], 10))\r\n for (let i = start; i <= end; i++) result.add(i)\r\n } else {\r\n const page = parseInt(trimmed, 10)\r\n if (!isNaN(page) && page >= 1 && page <= maxPages) result.add(page)\r\n }\r\n }\r\n\r\n return 
result\r\n}\r\n"],"mappings":";;;AASO,SAAS,eAAe,MAAyB,UAA+B;AACrF,QAAM,SAAS,oBAAI,IAAY;AAC/B,MAAI,YAAY,EAAG,QAAO;AAE1B,MAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,eAAW,KAAK,MAAM;AACpB,YAAM,OAAO,KAAK,MAAM,CAAC;AACzB,UAAI,QAAQ,KAAK,QAAQ,SAAU,QAAO,IAAI,IAAI;AAAA,IACpD;AACA,WAAO;AAAA,EACT;AAEA,MAAI,OAAO,SAAS,YAAY,KAAK,KAAK,MAAM,GAAI,QAAO;AAE3D,QAAM,QAAQ,KAAK,MAAM,GAAG;AAC5B,aAAW,QAAQ,OAAO;AACxB,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,CAAC,QAAS;AAEd,UAAM,aAAa,QAAQ,MAAM,qBAAqB;AACtD,QAAI,YAAY;AACd,YAAM,QAAQ,KAAK,IAAI,GAAG,SAAS,WAAW,CAAC,GAAG,EAAE,CAAC;AACrD,YAAM,MAAM,KAAK,IAAI,UAAU,SAAS,WAAW,CAAC,GAAG,EAAE,CAAC;AAC1D,eAAS,IAAI,OAAO,KAAK,KAAK,IAAK,QAAO,IAAI,CAAC;AAAA,IACjD,OAAO;AACL,YAAM,OAAO,SAAS,SAAS,EAAE;AACjC,UAAI,CAAC,MAAM,IAAI,KAAK,QAAQ,KAAK,QAAQ,SAAU,QAAO,IAAI,IAAI;AAAA,IACpE;AAAA,EACF;AAEA,SAAO;AACT;","names":[]}
@@ -6,10 +6,10 @@ import {
6
6
  precheckZipSize,
7
7
  sanitizeHref,
8
8
  toArrayBuffer
9
- } from "./chunk-EVWOJ4T5.js";
9
+ } from "./chunk-25TXW6EP.js";
10
10
  import {
11
11
  parsePageRange
12
- } from "./chunk-MOL7MDBG.js";
12
+ } from "./chunk-3TBUDJDE.js";
13
13
 
14
14
  // src/detect.ts
15
15
  import JSZip from "jszip";
@@ -163,6 +163,47 @@ function sanitizeText(text) {
163
163
  }
164
164
  return result;
165
165
  }
166
/**
 * Replaces "layout" tables (tables whose cells hold running text rather than
 * tabular data) with one paragraph block per non-empty line of cell text.
 *
 * A table block is flattened when it has at most three rows AND either
 *   - its cells contain more than five newline characters in total, or
 *   - it has at most two rows and more than 300 characters of cell text in total.
 *
 * Blocks that are not tables, tables without a `table` payload, 1x1 tables and
 * tables that do not match the heuristic are passed through unchanged (same
 * object reference). A table that matches the heuristic but yields no
 * non-empty line (whitespace-only cells) is also passed through, so a block is
 * never removed from the output without a replacement.
 *
 * Neither the input array nor any of its blocks is mutated.
 *
 * @param blocks - Parsed document blocks, in document order.
 * @returns A new array in which every flattened table is replaced by
 *   `{ type: "paragraph", text, pageNumber }` blocks, emitted in row-major
 *   cell order and carrying the table's `pageNumber`.
 */
function flattenLayoutTables(blocks) {
  // Heuristic limits; see the function description for how they combine.
  const MAX_LAYOUT_ROWS = 3;
  const MAX_DENSE_ROWS = 2;
  const NEWLINE_THRESHOLD = 5;
  const TEXT_LENGTH_THRESHOLD = 300;

  const result = [];
  for (const block of blocks) {
    const table = block.type === "table" ? block.table : void 0;
    if (!table) {
      result.push(block);
      continue;
    }

    const { rows: numRows, cols: numCols, cells } = table;
    const isSingleCell = numRows === 1 && numCols === 1;
    // Written as a negated `<=` so a missing or NaN row count is treated as
    // "not a layout table", exactly like a table that is too tall.
    if (isSingleCell || !(numRows <= MAX_LAYOUT_ROWS)) {
      result.push(block);
      continue;
    }

    // Collect cell texts in row-major order. A missing `cells` grid, a short
    // row or a cell without text all count as empty text instead of throwing.
    const cellTexts = [];
    for (let r = 0; r < numRows; r++) {
      for (let c = 0; c < numCols; c++) {
        cellTexts.push(cells?.[r]?.[c]?.text || "");
      }
    }

    let totalNewlines = 0;
    let totalTextLen = 0;
    for (const text of cellTexts) {
      totalNewlines += (text.match(/\n/g) || []).length;
      totalTextLen += text.length;
    }

    const looksLikeLayout =
      totalNewlines > NEWLINE_THRESHOLD ||
      (numRows <= MAX_DENSE_ROWS && totalTextLen > TEXT_LENGTH_THRESHOLD);
    if (!looksLikeLayout) {
      result.push(block);
      continue;
    }

    const paragraphs = [];
    for (const text of cellTexts) {
      for (const line of text.split("\n")) {
        const trimmed = line.trim();
        if (!trimmed) continue;
        paragraphs.push({ type: "paragraph", text: trimmed, pageNumber: block.pageNumber });
      }
    }

    // The thresholds are measured on untrimmed text, so a table made only of
    // whitespace/newlines can qualify yet produce nothing. Keep the table in
    // that case rather than silently dropping the block.
    if (paragraphs.length === 0) {
      result.push(block);
      continue;
    }
    for (const paragraph of paragraphs) result.push(paragraph);
  }
  return result;
}
166
207
  function blocksToMarkdown(blocks) {
167
208
  const lines = [];
168
209
  for (let i = 0; i < blocks.length; i++) {
@@ -1078,8 +1119,9 @@ var TAG_CHAR_SHAPE = 68;
1078
1119
  var TAG_CTRL_HEADER = 71;
1079
1120
  var TAG_LIST_HEADER = 72;
1080
1121
  var TAG_TABLE = 77;
1081
- var TAG_DOC_CHAR_SHAPE = 55;
1082
- var TAG_DOC_STYLE = 58;
1122
+ var TAG_DOC_CHAR_SHAPE = 21;
1123
+ var TAG_DOC_PARA_SHAPE = 25;
1124
+ var TAG_DOC_STYLE = 26;
1083
1125
  var CHAR_LINE = 0;
1084
1126
  var CHAR_SECTION_BREAK = 10;
1085
1127
  var CHAR_PARA = 13;
@@ -1135,8 +1177,14 @@ function parseFileHeader(data) {
1135
1177
  }
1136
1178
  function parseDocInfo(records) {
1137
1179
  const charShapes = [];
1180
+ const paraShapes = [];
1138
1181
  const styles = [];
1139
1182
  for (const rec of records) {
1183
+ if (rec.tagId === TAG_DOC_PARA_SHAPE && rec.data.length >= 4) {
1184
+ const flags = rec.data.readUInt32LE(0);
1185
+ const outlineLevel = flags >> 25 & 7;
1186
+ paraShapes.push({ outlineLevel });
1187
+ }
1140
1188
  if (rec.tagId === TAG_DOC_CHAR_SHAPE && rec.data.length >= 18) {
1141
1189
  if (rec.data.length >= 50) {
1142
1190
  const fontSize = rec.data.readUInt32LE(42);
@@ -1176,7 +1224,7 @@ function parseDocInfo(records) {
1176
1224
  }
1177
1225
  }
1178
1226
  }
1179
- return { charShapes, styles };
1227
+ return { charShapes, paraShapes, styles };
1180
1228
  }
1181
1229
  function extractText(data) {
1182
1230
  let result = "";
@@ -2186,12 +2234,13 @@ function parseHwp5Document(buffer, options) {
2186
2234
  }
2187
2235
  }
2188
2236
  const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
2237
+ const flatBlocks = flattenLayoutTables(blocks);
2189
2238
  if (docInfo) {
2190
- detectHwp5Headings(blocks, docInfo);
2239
+ detectHwp5Headings(flatBlocks, docInfo);
2191
2240
  }
2192
- const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
2193
- const markdown = blocksToMarkdown(blocks);
2194
- return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
2241
+ const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
2242
+ const markdown = blocksToMarkdown(flatBlocks);
2243
+ return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
2195
2244
  }
2196
2245
  function parseDocInfoStream(cfb, compressed) {
2197
2246
  try {
@@ -2242,16 +2291,21 @@ function detectHwp5Headings(blocks, docInfo) {
2242
2291
  }
2243
2292
  if (baseFontSize <= 0) return;
2244
2293
  for (const block of blocks) {
2245
- if (block.type !== "paragraph" || !block.text || !block.style?.fontSize) continue;
2294
+ if (block.type === "heading") continue;
2295
+ if (block.type !== "paragraph" || !block.text) continue;
2246
2296
  const text = block.text.trim();
2247
2297
  if (text.length === 0 || text.length > 200) continue;
2248
2298
  if (/^\d+$/.test(text)) continue;
2249
- const ratio = block.style.fontSize / baseFontSize;
2250
2299
  let level = 0;
2251
- if (ratio >= HEADING_RATIO_H1) level = 1;
2252
- else if (ratio >= HEADING_RATIO_H2) level = 2;
2253
- else if (ratio >= HEADING_RATIO_H3) level = 3;
2254
- if (/^제\d+[조장절편]/.test(text) && text.length <= 50) {
2300
+ if (block.style?.fontSize && baseFontSize > 0) {
2301
+ const ratio = block.style.fontSize / baseFontSize;
2302
+ if (ratio >= HEADING_RATIO_H1) level = 1;
2303
+ else if (ratio >= HEADING_RATIO_H2) level = 2;
2304
+ else if (ratio >= HEADING_RATIO_H3) level = 3;
2305
+ }
2306
+ if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
2307
+ if (level === 0) level = 2;
2308
+ } else if (/^제\d+(조의?\d*)\s*[\((]/.test(text) && text.length <= 80) {
2255
2309
  if (level === 0) level = 3;
2256
2310
  }
2257
2311
  if (level > 0) {
@@ -2497,13 +2551,20 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2497
2551
  while (i < records.length) {
2498
2552
  const rec = records[i];
2499
2553
  if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {
2500
- const { paragraph, tables, nextIdx, charShapeIds } = parseParagraphWithTables(records, i);
2554
+ const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i);
2501
2555
  if (paragraph) {
2502
2556
  const block = { type: "paragraph", text: paragraph, pageNumber: sectionNum };
2503
2557
  if (docInfo && charShapeIds.length > 0) {
2504
2558
  const style = resolveCharStyle(charShapeIds, docInfo);
2505
2559
  if (style) block.style = style;
2506
2560
  }
2561
+ if (docInfo && paraShapeId >= 0 && paraShapeId < docInfo.paraShapes.length) {
2562
+ const ol = docInfo.paraShapes[paraShapeId].outlineLevel;
2563
+ if (ol >= 1 && ol <= 6) {
2564
+ block.type = "heading";
2565
+ block.level = ol;
2566
+ }
2567
+ }
2507
2568
  blocks.push(block);
2508
2569
  }
2509
2570
  for (const t of tables) blocks.push({ type: "table", table: t, pageNumber: sectionNum });
@@ -2623,6 +2684,8 @@ function parseParagraphWithTables(records, startIdx) {
2623
2684
  let text = "";
2624
2685
  const tables = [];
2625
2686
  const charShapeIds = [];
2687
+ const paraHeaderData = records[startIdx].data;
2688
+ const paraShapeId = paraHeaderData.length >= 10 ? paraHeaderData.readUInt16LE(8) : -1;
2626
2689
  let i = startIdx + 1;
2627
2690
  while (i < records.length) {
2628
2691
  const rec = records[i];
@@ -2647,7 +2710,7 @@ function parseParagraphWithTables(records, startIdx) {
2647
2710
  i++;
2648
2711
  }
2649
2712
  const trimmed = text.trim();
2650
- return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds };
2713
+ return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds, paraShapeId };
2651
2714
  }
2652
2715
  function parseTableBlock(records, startIdx) {
2653
2716
  const tableLevel = records[startIdx].level;
@@ -3465,7 +3528,7 @@ async function parsePdfDocument(buffer, options) {
3465
3528
  if (totalChars / Math.max(parsedPageCount, 1) < 10) {
3466
3529
  if (options?.ocr) {
3467
3530
  try {
3468
- const { ocrPages } = await import("./provider-A4FHJSID.js");
3531
+ const { ocrPages } = await import("./provider-EU3CG724.js");
3469
3532
  const ocrBlocks = await ocrPages(doc, options.ocr, pageFilter, effectivePageCount);
3470
3533
  if (ocrBlocks.length > 0) {
3471
3534
  const ocrMarkdown = ocrBlocks.map((b) => b.text || "").filter(Boolean).join("\n\n");
@@ -4563,7 +4626,7 @@ async function parseXlsxDocument(buffer, options) {
4563
4626
  }
4564
4627
  let pageFilter = null;
4565
4628
  if (options?.pages) {
4566
- const { parsePageRange: parsePageRange2 } = await import("./page-range-737B4EZW.js");
4629
+ const { parsePageRange: parsePageRange2 } = await import("./page-range-OF5I4PQY.js");
4567
4630
  pageFilter = parsePageRange2(options.pages, sheets.length);
4568
4631
  }
4569
4632
  const blocks = [];
@@ -5446,4 +5509,4 @@ export {
5446
5509
  extractFormFields,
5447
5510
  parse
5448
5511
  };
5449
- //# sourceMappingURL=chunk-XJYM2AUA.js.map
5512
+ //# sourceMappingURL=chunk-4UH6ABAY.js.map