@dragon708/docmind-markdown 1.2.6 → 1.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -992,6 +992,20 @@ async function convertDocxBufferToMarkdown(input, options) {
992
992
  return { markdown: r.markdown, messages: r.messages };
993
993
  }
994
994
 
995
+ // src/cognipeer-runtime.ts
996
/**
 * Lazily loads the optional `@cognipeer/to-markdown` package and returns its
 * `convertToMarkdown` export.
 *
 * The package is resolved through a CommonJS `require` created relative to this
 * module, so it is only touched when a Cognipeer-backed conversion is requested.
 *
 * @returns {Promise<Function>} The package's `convertToMarkdown` function.
 * @throws {Error} When the package cannot be resolved, or when it does not expose
 *   a callable `convertToMarkdown`.
 */
async function loadCognipeerConvertToMarkdown() {
  const { createRequire } = await importEsm("node:module");
  const require2 = createRequire(import.meta.url);
  const mod = require2("@cognipeer/to-markdown");
  // Also accept the export living under `default` (interop-wrapped builds).
  const convertToMarkdown = mod?.convertToMarkdown ?? mod?.default?.convertToMarkdown;
  if (typeof convertToMarkdown !== "function") {
    // Fail at load time so callers report a load problem instead of hitting an
    // opaque "convertToMarkdown is not a function" TypeError during conversion.
    throw new Error("@cognipeer/to-markdown does not export a convertToMarkdown function.");
  }
  return convertToMarkdown;
}
1002
/**
 * Normalizes binary-like input into a Node.js `Buffer`.
 *
 * @param {*} input - A `Buffer` (returned as-is) or any value accepted by `Buffer.from`.
 * @returns {Promise<Buffer>} The input itself when it already is a Buffer, otherwise
 *   the result of `Buffer.from(input)`.
 */
async function toNodeBuffer2(input) {
  const bufferModule = await importEsm("node:buffer");
  const NodeBuffer = bufferModule.Buffer;
  // Existing Buffers pass through untouched; everything else goes through Buffer.from.
  return NodeBuffer.isBuffer(input) ? input : NodeBuffer.from(input);
}
1008
+
995
1009
  // src/pdf-markdown.ts
996
1010
  var BROWSER_WARNING = "@dragon708/docmind-markdown: PDF \u2192 Markdown via @cognipeer/to-markdown requires Node.js. In the browser, use a server-side conversion or supply structured text/Markdown from your backend.";
997
1011
  var COGNIPEER_WARN_TAG = "[docmind-markdown:pdf] pdf-cognipeer-specialized:";
@@ -1015,23 +1029,11 @@ function cognipeerConverterOptions(options) {
1015
1029
  if (url !== void 0) o.url = url;
1016
1030
  return o;
1017
1031
  }
1018
- async function toNodeBuffer2(input) {
1019
- const { Buffer: Buffer2 } = await importEsm("node:buffer");
1020
- if (Buffer2.isBuffer(input)) return input;
1021
- if (input instanceof ArrayBuffer) return Buffer2.from(input);
1022
- return Buffer2.from(input);
1023
- }
1024
- async function loadCognipeerConvertToMarkdown() {
1025
- const { createRequire } = await importEsm("node:module");
1026
- const require2 = createRequire(import.meta.url);
1027
- const mod = require2("@cognipeer/to-markdown");
1028
- return mod.convertToMarkdown;
1029
- }
1030
1032
  async function convertPdfToMarkdown(input, options) {
1031
1033
  const clean = options?.cleanMarkdown !== false;
1032
1034
  const resolveStructured = options?.resolveStructured;
1033
1035
  const structuredMdOpts = options?.structuredMarkdown;
1034
- const cognipeerOpts = cognipeerConverterOptions(options);
1036
+ const cognipeerOpts2 = cognipeerConverterOptions(options);
1035
1037
  if (!isNodeRuntime()) {
1036
1038
  return {
1037
1039
  markdown: "",
@@ -1097,7 +1099,7 @@ async function convertPdfToMarkdown(input, options) {
1097
1099
  }
1098
1100
  let rawMarkdown;
1099
1101
  try {
1100
- rawMarkdown = await convertToMarkdown(inputPath, cognipeerOpts);
1102
+ rawMarkdown = await convertToMarkdown(inputPath, cognipeerOpts2);
1101
1103
  } catch (e) {
1102
1104
  const msg = e instanceof Error ? e.message : String(e);
1103
1105
  warnings.push(`${COGNIPEER_WARN_TAG} ${msg}`);
@@ -1194,6 +1196,473 @@ async function convertPdfBufferToMarkdown(input, options) {
1194
1196
  throwIfLegacyFailure(r);
1195
1197
  return { markdown: r.markdown };
1196
1198
  }
1199
+
1200
+ // src/cognipeer-file-markdown.ts
1201
// Builds the warning returned when a Cognipeer-backed conversion for `label`
// (e.g. "HTML", "CSV") is requested outside Node.js.
var BROWSER = (label) => {
  return `@dragon708/docmind-markdown: ${label} \u2192 Markdown via @cognipeer/to-markdown requires Node.js. In the browser, use a server-side conversion or supply structured input / structuredFallback.`;
};
1202
/**
 * Extracts the converter options that are forwarded to the Cognipeer engine.
 *
 * @param {object} [options] - Caller options; only `fileName`, `forceExtension` and `url` are read.
 * @returns {object} A new object holding just those keys whose value is not `undefined`.
 */
function cognipeerOpts(options) {
  const forwarded = {};
  if (!options) return forwarded;
  for (const key of ["fileName", "forceExtension", "url"]) {
    const value = options[key];
    if (value !== void 0) forwarded[key] = value;
  }
  return forwarded;
}
1211
/**
 * Trims Markdown and, when `clean` is truthy, collapses runs of three or more
 * newlines down to a single blank line.
 *
 * @param {string} markdown - Raw Markdown text.
 * @param {boolean} clean - Whether to collapse excess blank lines.
 * @returns {string} The normalized Markdown.
 */
function normalizeMarkdown(markdown, clean) {
  const trimmed = markdown.trim();
  return clean ? trimmed.replace(/\n{3,}/g, "\n\n") : trimmed;
}
1216
/**
 * Builds the single-element warning list explaining why the structured
 * fallback was serialized instead of the specialized engine output.
 *
 * @param {string} format - Format tag used in the warning prefix.
 * @param {string} reason - "module-not-found", "error", "empty", or anything else
 *   (treated as "unavailable in this runtime").
 * @param {string} [detail] - Optional extra text appended in parentheses.
 * @returns {string[]} An array containing exactly one warning message.
 */
function structuredFallbackWarnings2(format, reason, detail) {
  let explanation;
  switch (reason) {
    case "module-not-found":
      explanation = "@cognipeer/to-markdown could not be loaded.";
      break;
    case "error":
      explanation = "the specialized engine raised an error or rejected the input.";
      break;
    case "empty":
      explanation = "the specialized engine returned empty Markdown.";
      break;
    default:
      explanation = "the specialized path is unavailable in this runtime.";
  }
  const prefix = `[docmind-markdown:${format}] ${format}-structured-fallback:`;
  const suffix = detail ? ` (${detail})` : "";
  return [`${prefix} serializing StructuredDocumentResult to Markdown because ${explanation}${suffix}`];
}
1222
/**
 * Returns the warning prefix used for messages about the specialized
 * Cognipeer route of the given format.
 *
 * @param {string} format - Format tag (e.g. "html", "csv", "spreadsheet").
 * @returns {string} The prefix, ending with a colon.
 */
function warnTag(format) {
  const scope = `[docmind-markdown:${format}]`;
  return `${scope} ${format}-cognipeer-specialized:`;
}
1225
/**
 * Converts an HTML, CSV or spreadsheet input to Markdown through
 * `@cognipeer/to-markdown`, optionally falling back to a caller-supplied
 * structured document.
 *
 * Flow:
 *  1. Outside Node.js an "unsupported-runtime" result is returned immediately.
 *  2. A string input is used as a file path as-is; any other input is turned into
 *     a Buffer and written to `defaultTempFile` inside a fresh temp directory,
 *     which is removed again (best effort) before returning.
 *  3. If the engine cannot be loaded, throws, or yields empty Markdown, a warning
 *     is recorded and `options.resolveStructured` (when given) is rendered with
 *     `convertStructuredToMarkdown` instead.
 *
 * @param {string} format - Format tag used in warnings and in the temp-dir name.
 * @param {string} defaultTempFile - File name used when the input is written to disk.
 * @param {*} input - File path (string) or binary data accepted by `toNodeBuffer2`.
 * @param {object} [options] - Reads `cleanMarkdown`, `resolveStructured`,
 *   `structuredMarkdown`, plus the keys picked by `cognipeerOpts`.
 * @returns {Promise<object>} `{ markdown, warnings, source, fallbackReason? }` where
 *   `source` is "cognipeer", "structured-fallback", "unsupported-runtime",
 *   "cognipeer-unavailable" or "cognipeer-failed".
 */
async function convertCognipeerFileToMarkdown(format, defaultTempFile, input, options) {
  if (!isNodeRuntime()) {
    const browserLabel = format === "html" ? "HTML" : format === "csv" ? "CSV" : "Spreadsheet";
    return {
      markdown: "",
      warnings: [BROWSER(browserLabel)],
      source: "unsupported-runtime",
      fallbackReason: "unsupported-runtime"
    };
  }
  const clean = options?.cleanMarkdown !== false;
  const resolveStructured = options?.resolveStructured;
  const structuredMdOpts = options?.structuredMarkdown;
  const warnings = [];
  const describeError = (e) => e instanceof Error ? e.message : String(e);

  // Renders the structured fallback for `reason`. Returns the finished result, or
  // `undefined` when no resolver was supplied or the fallback itself failed (in
  // which case the failure is appended to `warnings`).
  const tryStructuredFallback = async (reason, detail) => {
    if (!resolveStructured) return void 0;
    try {
      const structured = await resolveStructured();
      const md = normalizeMarkdown(
        convertStructuredToMarkdown(structured, structuredMdOpts),
        clean
      );
      return {
        markdown: md,
        warnings: [...structuredFallbackWarnings2(format, reason, detail), ...warnings],
        source: "structured-fallback",
        fallbackReason: reason
      };
    } catch (e2) {
      warnings.push(`Structured fallback failed: ${describeError(e2)}`);
      return void 0;
    }
  };
  // Empty-Markdown result used when neither the engine nor the fallback produced output.
  const deadEnd = (source, fallbackReason) => ({ markdown: "", warnings, source, fallbackReason });

  let cleanup;
  try {
    let inputPath;
    if (typeof input === "string") {
      inputPath = input;
    } else {
      const [{ mkdtemp, writeFile, rm }, { join }, { tmpdir }, buffer] = await Promise.all([
        importEsm("node:fs/promises"),
        importEsm("node:path"),
        importEsm("node:os"),
        toNodeBuffer2(input)
      ]);
      const dir = await mkdtemp(join(tmpdir(), `docmind-markdown-${format}-`));
      // Register cleanup before writing so a failed write cannot leak the directory.
      cleanup = async () => rm(dir, { recursive: true, force: true });
      inputPath = join(dir, defaultTempFile);
      await writeFile(inputPath, buffer);
    }

    let convertToMarkdown;
    try {
      convertToMarkdown = await loadCognipeerConvertToMarkdown();
    } catch (e) {
      const hint = e instanceof Error && /Cannot find module|MODULE_NOT_FOUND/i.test(e.message) ? " Install `@cognipeer/to-markdown` in your project." : "";
      warnings.push(
        `${warnTag(format)} package could not be loaded (${describeError(e)}).${hint}`
      );
      return (await tryStructuredFallback("module-not-found")) ?? deadEnd("cognipeer-unavailable", "module-not-found");
    }

    let rawMarkdown;
    try {
      rawMarkdown = await convertToMarkdown(inputPath, cognipeerOpts(options));
    } catch (e) {
      const msg = describeError(e);
      warnings.push(`${warnTag(format)} ${msg}`);
      // The detail is capped so a huge engine error cannot bloat the warning list.
      return (await tryStructuredFallback("error", msg.slice(0, 500))) ?? deadEnd("cognipeer-failed", "error");
    }

    const markdown = normalizeMarkdown(
      typeof rawMarkdown === "string" ? rawMarkdown : String(rawMarkdown ?? ""),
      clean
    );
    if (markdown.length > 0) {
      return { markdown, warnings, source: "cognipeer" };
    }
    warnings.push(
      `${warnTag(format)} returned empty Markdown for this input (whitespace-only after normalize).`
    );
    return (await tryStructuredFallback("empty")) ?? deadEnd("cognipeer-failed", "empty");
  } finally {
    if (cleanup) {
      // Best effort: a failed temp-dir removal must not mask the conversion result.
      await cleanup().catch(() => {
      });
    }
  }
}
1370
+
1371
+ // src/node-is-regular-file.ts
1372
/**
 * Reports whether `path` points to an existing regular file.
 *
 * @param {string} path - Candidate file-system path.
 * @returns {Promise<boolean>} `true` only when `stat` succeeds and reports a file;
 *   any error (missing path, module unavailable, ...) yields `false`.
 */
async function isExistingRegularFile(path) {
  try {
    const { stat: statPath } = await importEsm("node:fs/promises");
    const stats = await statPath(path);
    return stats.isFile();
  } catch {
    return false;
  }
}
1381
+
1382
+ // src/html-markdown.ts
1383
/**
 * Heuristically decides whether a string is HTML markup rather than a path.
 *
 * Every recognised opener (`<!DOCTYPE html`, `<html`, `<head`, `<!--`, or any other
 * tag / declaration / processing instruction) shares one shape after leading
 * whitespace: `<` followed by an ASCII letter, `!` or `?`.
 *
 * @param {string} s - Candidate string.
 * @returns {boolean} `true` when the trimmed string starts like markup.
 */
function looksLikeHtmlString(s) {
  return /^<[a-zA-Z!?]/.test(s.trimStart());
}
1395
/**
 * Decides whether a string handed to the HTML converter is markup or a file path.
 *
 * @param {string} s - The string input.
 * @param {string} mode - "html" forces markup, "path" forces a path; any other
 *   value triggers auto-detection (existing file first, then markup heuristic,
 *   otherwise path).
 * @returns {Promise<object>} `{ kind: "buffer", buffer }` or `{ kind: "path", path }`.
 */
async function resolveHtmlStringInput(s, mode) {
  const { Buffer: NodeBuffer } = await importEsm("node:buffer");
  const asMarkup = () => ({ kind: "buffer", buffer: NodeBuffer.from(s, "utf8") });
  const asPath = () => ({ kind: "path", path: s });
  switch (mode) {
    case "html":
      return asMarkup();
    case "path":
      return asPath();
  }
  // Auto mode: a real file on disk wins over the markup heuristic.
  const existsOnDisk = isNodeRuntime() && await isExistingRegularFile(s);
  if (!existsOnDisk && looksLikeHtmlString(s)) {
    return asMarkup();
  }
  return asPath();
}
1411
/**
 * Converts HTML (markup string, file path, or binary data) to Markdown via the
 * shared Cognipeer file converter.
 *
 * @param {*} input - Markup string, file path, or binary data.
 * @param {object} [options] - Converter options; `inputMode` ("auto" by default)
 *   controls how string inputs are interpreted and is not forwarded.
 * @returns {Promise<object>} The result of `convertCognipeerFileToMarkdown`.
 */
async function convertHtmlToMarkdown(input, options) {
  const { inputMode, ...cognipeerOptions } = options ?? {};
  const mode = inputMode ?? "auto";
  const convert = (source, opts) => convertCognipeerFileToMarkdown("html", "document.html", source, opts);

  // Binary input, or any input outside Node.js, goes straight to the converter.
  if (typeof input !== "string" || !isNodeRuntime()) {
    return convert(input, cognipeerOptions);
  }

  const resolved = await resolveHtmlStringInput(input, mode);
  if (resolved.kind === "path") {
    return convert(resolved.path, cognipeerOptions);
  }
  // Inline markup is written to a temp file, so give the engine explicit hints
  // unless the caller already supplied them.
  return convert(resolved.buffer, {
    ...cognipeerOptions,
    forceExtension: cognipeerOptions.forceExtension ?? ".html",
    fileName: cognipeerOptions.fileName ?? "document.html"
  });
}
1430
+
1431
+ // src/tabular-markdown-postprocess.ts
1432
/**
 * Compacts Markdown: strips trailing whitespace from every line, collapses runs
 * of blank lines to a single blank line, and trims the whole document.
 *
 * Trailing whitespace (including the `\r` of CRLF line endings) is removed
 * BEFORE blank runs are collapsed; otherwise whitespace-only lines would hide a
 * run of blank lines from the collapse step and survive as 3+ newlines.
 *
 * @param {string} markdown - Markdown text to compact.
 * @returns {string} The compacted Markdown.
 */
function compactMarkdownOutput(markdown) {
  return markdown
    .split("\n")
    .map((line) => line.trimEnd())
    .join("\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
1435
/**
 * Counts the columns of a single CSV line.
 *
 * Commas inside double-quoted sections are ignored; every `"` toggles the
 * quoted state, so an escaped `""` toggles twice and leaves the state unchanged.
 *
 * @param {string} firstLine - One CSV record.
 * @returns {number} Number of columns (always at least 1).
 */
function countCsvColumns(firstLine) {
  let separators = 0;
  let quoted = false;
  for (const ch of firstLine) {
    if (ch === '"') {
      quoted = !quoted;
    } else if (ch === "," && !quoted) {
      separators += 1;
    }
  }
  return separators + 1;
}
1448
/**
 * Pre-processes CSV text before it is handed to the specialized engine.
 *
 * Empty lines are dropped and line endings become `\n`. With
 * `includeHeader: false` a synthetic "Column N" header row is prepended. With a
 * non-negative `maxRows` the data rows (everything after the first line) are
 * truncated line by line.
 *
 * @param {string} text - Raw CSV text.
 * @param {object} [options] - `{ includeHeader?, maxRows? }`.
 * @returns {{ text: string, warnings: string[] }} The prepared text and notes on
 *   what was changed. Input without any non-empty line is returned unchanged.
 */
function prepareCsvTextForCognipeer(text, options) {
  const notes = [];
  const rows = text.split(/\r?\n/).filter((row) => row.length > 0);
  if (rows.length === 0) {
    return { text, warnings: notes };
  }

  if (options?.includeHeader === false) {
    const width = Math.max(1, countCsvColumns(rows[0]));
    const labels = [];
    for (let col = 1; col <= width; col++) {
      labels.push(`Column ${col}`);
    }
    rows.unshift(labels.join(","));
    notes.push(
      "[docmind-markdown:csv] includeHeader:false: prepended synthetic header row so the first CSV row appears as table data."
    );
  }

  const limit = options?.maxRows;
  if (limit == null || !(limit >= 0)) {
    return { text: rows.join("\n"), warnings: notes };
  }

  const [header, ...body] = rows;
  if (body.length > limit) {
    notes.push(
      `[docmind-markdown:csv] maxRows:${limit}: truncated data rows before conversion (line-based; quoted newlines inside fields may skew counts).`
    );
  }
  return { text: [header, ...body.slice(0, limit)].join("\n"), warnings: notes };
}
1477
/**
 * Removes level-2 (`## ...`) sheet title lines, together with the blank lines
 * that follow them, from spreadsheet Markdown.
 *
 * A title on the very last line (no trailing newline, which is typical after the
 * Markdown has been trimmed) is removed as well, and deeper headings (`###` and
 * below) are left alone because they are not sheet titles.
 *
 * @param {string} markdown - Spreadsheet Markdown with `##` sheet titles.
 * @returns {string} The Markdown without sheet title lines.
 */
function stripSpreadsheetSheetHeadings(markdown) {
  // `(?!#)` keeps "###" and deeper headings; `(?:\n+|$)` also matches a title at end of input.
  return markdown.replace(/^##(?!#)[^\n]+(?:\n+|$)/gm, "");
}
1480
/**
 * Truncates every Markdown table to at most `maxRowsPerSheet` data rows.
 *
 * A table is a run of consecutive lines whose trimmed text starts with `|`. The
 * first two lines of a run are treated as header and separator and are always
 * kept; runs shorter than three lines are copied unchanged. All other lines
 * (sheet titles, blank lines, prose) pass through untouched.
 *
 * @param {string} markdown - Spreadsheet Markdown.
 * @param {number} maxRowsPerSheet - Maximum data rows per table. A negative or
 *   non-numeric (NaN / undefined) limit disables truncation instead of silently
 *   dropping every row.
 * @returns {{ markdown: string, warnings: string[] }} The limited Markdown plus a
 *   single warning when at least one table was truncated.
 */
function limitSpreadsheetMarkdownRowsPerSheet(markdown, maxRowsPerSheet) {
  const warnings = [];
  // `!(x >= 0)` also rejects NaN and undefined, which `x < 0` would let through.
  if (!(maxRowsPerSheet >= 0)) return { markdown, warnings };

  const isTableLine = (line) => line.trim().startsWith("|");
  const lines = markdown.split("\n");
  const out = [];
  let truncatedAny = false;
  let i = 0;
  while (i < lines.length) {
    if (!isTableLine(lines[i])) {
      out.push(lines[i]);
      i++;
      continue;
    }
    const tableStart = i;
    while (i < lines.length && isTableLine(lines[i])) {
      i++;
    }
    const tableLines = lines.slice(tableStart, i);
    if (tableLines.length < 3) {
      // Header and/or separator only: nothing to truncate.
      out.push(...tableLines);
      continue;
    }
    if (tableLines.length - 2 > maxRowsPerSheet) truncatedAny = true;
    // Header + separator + the first `maxRowsPerSheet` data rows.
    out.push(...tableLines.slice(0, 2 + maxRowsPerSheet));
  }

  if (truncatedAny) {
    warnings.push(
      `[docmind-markdown:spreadsheet] maxRowsPerSheet:${maxRowsPerSheet}: truncated data rows in one or more sheet tables.`
    );
  }
  return { markdown: out.join("\n"), warnings };
}
1533
+
1534
+ // src/csv-markdown.ts
1535
/**
 * Heuristic for "this string is CSV text, not a path": it must contain at least
 * one comma and at least one line break.
 *
 * @param {string} s - Candidate string.
 * @returns {boolean} `true` when both a comma and a CR or LF are present.
 */
function looksLikeCsvContent(s) {
  if (!s.includes(",")) return false;
  return s.includes("\n") || s.includes("\r");
}
1538
/**
 * Returns a copy of the CSV options without the keys that are handled by the CSV
 * wrapper itself (`inputMode`, `includeHeader`, `compactMode`, `maxRows`).
 *
 * @param {object} [o] - CSV conversion options.
 * @returns {object} A new object with the remaining keys (`{}` when `o` is falsy).
 */
function stripCsvOptionKeys(o) {
  if (!o) return {};
  const forwarded = { ...o };
  for (const key of ["inputMode", "includeHeader", "compactMode", "maxRows"]) {
    delete forwarded[key];
  }
  return forwarded;
}
1549
/**
 * Applies CSV post-processing to a conversion result.
 *
 * @param {object} r - Conversion result with `markdown` and `warnings`.
 * @param {string[]} prependWarnings - Warnings to place before the result's own.
 * @param {object} [options] - CSV options; only `compactMode === true` is read.
 * @returns {object} `r` itself when nothing changed, otherwise a copy with the
 *   compacted Markdown and merged warnings.
 */
function finishCsvResult(r, prependWarnings, options) {
  const compact = options?.compactMode === true;
  const markdown = compact ? compactMarkdownOutput(r.markdown) : r.markdown;
  const unchanged = markdown === r.markdown && prependWarnings.length === 0;
  if (unchanged) return r;
  const warnings = [...prependWarnings, ...r.warnings];
  return { ...r, markdown, warnings };
}
1558
/**
 * Reads a file as UTF-8 text.
 *
 * @param {string} path - File-system path to read.
 * @returns {Promise<string>} The decoded file contents; read errors propagate.
 */
async function readUtf8File(path) {
  const { readFile: readFromDisk } = await importEsm("node:fs/promises");
  const contents = await readFromDisk(path, "utf8");
  return contents;
}
1562
/**
 * Decides whether a string handed to the CSV converter is CSV text or a path.
 *
 * @param {string} s - The string input.
 * @param {string} mode - "content" forces text, "path" forces a path; any other
 *   value triggers auto-detection (existing file first, then the CSV heuristic,
 *   otherwise path).
 * @returns {Promise<object>} `{ kind: "text", text }` or `{ kind: "path", path }`.
 */
async function resolveCsvStringInput(s, mode) {
  const asText = { kind: "text", text: s };
  const asPath = { kind: "path", path: s };
  if (mode === "content") return asText;
  if (mode === "path") return asPath;
  // Auto mode: a real file on disk wins over the content heuristic.
  const existsOnDisk = isNodeRuntime() && await isExistingRegularFile(s);
  return !existsOnDisk && looksLikeCsvContent(s) ? asText : asPath;
}
1569
/**
 * Tells whether the CSV text must be rewritten before conversion.
 *
 * @param {object} [options] - CSV options.
 * @returns {boolean} `true` when `maxRows` is set (not null/undefined) or
 *   `includeHeader` is exactly `false`.
 */
function csvNeedsPreprocess(options) {
  if (options == null) return false;
  return options.maxRows != null || options.includeHeader === false;
}
1572
/**
 * Converts CSV (text, file path, or binary data) to Markdown via the shared
 * Cognipeer file converter, applying the CSV-specific options around it.
 *
 * - `inputMode` ("auto" by default) controls how string inputs are interpreted.
 * - `includeHeader: false` / `maxRows` rewrite the CSV text before conversion.
 * - `compactMode: true` compacts the resulting Markdown.
 *
 * Outside Node.js every input is handed straight to the shared converter so the
 * caller always receives its "unsupported-runtime" result instead of an error
 * from Node-only helpers. Buffers are created through `toNodeBuffer2`, like the
 * rest of this file, rather than through the global `Buffer`.
 *
 * @param {*} input - CSV text, file path, or binary data.
 * @param {object} [options] - CSV conversion options.
 * @returns {Promise<object>} The conversion result, with preprocessing warnings
 *   placed before the converter's own warnings.
 */
async function convertCsvToMarkdown(input, options) {
  const cognipeerOptions = stripCsvOptionKeys(options);
  const convert = (source) => convertCognipeerFileToMarkdown("csv", "document.csv", source, cognipeerOptions);
  // Converts `source` as-is (no CSV text rewriting).
  const convertUntouched = async (source) => finishCsvResult(await convert(source), [], options);

  if (!isNodeRuntime()) {
    return convertUntouched(input);
  }

  const prepArgs = { includeHeader: options?.includeHeader, maxRows: options?.maxRows };
  const needsPrep = csvNeedsPreprocess(options);
  // Rewrites raw CSV text, converts the rewritten bytes, and merges the warnings.
  const convertText = async (raw) => {
    const { text, warnings } = prepareCsvTextForCognipeer(raw, prepArgs);
    const result = await convert(await toNodeBuffer2(text));
    return finishCsvResult(result, warnings, options);
  };

  if (typeof input === "string") {
    const resolved = await resolveCsvStringInput(input, options?.inputMode ?? "auto");
    if (resolved.kind === "text") {
      return convertText(resolved.text);
    }
    // A path only needs to be read here when its contents must be rewritten.
    return needsPrep ? convertText(await readUtf8File(resolved.path)) : convertUntouched(resolved.path);
  }

  if (!needsPrep) {
    return convertUntouched(input);
  }
  const buf = await toNodeBuffer2(input);
  return convertText(buf.toString("utf8"));
}
1626
+
1627
+ // src/spreadsheet-markdown.ts
1628
/**
 * Returns a copy of the spreadsheet options without the keys handled by the
 * spreadsheet wrapper itself (`includeSheetNames`, `compactMode`, `maxRowsPerSheet`).
 *
 * @param {object} [o] - Spreadsheet conversion options.
 * @returns {object} A new object with the remaining keys (`{}` when `o` is falsy).
 */
function stripSpreadsheetOptionKeys(o) {
  if (!o) return {};
  const forwarded = { ...o };
  for (const key of ["includeSheetNames", "compactMode", "maxRowsPerSheet"]) {
    delete forwarded[key];
  }
  return forwarded;
}
1633
/**
 * Applies spreadsheet post-processing to a conversion result.
 *
 * Row limiting and sheet-title removal only run on output that came from the
 * specialized engine (`r.source === "cognipeer"`); compaction runs for any source.
 *
 * @param {object} r - Conversion result with `markdown`, `warnings` and `source`.
 * @param {object} [options] - Reads `maxRowsPerSheet`, `includeSheetNames`, `compactMode`.
 * @returns {object} `r` itself when nothing changed, otherwise an updated copy.
 */
function finishSpreadsheetResult(r, options) {
  if (!options) return r;
  const warnings = [...r.warnings];
  let markdown = r.markdown;
  const fromSpecializedEngine = r.source === "cognipeer";

  if (fromSpecializedEngine && options.maxRowsPerSheet != null) {
    const limited = limitSpreadsheetMarkdownRowsPerSheet(markdown, options.maxRowsPerSheet);
    markdown = limited.markdown;
    warnings.push(...limited.warnings);
  }
  if (fromSpecializedEngine && options.includeSheetNames === false) {
    markdown = stripSpreadsheetSheetHeadings(markdown);
    warnings.push(
      "[docmind-markdown:spreadsheet] includeSheetNames:false: removed ## sheet title lines from specialized output."
    );
  }
  if (options.compactMode === true) {
    markdown = compactMarkdownOutput(markdown);
  }

  const untouched = markdown === r.markdown && warnings.length === r.warnings.length;
  return untouched ? r : { ...r, markdown, warnings };
}
1656
/**
 * Converts a spreadsheet (file path or binary data) to Markdown via the shared
 * Cognipeer file converter, then applies the spreadsheet-specific options.
 *
 * @param {*} input - File path or binary data.
 * @param {object} [options] - Spreadsheet conversion options.
 * @returns {Promise<object>} The post-processed conversion result.
 */
async function convertSpreadsheetToMarkdown(input, options) {
  const forwardedOptions = stripSpreadsheetOptionKeys(options);
  const converted = await convertCognipeerFileToMarkdown("spreadsheet", "document.xlsx", input, forwardedOptions);
  return finishSpreadsheetResult(converted, options);
}
1197
1666
  function isArrayBufferLike(data) {
1198
1667
  if (data instanceof ArrayBuffer) return true;
1199
1668
  if (typeof Uint8Array !== "undefined" && data instanceof Uint8Array) return true;
@@ -1210,7 +1679,15 @@ function isExtractMarkdownPathInput(value) {
1210
1679
  }
1211
1680
/**
 * Returns the extract options that apply to structured-Markdown rendering, i.e.
 * everything except the fallback document and the per-format option bags.
 *
 * @param {object} [options] - Options passed to `extractMarkdown`.
 * @returns {object} A new object without `structuredFallback`, `docx`, `pdf`,
 *   `html`, `csv` and `spreadsheet` (`{}` when `options` is falsy).
 */
function pickStructuredMarkdownOptions(options) {
  if (!options) return {};
  const structuredOnly = { ...options };
  for (const key of ["structuredFallback", "docx", "pdf", "html", "csv", "spreadsheet"]) {
    delete structuredOnly[key];
  }
  return structuredOnly;
}
1216
1693
  function buildDocxOptions(extract) {
@@ -1233,25 +1710,109 @@ function buildPdfOptions(extract) {
1233
1710
  structuredMarkdown: { ...sm, ...pdf?.structuredMarkdown }
1234
1711
  };
1235
1712
  }
1713
/**
 * Shared builder behind the per-format option helpers below.
 *
 * Starts from the format-specific option bag (`extract[formatKey]`) and fills in:
 *  - `resolveStructured`: the bag's own resolver, else a resolver that yields
 *    `extract.structuredFallback` when one was provided, else `undefined`;
 *  - `structuredMarkdown`: the general structured-Markdown options, overridden by
 *    the bag's own `structuredMarkdown`.
 *
 * @param {object} [extract] - Options passed to `extractMarkdown`.
 * @param {string} formatKey - "html", "csv" or "spreadsheet".
 * @returns {object} Options for the matching `convert*ToMarkdown` function.
 */
function buildCognipeerFormatOptions(extract, formatKey) {
  const formatOptions = extract?.[formatKey];
  const fb = extract?.structuredFallback;
  const sm = pickStructuredMarkdownOptions(extract);
  return {
    ...formatOptions,
    resolveStructured: formatOptions?.resolveStructured ?? (fb ? () => Promise.resolve(fb) : void 0),
    structuredMarkdown: { ...sm, ...formatOptions?.structuredMarkdown }
  };
}
/** Builds the options for `convertHtmlToMarkdown` from the extract options. */
function buildHtmlOptions(extract) {
  return buildCognipeerFormatOptions(extract, "html");
}
/** Builds the options for `convertCsvToMarkdown` from the extract options. */
function buildCsvOptions(extract) {
  return buildCognipeerFormatOptions(extract, "csv");
}
/** Builds the options for `convertSpreadsheetToMarkdown` from the extract options. */
function buildSpreadsheetOptions(extract) {
  return buildCognipeerFormatOptions(extract, "spreadsheet");
}
1236
1743
/**
 * Returns a `Uint8Array` over the bytes of binary input.
 *
 * - A `Uint8Array` (including Node `Buffer`) is returned as-is.
 * - Any other ArrayBuffer view (`DataView`, `Uint16Array`, ...) is re-viewed over
 *   the same byte range. Passing such a view straight to `new Uint8Array(...)`
 *   would convert element values (or yield an empty array for `DataView`)
 *   instead of exposing the underlying bytes.
 * - Everything else (e.g. `ArrayBuffer`) goes through `new Uint8Array(data)`.
 *
 * @param {*} data - Binary input.
 * @returns {Uint8Array} A byte view of the input.
 */
function toUint8View(data) {
  if (data instanceof Uint8Array) return data;
  if (ArrayBuffer.isView(data)) {
    return new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
  }
  return new Uint8Array(data);
}
1748
// Leading signature bytes (D0 CF 11 E0 A1 B1 1A E1) that detectBinaryFormat
// compares against the start of the input to recognise legacy `.xls` data.
var XLS_OLE_MAGIC = Uint8Array.of(0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1);
1749
/**
 * Compares two byte arrays element by element.
 *
 * @param {Uint8Array} a - First array.
 * @param {Uint8Array} b - Second array.
 * @returns {boolean} `true` when both have the same length and identical bytes.
 */
function uint8ArraysEqual(a, b) {
  return a.length === b.length && a.every((byte, idx) => byte === b[idx]);
}
1754
/**
 * Searches a byte array for the UTF-8 encoding of a string.
 *
 * @param {Uint8Array} haystack - Bytes to search in.
 * @param {string} needle - Text to look for.
 * @returns {boolean} `true` when the encoded needle occurs in the haystack;
 *   always `false` for an empty needle.
 */
function containsUtf8Substring(haystack, needle) {
  const pattern = new TextEncoder().encode(needle);
  const lastStart = haystack.length - pattern.length;
  if (pattern.length === 0 || lastStart < 0) return false;
  for (let start = 0; start <= lastStart; start++) {
    let matched = 0;
    while (matched < pattern.length && haystack[start + matched] === pattern[matched]) {
      matched++;
    }
    if (matched === pattern.length) return true;
  }
  return false;
}
1765
/**
 * Checks whether the bytes start like a ZIP record: at least four bytes, "PK",
 * then a third byte of 3, 5 or 7.
 *
 * @param {Uint8Array} u - Bytes to inspect.
 * @returns {boolean} `true` when the prefix matches.
 */
function isZipLocalHeader(u) {
  if (u.length < 4) return false;
  const startsWithPk = u[0] === 0x50 && u[1] === 0x4b;
  return startsWithPk && [3, 5, 7].includes(u[2]);
}
1768
/**
 * Sniffs whether raw bytes start like an HTML document.
 *
 * Skips a UTF-8 BOM, decodes the leading ASCII run of the first 256 bytes,
 * ignores leading whitespace, and checks for `<!doctype html`, `<html`, `<head`
 * or `<!--` (case-insensitive).
 *
 * A NUL byte rejects the input (binary or UTF-16 data). A non-ASCII byte only
 * ends the decoded prefix: the markers themselves are pure ASCII, so HTML whose
 * first 256 bytes contain e.g. an accented title is still recognised.
 *
 * @param {Uint8Array} u - Bytes to inspect.
 * @returns {boolean} `true` when the prefix looks like HTML.
 */
function looksLikeUtf8HtmlPrefix(u) {
  if (u.length === 0) return false;
  const hasBom = u.length >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf;
  const end = Math.min(u.length, 256);
  let s = "";
  for (let i = hasBom ? 3 : 0; i < end; i++) {
    const c = u[i];
    if (c === 0) return false;
    if (c > 127) break;
    s += String.fromCharCode(c);
  }
  const t = s.trimStart().slice(0, 96).toLowerCase();
  return t.startsWith("<!doctype html") || t.startsWith("<html") || t.startsWith("<head") || t.startsWith("<!--");
}
1241
1782
/**
 * Guesses the document format of binary input.
 *
 * Hints from the MIME type and file name are checked first (pdf, html, csv,
 * spreadsheet, docx, in that order); only when none matches is the content
 * sniffed: `%PDF` signature, the `XLS_OLE_MAGIC` signature, ZIP container
 * contents, and finally an HTML-looking text prefix.
 *
 * Behaviour is identical to the previous implementation; the extension checks
 * inside the ZIP branch were unreachable (those extensions already return from
 * the hint checks above) and MIME substrings implied by a shorter one were dropped.
 *
 * @param {*} data - Binary input accepted by `toUint8View`.
 * @param {string} [filename] - Optional file name used for extension hints.
 * @param {string} [mimeType] - Optional MIME type hint.
 * @returns {string} "pdf", "html", "csv", "spreadsheet", "docx" or "unknown".
 */
function detectBinaryFormat(data, filename, mimeType) {
  const u = toUint8View(data);
  const lower = filename?.toLowerCase() ?? "";
  const mime = mimeType?.toLowerCase() ?? "";
  const mimeHas = (...parts) => parts.some((part) => mime.includes(part));
  const hasExt = (...exts) => exts.some((ext) => lower.endsWith(ext));

  if (mimeHas("pdf") || hasExt(".pdf")) return "pdf";
  if (mimeHas("text/html", "application/xhtml+xml") || hasExt(".html", ".htm")) return "html";
  if (mimeHas("text/csv", "application/csv") || hasExt(".csv")) return "csv";
  if (mimeHas("spreadsheetml", "application/vnd.ms-excel") || hasExt(".xlsx", ".xls")) return "spreadsheet";
  if (mimeHas("wordprocessingml") || hasExt(".docx")) return "docx";

  // "%PDF"
  if (u.length >= 4 && u[0] === 37 && u[1] === 80 && u[2] === 68 && u[3] === 70) {
    return "pdf";
  }
  // NOTE(review): this signature is treated as a spreadsheet unconditionally;
  // confirm no other document type shares the same leading bytes.
  if (u.length >= XLS_OLE_MAGIC.length && uint8ArraysEqual(u.subarray(0, XLS_OLE_MAGIC.length), XLS_OLE_MAGIC)) {
    return "spreadsheet";
  }
  if (isZipLocalHeader(u)) {
    // A Word part wins even when a workbook part is present too; the second
    // scan is skipped once the first one matches.
    if (containsUtf8Substring(u, "word/document")) return "docx";
    if (containsUtf8Substring(u, "xl/workbook")) return "spreadsheet";
    return "unknown";
  }
  if (looksLikeUtf8HtmlPrefix(u)) return "html";
  return "unknown";
}
1257
1818
  function docxStrategyFromSource(source) {
@@ -1272,6 +1833,21 @@ function pdfStrategyFromResult(r) {
1272
1833
  return "pdf-cognipeer-specialized";
1273
1834
  }
1274
1835
  }
1836
/**
 * Maps a Cognipeer file conversion result to its strategy identifier,
 * `<format>-<outcome>`.
 *
 * @param {string} format - "html" or "csv"; any other value is reported as "spreadsheet".
 * @param {object} r - Conversion result; only `r.source` is read.
 * @returns {string} E.g. "csv-structured-fallback". Sources other than the four
 *   known fallback/failure sources map to "<format>-cognipeer-specialized".
 */
function cognipeerFileStrategyFromResult(format, r) {
  const prefix = format === "html" || format === "csv" ? format : "spreadsheet";
  const passthroughSources = [
    "structured-fallback",
    "unsupported-runtime",
    "cognipeer-unavailable",
    "cognipeer-failed"
  ];
  const outcome = passthroughSources.includes(r.source) ? r.source : "cognipeer-specialized";
  return `${prefix}-${outcome}`;
}
1275
1851
  function mergeWarnings(base, ...more) {
1276
1852
  const out = [...base];
1277
1853
  for (const m of more) {
@@ -1280,6 +1856,30 @@ function mergeWarnings(base, ...more) {
1280
1856
  return out;
1281
1857
  }
1282
1858
// Prefix for trace/warning messages produced by the extractMarkdown layer.
var EXTRACT_WARN = "[docmind-markdown:extractMarkdown]";
// Prefix for the one-line routing summary built by buildRouting.
var ROUTING_TAG = "[docmind-markdown:extractMarkdown:routing]";
1860
/**
 * Derives a coarse media category from a MIME type and/or file name.
 *
 * @param {string} [mimeType] - MIME type (case and surrounding whitespace ignored).
 * @param {string} [filename] - File name (case ignored).
 * @returns {string|undefined} "image", "text", "audio", "video", or `undefined`
 *   when nothing matches. Image is checked before text; audio and video are
 *   recognised from the MIME type only.
 */
function inferMediaHint(mimeType, filename) {
  const mime = mimeType?.toLowerCase().trim() ?? "";
  const name = filename?.toLowerCase() ?? "";

  const isImage = mime.startsWith("image/") || /\.(png|jpe?g|gif|webp|bmp|ico|svg|tiff?)$/i.test(name);
  if (isImage) return "image";

  const isText = mime.startsWith("text/") || mime === "application/json" || /\.(txt|md|json|log)$/i.test(name);
  if (isText) return "text";

  for (const kind of ["audio", "video"]) {
    if (mime.startsWith(`${kind}/`)) return kind;
  }
  return void 0;
}
1873
/**
 * Builds the routing descriptor attached to an extraction result.
 *
 * @param {object} p - `{ strategy, detectedFormat, specializedPipeline,
 *   usedStructuredFallback, mediaHint? }`.
 * @returns {object} The same fields (minus `strategy`) plus `routingSummary`, a
 *   single-line, space-separated summary; `mediaHint` is appended to the summary
 *   only when it is truthy.
 */
function buildRouting(p) {
  const { strategy, detectedFormat, specializedPipeline, usedStructuredFallback, mediaHint } = p;
  const summaryParts = [
    ROUTING_TAG,
    `strategy=${strategy}`,
    `format=${detectedFormat}`,
    `pipeline=${specializedPipeline}`,
    `structuredFallback=${usedStructuredFallback}`
  ];
  if (mediaHint) {
    summaryParts.push(`mediaHint=${mediaHint}`);
  }
  return {
    detectedFormat,
    specializedPipeline,
    usedStructuredFallback,
    mediaHint,
    routingSummary: summaryParts.join(" ")
  };
}
1283
1883
/**
 * Trace message: the final Markdown came from `structuredFallback`.
 *
 * @param {string} context - Label of the route that fell back.
 * @returns {string} The formatted trace message.
 */
function traceUsedStructuredFallback(context) {
  const explanation = "final Markdown from structuredFallback (specialized route unavailable, failed, or insufficient).";
  return `${EXTRACT_WARN} ${context}: ${explanation}`;
}
@@ -1295,6 +1895,75 @@ function tracePdfStructuredAfterCognipeer() {
1295
1895
  function tracePdfSpecializedDeadEnd() {
1296
1896
  return `${EXTRACT_WARN} pdf: Cognipeer specialized route did not produce Markdown and structuredFallback was not provided.`;
1297
1897
  }
1898
/**
 * Formats the trace warning used when `structuredFallback` supplied the final
 * Markdown because the converter cannot run in the current environment.
 *
 * @param {string} label - Format label placed in front of `-unsupported-runtime`.
 * @returns {string} Warning text prefixed with EXTRACT_WARN.
 */
function traceCognipeerFileStructuredAfterUnsupportedRuntime(label) {
  const detail = "final Markdown from structuredFallback \u2014 @cognipeer/to-markdown cannot run in this environment.";
  return `${EXTRACT_WARN} ${label}-unsupported-runtime: ${detail}`;
}
1901
/**
 * Formats the trace warning used when the converter itself reported a
 * structured-fallback strategy for the given format.
 *
 * @param {string} label - Format label; it appears twice in the message.
 * @returns {string} Warning text prefixed with EXTRACT_WARN.
 */
function traceCognipeerFileStructuredAfterCognipeer(label) {
  const detail = `final Markdown from structured envelope after Cognipeer ${label} path did not yield the result.`;
  return `${EXTRACT_WARN} ${label}-structured-fallback: ${detail}`;
}
1904
/**
 * Formats the trace warning for a file whose specialized route produced no
 * Markdown while no `structuredFallback` was supplied.
 *
 * @param {string} label - Format label placed at the start of the message.
 * @returns {string} Warning text prefixed with EXTRACT_WARN.
 */
function traceCognipeerFileSpecializedDeadEnd(label) {
  const detail = "Cognipeer specialized route did not produce Markdown and structuredFallback was not provided.";
  return `${EXTRACT_WARN} ${label}: ${detail}`;
}
1907
/**
 * Formats the trace warning used when the specialized route returned empty
 * Markdown and the structured fallback is applied at the extractMarkdown layer.
 *
 * @param {string} label - Format label placed in front of `-extract-layer-fallback`.
 * @returns {string} Warning text prefixed with EXTRACT_WARN.
 */
function traceCognipeerFileExtractLayerFallback(label) {
  const detail = "specialized route returned empty Markdown but structuredFallback is set \u2014 applying convertStructuredToMarkdown at extractMarkdown layer.";
  return `${EXTRACT_WARN} ${label}-extract-layer-fallback: ${detail}`;
}
1910
/**
 * Formats the PDF trace warning used when the specialized route returned empty
 * Markdown and the structured fallback is applied at the extractMarkdown layer.
 *
 * @returns {string} Warning text prefixed with EXTRACT_WARN.
 */
function tracePdfStructuredExtractLayerFallback() {
  const detail = "pdf-extract-layer-fallback: specialized route returned empty Markdown but structuredFallback is set \u2014 applying convertStructuredToMarkdown at extractMarkdown layer.";
  return `${EXTRACT_WARN} ${detail}`;
}
1913
/**
 * Runs the specialized converter for an HTML, CSV or spreadsheet payload and
 * reconciles its outcome with the optional structured fallback.
 *
 * Outcomes:
 * - The converter reports an unsupported-runtime, failed or unavailable
 *   strategy, its Markdown is blank (empty or whitespace-only) and `fb` is
 *   set: Markdown is rendered from `fb` and the strategy becomes
 *   `<format>-structured-fallback`.
 * - Failed/unavailable with blank Markdown and no `fb`: the converter result is
 *   returned with an added "dead end" trace warning.
 * - The converter itself reports `<format>-structured-fallback`: its Markdown
 *   is returned with an added trace warning.
 * - Otherwise the converter result is returned as is.
 *
 * Blank detection is uniform across the unsupported-runtime and
 * failed/unavailable cases, so whitespace-only output never suppresses the
 * structured fallback.
 *
 * @param {"html" | "csv" | "spreadsheet"} format - Detected format; any value
 *   other than "html"/"csv" is treated as a spreadsheet.
 * @param {*} data - Payload handed to the converter unchanged.
 * @param {object | undefined} options - extractMarkdown options used to build
 *   the converter options.
 * @param {string[]} baseWarnings - Warnings collected before this branch.
 * @param {object | undefined} smOpts - Options for convertStructuredToMarkdown.
 * @param {object | undefined} fb - Optional structured fallback document.
 * @returns {Promise<{markdown: string, warnings: string[], strategy: string, routing: object}>}
 */
async function extractCognipeerFileMarkdownBranch(format, data, options, baseWarnings, smOpts, fb) {
  let r;
  if (format === "html") {
    r = await convertHtmlToMarkdown(data, buildHtmlOptions(options));
  } else if (format === "csv") {
    r = await convertCsvToMarkdown(data, buildCsvOptions(options));
  } else {
    r = await convertSpreadsheetToMarkdown(data, buildSpreadsheetOptions(options));
  }

  const strategy = cognipeerFileStrategyFromResult(format, r);
  let warnings = mergeWarnings(baseWarnings, r.warnings);

  // Strategy labels share one prefix; anything that is not html/csv is labelled
  // as a spreadsheet, mirroring the converter selection above.
  const prefix = format === "html" || format === "csv" ? format : "spreadsheet";
  const structuredFb = `${prefix}-structured-fallback`;
  const unsupported = strategy === `${prefix}-unsupported-runtime`;
  const failedOrUnavailable =
    strategy === `${prefix}-cognipeer-failed` || strategy === `${prefix}-cognipeer-unavailable`;

  const routingFor = (finalStrategy, usedStructuredFallback) =>
    buildRouting({
      detectedFormat: format,
      specializedPipeline: format,
      usedStructuredFallback,
      strategy: finalStrategy
    });

  // Only inspect the Markdown when the strategy signals that the specialized
  // route did not deliver; a successful result is passed through untouched.
  const routeDeliveredNothing =
    (unsupported || failedOrUnavailable) && (r.markdown ?? "").trim() === "";

  if (routeDeliveredNothing && fb) {
    const trace = unsupported
      ? traceCognipeerFileStructuredAfterUnsupportedRuntime(format)
      : traceCognipeerFileExtractLayerFallback(format);
    warnings = mergeWarnings(warnings, fb.warnings, [trace]);
    return {
      markdown: convertStructuredToMarkdown(fb, smOpts),
      warnings,
      strategy: structuredFb,
      routing: routingFor(structuredFb, true)
    };
  }

  if (strategy === structuredFb) {
    warnings = mergeWarnings(warnings, [traceCognipeerFileStructuredAfterCognipeer(format)]);
  }
  if (routeDeliveredNothing && failedOrUnavailable) {
    // No fallback was supplied (the fallback case returned above).
    warnings = mergeWarnings(warnings, [traceCognipeerFileSpecializedDeadEnd(format)]);
  }

  return {
    markdown: r.markdown,
    warnings,
    strategy,
    routing: routingFor(strategy, strategy === structuredFb)
  };
}
1298
1967
  async function extractMarkdown(input, options) {
1299
1968
  const smOpts = pickStructuredMarkdownOptions(options);
1300
1969
  const fb = options?.structuredFallback;
@@ -1323,22 +1992,24 @@ async function extractMarkdown(input, options) {
1323
1992
  markdown: convertStructuredToMarkdown(fb, smOpts),
1324
1993
  warnings: mergeWarnings(warnings, fb.warnings, [traceUsedStructuredFallback("path-requires-node")]),
1325
1994
  strategy: "path-requires-node",
1326
- routing: {
1995
+ routing: buildRouting({
1327
1996
  detectedFormat: "unknown",
1328
1997
  specializedPipeline: "none",
1329
- usedStructuredFallback: true
1330
- }
1998
+ usedStructuredFallback: true,
1999
+ strategy: "path-requires-node"
2000
+ })
1331
2001
  };
1332
2002
  }
1333
2003
  return {
1334
2004
  markdown: "",
1335
2005
  warnings,
1336
2006
  strategy: "path-requires-node",
1337
- routing: {
2007
+ routing: buildRouting({
1338
2008
  detectedFormat: "unknown",
1339
2009
  specializedPipeline: "none",
1340
- usedStructuredFallback: false
1341
- }
2010
+ usedStructuredFallback: false,
2011
+ strategy: "path-requires-node"
2012
+ })
1342
2013
  };
1343
2014
  }
1344
2015
  const { readFile } = await importEsm(
@@ -1360,10 +2031,26 @@ async function extractMarkdown(input, options) {
1360
2031
  return {
1361
2032
  markdown: convertStructuredToMarkdown(fb, smOpts),
1362
2033
  warnings: mergeWarnings(warnings, fb.warnings, [traceUsedStructuredFallback("invalid-input-shape")]),
1363
- strategy: "binary-unidentified-structured-fallback"
2034
+ strategy: "binary-unidentified-structured-fallback",
2035
+ routing: buildRouting({
2036
+ detectedFormat: "unknown",
2037
+ specializedPipeline: "none",
2038
+ usedStructuredFallback: true,
2039
+ strategy: "binary-unidentified-structured-fallback"
2040
+ })
1364
2041
  };
1365
2042
  }
1366
- return { markdown: "", warnings, strategy: "binary-unidentified" };
2043
+ return {
2044
+ markdown: "",
2045
+ warnings,
2046
+ strategy: "binary-unidentified",
2047
+ routing: buildRouting({
2048
+ detectedFormat: "unknown",
2049
+ specializedPipeline: "none",
2050
+ usedStructuredFallback: false,
2051
+ strategy: "binary-unidentified"
2052
+ })
2053
+ };
1367
2054
  }
1368
2055
  const fmt = detectBinaryFormat(data, filename, mimeType);
1369
2056
  if (fmt === "docx") {
@@ -1376,22 +2063,24 @@ async function extractMarkdown(input, options) {
1376
2063
  markdown: convertStructuredToMarkdown(fb, smOpts),
1377
2064
  warnings: mergeWarnings(warnings, fb.warnings, [traceUsedStructuredFallback("docx-requires-node")]),
1378
2065
  strategy: "docx-requires-node",
1379
- routing: {
2066
+ routing: buildRouting({
1380
2067
  detectedFormat: "docx",
1381
2068
  specializedPipeline: "none",
1382
- usedStructuredFallback: true
1383
- }
2069
+ usedStructuredFallback: true,
2070
+ strategy: "docx-requires-node"
2071
+ })
1384
2072
  };
1385
2073
  }
1386
2074
  return {
1387
2075
  markdown: "",
1388
2076
  warnings,
1389
2077
  strategy: "docx-requires-node",
1390
- routing: {
2078
+ routing: buildRouting({
1391
2079
  detectedFormat: "docx",
1392
2080
  specializedPipeline: "none",
1393
- usedStructuredFallback: false
1394
- }
2081
+ usedStructuredFallback: false,
2082
+ strategy: "docx-requires-node"
2083
+ })
1395
2084
  };
1396
2085
  }
1397
2086
  const r = await convertDocxToMarkdown(data, buildDocxOptions(options));
@@ -1405,11 +2094,12 @@ async function extractMarkdown(input, options) {
1405
2094
  markdown: r.markdown,
1406
2095
  warnings: w,
1407
2096
  strategy,
1408
- routing: {
2097
+ routing: buildRouting({
1409
2098
  detectedFormat: "docx",
1410
2099
  specializedPipeline: "docx",
1411
- usedStructuredFallback: strategy === "docx-structured-fallback"
1412
- }
2100
+ usedStructuredFallback: strategy === "docx-structured-fallback",
2101
+ strategy
2102
+ })
1413
2103
  };
1414
2104
  }
1415
2105
  if (fmt === "pdf") {
@@ -1424,11 +2114,26 @@ async function extractMarkdown(input, options) {
1424
2114
  markdown: convertStructuredToMarkdown(fb, smOpts),
1425
2115
  warnings: w,
1426
2116
  strategy: "pdf-structured-fallback",
1427
- routing: {
2117
+ routing: buildRouting({
1428
2118
  detectedFormat: "pdf",
1429
2119
  specializedPipeline: "pdf",
1430
- usedStructuredFallback: true
1431
- }
2120
+ usedStructuredFallback: true,
2121
+ strategy: "pdf-structured-fallback"
2122
+ })
2123
+ };
2124
+ }
2125
+ if ((strategy === "pdf-cognipeer-failed" || strategy === "pdf-cognipeer-unavailable") && r.markdown.trim() === "" && fb) {
2126
+ w = mergeWarnings(w, fb.warnings, [tracePdfStructuredExtractLayerFallback()]);
2127
+ return {
2128
+ markdown: convertStructuredToMarkdown(fb, smOpts),
2129
+ warnings: w,
2130
+ strategy: "pdf-structured-fallback",
2131
+ routing: buildRouting({
2132
+ detectedFormat: "pdf",
2133
+ specializedPipeline: "pdf",
2134
+ usedStructuredFallback: true,
2135
+ strategy: "pdf-structured-fallback"
2136
+ })
1432
2137
  };
1433
2138
  }
1434
2139
  if (strategy === "pdf-structured-fallback") {
@@ -1441,40 +2146,68 @@ async function extractMarkdown(input, options) {
1441
2146
  markdown: r.markdown,
1442
2147
  warnings: w,
1443
2148
  strategy,
1444
- routing: {
2149
+ routing: buildRouting({
1445
2150
  detectedFormat: "pdf",
1446
2151
  specializedPipeline: "pdf",
1447
- usedStructuredFallback: strategy === "pdf-structured-fallback"
1448
- }
2152
+ usedStructuredFallback: strategy === "pdf-structured-fallback",
2153
+ strategy
2154
+ })
1449
2155
  };
1450
2156
  }
2157
+ if (fmt === "html") {
2158
+ return extractCognipeerFileMarkdownBranch("html", data, options, warnings, smOpts, fb);
2159
+ }
2160
+ if (fmt === "csv") {
2161
+ return extractCognipeerFileMarkdownBranch("csv", data, options, warnings, smOpts, fb);
2162
+ }
2163
+ if (fmt === "spreadsheet") {
2164
+ return extractCognipeerFileMarkdownBranch("spreadsheet", data, options, warnings, smOpts, fb);
2165
+ }
2166
+ const mediaHint = inferMediaHint(mimeType, filename);
1451
2167
  warnings.push(
1452
- "@dragon708/docmind-markdown: Unidentified binary format (expected PDF magic or ZIP/DOCX). Using structured fallback if provided."
2168
+ "@dragon708/docmind-markdown: Unidentified binary format (expected PDF, OOXML Word/Excel, HTML, CSV, or related MIME/filename hints). Using structured fallback if provided."
1453
2169
  );
2170
+ if (mediaHint === "image") {
2171
+ warnings.push(
2172
+ `${EXTRACT_WARN} image hint (MIME/filename): raw images are not converted by specialized file routes; pass a StructuredDocumentResult (e.g. after OCR) or structuredFallback.`
2173
+ );
2174
+ } else if (mediaHint === "text") {
2175
+ warnings.push(
2176
+ `${EXTRACT_WARN} text hint (MIME/filename): plain text / JSON bytes are not auto-routed to Markdown here; pass StructuredDocumentResult or structuredFallback for normalized text/OCR pipelines.`
2177
+ );
2178
+ } else if (mediaHint === "audio" || mediaHint === "video") {
2179
+ warnings.push(
2180
+ `${EXTRACT_WARN} ${mediaHint} hint (MIME): no specialized ${mediaHint}\u2192Markdown route in extractMarkdown; use StructuredDocumentResult or structuredFallback.`
2181
+ );
2182
+ }
1454
2183
  if (fb) {
1455
2184
  return {
1456
2185
  markdown: convertStructuredToMarkdown(fb, smOpts),
1457
2186
  warnings: mergeWarnings(warnings, fb.warnings, [traceUsedStructuredFallback("binary-unidentified")]),
1458
2187
  strategy: "binary-unidentified-structured-fallback",
1459
- routing: {
2188
+ routing: buildRouting({
1460
2189
  detectedFormat: "unknown",
1461
2190
  specializedPipeline: "none",
1462
- usedStructuredFallback: true
1463
- }
2191
+ usedStructuredFallback: true,
2192
+ strategy: "binary-unidentified-structured-fallback",
2193
+ mediaHint
2194
+ })
1464
2195
  };
1465
2196
  }
1466
2197
  return {
1467
2198
  markdown: "",
1468
2199
  warnings,
1469
2200
  strategy: "binary-unidentified",
1470
- routing: {
2201
+ routing: buildRouting({
1471
2202
  detectedFormat: "unknown",
1472
2203
  specializedPipeline: "none",
1473
- usedStructuredFallback: false
1474
- }
2204
+ usedStructuredFallback: false,
2205
+ strategy: "binary-unidentified",
2206
+ mediaHint
2207
+ })
1475
2208
  };
1476
2209
  }
1477
2210
 
1478
- export { convertDocxBufferToMarkdown, convertDocxToMarkdown, convertPdfBufferToMarkdown, convertPdfPathToMarkdown, convertPdfToMarkdown, convertStructuredToLlmText, convertStructuredToMarkdown, detectBinaryFormat, extractLlmContent, extractMarkdown, extractStructuredChunks, isExtractMarkdownFileInput, renderLlmText, renderMarkdown, renderMarkdownSections, splitStructuredIntoChunks, structuredDocumentToLlmText, structuredDocumentToMarkdown };
2211
+ export { convertCsvToMarkdown, convertDocxBufferToMarkdown, convertDocxToMarkdown, convertHtmlToMarkdown, convertPdfBufferToMarkdown, convertPdfPathToMarkdown, convertPdfToMarkdown, convertSpreadsheetToMarkdown, convertStructuredToLlmText, convertStructuredToMarkdown, detectBinaryFormat, extractLlmContent, extractMarkdown, extractStructuredChunks, isExtractMarkdownFileInput, looksLikeHtmlString, renderLlmText, renderMarkdown, renderMarkdownSections, splitStructuredIntoChunks, structuredDocumentToLlmText, structuredDocumentToMarkdown };
1479
2212
  //# sourceMappingURL=index.js.map
1480
2213
  //# sourceMappingURL=index.js.map