@virstack/doc-ingest 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -192,7 +192,13 @@ async function main() {
192
192
  const failed = results.filter((r) => r.status === "error");
193
193
  log.step(`${color.bold("Final Results:")} ${color.green(`${succeeded.length} succeeded`)}, ${color.red(`${failed.length} failed`)}`);
194
194
  for (const r of results) {
195
- const fileName = r.file.padEnd(35).slice(0, 35);
195
+ // Use Intl.Segmenter to safely count and truncate visible characters (graphemes)
196
+ // This prevents multi-byte or combining characters (like Sinhala) from breaking table alignment.
197
+ const segmenter = new Intl.Segmenter("en", { granularity: "grapheme" });
198
+ const graphemes = [...segmenter.segment(path.basename(r.file))].map((g) => g.segment);
199
+ const truncatedName = graphemes.slice(0, 35).join("");
200
+ const padding = Math.max(0, 35 - graphemes.slice(0, 35).length);
201
+ const fileName = truncatedName + " ".repeat(padding);
196
202
  if (r.status === "success") {
197
203
  log.message(` ${color.green("✔")} ${color.cyan(fileName)} │ ${r.chunks.toString().padStart(4)} chunks │ ${r.vectors.toString().padStart(4)} vectors │ ${r.durationSec}s`);
198
204
  }
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,cAAc,GACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,eAAe;AACf,OAAO,EACL,KAAK,EACL,KAAK,EACL,IAAI,EACJ,OAAO,EACP,MAAM,EACN,QAAQ,EACR,MAAM,EACN,GAAG,GACJ,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,gCAAgC;AAChC,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,sBAAsB;AACtB,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;AAErD,8BAA8B;AAC9B,yEAAyE;AACzE,uCAAuC;AACvC,SAAS,CAAC;IACR,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QAC3B,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IACD,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;IACxD,CAAC;IACD,KAAK,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,EAAE;QAC9B,GAAG,CAAC,KAAK,CACP,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CACnE,CAAC;IACJ,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;CACR,CAAC,CAAC;AAEH,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC,CACjF,CAAC;IAEF,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC,CAAC,CAAC;IAErE,yEAAyE;IACzE,IAAI,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3E,IAAI,cAAc,GAAa,EAAE,CAAC;IAClC,IAAI,QAAQ,GAA6C,EAAE,CAAC;IAE5D,IAAI,UAAU,EAAE,CAAC;QACf,+BAA+B;QAC/B,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;YACnD,MAAM,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;YAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YAC/C,cAAc,GAAG,OAAO;iBACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;iBACtE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,cAAc,GAAG,CAAC,YAAY,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,qBAAqB;QACrB,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC;YACxB,OAAO,EAAE,gCAAgC;YACzC,OAAO,EAAE;gBACP;oBACE,KAAK,EAAE,OAAO;oBACd,KAAK,EAAE,cAAc;oBACrB,IAAI,EAAE,uCAAuC;iBAC9C;gBACD;oBACE,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,cAAc;oBACrB,IAAI,EAAE,uCAAuC;iBAC9C;aACF;SACF,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACnB,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC;gBAC3B,OAAO,EAAE,mCAAmC;gBAC5C,WAAW,EAAE,8BAA8B;gBAC3C,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,mBAAmB,CAAC;gBAC/D,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACxB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAmB,CAAC,CAAC;YACvD,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;gBACnD,MAAM,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;gBAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;gBAC/C,cAAc,GAAG,OAAO;qBACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACZ,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACxD;qBACA,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,cAAc,GAAG,CAAC,YAAY,CAAC,CAAC;YAClC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,gBAAgB;YAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC;gBACzB,OAAO,EAAE,2BAA2B;gBACpC,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,0BAA0B,CAAC;gBACtE,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACtB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC;gBACtB,OAAO,EAAE,wBAAwB;gBACjC,WAAW,EAAE,YAAY;gBACzB,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,mBAAmB,CAAC;gBAC/D,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,QAAQ,GAAG,CAAC,EAAE,OAAO,EAAE,OAAiB,EAAE,IAAI,EAAE,IAAc,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,gBAAgB,CAAC,YAAY,EAAE,CAAC,CAAC;IACnC,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,MAAM,CAAC,0BAA0B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,MAAM,CAAC,6BAA6B,CAAC,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC5D,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC;QAC3B,OAAO,EAAE,oBAAoB,WAAW,WAAW;QACnD,OAAO,EAAE;YACP,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,sBAAsB,EAAE;YAC9C,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE;SACtC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAClC,MAAM,CAAC,8BAA8B,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,qBAAqB;IACrB,MAAM,CAAC,GAAG,OAAO,EAAE,CAAC;IACpB,CAAC,CAAC,KAAK,CAAC,cAAc,WAAW,4BAA4B,CAAC,CAAC;IAE/D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,MAAM,CACzC;YACE,KAAK,EAAE,cAAc;YACrB,QAAQ,EAAE,QAAQ;SACnB,EACD,EAAE,cAAc,EAAE,cAAc,CAAC,kBAAkB,EAAE,CACtD,CAAC;QAEF,MAAM,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC;QAEpC,gCAAgC;QAChC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,4BAA4B,YAAY,IAAI,CAAC,CAAC,CAAC;QAElE,uCAAuC;QACvC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;QACrE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC;QAEhE,GAAG,CAAC,IAAI,CACN,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,SAAS,CAAC,MAAM,YAAY,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,SAAS,CAAC,EAAE,CAC3H,CAAC;QAEF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAChD,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAC3B,GAAG,CAAC,OAAO,CACT,KAAK,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,WAAW,GAAG,CAC9J,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,OAAO,CACT,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CACtE,CAAC;YACJ,CAAC;QACH,CAAC;QAED,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC,CAAC,CAAC;QAEvE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,cAAc,GACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,eAAe;AACf,OAAO,EACL,KAAK,EACL,KAAK,EACL,IAAI,EACJ,OAAO,EACP,MAAM,EACN,QAAQ,EACR,MAAM,EACN,GAAG,GACJ,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,gCAAgC;AAChC,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,sBAAsB;AACtB,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;AAErD,8BAA8B;AAC9B,yEAAyE;AACzE,uCAAuC;AACvC,SAAS,CAAC;IACR,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QAC3B,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IACD,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;IACxD,CAAC;IACD,KAAK,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,EAAE;QAC9B,GAAG,CAAC,KAAK,CACP,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CACnE,CAAC;IACJ,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;CACR,CAAC,CAAC;AAEH,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CACR,MAAM,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CACrE,CACF,CAAC;IAEF,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC,CAAC,CAAC;IAErE,yEAAyE;IACzE,IAAI,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3E,IAAI,cAAc,GAAa,EAAE,CAAC;IAClC,IAAI,QAAQ,GAA6C,EAAE,CAAC;IAE5D,IAAI,UAAU,EAAE,CAAC;QACf,+BAA+B;QAC/B,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;YACnD,MAAM,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;YAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YAC/C,cAAc,GAAG,OAAO;iBACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;iBACtE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,cAAc,GAAG,CAAC,YAAY,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,qBAAqB;QACrB,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC;YACxB,OAAO,EAAE,gCAAgC;YACzC,OAAO,EAAE;gBACP;oBACE,KAAK,EAAE,OAAO;oBACd,KAAK,EAAE,cAAc;oBACrB,IAAI,EAAE,uCAAuC;iBAC9C;gBACD;oBACE,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,cAAc;oBACrB,IAAI,EAAE,uCAAuC;iBAC9C;aACF;SACF,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACnB,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC;gBAC3B,OAAO,EAAE,mCAAmC;gBAC5C,WAAW,EAAE,8BAA8B;gBAC3C,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,mBAAmB,CAAC;gBAC/D,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACxB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAmB,CAAC,CAAC;YACvD,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;gBACnD,MAAM,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;gBAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;gBAC/C,cAAc,GAAG,OAAO;qBACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACZ,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACxD;qBACA,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,cAAc,GAAG,CAAC,YAAY,CAAC,CAAC;YAClC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,gBAAgB;YAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC;gBACzB,OAAO,EAAE,2BAA2B;gBACpC,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,0BAA0B,CAAC;gBACtE,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACtB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC;gBACtB,OAAO,EAAE,wBAAwB;gBACjC,WAAW,EAAE,YAAY;gBACzB,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,mBAAmB,CAAC;gBAC/D,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,QAAQ,GAAG,CAAC,EAAE,OAAO,EAAE,OAAiB,EAAE,IAAI,EAAE,IAAc,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,gBAAgB,CAAC,YAAY,EAAE,CAAC,CAAC;IACnC,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,MAAM,CAAC,0BAA0B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,MAAM,CAAC,6BAA6B,CAAC,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC5D,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC;QAC3B,OAAO,EAAE,oBAAoB,WAAW,WAAW;QACnD,OAAO,EAAE;YACP,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,sBAAsB,EAAE;YAC9C,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE;SACtC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAClC,MAAM,CAAC,8BAA8B,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,qBAAqB;IACrB,MAAM,CAAC,GAAG,OAAO,EAAE,CAAC;IACpB,CAAC,CAAC,KAAK,CAAC,cAAc,WAAW,4BAA4B,CAAC,CAAC;IAE/D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,MAAM,CACzC;YACE,KAAK,EAAE,cAAc;YACrB,QAAQ,EAAE,QAAQ;SACnB,EACD,EAAE,cAAc,EAAE,cAAc,CAAC,kBAAkB,EAAE,CACtD,CAAC;QAEF,MAAM,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC;QAEpC,gCAAgC;QAChC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,4BAA4B,YAAY,IAAI,CAAC,CAAC,CAAC;QAElE,uCAAuC;QACvC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;QACrE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC;QAEhE,GAAG,CAAC,IAAI,CACN,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,SAAS,CAAC,MAAM,YAAY,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,SAAS,CAAC,EAAE,CAC3H,CAAC;QAEF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,iFAAiF;YACjF,iGAAiG;YACjG,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC;YACxE,MAAM,SAAS,GAAG,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CACjE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CACjB,CAAC;YACF,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACtD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC;YAChE,MAAM,QAAQ,GAAG,aAAa,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAErD,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAC3B,GAAG,CAAC,OAAO,CACT,KAAK,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,WAAW,GAAG,CAC9J,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,OAAO,CACT,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CACtE,CAAC;YACJ,CAAC;QACH,CAAC;QAED,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC,CAAC,CAAC;QAEvE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"textExtractorNode.d.ts","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CA+BjC"}
1
+ {"version":3,"file":"textExtractorNode.d.ts","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAkCjC"}
@@ -23,15 +23,14 @@ export async function textExtractorNode(state) {
23
23
  skip_empty_lines: true,
24
24
  });
25
25
  // Convert to a simple text representation
26
- rawText = records
27
- .map((row) => row.join(" | "))
28
- .join("\n");
26
+ rawText = records.map((row) => row.join(" | ")).join("\n");
29
27
  }
30
28
  else {
31
29
  // DOCX, PPTX, XLSX — use officeparser
32
30
  if (!filePath)
33
31
  throw new Error("filePath required for office document parsing");
34
- rawText = await officeparser.parseOfficeAsync(filePath);
32
+ const ast = await officeparser.parseOffice(filePath);
33
+ rawText = ast.toText();
35
34
  }
36
35
  logger.info(LogSource.TEXT_EXTRACTOR, `Extracted ${rawText.length} chars of raw text`);
37
36
  return { rawText };
@@ -1 +1 @@
1
- {"version":3,"file":"textExtractorNode.js","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,YAAY,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAEvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAoB;IAEpB,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;IAErC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,YAAY,QAAQ,KAAK,QAAQ,GAAG,CAAC,CAAC;IAE5E,IAAI,OAAe,CAAC;IAEpB,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAC9B,kCAAkC;QAClC,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAC5E,CAAC;SAAM,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,mDAAmD;QACnD,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACpE,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACvD,MAAM,OAAO,GAAe,KAAK,CAAC,SAAS,EAAE;YAC3C,gBAAgB,EAAE,IAAI;SACvB,CAAC,CAAC;QAEH,0CAA0C;QAC1C,OAAO,GAAG,OAAO;aACd,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aAC7B,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;SAAM,CAAC;QACN,sCAAsC;QACtC,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QAChF,OAAO,GAAG,MAAM,YAAY,CAAC,gBAAgB,CAAC,QAAQ,CAAW,CAAC;IACpE,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,aAAa,OAAO,CAAC,MAAM,oBAAoB,CAAC,CAAC;IAEvF,OAAO,EAAE,OAAO,EAAE,CAAC;AACrB,CAAC"}
1
+ {"version":3,"file":"textExtractorNode.js","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,YAAY,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAEvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAoB;IAEpB,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;IAErC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,YAAY,QAAQ,KAAK,QAAQ,GAAG,CAAC,CAAC;IAE5E,IAAI,OAAe,CAAC;IAEpB,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAC9B,kCAAkC;QAClC,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAC5E,CAAC;SAAM,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,mDAAmD;QACnD,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACpE,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACvD,MAAM,OAAO,GAAe,KAAK,CAAC,SAAS,EAAE;YAC3C,gBAAgB,EAAE,IAAI;SACvB,CAAC,CAAC;QAEH,0CAA0C;QAC1C,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7D,CAAC;SAAM,CAAC;QACN,sCAAsC;QACtC,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACnE,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QACrD,OAAO,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;IACzB,CAAC;IAED,MAAM,CAAC,IAAI,CACT,SAAS,CAAC,cAAc,EACxB,aAAa,OAAO,CAAC,MAAM,oBAAoB,CAChD,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,CAAC;AACrB,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@virstack/doc-ingest",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "description": "A high-performance, parallelized document ingestion and vectorization pipeline.",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -39,11 +39,11 @@
39
39
  "dotenv": "^16.4.0",
40
40
  "figlet": "^1.11.0",
41
41
  "mime-types": "^2.1.35",
42
- "officeparser": "^4.1.0",
42
+ "officeparser": "^6.0.7",
43
43
  "p-limit": "^7.3.0",
44
44
  "pdf-lib": "^1.17.1",
45
45
  "picocolors": "^1.1.1",
46
- "terminal-image": "^4.2.0"
46
+ "terminal-image": "^3.0.0"
47
47
  },
48
48
  "devDependencies": {
49
49
  "@langchain/langgraph-cli": "^1.1.16",
@@ -52,4 +52,4 @@
52
52
  "tsx": "^4.19.0",
53
53
  "typescript": "^5.6.0"
54
54
  }
55
- }
55
+ }