@virstack/doc-ingest 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js
CHANGED
|
@@ -192,7 +192,13 @@ async function main() {
|
|
|
192
192
|
const failed = results.filter((r) => r.status === "error");
|
|
193
193
|
log.step(`${color.bold("Final Results:")} ${color.green(`${succeeded.length} succeeded`)}, ${color.red(`${failed.length} failed`)}`);
|
|
194
194
|
for (const r of results) {
|
|
195
|
-
|
|
195
|
+
// Use Intl.Segmenter to safely count and truncate visible characters (graphemes)
|
|
196
|
+
// This prevents multi-byte or combining characters (like Sinhala) from breaking table alignment.
|
|
197
|
+
const segmenter = new Intl.Segmenter("en", { granularity: "grapheme" });
|
|
198
|
+
const graphemes = [...segmenter.segment(path.basename(r.file))].map((g) => g.segment);
|
|
199
|
+
const truncatedName = graphemes.slice(0, 35).join("");
|
|
200
|
+
const padding = Math.max(0, 35 - graphemes.slice(0, 35).length);
|
|
201
|
+
const fileName = truncatedName + " ".repeat(padding);
|
|
196
202
|
if (r.status === "success") {
|
|
197
203
|
log.message(` ${color.green("✔")} ${color.cyan(fileName)} │ ${r.chunks.toString().padStart(4)} chunks │ ${r.vectors.toString().padStart(4)} vectors │ ${r.durationSec}s`);
|
|
198
204
|
}
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,cAAc,GACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,eAAe;AACf,OAAO,EACL,KAAK,EACL,KAAK,EACL,IAAI,EACJ,OAAO,EACP,MAAM,EACN,QAAQ,EACR,MAAM,EACN,GAAG,GACJ,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,gCAAgC;AAChC,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,sBAAsB;AACtB,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;AAErD,8BAA8B;AAC9B,yEAAyE;AACzE,uCAAuC;AACvC,SAAS,CAAC;IACR,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QAC3B,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IACD,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;IACxD,CAAC;IACD,KAAK,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,EAAE;QAC9B,GAAG,CAAC,KAAK,CACP,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CACnE,CAAC;IACJ,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;CACR,CAAC,CAAC;AAEH,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,cAAc,GACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,eAAe;AACf,OAAO,EACL,KAAK,EACL,KAAK,EACL,IAAI,EACJ,OAAO,EACP,MAAM,EACN,QAAQ,EACR,MAAM,EACN,GAAG,GACJ,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,gCAAgC;AAChC,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,sBAAsB;AACtB,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;AAErD,8BAA8B;AAC9B,yEAAyE;AACzE,uCAAuC;AACvC,SAAS,CAAC;IACR,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QAC3B,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IACD,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;IACxD,CAAC;IACD,KAAK,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,EAAE;QAC9B,GAAG,CAAC,KAAK,CACP,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CACnE,CAAC;IACJ,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;CACR,CAAC,CAAC;AAEH,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CACR,MAAM,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CACrE,CACF,CAAC;IAEF,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC,CAAC,CAAC;IAErE,yEAAyE;IACzE,IAAI,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAA
C,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3E,IAAI,cAAc,GAAa,EAAE,CAAC;IAClC,IAAI,QAAQ,GAA6C,EAAE,CAAC;IAE5D,IAAI,UAAU,EAAE,CAAC;QACf,+BAA+B;QAC/B,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;YACnD,MAAM,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;YAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YAC/C,cAAc,GAAG,OAAO;iBACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;iBACtE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,cAAc,GAAG,CAAC,YAAY,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,qBAAqB;QACrB,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC;YACxB,OAAO,EAAE,gCAAgC;YACzC,OAAO,EAAE;gBACP;oBACE,KAAK,EAAE,OAAO;oBACd,KAAK,EAAE,cAAc;oBACrB,IAAI,EAAE,uCAAuC;iBAC9C;gBACD;oBACE,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,cAAc;oBACrB,IAAI,EAAE,uCAAuC;iBAC9C;aACF;SACF,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACnB,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC;gBAC3B,OAAO,EAAE,mCAAmC;gBAC5C,WAAW,EAAE,8BAA8B;gBAC3C,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,mBAAmB,CAAC;gBAC/D,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACxB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAmB,CAAC,CAAC;YACvD,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;gBACnD,MAAM,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;gBAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;gBAC/C,cAAc,GAAG,OAAO;qBACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACZ,o
BAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACxD;qBACA,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,cAAc,GAAG,CAAC,YAAY,CAAC,CAAC;YAClC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,gBAAgB;YAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC;gBACzB,OAAO,EAAE,2BAA2B;gBACpC,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,0BAA0B,CAAC;gBACtE,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACtB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC;gBACtB,OAAO,EAAE,wBAAwB;gBACjC,WAAW,EAAE,YAAY;gBACzB,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,mBAAmB,CAAC;gBAC/D,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,QAAQ,GAAG,CAAC,EAAE,OAAO,EAAE,OAAiB,EAAE,IAAI,EAAE,IAAc,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,gBAAgB,CAAC,YAAY,EAAE,CAAC,CAAC;IACnC,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,MAAM,CAAC,0BAA0B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,MAAM,CAAC,6BAA6B,CAAC,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC5D,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC;QAC3B,OAAO,EAAE,oBAAoB,WAAW,WAAW;QACnD,OAAO,EAAE;YACP,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,sBAAsB,EAAE;YAC9C,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE;SACtC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAClC,MAAM,CAAC,8BAA8B,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,qBAAqB;IACrB,MAAM,CAAC,GAAG,OAAO,EAAE,CAAC;IACpB,CAAC,CAAC,KAAK,CAAC,cAAc,WAAW,4BAA4B,CAAC,CAAC;IAE/D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,MAAM,CACzC;YACE,KAAK,EAAE,cAAc;YA
CrB,QAAQ,EAAE,QAAQ;SACnB,EACD,EAAE,cAAc,EAAE,cAAc,CAAC,kBAAkB,EAAE,CACtD,CAAC;QAEF,MAAM,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC;QAEpC,gCAAgC;QAChC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,4BAA4B,YAAY,IAAI,CAAC,CAAC,CAAC;QAElE,uCAAuC;QACvC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;QACrE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC;QAEhE,GAAG,CAAC,IAAI,CACN,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,SAAS,CAAC,MAAM,YAAY,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,SAAS,CAAC,EAAE,CAC3H,CAAC;QAEF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,iFAAiF;YACjF,iGAAiG;YACjG,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC;YACxE,MAAM,SAAS,GAAG,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CACjE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CACjB,CAAC;YACF,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACtD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC;YAChE,MAAM,QAAQ,GAAG,aAAa,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAErD,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAC3B,GAAG,CAAC,OAAO,CACT,KAAK,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,WAAW,GAAG,CAC9J,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,OAAO,CACT,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CACtE,CAAC;YACJ,CAAC;QACH,CAAC;QAED,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC,CAAC,CAAC;QAEvE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;IAAC,OAA
O,GAAQ,EAAE,CAAC;QAClB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"textExtractorNode.d.ts","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,
|
|
1
|
+
{"version":3,"file":"textExtractorNode.d.ts","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAkCjC"}
|
|
@@ -23,15 +23,14 @@ export async function textExtractorNode(state) {
|
|
|
23
23
|
skip_empty_lines: true,
|
|
24
24
|
});
|
|
25
25
|
// Convert to a simple text representation
|
|
26
|
-
rawText = records
|
|
27
|
-
.map((row) => row.join(" | "))
|
|
28
|
-
.join("\n");
|
|
26
|
+
rawText = records.map((row) => row.join(" | ")).join("\n");
|
|
29
27
|
}
|
|
30
28
|
else {
|
|
31
29
|
// DOCX, PPTX, XLSX — use officeparser
|
|
32
30
|
if (!filePath)
|
|
33
31
|
throw new Error("filePath required for office document parsing");
|
|
34
|
-
|
|
32
|
+
const ast = await officeparser.parseOffice(filePath);
|
|
33
|
+
rawText = ast.toText();
|
|
35
34
|
}
|
|
36
35
|
logger.info(LogSource.TEXT_EXTRACTOR, `Extracted ${rawText.length} chars of raw text`);
|
|
37
36
|
return { rawText };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"textExtractorNode.js","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,YAAY,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAEvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAoB;IAEpB,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;IAErC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,YAAY,QAAQ,KAAK,QAAQ,GAAG,CAAC,CAAC;IAE5E,IAAI,OAAe,CAAC;IAEpB,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAC9B,kCAAkC;QAClC,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAC5E,CAAC;SAAM,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,mDAAmD;QACnD,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACpE,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACvD,MAAM,OAAO,GAAe,KAAK,CAAC,SAAS,EAAE;YAC3C,gBAAgB,EAAE,IAAI;SACvB,CAAC,CAAC;QAEH,0CAA0C;QAC1C,OAAO,GAAG,OAAO
|
|
1
|
+
{"version":3,"file":"textExtractorNode.js","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,YAAY,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAEvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAoB;IAEpB,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;IAErC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,YAAY,QAAQ,KAAK,QAAQ,GAAG,CAAC,CAAC;IAE5E,IAAI,OAAe,CAAC;IAEpB,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAC9B,kCAAkC;QAClC,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAC5E,CAAC;SAAM,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,mDAAmD;QACnD,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACpE,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACvD,MAAM,OAAO,GAAe,KAAK,CAAC,SAAS,EAAE;YAC3C,gBAAgB,EAAE,IAAI;SACvB,CAAC,CAAC;QAEH,0CAA0C;QAC1C,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7D,CAAC;SAAM,CAAC;QACN,sCAAsC;QACtC,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACnE,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QACrD,OAAO,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;IACzB,CAAC;IAED,MAAM,CAAC,IAAI,CACT,SAAS,CAAC,cAAc,EACxB,aAAa,OAAO,CAAC,MAAM,oBAAoB,CAChD,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,CAAC;AACrB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@virstack/doc-ingest",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "A high-performance, parallelized document ingestion and vectorization pipeline.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -39,11 +39,11 @@
|
|
|
39
39
|
"dotenv": "^16.4.0",
|
|
40
40
|
"figlet": "^1.11.0",
|
|
41
41
|
"mime-types": "^2.1.35",
|
|
42
|
-
"officeparser": "^
|
|
42
|
+
"officeparser": "^6.0.7",
|
|
43
43
|
"p-limit": "^7.3.0",
|
|
44
44
|
"pdf-lib": "^1.17.1",
|
|
45
45
|
"picocolors": "^1.1.1",
|
|
46
|
-
"terminal-image": "^
|
|
46
|
+
"terminal-image": "^3.0.0"
|
|
47
47
|
},
|
|
48
48
|
"devDependencies": {
|
|
49
49
|
"@langchain/langgraph-cli": "^1.1.16",
|
|
@@ -52,4 +52,4 @@
|
|
|
52
52
|
"tsx": "^4.19.0",
|
|
53
53
|
"typescript": "^5.6.0"
|
|
54
54
|
}
|
|
55
|
-
}
|
|
55
|
+
}
|