@virstack/doc-ingest 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/dist/adapters/aiAdapters.d.ts +3 -0
- package/dist/adapters/aiAdapters.d.ts.map +1 -1
- package/dist/adapters/aiAdapters.js +4 -2
- package/dist/adapters/aiAdapters.js.map +1 -1
- package/dist/cli.js +7 -1
- package/dist/cli.js.map +1 -1
- package/dist/core/constants.d.ts +6 -0
- package/dist/core/constants.d.ts.map +1 -0
- package/dist/core/constants.js +33 -0
- package/dist/core/constants.js.map +1 -0
- package/dist/graphs/singleDocument.d.ts +2 -2
- package/dist/graphs/singleDocument.d.ts.map +1 -1
- package/dist/graphs/singleDocument.js +7 -1
- package/dist/graphs/singleDocument.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/nodes/fileTypeRouter.d.ts.map +1 -1
- package/dist/nodes/fileTypeRouter.js +11 -0
- package/dist/nodes/fileTypeRouter.js.map +1 -1
- package/dist/nodes/imageReaderNode.d.ts +8 -0
- package/dist/nodes/imageReaderNode.d.ts.map +1 -0
- package/dist/nodes/imageReaderNode.js +28 -0
- package/dist/nodes/imageReaderNode.js.map +1 -0
- package/dist/nodes/llmExtractionNode.d.ts +1 -0
- package/dist/nodes/llmExtractionNode.d.ts.map +1 -1
- package/dist/nodes/llmExtractionNode.js +4 -3
- package/dist/nodes/llmExtractionNode.js.map +1 -1
- package/dist/nodes/textExtractorNode.d.ts.map +1 -1
- package/dist/nodes/textExtractorNode.js +3 -4
- package/dist/nodes/textExtractorNode.js.map +1 -1
- package/package.json +7 -3
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@ Powered by **LangGraph** for resilient orchestration, **OpenRouter / Gemini** fo
|
|
|
8
8
|
|
|
9
9
|
## ✨ Key Features
|
|
10
10
|
|
|
11
|
-
- **Universal Multi-Format Support:** Natively processes PDF, DOCX, XLSX, PPTX, CSV, TXT, HTML, and
|
|
11
|
+
- **Universal Multi-Format Support:** Natively processes PDF, DOCX, XLSX, PPTX, CSV, TXT, HTML, EPUB, and Images (JPG, JPEG, PNG, GIF, WEBP, SVG).
|
|
12
12
|
- **Dual-Tier Parallelism:** Concurrently processes multiple files while simultaneously splitting and routing large PDFs into parallel Vision-API execution nodes.
|
|
13
13
|
- **Smart Type Routing:** Automatically identifies MIME types and dynamically routes files to the most optimal, parser-specific extraction graph.
|
|
14
14
|
- **Provider Agnostic Architecture:** Built entirely on Dependency Injection. Easily swap out LLMs, Embeddings, and Vector Databases (Pinecone, Qdrant, etc.) to fit your specific stack.
|
|
@@ -121,6 +121,21 @@ virstack-doc-ingest ./documents/ --verbose
|
|
|
121
121
|
|
|
122
122
|
Virstack Doc Ingest is designed to be fully embedded into your own SaaS backends or ETL pipelines. It is rigidly decoupled from concrete implementations.
|
|
123
123
|
|
|
124
|
+
### Validating Supported File Types
|
|
125
|
+
|
|
126
|
+
You can import the list of natively supported file extensions directly from the library to validate user uploads before sending them to the ingestion pipeline.
|
|
127
|
+
|
|
128
|
+
```typescript
|
|
129
|
+
import { SUPPORTED_FILE_EXTENSIONS, batchGraph } from "@virstack/doc-ingest";
|
|
130
|
+
|
|
131
|
+
const fileExt = ".jpg"; // e.g. path.extname(file)
|
|
132
|
+
|
|
133
|
+
if (!SUPPORTED_FILE_EXTENSIONS.includes(fileExt.toLowerCase())) {
|
|
134
|
+
console.error(`Unsupported file type: ${fileExt}`);
|
|
135
|
+
// Return a 400 Bad Request to the user
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
124
139
|
### Default Built-In Adapters
|
|
125
140
|
|
|
126
141
|
The package exports fully functional adapters for typical stacks:
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
export interface LlmInput {
|
|
2
2
|
systemPrompt: string;
|
|
3
3
|
userText: string;
|
|
4
|
+
/** @deprecated use base64Data instead */
|
|
4
5
|
base64PdfChunk?: string;
|
|
6
|
+
base64Data?: string;
|
|
7
|
+
mimeType?: string;
|
|
5
8
|
}
|
|
6
9
|
export interface LlmAdapter {
|
|
7
10
|
generateMarkdown(input: LlmInput): Promise<string>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"aiAdapters.d.ts","sourceRoot":"","sources":["../../src/adapters/aiAdapters.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,QAAQ;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"aiAdapters.d.ts","sourceRoot":"","sources":["../../src/adapters/aiAdapters.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,QAAQ;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,yCAAyC;IACzC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,UAAU;IACzB,gBAAgB,CAAC,KAAK,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACpD;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;CAC9C;AAID,qBAAa,oBAAqB,YAAW,UAAU;IACrD,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,KAAK,CAAS;gBAEV,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IAKnC,gBAAgB,CAAC,KAAK,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;CAmCzD;AAED,qBAAa,0BAA2B,YAAW,gBAAgB;IACjE,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,UAAU,CAAS;gBAEf,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,UAAU,GAAE,MAAa;IAM9D,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CA4BnD"}
|
|
@@ -9,10 +9,12 @@ export class OpenRouterLlmAdapter {
|
|
|
9
9
|
}
|
|
10
10
|
async generateMarkdown(input) {
|
|
11
11
|
const userContent = [];
|
|
12
|
-
|
|
12
|
+
const mediaObj = input.base64Data || input.base64PdfChunk;
|
|
13
|
+
if (mediaObj) {
|
|
14
|
+
const mime = input.mimeType || "application/pdf";
|
|
13
15
|
userContent.push({
|
|
14
16
|
type: "image_url",
|
|
15
|
-
imageUrl: { url: `data
|
|
17
|
+
imageUrl: { url: `data:${mime};base64,${mediaObj}` },
|
|
16
18
|
});
|
|
17
19
|
}
|
|
18
20
|
userContent.push({ type: "text", text: input.userText });
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"aiAdapters.js","sourceRoot":"","sources":["../../src/adapters/aiAdapters.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"aiAdapters.js","sourceRoot":"","sources":["../../src/adapters/aiAdapters.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAqB7C,wDAAwD;AAExD,MAAM,OAAO,oBAAoB;IACvB,MAAM,CAAa;IACnB,KAAK,CAAS;IAEtB,YAAY,MAAc,EAAE,KAAa;QACvC,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,KAAe;QACpC,MAAM,WAAW,GAAU,EAAE,CAAC;QAE9B,MAAM,QAAQ,GAAG,KAAK,CAAC,UAAU,IAAI,KAAK,CAAC,cAAc,CAAC;QAE1D,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,IAAI,iBAAiB,CAAC;YACjD,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,WAAW;gBACjB,QAAQ,EAAE,EAAE,GAAG,EAAE,QAAQ,IAAI,WAAW,QAAQ,EAAE,EAAE;aACrD,CAAC,CAAC;QACL,CAAC;QACD,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;QAEzD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;YAC3C,oBAAoB,EAAE;gBACpB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,YAAY,EAAE;oBAC/C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAkB,EAAE;iBAC9C;gBACD,WAAW,EAAE,CAAC;aACf;SACF,CAAC,CAAC;QAEH,kDAAkD;QAClD,MAAM,YAAY,GAAG,QAAe,CAAC;QACrC,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QAE5D,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3B,OAAO,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QACtF,CAAC;QAED,OAAO,CAAC,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC;CACF;AAED,MAAM,OAAO,0BAA0B;IAC7B,MAAM,CAAa;IACnB,KAAK,CAAS;IACd,UAAU,CAAS;IAE3B,YAAY,MAAc,EAAE,KAAa,EAAE,aAAqB,IAAI;QAClE,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,MAAgB;QAC1B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC;YACrD,WAAW,EAAE;gBACX,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,KAAK,EA
AE,MAAM;gBACb,UAAU,EAAE,IAAI,CAAC,UAAU;aAC5B;SACF,CAAC,CAAC;QAEH,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,kEAAkE,QAAQ,EAAE,CAAC,CAAC;QAChG,CAAC;QAED,8DAA8D;QAC9D,IAAI,cAAc,GAAG,QAAQ,CAAC,IAAI,CAAC;QACnC,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC7E,cAAc,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAM,EAAE,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAC9E,CAAC;QAED,OAAO,cAAc,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE;YACtC,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC;YAC3B,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;gBAC3B,2EAA2E;gBAC3E,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;YAC3E,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;IACL,CAAC;CACF"}
|
package/dist/cli.js
CHANGED
|
@@ -192,7 +192,13 @@ async function main() {
|
|
|
192
192
|
const failed = results.filter((r) => r.status === "error");
|
|
193
193
|
log.step(`${color.bold("Final Results:")} ${color.green(`${succeeded.length} succeeded`)}, ${color.red(`${failed.length} failed`)}`);
|
|
194
194
|
for (const r of results) {
|
|
195
|
-
|
|
195
|
+
// Use Intl.Segmenter to safely count and truncate visible characters (graphemes)
|
|
196
|
+
// This prevents multi-byte or combining characters (like Sinhala) from breaking table alignment.
|
|
197
|
+
const segmenter = new Intl.Segmenter("en", { granularity: "grapheme" });
|
|
198
|
+
const graphemes = [...segmenter.segment(path.basename(r.file))].map((g) => g.segment);
|
|
199
|
+
const truncatedName = graphemes.slice(0, 35).join("");
|
|
200
|
+
const padding = Math.max(0, 35 - graphemes.slice(0, 35).length);
|
|
201
|
+
const fileName = truncatedName + " ".repeat(padding);
|
|
196
202
|
if (r.status === "success") {
|
|
197
203
|
log.message(` ${color.green("✔")} ${color.cyan(fileName)} │ ${r.chunks.toString().padStart(4)} chunks │ ${r.vectors.toString().padStart(4)} vectors │ ${r.durationSec}s`);
|
|
198
204
|
}
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,cAAc,GACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,eAAe;AACf,OAAO,EACL,KAAK,EACL,KAAK,EACL,IAAI,EACJ,OAAO,EACP,MAAM,EACN,QAAQ,EACR,MAAM,EACN,GAAG,GACJ,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,gCAAgC;AAChC,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,sBAAsB;AACtB,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;AAErD,8BAA8B;AAC9B,yEAAyE;AACzE,uCAAuC;AACvC,SAAS,CAAC;IACR,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QAC3B,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IACD,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;IACxD,CAAC;IACD,KAAK,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,EAAE;QAC9B,GAAG,CAAC,KAAK,CACP,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CACnE,CAAC;IACJ,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;CACR,CAAC,CAAC;AAEH,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,cAAc,GACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAExC,eAAe;AACf,OAAO,EACL,KAAK,EACL,KAAK,EACL,IAAI,EACJ,OAAO,EACP,MAAM,EACN,QAAQ,EACR,MAAM,EACN,GAAG,GACJ,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,gCAAgC;AAChC,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,sBAAsB;AACtB,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;AAErD,8BAA8B;AAC9B,yEAAyE;AACzE,uCAAuC;AACvC,SAAS,CAAC;IACR,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IACD,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QAC3B,IAAI,SAAS,EAAE,CAAC;YACd,GAAG,CAAC,OAAO,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IACD,IAAI,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACxB,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;IACxD,CAAC;IACD,KAAK,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,GAAG,EAAE,EAAE;QAC9B,GAAG,CAAC,KAAK,CACP,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CACnE,CAAC;IACJ,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;CACR,CAAC,CAAC;AAEH,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,KAAK,EAAE,CAAC;IAChB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,IAAI,CACR,MAAM,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CACrE,CACF,CAAC;IAEF,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC,CAAC,CAAC;IAErE,yEAAyE;IACzE,IAAI,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAA
C,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3E,IAAI,cAAc,GAAa,EAAE,CAAC;IAClC,IAAI,QAAQ,GAA6C,EAAE,CAAC;IAE5D,IAAI,UAAU,EAAE,CAAC;QACf,+BAA+B;QAC/B,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QAC9C,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;YACnD,MAAM,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;YAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YAC/C,cAAc,GAAG,OAAO;iBACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;iBACtE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,cAAc,GAAG,CAAC,YAAY,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;SAAM,CAAC;QACN,qBAAqB;QACrB,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC;YACxB,OAAO,EAAE,gCAAgC;YACzC,OAAO,EAAE;gBACP;oBACE,KAAK,EAAE,OAAO;oBACd,KAAK,EAAE,cAAc;oBACrB,IAAI,EAAE,uCAAuC;iBAC9C;gBACD;oBACE,KAAK,EAAE,MAAM;oBACb,KAAK,EAAE,cAAc;oBACrB,IAAI,EAAE,uCAAuC;iBAC9C;aACF;SACF,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACnB,MAAM,CAAC,sBAAsB,CAAC,CAAC;YAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC;gBAC3B,OAAO,EAAE,mCAAmC;gBAC5C,WAAW,EAAE,8BAA8B;gBAC3C,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,mBAAmB,CAAC;gBAC/D,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACxB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,SAAmB,CAAC,CAAC;YACvD,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;gBACnD,MAAM,CAAC,mBAAmB,YAAY,EAAE,CAAC,CAAC;gBAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;gBAC/C,cAAc,GAAG,OAAO;qBACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACZ,o
BAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACxD;qBACA,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,cAAc,GAAG,CAAC,YAAY,CAAC,CAAC;YAClC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,gBAAgB;YAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC;gBACzB,OAAO,EAAE,2BAA2B;gBACpC,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,0BAA0B,CAAC;gBACtE,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACtB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC;gBACtB,OAAO,EAAE,wBAAwB;gBACjC,WAAW,EAAE,YAAY;gBACzB,QAAQ,CAAC,KAAK;oBACZ,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,OAAO,mBAAmB,CAAC;gBAC/D,CAAC;aACF,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnB,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YAED,QAAQ,GAAG,CAAC,EAAE,OAAO,EAAE,OAAiB,EAAE,IAAI,EAAE,IAAc,EAAE,CAAC,CAAC;QACpE,CAAC;IACH,CAAC;IAED,IAAI,CAAC;QACH,gBAAgB,CAAC,YAAY,EAAE,CAAC,CAAC;IACnC,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,MAAM,CAAC,0BAA0B,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,MAAM,CAAC,6BAA6B,CAAC,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC5D,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC;QAC3B,OAAO,EAAE,oBAAoB,WAAW,WAAW;QACnD,OAAO,EAAE;YACP,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,sBAAsB,EAAE;YAC9C,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE;SACtC;KACF,CAAC,CAAC;IAEH,IAAI,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAClC,MAAM,CAAC,8BAA8B,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,qBAAqB;IACrB,MAAM,CAAC,GAAG,OAAO,EAAE,CAAC;IACpB,CAAC,CAAC,KAAK,CAAC,cAAc,WAAW,4BAA4B,CAAC,CAAC;IAE/D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,MAAM,UAAU,CAAC,MAAM,CACzC;YACE,KAAK,EAAE,cAAc;YA
CrB,QAAQ,EAAE,QAAQ;SACnB,EACD,EAAE,cAAc,EAAE,cAAc,CAAC,kBAAkB,EAAE,CACtD,CAAC;QAEF,MAAM,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC;QAEpC,gCAAgC;QAChC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,4BAA4B,YAAY,IAAI,CAAC,CAAC,CAAC;QAElE,uCAAuC;QACvC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;QACrE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC;QAEhE,GAAG,CAAC,IAAI,CACN,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,SAAS,CAAC,MAAM,YAAY,CAAC,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,SAAS,CAAC,EAAE,CAC3H,CAAC;QAEF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,iFAAiF;YACjF,iGAAiG;YACjG,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC;YACxE,MAAM,SAAS,GAAG,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CACjE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CACjB,CAAC;YACF,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACtD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC;YAChE,MAAM,QAAQ,GAAG,aAAa,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAErD,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAC3B,GAAG,CAAC,OAAO,CACT,KAAK,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,WAAW,GAAG,CAC9J,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,OAAO,CACT,KAAK,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CACtE,CAAC;YACJ,CAAC;QACH,CAAC;QAED,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC,CAAC,CAAC;QAEvE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC;IAAC,OAA
O,GAAQ,EAAE,CAAC;QAClB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../../src/core/constants.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,yBAAyB,UA2BrC,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Supported file extensions for document ingestion.
|
|
3
|
+
* This array can be used externally to validate files before sending them to the pipeline.
|
|
4
|
+
*/
|
|
5
|
+
export const SUPPORTED_FILE_EXTENSIONS = [
|
|
6
|
+
// PDFs
|
|
7
|
+
".pdf",
|
|
8
|
+
// Word processing
|
|
9
|
+
".docx",
|
|
10
|
+
".doc",
|
|
11
|
+
".rtf",
|
|
12
|
+
".odt",
|
|
13
|
+
".epub",
|
|
14
|
+
// Presentations
|
|
15
|
+
".pptx",
|
|
16
|
+
".ppt",
|
|
17
|
+
".odp",
|
|
18
|
+
// Spreadsheets and data
|
|
19
|
+
".xlsx",
|
|
20
|
+
".xls",
|
|
21
|
+
".csv",
|
|
22
|
+
// Text & web
|
|
23
|
+
".txt",
|
|
24
|
+
".html",
|
|
25
|
+
// Images
|
|
26
|
+
".jpg",
|
|
27
|
+
".jpeg",
|
|
28
|
+
".png",
|
|
29
|
+
".gif",
|
|
30
|
+
".webp",
|
|
31
|
+
".svg",
|
|
32
|
+
];
|
|
33
|
+
//# sourceMappingURL=constants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../../src/core/constants.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,OAAO;IACP,MAAM;IACN,kBAAkB;IAClB,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,gBAAgB;IAChB,OAAO;IACP,MAAM;IACN,MAAM;IACN,wBAAwB;IACxB,OAAO;IACP,MAAM;IACN,MAAM;IACN,aAAa;IACb,MAAM;IACN,OAAO;IACP,SAAS;IACT,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;CACP,CAAC"}
|
|
@@ -72,7 +72,7 @@ export declare function buildPipeline(): import("@langchain/langgraph").Compiled
|
|
|
72
72
|
(annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
|
|
73
73
|
Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
|
|
74
74
|
};
|
|
75
|
-
}>, "markdownMerger" | "markdownNormalizer" | "llmExtractionNode" | "__start__" | "fileTypeRouter" | "libreOfficeToPdf" | "pdfSplitter" | "textExtractorNode" | "saveMarkdown" | "markdownChunker" | "vectorEmbedderNode" | "vectorUpsertNode", {
|
|
75
|
+
}>, "markdownMerger" | "markdownNormalizer" | "llmExtractionNode" | "__start__" | "fileTypeRouter" | "libreOfficeToPdf" | "pdfSplitter" | "textExtractorNode" | "imageReaderNode" | "saveMarkdown" | "markdownChunker" | "vectorEmbedderNode" | "vectorUpsertNode", {
|
|
76
76
|
filePath: {
|
|
77
77
|
(): import("@langchain/langgraph").LastValue<string | undefined>;
|
|
78
78
|
(annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
|
|
@@ -225,7 +225,7 @@ export declare const graph: import("@langchain/langgraph").CompiledStateGraph<im
|
|
|
225
225
|
(annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
|
|
226
226
|
Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
|
|
227
227
|
};
|
|
228
|
-
}>, "markdownMerger" | "markdownNormalizer" | "llmExtractionNode" | "__start__" | "fileTypeRouter" | "libreOfficeToPdf" | "pdfSplitter" | "textExtractorNode" | "saveMarkdown" | "markdownChunker" | "vectorEmbedderNode" | "vectorUpsertNode", {
|
|
228
|
+
}>, "markdownMerger" | "markdownNormalizer" | "llmExtractionNode" | "__start__" | "fileTypeRouter" | "libreOfficeToPdf" | "pdfSplitter" | "textExtractorNode" | "imageReaderNode" | "saveMarkdown" | "markdownChunker" | "vectorEmbedderNode" | "vectorUpsertNode", {
|
|
229
229
|
filePath: {
|
|
230
230
|
(): import("@langchain/langgraph").LastValue<string | undefined>;
|
|
231
231
|
(annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"singleDocument.d.ts","sourceRoot":"","sources":["../../src/graphs/singleDocument.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"singleDocument.d.ts","sourceRoot":"","sources":["../../src/graphs/singleDocument.ts"],"names":[],"mappings":"AA4CA,wBAAgB,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;mDAiE5B;AAED;;;GAGG;AACH,eAAO,MAAM,KAAK;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDAAkB,CAAC"}
|
|
@@ -11,6 +11,7 @@ import { vectorEmbedderNode } from "../nodes/vectorEmbedderNode.js";
|
|
|
11
11
|
import { vectorUpsertNode } from "../nodes/vectorUpsertNode.js";
|
|
12
12
|
import { saveMarkdown } from "../nodes/saveMarkdown.js";
|
|
13
13
|
import { libreOfficeToPdf } from "../nodes/libreOfficeToPdf.js";
|
|
14
|
+
import { imageReaderNode } from "../nodes/imageReaderNode.js";
|
|
14
15
|
/**
|
|
15
16
|
* Builds and compiles the Virstack Doc Ingest pipeline as a LangGraph StateGraph.
|
|
16
17
|
*
|
|
@@ -34,6 +35,7 @@ function dispatchPdfChunks(state) {
|
|
|
34
35
|
chunk,
|
|
35
36
|
index,
|
|
36
37
|
totalChunks: state.pdfChunks.length,
|
|
38
|
+
mimeType: state.mimeType,
|
|
37
39
|
});
|
|
38
40
|
});
|
|
39
41
|
}
|
|
@@ -48,6 +50,8 @@ export function buildPipeline() {
|
|
|
48
50
|
// ── Phase 2b: Text / Data Extraction Branch ──
|
|
49
51
|
.addNode("textExtractorNode", textExtractorNode)
|
|
50
52
|
.addNode("llmExtractionNode", llmExtractionNode)
|
|
53
|
+
// ── Phase 2c: Image Branch ──
|
|
54
|
+
.addNode("imageReaderNode", imageReaderNode)
|
|
51
55
|
// ── Phase 3: Normalization & Chunking ──
|
|
52
56
|
.addNode("markdownNormalizer", markdownNormalizer)
|
|
53
57
|
.addNode("saveMarkdown", saveMarkdown)
|
|
@@ -63,11 +67,13 @@ export function buildPipeline() {
|
|
|
63
67
|
pdf: "pdfSplitter",
|
|
64
68
|
convert: "libreOfficeToPdf",
|
|
65
69
|
extract: "textExtractorNode",
|
|
70
|
+
image: "imageReaderNode",
|
|
66
71
|
})
|
|
67
72
|
// Convert branch: LibreOffice → pdfSplitter → (joins PDF branch)
|
|
68
73
|
.addEdge("libreOfficeToPdf", "pdfSplitter")
|
|
69
|
-
// PDF
|
|
74
|
+
// PDF/Image unified dispatcher
|
|
70
75
|
.addConditionalEdges("pdfSplitter", dispatchPdfChunks, ["llmExtractionNode"])
|
|
76
|
+
.addConditionalEdges("imageReaderNode", dispatchPdfChunks, ["llmExtractionNode"])
|
|
71
77
|
// Unified Document/Text branch flow
|
|
72
78
|
.addEdge("textExtractorNode", "llmExtractionNode")
|
|
73
79
|
// After llmExtractionNode, conditionally merge PDF chunks or normalize Text
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"singleDocument.js","sourceRoot":"","sources":["../../src/graphs/singleDocument.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAsB,MAAM,kBAAkB,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAC7E,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAE5D,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"singleDocument.js","sourceRoot":"","sources":["../../src/graphs/singleDocument.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAsB,MAAM,kBAAkB,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAC7E,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAE5D,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAE9D;;;;;;;;;GASG;AAEH;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAoB;IAC7C,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrD,OAAO,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;QACpE,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,OAAO,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC1C,OAAO,IAAI,IAAI,CAAC,mBAAmB,EAAE;YACnC,KAAK;YACL,KAAK;YACL,WAAW,EAAE,KAAK,CAAC,SAAS,CAAC,MAAM;YACnC,QAAQ,EAAE,KAAK,CAAC,QAAQ;SACzB,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AACD,MAAM,UAAU,aAAa;IAC3B,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,uBAAuB,CAAC;QACnD,yBAAyB;SACxB,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC;QAE1C,6BAA6B;SAC5B,OAAO,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;SAC7C,OAAO,CAAC,aAAa,EAAE,WAAW,CAAC;SACnC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC;QAE1C,gDAAgD;SAC/C,OAAO,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;SAC/C,OAAO,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;QAEhD,+BAA+B;SAC9B,OAAO,CAAC,iBAAiB,EAAE,eAAe,CAAC;QAE5C,0CAA0C;SACzC,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,cAAc,EAAE,YAAY,CAAC;SACrC,OAAO,CAAC,iBAAiB,EAAE,eAAe,CAAC;QAE5C,sCAAsC;SACrC,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;QAE9C,cAAc;QACd,iBAAiB;SAChB,OAAO,CAAC,WAAW,EAAE,gBAAgB,CAAC;QAEvC,8BAA8B;SAC7B,mBAAmB,CAAC,gBAAgB,EAAE,eAAe,EAAE;QACtD,GAAG,EAAE,aAAa;QAClB,OAAO,E
AAE,kBAAkB;QAC3B,OAAO,EAAE,mBAAmB;QAC5B,KAAK,EAAE,iBAAiB;KACzB,CAAC;QAEF,iEAAiE;SAChE,OAAO,CAAC,kBAAkB,EAAE,aAAa,CAAC;QAE3C,+BAA+B;SAC9B,mBAAmB,CAAC,aAAa,EAAE,iBAAiB,EAAE,CAAC,mBAAmB,CAAC,CAAC;SAC5E,mBAAmB,CAAC,iBAAiB,EAAE,iBAAiB,EAAE,CAAC,mBAAmB,CAAC,CAAC;QAEjF,oCAAoC;SACnC,OAAO,CAAC,mBAAmB,EAAE,mBAAmB,CAAC;QAElD,4EAA4E;SAC3E,mBAAmB,CAAC,mBAAmB,EAAE,aAAa,EAAE;QACvD,cAAc,EAAE,gBAAgB;QAChC,kBAAkB,EAAE,oBAAoB;KACzC,CAAC;QAEF,+BAA+B;SAC9B,OAAO,CAAC,gBAAgB,EAAE,oBAAoB,CAAC;QAEhD,+DAA+D;SAC9D,OAAO,CAAC,oBAAoB,EAAE,cAAc,CAAC;SAC7C,OAAO,CAAC,cAAc,EAAE,iBAAiB,CAAC;SAC1C,OAAO,CAAC,iBAAiB,EAAE,oBAAoB,CAAC;SAChD,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;IAEpC,OAAO,KAAK,CAAC,OAAO,EAAE,CAAC;AACzB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,KAAK,GAAG,aAAa,EAAE,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
export { initializeConfig, type VirstackDocIngestConfig, } from "./core/config.js";
|
|
2
2
|
export { graph as batchGraph, BatchStateAnnotation, } from "./graphs/batchProcessor.js";
|
|
3
3
|
export { buildPipeline, graph as singleDocGraph, } from "./graphs/singleDocument.js";
|
|
4
|
+
export { SUPPORTED_FILE_EXTENSIONS } from "./core/constants.js";
|
|
4
5
|
export type { PipelineState } from "./core/state.js";
|
|
5
6
|
export type { BatchState } from "./graphs/batchProcessor.js";
|
|
6
7
|
export { type VectorStoreAdapter, type VectorRecord, UpstashAdapter, } from "./adapters/vectorStore.js";
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,gBAAgB,EAChB,KAAK,uBAAuB,GAC7B,MAAM,kBAAkB,CAAC;AAG1B,OAAO,EACL,KAAK,IAAI,UAAU,EACnB,oBAAoB,GACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,aAAa,EACb,KAAK,IAAI,cAAc,GACxB,MAAM,4BAA4B,CAAC;AAGpC,YAAY,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AACrD,YAAY,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAG7D,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,YAAY,EACjB,cAAc,GACf,MAAM,2BAA2B,CAAC;AAGnC,OAAO,EACL,KAAK,UAAU,EACf,KAAK,QAAQ,EACb,KAAK,gBAAgB,EACrB,oBAAoB,EACpB,0BAA0B,GAC3B,MAAM,0BAA0B,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,gBAAgB,EAChB,KAAK,uBAAuB,GAC7B,MAAM,kBAAkB,CAAC;AAG1B,OAAO,EACL,KAAK,IAAI,UAAU,EACnB,oBAAoB,GACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,aAAa,EACb,KAAK,IAAI,cAAc,GACxB,MAAM,4BAA4B,CAAC;AAGpC,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAGhE,YAAY,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AACrD,YAAY,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AAG7D,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,YAAY,EACjB,cAAc,GACf,MAAM,2BAA2B,CAAC;AAGnC,OAAO,EACL,KAAK,UAAU,EACf,KAAK,QAAQ,EACb,KAAK,gBAAgB,EACrB,oBAAoB,EACpB,0BAA0B,GAC3B,MAAM,0BAA0B,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -3,6 +3,8 @@ export { initializeConfig, } from "./core/config.js";
|
|
|
3
3
|
// Export the processing graphs
|
|
4
4
|
export { graph as batchGraph, BatchStateAnnotation, } from "./graphs/batchProcessor.js";
|
|
5
5
|
export { buildPipeline, graph as singleDocGraph, } from "./graphs/singleDocument.js";
|
|
6
|
+
// Export the core constants
|
|
7
|
+
export { SUPPORTED_FILE_EXTENSIONS } from "./core/constants.js";
|
|
6
8
|
// Export vector store injection types and built-in adapters
|
|
7
9
|
export { UpstashAdapter, } from "./adapters/vectorStore.js";
|
|
8
10
|
// Export AI injection types and built-in adapter
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,wBAAwB;AACxB,OAAO,EACL,gBAAgB,GAEjB,MAAM,kBAAkB,CAAC;AAE1B,+BAA+B;AAC/B,OAAO,EACL,KAAK,IAAI,UAAU,EACnB,oBAAoB,GACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,aAAa,EACb,KAAK,IAAI,cAAc,GACxB,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,wBAAwB;AACxB,OAAO,EACL,gBAAgB,GAEjB,MAAM,kBAAkB,CAAC;AAE1B,+BAA+B;AAC/B,OAAO,EACL,KAAK,IAAI,UAAU,EACnB,oBAAoB,GACrB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,aAAa,EACb,KAAK,IAAI,cAAc,GACxB,MAAM,4BAA4B,CAAC;AAEpC,4BAA4B;AAC5B,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAMhE,4DAA4D;AAC5D,OAAO,EAGL,cAAc,GACf,MAAM,2BAA2B,CAAC;AAEnC,iDAAiD;AACjD,OAAO,EAIL,oBAAoB,EACpB,0BAA0B,GAC3B,MAAM,0BAA0B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fileTypeRouter.d.ts","sourceRoot":"","sources":["../../src/nodes/fileTypeRouter.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAiBjC;AAED;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,aAAa,GAAG,MAAM,
|
|
1
|
+
{"version":3,"file":"fileTypeRouter.d.ts","sourceRoot":"","sources":["../../src/nodes/fileTypeRouter.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAiBjC;AAED;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,aAAa,GAAG,MAAM,CA8D5D"}
|
|
@@ -65,6 +65,17 @@ export function routeByMimeType(state) {
|
|
|
65
65
|
if (mime === "text/plain" || mime === "text/html") {
|
|
66
66
|
return "extract";
|
|
67
67
|
}
|
|
68
|
+
// Images
|
|
69
|
+
const imageTypes = [
|
|
70
|
+
"image/jpeg",
|
|
71
|
+
"image/png",
|
|
72
|
+
"image/gif",
|
|
73
|
+
"image/webp",
|
|
74
|
+
"image/svg+xml"
|
|
75
|
+
];
|
|
76
|
+
if (mime && imageTypes.includes(mime)) {
|
|
77
|
+
return "image";
|
|
78
|
+
}
|
|
68
79
|
// Fallback: try to treat as text
|
|
69
80
|
logger.warn(LogSource.FILE_ROUTER, `Unknown MIME "${mime}", falling back to extract branch`);
|
|
70
81
|
return "extract";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fileTypeRouter.js","sourceRoot":"","sources":["../../src/nodes/fileTypeRouter.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,IAAI,MAAM,YAAY,CAAC;AAE9B,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAoB;IAEpB,IAAI,KAAK,CAAC,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,sCAAsC,CAAC,CAAC;QAC3E,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;IACpC,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,+DAA+D,CAAC,CAAC;IACnF,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,0BAA0B,CAAC;IAEhE,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,SAAS,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAC7E,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,kBAAkB,QAAQ,EAAE,CAAC,CAAC;IAEjE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;AAChC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAAC,KAAoB;IAClD,sDAAsD;IACtD,IAAI,KAAK,CAAC,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;QACrC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC;IAE5B,IAAI,IAAI,KAAK,iBAAiB,EAAE,CAAC;QAC/B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,uDAAuD;IACvD,MAAM,gBAAgB,GAAG;QACvB,kBAAkB;QAClB,yEAAyE,EAAE,OAAO;QAClF,oBAAoB,EAAyD,MAAM;QACnF,iBAAiB,EAA4D,MAAM;QACnF,UAAU,EAAmE,gBAAgB;QAC7F,yCAAyC,EAAmC,MAAM;QAClF,sBAAsB,EAAuD,OAAO;QACpF,gBAAgB;QAChB,2EAA2E,EAAE,OAAO;QACpF,+BAA+B,EAA8C,MAAM;QACnF,iDAAiD,EAA2B,MAAM;KACnF,CAAC;IAEF,IAAI,IAAI,IAAI,gBAAgB,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,yDAAyD;IACzD,MAAM,WAAW,GAAG;QAClB,mEAAmE,EAAE,OAAO;QAC5E,0BAA0B,EAA4C,MAAM;QAC5E,UAAU;KACX,CAAC;IAEF,IAAI,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,IAAI,KAAK,YAAY,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;QAClD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,iCAAiC;IACjC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,iBAAiB,IAAI,mCAAmC,CAAC,CAAC;IAC7F,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
1
|
+
{"version":3,"file":"fileTypeRouter.js","sourceRoot":"","sources":["../../src/nodes/fileTypeRouter.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,IAAI,MAAM,YAAY,CAAC;AAE9B,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAoB;IAEpB,IAAI,KAAK,CAAC,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,sCAAsC,CAAC,CAAC;QAC3E,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;IACpC,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,IAAI,KAAK,CAAC,+DAA+D,CAAC,CAAC;IACnF,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,0BAA0B,CAAC;IAEhE,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,SAAS,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAC7E,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,kBAAkB,QAAQ,EAAE,CAAC,CAAC;IAEjE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;AAChC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAAC,KAAoB;IAClD,sDAAsD;IACtD,IAAI,KAAK,CAAC,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;QACrC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC;IAE5B,IAAI,IAAI,KAAK,iBAAiB,EAAE,CAAC;QAC/B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,uDAAuD;IACvD,MAAM,gBAAgB,GAAG;QACvB,kBAAkB;QAClB,yEAAyE,EAAE,OAAO;QAClF,oBAAoB,EAAyD,MAAM;QACnF,iBAAiB,EAA4D,MAAM;QACnF,UAAU,EAAmE,gBAAgB;QAC7F,yCAAyC,EAAmC,MAAM;QAClF,sBAAsB,EAAuD,OAAO;QACpF,gBAAgB;QAChB,2EAA2E,EAAE,OAAO;QACpF,+BAA+B,EAA8C,MAAM;QACnF,iDAAiD,EAA2B,MAAM;KACnF,CAAC;IAEF,IAAI,IAAI,IAAI,gBAAgB,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,yDAAyD;IACzD,MAAM,WAAW,GAAG;QAClB,mEAAmE,EAAE,OAAO;QAC5E,0BAA0B,EAA4C,MAAM;QAC5E,UAAU;KACX,CAAC;IAEF,IAAI,IAAI,IAAI,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,IAAI,KAAK,YAAY,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;QAClD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,SAAS;IACT,MAAM,UAAU,GAAG;QACjB,YAAY;QACZ,WAAW;QACX,WAAW;QACX,YAAY;QACZ,eAAe;KAChB,CAAC;IAEF,IAAI,IAAI,IAAI,UAAU,CAAC,QAAQ,CAAC,I
AAI,CAAC,EAAE,CAAC;QACtC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,iCAAiC;IACjC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,iBAAiB,IAAI,mCAAmC,CAAC,CAAC;IAC7F,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { PipelineState } from "../core/state.js";
|
|
2
|
+
/**
|
|
3
|
+
* Reads an image file and converts it into a base64 chunk.
|
|
4
|
+
* The resulting chunk is stored in `state.pdfChunks` so it can be
|
|
5
|
+
* processed generically by the same parallel LLM dispatch logic.
|
|
6
|
+
*/
|
|
7
|
+
export declare function imageReaderNode(state: PipelineState): Promise<Partial<PipelineState>>;
|
|
8
|
+
//# sourceMappingURL=imageReaderNode.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"imageReaderNode.d.ts","sourceRoot":"","sources":["../../src/nodes/imageReaderNode.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAItD;;;;GAIG;AACH,wBAAsB,eAAe,CACnC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAoBjC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { logger, LogSource } from "../core/logger.js";
|
|
4
|
+
import { requireInit } from "../core/config.js";
|
|
5
|
+
/**
|
|
6
|
+
* Reads an image file and converts it into a base64 chunk.
|
|
7
|
+
* The resulting chunk is stored in `state.pdfChunks` so it can be
|
|
8
|
+
* processed generically by the same parallel LLM dispatch logic.
|
|
9
|
+
*/
|
|
10
|
+
export async function imageReaderNode(state) {
|
|
11
|
+
requireInit();
|
|
12
|
+
if (!state.filePath)
|
|
13
|
+
throw new Error("[imageReaderNode] filePath is missing");
|
|
14
|
+
const fullPath = path.resolve(process.cwd(), state.filePath);
|
|
15
|
+
logger.info(LogSource.PDF_SPLITTER, `Reading image at: ${fullPath}`); // Reusing PDF_SPLITTER or maybe we can just use generic logging but LogSource is an enum.
|
|
16
|
+
let fileBuffer;
|
|
17
|
+
try {
|
|
18
|
+
fileBuffer = await fs.readFile(fullPath);
|
|
19
|
+
}
|
|
20
|
+
catch (err) {
|
|
21
|
+
throw new Error(`Failed to read image at ${fullPath}: ${err.message}`);
|
|
22
|
+
}
|
|
23
|
+
const base64Data = fileBuffer.toString("base64");
|
|
24
|
+
// We place it in pdfChunks so it uses the exact same parallel mapping logic
|
|
25
|
+
logger.info(LogSource.PDF_SPLITTER, `Created 1 image chunk from ${state.mimeType}`);
|
|
26
|
+
return { pdfChunks: [base64Data] };
|
|
27
|
+
}
|
|
28
|
+
//# sourceMappingURL=imageReaderNode.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"imageReaderNode.js","sourceRoot":"","sources":["../../src/nodes/imageReaderNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAoB;IAEpB,WAAW,EAAE,CAAC;IAEd,IAAI,CAAC,KAAK,CAAC,QAAQ;QAAE,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC7D,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,EAAE,qBAAqB,QAAQ,EAAE,CAAC,CAAC,CAAC,0FAA0F;IAEhK,IAAI,UAAU,CAAC;IACf,IAAI,CAAC;QACH,UAAU,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC3C,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,2BAA2B,QAAQ,KAAK,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,MAAM,UAAU,GAAG,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEjD,4EAA4E;IAC5E,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,EAAE,8BAA8B,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;IAEpF,OAAO,EAAE,SAAS,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC;AACrC,CAAC"}
|
|
@@ -9,6 +9,7 @@ export declare function llmExtractionNode(state: Partial<PipelineState> & {
|
|
|
9
9
|
chunk?: string;
|
|
10
10
|
index?: number;
|
|
11
11
|
totalChunks?: number;
|
|
12
|
+
mimeType?: string;
|
|
12
13
|
}): Promise<Partial<PipelineState>>;
|
|
13
14
|
/**
|
|
14
15
|
* Conditional router to determine what happens after llmExtractionNode.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llmExtractionNode.d.ts","sourceRoot":"","sources":["../../src/nodes/llmExtractionNode.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAuBtD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,
|
|
1
|
+
{"version":3,"file":"llmExtractionNode.d.ts","sourceRoot":"","sources":["../../src/nodes/llmExtractionNode.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAuBtD;;;;;GAKG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GAC1G,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAwCjC;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,aAAa,GAAG,MAAM,CAK1D"}
|
|
@@ -35,12 +35,13 @@ export async function llmExtractionNode(state) {
|
|
|
35
35
|
const promptInput = {
|
|
36
36
|
systemPrompt: finalSystemPrompt,
|
|
37
37
|
userText: isChunkFlow
|
|
38
|
-
? `Extract all content from this
|
|
38
|
+
? `Extract all content from this document/image (chunk ${state.index + 1} of ${state.totalChunks}) into clean Markdown.`
|
|
39
39
|
: `Convert the following extracted document text into clean Markdown:\n\n${state.rawText}`,
|
|
40
|
-
|
|
40
|
+
base64Data: isChunkFlow ? state.chunk : undefined,
|
|
41
|
+
mimeType: state.mimeType
|
|
41
42
|
};
|
|
42
43
|
if (isChunkFlow) {
|
|
43
|
-
logger.info(LogSource.LLM_EXTRACTION, `Processing
|
|
44
|
+
logger.info(LogSource.LLM_EXTRACTION, `Processing chunk ${state.index + 1}/${state.totalChunks} (${((state.chunk.length * 0.75) / 1024).toFixed(0)} KB)`);
|
|
44
45
|
}
|
|
45
46
|
else {
|
|
46
47
|
logger.info(LogSource.LLM_EXTRACTION, `Sending ${state.rawText.length} chars to generic LLM Adapter`);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llmExtractionNode.js","sourceRoot":"","sources":["../../src/nodes/llmExtractionNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAE1E,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAGtD,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;;4GAiB8E,CAAC;AAE7G;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,
|
|
1
|
+
{"version":3,"file":"llmExtractionNode.js","sourceRoot":"","sources":["../../src/nodes/llmExtractionNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAE1E,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAGtD,MAAM,qBAAqB,GAAG;;;;;;;;;;;;;;;;;4GAiB8E,CAAC;AAE7G;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAA2G;IAG3G,WAAW,EAAE,CAAC;IAEd,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,CAAC;IAC9G,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAEnC,IAAI,CAAC,WAAW,IAAI,CAAC,UAAU,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,0EAA0E,CAAC,CAAC;IAC9F,CAAC;IAED,MAAM,iBAAiB,GAAG,cAAc,CAAC,YAAY,IAAI,qBAAqB,CAAC;IAE/E,MAAM,WAAW,GAAa;QAC5B,YAAY,EAAE,iBAAiB;QAC/B,QAAQ,EAAE,WAAW;YACnB,CAAC,CAAC,uDAAuD,KAAK,CAAC,KAAM,GAAG,CAAC,OAAO,KAAK,CAAC,WAAW,wBAAwB;YACzH,CAAC,CAAC,yEAAyE,KAAK,CAAC,OAAO,EAAE;QAC5F,UAAU,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;QACjD,QAAQ,EAAE,KAAK,CAAC,QAAQ;KACzB,CAAC;IAEF,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,oBAAoB,KAAK,CAAC,KAAM,GAAG,CAAC,IAAI,KAAK,CAAC,WAAW,KAAK,CAAC,CAAC,KAAK,CAAC,KAAM,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAC9J,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,WAAW,KAAK,CAAC,OAAQ,CAAC,MAAM,+BAA+B,CAAC,CAAC;IACzG,CAAC;IAED,8DAA8D;IAC9D,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,CACnC,cAAc,CAAC,GAAG,CAAC,gBAAgB,CAAC,WAAW,CAAC,CACjD,CAAC;IAEF,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,SAAS,KAAK,CAAC,KAAM,GAAG,CAAC,IAAI,KAAK,CAAC,WAAW,eAAe,QAAQ,CAAC,MAAM,SAAS,CAAC,CAAC;QAC7H,OAAO,EAAE,aAAa,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC;IACvC,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,uBAAuB,QAAQ,CAAC,MAAM,QAAQ,CAAC,CAAC;IACtF,OAAO,EAAE,QAAQ,EAAE,CAAC;AACtB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,KAAoB;IAChD,IAAI,KAAK,CAAC,aAAa,IAAI,KAAK,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC7E,OAAO,gBAAgB,CAAC;IAC1B,CAAC;IACD,OAAO,oBAA
oB,CAAC;AAC9B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"textExtractorNode.d.ts","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,
|
|
1
|
+
{"version":3,"file":"textExtractorNode.d.ts","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGtD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,aAAa,GACnB,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAkCjC"}
|
|
@@ -23,15 +23,14 @@ export async function textExtractorNode(state) {
|
|
|
23
23
|
skip_empty_lines: true,
|
|
24
24
|
});
|
|
25
25
|
// Convert to a simple text representation
|
|
26
|
-
rawText = records
|
|
27
|
-
.map((row) => row.join(" | "))
|
|
28
|
-
.join("\n");
|
|
26
|
+
rawText = records.map((row) => row.join(" | ")).join("\n");
|
|
29
27
|
}
|
|
30
28
|
else {
|
|
31
29
|
// DOCX, PPTX, XLSX — use officeparser
|
|
32
30
|
if (!filePath)
|
|
33
31
|
throw new Error("filePath required for office document parsing");
|
|
34
|
-
|
|
32
|
+
const ast = await officeparser.parseOffice(filePath);
|
|
33
|
+
rawText = ast.toText();
|
|
35
34
|
}
|
|
36
35
|
logger.info(LogSource.TEXT_EXTRACTOR, `Extracted ${rawText.length} chars of raw text`);
|
|
37
36
|
return { rawText };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"textExtractorNode.js","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,YAAY,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAEvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAoB;IAEpB,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;IAErC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,YAAY,QAAQ,KAAK,QAAQ,GAAG,CAAC,CAAC;IAE5E,IAAI,OAAe,CAAC;IAEpB,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAC9B,kCAAkC;QAClC,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAC5E,CAAC;SAAM,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,mDAAmD;QACnD,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACpE,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACvD,MAAM,OAAO,GAAe,KAAK,CAAC,SAAS,EAAE;YAC3C,gBAAgB,EAAE,IAAI;SACvB,CAAC,CAAC;QAEH,0CAA0C;QAC1C,OAAO,GAAG,OAAO
|
|
1
|
+
{"version":3,"file":"textExtractorNode.js","sourceRoot":"","sources":["../../src/nodes/textExtractorNode.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,YAAY,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAEvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAoB;IAEpB,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;IAErC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,YAAY,QAAQ,KAAK,QAAQ,GAAG,CAAC,CAAC;IAE5E,IAAI,OAAe,CAAC;IAEpB,IAAI,QAAQ,KAAK,YAAY,EAAE,CAAC;QAC9B,kCAAkC;QAClC,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC;IAC5E,CAAC;SAAM,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QACnC,mDAAmD;QACnD,IAAI,CAAC,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACpE,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACvD,MAAM,OAAO,GAAe,KAAK,CAAC,SAAS,EAAE;YAC3C,gBAAgB,EAAE,IAAI;SACvB,CAAC,CAAC;QAEH,0CAA0C;QAC1C,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7D,CAAC;SAAM,CAAC;QACN,sCAAsC;QACtC,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACnE,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QACrD,OAAO,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;IACzB,CAAC;IAED,MAAM,CAAC,IAAI,CACT,SAAS,CAAC,cAAc,EACxB,aAAa,OAAO,CAAC,MAAM,oBAAoB,CAChD,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,CAAC;AACrB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@virstack/doc-ingest",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.2",
|
|
4
4
|
"description": "A high-performance, parallelized document ingestion and vectorization pipeline.",
|
|
5
|
+
"repository": {
|
|
6
|
+
"type": "git",
|
|
7
|
+
"url": "https://github.com/virstack/virstack-doc-ingest"
|
|
8
|
+
},
|
|
5
9
|
"main": "./dist/index.js",
|
|
6
10
|
"types": "./dist/index.d.ts",
|
|
7
11
|
"type": "module",
|
|
@@ -39,11 +43,11 @@
|
|
|
39
43
|
"dotenv": "^16.4.0",
|
|
40
44
|
"figlet": "^1.11.0",
|
|
41
45
|
"mime-types": "^2.1.35",
|
|
42
|
-
"officeparser": "^
|
|
46
|
+
"officeparser": "^6.0.7",
|
|
43
47
|
"p-limit": "^7.3.0",
|
|
44
48
|
"pdf-lib": "^1.17.1",
|
|
45
49
|
"picocolors": "^1.1.1",
|
|
46
|
-
"terminal-image": "^
|
|
50
|
+
"terminal-image": "^3.0.0"
|
|
47
51
|
},
|
|
48
52
|
"devDependencies": {
|
|
49
53
|
"@langchain/langgraph-cli": "^1.1.16",
|