@virstack/doc-ingest 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/README.md +203 -0
  2. package/dist/adapters/aiAdapters.d.ts +25 -0
  3. package/dist/adapters/aiAdapters.d.ts.map +1 -0
  4. package/dist/adapters/aiAdapters.js +73 -0
  5. package/dist/adapters/aiAdapters.js.map +1 -0
  6. package/dist/adapters/vectorStore.d.ts +24 -0
  7. package/dist/adapters/vectorStore.d.ts.map +1 -0
  8. package/dist/adapters/vectorStore.js +22 -0
  9. package/dist/adapters/vectorStore.js.map +1 -0
  10. package/dist/aiAdapters.d.ts +25 -0
  11. package/dist/aiAdapters.d.ts.map +1 -0
  12. package/dist/aiAdapters.js +50 -0
  13. package/dist/aiAdapters.js.map +1 -0
  14. package/dist/assets/logo.png +0 -0
  15. package/dist/batchPipeline.d.ts +52 -0
  16. package/dist/batchPipeline.d.ts.map +1 -0
  17. package/dist/batchPipeline.js +81 -0
  18. package/dist/batchPipeline.js.map +1 -0
  19. package/dist/cli.d.ts +3 -0
  20. package/dist/cli.d.ts.map +1 -0
  21. package/dist/cli.js +217 -0
  22. package/dist/cli.js.map +1 -0
  23. package/dist/config.d.ts +26 -0
  24. package/dist/config.d.ts.map +1 -0
  25. package/dist/config.js +97 -0
  26. package/dist/config.js.map +1 -0
  27. package/dist/core/config.d.ts +26 -0
  28. package/dist/core/config.d.ts.map +1 -0
  29. package/dist/core/config.js +106 -0
  30. package/dist/core/config.js.map +1 -0
  31. package/dist/core/logger.d.ts +31 -0
  32. package/dist/core/logger.d.ts.map +1 -0
  33. package/dist/core/logger.js +42 -0
  34. package/dist/core/logger.js.map +1 -0
  35. package/dist/core/state.d.ts +52 -0
  36. package/dist/core/state.d.ts.map +1 -0
  37. package/dist/core/state.js +27 -0
  38. package/dist/core/state.js.map +1 -0
  39. package/dist/graphs/batchProcessor.d.ts +72 -0
  40. package/dist/graphs/batchProcessor.d.ts.map +1 -0
  41. package/dist/graphs/batchProcessor.js +94 -0
  42. package/dist/graphs/batchProcessor.js.map +1 -0
  43. package/dist/graphs/singleDocument.d.ts +303 -0
  44. package/dist/graphs/singleDocument.d.ts.map +1 -0
  45. package/dist/graphs/singleDocument.js +93 -0
  46. package/dist/graphs/singleDocument.js.map +1 -0
  47. package/dist/index.d.ts +8 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +10 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/logger.d.ts +24 -0
  52. package/dist/logger.d.ts.map +1 -0
  53. package/dist/logger.js +36 -0
  54. package/dist/logger.js.map +1 -0
  55. package/dist/logo.d.ts +2 -0
  56. package/dist/logo.d.ts.map +1 -0
  57. package/dist/logo.js +3 -0
  58. package/dist/logo.js.map +1 -0
  59. package/dist/nodes/fileTypeRouter.d.ts +16 -0
  60. package/dist/nodes/fileTypeRouter.d.ts.map +1 -0
  61. package/dist/nodes/fileTypeRouter.js +72 -0
  62. package/dist/nodes/fileTypeRouter.js.map +1 -0
  63. package/dist/nodes/geminiExtraction.d.ts +19 -0
  64. package/dist/nodes/geminiExtraction.d.ts.map +1 -0
  65. package/dist/nodes/geminiExtraction.js +87 -0
  66. package/dist/nodes/geminiExtraction.js.map +1 -0
  67. package/dist/nodes/libreOfficeToPdf.d.ts +8 -0
  68. package/dist/nodes/libreOfficeToPdf.d.ts.map +1 -0
  69. package/dist/nodes/libreOfficeToPdf.js +61 -0
  70. package/dist/nodes/libreOfficeToPdf.js.map +1 -0
  71. package/dist/nodes/llmExtractionNode.d.ts +19 -0
  72. package/dist/nodes/llmExtractionNode.d.ts.map +1 -0
  73. package/dist/nodes/llmExtractionNode.js +68 -0
  74. package/dist/nodes/llmExtractionNode.js.map +1 -0
  75. package/dist/nodes/markdownChunker.d.ts +8 -0
  76. package/dist/nodes/markdownChunker.d.ts.map +1 -0
  77. package/dist/nodes/markdownChunker.js +24 -0
  78. package/dist/nodes/markdownChunker.js.map +1 -0
  79. package/dist/nodes/markdownMerger.d.ts +9 -0
  80. package/dist/nodes/markdownMerger.d.ts.map +1 -0
  81. package/dist/nodes/markdownMerger.js +33 -0
  82. package/dist/nodes/markdownMerger.js.map +1 -0
  83. package/dist/nodes/markdownNormalizer.d.ts +10 -0
  84. package/dist/nodes/markdownNormalizer.d.ts.map +1 -0
  85. package/dist/nodes/markdownNormalizer.js +46 -0
  86. package/dist/nodes/markdownNormalizer.js.map +1 -0
  87. package/dist/nodes/openrouterEmbedder.d.ts +7 -0
  88. package/dist/nodes/openrouterEmbedder.d.ts.map +1 -0
  89. package/dist/nodes/openrouterEmbedder.js +31 -0
  90. package/dist/nodes/openrouterEmbedder.js.map +1 -0
  91. package/dist/nodes/pdfSplitter.d.ts +7 -0
  92. package/dist/nodes/pdfSplitter.d.ts.map +1 -0
  93. package/dist/nodes/pdfSplitter.js +41 -0
  94. package/dist/nodes/pdfSplitter.js.map +1 -0
  95. package/dist/nodes/saveMarkdown.d.ts +7 -0
  96. package/dist/nodes/saveMarkdown.d.ts.map +1 -0
  97. package/dist/nodes/saveMarkdown.js +28 -0
  98. package/dist/nodes/saveMarkdown.js.map +1 -0
  99. package/dist/nodes/textExtractorNode.d.ts +7 -0
  100. package/dist/nodes/textExtractorNode.d.ts.map +1 -0
  101. package/dist/nodes/textExtractorNode.js +39 -0
  102. package/dist/nodes/textExtractorNode.js.map +1 -0
  103. package/dist/nodes/upstashUpsert.d.ts +7 -0
  104. package/dist/nodes/upstashUpsert.d.ts.map +1 -0
  105. package/dist/nodes/upstashUpsert.js +45 -0
  106. package/dist/nodes/upstashUpsert.js.map +1 -0
  107. package/dist/nodes/vectorEmbedderNode.d.ts +7 -0
  108. package/dist/nodes/vectorEmbedderNode.d.ts.map +1 -0
  109. package/dist/nodes/vectorEmbedderNode.js +23 -0
  110. package/dist/nodes/vectorEmbedderNode.js.map +1 -0
  111. package/dist/nodes/vectorUpsertNode.d.ts +7 -0
  112. package/dist/nodes/vectorUpsertNode.d.ts.map +1 -0
  113. package/dist/nodes/vectorUpsertNode.js +45 -0
  114. package/dist/nodes/vectorUpsertNode.js.map +1 -0
  115. package/dist/pipeline.d.ts +303 -0
  116. package/dist/pipeline.d.ts.map +1 -0
  117. package/dist/pipeline.js +93 -0
  118. package/dist/pipeline.js.map +1 -0
  119. package/dist/state.d.ts +52 -0
  120. package/dist/state.d.ts.map +1 -0
  121. package/dist/state.js +27 -0
  122. package/dist/state.js.map +1 -0
  123. package/dist/vectorStore.d.ts +24 -0
  124. package/dist/vectorStore.d.ts.map +1 -0
  125. package/dist/vectorStore.js +22 -0
  126. package/dist/vectorStore.js.map +1 -0
  127. package/package.json +55 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../../src/core/state.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,eAAO,MAAM,uBAAuB;IAClC,wEAAwE;;;;;;IAGxE,6EAA6E;;;;;;IAG7E,gDAAgD;;;;;;IAGhD,6DAA6D;;;;;;IAG7D,0DAA0D;;IAM1D,sEAAsE;;;;;;IAGtE,2CAA2C;;;;;;IAG3C,mDAAmD;;;;;;EAEnD,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,OAAO,uBAAuB,CAAC,KAAK,CAAC"}
@@ -0,0 +1,27 @@
1
+ import { Annotation } from "@langchain/langgraph";
2
+ /**
3
+ * LangGraph pipeline state definition.
4
+ * Every node reads from and writes to this shared state.
5
+ */
6
+ export const PipelineStateAnnotation = Annotation.Root({
7
+ /** Absolute path to the input file (optional if rawText is provided) */
8
+ filePath: (Annotation),
9
+ /** Detected MIME type of the input file (optional if rawText is provided) */
10
+ mimeType: (Annotation),
11
+ /** Extracted raw text (office / text branch) */
12
+ rawText: (Annotation),
13
+ /** 10-page PDF chunk buffers (base64 strings, PDF branch) */
14
+ pdfChunks: (Annotation),
15
+ /** Per-chunk markdown outputs from Gemini (PDF branch) */
16
+ markdownParts: Annotation({
17
+ reducer: (x, y) => x.concat(y),
18
+ default: () => [],
19
+ }),
20
+ /** Final merged / extracted markdown (both branches converge here) */
21
+ markdown: (Annotation),
22
+ /** Semantic text chunks after splitting */
23
+ textChunks: (Annotation),
24
+ /** OpenAI embedding vectors, one per text chunk */
25
+ vectors: (Annotation),
26
+ });
27
+ //# sourceMappingURL=state.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"state.js","sourceRoot":"","sources":["../../src/core/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAElD;;;GAGG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,UAAU,CAAC,IAAI,CAAC;IACrD,wEAAwE;IACxE,QAAQ,EAAE,CAAA,UAA8B,CAAA;IAExC,6EAA6E;IAC7E,QAAQ,EAAE,CAAA,UAA8B,CAAA;IAExC,gDAAgD;IAChD,OAAO,EAAE,CAAA,UAAkB,CAAA;IAE3B,6DAA6D;IAC7D,SAAS,EAAE,CAAA,UAAoB,CAAA;IAE/B,0DAA0D;IAC1D,aAAa,EAAE,UAAU,CAAW;QAClC,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC9B,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE;KAClB,CAAC;IAEF,sEAAsE;IACtE,QAAQ,EAAE,CAAA,UAAkB,CAAA;IAE5B,2CAA2C;IAC3C,UAAU,EAAE,CAAA,UAAoB,CAAA;IAEhC,mDAAmD;IACnD,OAAO,EAAE,CAAA,UAAsB,CAAA;CAChC,CAAC,CAAC"}
@@ -0,0 +1,72 @@
1
+ /**
2
+ * State for the batch document processing graph.
3
+ */
4
+ export declare const BatchStateAnnotation: import("@langchain/langgraph").AnnotationRoot<{
5
+ /** Input: List of absolute file paths to process */
6
+ files: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
7
+ /** Input: List of raw text snippets to process */
8
+ rawTexts: import("@langchain/langgraph").BinaryOperatorAggregate<{
9
+ content: string;
10
+ name: string;
11
+ }[], {
12
+ content: string;
13
+ name: string;
14
+ }[]>;
15
+ /** Output: Collection of results from each individual document run */
16
+ results: import("@langchain/langgraph").BinaryOperatorAggregate<any[], any[]>;
17
+ }>;
18
+ export type BatchState = typeof BatchStateAnnotation.State;
19
+ export declare const graph: import("@langchain/langgraph").CompiledStateGraph<import("@langchain/langgraph").StateType<{
20
+ /** Input: List of absolute file paths to process */
21
+ files: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
22
+ /** Input: List of raw text snippets to process */
23
+ rawTexts: import("@langchain/langgraph").BinaryOperatorAggregate<{
24
+ content: string;
25
+ name: string;
26
+ }[], {
27
+ content: string;
28
+ name: string;
29
+ }[]>;
30
+ /** Output: Collection of results from each individual document run */
31
+ results: import("@langchain/langgraph").BinaryOperatorAggregate<any[], any[]>;
32
+ }>, import("@langchain/langgraph").UpdateType<{
33
+ /** Input: List of absolute file paths to process */
34
+ files: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
35
+ /** Input: List of raw text snippets to process */
36
+ rawTexts: import("@langchain/langgraph").BinaryOperatorAggregate<{
37
+ content: string;
38
+ name: string;
39
+ }[], {
40
+ content: string;
41
+ name: string;
42
+ }[]>;
43
+ /** Output: Collection of results from each individual document run */
44
+ results: import("@langchain/langgraph").BinaryOperatorAggregate<any[], any[]>;
45
+ }>, "__start__" | "workerNode" | "orchestrator" | "summaryNode", {
46
+ /** Input: List of absolute file paths to process */
47
+ files: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
48
+ /** Input: List of raw text snippets to process */
49
+ rawTexts: import("@langchain/langgraph").BinaryOperatorAggregate<{
50
+ content: string;
51
+ name: string;
52
+ }[], {
53
+ content: string;
54
+ name: string;
55
+ }[]>;
56
+ /** Output: Collection of results from each individual document run */
57
+ results: import("@langchain/langgraph").BinaryOperatorAggregate<any[], any[]>;
58
+ }, {
59
+ /** Input: List of absolute file paths to process */
60
+ files: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
61
+ /** Input: List of raw text snippets to process */
62
+ rawTexts: import("@langchain/langgraph").BinaryOperatorAggregate<{
63
+ content: string;
64
+ name: string;
65
+ }[], {
66
+ content: string;
67
+ name: string;
68
+ }[]>;
69
+ /** Output: Collection of results from each individual document run */
70
+ results: import("@langchain/langgraph").BinaryOperatorAggregate<any[], any[]>;
71
+ }, import("@langchain/langgraph").StateDefinition>;
72
+ //# sourceMappingURL=batchProcessor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"batchProcessor.d.ts","sourceRoot":"","sources":["../../src/graphs/batchProcessor.ts"],"names":[],"mappings":"AAKA;;GAEG;AACH,eAAO,MAAM,oBAAoB;IAC/B,oDAAoD;;IAMpD,kDAAkD;;iBACZ,MAAM;cAAQ,MAAM;;iBAApB,MAAM;cAAQ,MAAM;;IAK1D,sEAAsE;;EAKtE,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG,OAAO,oBAAoB,CAAC,KAAK,CAAC;AAkF3D,eAAO,MAAM,KAAK;IArGhB,oDAAoD;;IAMpD,kDAAkD;;iBACZ,MAAM;cAAQ,MAAM;;iBAApB,MAAM;cAAQ,MAAM;;IAK1D,sEAAsE;;;IAZtE,oDAAoD;;IAMpD,kDAAkD;;iBACZ,MAAM;cAAQ,MAAM;;iBAApB,MAAM;cAAQ,MAAM;;IAK1D,sEAAsE;;;IAZtE,oDAAoD;;IAMpD,kDAAkD;;iBACZ,MAAM;cAAQ,MAAM;;iBAApB,MAAM;cAAQ,MAAM;;IAK1D,sEAAsE;;;IAZtE,oDAAoD;;IAMpD,kDAAkD;;iBACZ,MAAM;cAAQ,MAAM;;iBAApB,MAAM;cAAQ,MAAM;;IAK1D,sEAAsE;;kDAyF/B,CAAC"}
@@ -0,0 +1,94 @@
1
+ import { Annotation, StateGraph, Send, END } from "@langchain/langgraph";
2
+ import { graph as singleDocGraph } from "./singleDocument.js";
3
+ import path from "node:path";
4
+ import { logger, LogSource } from "../core/logger.js";
5
+ /**
6
+ * State for the batch document processing graph.
7
+ */
8
+ export const BatchStateAnnotation = Annotation.Root({
9
+ /** Input: List of absolute file paths to process */
10
+ files: Annotation({
11
+ reducer: (x, y) => x.concat(y),
12
+ default: () => [],
13
+ }),
14
+ /** Input: List of raw text snippets to process */
15
+ rawTexts: Annotation({
16
+ reducer: (x, y) => x.concat(y),
17
+ default: () => [],
18
+ }),
19
+ /** Output: Collection of results from each individual document run */
20
+ results: Annotation({
21
+ reducer: (x, y) => x.concat(y),
22
+ default: () => [],
23
+ }),
24
+ });
25
+ /**
26
+ * Orchestrator node: Logs how many files are in the batch and returns an empty state update; dispatch to workers is done by the conditional edge that follows it.
27
+ */
28
+ function orchestrator(state) {
29
+ logger.info(LogSource.BATCH, `Starting processing of ${state.files.length} documents.`);
30
+ return {};
31
+ }
32
+ /**
33
+ * Conditional edge: Uses the Send API to spawn parallel worker nodes for each file and each raw text snippet.
34
+ */
35
+ function distributeFiles(state) {
36
+ const fileSends = state.files.map((file) => new Send("workerNode", { filePath: file }));
37
+ const textSends = state.rawTexts.map((txt) => new Send("workerNode", { rawText: txt.content, name: txt.name }));
38
+ return [...fileSends, ...textSends];
39
+ }
40
+ /**
41
+ * Worker node: Invokes the original single-document pipeline.
42
+ */
43
+ async function workerNode(state) {
44
+ const fileName = state.name || (state.filePath ? path.basename(state.filePath) : "raw-text");
45
+ const startTime = Date.now();
46
+ try {
47
+ // Invoke the existing compiled single-document graph
48
+ const result = await singleDocGraph.invoke({
49
+ filePath: state.filePath,
50
+ rawText: state.rawText
51
+ });
52
+ const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
53
+ return {
54
+ results: [{
55
+ file: fileName,
56
+ status: "success",
57
+ chunks: result.textChunks?.length ?? 0,
58
+ vectors: result.vectors?.length ?? 0,
59
+ durationSec: elapsed,
60
+ }]
61
+ };
62
+ }
63
+ catch (error) {
64
+ const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
65
+ return {
66
+ results: [{
67
+ file: fileName,
68
+ status: "error",
69
+ chunks: 0,
70
+ vectors: 0,
71
+ durationSec: elapsed,
72
+ error: error.message,
73
+ }]
74
+ };
75
+ }
76
+ }
77
+ /**
78
+ * Final node: Logs a completion message for the batch (per-document results stay in state.results and are not printed here).
79
+ */
80
+ function summaryNode(state) {
81
+ logger.success(LogSource.BATCH, "All documents processed.");
82
+ return {};
83
+ }
84
+ // Build the batch graph
85
+ const batchGraph = new StateGraph(BatchStateAnnotation)
86
+ .addNode("orchestrator", orchestrator)
87
+ .addNode("workerNode", workerNode)
88
+ .addNode("summaryNode", summaryNode)
89
+ .addEdge("__start__", "orchestrator")
90
+ .addConditionalEdges("orchestrator", distributeFiles, ["workerNode"])
91
+ .addEdge("workerNode", "summaryNode")
92
+ .addEdge("summaryNode", END);
93
+ export const graph = batchGraph.compile();
94
+ //# sourceMappingURL=batchProcessor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"batchProcessor.js","sourceRoot":"","sources":["../../src/graphs/batchProcessor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AACzE,OAAO,EAAE,KAAK,IAAI,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAC9D,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEtD;;GAEG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAG,UAAU,CAAC,IAAI,CAAC;IAClD,oDAAoD;IACpD,KAAK,EAAE,UAAU,CAAW;QAC1B,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC9B,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE;KAClB,CAAC;IAEF,kDAAkD;IAClD,QAAQ,EAAE,UAAU,CAA2C;QAC7D,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC9B,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE;KAClB,CAAC;IAEF,sEAAsE;IACtE,OAAO,EAAE,UAAU,CAAQ;QACzB,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC9B,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE;KAClB,CAAC;CACH,CAAC,CAAC;AAIH;;GAEG;AACH,SAAS,YAAY,CAAC,KAAiB;IACrC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,0BAA0B,KAAK,CAAC,KAAK,CAAC,MAAM,aAAa,CAAC,CAAC;IACxF,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,KAAiB;IACxC,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CACzC,IAAI,IAAI,CAAC,YAAY,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAC3C,CAAC;IAEF,MAAM,SAAS,GAAG,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAC3C,IAAI,IAAI,CAAC,YAAY,EAAE,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC,CACjE,CAAC;IAEF,OAAO,CAAC,GAAG,SAAS,EAAE,GAAG,SAAS,CAAC,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,UAAU,CAAC,KAA6D;IACrF,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;IAC7F,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,IAAI,CAAC;QACH,qDAAqD;QACrD,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC;YACzC,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,OAAO,EAAE,KAAK,CAAC,OAAO;SACvB,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QA
E7D,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,QAAQ;oBACd,MAAM,EAAE,SAAS;oBACjB,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC;oBACtC,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,MAAM,IAAI,CAAC;oBACpC,WAAW,EAAE,OAAO;iBACrB,CAAC;SACH,CAAC;IACJ,CAAC;IAAC,OAAO,KAAU,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC7D,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,QAAQ;oBACd,MAAM,EAAE,OAAO;oBACf,MAAM,EAAE,CAAC;oBACT,OAAO,EAAE,CAAC;oBACV,WAAW,EAAE,OAAO;oBACpB,KAAK,EAAE,KAAK,CAAC,OAAO;iBACrB,CAAC;SACH,CAAC;IACJ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,KAAiB;IACpC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,0BAA0B,CAAC,CAAC;IAC5D,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,wBAAwB;AACxB,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,oBAAoB,CAAC;KACpD,OAAO,CAAC,cAAc,EAAE,YAAY,CAAC;KACrC,OAAO,CAAC,YAAY,EAAE,UAAU,CAAC;KACjC,OAAO,CAAC,aAAa,EAAE,WAAW,CAAC;KACnC,OAAO,CAAC,WAAW,EAAE,cAAc,CAAC;KACpC,mBAAmB,CAAC,cAAc,EAAE,eAAe,EAAE,CAAC,YAAY,CAAC,CAAC;KACpE,OAAO,CAAC,YAAY,EAAE,aAAa,CAAC;KACpC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;AAE/B,MAAM,CAAC,MAAM,KAAK,GAAG,UAAU,CAAC,OAAO,EAAE,CAAC"}
@@ -0,0 +1,303 @@
1
+ export declare function buildPipeline(): import("@langchain/langgraph").CompiledStateGraph<import("@langchain/langgraph").StateType<{
2
+ filePath: {
3
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
4
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
5
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
6
+ };
7
+ mimeType: {
8
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
9
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
10
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
11
+ };
12
+ rawText: {
13
+ (): import("@langchain/langgraph").LastValue<string>;
14
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
15
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
16
+ };
17
+ pdfChunks: {
18
+ (): import("@langchain/langgraph").LastValue<string[]>;
19
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
20
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
21
+ };
22
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
23
+ markdown: {
24
+ (): import("@langchain/langgraph").LastValue<string>;
25
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
26
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
27
+ };
28
+ textChunks: {
29
+ (): import("@langchain/langgraph").LastValue<string[]>;
30
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
31
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
32
+ };
33
+ vectors: {
34
+ (): import("@langchain/langgraph").LastValue<number[][]>;
35
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
36
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
37
+ };
38
+ }>, import("@langchain/langgraph").UpdateType<{
39
+ filePath: {
40
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
41
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
42
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
43
+ };
44
+ mimeType: {
45
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
46
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
47
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
48
+ };
49
+ rawText: {
50
+ (): import("@langchain/langgraph").LastValue<string>;
51
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
52
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
53
+ };
54
+ pdfChunks: {
55
+ (): import("@langchain/langgraph").LastValue<string[]>;
56
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
57
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
58
+ };
59
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
60
+ markdown: {
61
+ (): import("@langchain/langgraph").LastValue<string>;
62
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
63
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
64
+ };
65
+ textChunks: {
66
+ (): import("@langchain/langgraph").LastValue<string[]>;
67
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
68
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
69
+ };
70
+ vectors: {
71
+ (): import("@langchain/langgraph").LastValue<number[][]>;
72
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
73
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
74
+ };
75
+ }>, "markdownMerger" | "markdownNormalizer" | "llmExtractionNode" | "__start__" | "fileTypeRouter" | "libreOfficeToPdf" | "pdfSplitter" | "textExtractorNode" | "saveMarkdown" | "markdownChunker" | "vectorEmbedderNode" | "vectorUpsertNode", {
76
+ filePath: {
77
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
78
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
79
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
80
+ };
81
+ mimeType: {
82
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
83
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
84
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
85
+ };
86
+ rawText: {
87
+ (): import("@langchain/langgraph").LastValue<string>;
88
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
89
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
90
+ };
91
+ pdfChunks: {
92
+ (): import("@langchain/langgraph").LastValue<string[]>;
93
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
94
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
95
+ };
96
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
97
+ markdown: {
98
+ (): import("@langchain/langgraph").LastValue<string>;
99
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
100
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
101
+ };
102
+ textChunks: {
103
+ (): import("@langchain/langgraph").LastValue<string[]>;
104
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
105
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
106
+ };
107
+ vectors: {
108
+ (): import("@langchain/langgraph").LastValue<number[][]>;
109
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
110
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
111
+ };
112
+ }, {
113
+ filePath: {
114
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
115
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
116
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
117
+ };
118
+ mimeType: {
119
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
120
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
121
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
122
+ };
123
+ rawText: {
124
+ (): import("@langchain/langgraph").LastValue<string>;
125
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
126
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
127
+ };
128
+ pdfChunks: {
129
+ (): import("@langchain/langgraph").LastValue<string[]>;
130
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
131
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
132
+ };
133
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
134
+ markdown: {
135
+ (): import("@langchain/langgraph").LastValue<string>;
136
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
137
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
138
+ };
139
+ textChunks: {
140
+ (): import("@langchain/langgraph").LastValue<string[]>;
141
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
142
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
143
+ };
144
+ vectors: {
145
+ (): import("@langchain/langgraph").LastValue<number[][]>;
146
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
147
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
148
+ };
149
+ }, import("@langchain/langgraph").StateDefinition>;
150
+ /**
151
+ * The compiled graph instance.
152
+ * Exported specifically for LangGraph Studio and the LangGraph CLI.
153
+ */
154
+ export declare const graph: import("@langchain/langgraph").CompiledStateGraph<import("@langchain/langgraph").StateType<{
155
+ filePath: {
156
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
157
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
158
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
159
+ };
160
+ mimeType: {
161
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
162
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
163
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
164
+ };
165
+ rawText: {
166
+ (): import("@langchain/langgraph").LastValue<string>;
167
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
168
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
169
+ };
170
+ pdfChunks: {
171
+ (): import("@langchain/langgraph").LastValue<string[]>;
172
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
173
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
174
+ };
175
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
176
+ markdown: {
177
+ (): import("@langchain/langgraph").LastValue<string>;
178
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
179
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
180
+ };
181
+ textChunks: {
182
+ (): import("@langchain/langgraph").LastValue<string[]>;
183
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
184
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
185
+ };
186
+ vectors: {
187
+ (): import("@langchain/langgraph").LastValue<number[][]>;
188
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
189
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
190
+ };
191
+ }>, import("@langchain/langgraph").UpdateType<{
192
+ filePath: {
193
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
194
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
195
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
196
+ };
197
+ mimeType: {
198
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
199
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
200
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
201
+ };
202
+ rawText: {
203
+ (): import("@langchain/langgraph").LastValue<string>;
204
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
205
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
206
+ };
207
+ pdfChunks: {
208
+ (): import("@langchain/langgraph").LastValue<string[]>;
209
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
210
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
211
+ };
212
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
213
+ markdown: {
214
+ (): import("@langchain/langgraph").LastValue<string>;
215
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
216
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
217
+ };
218
+ textChunks: {
219
+ (): import("@langchain/langgraph").LastValue<string[]>;
220
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
221
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
222
+ };
223
+ vectors: {
224
+ (): import("@langchain/langgraph").LastValue<number[][]>;
225
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
226
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
227
+ };
228
+ }>, "markdownMerger" | "markdownNormalizer" | "llmExtractionNode" | "__start__" | "fileTypeRouter" | "libreOfficeToPdf" | "pdfSplitter" | "textExtractorNode" | "saveMarkdown" | "markdownChunker" | "vectorEmbedderNode" | "vectorUpsertNode", {
229
+ filePath: {
230
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
231
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
232
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
233
+ };
234
+ mimeType: {
235
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
236
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
237
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
238
+ };
239
+ rawText: {
240
+ (): import("@langchain/langgraph").LastValue<string>;
241
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
242
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
243
+ };
244
+ pdfChunks: {
245
+ (): import("@langchain/langgraph").LastValue<string[]>;
246
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
247
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
248
+ };
249
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
250
+ markdown: {
251
+ (): import("@langchain/langgraph").LastValue<string>;
252
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
253
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
254
+ };
255
+ textChunks: {
256
+ (): import("@langchain/langgraph").LastValue<string[]>;
257
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
258
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
259
+ };
260
+ vectors: {
261
+ (): import("@langchain/langgraph").LastValue<number[][]>;
262
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
263
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
264
+ };
265
+ }, {
266
+ filePath: {
267
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
268
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
269
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
270
+ };
271
+ mimeType: {
272
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
273
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
274
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
275
+ };
276
+ rawText: {
277
+ (): import("@langchain/langgraph").LastValue<string>;
278
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
279
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
280
+ };
281
+ pdfChunks: {
282
+ (): import("@langchain/langgraph").LastValue<string[]>;
283
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
284
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
285
+ };
286
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
287
+ markdown: {
288
+ (): import("@langchain/langgraph").LastValue<string>;
289
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
290
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
291
+ };
292
+ textChunks: {
293
+ (): import("@langchain/langgraph").LastValue<string[]>;
294
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
295
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
296
+ };
297
+ vectors: {
298
+ (): import("@langchain/langgraph").LastValue<number[][]>;
299
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
300
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
301
+ };
302
+ }, import("@langchain/langgraph").StateDefinition>;
303
+ //# sourceMappingURL=singleDocument.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"singleDocument.d.ts","sourceRoot":"","sources":["../../src/graphs/singleDocument.ts"],"names":[],"mappings":"AA0CA,wBAAgB,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;mDA4D5B;AAED;;;GAGG;AACH,eAAO,MAAM,KAAK;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kDAAkB,CAAC"}
@@ -0,0 +1,93 @@
1
+ import { StateGraph, END, Send } from "@langchain/langgraph";
2
+ import { PipelineStateAnnotation } from "../core/state.js";
3
+ import { fileTypeRouter, routeByMimeType } from "../nodes/fileTypeRouter.js";
4
+ import { pdfSplitter } from "../nodes/pdfSplitter.js";
5
+ import { llmExtractionNode, routeAfterLlm } from "../nodes/llmExtractionNode.js";
6
+ import { markdownMerger } from "../nodes/markdownMerger.js";
7
+ import { textExtractorNode } from "../nodes/textExtractorNode.js";
8
+ import { markdownNormalizer } from "../nodes/markdownNormalizer.js";
9
+ import { markdownChunker } from "../nodes/markdownChunker.js";
10
+ import { vectorEmbedderNode } from "../nodes/vectorEmbedderNode.js";
11
+ import { vectorUpsertNode } from "../nodes/vectorUpsertNode.js";
12
+ import { saveMarkdown } from "../nodes/saveMarkdown.js";
13
+ import { libreOfficeToPdf } from "../nodes/libreOfficeToPdf.js";
14
+ /**
15
+ * Builds and compiles the Virstack Doc Ingest pipeline as a LangGraph StateGraph.
16
+ *
17
+ * Flow:
18
+ * START → fileTypeRouter
19
+ * ├─ "pdf" → pdfSplitter → [llmExtractionNode (Parallel)] → markdownMerger → markdownNormalizer
20
+ * ├─ "convert" → libreOfficeToPdf → pdfSplitter → (same as pdf branch)
21
+ * └─ "extract" → textExtractorNode → llmExtractionNode → markdownNormalizer
22
+ * markdownNormalizer → saveMarkdown → markdownChunker → vectorEmbedderNode → vectorUpsertNode → END
23
+ */
24
/**
 * Fan-out router for the PDF branch.
 *
 * Produces one `Send` per entry of `state.pdfChunks`, each addressed to
 * "llmExtractionNode" and carrying `{ chunk, index, totalChunks }`.
 *
 * @param state Pipeline state; only `pdfChunks` is read here.
 * @returns An array of `Send` objects, or an empty array (after logging a
 *          warning) when `pdfChunks` is missing or has no entries.
 */
function dispatchPdfChunks(state) {
    const pending = state.pdfChunks;
    if (pending && pending.length !== 0) {
        // Every payload reports the same total alongside its own position.
        const totalChunks = pending.length;
        return pending.map((chunk, index) => new Send("llmExtractionNode", { chunk, index, totalChunks }));
    }
    console.warn("[dispatchPdfChunks] No PDF chunks found to process.");
    return [];
}
40
/**
 * Assembles the single-document ingestion graph on top of
 * PipelineStateAnnotation and returns it compiled.
 *
 * Wiring, as registered below:
 *   __start__ → fileTypeRouter, whose routeByMimeType result selects
 *     "pdf"     → pdfSplitter
 *     "convert" → libreOfficeToPdf → pdfSplitter
 *     "extract" → textExtractorNode → llmExtractionNode
 *   pdfSplitter → (dispatchPdfChunks) → llmExtractionNode
 *   llmExtractionNode → (routeAfterLlm) → markdownMerger | markdownNormalizer
 *   markdownMerger → markdownNormalizer → saveMarkdown → markdownChunker
 *     → vectorEmbedderNode → vectorUpsertNode → END
 *
 * @returns The value returned by compile() on the assembled StateGraph.
 */
export function buildPipeline() {
    // Node registrations, listed in the order they are added.
    const nodeTable = [
        // Phase 1: routing
        ["fileTypeRouter", fileTypeRouter],
        // Phase 2a: PDF branch
        ["libreOfficeToPdf", libreOfficeToPdf],
        ["pdfSplitter", pdfSplitter],
        ["markdownMerger", markdownMerger],
        // Phase 2b: text / data extraction branch
        ["textExtractorNode", textExtractorNode],
        ["llmExtractionNode", llmExtractionNode],
        // Phase 3: normalization & chunking
        ["markdownNormalizer", markdownNormalizer],
        ["saveMarkdown", saveMarkdown],
        ["markdownChunker", markdownChunker],
        // Phase 4: embedding & indexing
        ["vectorEmbedderNode", vectorEmbedderNode],
        ["vectorUpsertNode", vectorUpsertNode],
    ];

    let builder = new StateGraph(PipelineStateAnnotation);
    for (const [nodeName, handler] of nodeTable) {
        builder = builder.addNode(nodeName, handler);
    }

    // Entry point feeds the router.
    builder = builder.addEdge("__start__", "fileTypeRouter");

    // The router's result picks one of three branches.
    const branchByRoute = {
        pdf: "pdfSplitter",
        convert: "libreOfficeToPdf",
        extract: "textExtractorNode",
    };
    builder = builder.addConditionalEdges("fileTypeRouter", routeByMimeType, branchByRoute);

    // The convert branch rejoins the PDF branch at the splitter.
    builder = builder.addEdge("libreOfficeToPdf", "pdfSplitter");

    // PDF chunks are dispatched to the LLM extraction node.
    builder = builder.addConditionalEdges("pdfSplitter", dispatchPdfChunks, ["llmExtractionNode"]);

    // The text branch goes straight to the LLM extraction node.
    builder = builder.addEdge("textExtractorNode", "llmExtractionNode");

    // After extraction, either merge PDF parts or normalize directly.
    const afterLlmTargets = {
        markdownMerger: "markdownMerger",
        markdownNormalizer: "markdownNormalizer",
    };
    builder = builder.addConditionalEdges("llmExtractionNode", routeAfterLlm, afterLlmTargets);

    // Merger hand-off followed by the shared tail, in registration order.
    const linearEdges = [
        ["markdownMerger", "markdownNormalizer"],
        ["markdownNormalizer", "saveMarkdown"],
        ["saveMarkdown", "markdownChunker"],
        ["markdownChunker", "vectorEmbedderNode"],
        ["vectorEmbedderNode", "vectorUpsertNode"],
        ["vectorUpsertNode", END],
    ];
    for (const [from, to] of linearEdges) {
        builder = builder.addEdge(from, to);
    }

    return builder.compile();
}
88
/**
 * Module-level compiled graph, produced by a single buildPipeline() call
 * when this module is evaluated.
 * Exported specifically for LangGraph Studio and the LangGraph CLI.
 */
const graph = buildPipeline();
export { graph };
93
+ //# sourceMappingURL=singleDocument.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"singleDocument.js","sourceRoot":"","sources":["../../src/graphs/singleDocument.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAsB,MAAM,kBAAkB,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAC7E,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAE5D,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAEhE;;;;;;;;;GASG;AAEH;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAoB;IAC7C,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrD,OAAO,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;QACpE,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,OAAO,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC1C,OAAO,IAAI,IAAI,CAAC,mBAAmB,EAAE;YACnC,KAAK;YACL,KAAK;YACL,WAAW,EAAE,KAAK,CAAC,SAAS,CAAC,MAAM;SACpC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AACD,MAAM,UAAU,aAAa;IAC3B,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,uBAAuB,CAAC;QACnD,yBAAyB;SACxB,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC;QAE1C,6BAA6B;SAC5B,OAAO,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;SAC7C,OAAO,CAAC,aAAa,EAAE,WAAW,CAAC;SACnC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC;QAE1C,gDAAgD;SAC/C,OAAO,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;SAC/C,OAAO,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;QAEhD,0CAA0C;SACzC,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,cAAc,EAAE,YAAY,CAAC;SACrC,OAAO,CAAC,iBAAiB,EAAE,eAAe,CAAC;QAE5C,sCAAsC;SACrC,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;QAE9C,cAAc;QACd,iBAAiB;SAChB,OAAO,CAAC,WAAW,EAAE,gBAAgB,CAAC;QAEvC,8BAA8B;SAC7B,mBAAmB,CAAC,gBAAgB,EAAE,eAAe,EAAE;QACtD,GAAG,EAAE,aAAa;QAClB,OAAO,EAAE,kBAAkB;QAC3B,OAAO,EAAE,mBAAmB;KAC7B,CAAC;QAEF,iEAAiE;SAChE,OAAO,CAAC,kBAAkB,EAAE,aAAa,CAAC;QAE3C,wBAAwB;SACvB,mBAAmB,CA
AC,aAAa,EAAE,iBAAiB,EAAE,CAAC,mBAAmB,CAAC,CAAC;QAE7E,oCAAoC;SACnC,OAAO,CAAC,mBAAmB,EAAE,mBAAmB,CAAC;QAElD,4EAA4E;SAC3E,mBAAmB,CAAC,mBAAmB,EAAE,aAAa,EAAE;QACvD,cAAc,EAAE,gBAAgB;QAChC,kBAAkB,EAAE,oBAAoB;KACzC,CAAC;QAEF,+BAA+B;SAC9B,OAAO,CAAC,gBAAgB,EAAE,oBAAoB,CAAC;QAEhD,+DAA+D;SAC9D,OAAO,CAAC,oBAAoB,EAAE,cAAc,CAAC;SAC7C,OAAO,CAAC,cAAc,EAAE,iBAAiB,CAAC;SAC1C,OAAO,CAAC,iBAAiB,EAAE,oBAAoB,CAAC;SAChD,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;IAEpC,OAAO,KAAK,CAAC,OAAO,EAAE,CAAC;AACzB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,KAAK,GAAG,aAAa,EAAE,CAAC"}