@virstack/doc-ingest 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/README.md +203 -0
  2. package/dist/adapters/aiAdapters.d.ts +25 -0
  3. package/dist/adapters/aiAdapters.d.ts.map +1 -0
  4. package/dist/adapters/aiAdapters.js +73 -0
  5. package/dist/adapters/aiAdapters.js.map +1 -0
  6. package/dist/adapters/vectorStore.d.ts +24 -0
  7. package/dist/adapters/vectorStore.d.ts.map +1 -0
  8. package/dist/adapters/vectorStore.js +22 -0
  9. package/dist/adapters/vectorStore.js.map +1 -0
  10. package/dist/aiAdapters.d.ts +25 -0
  11. package/dist/aiAdapters.d.ts.map +1 -0
  12. package/dist/aiAdapters.js +50 -0
  13. package/dist/aiAdapters.js.map +1 -0
  14. package/dist/assets/logo.png +0 -0
  15. package/dist/batchPipeline.d.ts +52 -0
  16. package/dist/batchPipeline.d.ts.map +1 -0
  17. package/dist/batchPipeline.js +81 -0
  18. package/dist/batchPipeline.js.map +1 -0
  19. package/dist/cli.d.ts +3 -0
  20. package/dist/cli.d.ts.map +1 -0
  21. package/dist/cli.js +217 -0
  22. package/dist/cli.js.map +1 -0
  23. package/dist/config.d.ts +26 -0
  24. package/dist/config.d.ts.map +1 -0
  25. package/dist/config.js +97 -0
  26. package/dist/config.js.map +1 -0
  27. package/dist/core/config.d.ts +26 -0
  28. package/dist/core/config.d.ts.map +1 -0
  29. package/dist/core/config.js +106 -0
  30. package/dist/core/config.js.map +1 -0
  31. package/dist/core/logger.d.ts +31 -0
  32. package/dist/core/logger.d.ts.map +1 -0
  33. package/dist/core/logger.js +42 -0
  34. package/dist/core/logger.js.map +1 -0
  35. package/dist/core/state.d.ts +52 -0
  36. package/dist/core/state.d.ts.map +1 -0
  37. package/dist/core/state.js +27 -0
  38. package/dist/core/state.js.map +1 -0
  39. package/dist/graphs/batchProcessor.d.ts +72 -0
  40. package/dist/graphs/batchProcessor.d.ts.map +1 -0
  41. package/dist/graphs/batchProcessor.js +94 -0
  42. package/dist/graphs/batchProcessor.js.map +1 -0
  43. package/dist/graphs/singleDocument.d.ts +303 -0
  44. package/dist/graphs/singleDocument.d.ts.map +1 -0
  45. package/dist/graphs/singleDocument.js +93 -0
  46. package/dist/graphs/singleDocument.js.map +1 -0
  47. package/dist/index.d.ts +8 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +10 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/logger.d.ts +24 -0
  52. package/dist/logger.d.ts.map +1 -0
  53. package/dist/logger.js +36 -0
  54. package/dist/logger.js.map +1 -0
  55. package/dist/logo.d.ts +2 -0
  56. package/dist/logo.d.ts.map +1 -0
  57. package/dist/logo.js +3 -0
  58. package/dist/logo.js.map +1 -0
  59. package/dist/nodes/fileTypeRouter.d.ts +16 -0
  60. package/dist/nodes/fileTypeRouter.d.ts.map +1 -0
  61. package/dist/nodes/fileTypeRouter.js +72 -0
  62. package/dist/nodes/fileTypeRouter.js.map +1 -0
  63. package/dist/nodes/geminiExtraction.d.ts +19 -0
  64. package/dist/nodes/geminiExtraction.d.ts.map +1 -0
  65. package/dist/nodes/geminiExtraction.js +87 -0
  66. package/dist/nodes/geminiExtraction.js.map +1 -0
  67. package/dist/nodes/libreOfficeToPdf.d.ts +8 -0
  68. package/dist/nodes/libreOfficeToPdf.d.ts.map +1 -0
  69. package/dist/nodes/libreOfficeToPdf.js +61 -0
  70. package/dist/nodes/libreOfficeToPdf.js.map +1 -0
  71. package/dist/nodes/llmExtractionNode.d.ts +19 -0
  72. package/dist/nodes/llmExtractionNode.d.ts.map +1 -0
  73. package/dist/nodes/llmExtractionNode.js +68 -0
  74. package/dist/nodes/llmExtractionNode.js.map +1 -0
  75. package/dist/nodes/markdownChunker.d.ts +8 -0
  76. package/dist/nodes/markdownChunker.d.ts.map +1 -0
  77. package/dist/nodes/markdownChunker.js +24 -0
  78. package/dist/nodes/markdownChunker.js.map +1 -0
  79. package/dist/nodes/markdownMerger.d.ts +9 -0
  80. package/dist/nodes/markdownMerger.d.ts.map +1 -0
  81. package/dist/nodes/markdownMerger.js +33 -0
  82. package/dist/nodes/markdownMerger.js.map +1 -0
  83. package/dist/nodes/markdownNormalizer.d.ts +10 -0
  84. package/dist/nodes/markdownNormalizer.d.ts.map +1 -0
  85. package/dist/nodes/markdownNormalizer.js +46 -0
  86. package/dist/nodes/markdownNormalizer.js.map +1 -0
  87. package/dist/nodes/openrouterEmbedder.d.ts +7 -0
  88. package/dist/nodes/openrouterEmbedder.d.ts.map +1 -0
  89. package/dist/nodes/openrouterEmbedder.js +31 -0
  90. package/dist/nodes/openrouterEmbedder.js.map +1 -0
  91. package/dist/nodes/pdfSplitter.d.ts +7 -0
  92. package/dist/nodes/pdfSplitter.d.ts.map +1 -0
  93. package/dist/nodes/pdfSplitter.js +41 -0
  94. package/dist/nodes/pdfSplitter.js.map +1 -0
  95. package/dist/nodes/saveMarkdown.d.ts +7 -0
  96. package/dist/nodes/saveMarkdown.d.ts.map +1 -0
  97. package/dist/nodes/saveMarkdown.js +28 -0
  98. package/dist/nodes/saveMarkdown.js.map +1 -0
  99. package/dist/nodes/textExtractorNode.d.ts +7 -0
  100. package/dist/nodes/textExtractorNode.d.ts.map +1 -0
  101. package/dist/nodes/textExtractorNode.js +39 -0
  102. package/dist/nodes/textExtractorNode.js.map +1 -0
  103. package/dist/nodes/upstashUpsert.d.ts +7 -0
  104. package/dist/nodes/upstashUpsert.d.ts.map +1 -0
  105. package/dist/nodes/upstashUpsert.js +45 -0
  106. package/dist/nodes/upstashUpsert.js.map +1 -0
  107. package/dist/nodes/vectorEmbedderNode.d.ts +7 -0
  108. package/dist/nodes/vectorEmbedderNode.d.ts.map +1 -0
  109. package/dist/nodes/vectorEmbedderNode.js +23 -0
  110. package/dist/nodes/vectorEmbedderNode.js.map +1 -0
  111. package/dist/nodes/vectorUpsertNode.d.ts +7 -0
  112. package/dist/nodes/vectorUpsertNode.d.ts.map +1 -0
  113. package/dist/nodes/vectorUpsertNode.js +45 -0
  114. package/dist/nodes/vectorUpsertNode.js.map +1 -0
  115. package/dist/pipeline.d.ts +303 -0
  116. package/dist/pipeline.d.ts.map +1 -0
  117. package/dist/pipeline.js +93 -0
  118. package/dist/pipeline.js.map +1 -0
  119. package/dist/state.d.ts +52 -0
  120. package/dist/state.d.ts.map +1 -0
  121. package/dist/state.js +27 -0
  122. package/dist/state.js.map +1 -0
  123. package/dist/vectorStore.d.ts +24 -0
  124. package/dist/vectorStore.d.ts.map +1 -0
  125. package/dist/vectorStore.js +22 -0
  126. package/dist/vectorStore.js.map +1 -0
  127. package/package.json +55 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAsB,MAAM,YAAY,CAAC;AACzE,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAC5E,OAAO,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AACrD,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAChF,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAE3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAE/D;;;;;;;;;GASG;AAEH;;GAEG;AACH,SAAS,iBAAiB,CAAC,KAAoB;IAC7C,IAAI,CAAC,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrD,OAAO,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;QACpE,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,OAAO,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC1C,OAAO,IAAI,IAAI,CAAC,mBAAmB,EAAE;YACnC,KAAK;YACL,KAAK;YACL,WAAW,EAAE,KAAK,CAAC,SAAS,CAAC,MAAM;SACpC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AACD,MAAM,UAAU,aAAa;IAC3B,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,uBAAuB,CAAC;QACnD,yBAAyB;SACxB,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC;QAE1C,6BAA6B;SAC5B,OAAO,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;SAC7C,OAAO,CAAC,aAAa,EAAE,WAAW,CAAC;SACnC,OAAO,CAAC,gBAAgB,EAAE,cAAc,CAAC;QAE1C,gDAAgD;SAC/C,OAAO,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;SAC/C,OAAO,CAAC,mBAAmB,EAAE,iBAAiB,CAAC;QAEhD,0CAA0C;SACzC,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,cAAc,EAAE,YAAY,CAAC;SACrC,OAAO,CAAC,iBAAiB,EAAE,eAAe,CAAC;QAE5C,sCAAsC;SACrC,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;QAE9C,cAAc;QACd,iBAAiB;SAChB,OAAO,CAAC,WAAW,EAAE,gBAAgB,CAAC;QAEvC,8BAA8B;SAC7B,mBAAmB,CAAC,gBAAgB,EAAE,eAAe,EAAE;QACtD,GAAG,EAAE,aAAa;QAClB,OAAO,EAAE,kBAAkB;QAC3B,OAAO,EAAE,mBAAmB;KAC7B,CAAC;QAEF,iEAAiE;SAChE,OAAO,CAAC,kBAAkB,EAAE,aAAa,CAAC;QAE3C,wBAAwB;SACvB,mBAAmB,CAAC,aAAa,EAAE,iBAAiB,EAAE
,CAAC,mBAAmB,CAAC,CAAC;QAE7E,oCAAoC;SACnC,OAAO,CAAC,mBAAmB,EAAE,mBAAmB,CAAC;QAElD,4EAA4E;SAC3E,mBAAmB,CAAC,mBAAmB,EAAE,aAAa,EAAE;QACvD,cAAc,EAAE,gBAAgB;QAChC,kBAAkB,EAAE,oBAAoB;KACzC,CAAC;QAEF,+BAA+B;SAC9B,OAAO,CAAC,gBAAgB,EAAE,oBAAoB,CAAC;QAEhD,+DAA+D;SAC9D,OAAO,CAAC,oBAAoB,EAAE,cAAc,CAAC;SAC7C,OAAO,CAAC,cAAc,EAAE,iBAAiB,CAAC;SAC1C,OAAO,CAAC,iBAAiB,EAAE,oBAAoB,CAAC;SAChD,OAAO,CAAC,oBAAoB,EAAE,kBAAkB,CAAC;SACjD,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;IAEpC,OAAO,KAAK,CAAC,OAAO,EAAE,CAAC;AACzB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,KAAK,GAAG,aAAa,EAAE,CAAC"}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * LangGraph pipeline state definition.
3
+ * Every node reads from and writes to this shared state.
4
+ */
5
+ export declare const PipelineStateAnnotation: import("@langchain/langgraph").AnnotationRoot<{
6
+ /** Absolute path to the input file (optional if rawText is provided) */
7
+ filePath: {
8
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
9
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
10
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
11
+ };
12
+ /** Detected MIME type of the input file (optional if rawText is provided) */
13
+ mimeType: {
14
+ (): import("@langchain/langgraph").LastValue<string | undefined>;
15
+ (annotation: import("@langchain/langgraph").SingleReducer<string | undefined, string | undefined>): import("@langchain/langgraph").BinaryOperatorAggregate<string | undefined, string | undefined>;
16
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
17
+ };
18
+ /** Extracted raw text (office / text branch) */
19
+ rawText: {
20
+ (): import("@langchain/langgraph").LastValue<string>;
21
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
22
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
23
+ };
24
+ /** 10-page PDF chunk buffers (base64 strings, PDF branch) */
25
+ pdfChunks: {
26
+ (): import("@langchain/langgraph").LastValue<string[]>;
27
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
28
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
29
+ };
30
+ /** Per-chunk markdown outputs from Gemini (PDF branch). The only field declared directly as a BinaryOperatorAggregate rather than as the bare Annotation function type. */
31
+ markdownParts: import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
32
+ /** Final merged / extracted markdown (both branches converge here) */
33
+ markdown: {
34
+ (): import("@langchain/langgraph").LastValue<string>;
35
+ (annotation: import("@langchain/langgraph").SingleReducer<string, string>): import("@langchain/langgraph").BinaryOperatorAggregate<string, string>;
36
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
37
+ };
38
+ /** Semantic text chunks after splitting */
39
+ textChunks: {
40
+ (): import("@langchain/langgraph").LastValue<string[]>;
41
+ (annotation: import("@langchain/langgraph").SingleReducer<string[], string[]>): import("@langchain/langgraph").BinaryOperatorAggregate<string[], string[]>;
42
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
43
+ };
44
+ /** Embedding vectors, one per text chunk. NOTE(review): this comment previously said "OpenAI"; package.json depends on @openrouter/sdk and no OpenAI SDK - confirm the provider. */
45
+ vectors: {
46
+ (): import("@langchain/langgraph").LastValue<number[][]>;
47
+ (annotation: import("@langchain/langgraph").SingleReducer<number[][], number[][]>): import("@langchain/langgraph").BinaryOperatorAggregate<number[][], number[][]>;
48
+ Root: <S extends import("@langchain/langgraph").StateDefinition>(sd: S) => import("@langchain/langgraph").AnnotationRoot<S>;
49
+ };
50
+ }>;
51
+ export type PipelineState = typeof PipelineStateAnnotation.State;
52
+ //# sourceMappingURL=state.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"state.d.ts","sourceRoot":"","sources":["../src/state.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,eAAO,MAAM,uBAAuB;IAClC,wEAAwE;;;;;;IAGxE,6EAA6E;;;;;;IAG7E,gDAAgD;;;;;;IAGhD,6DAA6D;;;;;;IAG7D,0DAA0D;;IAM1D,sEAAsE;;;;;;IAGtE,2CAA2C;;;;;;IAG3C,mDAAmD;;;;;;EAEnD,CAAC;AAEH,MAAM,MAAM,aAAa,GAAG,OAAO,uBAAuB,CAAC,KAAK,CAAC"}
package/dist/state.js ADDED
@@ -0,0 +1,27 @@
1
+ import { Annotation } from "@langchain/langgraph";
2
+ /**
3
+ * LangGraph pipeline state definition.
4
+ * Every node reads from and writes to this shared state.
5
+ */
6
+ export const PipelineStateAnnotation = Annotation.Root({
7
+ /** Absolute path to the input file (optional if rawText is provided) */
8
+ filePath: (Annotation),
9
+ /** Detected MIME type of the input file (optional if rawText is provided) */
10
+ mimeType: (Annotation),
11
+ /** Extracted raw text (office / text branch) */
12
+ rawText: (Annotation),
13
+ /** 10-page PDF chunk buffers (base64 strings, PDF branch) */
14
+ pdfChunks: (Annotation),
15
+ /** Per-chunk markdown outputs from Gemini (PDF branch); the only field configured with an explicit reducer and default */
16
+ markdownParts: Annotation({
17
+ reducer: (x, y) => x.concat(y), // append the incoming update (y) to the accumulated array (x); concat returns a new array
18
+ default: () => [], // a fresh empty array is produced each time the default is requested
19
+ }),
20
+ /** Final merged / extracted markdown (both branches converge here) */
21
+ markdown: (Annotation),
22
+ /** Semantic text chunks after splitting */
23
+ textChunks: (Annotation),
24
+ /** Embedding vectors, one per text chunk. NOTE(review): this comment previously said "OpenAI"; package.json depends on @openrouter/sdk and no OpenAI SDK - confirm the provider. */
25
+ vectors: (Annotation),
26
+ });
27
+ //# sourceMappingURL=state.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"state.js","sourceRoot":"","sources":["../src/state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAElD;;;GAGG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,UAAU,CAAC,IAAI,CAAC;IACrD,wEAAwE;IACxE,QAAQ,EAAE,CAAA,UAA8B,CAAA;IAExC,6EAA6E;IAC7E,QAAQ,EAAE,CAAA,UAA8B,CAAA;IAExC,gDAAgD;IAChD,OAAO,EAAE,CAAA,UAAkB,CAAA;IAE3B,6DAA6D;IAC7D,SAAS,EAAE,CAAA,UAAoB,CAAA;IAE/B,0DAA0D;IAC1D,aAAa,EAAE,UAAU,CAAW;QAClC,OAAO,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC9B,OAAO,EAAE,GAAG,EAAE,CAAC,EAAE;KAClB,CAAC;IAEF,sEAAsE;IACtE,QAAQ,EAAE,CAAA,UAAkB,CAAA;IAE5B,2CAA2C;IAC3C,UAAU,EAAE,CAAA,UAAoB,CAAA;IAEhC,mDAAmD;IACnD,OAAO,EAAE,CAAA,UAAsB,CAAA;CAChC,CAAC,CAAC"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * The standard shape of a record that the pipeline will produce.
3
+ */
4
+ export interface VectorRecord {
5
+ id: string;
6
+ vector: number[];
7
+ metadata: Record<string, any>; // the built-in UpstashAdapter reads metadata.text and copies it into the upserted record's `data` field
8
+ }
9
+ /**
10
+ * The contract that any vector database adapter must follow.
11
+ */
12
+ export interface VectorStoreAdapter {
13
+ upsert(records: VectorRecord[]): Promise<void>; // resolves with no value; the only method an adapter has to provide
14
+ }
15
+ /**
16
+ * Built-in adapter for Upstash Vector.
17
+ * Used by default when running via the CLI.
18
+ */
19
+ export declare class UpstashAdapter implements VectorStoreAdapter {
20
+ private index;
21
+ constructor(url: string, token: string);
22
+ upsert(records: VectorRecord[]): Promise<void>;
23
+ }
24
+ //# sourceMappingURL=vectorStore.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vectorStore.d.ts","sourceRoot":"","sources":["../src/vectorStore.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,qBAAa,cAAe,YAAW,kBAAkB;IACvD,OAAO,CAAC,KAAK,CAAQ;gBAET,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IAIhC,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;CAWrD"}
@@ -0,0 +1,22 @@
1
+ import { Index } from "@upstash/vector";
2
+ /**
3
+ * Built-in adapter for Upstash Vector.
4
+ * Used by default when running via the CLI.
5
+ */
6
+ export class UpstashAdapter {
7
+ index;
8
+ constructor(url, token) {
9
+ this.index = new Index({ url, token }); // Index client imported from @upstash/vector
10
+ }
11
+ async upsert(records) {
12
+ const upstashRecords = records.map((r) => ({
13
+ id: r.id,
14
+ vector: r.vector,
15
+ metadata: r.metadata,
16
+ // Also send metadata.text as the record's 'data' field; falls back to "" when text is missing or otherwise falsy.
17
+ data: r.metadata.text || "",
18
+ }));
19
+ await this.index.upsert(upstashRecords); // all mapped records are passed in a single upsert call
20
+ }
21
+ }
22
+ //# sourceMappingURL=vectorStore.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vectorStore.js","sourceRoot":"","sources":["../src/vectorStore.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAkBxC;;;GAGG;AACH,MAAM,OAAO,cAAc;IACjB,KAAK,CAAQ;IAErB,YAAY,GAAW,EAAE,KAAa;QACpC,IAAI,CAAC,KAAK,GAAG,IAAI,KAAK,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAAuB;QAClC,MAAM,cAAc,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzC,EAAE,EAAE,CAAC,CAAC,EAAE;YACR,MAAM,EAAE,CAAC,CAAC,MAAM;YAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;YACpB,gFAAgF;YAChF,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE;SAC5B,CAAC,CAAC,CAAC;QAEJ,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC;IAC1C,CAAC;CACF"}
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "@virstack/doc-ingest",
3
+ "version": "1.0.0",
4
+ "description": "A high-performance, parallelized document ingestion and vectorization pipeline.",
5
+ "main": "./dist/index.js",
6
+ "types": "./dist/index.d.ts",
7
+ "type": "module",
8
+ "bin": {
9
+ "virstack-doc-ingest": "./dist/cli.js"
10
+ },
11
+ "files": [
12
+ "dist"
13
+ ],
14
+ "scripts": {
15
+ "build": "tsc",
16
+ "prepublishOnly": "npm run build",
17
+ "start": "node dist/cli.js",
18
+ "dev": "tsx src/cli.ts"
19
+ },
20
+ "keywords": [
21
+ "virstack",
22
+ "llm",
23
+ "langgraph",
24
+ "gemini",
25
+ "pdf-parsing",
26
+ "vector-database"
27
+ ],
28
+ "author": "Mayura Sandakalum",
29
+ "license": "MIT",
30
+ "dependencies": {
31
+ "@clack/prompts": "^1.1.0",
32
+ "@langchain/core": "^0.3.0",
33
+ "@langchain/langgraph": "^0.2.0",
34
+ "@langchain/textsplitters": "^0.1.0",
35
+ "@openrouter/sdk": "^0.9.11",
36
+ "@types/figlet": "^1.7.0",
37
+ "@upstash/vector": "^1.1.0",
38
+ "csv-parse": "^5.6.0",
39
+ "dotenv": "^16.4.0",
40
+ "figlet": "^1.11.0",
41
+ "mime-types": "^2.1.35",
42
+ "officeparser": "^4.1.0",
43
+ "p-limit": "^7.3.0",
44
+ "pdf-lib": "^1.17.1",
45
+ "picocolors": "^1.1.1",
46
+ "terminal-image": "^4.2.0"
47
+ },
48
+ "devDependencies": {
49
+ "@langchain/langgraph-cli": "^1.1.16",
50
+ "@types/mime-types": "^2.1.4",
51
+ "@types/node": "^22.0.0",
52
+ "tsx": "^4.19.0",
53
+ "typescript": "^5.6.0"
54
+ }
55
+ }