@elsium-ai/rag 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -660,6 +660,198 @@ const reranked = mmrRerank(queryEmbedding, candidateResults, {
660
660
  })
661
661
  ```
662
662
 
663
+ ### `createPgVectorStore`
664
+
665
+ Creates a vector store backed by PostgreSQL with the pgvector extension.
666
+
667
+ ```typescript
668
+ function createPgVectorStore(config: {
669
+ connectionString: string
670
+ tableName?: string
671
+ dimensions?: number
672
+ }): VectorStore
673
+ ```
674
+
675
+ | Parameter | Type | Default | Description |
676
+ |---|---|---|---|
677
+ | `config.connectionString` | `string` | **(required)** | PostgreSQL connection string. |
678
+ | `config.tableName` | `string` | `'vector_chunks'` | Table name for storing vectors. Must match `/^[a-zA-Z_][a-zA-Z0-9_]*$/`. |
679
+ | `config.dimensions` | `number` | `1536` | Vector dimensions (must match your embedding model). |
680
+
681
+ **Returns:** A `VectorStore` with `name: 'pgvector'`.
682
+
683
+ ```typescript
684
+ import { createPgVectorStore } from '@elsium-ai/rag'
685
+
686
+ const store = createPgVectorStore({
687
+ connectionString: process.env.DATABASE_URL!,
688
+ tableName: 'document_embeddings',
689
+ dimensions: 1536,
690
+ })
691
+
692
+ await store.upsert(embeddedChunks)
693
+ const results = await store.query(queryVector, { topK: 5 })
694
+ ```
695
+
696
+ ### `createQdrantStore`
697
+
698
+ Creates a vector store backed by the Qdrant REST API.
699
+
700
+ ```typescript
701
+ function createQdrantStore(config: {
702
+ url: string
703
+ apiKey?: string
704
+ collectionName: string
705
+ dimensions: number
706
+ }): VectorStore
707
+ ```
708
+
709
+ | Parameter | Type | Default | Description |
710
+ |---|---|---|---|
711
+ | `config.url` | `string` | **(required)** | Qdrant server URL. |
712
+ | `config.apiKey` | `string` | `undefined` | Optional API key for authentication. |
713
+ | `config.collectionName` | `string` | **(required)** | Name of the Qdrant collection. |
714
+ | `config.dimensions` | `number` | **(required)** | Vector dimensions. |
715
+
716
+ **Returns:** A `VectorStore` with `name: 'qdrant'`.
717
+
718
+ ```typescript
719
+ import { createQdrantStore } from '@elsium-ai/rag'
720
+
721
+ const store = createQdrantStore({
722
+ url: 'http://localhost:6333',
723
+ collectionName: 'documents',
724
+ dimensions: 1536,
725
+ })
726
+ ```
727
+
728
+ ---
729
+
730
+ ## Additional Embedding Providers
731
+
732
+ ### `createGoogleEmbeddings`
733
+
734
+ Creates an embedding provider backed by Google's text-embedding-004 model.
735
+
736
+ ```typescript
737
+ function createGoogleEmbeddings(config: {
738
+ apiKey: string
739
+ model?: string
740
+ dimensions?: number
741
+ }): EmbeddingProvider
742
+ ```
743
+
744
+ | Parameter | Type | Default | Description |
745
+ |---|---|---|---|
746
+ | `config.apiKey` | `string` | **(required)** | Google API key. |
747
+ | `config.model` | `string` | `'text-embedding-004'` | Model name. |
748
+ | `config.dimensions` | `number` | `768` | Embedding dimensions. |
749
+
750
+ **Returns:** An `EmbeddingProvider` with `name: 'google'`.
751
+
752
+ ```typescript
753
+ import { createGoogleEmbeddings } from '@elsium-ai/rag'
754
+
755
+ const embeddings = createGoogleEmbeddings({
756
+ apiKey: process.env.GOOGLE_API_KEY!,
757
+ })
758
+
759
+ const vector = await embeddings.embed('Hello, world!')
760
+ ```
761
+
762
+ ### `createCohereEmbeddings`
763
+
764
+ Creates an embedding provider backed by Cohere's embed-v4.0 model.
765
+
766
+ ```typescript
767
+ function createCohereEmbeddings(config: {
768
+ apiKey: string
769
+ model?: string
770
+ inputType?: string
771
+ }): EmbeddingProvider
772
+ ```
773
+
774
+ | Parameter | Type | Default | Description |
775
+ |---|---|---|---|
776
+ | `config.apiKey` | `string` | **(required)** | Cohere API key. |
777
+ | `config.model` | `string` | `'embed-v4.0'` | Model name. |
778
+ | `config.inputType` | `string` | `'search_document'` | Input type hint for the model. |
779
+
780
+ **Returns:** An `EmbeddingProvider` with `name: 'cohere'`.
781
+
782
+ ```typescript
783
+ import { createCohereEmbeddings } from '@elsium-ai/rag'
784
+
785
+ const embeddings = createCohereEmbeddings({
786
+ apiKey: process.env.COHERE_API_KEY!,
787
+ })
788
+
789
+ const vector = await embeddings.embed('Hello, world!')
790
+ ```
791
+
792
+ ---
793
+
794
+ ## Keyword & Hybrid Search
795
+
796
+ ### `createBM25Index`
797
+
798
+ Creates a BM25 keyword search index for term-frequency-based retrieval.
799
+
800
+ ```typescript
801
+ function createBM25Index(options?: { k1?: number; b?: number }): {
802
+ index(chunks: Chunk[]): void
803
+ search(query: string, topK?: number): RetrievalResult[]
804
+ // Note: the index exposes no clear(); create a new index to start over.
805
+ }
806
+ ```
807
+
808
+ **Returns:** A BM25 index with `index` and `search` methods. `options.k1` defaults to `1.2` and `options.b` to `0.75`.
809
+
810
+ ```typescript
811
+ import { createBM25Index } from '@elsium-ai/rag'
812
+
813
+ const bm25 = createBM25Index()
814
+ bm25.index(chunks)
815
+
816
+ const results = bm25.search('machine learning', 5)
817
+ ```
818
+
819
+ ### `createHybridSearch`
820
+
821
+ Combines a vector store with a BM25 index using Reciprocal Rank Fusion (RRF) to blend semantic and keyword search results.
822
+
823
+ ```typescript
824
+ function createHybridSearch(
825
+ vectorStore: VectorStore,
826
+ bm25: ReturnType<typeof createBM25Index>,
827
+ config?: { k?: number; vectorWeight?: number; bm25Weight?: number; topK?: number },
828
+ ): {
829
+ search(query: string, queryEmbedding: EmbeddingVector, topK?: number): Promise<RetrievalResult[]>
830
+ }
831
+ ```
832
+
833
+ | Parameter | Type | Default | Description |
834
+ |---|---|---|---|
835
+ | `vectorStore` | `VectorStore` | **(required)** | The vector store for semantic search. |
836
+ | `bm25` | `BM25Index` | **(required)** | The BM25 index for keyword search. |
837
+ | `config.vectorWeight` | `number` | `1` | Weight for vector search results in RRF. |
838
+ | `config.bm25Weight` | `number` | `1` | Weight for BM25 results in RRF. |
839
+ | `config.topK` | `number` | `10` | Number of results to return when `search` is called without an explicit `topK`. |
840
+
841
+ ```typescript
842
+ import { createInMemoryStore, createBM25Index, createHybridSearch } from '@elsium-ai/rag'
843
+
844
+ const vectorStore = createInMemoryStore()
845
+ const bm25 = createBM25Index()
846
+
847
+ const hybrid = createHybridSearch(vectorStore, bm25, {
848
+ vectorWeight: 0.7,
849
+ bm25Weight: 0.3,
850
+ })
851
+
852
+ const results = await hybrid.search('search query', queryEmbedding, 10)
853
+ ```
854
+
663
855
  ---
664
856
 
665
857
  ## Pipeline
package/dist/bm25.d.ts ADDED
@@ -0,0 +1,10 @@
1
+ import type { Chunk, RetrievalResult } from './types';
2
+ export interface BM25Index {
3
+ index(chunks: Chunk[]): void;
4
+ search(query: string, topK?: number): RetrievalResult[];
5
+ }
6
+ export declare function createBM25Index(options?: {
7
+ k1?: number;
8
+ b?: number;
9
+ }): BM25Index;
10
+ //# sourceMappingURL=bm25.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../src/bm25.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAErD,MAAM,WAAW,SAAS;IACzB,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,IAAI,CAAA;IAC5B,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,eAAe,EAAE,CAAA;CACvD;AAgBD,wBAAgB,eAAe,CAAC,OAAO,CAAC,EAAE;IAAE,EAAE,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CAwFhF"}
@@ -1,3 +1,4 @@
1
+ import type { Registry } from '@elsium-ai/core';
1
2
  import type { EmbeddingConfig, EmbeddingVector } from './types';
2
3
  export interface EmbeddingProvider {
3
4
  readonly name: string;
@@ -7,5 +8,7 @@ export interface EmbeddingProvider {
7
8
  }
8
9
  export declare function createOpenAIEmbeddings(config: EmbeddingConfig): EmbeddingProvider;
9
10
  export declare function createMockEmbeddings(dims?: number): EmbeddingProvider;
11
+ export type EmbeddingProviderFactory = (config: EmbeddingConfig) => EmbeddingProvider;
12
+ export declare const embeddingProviderRegistry: Registry<EmbeddingProviderFactory>;
10
13
  export declare function getEmbeddingProvider(config: EmbeddingConfig): EmbeddingProvider;
11
14
  //# sourceMappingURL=embeddings.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../src/embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAE/D,MAAM,WAAW,iBAAiB;IACjC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAA;IAE3B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAA;IAC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;CACvD;AAID,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAyEjF;AAID,wBAAgB,oBAAoB,CAAC,IAAI,SAAM,GAAG,iBAAiB,CAgClE;AAID,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAa/E"}
1
+ {"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../src/embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAA;AAC/C,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAE/D,MAAM,WAAW,iBAAiB;IACjC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAA;IAE3B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAA;IAC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;CACvD;AAID,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAyEjF;AAID,wBAAgB,oBAAoB,CAAC,IAAI,SAAM,GAAG,iBAAiB,CAgClE;AAID,MAAM,MAAM,wBAAwB,GAAG,CAAC,MAAM,EAAE,eAAe,KAAK,iBAAiB,CAAA;AAErF,eAAO,MAAM,yBAAyB,EAAE,QAAQ,CAAC,wBAAwB,CACX,CAAA;AAI9D,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAiB/E"}
@@ -0,0 +1,14 @@
1
+ import type { BM25Index } from './bm25';
2
+ import type { EmbeddingVector, RetrievalResult } from './types';
3
+ import type { VectorStore } from './vectorstore';
4
+ export interface HybridSearchConfig {
5
+ k?: number;
6
+ vectorWeight?: number;
7
+ bm25Weight?: number;
8
+ topK?: number;
9
+ }
10
+ export interface HybridSearch {
11
+ search(query: string, queryEmbedding: EmbeddingVector, topK?: number): Promise<RetrievalResult[]>;
12
+ }
13
+ export declare function createHybridSearch(vectorStore: VectorStore, bm25Index: BM25Index, config?: HybridSearchConfig): HybridSearch;
14
+ //# sourceMappingURL=hybrid.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../src/hybrid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AACvC,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAC/D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAA;AAEhD,MAAM,WAAW,kBAAkB;IAClC,CAAC,CAAC,EAAE,MAAM,CAAA;IACV,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,IAAI,CAAC,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,YAAY;IAC5B,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,cAAc,EAAE,eAAe,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;CACjG;AAsCD,wBAAgB,kBAAkB,CACjC,WAAW,EAAE,WAAW,EACxB,SAAS,EAAE,SAAS,EACpB,MAAM,CAAC,EAAE,kBAAkB,GACzB,YAAY,CAwBd"}
package/dist/index.d.ts CHANGED
@@ -3,10 +3,22 @@ export { textLoader, markdownLoader, htmlLoader, jsonLoader, csvLoader, getLoade
3
3
  export type { DocumentLoader } from './loaders';
4
4
  export { fixedSizeChunker, recursiveChunker, sentenceChunker, getChunker, } from './chunkers';
5
5
  export type { Chunker } from './chunkers';
6
- export { createOpenAIEmbeddings, createMockEmbeddings, getEmbeddingProvider, } from './embeddings';
7
- export type { EmbeddingProvider } from './embeddings';
8
- export { createInMemoryStore, cosineSimilarity, mmrRerank, } from './vectorstore';
9
- export type { VectorStore } from './vectorstore';
6
+ export { createOpenAIEmbeddings, createMockEmbeddings, getEmbeddingProvider, embeddingProviderRegistry, } from './embeddings';
7
+ export type { EmbeddingProvider, EmbeddingProviderFactory } from './embeddings';
8
+ export { createInMemoryStore, cosineSimilarity, mmrRerank, vectorStoreRegistry, } from './vectorstore';
9
+ export type { VectorStore, VectorStoreFactory } from './vectorstore';
10
10
  export { rag } from './pipeline';
11
11
  export type { RAGPipeline, RAGPipelineConfig, IngestResult } from './pipeline';
12
+ export { createPgVectorStore } from './stores/index';
13
+ export type { PgVectorStoreConfig } from './stores/index';
14
+ export { createQdrantStore } from './stores/index';
15
+ export type { QdrantStoreConfig } from './stores/index';
16
+ export { createGoogleEmbeddings } from './providers/google-embeddings';
17
+ export type { GoogleEmbeddingsConfig } from './providers/google-embeddings';
18
+ export { createCohereEmbeddings } from './providers/cohere-embeddings';
19
+ export type { CohereEmbeddingsConfig } from './providers/cohere-embeddings';
20
+ export { createBM25Index } from './bm25';
21
+ export type { BM25Index } from './bm25';
22
+ export { createHybridSearch } from './hybrid';
23
+ export type { HybridSearch, HybridSearchConfig } from './hybrid';
12
24
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACX,QAAQ,EACR,gBAAgB,EAChB,KAAK,EACL,aAAa,EACb,eAAe,EACf,aAAa,EACb,eAAe,EACf,YAAY,EACZ,UAAU,EACV,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,eAAe,GACf,MAAM,SAAS,CAAA;AAGhB,OAAO,EACN,UAAU,EACV,cAAc,EACd,UAAU,EACV,UAAU,EACV,SAAS,EACT,SAAS,GACT,MAAM,WAAW,CAAA;AAClB,YAAY,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAG/C,OAAO,EACN,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,UAAU,GACV,MAAM,YAAY,CAAA;AACnB,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAGzC,OAAO,EACN,sBAAsB,EACtB,oBAAoB,EACpB,oBAAoB,GACpB,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAA;AAGrD,OAAO,EACN,mBAAmB,EACnB,gBAAgB,EAChB,SAAS,GACT,MAAM,eAAe,CAAA;AACtB,YAAY,EAAE,WAAW,EAAE,MAAM,eAAe,CAAA;AAGhD,OAAO,EAAE,GAAG,EAAE,MAAM,YAAY,CAAA;AAChC,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACX,QAAQ,EACR,gBAAgB,EAChB,KAAK,EACL,aAAa,EACb,eAAe,EACf,aAAa,EACb,eAAe,EACf,YAAY,EACZ,UAAU,EACV,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,eAAe,GACf,MAAM,SAAS,CAAA;AAGhB,OAAO,EACN,UAAU,EACV,cAAc,EACd,UAAU,EACV,UAAU,EACV,SAAS,EACT,SAAS,GACT,MAAM,WAAW,CAAA;AAClB,YAAY,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAG/C,OAAO,EACN,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,UAAU,GACV,MAAM,YAAY,CAAA;AACnB,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAGzC,OAAO,EACN,sBAAsB,EACtB,oBAAoB,EACpB,oBAAoB,EACpB,yBAAyB,GACzB,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,cAAc,CAAA;AAG/E,OAAO,EACN,mBAAmB,EACnB,gBAAgB,EAChB,SAAS,EACT,mBAAmB,GACnB,MAAM,eAAe,CAAA;AACtB,YAAY,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAA;AAGpE,OAAO,EAAE,GAAG,EAAE,MAAM,YAAY,CAAA;AAChC,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAG9E,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AACpD,YAAY,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAClD,YAAY,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAGvD,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AACtE,YAAY,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AAC3E,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AACtE,YAAY,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AAG3E,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAA;AACxC,YAAY,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAGvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA;AAC7C,YAAY,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA"}
package/dist/index.js CHANGED
@@ -97,6 +97,89 @@ function generateId(prefix = "els") {
97
97
  const random = cryptoHex(4);
98
98
  return `${prefix}_${timestamp}_${random}`;
99
99
  }
// ../core/src/logger.ts
/** Numeric rank of each supported log level; a higher rank is more severe. */
var LOG_LEVELS = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3
};
/**
 * Builds a logger that writes one JSON document per call to the console.
 *
 * `error` entries go to `console.error`, `warn` entries to `console.warn`,
 * everything else to `console.log`.
 *
 * @param options.level   Minimum level to emit ("debug" | "info" | "warn" | "error").
 *                        Defaults to "info"; an unrecognised value is treated as "info".
 * @param options.pretty  When true, entries are indented with two spaces.
 * @param options.context Fields merged into every entry. `level`, `message` and
 *                        `timestamp` always win over same-named context fields.
 * @returns An object with `debug`, `info`, `warn`, `error` and `child(extraContext)`.
 */
function createLogger(options = {}) {
  const { level = "info", pretty = false, context = {} } = options;
  // An unrecognised level used to resolve to `undefined`, which made the
  // threshold comparison below always false and silently emitted everything.
  const minLevel = Object.prototype.hasOwnProperty.call(LOG_LEVELS, level)
    ? LOG_LEVELS[level]
    : LOG_LEVELS.info;

  // Logging must never throw into the caller: circular or BigInt payloads
  // make JSON.stringify throw, so fall back to a reduced entry.
  function serialize(entry) {
    try {
      return pretty ? JSON.stringify(entry, null, 2) : JSON.stringify(entry);
    } catch (err) {
      return JSON.stringify({
        level: entry.level,
        message: String(entry.message),
        timestamp: entry.timestamp,
        serializationError: err instanceof Error ? err.message : String(err)
      });
    }
  }

  function log(logLevel, message, data) {
    if (LOG_LEVELS[logLevel] < minLevel) {
      return;
    }
    const entry = {
      ...context,
      level: logLevel,
      message,
      timestamp: new Date().toISOString(),
      // `!= null` keeps meaningful falsy payloads such as 0, "" and false.
      ...(data != null ? { data } : {})
    };
    const output = serialize(entry);
    if (logLevel === "error") {
      console.error(output);
    } else if (logLevel === "warn") {
      console.warn(output);
    } else {
      console.log(output);
    }
  }

  return {
    debug: (msg, data) => log("debug", msg, data),
    info: (msg, data) => log("info", msg, data),
    warn: (msg, data) => log("warn", msg, data),
    error: (msg, data) => log("error", msg, data),
    /** Returns a logger with the same level/format and `childContext` layered over this context. */
    child(childContext) {
      return createLogger({
        level,
        pretty,
        context: { ...context, ...childContext }
      });
    }
  };
}
143
+ // ../core/src/schema.ts
144
+ var log = createLogger();
// ../core/src/registry.ts
var log2 = createLogger();
var BLOCKED_KEYS = new Set(["__proto__", "constructor", "prototype"]);
/**
 * Creates a named, Map-backed registry of factories.
 *
 * Names listed in BLOCKED_KEYS are never stored: `register` logs a warning
 * and ignores them, `get` yields `undefined`, `has` and `unregister` yield `false`.
 *
 * @param label Name used in the registry's log messages.
 * @returns An object with `register`, `get`, `list`, `has` and `unregister`.
 */
function createRegistry(label) {
  const entries = new Map;
  const isBlocked = (key) => BLOCKED_KEYS.has(key);

  /** Stores `factory` under `name`, replacing any previous entry. */
  function register(name, factory) {
    if (isBlocked(name)) {
      log2.warn(`Registry(${label}): rejected blocked key "${name}"`);
      return;
    }
    entries.set(name, factory);
    log2.debug(`Registry(${label}): registered "${name}"`);
  }

  /** Looks up the factory stored under `name`, if any. */
  function get(name) {
    return isBlocked(name) ? undefined : entries.get(name);
  }

  /** Lists registered names in insertion order. */
  function list() {
    return [...entries.keys()];
  }

  /** Reports whether `name` is registered. */
  function has(name) {
    return !isBlocked(name) && entries.has(name);
  }

  /** Removes `name`; returns whether an entry was actually removed. */
  function unregister(name) {
    if (isBlocked(name) || !entries.delete(name)) {
      return false;
    }
    log2.debug(`Registry(${label}): unregistered "${name}"`);
    return true;
  }

  return { register, get, list, has, unregister };
}
100
183
  // src/loaders.ts
101
184
  function createDocument(content, metadata) {
102
185
  return {
@@ -549,7 +632,11 @@ function createMockEmbeddings(dims = 128) {
549
632
  }
550
633
  };
551
634
  }
635
+ var embeddingProviderRegistry = createRegistry("embeddingProvider");
552
636
  function getEmbeddingProvider(config) {
637
+ const registered = embeddingProviderRegistry.get(config.provider);
638
+ if (registered)
639
+ return registered(config);
553
640
  switch (config.provider) {
554
641
  case "openai":
555
642
  return createOpenAIEmbeddings(config);
@@ -558,12 +645,13 @@ function getEmbeddingProvider(config) {
558
645
  default:
559
646
  throw new ElsiumError({
560
647
  code: "CONFIG_ERROR",
561
- message: `Unknown embedding provider: ${config.provider}`,
648
+ message: `Unknown embedding provider: ${config.provider}. Available: openai, mock${embeddingProviderRegistry.list().length ? `, ${embeddingProviderRegistry.list().join(", ")}` : ""}`,
562
649
  retryable: false
563
650
  });
564
651
  }
565
652
  }
566
653
  // src/vectorstore.ts
654
+ var vectorStoreRegistry = createRegistry("vectorStore");
567
655
  function cosineSimilarity(a, b) {
568
656
  if (a.length !== b.length)
569
657
  return 0;
@@ -692,13 +780,19 @@ function rag(config) {
692
780
  minScore: 0,
693
781
  strategy: "similarity"
694
782
  };
695
- if (config.store) {
696
- throw new Error("External vector store not yet implemented. Use in-memory store.");
697
- }
698
783
  const loader = getLoader(loaderType);
699
784
  const chunker = getChunker(chunkingConfig);
700
785
  const embeddingProvider = getEmbeddingProvider(config.embeddings);
701
- const vectorStore = createInMemoryStore();
786
+ let vectorStore;
787
+ if (config.store) {
788
+ const factory = vectorStoreRegistry.get(config.store.provider);
789
+ if (!factory) {
790
+ throw new Error(`Unknown vector store provider: ${config.store.provider}. Register it with vectorStoreRegistry.register().`);
791
+ }
792
+ vectorStore = factory(config.store);
793
+ } else {
794
+ vectorStore = createInMemoryStore();
795
+ }
702
796
  async function embedChunks(chunks) {
703
797
  const texts = chunks.map((c) => c.content);
704
798
  const embeddings = await embeddingProvider.embedBatch(texts);
@@ -744,7 +838,442 @@ function rag(config) {
744
838
  }
745
839
  };
746
840
  }
841
+ // src/stores/pgvector.ts
842
+ import { createRequire } from "node:module";
843
+ var require2 = createRequire(import.meta.url);
var log3 = createLogger();
var BLOCKED_KEYS2 = new Set(["__proto__", "constructor", "prototype"]);
var TABLE_NAME_PATTERN = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
/**
 * Creates a vector store backed by PostgreSQL with the pgvector extension.
 *
 * The `pg` driver is loaded lazily on first use. The first successful
 * connection also runs `CREATE EXTENSION IF NOT EXISTS vector` and creates
 * the table when it does not exist.
 *
 * @param config.connectionString Passed to `new pg.Client({ connectionString })`.
 * @param config.tableName  Table to use; defaults to "vector_chunks". It is
 *                          interpolated into SQL, so it must match TABLE_NAME_PATTERN.
 * @param config.dimensions Size of the `vector(...)` column; defaults to 1536.
 * @returns A store named "pgvector" with `upsert`, `query`, `delete`, `clear` and `count`.
 * @throws Error when the table name or dimensions are invalid, or on first use
 *         when `pg` cannot be loaded or the connection/initialisation fails.
 */
function createPgVectorStore(config) {
  const { connectionString, tableName = "vector_chunks", dimensions = 1536 } = config;
  if (BLOCKED_KEYS2.has(tableName)) {
    throw new Error(`Invalid table name: ${tableName}`);
  }
  if (!TABLE_NAME_PATTERN.test(tableName)) {
    throw new Error(`Invalid table name format: ${tableName}`);
  }
  // `dimensions` is interpolated into DDL below, so it must be a plain positive integer.
  if (!Number.isInteger(dimensions) || dimensions <= 0) {
    throw new Error(`Invalid vector dimensions: ${dimensions}`);
  }

  // Shared promise for the connected client, so concurrent first calls reuse
  // one connection. Reset on failure so the next call retries.
  let pendingClient = null;

  const describe = (err2) => (err2 instanceof Error ? err2.message : String(err2));

  // Loads the optional `pg` dependency; only a load failure is reported as "pg is required".
  function loadDriver() {
    try {
      return require2("pg");
    } catch (err2) {
      log3.error("Failed to initialize PgVector store", { error: describe(err2) });
      throw new Error("pg is required for PgVector store. Install it as a dependency.");
    }
  }

  // Connects and prepares the schema; the client is returned only when every step succeeded.
  async function connect() {
    const pg = loadDriver();
    const candidate = new pg.Client({ connectionString });
    try {
      await candidate.connect();
      await candidate.query("CREATE EXTENSION IF NOT EXISTS vector");
      await candidate.query(`
        CREATE TABLE IF NOT EXISTS ${tableName} (
          id TEXT PRIMARY KEY,
          content TEXT NOT NULL,
          document_id TEXT NOT NULL,
          chunk_index INTEGER NOT NULL,
          metadata JSONB DEFAULT '{}',
          embedding vector(${dimensions})
        )
      `);
      return candidate;
    } catch (err2) {
      log3.error("Failed to initialize PgVector store", { error: describe(err2) });
      try {
        // Best-effort cleanup of the half-initialised connection.
        await candidate.end();
      } catch {}
      // Surface the real cause (refused connection, missing privileges, ...)
      // instead of relabelling it as a missing dependency.
      throw err2;
    }
  }

  function getClient() {
    if (!pendingClient) {
      pendingClient = connect().catch((err2) => {
        pendingClient = null;
        throw err2;
      });
    }
    return pendingClient;
  }

  return {
    name: "pgvector",
    /** Inserts each chunk, or overwrites the row that has the same id. Chunks whose id is a blocked key are skipped. */
    async upsert(chunks) {
      const pg = await getClient();
      for (const chunk of chunks) {
        if (BLOCKED_KEYS2.has(chunk.id))
          continue;
        // Text form of the vector, e.g. "[0.1,0.2]".
        const embedding = `[${chunk.embedding.values.join(",")}]`;
        await pg.query(`INSERT INTO ${tableName} (id, content, document_id, chunk_index, metadata, embedding)
           VALUES ($1, $2, $3, $4, $5, $6)
           ON CONFLICT (id) DO UPDATE SET
             content = EXCLUDED.content,
             document_id = EXCLUDED.document_id,
             chunk_index = EXCLUDED.chunk_index,
             metadata = EXCLUDED.metadata,
             embedding = EXCLUDED.embedding`, [
          chunk.id,
          chunk.content,
          chunk.documentId,
          chunk.index,
          JSON.stringify(chunk.metadata),
          embedding
        ]);
      }
    },
    /**
     * Returns up to `options.topK` (default 5) rows ordered by `embedding <=> query`,
     * keeping rows whose score `1 - (embedding <=> query)` is at least
     * `options.minScore` (default 0).
     */
    async query(embedding, options) {
      const pg = await getClient();
      const topK = options?.topK ?? 5;
      const minScore = options?.minScore ?? 0;
      const embeddingStr = `[${embedding.values.join(",")}]`;
      const result = await pg.query(`SELECT id, content, document_id, chunk_index, metadata,
                1 - (embedding <=> $1::vector) as score
         FROM ${tableName}
         WHERE 1 - (embedding <=> $1::vector) >= $2
         ORDER BY embedding <=> $1::vector
         LIMIT $3`, [embeddingStr, minScore, topK]);
      return result.rows.map((row) => ({
        chunk: {
          id: row.id,
          content: row.content,
          documentId: row.document_id,
          index: row.chunk_index,
          // Stored metadata overrides these zero defaults.
          metadata: {
            startChar: 0,
            endChar: 0,
            tokenEstimate: 0,
            ...row.metadata ?? {}
          }
        },
        score: row.score,
        distance: 1 - row.score
      }));
    },
    /** Deletes the rows with the given ids; blocked keys are ignored and an empty list is a no-op. */
    async delete(ids) {
      const pg = await getClient();
      const filtered = ids.filter((id) => !BLOCKED_KEYS2.has(id));
      if (filtered.length === 0)
        return;
      const placeholders = filtered.map((_, i) => `$${i + 1}`).join(", ");
      await pg.query(`DELETE FROM ${tableName} WHERE id IN (${placeholders})`, filtered);
    },
    /** Deletes every row in the table (the table itself is kept). */
    async clear() {
      const pg = await getClient();
      await pg.query(`DELETE FROM ${tableName}`);
    },
    /** Returns the number of rows in the table. */
    async count() {
      const pg = await getClient();
      const result = await pg.query(`SELECT COUNT(*)::int as count FROM ${tableName}`);
      return result.rows[0]?.count ?? 0;
    }
  };
}
// src/stores/qdrant.ts
/**
 * Creates a vector store that talks to the Qdrant REST API through `fetch`.
 *
 * @param config.url            Base URL of the Qdrant server; trailing slashes are ignored.
 * @param config.apiKey         Optional; sent as the `api-key` header when set.
 * @param config.collectionName Collection to operate on (URL-encoded into the path).
 * @param config.dimensions     Vector size used when `clear()` recreates the collection.
 * @returns A store named "qdrant" with `upsert`, `query`, `delete`, `clear` and `count`.
 * @throws The error built by `ElsiumError.providerError` for any non-2xx response.
 */
function createQdrantStore(config) {
  const { url, apiKey, collectionName, dimensions } = config;
  // "http://host:6333/" + "/collections/..." used to yield a double slash.
  const baseUrl = String(url).replace(/\/+$/, "");
  const collectionPath = `/collections/${encodeURIComponent(collectionName)}`;
  const headers = {
    "Content-Type": "application/json"
  };
  if (apiKey) {
    headers["api-key"] = apiKey;
  }

  // Sends one JSON request; resolves to the parsed body, or null for 204.
  async function request(method, path, body) {
    const response = await fetch(`${baseUrl}${path}`, {
      method,
      headers,
      ...body ? { body: JSON.stringify(body) } : {}
    });
    if (!response.ok) {
      const text = await response.text().catch(() => "Unknown error");
      throw ElsiumError.providerError(`Qdrant error ${response.status}: ${text}`, {
        provider: "qdrant",
        statusCode: response.status,
        retryable: response.status >= 500
      });
    }
    if (response.status === 204)
      return null;
    return response.json();
  }

  return {
    name: "qdrant",
    /** Writes the chunks as points; an empty list sends no request. */
    async upsert(chunks) {
      if (chunks.length === 0)
        return;
      // NOTE(review): Qdrant documents point ids as unsigned integers or UUIDs —
      // confirm the chunk ids passed here conform.
      const points = chunks.map((chunk) => ({
        id: chunk.id,
        vector: chunk.embedding.values,
        payload: {
          content: chunk.content,
          documentId: chunk.documentId,
          index: chunk.index,
          metadata: chunk.metadata
        }
      }));
      await request("PUT", `${collectionPath}/points`, {
        points
      });
    },
    /** Searches with `options.topK` (default 5) as the limit and `options.minScore` (default 0) as the score threshold. */
    async query(embedding, options) {
      const topK = options?.topK ?? 5;
      const minScore = options?.minScore ?? 0;
      const result = await request("POST", `${collectionPath}/points/search`, {
        vector: embedding.values,
        limit: topK,
        score_threshold: minScore,
        with_payload: true
      });
      return (result.result ?? []).map((hit) => ({
        chunk: {
          id: String(hit.id),
          content: hit.payload.content,
          documentId: hit.payload.documentId,
          index: hit.payload.index,
          metadata: hit.payload.metadata
        },
        score: hit.score,
        distance: 1 - hit.score
      }));
    },
    /** Deletes the points with the given ids; an empty list sends no request. */
    async delete(ids) {
      if (ids.length === 0)
        return;
      await request("POST", `${collectionPath}/points/delete`, {
        points: ids
      });
    },
    /** Drops the collection (best effort) and recreates it with Cosine distance. */
    async clear() {
      try {
        await request("DELETE", collectionPath);
      } catch {
        // Deliberately ignored: the collection may not exist yet. A real
        // connectivity or auth problem still surfaces from the PUT below.
      }
      await request("PUT", collectionPath, {
        vectors: { size: dimensions, distance: "Cosine" }
      });
    },
    /** Returns `points_count` from the collection info, or 0 when absent. */
    async count() {
      const result = await request("GET", collectionPath);
      return result.result?.points_count ?? 0;
    }
  };
}
1042
+ vectorStoreRegistry.register("qdrant", (config) => createQdrantStore(config));
// src/providers/google-embeddings.ts
/**
 * Creates an embedding provider that calls Google's `embedContent` endpoint.
 *
 * @param config.apiKey     Required API key.
 * @param config.model      Model name; defaults to "text-embedding-004".
 * @param config.dimensions Sent as `outputDimensionality`; defaults to 768.
 * @returns A provider named "google" with `embed(text)` and `embedBatch(texts)`.
 *          `embedBatch` issues one request per text, in order.
 * @throws ElsiumError with code "CONFIG_ERROR" when `apiKey` is missing, and the
 *         error built by `ElsiumError.providerError` for failed or malformed responses.
 */
function createGoogleEmbeddings(config) {
  const { apiKey, model = "text-embedding-004", dimensions = 768 } = config;
  if (!apiKey) {
    throw new ElsiumError({
      code: "CONFIG_ERROR",
      message: "Google API key is required for embeddings",
      retryable: false
    });
  }
  const baseUrl = "https://generativelanguage.googleapis.com/v1beta";
  const endpoint = `${baseUrl}/models/${model}:embedContent`;
  const headers = {
    "Content-Type": "application/json",
    // The key travels in a header rather than the query string so it does
    // not end up in URLs recorded by proxies, access logs or error traces.
    "x-goog-api-key": apiKey
  };

  // Embeds a single text and returns its raw vector.
  async function embedOne(text) {
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify({
        model: `models/${model}`,
        content: { parts: [{ text }] },
        ...dimensions ? { outputDimensionality: dimensions } : {}
      })
    });
    if (!response.ok) {
      const body = await response.text().catch(() => "Unknown error");
      throw ElsiumError.providerError(`Google embeddings error ${response.status}: ${body}`, {
        provider: "google",
        statusCode: response.status,
        retryable: response.status >= 500
      });
    }
    const json = await response.json();
    const values = json?.embedding?.values;
    if (!Array.isArray(values)) {
      // Report an unexpected payload as a provider error instead of a bare TypeError.
      throw ElsiumError.providerError("Google embeddings error: response did not contain embedding values", {
        provider: "google",
        statusCode: response.status,
        retryable: false
      });
    }
    return values;
  }

  // Embeds the texts sequentially, preserving input order.
  async function callAPI(texts) {
    const results = [];
    for (const text of texts) {
      results.push(await embedOne(text));
    }
    return results;
  }

  return {
    name: "google",
    dimensions,
    async embed(text) {
      const [embedding] = await callAPI([text]);
      return { values: embedding, dimensions: embedding.length };
    },
    async embedBatch(texts) {
      const embeddings = await callAPI(texts);
      return embeddings.map((values) => ({
        values,
        dimensions: values.length
      }));
    }
  };
}
1096
+ embeddingProviderRegistry.register("google", (config) => createGoogleEmbeddings({
1097
+ apiKey: config.apiKey ?? "",
1098
+ model: config.model,
1099
+ dimensions: config.dimensions
1100
+ }));
// src/providers/cohere-embeddings.ts
/**
 * Creates an embedding provider that calls Cohere's `/v2/embed` endpoint.
 *
 * @param config.apiKey     Required API key, sent as a Bearer token.
 * @param config.model      Model name; defaults to "embed-v4.0".
 * @param config.inputType  Sent as `input_type`; defaults to "search_document".
 * @param config.dimensions Optional. When set it is sent as `output_dimension`
 *                          and reported as the provider's `dimensions`.
 * @returns A provider named "cohere" with `embed(text)` and `embedBatch(texts)`.
 * @throws ElsiumError with code "CONFIG_ERROR" when `apiKey` is missing, and the
 *         error built by `ElsiumError.providerError` for failed or malformed responses.
 */
function createCohereEmbeddings(config) {
  const { apiKey, model = "embed-v4.0", inputType = "search_document", dimensions } = config;
  if (!apiKey) {
    throw new ElsiumError({
      code: "CONFIG_ERROR",
      message: "Cohere API key is required for embeddings",
      retryable: false
    });
  }
  // Cohere's Embed API documents a limit of 96 texts per request.
  const MAX_TEXTS_PER_REQUEST = 96;
  // Cohere documents 1536 as the default output size of embed-v4.0. Models not
  // listed here keep the 1024 this provider has always reported.
  const DEFAULT_DIMENSIONS = { "embed-v4.0": 1536 };
  const reportedDimensions = dimensions ?? DEFAULT_DIMENSIONS[model] ?? 1024;

  // Embeds one request-sized slice of texts.
  async function callAPI(texts) {
    const response = await fetch("https://api.cohere.com/v2/embed", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`
      },
      body: JSON.stringify({
        texts,
        model,
        input_type: inputType,
        embedding_types: ["float"],
        ...(dimensions != null ? { output_dimension: dimensions } : {})
      })
    });
    if (!response.ok) {
      const body = await response.text().catch(() => "Unknown error");
      throw ElsiumError.providerError(`Cohere embeddings error ${response.status}: ${body}`, {
        provider: "cohere",
        statusCode: response.status,
        retryable: response.status >= 500
      });
    }
    const json = await response.json();
    const vectors = json?.embeddings?.float;
    if (!Array.isArray(vectors)) {
      // Report an unexpected payload as a provider error instead of a bare TypeError.
      throw ElsiumError.providerError("Cohere embeddings error: response did not contain float embeddings", {
        provider: "cohere",
        statusCode: response.status,
        retryable: false
      });
    }
    return vectors;
  }

  // Splits the input into request-sized slices and concatenates the results in order.
  // An empty input performs no request.
  async function embedAll(texts) {
    const vectors = [];
    for (let start = 0; start < texts.length; start += MAX_TEXTS_PER_REQUEST) {
      const slice = texts.slice(start, start + MAX_TEXTS_PER_REQUEST);
      vectors.push(...await callAPI(slice));
    }
    return vectors;
  }

  return {
    name: "cohere",
    dimensions: reportedDimensions,
    async embed(text) {
      const [embedding] = await embedAll([text]);
      return { values: embedding, dimensions: embedding.length };
    },
    async embedBatch(texts) {
      const embeddings = await embedAll(texts);
      return embeddings.map((values) => ({
        values,
        dimensions: values.length
      }));
    }
  };
}
1152
+ embeddingProviderRegistry.register("cohere", (config) => createCohereEmbeddings({
1153
+ apiKey: config.apiKey ?? "",
1154
+ model: config.model
1155
+ }));
// src/bm25.ts
/**
 * Splits text into lowercase word tokens.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace becomes a separator. The Unicode-aware classes keep accented and
 * non-Latin words intact; the previous ASCII-only `\w` removed them. Output for
 * pure-ASCII input is unchanged.
 *
 * @param text Input string.
 * @returns The non-empty tokens in their original order.
 */
function tokenize(text) {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((token) => token.length > 0);
}
/**
 * Creates an in-memory BM25 keyword index over chunks.
 *
 * @param options.k1 Term-frequency saturation parameter; defaults to 1.2.
 * @param options.b  Length-normalisation parameter; defaults to 0.75.
 * @returns An object with `index(chunks)` and `search(query, topK = 5)`.
 */
function createBM25Index(options) {
  const k1 = options?.k1 ?? 1.2;
  const b = options?.b ?? 0.75;
  const docs = [];
  // term -> number of indexed documents containing it
  const docFreqs = new Map();
  // chunk id -> position in `docs`, so re-indexing a chunk replaces it
  const positionById = new Map();
  let avgDocLength = 0;

  // Removes a document's contribution to the document-frequency table.
  function forgetDoc(doc) {
    for (const term of doc.termFreqs.keys()) {
      const remaining = (docFreqs.get(term) ?? 0) - 1;
      if (remaining > 0) {
        docFreqs.set(term, remaining);
      } else {
        docFreqs.delete(term);
      }
    }
  }

  // Tokenises one chunk and adds it, replacing any document with the same id.
  function addDoc(chunk) {
    const tokens = tokenize(chunk.content);
    const termFreqs = new Map();
    for (const token of tokens) {
      termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
    }
    const doc = { chunk, termFreqs, length: tokens.length };
    const known = chunk.id != null ? positionById.get(chunk.id) : undefined;
    if (known !== undefined) {
      // Appending a second copy would inflate document frequencies and
      // return the same chunk twice from search().
      forgetDoc(docs[known]);
      docs[known] = doc;
    } else {
      if (chunk.id != null) {
        positionById.set(chunk.id, docs.length);
      }
      docs.push(doc);
    }
    for (const term of termFreqs.keys()) {
      docFreqs.set(term, (docFreqs.get(term) ?? 0) + 1);
    }
  }

  function recalcAvgLength() {
    if (docs.length === 0) {
      avgDocLength = 0;
      return;
    }
    avgDocLength = docs.reduce((sum, d) => sum + d.length, 0) / docs.length;
  }

  // Inverse document frequency; 0 for terms that occur in no document.
  function idf(term) {
    const df = docFreqs.get(term) ?? 0;
    const n = docs.length;
    if (df === 0)
      return 0;
    return Math.log((n - df + 0.5) / (df + 0.5) + 1);
  }

  // Sums the per-term BM25 contribution for one document.
  // `weightedTerms` holds [term, idf] pairs computed once per query.
  function scoreSingle(doc, weightedTerms) {
    let score = 0;
    for (const [term, termIdf] of weightedTerms) {
      const tf = doc.termFreqs.get(term) ?? 0;
      if (tf === 0)
        continue;
      const numerator = tf * (k1 + 1);
      const denominator = tf + k1 * (1 - b + b * (doc.length / avgDocLength));
      score += termIdf * (numerator / denominator);
    }
    return score;
  }

  return {
    /** Adds the chunks to the index; a chunk whose id is already indexed replaces the earlier entry. */
    index(chunks) {
      for (const chunk of chunks) {
        addDoc(chunk);
      }
      recalcAvgLength();
    },
    /**
     * Returns up to `topK` chunks with a positive BM25 score, best first.
     * Each result has `distance: 0`. A non-positive `topK` yields no results.
     */
    search(query, topK = 5) {
      // A negative topK used to reach slice(0, topK) and drop results from the end instead.
      if (docs.length === 0 || !(topK > 0))
        return [];
      const queryTerms = tokenize(query);
      if (queryTerms.length === 0)
        return [];
      // IDF depends only on the term, so compute it once per query term
      // rather than once per (document, term) pair.
      const weightedTerms = queryTerms.map((term) => [term, idf(term)]);
      const scored = [];
      for (const doc of docs) {
        const score = scoreSingle(doc, weightedTerms);
        if (score > 0) {
          scored.push({ chunk: doc.chunk, score });
        }
      }
      scored.sort((a, b2) => b2.score - a.score);
      return scored.slice(0, topK).map((s) => ({
        chunk: s.chunk,
        score: s.score,
        distance: 0
      }));
    }
  };
}
// src/hybrid.ts
/**
 * Merges two ranked result lists with weighted Reciprocal Rank Fusion.
 *
 * A result at zero-based position `i` of a list contributes
 * `weight / (k + i + 1)` to the score of its `chunk.id`. Contributions for
 * the same id are summed across both lists.
 *
 * @param vectorResults Ranked results from the vector store (best first).
 * @param bm25Results   Ranked results from the keyword index (best first).
 * @param k             Rank offset in the denominator.
 * @param vectorWeight  Weight applied to `vectorResults`.
 * @param bm25Weight    Weight applied to `bm25Results`.
 * @returns One `{ chunk, score, distance: 0 }` per distinct chunk id, highest score first.
 */
function reciprocalRankFusion(vectorResults, bm25Results, k, vectorWeight, bm25Weight) {
  const fused = new Map();
  // Both lists are folded in the same way; only the weight differs.
  const accumulate = (ranked, weight) => {
    for (const [rank, result] of ranked.entries()) {
      const contribution = weight / (k + rank + 1);
      const entry = fused.get(result.chunk.id);
      if (entry) {
        entry.score += contribution;
      } else {
        // The chunk object kept is the first one seen for this id.
        fused.set(result.chunk.id, { score: contribution, chunk: result.chunk });
      }
    }
  };
  accumulate(vectorResults, vectorWeight);
  accumulate(bm25Results, bm25Weight);
  return Array.from(fused.values())
    .sort((a, b) => b.score - a.score)
    .map(({ score, chunk }) => ({ chunk, score, distance: 0 }));
}
/**
 * Combines a vector store and a BM25 index into a single search that fuses
 * both rankings with `reciprocalRankFusion`.
 *
 * @param vectorStore Object with `query(embedding, { topK })`.
 * @param bm25Index   Object with `search(query, topK)`.
 * @param config.k            RRF rank offset; defaults to 60.
 * @param config.vectorWeight Weight of the vector ranking; defaults to 1.
 * @param config.bm25Weight   Weight of the BM25 ranking; defaults to 1.
 * @param config.topK         Result count used when `search` gets no `topK`; defaults to 10.
 * @returns An object with `search(query, queryEmbedding, topK?)` resolving to
 *          at most `topK` fused results.
 */
function createHybridSearch(vectorStore, bm25Index, config) {
  const k = config?.k ?? 60;
  const vectorWeight = config?.vectorWeight ?? 1;
  const bm25Weight = config?.bm25Weight ?? 1;
  const defaultTopK = config?.topK ?? 10;
  return {
    async search(query, queryEmbedding, topK) {
      const limit = topK ?? defaultTopK;
      // The keyword search is synchronous. Running it before the vector query
      // starts means a throw here cannot leave an in-flight promise without a handler.
      const bm25Results = bm25Index.search(query, limit);
      const vectorResults = await vectorStore.query(queryEmbedding, { topK: limit });
      const fused = reciprocalRankFusion(vectorResults, bm25Results, k, vectorWeight, bm25Weight);
      return fused.slice(0, limit);
    }
  };
}
747
1275
  export {
1276
+ vectorStoreRegistry,
748
1277
  textLoader,
749
1278
  sentenceChunker,
750
1279
  recursiveChunker,
@@ -757,9 +1286,16 @@ export {
757
1286
  getEmbeddingProvider,
758
1287
  getChunker,
759
1288
  fixedSizeChunker,
1289
+ embeddingProviderRegistry,
760
1290
  csvLoader,
1291
+ createQdrantStore,
1292
+ createPgVectorStore,
761
1293
  createOpenAIEmbeddings,
762
1294
  createMockEmbeddings,
763
1295
  createInMemoryStore,
1296
+ createHybridSearch,
1297
+ createGoogleEmbeddings,
1298
+ createCohereEmbeddings,
1299
+ createBM25Index,
764
1300
  cosineSimilarity
765
1301
  };
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,iBAAiB,EAAwB,MAAM,cAAc,CAAA;AAE3E,OAAO,KAAK,EAEX,cAAc,EACd,QAAQ,EAER,eAAe,EACf,UAAU,EACV,YAAY,EACZ,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,MAAM,SAAS,CAAA;AAChB,OAAO,EAAE,KAAK,WAAW,EAAuB,MAAM,eAAe,CAAA;AAErE,MAAM,WAAW,iBAAiB;IACjC,MAAM,CAAC,EAAE,UAAU,CAAA;IACnB,QAAQ,CAAC,EAAE,cAAc,CAAA;IACzB,UAAU,EAAE,eAAe,CAAA;IAC3B,KAAK,CAAC,EAAE,iBAAiB,CAAA;IACzB,SAAS,CAAC,EAAE,eAAe,CAAA;CAC3B;AAED,MAAM,WAAW,WAAW;IAC3B,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IAC9D,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IACzD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;IACvE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;IACtB,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,CAAA;IACxB,QAAQ,CAAC,iBAAiB,EAAE,iBAAiB,CAAA;IAC7C,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAA;CACjC;AAED,MAAM,WAAW,YAAY;IAC5B,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,WAAW,EAAE,MAAM,CAAA;CACnB;AAED,wBAAgB,GAAG,CAAC,MAAM,EAAE,iBAAiB,GAAG,WAAW,CA8E1D"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,iBAAiB,EAAwB,MAAM,cAAc,CAAA;AAE3E,OAAO,KAAK,EAEX,cAAc,EACd,QAAQ,EAER,eAAe,EACf,UAAU,EACV,YAAY,EACZ,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,MAAM,SAAS,CAAA;AAChB,OAAO,EAAE,KAAK,WAAW,EAA4C,MAAM,eAAe,CAAA;AAE1F,MAAM,WAAW,iBAAiB;IACjC,MAAM,CAAC,EAAE,UAAU,CAAA;IACnB,QAAQ,CAAC,EAAE,cAAc,CAAA;IACzB,UAAU,EAAE,eAAe,CAAA;IAC3B,KAAK,CAAC,EAAE,iBAAiB,CAAA;IACzB,SAAS,CAAC,EAAE,eAAe,CAAA;CAC3B;AAED,MAAM,WAAW,WAAW;IAC3B,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IAC9D,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IACzD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;IACvE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;IACtB,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,CAAA;IACxB,QAAQ,CAAC,iBAAiB,EAAE,iBAAiB,CAAA;IAC7C,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAA;CACjC;AAED,MAAM,WAAW,YAAY;IAC5B,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,WAAW,EAAE,MAAM,CAAA;CACnB;AAED,wBAAgB,GAAG,CAAC,MAAM,EAAE,iBAAiB,GAAG,WAAW,CAsF1D"}
@@ -0,0 +1,8 @@
1
+ import type { EmbeddingProvider } from '../embeddings';
2
+ export interface CohereEmbeddingsConfig {
3
+ apiKey: string;
4
+ model?: string;
5
+ inputType?: string;
6
+ }
7
+ export declare function createCohereEmbeddings(config: CohereEmbeddingsConfig): EmbeddingProvider;
8
+ //# sourceMappingURL=cohere-embeddings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cohere-embeddings.d.ts","sourceRoot":"","sources":["../../src/providers/cohere-embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAA;AAItD,MAAM,WAAW,sBAAsB;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,sBAAsB,GAAG,iBAAiB,CA2DxF"}
@@ -0,0 +1,8 @@
1
+ import type { EmbeddingProvider } from '../embeddings';
2
+ export interface GoogleEmbeddingsConfig {
3
+ apiKey: string;
4
+ model?: string;
5
+ dimensions?: number;
6
+ }
7
+ export declare function createGoogleEmbeddings(config: GoogleEmbeddingsConfig): EmbeddingProvider;
8
+ //# sourceMappingURL=google-embeddings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"google-embeddings.d.ts","sourceRoot":"","sources":["../../src/providers/google-embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAA;AAItD,MAAM,WAAW,sBAAsB;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,sBAAsB,GAAG,iBAAiB,CAgExF"}
@@ -0,0 +1,5 @@
1
+ export { createPgVectorStore } from './pgvector';
2
+ export type { PgVectorStoreConfig } from './pgvector';
3
+ export { createQdrantStore } from './qdrant';
4
+ export type { QdrantStoreConfig } from './qdrant';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stores/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAChD,YAAY,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAErD,OAAO,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAA;AAC5C,YAAY,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAA"}
@@ -0,0 +1,8 @@
1
+ import type { VectorStore } from '../vectorstore';
2
+ export interface PgVectorStoreConfig {
3
+ connectionString: string;
4
+ tableName?: string;
5
+ dimensions?: number;
6
+ }
7
+ export declare function createPgVectorStore(config: PgVectorStoreConfig): VectorStore;
8
+ //# sourceMappingURL=pgvector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pgvector.d.ts","sourceRoot":"","sources":["../../src/stores/pgvector.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAA;AAOjD,MAAM,WAAW,mBAAmB;IACnC,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAOD,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,mBAAmB,GAAG,WAAW,CAoI5E"}
@@ -0,0 +1,9 @@
1
+ import type { VectorStore } from '../vectorstore';
2
+ export interface QdrantStoreConfig {
3
+ url: string;
4
+ apiKey?: string;
5
+ collectionName: string;
6
+ dimensions: number;
7
+ }
8
+ export declare function createQdrantStore(config: QdrantStoreConfig): VectorStore;
9
+ //# sourceMappingURL=qdrant.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"qdrant.d.ts","sourceRoot":"","sources":["../../src/stores/qdrant.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAA;AAGjD,MAAM,WAAW,iBAAiB;IACjC,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,cAAc,EAAE,MAAM,CAAA;IACtB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,iBAAiB,GAAG,WAAW,CA+GxE"}
@@ -1,3 +1,4 @@
1
+ import type { Registry } from '@elsium-ai/core';
1
2
  import type { EmbeddedChunk, EmbeddingVector, QueryOptions, RetrievalResult } from './types';
2
3
  export interface VectorStore {
3
4
  readonly name: string;
@@ -7,6 +8,8 @@ export interface VectorStore {
7
8
  clear(): Promise<void>;
8
9
  count(): Promise<number>;
9
10
  }
11
+ export type VectorStoreFactory = (config: Record<string, unknown>) => VectorStore;
12
+ export declare const vectorStoreRegistry: Registry<VectorStoreFactory>;
10
13
  export declare function cosineSimilarity(a: number[], b: number[]): number;
11
14
  export declare function createInMemoryStore(options?: {
12
15
  maxChunks?: number;
@@ -1 +1 @@
1
- {"version":3,"file":"vectorstore.d.ts","sourceRoot":"","sources":["../src/vectorstore.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAE5F,MAAM,WAAW,WAAW;IAC3B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IAErB,MAAM,CAAC,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC9C,KAAK,CAAC,SAAS,EAAE,eAAe,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;IACrF,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;IACtB,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,CAAA;CACxB;AAID,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAkBjE;AAID,wBAAgB,mBAAmB,CAAC,OAAO,CAAC,EAAE;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB,GAAG,WAAW,CA2Dd;AAmDD,wBAAgB,SAAS,CACxB,cAAc,EAAE,eAAe,EAC/B,OAAO,EAAE,KAAK,CAAC,eAAe,GAAG;IAAE,SAAS,EAAE,eAAe,CAAA;CAAE,CAAC,EAChE,OAAO,CAAC,EAAE;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC1C,eAAe,EAAE,CAqBnB"}
1
+ {"version":3,"file":"vectorstore.d.ts","sourceRoot":"","sources":["../src/vectorstore.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAA;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAE5F,MAAM,WAAW,WAAW;IAC3B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IAErB,MAAM,CAAC,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC9C,KAAK,CAAC,SAAS,EAAE,eAAe,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;IACrF,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;IACtB,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,CAAA;CACxB;AAID,MAAM,MAAM,kBAAkB,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,WAAW,CAAA;AAEjF,eAAO,MAAM,mBAAmB,EAAE,QAAQ,CAAC,kBAAkB,CACX,CAAA;AAIlD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAkBjE;AAID,wBAAgB,mBAAmB,CAAC,OAAO,CAAC,EAAE;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB,GAAG,WAAW,CA2Dd;AAmDD,wBAAgB,SAAS,CACxB,cAAc,EAAE,eAAe,EAC/B,OAAO,EAAE,KAAK,CAAC,eAAe,GAAG;IAAE,SAAS,EAAE,eAAe,CAAA;CAAE,CAAC,EAChE,OAAO,CAAC,EAAE;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC1C,eAAe,EAAE,CAqBnB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elsium-ai/rag",
3
- "version": "0.2.3",
3
+ "version": "0.4.0",
4
4
  "description": "RAG pipeline, document processing, embeddings, and vector stores for ElsiumAI",
5
5
  "license": "MIT",
6
6
  "author": "Eric Utrera <ebutrera9103@gmail.com>",
@@ -26,7 +26,7 @@
26
26
  "dev": "bun --watch src/index.ts"
27
27
  },
28
28
  "dependencies": {
29
- "@elsium-ai/core": "^0.2.3"
29
+ "@elsium-ai/core": "^0.4.0"
30
30
  },
31
31
  "devDependencies": {
32
32
  "typescript": "^5.7.0"