@elsium-ai/rag 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +192 -0
- package/dist/bm25.d.ts +10 -0
- package/dist/bm25.d.ts.map +1 -0
- package/dist/embeddings.d.ts +3 -0
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/hybrid.d.ts +14 -0
- package/dist/hybrid.d.ts.map +1 -0
- package/dist/index.d.ts +16 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +541 -5
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/providers/cohere-embeddings.d.ts +8 -0
- package/dist/providers/cohere-embeddings.d.ts.map +1 -0
- package/dist/providers/google-embeddings.d.ts +8 -0
- package/dist/providers/google-embeddings.d.ts.map +1 -0
- package/dist/stores/index.d.ts +5 -0
- package/dist/stores/index.d.ts.map +1 -0
- package/dist/stores/pgvector.d.ts +8 -0
- package/dist/stores/pgvector.d.ts.map +1 -0
- package/dist/stores/qdrant.d.ts +9 -0
- package/dist/stores/qdrant.d.ts.map +1 -0
- package/dist/vectorstore.d.ts +3 -0
- package/dist/vectorstore.d.ts.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -660,6 +660,198 @@ const reranked = mmrRerank(queryEmbedding, candidateResults, {
|
|
|
660
660
|
})
|
|
661
661
|
```
|
|
662
662
|
|
|
663
|
+
### `createPgVectorStore`
|
|
664
|
+
|
|
665
|
+
Creates a vector store backed by PostgreSQL with the pgvector extension.
|
|
666
|
+
|
|
667
|
+
```typescript
|
|
668
|
+
function createPgVectorStore(config: {
|
|
669
|
+
connectionString: string
|
|
670
|
+
tableName?: string
|
|
671
|
+
dimensions?: number
|
|
672
|
+
}): VectorStore
|
|
673
|
+
```
|
|
674
|
+
|
|
675
|
+
| Parameter | Type | Default | Description |
|
|
676
|
+
|---|---|---|---|
|
|
677
|
+
| `config.connectionString` | `string` | **(required)** | PostgreSQL connection string. |
|
|
678
|
+
| `config.tableName` | `string` | `'vector_chunks'` | Table name for storing vectors (letters, digits and underscores only; must not start with a digit). |
|
|
679
|
+
| `config.dimensions` | `number` | `1536` | Vector dimensions (must match your embedding model). |
|
|
680
|
+
|
|
681
|
+
**Returns:** A `VectorStore` with `name: 'pgvector'`.
|
|
682
|
+
|
|
683
|
+
```typescript
|
|
684
|
+
import { createPgVectorStore } from '@elsium-ai/rag'
|
|
685
|
+
|
|
686
|
+
const store = createPgVectorStore({
|
|
687
|
+
connectionString: process.env.DATABASE_URL!,
|
|
688
|
+
tableName: 'document_embeddings',
|
|
689
|
+
dimensions: 1536,
|
|
690
|
+
})
|
|
691
|
+
|
|
692
|
+
await store.upsert(embeddedChunks)
|
|
693
|
+
const results = await store.query(queryVector, { topK: 5 })
|
|
694
|
+
```
|
|
695
|
+
|
|
696
|
+
### `createQdrantStore`
|
|
697
|
+
|
|
698
|
+
Creates a vector store backed by the Qdrant REST API.
|
|
699
|
+
|
|
700
|
+
```typescript
|
|
701
|
+
function createQdrantStore(config: {
|
|
702
|
+
url: string
|
|
703
|
+
apiKey?: string
|
|
704
|
+
collectionName: string
|
|
705
|
+
dimensions: number
|
|
706
|
+
}): VectorStore
|
|
707
|
+
```
|
|
708
|
+
|
|
709
|
+
| Parameter | Type | Default | Description |
|
|
710
|
+
|---|---|---|---|
|
|
711
|
+
| `config.url` | `string` | **(required)** | Qdrant server URL. |
|
|
712
|
+
| `config.apiKey` | `string` | `undefined` | Optional API key for authentication. |
|
|
713
|
+
| `config.collectionName` | `string` | **(required)** | Name of the Qdrant collection. |
|
|
714
|
+
| `config.dimensions` | `number` | **(required)** | Vector dimensions. |
|
|
715
|
+
|
|
716
|
+
**Returns:** A `VectorStore` with `name: 'qdrant'`.
|
|
717
|
+
|
|
718
|
+
```typescript
|
|
719
|
+
import { createQdrantStore } from '@elsium-ai/rag'
|
|
720
|
+
|
|
721
|
+
const store = createQdrantStore({
|
|
722
|
+
url: 'http://localhost:6333',
|
|
723
|
+
collectionName: 'documents',
|
|
724
|
+
dimensions: 1536,
|
|
725
|
+
})
|
|
726
|
+
```
|
|
727
|
+
|
|
728
|
+
---
|
|
729
|
+
|
|
730
|
+
## Additional Embedding Providers
|
|
731
|
+
|
|
732
|
+
### `createGoogleEmbeddings`
|
|
733
|
+
|
|
734
|
+
Creates an embedding provider backed by Google's text-embedding-004 model.
|
|
735
|
+
|
|
736
|
+
```typescript
|
|
737
|
+
function createGoogleEmbeddings(config: {
|
|
738
|
+
apiKey: string
|
|
739
|
+
model?: string
|
|
740
|
+
dimensions?: number
|
|
741
|
+
}): EmbeddingProvider
|
|
742
|
+
```
|
|
743
|
+
|
|
744
|
+
| Parameter | Type | Default | Description |
|
|
745
|
+
|---|---|---|---|
|
|
746
|
+
| `config.apiKey` | `string` | **(required)** | Google API key. |
|
|
747
|
+
| `config.model` | `string` | `'text-embedding-004'` | Model name. |
|
|
748
|
+
| `config.dimensions` | `number` | `768` | Embedding dimensions. |
|
|
749
|
+
|
|
750
|
+
**Returns:** An `EmbeddingProvider` with `name: 'google'`.
|
|
751
|
+
|
|
752
|
+
```typescript
|
|
753
|
+
import { createGoogleEmbeddings } from '@elsium-ai/rag'
|
|
754
|
+
|
|
755
|
+
const embeddings = createGoogleEmbeddings({
|
|
756
|
+
apiKey: process.env.GOOGLE_API_KEY!,
|
|
757
|
+
})
|
|
758
|
+
|
|
759
|
+
const vector = await embeddings.embed('Hello, world!')
|
|
760
|
+
```
|
|
761
|
+
|
|
762
|
+
### `createCohereEmbeddings`
|
|
763
|
+
|
|
764
|
+
Creates an embedding provider backed by Cohere's embed-v4.0 model.
|
|
765
|
+
|
|
766
|
+
```typescript
|
|
767
|
+
function createCohereEmbeddings(config: {
|
|
768
|
+
apiKey: string
|
|
769
|
+
model?: string
|
|
770
|
+
inputType?: string
|
|
771
|
+
}): EmbeddingProvider
|
|
772
|
+
```
|
|
773
|
+
|
|
774
|
+
| Parameter | Type | Default | Description |
|
|
775
|
+
|---|---|---|---|
|
|
776
|
+
| `config.apiKey` | `string` | **(required)** | Cohere API key. |
|
|
777
|
+
| `config.model` | `string` | `'embed-v4.0'` | Model name. |
|
|
778
|
+
| `config.inputType` | `string` | `'search_document'` | Input type hint for the model. |
|
|
779
|
+
|
|
780
|
+
**Returns:** An `EmbeddingProvider` with `name: 'cohere'`.
|
|
781
|
+
|
|
782
|
+
```typescript
|
|
783
|
+
import { createCohereEmbeddings } from '@elsium-ai/rag'
|
|
784
|
+
|
|
785
|
+
const embeddings = createCohereEmbeddings({
|
|
786
|
+
apiKey: process.env.COHERE_API_KEY!,
|
|
787
|
+
})
|
|
788
|
+
|
|
789
|
+
const vector = await embeddings.embed('Hello, world!')
|
|
790
|
+
```
|
|
791
|
+
|
|
792
|
+
---
|
|
793
|
+
|
|
794
|
+
## Keyword & Hybrid Search
|
|
795
|
+
|
|
796
|
+
### `createBM25Index`
|
|
797
|
+
|
|
798
|
+
Creates a BM25 keyword search index for term-frequency-based retrieval.
|
|
799
|
+
|
|
800
|
+
```typescript
|
|
801
|
+
function createBM25Index(options?: { k1?: number; b?: number }): {
|
|
802
|
+
index(chunks: Chunk[]): void
|
|
803
|
+
search(query: string, topK?: number): RetrievalResult[]
|
|
804
|
+
clear(): void
|
|
805
|
+
}
|
|
806
|
+
```
|
|
807
|
+
|
|
808
|
+
**Returns:** A `BM25Index` with `index` and `search` methods.
|
|
809
|
+
|
|
810
|
+
```typescript
|
|
811
|
+
import { createBM25Index } from '@elsium-ai/rag'
|
|
812
|
+
|
|
813
|
+
const bm25 = createBM25Index()
|
|
814
|
+
bm25.index(chunks)
|
|
815
|
+
|
|
816
|
+
const results = bm25.search('machine learning', 5)
|
|
817
|
+
```
|
|
818
|
+
|
|
819
|
+
### `createHybridSearch`
|
|
820
|
+
|
|
821
|
+
Combines a vector store with a BM25 index using Reciprocal Rank Fusion (RRF) to blend semantic and keyword search results.
|
|
822
|
+
|
|
823
|
+
```typescript
|
|
824
|
+
function createHybridSearch(
|
|
825
|
+
vectorStore: VectorStore,
|
|
826
|
+
bm25: ReturnType<typeof createBM25Index>,
|
|
827
|
+
config?: { k?: number; vectorWeight?: number; bm25Weight?: number; topK?: number },
|
|
828
|
+
): {
|
|
829
|
+
search(query: string, queryEmbedding: EmbeddingVector, topK?: number): Promise<RetrievalResult[]>
|
|
830
|
+
}
|
|
831
|
+
```
|
|
832
|
+
|
|
833
|
+
| Parameter | Type | Default | Description |
|
|
834
|
+
|---|---|---|---|
|
|
835
|
+
| `vectorStore` | `VectorStore` | **(required)** | The vector store for semantic search. |
|
|
836
|
+
| `bm25` | `BM25Index` | **(required)** | The BM25 index for keyword search. |
|
|
837
|
+
| `config.vectorWeight` | `number` | `0.7` | Weight for vector search results in RRF. |
|
|
838
|
+
| `config.bm25Weight` | `number` | `0.3` | Weight for BM25 results in RRF. |
|
|
839
|
+
| `config.topK` | `number` | `5` | Number of results to return. |
|
|
840
|
+
|
|
841
|
+
```typescript
|
|
842
|
+
import { createInMemoryStore, createBM25Index, createHybridSearch } from '@elsium-ai/rag'
|
|
843
|
+
|
|
844
|
+
const vectorStore = createInMemoryStore()
|
|
845
|
+
const bm25 = createBM25Index()
|
|
846
|
+
|
|
847
|
+
const hybrid = createHybridSearch(vectorStore, bm25, {
|
|
848
|
+
vectorWeight: 0.7,
|
|
849
|
+
bm25Weight: 0.3,
|
|
850
|
+
})
|
|
851
|
+
|
|
852
|
+
const results = await hybrid.search('search query', queryEmbedding, 10)
|
|
853
|
+
```
|
|
854
|
+
|
|
663
855
|
---
|
|
664
856
|
|
|
665
857
|
## Pipeline
|
package/dist/bm25.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { Chunk, RetrievalResult } from './types';
|
|
2
|
+
export interface BM25Index {
|
|
3
|
+
index(chunks: Chunk[]): void;
|
|
4
|
+
search(query: string, topK?: number): RetrievalResult[];
|
|
5
|
+
}
|
|
6
|
+
export declare function createBM25Index(options?: {
|
|
7
|
+
k1?: number;
|
|
8
|
+
b?: number;
|
|
9
|
+
}): BM25Index;
|
|
10
|
+
//# sourceMappingURL=bm25.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../src/bm25.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAErD,MAAM,WAAW,SAAS;IACzB,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,IAAI,CAAA;IAC5B,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,eAAe,EAAE,CAAA;CACvD;AAgBD,wBAAgB,eAAe,CAAC,OAAO,CAAC,EAAE;IAAE,EAAE,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CAwFhF"}
|
package/dist/embeddings.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { Registry } from '@elsium-ai/core';
|
|
1
2
|
import type { EmbeddingConfig, EmbeddingVector } from './types';
|
|
2
3
|
export interface EmbeddingProvider {
|
|
3
4
|
readonly name: string;
|
|
@@ -7,5 +8,7 @@ export interface EmbeddingProvider {
|
|
|
7
8
|
}
|
|
8
9
|
export declare function createOpenAIEmbeddings(config: EmbeddingConfig): EmbeddingProvider;
|
|
9
10
|
export declare function createMockEmbeddings(dims?: number): EmbeddingProvider;
|
|
11
|
+
export type EmbeddingProviderFactory = (config: EmbeddingConfig) => EmbeddingProvider;
|
|
12
|
+
export declare const embeddingProviderRegistry: Registry<EmbeddingProviderFactory>;
|
|
10
13
|
export declare function getEmbeddingProvider(config: EmbeddingConfig): EmbeddingProvider;
|
|
11
14
|
//# sourceMappingURL=embeddings.d.ts.map
|
package/dist/embeddings.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../src/embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAE/D,MAAM,WAAW,iBAAiB;IACjC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAA;IAE3B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAA;IAC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;CACvD;AAID,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAyEjF;AAID,wBAAgB,oBAAoB,CAAC,IAAI,SAAM,GAAG,iBAAiB,CAgClE;AAID,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,
|
|
1
|
+
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../src/embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAA;AAC/C,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAE/D,MAAM,WAAW,iBAAiB;IACjC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAA;IAE3B,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAA;IAC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;CACvD;AAID,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAyEjF;AAID,wBAAgB,oBAAoB,CAAC,IAAI,SAAM,GAAG,iBAAiB,CAgClE;AAID,MAAM,MAAM,wBAAwB,GAAG,CAAC,MAAM,EAAE,eAAe,KAAK,iBAAiB,CAAA;AAErF,eAAO,MAAM,yBAAyB,EAAE,QAAQ,CAAC,wBAAwB,CACX,CAAA;AAI9D,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAiB/E"}
|
package/dist/hybrid.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { BM25Index } from './bm25';
|
|
2
|
+
import type { EmbeddingVector, RetrievalResult } from './types';
|
|
3
|
+
import type { VectorStore } from './vectorstore';
|
|
4
|
+
export interface HybridSearchConfig {
|
|
5
|
+
k?: number;
|
|
6
|
+
vectorWeight?: number;
|
|
7
|
+
bm25Weight?: number;
|
|
8
|
+
topK?: number;
|
|
9
|
+
}
|
|
10
|
+
export interface HybridSearch {
|
|
11
|
+
search(query: string, queryEmbedding: EmbeddingVector, topK?: number): Promise<RetrievalResult[]>;
|
|
12
|
+
}
|
|
13
|
+
export declare function createHybridSearch(vectorStore: VectorStore, bm25Index: BM25Index, config?: HybridSearchConfig): HybridSearch;
|
|
14
|
+
//# sourceMappingURL=hybrid.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../src/hybrid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AACvC,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAC/D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAA;AAEhD,MAAM,WAAW,kBAAkB;IAClC,CAAC,CAAC,EAAE,MAAM,CAAA;IACV,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,IAAI,CAAC,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,YAAY;IAC5B,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,cAAc,EAAE,eAAe,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;CACjG;AAsCD,wBAAgB,kBAAkB,CACjC,WAAW,EAAE,WAAW,EACxB,SAAS,EAAE,SAAS,EACpB,MAAM,CAAC,EAAE,kBAAkB,GACzB,YAAY,CAwBd"}
|
package/dist/index.d.ts
CHANGED
|
@@ -3,10 +3,22 @@ export { textLoader, markdownLoader, htmlLoader, jsonLoader, csvLoader, getLoade
|
|
|
3
3
|
export type { DocumentLoader } from './loaders';
|
|
4
4
|
export { fixedSizeChunker, recursiveChunker, sentenceChunker, getChunker, } from './chunkers';
|
|
5
5
|
export type { Chunker } from './chunkers';
|
|
6
|
-
export { createOpenAIEmbeddings, createMockEmbeddings, getEmbeddingProvider, } from './embeddings';
|
|
7
|
-
export type { EmbeddingProvider } from './embeddings';
|
|
8
|
-
export { createInMemoryStore, cosineSimilarity, mmrRerank, } from './vectorstore';
|
|
9
|
-
export type { VectorStore } from './vectorstore';
|
|
6
|
+
export { createOpenAIEmbeddings, createMockEmbeddings, getEmbeddingProvider, embeddingProviderRegistry, } from './embeddings';
|
|
7
|
+
export type { EmbeddingProvider, EmbeddingProviderFactory } from './embeddings';
|
|
8
|
+
export { createInMemoryStore, cosineSimilarity, mmrRerank, vectorStoreRegistry, } from './vectorstore';
|
|
9
|
+
export type { VectorStore, VectorStoreFactory } from './vectorstore';
|
|
10
10
|
export { rag } from './pipeline';
|
|
11
11
|
export type { RAGPipeline, RAGPipelineConfig, IngestResult } from './pipeline';
|
|
12
|
+
export { createPgVectorStore } from './stores/index';
|
|
13
|
+
export type { PgVectorStoreConfig } from './stores/index';
|
|
14
|
+
export { createQdrantStore } from './stores/index';
|
|
15
|
+
export type { QdrantStoreConfig } from './stores/index';
|
|
16
|
+
export { createGoogleEmbeddings } from './providers/google-embeddings';
|
|
17
|
+
export type { GoogleEmbeddingsConfig } from './providers/google-embeddings';
|
|
18
|
+
export { createCohereEmbeddings } from './providers/cohere-embeddings';
|
|
19
|
+
export type { CohereEmbeddingsConfig } from './providers/cohere-embeddings';
|
|
20
|
+
export { createBM25Index } from './bm25';
|
|
21
|
+
export type { BM25Index } from './bm25';
|
|
22
|
+
export { createHybridSearch } from './hybrid';
|
|
23
|
+
export type { HybridSearch, HybridSearchConfig } from './hybrid';
|
|
12
24
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACX,QAAQ,EACR,gBAAgB,EAChB,KAAK,EACL,aAAa,EACb,eAAe,EACf,aAAa,EACb,eAAe,EACf,YAAY,EACZ,UAAU,EACV,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,eAAe,GACf,MAAM,SAAS,CAAA;AAGhB,OAAO,EACN,UAAU,EACV,cAAc,EACd,UAAU,EACV,UAAU,EACV,SAAS,EACT,SAAS,GACT,MAAM,WAAW,CAAA;AAClB,YAAY,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAG/C,OAAO,EACN,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,UAAU,GACV,MAAM,YAAY,CAAA;AACnB,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAGzC,OAAO,EACN,sBAAsB,EACtB,oBAAoB,EACpB,oBAAoB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACX,QAAQ,EACR,gBAAgB,EAChB,KAAK,EACL,aAAa,EACb,eAAe,EACf,aAAa,EACb,eAAe,EACf,YAAY,EACZ,UAAU,EACV,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,eAAe,GACf,MAAM,SAAS,CAAA;AAGhB,OAAO,EACN,UAAU,EACV,cAAc,EACd,UAAU,EACV,UAAU,EACV,SAAS,EACT,SAAS,GACT,MAAM,WAAW,CAAA;AAClB,YAAY,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAG/C,OAAO,EACN,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,UAAU,GACV,MAAM,YAAY,CAAA;AACnB,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAGzC,OAAO,EACN,sBAAsB,EACtB,oBAAoB,EACpB,oBAAoB,EACpB,yBAAyB,GACzB,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,cAAc,CAAA;AAG/E,OAAO,EACN,mBAAmB,EACnB,gBAAgB,EAChB,SAAS,EACT,mBAAmB,GACnB,MAAM,eAAe,CAAA;AACtB,YAAY,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAA;AAGpE,OAAO,EAAE,GAAG,EAAE,MAAM,YAAY,CAAA;AAChC,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAG9E,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AACpD,YAAY,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAClD,YAAY,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAGvD,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AACtE,YAAY,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AAC3E,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AACtE,YAAY,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AAG3E,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAA;AACxC,YAAY,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAGvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA;AAC7C,YAAY,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA"}
|
package/dist/index.js
CHANGED
|
@@ -97,6 +97,89 @@ function generateId(prefix = "els") {
|
|
|
97
97
|
const random = cryptoHex(4);
|
|
98
98
|
return `${prefix}_${timestamp}_${random}`;
|
|
99
99
|
}
|
|
100
|
+
// ../core/src/logger.ts
|
|
101
|
+
/** Numeric severity per level name; a higher number is more severe. */
var LOG_LEVELS = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3
};

/**
 * Creates a logger that writes one JSON document per call to the console.
 *
 * @param options.level   Minimum level to emit (default "info").
 * @param options.pretty  When true the JSON is indented with two spaces.
 * @param options.context Fields merged into every entry before level,
 *                        message and timestamp.
 * @returns An object with `debug`, `info`, `warn`, `error` and `child`.
 *          `child(extra)` returns a new logger with the same level/pretty
 *          settings and `extra` merged over the parent context.
 */
function createLogger(options = {}) {
  const { level = "info", pretty = false, context = {} } = options;
  const minLevel = LOG_LEVELS[level];

  function log(logLevel, message, data) {
    if (LOG_LEVELS[logLevel] < minLevel)
      return;
    const entry = {
      ...context,
      level: logLevel,
      message,
      timestamp: new Date().toISOString(),
      // Only a missing payload (undefined/null) is omitted. A truthiness
      // test here used to drop legitimate values such as 0, false and "".
      ...(data !== undefined && data !== null ? { data } : {})
    };
    const output = pretty ? JSON.stringify(entry, null, 2) : JSON.stringify(entry);
    // Route by severity so stderr/stdout consumers can split the streams.
    if (logLevel === "error") {
      console.error(output);
    } else if (logLevel === "warn") {
      console.warn(output);
    } else {
      console.log(output);
    }
  }

  return {
    debug: (msg, data) => log("debug", msg, data),
    info: (msg, data) => log("info", msg, data),
    warn: (msg, data) => log("warn", msg, data),
    error: (msg, data) => log("error", msg, data),
    child(childContext) {
      return createLogger({
        level,
        pretty,
        context: { ...context, ...childContext }
      });
    }
  };
}
|
|
143
|
+
// ../core/src/schema.ts
|
|
144
|
+
var log = createLogger();
|
|
145
|
+
// ../core/src/registry.ts
|
|
146
|
+
var log2 = createLogger();
|
|
147
|
+
var BLOCKED_KEYS = new Set(["__proto__", "constructor", "prototype"]);
|
|
148
|
+
/**
 * Builds a named factory registry backed by a Map.
 *
 * Names listed in BLOCKED_KEYS are never stored and never resolved:
 * `register` logs a warning and does nothing, `get` yields undefined,
 * `has` and `unregister` yield false.
 *
 * @param label Text used in the registry's log messages.
 * @returns An object with `register`, `get`, `list`, `has`, `unregister`.
 */
function createRegistry(label) {
  const factories = new Map();
  const isBlocked = (key) => BLOCKED_KEYS.has(key);

  const register = (name, factory) => {
    if (isBlocked(name)) {
      log2.warn(`Registry(${label}): rejected blocked key "${name}"`);
      return;
    }
    factories.set(name, factory);
    log2.debug(`Registry(${label}): registered "${name}"`);
  };

  const get = (name) => (isBlocked(name) ? undefined : factories.get(name));

  // Names are returned in insertion order (Map key order).
  const list = () => [...factories.keys()];

  const has = (name) => !isBlocked(name) && factories.has(name);

  const unregister = (name) => {
    if (isBlocked(name)) {
      return false;
    }
    const removed = factories.delete(name);
    if (removed) {
      log2.debug(`Registry(${label}): unregistered "${name}"`);
    }
    return removed;
  };

  return { register, get, list, has, unregister };
}
|
|
100
183
|
// src/loaders.ts
|
|
101
184
|
function createDocument(content, metadata) {
|
|
102
185
|
return {
|
|
@@ -549,7 +632,11 @@ function createMockEmbeddings(dims = 128) {
|
|
|
549
632
|
}
|
|
550
633
|
};
|
|
551
634
|
}
|
|
635
|
+
var embeddingProviderRegistry = createRegistry("embeddingProvider");
|
|
552
636
|
function getEmbeddingProvider(config) {
|
|
637
|
+
const registered = embeddingProviderRegistry.get(config.provider);
|
|
638
|
+
if (registered)
|
|
639
|
+
return registered(config);
|
|
553
640
|
switch (config.provider) {
|
|
554
641
|
case "openai":
|
|
555
642
|
return createOpenAIEmbeddings(config);
|
|
@@ -558,12 +645,13 @@ function getEmbeddingProvider(config) {
|
|
|
558
645
|
default:
|
|
559
646
|
throw new ElsiumError({
|
|
560
647
|
code: "CONFIG_ERROR",
|
|
561
|
-
message: `Unknown embedding provider: ${config.provider}`,
|
|
648
|
+
message: `Unknown embedding provider: ${config.provider}. Available: openai, mock${embeddingProviderRegistry.list().length ? `, ${embeddingProviderRegistry.list().join(", ")}` : ""}`,
|
|
562
649
|
retryable: false
|
|
563
650
|
});
|
|
564
651
|
}
|
|
565
652
|
}
|
|
566
653
|
// src/vectorstore.ts
|
|
654
|
+
var vectorStoreRegistry = createRegistry("vectorStore");
|
|
567
655
|
function cosineSimilarity(a, b) {
|
|
568
656
|
if (a.length !== b.length)
|
|
569
657
|
return 0;
|
|
@@ -692,13 +780,19 @@ function rag(config) {
|
|
|
692
780
|
minScore: 0,
|
|
693
781
|
strategy: "similarity"
|
|
694
782
|
};
|
|
695
|
-
if (config.store) {
|
|
696
|
-
throw new Error("External vector store not yet implemented. Use in-memory store.");
|
|
697
|
-
}
|
|
698
783
|
const loader = getLoader(loaderType);
|
|
699
784
|
const chunker = getChunker(chunkingConfig);
|
|
700
785
|
const embeddingProvider = getEmbeddingProvider(config.embeddings);
|
|
701
|
-
|
|
786
|
+
let vectorStore;
|
|
787
|
+
if (config.store) {
|
|
788
|
+
const factory = vectorStoreRegistry.get(config.store.provider);
|
|
789
|
+
if (!factory) {
|
|
790
|
+
throw new Error(`Unknown vector store provider: ${config.store.provider}. Register it with vectorStoreRegistry.register().`);
|
|
791
|
+
}
|
|
792
|
+
vectorStore = factory(config.store);
|
|
793
|
+
} else {
|
|
794
|
+
vectorStore = createInMemoryStore();
|
|
795
|
+
}
|
|
702
796
|
async function embedChunks(chunks) {
|
|
703
797
|
const texts = chunks.map((c) => c.content);
|
|
704
798
|
const embeddings = await embeddingProvider.embedBatch(texts);
|
|
@@ -744,7 +838,442 @@ function rag(config) {
|
|
|
744
838
|
}
|
|
745
839
|
};
|
|
746
840
|
}
|
|
841
|
+
// src/stores/pgvector.ts
|
|
842
|
+
import { createRequire } from "node:module";
|
|
843
|
+
var require2 = createRequire(import.meta.url);
|
|
844
|
+
var log3 = createLogger();
|
|
845
|
+
var BLOCKED_KEYS2 = new Set(["__proto__", "constructor", "prototype"]);
|
|
846
|
+
var TABLE_NAME_PATTERN = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
|
|
847
|
+
/**
 * Creates a vector store backed by PostgreSQL with the pgvector extension.
 *
 * The `pg` module is loaded lazily on first use. The first successful
 * connection also runs `CREATE EXTENSION IF NOT EXISTS vector` and creates
 * the table if it does not exist.
 *
 * @param config.connectionString Passed to `new pg.Client({ connectionString })`.
 * @param config.tableName  Table to use (default "vector_chunks"). Must match
 *                          TABLE_NAME_PATTERN because it is interpolated into SQL.
 * @param config.dimensions Width of the `vector(n)` column (default 1536).
 *                          Must be a positive integer because it is
 *                          interpolated into DDL.
 * @returns A store with `name: "pgvector"` and `upsert`, `query`, `delete`,
 *          `clear`, `count`.
 * @throws Error synchronously for an invalid table name or dimensions.
 *         Store methods reject with "pg is required…" only when the `pg`
 *         module cannot be loaded; connection and DDL failures reject with
 *         the underlying error.
 */
function createPgVectorStore(config) {
  const { connectionString, tableName = "vector_chunks", dimensions = 1536 } = config;
  if (BLOCKED_KEYS2.has(tableName)) {
    throw new Error(`Invalid table name: ${tableName}`);
  }
  if (!TABLE_NAME_PATTERN.test(tableName)) {
    throw new Error(`Invalid table name format: ${tableName}`);
  }
  // Like tableName, dimensions ends up inside a SQL string, so reject
  // anything that is not a plain positive integer.
  if (!Number.isInteger(dimensions) || dimensions <= 0) {
    throw new Error(`Invalid vector dimensions: ${dimensions}`);
  }

  // Cache the in-flight promise rather than the client object so that
  // concurrent first calls share a single connection attempt.
  let clientPromise = null;

  function describe(err2) {
    return err2 instanceof Error ? err2.message : String(err2);
  }

  // Loads the optional `pg` dependency; this is the only failure that is
  // reported as "pg is required".
  function loadPg() {
    try {
      return require2("pg");
    } catch (err2) {
      log3.error("Failed to initialize PgVector store", { error: describe(err2) });
      throw new Error("pg is required for PgVector store. Install it as a dependency.");
    }
  }

  // Opens a connection and makes sure the extension and table exist.
  async function connect() {
    const pg = loadPg();
    const candidate = new pg.Client({ connectionString });
    try {
      await candidate.connect();
      await candidate.query("CREATE EXTENSION IF NOT EXISTS vector");
      await candidate.query(`
        CREATE TABLE IF NOT EXISTS ${tableName} (
          id TEXT PRIMARY KEY,
          content TEXT NOT NULL,
          document_id TEXT NOT NULL,
          chunk_index INTEGER NOT NULL,
          metadata JSONB DEFAULT '{}',
          embedding vector(${dimensions})
        )
      `);
      return candidate;
    } catch (err2) {
      log3.error("Failed to initialize PgVector store", { error: describe(err2) });
      // Release the half-open connection; a failure while closing must not
      // mask the original error.
      try {
        await candidate.end();
      } catch {}
      throw err2;
    }
  }

  function getClient() {
    if (!clientPromise) {
      clientPromise = connect().catch((err2) => {
        // Forget the failed attempt so the next call retries instead of
        // reusing a client that never connected.
        clientPromise = null;
        throw err2;
      });
    }
    return clientPromise;
  }

  return {
    name: "pgvector",

    /** Inserts or replaces each chunk by id, one statement per chunk. */
    async upsert(chunks) {
      const pg = await getClient();
      for (const chunk of chunks) {
        // Chunks whose id is one of the blocked keys are skipped silently.
        if (BLOCKED_KEYS2.has(chunk.id))
          continue;
        // pgvector text literal: "[v1,v2,...]".
        const embedding = `[${chunk.embedding.values.join(",")}]`;
        await pg.query(`INSERT INTO ${tableName} (id, content, document_id, chunk_index, metadata, embedding)
          VALUES ($1, $2, $3, $4, $5, $6)
          ON CONFLICT (id) DO UPDATE SET
            content = EXCLUDED.content,
            document_id = EXCLUDED.document_id,
            chunk_index = EXCLUDED.chunk_index,
            metadata = EXCLUDED.metadata,
            embedding = EXCLUDED.embedding`, [
          chunk.id,
          chunk.content,
          chunk.documentId,
          chunk.index,
          JSON.stringify(chunk.metadata),
          embedding
        ]);
      }
    },

    /**
     * Returns up to `options.topK` (default 5) rows ordered by the `<=>`
     * distance to `embedding`, keeping rows whose score (1 - distance) is at
     * least `options.minScore` (default 0).
     */
    async query(embedding, options) {
      const pg = await getClient();
      const topK = options?.topK ?? 5;
      const minScore = options?.minScore ?? 0;
      const embeddingStr = `[${embedding.values.join(",")}]`;
      const result = await pg.query(`SELECT id, content, document_id, chunk_index, metadata,
          1 - (embedding <=> $1::vector) as score
        FROM ${tableName}
        WHERE 1 - (embedding <=> $1::vector) >= $2
        ORDER BY embedding <=> $1::vector
        LIMIT $3`, [embeddingStr, minScore, topK]);
      return result.rows.map((row) => ({
        chunk: {
          id: row.id,
          content: row.content,
          documentId: row.document_id,
          index: row.chunk_index,
          // Zero placeholders are overridden by whatever the stored
          // metadata provides.
          metadata: {
            startChar: 0,
            endChar: 0,
            tokenEstimate: 0,
            ...row.metadata ?? {}
          }
        },
        score: row.score,
        distance: 1 - row.score
      }));
    },

    /** Deletes the rows with the given ids; blocked keys are ignored. */
    async delete(ids) {
      const pg = await getClient();
      const filtered = ids.filter((id) => !BLOCKED_KEYS2.has(id));
      if (filtered.length === 0)
        return;
      const placeholders = filtered.map((_, i) => `$${i + 1}`).join(", ");
      await pg.query(`DELETE FROM ${tableName} WHERE id IN (${placeholders})`, filtered);
    },

    /** Removes every row from the table (the table itself is kept). */
    async clear() {
      const pg = await getClient();
      await pg.query(`DELETE FROM ${tableName}`);
    },

    /** Returns the number of rows in the table. */
    async count() {
      const pg = await getClient();
      const result = await pg.query(`SELECT COUNT(*)::int as count FROM ${tableName}`);
      return result.rows[0]?.count ?? 0;
    }
  };
}
|
|
958
|
+
// src/stores/qdrant.ts
|
|
959
|
+
/**
 * Creates a vector store that talks to Qdrant over its REST API via `fetch`.
 *
 * @param config.url            Base URL of the Qdrant server; trailing
 *                              slashes are ignored.
 * @param config.apiKey         Optional; sent as the `api-key` header.
 * @param config.collectionName Collection to operate on (URL-encoded when
 *                              building request paths).
 * @param config.dimensions     Vector size used when `clear()` recreates
 *                              the collection.
 * @returns A store with `name: "qdrant"` and `upsert`, `query`, `delete`,
 *          `clear`, `count`.
 * @throws Store methods reject with `ElsiumError.providerError(...)` on a
 *         non-2xx response; the error is marked retryable for status >= 500.
 */
function createQdrantStore(config) {
  const { url, apiKey, collectionName, dimensions } = config;
  // Normalise once so "http://host/" does not yield "//collections/...".
  const baseUrl = `${url}`.replace(/\/+$/, "");
  const collectionPath = `/collections/${encodeURIComponent(collectionName)}`;
  const headers = {
    "Content-Type": "application/json"
  };
  if (apiKey) {
    headers["api-key"] = apiKey;
  }

  /**
   * Sends one request and returns the parsed JSON body, or null for a 204
   * response or for a non-2xx status listed in `tolerated`.
   */
  async function request(method, path, body, tolerated = []) {
    const response = await fetch(`${baseUrl}${path}`, {
      method,
      headers,
      ...body ? { body: JSON.stringify(body) } : {}
    });
    if (!response.ok) {
      const text = await response.text().catch(() => "Unknown error");
      if (tolerated.includes(response.status))
        return null;
      throw ElsiumError.providerError(`Qdrant error ${response.status}: ${text}`, {
        provider: "qdrant",
        statusCode: response.status,
        retryable: response.status >= 500
      });
    }
    if (response.status === 204)
      return null;
    return response.json();
  }

  return {
    name: "qdrant",

    /** Writes each chunk as a point whose payload carries the chunk fields. */
    async upsert(chunks) {
      // Nothing to send; avoid a request with an empty points list.
      if (chunks.length === 0)
        return;
      // NOTE(review): Qdrant restricts point ids to unsigned integers and
      // UUIDs — confirm that chunk ids produced upstream satisfy this.
      const points = chunks.map((chunk) => ({
        id: chunk.id,
        vector: chunk.embedding.values,
        payload: {
          content: chunk.content,
          documentId: chunk.documentId,
          index: chunk.index,
          metadata: chunk.metadata
        }
      }));
      await request("PUT", `${collectionPath}/points`, {
        points
      });
    },

    /**
     * Searches for the `options.topK` (default 5) nearest points with a
     * score of at least `options.minScore` (default 0).
     */
    async query(embedding, options) {
      const topK = options?.topK ?? 5;
      const minScore = options?.minScore ?? 0;
      const result = await request("POST", `${collectionPath}/points/search`, {
        vector: embedding.values,
        limit: topK,
        score_threshold: minScore,
        with_payload: true
      });
      return (result?.result ?? []).map((hit) => {
        // A hit without a payload yields undefined fields instead of
        // throwing a TypeError.
        const payload = hit.payload ?? {};
        return {
          chunk: {
            id: String(hit.id),
            content: payload.content,
            documentId: payload.documentId,
            index: payload.index,
            metadata: payload.metadata
          },
          score: hit.score,
          distance: 1 - hit.score
        };
      });
    },

    /** Deletes the points with the given ids. */
    async delete(ids) {
      if (ids.length === 0)
        return;
      await request("POST", `${collectionPath}/points/delete`, {
        points: ids
      });
    },

    /** Drops the collection and recreates it empty with cosine distance. */
    async clear() {
      // A collection that does not exist yet (404) is expected here; any
      // other failure (auth, server error) is reported rather than hidden.
      await request("DELETE", collectionPath, undefined, [404]);
      await request("PUT", collectionPath, {
        vectors: { size: dimensions, distance: "Cosine" }
      });
    },

    /** Returns `points_count` from the collection info, or 0 if absent. */
    async count() {
      const result = await request("GET", collectionPath);
      return result?.result?.points_count ?? 0;
    }
  };
}
|
|
1042
|
+
vectorStoreRegistry.register("qdrant", (config) => createQdrantStore(config));
|
|
1043
|
+
// src/providers/google-embeddings.ts
|
|
1044
|
+
/**
 * Creates an embedding provider that calls Google's `embedContent` endpoint.
 *
 * @param config.apiKey     Required; sent in the `x-goog-api-key` header.
 * @param config.model      Model name (default "text-embedding-004").
 * @param config.dimensions Requested output size (default 768); forwarded as
 *                          `outputDimensionality` when truthy.
 * @returns A provider with `name: "google"`, `dimensions`, `embed(text)` and
 *          `embedBatch(texts)`. Batches are embedded with one request per
 *          text, in order.
 * @throws ElsiumError with code "CONFIG_ERROR" when `apiKey` is missing.
 *         Calls reject with `ElsiumError.providerError(...)` on a non-2xx
 *         response or when the response carries no embedding values.
 */
function createGoogleEmbeddings(config) {
  const { apiKey, model = "text-embedding-004", dimensions = 768 } = config;
  if (!apiKey) {
    throw new ElsiumError({
      code: "CONFIG_ERROR",
      message: "Google API key is required for embeddings",
      retryable: false
    });
  }
  const baseUrl = "https://generativelanguage.googleapis.com/v1beta";
  // The key travels in a header, not in the query string, so it does not
  // end up in URLs recorded by proxies, logs or error messages.
  const endpoint = `${baseUrl}/models/${model}:embedContent`;
  const headers = {
    "Content-Type": "application/json",
    "x-goog-api-key": apiKey
  };

  // Embeds a single text and returns its raw vector.
  async function embedOne(text) {
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify({
        model: `models/${model}`,
        content: { parts: [{ text }] },
        ...dimensions ? { outputDimensionality: dimensions } : {}
      })
    });
    if (!response.ok) {
      const body = await response.text().catch(() => "Unknown error");
      throw ElsiumError.providerError(`Google embeddings error ${response.status}: ${body}`, {
        provider: "google",
        statusCode: response.status,
        retryable: response.status >= 500
      });
    }
    const json = await response.json();
    const values = json?.embedding?.values;
    // Fail with a provider error instead of a TypeError when the body does
    // not have the expected { embedding: { values: [...] } } shape.
    if (!Array.isArray(values)) {
      throw ElsiumError.providerError("Google embeddings error: response did not contain embedding values", {
        provider: "google",
        statusCode: response.status,
        retryable: false
      });
    }
    return values;
  }

  // Requests are issued sequentially, preserving input order.
  async function callAPI(texts) {
    const results = [];
    for (const text of texts) {
      results.push(await embedOne(text));
    }
    return results;
  }

  return {
    name: "google",
    dimensions,
    async embed(text) {
      const [embedding] = await callAPI([text]);
      return { values: embedding, dimensions: embedding.length };
    },
    async embedBatch(texts) {
      const embeddings = await callAPI(texts);
      return embeddings.map((values) => ({
        values,
        dimensions: values.length
      }));
    }
  };
}
|
|
1096
|
+
// Register the Google provider under the "google" key. A missing API key is
// passed through as "" so that createGoogleEmbeddings raises its own error.
embeddingProviderRegistry.register("google", (config) => {
  const apiKey = config.apiKey ?? "";
  return createGoogleEmbeddings({
    apiKey,
    model: config.model,
    dimensions: config.dimensions
  });
});
|
|
1101
|
+
// src/providers/cohere-embeddings.ts
|
|
1102
|
+
/**
 * Creates an embedding provider that calls the Cohere v2 `/embed` endpoint.
 *
 * @param config - `apiKey` (required), `model` (defaults to "embed-v4.0") and
 *   `inputType` (defaults to "search_document"; sent as `input_type`).
 * @returns A provider named "cohere" exposing `embed` and `embedBatch`.
 * @throws ElsiumError with code "CONFIG_ERROR" when `apiKey` is missing, and a
 *   provider error when the HTTP call fails or the response is malformed.
 */
function createCohereEmbeddings(config) {
  const { apiKey, model = "embed-v4.0", inputType = "search_document" } = config;
  if (!apiKey) {
    throw new ElsiumError({
      code: "CONFIG_ERROR",
      message: "Cohere API key is required for embeddings",
      retryable: false
    });
  }
  // Cohere documents a cap on the number of texts per embed call; larger
  // batches are split so embedBatch keeps working for big inputs.
  const MAX_TEXTS_PER_REQUEST = 96;

  // Sends one request for at most MAX_TEXTS_PER_REQUEST texts.
  async function embedChunk(texts) {
    const response = await fetch("https://api.cohere.com/v2/embed", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`
      },
      body: JSON.stringify({
        texts,
        model,
        input_type: inputType,
        embedding_types: ["float"]
      })
    });
    if (!response.ok) {
      const body = await response.text().catch(() => "Unknown error");
      throw ElsiumError.providerError(`Cohere embeddings error ${response.status}: ${body}`, {
        provider: "cohere",
        statusCode: response.status,
        // 429 (rate limited) is transient just like a 5xx.
        retryable: response.status === 429 || response.status >= 500
      });
    }
    const json = await response.json();
    const vectors = json?.embeddings?.float;
    if (!Array.isArray(vectors) || vectors.length !== texts.length) {
      // A missing or short list would silently misalign texts and vectors.
      throw ElsiumError.providerError("Cohere embeddings error: response did not contain one float embedding per text", {
        provider: "cohere",
        statusCode: response.status,
        retryable: false
      });
    }
    return vectors;
  }

  // Embeds all texts in order; an empty input makes no network call.
  async function callAPI(texts) {
    const vectors = [];
    for (let start = 0; start < texts.length; start += MAX_TEXTS_PER_REQUEST) {
      const batch = await embedChunk(texts.slice(start, start + MAX_TEXTS_PER_REQUEST));
      vectors.push(...batch);
    }
    return vectors;
  }

  return {
    name: "cohere",
    // NOTE(review): this value is static, while the vector length depends on
    // the model (and Cohere documents a different default size for
    // embed-v4.0) - confirm it matches what the API actually returns.
    dimensions: 1024,
    async embed(text) {
      const [embedding] = await callAPI([text]);
      return { values: embedding, dimensions: embedding.length };
    },
    async embedBatch(texts) {
      const embeddings = await callAPI(texts);
      return embeddings.map((values) => ({
        values,
        dimensions: values.length
      }));
    }
  };
}
|
|
1152
|
+
// Register the Cohere provider under the "cohere" key. A missing API key is
// passed through as "" so that createCohereEmbeddings raises its own error.
embeddingProviderRegistry.register("cohere", (config) => {
  const apiKey = config.apiKey ?? "";
  return createCohereEmbeddings({ apiKey, model: config.model });
});
|
|
1156
|
+
// src/bm25.ts
|
|
1157
|
+
/**
 * Splits text into lowercase word tokens for BM25 indexing and querying.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator. Unicode property escapes are used
 * instead of `\w` (which only covers ASCII) so accented and non-Latin text is
 * kept intact rather than being split or dropped. ASCII input tokenizes
 * exactly as before.
 *
 * @param text - Raw text to tokenize.
 * @returns The non-empty tokens in their original order.
 */
function tokenize(text) {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((token) => token.length > 0);
}
|
|
1160
|
+
/**
 * Creates an in-memory BM25 keyword index over chunks.
 *
 * @param options - Optional tuning: `k1` (term-frequency saturation, default
 *   1.2) and `b` (length normalisation, default 0.75).
 * @returns An object with `index(chunks)` to add chunks and
 *   `search(query, topK = 5)` to retrieve the best-scoring chunks. Results
 *   carry the BM25 score and a constant `distance` of 0.
 */
function createBM25Index(options) {
  const k1 = options?.k1 ?? 1.2;
  const b = options?.b ?? 0.75;
  const docs = [];
  // term -> number of indexed documents containing it
  const docFreqs = new Map();
  // Running token total, so the average length never needs a full re-scan and
  // cannot go stale if indexing stops midway.
  let totalTokens = 0;

  // Tokenizes one chunk and records its term statistics.
  function addDoc(chunk) {
    const tokens = tokenize(chunk.content);
    const termFreqs = new Map();
    for (const token of tokens) {
      termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
    }
    for (const term of termFreqs.keys()) {
      docFreqs.set(term, (docFreqs.get(term) ?? 0) + 1);
    }
    docs.push({ chunk, termFreqs, length: tokens.length });
    totalTokens += tokens.length;
  }

  // Inverse document frequency; the "+ 1" inside the log keeps it positive.
  function idf(term) {
    const df = docFreqs.get(term) ?? 0;
    if (df === 0) {
      return 0;
    }
    return Math.log((docs.length - df + 0.5) / (df + 0.5) + 1);
  }

  return {
    index(chunks) {
      for (const chunk of chunks) {
        addDoc(chunk);
      }
    },
    search(query, topK = 5) {
      if (docs.length === 0) {
        return [];
      }
      const queryTerms = tokenize(query);
      if (queryTerms.length === 0) {
        return [];
      }
      const avgDocLength = totalTokens / docs.length;
      // IDF depends only on the term, so compute it once per distinct term
      // instead of once per (document, term) pair.
      const idfByTerm = new Map();
      for (const term of queryTerms) {
        if (!idfByTerm.has(term)) {
          idfByTerm.set(term, idf(term));
        }
      }
      const scored = [];
      for (const doc of docs) {
        // The length normaliser is the same for every term of this document.
        const lengthNorm = k1 * (1 - b + b * (doc.length / avgDocLength));
        let score = 0;
        // Repeated query terms intentionally contribute once per occurrence.
        for (const term of queryTerms) {
          const tf = doc.termFreqs.get(term) ?? 0;
          if (tf === 0) {
            continue;
          }
          score += idfByTerm.get(term) * ((tf * (k1 + 1)) / (tf + lengthNorm));
        }
        if (score > 0) {
          scored.push({ chunk: doc.chunk, score });
        }
      }
      scored.sort((left, right) => right.score - left.score);
      return scored.slice(0, topK).map((entry) => ({
        chunk: entry.chunk,
        score: entry.score,
        distance: 0
      }));
    }
  };
}
|
|
1233
|
+
// src/hybrid.ts
|
|
1234
|
+
/**
 * Merges two ranked result lists with weighted Reciprocal Rank Fusion.
 *
 * A result at zero-based position `i` contributes `weight / (k + i + 1)` to
 * the fused score of its chunk, keyed by `chunk.id`; contributions for the
 * same id are summed across both lists.
 *
 * @param vectorResults - Ranked results from the vector store (best first).
 * @param bm25Results - Ranked results from the BM25 index (best first).
 * @param k - Rank-smoothing constant added to every rank.
 * @param vectorWeight - Weight applied to the vector list.
 * @param bm25Weight - Weight applied to the BM25 list.
 * @returns Fused results sorted by descending score, each with `distance` 0.
 */
function reciprocalRankFusion(vectorResults, bm25Results, k, vectorWeight, bm25Weight) {
  const fused = new Map();

  // Adds one ranked list's weighted reciprocal-rank contributions.
  function accumulate(results, weight) {
    for (let rank = 0; rank < results.length; rank++) {
      const { chunk } = results[rank];
      const contribution = weight / (k + rank + 1);
      const entry = fused.get(chunk.id);
      if (entry) {
        entry.score += contribution;
      } else {
        fused.set(chunk.id, { score: contribution, chunk });
      }
    }
  }

  accumulate(vectorResults, vectorWeight);
  accumulate(bm25Results, bm25Weight);

  return Array.from(fused.values())
    .sort((left, right) => right.score - left.score)
    .map(({ score, chunk }) => ({ chunk, score, distance: 0 }));
}
|
|
1258
|
+
/**
 * Builds a hybrid retriever that queries a vector store and a BM25 index with
 * the same limit and merges both rankings via reciprocalRankFusion.
 *
 * @param vectorStore - Store queried with the embedding (`query(embedding, { topK })`).
 * @param bm25Index - Keyword index queried with the raw text (`search(query, topK)`).
 * @param config - Optional `k` (default 60), `vectorWeight` (default 1),
 *   `bm25Weight` (default 1) and `topK` (default 10).
 * @returns An object whose `search(query, queryEmbedding, topK)` resolves to
 *   at most `topK` fused results.
 */
function createHybridSearch(vectorStore, bm25Index, config) {
  const settings = {
    k: config?.k ?? 60,
    vectorWeight: config?.vectorWeight ?? 1,
    bm25Weight: config?.bm25Weight ?? 1,
    topK: config?.topK ?? 10
  };
  return {
    async search(query, queryEmbedding, topK) {
      const limit = topK ?? settings.topK;
      // Start the vector lookup first, then run the keyword search.
      const semantic = vectorStore.query(queryEmbedding, { topK: limit });
      const lexical = bm25Index.search(query, limit);
      const [vectorResults, bm25Results] = await Promise.all([semantic, lexical]);
      const merged = reciprocalRankFusion(
        vectorResults,
        bm25Results,
        settings.k,
        settings.vectorWeight,
        settings.bm25Weight
      );
      return merged.slice(0, limit);
    }
  };
}
|
|
747
1275
|
export {
|
|
1276
|
+
vectorStoreRegistry,
|
|
748
1277
|
textLoader,
|
|
749
1278
|
sentenceChunker,
|
|
750
1279
|
recursiveChunker,
|
|
@@ -757,9 +1286,16 @@ export {
|
|
|
757
1286
|
getEmbeddingProvider,
|
|
758
1287
|
getChunker,
|
|
759
1288
|
fixedSizeChunker,
|
|
1289
|
+
embeddingProviderRegistry,
|
|
760
1290
|
csvLoader,
|
|
1291
|
+
createQdrantStore,
|
|
1292
|
+
createPgVectorStore,
|
|
761
1293
|
createOpenAIEmbeddings,
|
|
762
1294
|
createMockEmbeddings,
|
|
763
1295
|
createInMemoryStore,
|
|
1296
|
+
createHybridSearch,
|
|
1297
|
+
createGoogleEmbeddings,
|
|
1298
|
+
createCohereEmbeddings,
|
|
1299
|
+
createBM25Index,
|
|
764
1300
|
cosineSimilarity
|
|
765
1301
|
};
|
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,iBAAiB,EAAwB,MAAM,cAAc,CAAA;AAE3E,OAAO,KAAK,EAEX,cAAc,EACd,QAAQ,EAER,eAAe,EACf,UAAU,EACV,YAAY,EACZ,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,MAAM,SAAS,CAAA;AAChB,OAAO,EAAE,KAAK,WAAW,
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,iBAAiB,EAAwB,MAAM,cAAc,CAAA;AAE3E,OAAO,KAAK,EAEX,cAAc,EACd,QAAQ,EAER,eAAe,EACf,UAAU,EACV,YAAY,EACZ,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,MAAM,SAAS,CAAA;AAChB,OAAO,EAAE,KAAK,WAAW,EAA4C,MAAM,eAAe,CAAA;AAE1F,MAAM,WAAW,iBAAiB;IACjC,MAAM,CAAC,EAAE,UAAU,CAAA;IACnB,QAAQ,CAAC,EAAE,cAAc,CAAA;IACzB,UAAU,EAAE,eAAe,CAAA;IAC3B,KAAK,CAAC,EAAE,iBAAiB,CAAA;IACzB,SAAS,CAAC,EAAE,eAAe,CAAA;CAC3B;AAED,MAAM,WAAW,WAAW;IAC3B,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IAC9D,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IACzD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;IACvE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;IACtB,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,CAAA;IACxB,QAAQ,CAAC,iBAAiB,EAAE,iBAAiB,CAAA;IAC7C,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAA;CACjC;AAED,MAAM,WAAW,YAAY;IAC5B,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,WAAW,EAAE,MAAM,CAAA;CACnB;AAED,wBAAgB,GAAG,CAAC,MAAM,EAAE,iBAAiB,GAAG,WAAW,CAsF1D"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { EmbeddingProvider } from '../embeddings';
|
|
2
|
+
/**
 * Options accepted by `createCohereEmbeddings`.
 *
 * - `apiKey`: Cohere API key, sent as a Bearer token; the factory throws a
 *   CONFIG_ERROR when it is empty.
 * - `model`: embedding model name; the bundled implementation defaults to
 *   "embed-v4.0".
 * - `inputType`: value sent as `input_type`; defaults to "search_document".
 */
export interface CohereEmbeddingsConfig {
|
|
3
|
+
apiKey: string;
|
|
4
|
+
model?: string;
|
|
5
|
+
inputType?: string;
|
|
6
|
+
}
|
|
7
|
+
/** Creates the embedding provider named "cohere"; throws a CONFIG_ERROR when `config.apiKey` is empty. */
export declare function createCohereEmbeddings(config: CohereEmbeddingsConfig): EmbeddingProvider;
|
|
8
|
+
//# sourceMappingURL=cohere-embeddings.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cohere-embeddings.d.ts","sourceRoot":"","sources":["../../src/providers/cohere-embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAA;AAItD,MAAM,WAAW,sBAAsB;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,sBAAsB,GAAG,iBAAiB,CA2DxF"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { EmbeddingProvider } from '../embeddings';
|
|
2
|
+
/**
 * Options accepted by `createGoogleEmbeddings`.
 *
 * - `apiKey`: Google API key; the factory throws a CONFIG_ERROR when it is
 *   empty.
 * - `model`: embedding model name; the bundled implementation defaults to
 *   "text-embedding-004".
 * - `dimensions`: requested vector size, sent as `outputDimensionality` and
 *   reported as the provider's `dimensions`; defaults to 768.
 */
export interface GoogleEmbeddingsConfig {
|
|
3
|
+
apiKey: string;
|
|
4
|
+
model?: string;
|
|
5
|
+
dimensions?: number;
|
|
6
|
+
}
|
|
7
|
+
/** Creates the embedding provider named "google"; throws a CONFIG_ERROR when `config.apiKey` is empty. */
export declare function createGoogleEmbeddings(config: GoogleEmbeddingsConfig): EmbeddingProvider;
|
|
8
|
+
//# sourceMappingURL=google-embeddings.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"google-embeddings.d.ts","sourceRoot":"","sources":["../../src/providers/google-embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAA;AAItD,MAAM,WAAW,sBAAsB;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,sBAAsB,GAAG,iBAAiB,CAgExF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stores/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAChD,YAAY,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAErD,OAAO,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAA;AAC5C,YAAY,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAA"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { VectorStore } from '../vectorstore';
|
|
2
|
+
/**
 * Options accepted by `createPgVectorStore`.
 *
 * - `connectionString`: PostgreSQL connection string (required).
 * - `tableName`: optional table name; the default is not visible in this
 *   declaration file.
 * - `dimensions`: optional vector size; the default is not visible in this
 *   declaration file.
 */
export interface PgVectorStoreConfig {
|
|
3
|
+
connectionString: string;
|
|
4
|
+
tableName?: string;
|
|
5
|
+
dimensions?: number;
|
|
6
|
+
}
|
|
7
|
+
/** Creates a vector store backed by PostgreSQL with the pgvector extension. */
export declare function createPgVectorStore(config: PgVectorStoreConfig): VectorStore;
|
|
8
|
+
//# sourceMappingURL=pgvector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pgvector.d.ts","sourceRoot":"","sources":["../../src/stores/pgvector.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAA;AAOjD,MAAM,WAAW,mBAAmB;IACnC,gBAAgB,EAAE,MAAM,CAAA;IACxB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAOD,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,mBAAmB,GAAG,WAAW,CAoI5E"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { VectorStore } from '../vectorstore';
|
|
2
|
+
/**
 * Options accepted by `createQdrantStore`.
 *
 * - `url`: presumably the base URL of the Qdrant HTTP API (the request helper
 *   is not visible here; confirm).
 * - `apiKey`: optional credential; how it is sent is not visible here.
 * - `collectionName`: collection used in every `/collections/{name}` request.
 * - `dimensions`: vector size used when `clear()` recreates the collection
 *   with Cosine distance.
 */
export interface QdrantStoreConfig {
|
|
3
|
+
url: string;
|
|
4
|
+
|
|
5
|
+
apiKey?: string;
|
|
6
|
+
|
|
7
|
+
collectionName: string;
|
|
8
|
+
|
|
9
|
+
dimensions: number;
|
|
10
|
+
}
|
|
8
|
+
/** Creates a vector store that talks to a Qdrant collection; also registered in `vectorStoreRegistry` as "qdrant". */
export declare function createQdrantStore(config: QdrantStoreConfig): VectorStore;
|
|
9
|
+
//# sourceMappingURL=qdrant.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"qdrant.d.ts","sourceRoot":"","sources":["../../src/stores/qdrant.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAA;AAGjD,MAAM,WAAW,iBAAiB;IACjC,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,cAAc,EAAE,MAAM,CAAA;IACtB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,iBAAiB,GAAG,WAAW,CA+GxE"}
|
package/dist/vectorstore.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { Registry } from '@elsium-ai/core';
|
|
1
2
|
import type { EmbeddedChunk, EmbeddingVector, QueryOptions, RetrievalResult } from './types';
|
|
2
3
|
export interface VectorStore {
|
|
3
4
|
readonly name: string;
|
|
@@ -7,6 +8,8 @@ export interface VectorStore {
|
|
|
7
8
|
clear(): Promise<void>;
|
|
8
9
|
count(): Promise<number>;
|
|
9
10
|
}
|
|
11
|
+
/** Function that builds a `VectorStore` from an untyped configuration object. */
export type VectorStoreFactory = (config: Record<string, unknown>) => VectorStore;
|
|
12
|
+
/** Registry of vector-store factories keyed by name; the bundle registers "qdrant" through `register(name, factory)`. */
export declare const vectorStoreRegistry: Registry<VectorStoreFactory>;
|
|
10
13
|
export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
11
14
|
export declare function createInMemoryStore(options?: {
|
|
12
15
|
maxChunks?: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vectorstore.d.ts","sourceRoot":"","sources":["../src/vectorstore.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"vectorstore.d.ts","sourceRoot":"","sources":["../src/vectorstore.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAA;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAE5F,MAAM,WAAW,WAAW;IAC3B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IAErB,MAAM,CAAC,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC9C,KAAK,CAAC,SAAS,EAAE,eAAe,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;IACrF,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;IACtB,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,CAAA;CACxB;AAID,MAAM,MAAM,kBAAkB,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,WAAW,CAAA;AAEjF,eAAO,MAAM,mBAAmB,EAAE,QAAQ,CAAC,kBAAkB,CACX,CAAA;AAIlD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAkBjE;AAID,wBAAgB,mBAAmB,CAAC,OAAO,CAAC,EAAE;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB,GAAG,WAAW,CA2Dd;AAmDD,wBAAgB,SAAS,CACxB,cAAc,EAAE,eAAe,EAC/B,OAAO,EAAE,KAAK,CAAC,eAAe,GAAG;IAAE,SAAS,EAAE,eAAe,CAAA;CAAE,CAAC,EAChE,OAAO,CAAC,EAAE;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC1C,eAAe,EAAE,CAqBnB"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@elsium-ai/rag",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "RAG pipeline, document processing, embeddings, and vector stores for ElsiumAI",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Eric Utrera <ebutrera9103@gmail.com>",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"dev": "bun --watch src/index.ts"
|
|
27
27
|
},
|
|
28
28
|
"dependencies": {
|
|
29
|
-
"@elsium-ai/core": "^0.
|
|
29
|
+
"@elsium-ai/core": "^0.4.0"
|
|
30
30
|
},
|
|
31
31
|
"devDependencies": {
|
|
32
32
|
"typescript": "^5.7.0"
|