@elsium-ai/rag 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -660,6 +660,198 @@ const reranked = mmrRerank(queryEmbedding, candidateResults, {
660
660
  })
661
661
  ```
662
662
 
663
+ ### `createPgVectorStore`
664
+
665
+ Creates a vector store backed by PostgreSQL with the pgvector extension.
666
+
667
+ ```typescript
668
+ function createPgVectorStore(config: {
669
+ connectionString: string
670
+ tableName?: string
671
+ dimensions?: number
672
+ }): VectorStore
673
+ ```
674
+
675
+ | Parameter | Type | Default | Description |
676
+ |---|---|---|---|
677
+ | `config.connectionString` | `string` | **(required)** | PostgreSQL connection string. |
678
+ | `config.tableName` | `string` | `'embeddings'` | Table name for storing vectors. |
679
+ | `config.dimensions` | `number` | `1536` | Vector dimensions (must match your embedding model). |
680
+
681
+ **Returns:** A `VectorStore` with `name: 'pgvector'`.
682
+
683
+ ```typescript
684
+ import { createPgVectorStore } from '@elsium-ai/rag'
685
+
686
+ const store = createPgVectorStore({
687
+ connectionString: process.env.DATABASE_URL!,
688
+ tableName: 'document_embeddings',
689
+ dimensions: 1536,
690
+ })
691
+
692
+ await store.upsert(embeddedChunks)
693
+ const results = await store.query(queryVector, { topK: 5 })
694
+ ```
695
+
696
+ ### `createQdrantStore`
697
+
698
+ Creates a vector store backed by the Qdrant REST API.
699
+
700
+ ```typescript
701
+ function createQdrantStore(config: {
702
+ url: string
703
+ apiKey?: string
704
+ collectionName: string
705
+ dimensions: number
706
+ }): VectorStore
707
+ ```
708
+
709
+ | Parameter | Type | Default | Description |
710
+ |---|---|---|---|
711
+ | `config.url` | `string` | **(required)** | Qdrant server URL. |
712
+ | `config.apiKey` | `string` | `undefined` | Optional API key for authentication. |
713
+ | `config.collectionName` | `string` | **(required)** | Name of the Qdrant collection. |
714
+ | `config.dimensions` | `number` | **(required)** | Vector dimensions. |
715
+
716
+ **Returns:** A `VectorStore` with `name: 'qdrant'`.
717
+
718
+ ```typescript
719
+ import { createQdrantStore } from '@elsium-ai/rag'
720
+
721
+ const store = createQdrantStore({
722
+ url: 'http://localhost:6333',
723
+ collectionName: 'documents',
724
+ dimensions: 1536,
725
+ })
726
+ ```
727
+
728
+ ---
729
+
730
+ ## Additional Embedding Providers
731
+
732
+ ### `createGoogleEmbeddings`
733
+
734
+ Creates an embedding provider backed by Google's text-embedding-004 model.
735
+
736
+ ```typescript
737
+ function createGoogleEmbeddings(config: {
738
+ apiKey: string
739
+ model?: string
740
+ dimensions?: number
741
+ }): EmbeddingProvider
742
+ ```
743
+
744
+ | Parameter | Type | Default | Description |
745
+ |---|---|---|---|
746
+ | `config.apiKey` | `string` | **(required)** | Google API key. |
747
+ | `config.model` | `string` | `'text-embedding-004'` | Model name. |
748
+ | `config.dimensions` | `number` | `768` | Embedding dimensions. |
749
+
750
+ **Returns:** An `EmbeddingProvider` with `name: 'google'`.
751
+
752
+ ```typescript
753
+ import { createGoogleEmbeddings } from '@elsium-ai/rag'
754
+
755
+ const embeddings = createGoogleEmbeddings({
756
+ apiKey: process.env.GOOGLE_API_KEY!,
757
+ })
758
+
759
+ const vector = await embeddings.embed('Hello, world!')
760
+ ```
761
+
762
+ ### `createCohereEmbeddings`
763
+
764
+ Creates an embedding provider backed by Cohere's embed-v4.0 model.
765
+
766
+ ```typescript
767
+ function createCohereEmbeddings(config: {
768
+ apiKey: string
769
+ model?: string
770
+ inputType?: string
771
+ }): EmbeddingProvider
772
+ ```
773
+
774
+ | Parameter | Type | Default | Description |
775
+ |---|---|---|---|
776
+ | `config.apiKey` | `string` | **(required)** | Cohere API key. |
777
+ | `config.model` | `string` | `'embed-v4.0'` | Model name. |
778
+ | `config.inputType` | `string` | `'search_document'` | Input type hint for the model. |
779
+
780
+ **Returns:** An `EmbeddingProvider` with `name: 'cohere'`.
781
+
782
+ ```typescript
783
+ import { createCohereEmbeddings } from '@elsium-ai/rag'
784
+
785
+ const embeddings = createCohereEmbeddings({
786
+ apiKey: process.env.COHERE_API_KEY!,
787
+ })
788
+
789
+ const vector = await embeddings.embed('Hello, world!')
790
+ ```
791
+
792
+ ---
793
+
794
+ ## Keyword & Hybrid Search
795
+
796
+ ### `createBM25Index`
797
+
798
+ Creates a BM25 keyword search index for term-frequency-based retrieval.
799
+
800
+ ```typescript
801
+ function createBM25Index(options?: { k1?: number; b?: number }): {
802
+ index(chunks: Chunk[]): void
803
+ search(query: string, topK?: number): RetrievalResult[]
804
+ clear(): void
805
+ }
806
+ ```
807
+
808
+ **Returns:** A `BM25Index` with `index` (adds chunks to the index) and `search` methods.
809
+
810
+ ```typescript
811
+ import { createBM25Index } from '@elsium-ai/rag'
812
+
813
+ const bm25 = createBM25Index()
814
+ bm25.index(chunks)
815
+
816
+ const results = bm25.search('machine learning', 5)
817
+ ```
818
+
819
+ ### `createHybridSearch`
820
+
821
+ Combines a vector store with a BM25 index using Reciprocal Rank Fusion (RRF) to blend semantic and keyword search results.
822
+
823
+ ```typescript
824
+ function createHybridSearch(
825
+ vectorStore: VectorStore,
826
+ bm25: ReturnType<typeof createBM25Index>,
827
+ config?: { k?: number; vectorWeight?: number; bm25Weight?: number; topK?: number },
828
+ ): {
829
+ search(query: string, queryEmbedding: EmbeddingVector, topK?: number): Promise<RetrievalResult[]>
830
+ }
831
+ ```
832
+
833
+ | Parameter | Type | Default | Description |
834
+ |---|---|---|---|
835
+ | `vectorStore` | `VectorStore` | **(required)** | The vector store for semantic search. |
836
+ | `bm25` | `BM25Index` | **(required)** | The BM25 index for keyword search. |
837
+ | `config.vectorWeight` | `number` | `1` | Weight for vector search results in RRF. |
838
+ | `config.bm25Weight` | `number` | `1` | Weight for BM25 results in RRF. |
839
+ | `config.topK` | `number` | `10` | Default number of results to return when `topK` is not passed to `search`. |
840
+
841
+ ```typescript
842
+ import { createInMemoryStore, createBM25Index, createHybridSearch } from '@elsium-ai/rag'
843
+
844
+ const vectorStore = createInMemoryStore()
845
+ const bm25 = createBM25Index()
846
+
847
+ const hybrid = createHybridSearch(vectorStore, bm25, {
848
+ vectorWeight: 0.7,
849
+ bm25Weight: 0.3,
850
+ })
851
+
852
+ const results = await hybrid.search('search query', queryEmbedding, 10)
853
+ ```
854
+
663
855
  ---
664
856
 
665
857
  ## Pipeline
package/dist/bm25.d.ts ADDED
@@ -0,0 +1,10 @@
1
+ import type { Chunk, RetrievalResult } from './types';
2
+ export interface BM25Index {
3
+ index(chunks: Chunk[]): void;
4
+ search(query: string, topK?: number): RetrievalResult[];
5
+ }
6
+ export declare function createBM25Index(options?: {
7
+ k1?: number;
8
+ b?: number;
9
+ }): BM25Index;
10
+ //# sourceMappingURL=bm25.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../src/bm25.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAErD,MAAM,WAAW,SAAS;IACzB,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,IAAI,CAAA;IAC5B,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,eAAe,EAAE,CAAA;CACvD;AAgBD,wBAAgB,eAAe,CAAC,OAAO,CAAC,EAAE;IAAE,EAAE,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CAwFhF"}
@@ -0,0 +1,14 @@
1
+ import type { BM25Index } from './bm25';
2
+ import type { EmbeddingVector, RetrievalResult } from './types';
3
+ import type { VectorStore } from './vectorstore';
4
+ export interface HybridSearchConfig {
5
+ k?: number;
6
+ vectorWeight?: number;
7
+ bm25Weight?: number;
8
+ topK?: number;
9
+ }
10
+ export interface HybridSearch {
11
+ search(query: string, queryEmbedding: EmbeddingVector, topK?: number): Promise<RetrievalResult[]>;
12
+ }
13
+ export declare function createHybridSearch(vectorStore: VectorStore, bm25Index: BM25Index, config?: HybridSearchConfig): HybridSearch;
14
+ //# sourceMappingURL=hybrid.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../src/hybrid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AACvC,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,SAAS,CAAA;AAC/D,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAA;AAEhD,MAAM,WAAW,kBAAkB;IAClC,CAAC,CAAC,EAAE,MAAM,CAAA;IACV,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,IAAI,CAAC,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,YAAY;IAC5B,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,cAAc,EAAE,eAAe,EAAE,IAAI,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;CACjG;AAsCD,wBAAgB,kBAAkB,CACjC,WAAW,EAAE,WAAW,EACxB,SAAS,EAAE,SAAS,EACpB,MAAM,CAAC,EAAE,kBAAkB,GACzB,YAAY,CAwBd"}
package/dist/index.d.ts CHANGED
@@ -11,4 +11,14 @@ export { rag } from './pipeline';
11
11
  export type { RAGPipeline, RAGPipelineConfig, IngestResult } from './pipeline';
12
12
  export { createPgVectorStore } from './stores/index';
13
13
  export type { PgVectorStoreConfig } from './stores/index';
14
+ export { createQdrantStore } from './stores/index';
15
+ export type { QdrantStoreConfig } from './stores/index';
16
+ export { createGoogleEmbeddings } from './providers/google-embeddings';
17
+ export type { GoogleEmbeddingsConfig } from './providers/google-embeddings';
18
+ export { createCohereEmbeddings } from './providers/cohere-embeddings';
19
+ export type { CohereEmbeddingsConfig } from './providers/cohere-embeddings';
20
+ export { createBM25Index } from './bm25';
21
+ export type { BM25Index } from './bm25';
22
+ export { createHybridSearch } from './hybrid';
23
+ export type { HybridSearch, HybridSearchConfig } from './hybrid';
14
24
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACX,QAAQ,EACR,gBAAgB,EAChB,KAAK,EACL,aAAa,EACb,eAAe,EACf,aAAa,EACb,eAAe,EACf,YAAY,EACZ,UAAU,EACV,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,eAAe,GACf,MAAM,SAAS,CAAA;AAGhB,OAAO,EACN,UAAU,EACV,cAAc,EACd,UAAU,EACV,UAAU,EACV,SAAS,EACT,SAAS,GACT,MAAM,WAAW,CAAA;AAClB,YAAY,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAG/C,OAAO,EACN,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,UAAU,GACV,MAAM,YAAY,CAAA;AACnB,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAGzC,OAAO,EACN,sBAAsB,EACtB,oBAAoB,EACpB,oBAAoB,EACpB,yBAAyB,GACzB,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,cAAc,CAAA;AAG/E,OAAO,EACN,mBAAmB,EACnB,gBAAgB,EAChB,SAAS,EACT,mBAAmB,GACnB,MAAM,eAAe,CAAA;AACtB,YAAY,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAA;AAGpE,OAAO,EAAE,GAAG,EAAE,MAAM,YAAY,CAAA;AAChC,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAG9E,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AACpD,YAAY,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACX,QAAQ,EACR,gBAAgB,EAChB,KAAK,EACL,aAAa,EACb,eAAe,EACf,aAAa,EACb,eAAe,EACf,YAAY,EACZ,UAAU,EACV,gBAAgB,EAChB,cAAc,EACd,eAAe,EACf,iBAAiB,EACjB,eAAe,GACf,MAAM,SAAS,CAAA;AAGhB,OAAO,EACN,UAAU,EACV,cAAc,EACd,UAAU,EACV,UAAU,EACV,SAAS,EACT,SAAS,GACT,MAAM,WAAW,CAAA;AAClB,YAAY,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAG/C,OAAO,EACN,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,UAAU,GACV,MAAM,YAAY,CAAA;AACnB,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAGzC,OAAO,EACN,sBAAsB,EACtB,oBAAoB,EACpB,oBAAoB,EACpB,yBAAyB,GACzB,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,cAAc,CAAA;AAG/E,OAAO,EACN,mBAAmB,EACnB,gBAAgB,EAChB,SAAS,EACT,mBAAmB,GACnB,MAAM,eAAe,CAAA;AACtB,YAAY,EAAE,WAAW,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAA;AAGpE,OAAO,EAAE,GAAG,EAAE,MAAM,YAAY,CAAA;AAChC,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAG9E,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AACpD,YAAY,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAClD,YAAY,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAGvD,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AACtE,YAAY,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AAC3E,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AACtE,YAAY,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AAG3E,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAA;AACxC,YAAY,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAGvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA;AAC7C,YAAY,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA"}
package/dist/index.js CHANGED
@@ -780,13 +780,19 @@ function rag(config) {
780
780
  minScore: 0,
781
781
  strategy: "similarity"
782
782
  };
783
- if (config.store) {
784
- throw new Error("External vector store not yet implemented. Use in-memory store.");
785
- }
786
783
  const loader = getLoader(loaderType);
787
784
  const chunker = getChunker(chunkingConfig);
788
785
  const embeddingProvider = getEmbeddingProvider(config.embeddings);
789
- const vectorStore = createInMemoryStore();
786
+ let vectorStore;
787
+ if (config.store) {
788
+ const factory = vectorStoreRegistry.get(config.store.provider);
789
+ if (!factory) {
790
+ throw new Error(`Unknown vector store provider: ${config.store.provider}. Register it with vectorStoreRegistry.register().`);
791
+ }
792
+ vectorStore = factory(config.store);
793
+ } else {
794
+ vectorStore = createInMemoryStore();
795
+ }
790
796
  async function embedChunks(chunks) {
791
797
  const texts = chunks.map((c) => c.content);
792
798
  const embeddings = await embeddingProvider.embedBatch(texts);
@@ -949,6 +955,323 @@ function createPgVectorStore(config) {
949
955
  }
950
956
  };
951
957
  }
958
+ // src/stores/qdrant.ts
959
+ function createQdrantStore(config) {
960
+ const { url, apiKey, collectionName, dimensions } = config;
961
+ const headers = {
962
+ "Content-Type": "application/json"
963
+ };
964
+ if (apiKey) {
965
+ headers["api-key"] = apiKey;
966
+ }
967
+ async function request(method, path, body) {
968
+ const response = await fetch(`${url}${path}`, {
969
+ method,
970
+ headers,
971
+ ...body ? { body: JSON.stringify(body) } : {}
972
+ });
973
+ if (!response.ok) {
974
+ const text = await response.text().catch(() => "Unknown error");
975
+ throw ElsiumError.providerError(`Qdrant error ${response.status}: ${text}`, {
976
+ provider: "qdrant",
977
+ statusCode: response.status,
978
+ retryable: response.status >= 500
979
+ });
980
+ }
981
+ if (response.status === 204)
982
+ return null;
983
+ return response.json();
984
+ }
985
+ return {
986
+ name: "qdrant",
987
+ async upsert(chunks) {
988
+ const points = chunks.map((chunk) => ({
989
+ id: chunk.id,
990
+ vector: chunk.embedding.values,
991
+ payload: {
992
+ content: chunk.content,
993
+ documentId: chunk.documentId,
994
+ index: chunk.index,
995
+ metadata: chunk.metadata
996
+ }
997
+ }));
998
+ await request("PUT", `/collections/${collectionName}/points`, {
999
+ points
1000
+ });
1001
+ },
1002
+ async query(embedding, options) {
1003
+ const topK = options?.topK ?? 5;
1004
+ const minScore = options?.minScore ?? 0;
1005
+ const result = await request("POST", `/collections/${collectionName}/points/search`, {
1006
+ vector: embedding.values,
1007
+ limit: topK,
1008
+ score_threshold: minScore,
1009
+ with_payload: true
1010
+ });
1011
+ return (result.result ?? []).map((hit) => ({
1012
+ chunk: {
1013
+ id: String(hit.id),
1014
+ content: hit.payload.content,
1015
+ documentId: hit.payload.documentId,
1016
+ index: hit.payload.index,
1017
+ metadata: hit.payload.metadata
1018
+ },
1019
+ score: hit.score,
1020
+ distance: 1 - hit.score
1021
+ }));
1022
+ },
1023
+ async delete(ids) {
1024
+ await request("POST", `/collections/${collectionName}/points/delete`, {
1025
+ points: ids
1026
+ });
1027
+ },
1028
+ async clear() {
1029
+ try {
1030
+ await request("DELETE", `/collections/${collectionName}`);
1031
+ } catch {}
1032
+ await request("PUT", `/collections/${collectionName}`, {
1033
+ vectors: { size: dimensions, distance: "Cosine" }
1034
+ });
1035
+ },
1036
+ async count() {
1037
+ const result = await request("GET", `/collections/${collectionName}`);
1038
+ return result.result?.points_count ?? 0;
1039
+ }
1040
+ };
1041
+ }
1042
+ vectorStoreRegistry.register("qdrant", (config) => createQdrantStore(config));
1043
+ // src/providers/google-embeddings.ts
1044
+ function createGoogleEmbeddings(config) {
1045
+ const { apiKey, model = "text-embedding-004", dimensions = 768 } = config;
1046
+ if (!apiKey) {
1047
+ throw new ElsiumError({
1048
+ code: "CONFIG_ERROR",
1049
+ message: "Google API key is required for embeddings",
1050
+ retryable: false
1051
+ });
1052
+ }
1053
+ const baseUrl = "https://generativelanguage.googleapis.com/v1beta";
1054
+ async function callAPI(texts) {
1055
+ const results = [];
1056
+ for (const text of texts) {
1057
+ const url = `${baseUrl}/models/${model}:embedContent?key=${apiKey}`;
1058
+ const response = await fetch(url, {
1059
+ method: "POST",
1060
+ headers: { "Content-Type": "application/json" },
1061
+ body: JSON.stringify({
1062
+ model: `models/${model}`,
1063
+ content: { parts: [{ text }] },
1064
+ ...dimensions ? { outputDimensionality: dimensions } : {}
1065
+ })
1066
+ });
1067
+ if (!response.ok) {
1068
+ const body = await response.text().catch(() => "Unknown error");
1069
+ throw ElsiumError.providerError(`Google embeddings error ${response.status}: ${body}`, {
1070
+ provider: "google",
1071
+ statusCode: response.status,
1072
+ retryable: response.status >= 500
1073
+ });
1074
+ }
1075
+ const json = await response.json();
1076
+ results.push(json.embedding.values);
1077
+ }
1078
+ return results;
1079
+ }
1080
+ return {
1081
+ name: "google",
1082
+ dimensions,
1083
+ async embed(text) {
1084
+ const [embedding] = await callAPI([text]);
1085
+ return { values: embedding, dimensions: embedding.length };
1086
+ },
1087
+ async embedBatch(texts) {
1088
+ const embeddings = await callAPI(texts);
1089
+ return embeddings.map((values) => ({
1090
+ values,
1091
+ dimensions: values.length
1092
+ }));
1093
+ }
1094
+ };
1095
+ }
1096
+ embeddingProviderRegistry.register("google", (config) => createGoogleEmbeddings({
1097
+ apiKey: config.apiKey ?? "",
1098
+ model: config.model,
1099
+ dimensions: config.dimensions
1100
+ }));
1101
+ // src/providers/cohere-embeddings.ts
1102
+ function createCohereEmbeddings(config) {
1103
+ const { apiKey, model = "embed-v4.0", inputType = "search_document" } = config;
1104
+ if (!apiKey) {
1105
+ throw new ElsiumError({
1106
+ code: "CONFIG_ERROR",
1107
+ message: "Cohere API key is required for embeddings",
1108
+ retryable: false
1109
+ });
1110
+ }
1111
+ async function callAPI(texts) {
1112
+ const response = await fetch("https://api.cohere.com/v2/embed", {
1113
+ method: "POST",
1114
+ headers: {
1115
+ "Content-Type": "application/json",
1116
+ Authorization: `Bearer ${apiKey}`
1117
+ },
1118
+ body: JSON.stringify({
1119
+ texts,
1120
+ model,
1121
+ input_type: inputType,
1122
+ embedding_types: ["float"]
1123
+ })
1124
+ });
1125
+ if (!response.ok) {
1126
+ const body = await response.text().catch(() => "Unknown error");
1127
+ throw ElsiumError.providerError(`Cohere embeddings error ${response.status}: ${body}`, {
1128
+ provider: "cohere",
1129
+ statusCode: response.status,
1130
+ retryable: response.status >= 500
1131
+ });
1132
+ }
1133
+ const json = await response.json();
1134
+ return json.embeddings.float;
1135
+ }
1136
+ return {
1137
+ name: "cohere",
1138
+ dimensions: 1024,
1139
+ async embed(text) {
1140
+ const [embedding] = await callAPI([text]);
1141
+ return { values: embedding, dimensions: embedding.length };
1142
+ },
1143
+ async embedBatch(texts) {
1144
+ const embeddings = await callAPI(texts);
1145
+ return embeddings.map((values) => ({
1146
+ values,
1147
+ dimensions: values.length
1148
+ }));
1149
+ }
1150
+ };
1151
+ }
1152
+ embeddingProviderRegistry.register("cohere", (config) => createCohereEmbeddings({
1153
+ apiKey: config.apiKey ?? "",
1154
+ model: config.model
1155
+ }));
1156
+ // src/bm25.ts
1157
+ function tokenize(text) {
1158
+ return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((t) => t.length > 0);
1159
+ }
1160
+ function createBM25Index(options) {
1161
+ const k1 = options?.k1 ?? 1.2;
1162
+ const b = options?.b ?? 0.75;
1163
+ const docs = [];
1164
+ const docFreqs = new Map;
1165
+ let avgDocLength = 0;
1166
+ function addDoc(chunk) {
1167
+ const tokens = tokenize(chunk.content);
1168
+ const termFreqs = new Map;
1169
+ for (const token of tokens) {
1170
+ termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
1171
+ }
1172
+ for (const term of termFreqs.keys()) {
1173
+ docFreqs.set(term, (docFreqs.get(term) ?? 0) + 1);
1174
+ }
1175
+ docs.push({ chunk, termFreqs, length: tokens.length });
1176
+ }
1177
+ function recalcAvgLength() {
1178
+ if (docs.length === 0) {
1179
+ avgDocLength = 0;
1180
+ return;
1181
+ }
1182
+ avgDocLength = docs.reduce((sum, d) => sum + d.length, 0) / docs.length;
1183
+ }
1184
+ function idf(term) {
1185
+ const df = docFreqs.get(term) ?? 0;
1186
+ const n = docs.length;
1187
+ if (df === 0)
1188
+ return 0;
1189
+ return Math.log((n - df + 0.5) / (df + 0.5) + 1);
1190
+ }
1191
+ function scoreSingle(doc, queryTerms) {
1192
+ let score = 0;
1193
+ for (const term of queryTerms) {
1194
+ const tf = doc.termFreqs.get(term) ?? 0;
1195
+ if (tf === 0)
1196
+ continue;
1197
+ const termIdf = idf(term);
1198
+ const numerator = tf * (k1 + 1);
1199
+ const denominator = tf + k1 * (1 - b + b * (doc.length / avgDocLength));
1200
+ score += termIdf * (numerator / denominator);
1201
+ }
1202
+ return score;
1203
+ }
1204
+ return {
1205
+ index(chunks) {
1206
+ for (const chunk of chunks) {
1207
+ addDoc(chunk);
1208
+ }
1209
+ recalcAvgLength();
1210
+ },
1211
+ search(query, topK = 5) {
1212
+ if (docs.length === 0)
1213
+ return [];
1214
+ const queryTerms = tokenize(query);
1215
+ if (queryTerms.length === 0)
1216
+ return [];
1217
+ const scored = [];
1218
+ for (const doc of docs) {
1219
+ const score = scoreSingle(doc, queryTerms);
1220
+ if (score > 0) {
1221
+ scored.push({ chunk: doc.chunk, score });
1222
+ }
1223
+ }
1224
+ scored.sort((a, b2) => b2.score - a.score);
1225
+ return scored.slice(0, topK).map((s) => ({
1226
+ chunk: s.chunk,
1227
+ score: s.score,
1228
+ distance: 0
1229
+ }));
1230
+ }
1231
+ };
1232
+ }
1233
+ // src/hybrid.ts
1234
+ function reciprocalRankFusion(vectorResults, bm25Results, k, vectorWeight, bm25Weight) {
1235
+ const scores = new Map;
1236
+ for (let i = 0;i < vectorResults.length; i++) {
1237
+ const result = vectorResults[i];
1238
+ const rrfScore = vectorWeight / (k + i + 1);
1239
+ const existing = scores.get(result.chunk.id);
1240
+ if (existing) {
1241
+ existing.score += rrfScore;
1242
+ } else {
1243
+ scores.set(result.chunk.id, { score: rrfScore, chunk: result.chunk });
1244
+ }
1245
+ }
1246
+ for (let i = 0;i < bm25Results.length; i++) {
1247
+ const result = bm25Results[i];
1248
+ const rrfScore = bm25Weight / (k + i + 1);
1249
+ const existing = scores.get(result.chunk.id);
1250
+ if (existing) {
1251
+ existing.score += rrfScore;
1252
+ } else {
1253
+ scores.set(result.chunk.id, { score: rrfScore, chunk: result.chunk });
1254
+ }
1255
+ }
1256
+ return Array.from(scores.values()).sort((a, b) => b.score - a.score).map(({ score, chunk }) => ({ chunk, score, distance: 0 }));
1257
+ }
1258
+ function createHybridSearch(vectorStore, bm25Index, config) {
1259
+ const k = config?.k ?? 60;
1260
+ const vectorWeight = config?.vectorWeight ?? 1;
1261
+ const bm25Weight = config?.bm25Weight ?? 1;
1262
+ const defaultTopK = config?.topK ?? 10;
1263
+ return {
1264
+ async search(query, queryEmbedding, topK) {
1265
+ const limit = topK ?? defaultTopK;
1266
+ const [vectorResults, bm25Results] = await Promise.all([
1267
+ vectorStore.query(queryEmbedding, { topK: limit }),
1268
+ Promise.resolve(bm25Index.search(query, limit))
1269
+ ]);
1270
+ const fused = reciprocalRankFusion(vectorResults, bm25Results, k, vectorWeight, bm25Weight);
1271
+ return fused.slice(0, limit);
1272
+ }
1273
+ };
1274
+ }
952
1275
  export {
953
1276
  vectorStoreRegistry,
954
1277
  textLoader,
@@ -965,9 +1288,14 @@ export {
965
1288
  fixedSizeChunker,
966
1289
  embeddingProviderRegistry,
967
1290
  csvLoader,
1291
+ createQdrantStore,
968
1292
  createPgVectorStore,
969
1293
  createOpenAIEmbeddings,
970
1294
  createMockEmbeddings,
971
1295
  createInMemoryStore,
1296
+ createHybridSearch,
1297
+ createGoogleEmbeddings,
1298
+ createCohereEmbeddings,
1299
+ createBM25Index,
972
1300
  cosineSimilarity
973
1301
  };
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,iBAAiB,EAAwB,MAAM,cAAc,CAAA;AAE3E,OAAO,KAAK,EAEX,cAAc,EACd,QAAQ,EAER,eAAe,EACf,UAAU,EACV,YAAY,EACZ,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,MAAM,SAAS,CAAA;AAChB,OAAO,EAAE,KAAK,WAAW,EAAuB,MAAM,eAAe,CAAA;AAErE,MAAM,WAAW,iBAAiB;IACjC,MAAM,CAAC,EAAE,UAAU,CAAA;IACnB,QAAQ,CAAC,EAAE,cAAc,CAAA;IACzB,UAAU,EAAE,eAAe,CAAA;IAC3B,KAAK,CAAC,EAAE,iBAAiB,CAAA;IACzB,SAAS,CAAC,EAAE,eAAe,CAAA;CAC3B;AAED,MAAM,WAAW,WAAW;IAC3B,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IAC9D,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IACzD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;IACvE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;IACtB,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,CAAA;IACxB,QAAQ,CAAC,iBAAiB,EAAE,iBAAiB,CAAA;IAC7C,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAA;CACjC;AAED,MAAM,WAAW,YAAY;IAC5B,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,WAAW,EAAE,MAAM,CAAA;CACnB;AAED,wBAAgB,GAAG,CAAC,MAAM,EAAE,iBAAiB,GAAG,WAAW,CA8E1D"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,iBAAiB,EAAwB,MAAM,cAAc,CAAA;AAE3E,OAAO,KAAK,EAEX,cAAc,EACd,QAAQ,EAER,eAAe,EACf,UAAU,EACV,YAAY,EACZ,eAAe,EACf,eAAe,EACf,iBAAiB,EACjB,MAAM,SAAS,CAAA;AAChB,OAAO,EAAE,KAAK,WAAW,EAA4C,MAAM,eAAe,CAAA;AAE1F,MAAM,WAAW,iBAAiB;IACjC,MAAM,CAAC,EAAE,UAAU,CAAA;IACnB,QAAQ,CAAC,EAAE,cAAc,CAAA;IACzB,UAAU,EAAE,eAAe,CAAA;IAC3B,KAAK,CAAC,EAAE,iBAAiB,CAAA;IACzB,SAAS,CAAC,EAAE,eAAe,CAAA;CAC3B;AAED,MAAM,WAAW,WAAW;IAC3B,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IAC9D,cAAc,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,YAAY,CAAC,CAAA;IACzD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAAA;IACvE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAA;IACtB,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC,CAAA;IACxB,QAAQ,CAAC,iBAAiB,EAAE,iBAAiB,CAAA;IAC7C,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAA;CACjC;AAED,MAAM,WAAW,YAAY;IAC5B,UAAU,EAAE,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,WAAW,EAAE,MAAM,CAAA;CACnB;AAED,wBAAgB,GAAG,CAAC,MAAM,EAAE,iBAAiB,GAAG,WAAW,CAsF1D"}
@@ -0,0 +1,8 @@
1
+ import type { EmbeddingProvider } from '../embeddings';
2
+ export interface CohereEmbeddingsConfig {
3
+ apiKey: string;
4
+ model?: string;
5
+ inputType?: string;
6
+ }
7
+ export declare function createCohereEmbeddings(config: CohereEmbeddingsConfig): EmbeddingProvider;
8
+ //# sourceMappingURL=cohere-embeddings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cohere-embeddings.d.ts","sourceRoot":"","sources":["../../src/providers/cohere-embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAA;AAItD,MAAM,WAAW,sBAAsB;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,sBAAsB,GAAG,iBAAiB,CA2DxF"}
@@ -0,0 +1,8 @@
1
+ import type { EmbeddingProvider } from '../embeddings';
2
+ export interface GoogleEmbeddingsConfig {
3
+ apiKey: string;
4
+ model?: string;
5
+ dimensions?: number;
6
+ }
7
+ export declare function createGoogleEmbeddings(config: GoogleEmbeddingsConfig): EmbeddingProvider;
8
+ //# sourceMappingURL=google-embeddings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"google-embeddings.d.ts","sourceRoot":"","sources":["../../src/providers/google-embeddings.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAA;AAItD,MAAM,WAAW,sBAAsB;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,sBAAsB,GAAG,iBAAiB,CAgExF"}
@@ -1,3 +1,5 @@
1
1
  export { createPgVectorStore } from './pgvector';
2
2
  export type { PgVectorStoreConfig } from './pgvector';
3
+ export { createQdrantStore } from './qdrant';
4
+ export type { QdrantStoreConfig } from './qdrant';
3
5
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stores/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAChD,YAAY,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stores/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAChD,YAAY,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAErD,OAAO,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAA;AAC5C,YAAY,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAA"}
@@ -0,0 +1,9 @@
1
+ import type { VectorStore } from '../vectorstore';
2
+ export interface QdrantStoreConfig {
3
+ url: string;
4
+ apiKey?: string;
5
+ collectionName: string;
6
+ dimensions: number;
7
+ }
8
+ export declare function createQdrantStore(config: QdrantStoreConfig): VectorStore;
9
+ //# sourceMappingURL=qdrant.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"qdrant.d.ts","sourceRoot":"","sources":["../../src/stores/qdrant.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAA;AAGjD,MAAM,WAAW,iBAAiB;IACjC,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,cAAc,EAAE,MAAM,CAAA;IACtB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,iBAAiB,GAAG,WAAW,CA+GxE"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elsium-ai/rag",
3
- "version": "0.3.0",
3
+ "version": "0.4.1",
4
4
  "description": "RAG pipeline, document processing, embeddings, and vector stores for ElsiumAI",
5
5
  "license": "MIT",
6
6
  "author": "Eric Utrera <ebutrera9103@gmail.com>",
@@ -26,7 +26,7 @@
26
26
  "dev": "bun --watch src/index.ts"
27
27
  },
28
28
  "dependencies": {
29
- "@elsium-ai/core": "^0.3.0"
29
+ "@elsium-ai/core": "^0.4.1"
30
30
  },
31
31
  "devDependencies": {
32
32
  "typescript": "^5.7.0"