@rws-framework/ai-tools 2.2.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,17 +4,19 @@ import { ConsoleService, RWSConfigService, RWSErrorCodes} from '@rws-framework/s
4
4
  import { InjectServices } from '@rws-framework/server/src/services/_inject';
5
5
  import RWSPrompt from '../prompts/_prompt';
6
6
  import { IRWSPromptJSON, ILLMChunk } from '../../types/IPrompt';
7
- import {VectorStoreService} from '../../services/VectorStoreService';
8
- import RWSVectorStore, { VectorDocType } from './VectorStore';
7
+
8
+ import RWSVectorStore, { VectorDocType, IVectorStoreConfig } from './VectorStore';
9
9
 
10
10
  import { Document } from '@langchain/core/documents';
11
11
  import { UnstructuredLoader } from '@langchain/community/document_loaders/fs/unstructured';
12
+ import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
12
13
 
13
14
  import { BaseChatModel } from "@langchain/core/language_models/chat_models";
14
15
  import { BaseLanguageModelInterface, BaseLanguageModelInput } from '@langchain/core/language_models/base';
15
16
  import { Runnable } from '@langchain/core/runnables';
16
17
  import { BaseMessage } from '@langchain/core/messages';
17
-
18
+ import { EmbeddingsInterface } from '@langchain/core/embeddings';
19
+ import { CohereEmbeddings } from '@langchain/cohere';
18
20
 
19
21
  import { v4 as uuid } from 'uuid';
20
22
  import xml2js from 'xml2js';
@@ -59,6 +61,12 @@ interface IChainCallOutput {
59
61
  text: string
60
62
  }
61
63
 
64
+ interface IEmbeddingsConfig {
65
+ provider: 'cohere';
66
+ apiKey: string;
67
+ model?: string;
68
+ }
69
+
62
70
  interface IEmbeddingsHandler<T extends object> {
63
71
  generateEmbeddings: (text?: string) => Promise<T>
64
72
  storeEmbeddings: (embeddings: any, convoId: string) => Promise<void>
@@ -66,41 +74,64 @@ interface IEmbeddingsHandler<T extends object> {
66
74
 
67
75
  type LLMType = BaseLanguageModelInterface | Runnable<BaseLanguageModelInput, string> | Runnable<BaseLanguageModelInput, BaseMessage>;
68
76
 
69
- @InjectServices([VectorStoreService])
70
77
  class EmbedLoader<LLMChat extends BaseChatModel> {
71
78
  private loader: UnstructuredLoader;
72
- private embeddings: IEmbeddingsHandler<any>;
79
+ private embeddings: EmbeddingsInterface;
80
+ private docSplitter: RecursiveCharacterTextSplitter;
73
81
 
74
82
  private docs: Document[] = [];
75
83
  private _initiated = false;
76
84
  private convo_id: string;
77
85
  private llmChat: LLMChat;
78
- private chatConstructor: new (config: any) => LLMChat;
86
+
79
87
  private thePrompt: RWSPrompt;
80
-
81
- vectorStoreService: VectorStoreService;
82
- configService: RWSConfigService<IAiCfg>;
88
+ private vectorStoreConfig: IVectorStoreConfig;
89
+
90
+ configService: RWSConfigService<any>;
83
91
 
84
92
  public _baseSplitterParams: ISplitterParams;
85
93
 
86
- constructor(
87
- chatConstructor: new (config: any) => LLMChat,
88
- embeddings: IEmbeddingsHandler<any> | null = null,
94
+ constructor(
95
+ embeddingsConfig: IEmbeddingsConfig | null = null,
89
96
  convoId: string | null = null,
90
97
  baseSplitterParams: ISplitterParams = {
91
98
  chunkSize: 400,
92
99
  chunkOverlap: 80,
93
100
  separators: ['/n/n','.']
94
- }
101
+ },
102
+ vectorStoreConfig: IVectorStoreConfig = { type: 'memory' }
95
103
  ) {
96
- this.embeddings = embeddings;
104
+ if (embeddingsConfig) {
105
+ this.initializeEmbeddings(embeddingsConfig);
106
+ }
107
+
97
108
  if(convoId === null) {
98
109
  this.convo_id = EmbedLoader.uuid();
99
110
  } else {
100
111
  this.convo_id = convoId;
101
112
  }
102
- this.chatConstructor = chatConstructor;
103
- this._baseSplitterParams = baseSplitterParams;
113
+
114
+ this._baseSplitterParams = baseSplitterParams;
115
+ this.vectorStoreConfig = vectorStoreConfig;
116
+
117
+ this.docSplitter = new RecursiveCharacterTextSplitter({
118
+ chunkSize: baseSplitterParams.chunkSize,
119
+ chunkOverlap: baseSplitterParams.chunkOverlap,
120
+ separators: baseSplitterParams.separators
121
+ });
122
+ }
123
+
124
+ private initializeEmbeddings(config: IEmbeddingsConfig): void {
125
+ switch (config.provider) {
126
+ case 'cohere':
127
+ this.embeddings = new CohereEmbeddings({
128
+ apiKey: config.apiKey,
129
+ model: config.model || 'embed-english-v3.0'
130
+ });
131
+ break;
132
+ default:
133
+ throw new Error(`Unsupported embedding provider: ${config.provider}`);
134
+ }
104
135
  }
105
136
 
106
137
  static uuid(): string
@@ -133,7 +164,6 @@ class EmbedLoader<LLMChat extends BaseChatModel> {
133
164
 
134
165
  async splitDocs(filePath: string, params: ISplitterParams): Promise<RWSVectorStore>
135
166
  {
136
-
137
167
  if(!this.embeddings){
138
168
  throw new Error('No embeddings provided for ConvoLoader\'s constructor. ConvoLoader.splitDocs aborting...');
139
169
  }
@@ -145,23 +175,17 @@ class EmbedLoader<LLMChat extends BaseChatModel> {
145
175
  console.log(`Split dir ${ConsoleService.color().magentaBright(splitDir)} doesn't exist. Splitting docs...`);
146
176
  this.loader = new UnstructuredLoader(filePath);
147
177
 
148
- // this.docSplitter = new RecursiveCharacterTextSplitter({
149
- // chunkSize: params.chunkSize, // The size of the chunk that should be split.
150
- // chunkOverlap: params.chunkOverlap, // Adding overalap so that if a text is broken inbetween, next document may have part of the previous document
151
- // separators: params.separators // In this case we are assuming that /n/n would mean one whole sentence. In case there is no nearing /n/n then "." will be used instead. This can be anything that helps derive a complete sentence .
152
- // });
153
-
154
178
  fs.mkdirSync(splitDir, { recursive: true });
155
179
 
156
180
  const orgDocs = await this.loader.load();
157
- const splitDocs: any[] = [];//await this.docSplitter.splitDocuments(orgDocs);
181
+ const splitDocs = await this.docSplitter.splitDocuments(orgDocs);
158
182
 
159
183
  const avgCharCountPre = this.avgDocLength(orgDocs);
160
184
  const avgCharCountPost = this.avgDocLength(splitDocs);
161
185
 
162
186
  logConvo(`Average length among ${orgDocs.length} documents loaded is ${avgCharCountPre} characters.`);
163
187
  logConvo(`After the split we have ${splitDocs.length} documents more than the original ${orgDocs.length}.`);
164
- logConvo(`Average length among ${orgDocs.length} documents (after split) is ${avgCharCountPost} characters.`);
188
+ logConvo(`Average length among ${splitDocs.length} documents (after split) is ${avgCharCountPost} characters.`);
165
189
 
166
190
  let i = 0;
167
191
  splitDocs.forEach((doc: Document) => {
@@ -177,17 +201,75 @@ class EmbedLoader<LLMChat extends BaseChatModel> {
177
201
  finalDocs.push(new Document({ pageContent: txt }));
178
202
  }
179
203
  }
180
-
181
- return await this.vectorStoreService.createStore(finalDocs, await this.embeddings.generateEmbeddings());
204
+
205
+ const vectorStore = new RWSVectorStore(finalDocs, this.embeddings, this.vectorStoreConfig);
206
+ return await vectorStore.init();
182
207
  }
183
208
 
184
209
  async similaritySearch(query: string, splitCount: number, store: RWSVectorStore): Promise<string>
185
210
  {
186
211
  console.log('Store is ready. Searching for embedds...');
187
- const texts = await store.getFaiss().similaritySearchWithScore(`${query}`, splitCount);
212
+ const texts = await store.similaritySearchWithScore(query, splitCount);
188
213
  console.log('Found best parts: ' + texts.length);
189
214
  return texts.map(([doc, score]: [any, number]) => `${doc['pageContent']}`).join('\n\n');
190
215
  }
216
+
217
+ /**
218
+ * Index text content directly without file loading
219
+ */
220
+ async indexTextContent(
221
+ content: string,
222
+ documentId: string | number,
223
+ metadata: Record<string, any> = {}
224
+ ): Promise<RWSVectorStore> {
225
+ if (!this.embeddings) {
226
+ throw new Error('No embeddings provided for ConvoLoader. Cannot index text content.');
227
+ }
228
+
229
+ // Split the content into chunks
230
+ const docs = await this.docSplitter.createDocuments([content], [{
231
+ documentId,
232
+ ...metadata
233
+ }]);
234
+
235
+ // Create and initialize vector store
236
+ const vectorStore = new RWSVectorStore(docs, this.embeddings, this.vectorStoreConfig);
237
+ return await vectorStore.init();
238
+ }
239
+
240
+ /**
241
+ * Search for similar content with detailed results
242
+ */
243
+ async searchSimilarWithDetails(
244
+ query: string,
245
+ store: RWSVectorStore,
246
+ maxResults: number = 5,
247
+ threshold: number = 0.7
248
+ ): Promise<Array<{ content: string; score: number; metadata: any }>> {
249
+ const results = await store.similaritySearchWithScore(query, maxResults);
250
+
251
+ return results
252
+ .filter(([_, score]) => score >= threshold)
253
+ .map(([doc, score]) => ({
254
+ content: doc.pageContent,
255
+ score,
256
+ metadata: doc.metadata || {}
257
+ }));
258
+ }
259
+
260
+ /**
261
+ * Get or create embeddings instance
262
+ */
263
+ getEmbeddings(): EmbeddingsInterface {
264
+ return this.embeddings;
265
+ }
266
+
267
+ /**
268
+ * Update embeddings configuration
269
+ */
270
+ updateEmbeddingsConfig(config: IEmbeddingsConfig): void {
271
+ this.initializeEmbeddings(config);
272
+ }
191
273
 
192
274
  private async debugCall(debugCallback: (debugData: IConvoDebugXMLData) => Promise<IConvoDebugXMLData> = null)
193
275
  {
@@ -300,4 +382,4 @@ class EmbedLoader<LLMChat extends BaseChatModel> {
300
382
 
301
383
  }
302
384
 
303
- export { EmbedLoader, IChainCallOutput, IConvoDebugXMLData, IEmbeddingsHandler, ISplitterParams, IBaseLangchainHyperParams };
385
+ export { EmbedLoader, IChainCallOutput, IConvoDebugXMLData, IEmbeddingsHandler, IEmbeddingsConfig, ISplitterParams, IBaseLangchainHyperParams };
@@ -1,36 +1,114 @@
1
1
  import { FaissStore } from '@langchain/community/vectorstores/faiss';
2
+ import { MemoryVectorStore } from 'langchain/vectorstores/memory';
2
3
  import { EmbeddingsInterface } from '@langchain/core/embeddings';
3
4
  import { Document } from '@langchain/core/documents';
4
5
 
5
6
  type VectorDocType = Document<Record<string, any>>[];
6
7
 
8
+ export interface IVectorStoreConfig {
9
+ type: 'faiss' | 'memory';
10
+ persistPath?: string;
11
+ }
12
+
7
13
  export default class RWSVectorStore
8
14
  {
9
- private faiss: FaissStore;
15
+ private vectorStore: FaissStore | MemoryVectorStore;
10
16
  private docs: VectorDocType;
11
17
  private embeddings: EmbeddingsInterface;
18
+ private config: IVectorStoreConfig;
12
19
 
13
- constructor(docs: VectorDocType, embeddings: EmbeddingsInterface){
20
+ constructor(docs: VectorDocType, embeddings: EmbeddingsInterface, config: IVectorStoreConfig = { type: 'memory' }){
14
21
  this.docs = docs;
15
22
  this.embeddings = embeddings;
23
+ this.config = config;
16
24
  }
17
25
 
18
26
  async init(): Promise<RWSVectorStore>
19
27
  {
20
- this.faiss = await FaissStore.fromDocuments(this.docs, this.embeddings);
28
+ if (this.config.type === 'faiss') {
29
+ this.vectorStore = await FaissStore.fromDocuments(this.docs, this.embeddings);
30
+ } else {
31
+ this.vectorStore = await MemoryVectorStore.fromDocuments(this.docs, this.embeddings);
32
+ }
21
33
 
22
34
  return this;
23
35
  }
24
36
 
37
+ getVectorStore(): FaissStore | MemoryVectorStore
38
+ {
39
+ return this.vectorStore;
40
+ }
41
+
25
42
  getFaiss(): FaissStore
26
43
  {
27
- return this.faiss;
44
+ if (this.vectorStore instanceof FaissStore) {
45
+ return this.vectorStore;
46
+ }
47
+ throw new Error('Vector store is not a FAISS instance');
48
+ }
49
+
50
+ getMemoryStore(): MemoryVectorStore
51
+ {
52
+ if (this.vectorStore instanceof MemoryVectorStore) {
53
+ return this.vectorStore;
54
+ }
55
+ throw new Error('Vector store is not a Memory instance');
28
56
  }
29
57
 
30
58
  getDocs()
31
59
  {
32
60
  return this.docs;
33
61
  }
62
+
63
+ /**
64
+ * Add more documents to the vector store
65
+ */
66
+ async addDocuments(newDocs: VectorDocType): Promise<void> {
67
+ await this.vectorStore.addDocuments(newDocs);
68
+ this.docs.push(...newDocs);
69
+ }
70
+
71
+ /**
72
+ * Search for similar documents
73
+ */
74
+ async similaritySearchWithScore(query: string, k: number = 4): Promise<[Document, number][]> {
75
+ return await this.vectorStore.similaritySearchWithScore(query, k);
76
+ }
77
+
78
+ /**
79
+ * Search for similar documents using vector
80
+ */
81
+ async similaritySearchVectorWithScore(embedding: number[], k: number = 4): Promise<[Document, number][]> {
82
+ return await this.vectorStore.similaritySearchVectorWithScore(embedding, k);
83
+ }
84
+
85
+ /**
86
+ * Delete documents (if supported)
87
+ */
88
+ async deleteDocuments(ids: string[]): Promise<void> {
89
+ if ('delete' in this.vectorStore) {
90
+ await (this.vectorStore as any).delete({ ids });
91
+ }
92
+ }
93
+
94
+ /**
95
+ * Save the vector store (FAISS only)
96
+ */
97
+ async save(path?: string): Promise<void> {
98
+ if (this.vectorStore instanceof FaissStore) {
99
+ await this.vectorStore.save(path || this.config.persistPath || './vector_store');
100
+ }
101
+ }
102
+
103
+ /**
104
+ * Load a vector store from disk (FAISS only)
105
+ */
106
+ static async load(path: string, embeddings: EmbeddingsInterface): Promise<RWSVectorStore> {
107
+ const faissStore = await FaissStore.load(path, embeddings);
108
+ const vectorStore = new RWSVectorStore([], embeddings, { type: 'faiss', persistPath: path });
109
+ vectorStore.vectorStore = faissStore;
110
+ return vectorStore;
111
+ }
34
112
  }
35
113
 
36
114
  export {
@@ -69,7 +69,7 @@ class RWSPrompt implements IPromptInstance {
69
69
  }
70
70
 
71
71
  // Delegation methods for input/output management
72
- listen(source: string, stream: boolean = true): RWSPrompt {
72
+ listen(source: string | object, stream: boolean = true): RWSPrompt {
73
73
  this.ioManager.listen(source, stream);
74
74
  return this;
75
75
  }
@@ -108,7 +108,7 @@ class RWSPrompt implements IPromptInstance {
108
108
  return this;
109
109
  }
110
110
 
111
- readOutput(): string {
111
+ readOutput(): string | object {
112
112
  return this.ioManager.readOutput();
113
113
  }
114
114
 
@@ -7,11 +7,11 @@ import {
7
7
  } from './types';
8
8
 
9
9
  export interface IPromptInstance {
10
- readOutput(): string;
10
+ readOutput(): string | object;
11
11
  readInput(): any[];
12
12
  getInput(): any[];
13
13
  setSentInput(input: any[]): void;
14
- injestOutput(content: string): void;
14
+ injestOutput(content: string | object): void;
15
15
  }
16
16
 
17
17
  export class ExecutionMethodsHandler {
@@ -5,7 +5,7 @@ export class InputOutputManager {
5
5
  private enhancedInput: IPromptEnchantment[] = [];
6
6
  private sentInput: CompoundInput[] = [];
7
7
  private originalInput: CompoundInput[] = [];
8
- private output: string = '';
8
+ private output: string | object = null;
9
9
  private onStream: (chunk: string) => void = () => {};
10
10
 
11
11
  constructor(input: CompoundInput[]) {
@@ -54,18 +54,20 @@ export class InputOutputManager {
54
54
  this.output = content;
55
55
  }
56
56
 
57
- readOutput(): string {
57
+ readOutput(): string | object{
58
58
  return this.output;
59
59
  }
60
60
 
61
- listen(source: string, stream: boolean = true): void {
62
- this.output = '';
61
+ listen(source: string | object, stream: boolean = true): void {
62
+ if(stream){
63
+ this.output = '';
64
+ }
63
65
 
64
66
  if (!stream) {
65
67
  this.output = source;
66
68
  } else {
67
- this.output += source;
68
- this.onStream(source);
69
+ this.output += source as string;
70
+ this.onStream(source as string);
69
71
  }
70
72
  }
71
73
 
@@ -114,7 +116,7 @@ export class InputOutputManager {
114
116
  return this.originalInput;
115
117
  }
116
118
 
117
- getOutput(): string {
119
+ getOutput(): string | object {
118
120
  return this.output;
119
121
  }
120
122
  }
@@ -0,0 +1,222 @@
1
+ import { Injectable } from '@nestjs/common';
2
+ import { Embeddings } from '@langchain/core/embeddings';
3
+ import { CohereEmbeddings } from '@langchain/cohere';
4
+ import { Document } from '@langchain/core/documents';
5
+ import { IEmbeddingConfig, IChunkConfig } from '../types';
6
+ import { TextChunker } from './TextChunker';
7
+ import RWSVectorStore, { VectorDocType, IVectorStoreConfig } from '../models/convo/VectorStore';
8
+
9
+ @Injectable()
10
+ export class LangChainEmbeddingService {
11
+ private embeddings: Embeddings;
12
+ private config: IEmbeddingConfig;
13
+ private chunkConfig: IChunkConfig;
14
+ private isInitialized = false;
15
+ private vectorStore: RWSVectorStore | null = null;
16
+
17
+ constructor() {
18
+ // Empty constructor for NestJS dependency injection
19
+ }
20
+
21
+ /**
22
+ * Initialize the service with configuration
23
+ */
24
+ async initialize(config: IEmbeddingConfig, chunkConfig?: IChunkConfig): Promise<void> {
25
+ if (this.isInitialized) {
26
+ return;
27
+ }
28
+
29
+ this.config = config;
30
+ this.chunkConfig = chunkConfig || {
31
+ chunkSize: 1000,
32
+ chunkOverlap: 200
33
+ };
34
+ this.initializeEmbeddings();
35
+ this.isInitialized = true;
36
+ }
37
+
38
+ /**
39
+ * Alternative constructor-like method for backward compatibility
40
+ */
41
+ static create(config: IEmbeddingConfig, chunkConfig?: IChunkConfig): LangChainEmbeddingService {
42
+ const service = new LangChainEmbeddingService();
43
+ service.config = config;
44
+ service.chunkConfig = chunkConfig || {
45
+ chunkSize: 1000,
46
+ chunkOverlap: 200
47
+ };
48
+ service.initializeEmbeddings();
49
+ service.isInitialized = true;
50
+ return service;
51
+ }
52
+
53
+ private initializeEmbeddings(): void {
54
+ switch (this.config.provider) {
55
+ case 'cohere':
56
+ this.embeddings = new CohereEmbeddings({
57
+ apiKey: this.config.apiKey,
58
+ model: this.config.model || 'embed-v4.0',
59
+ batchSize: this.config.batchSize || 96
60
+ });
61
+ break;
62
+
63
+ default:
64
+ throw new Error(`Unsupported embedding provider: ${this.config.provider}`);
65
+ }
66
+ }
67
+
68
+ private initializeTextSplitter(chunkConfig?: IChunkConfig): void {
69
+ // Text chunking is now handled by TextChunker class
70
+ // This method is kept for compatibility but doesn't initialize anything
71
+ }
72
+
73
+ /**
74
+ * Generate embeddings for multiple texts
75
+ */
76
+ async embedTexts(texts: string[]): Promise<number[][]> {
77
+ this.ensureInitialized();
78
+ return await this.embeddings.embedDocuments(texts);
79
+ }
80
+
81
+ /**
82
+ * Generate embedding for a single text
83
+ */
84
+ async embedText(text: string): Promise<number[]> {
85
+ this.ensureInitialized();
86
+ return await this.embeddings.embedQuery(text);
87
+ }
88
+
89
+ /**
90
+ * Split text into chunks
91
+ */
92
+ async chunkText(text: string): Promise<string[]> {
93
+ this.ensureInitialized();
94
+
95
+ // Use our custom TextChunker instead of LangChain's splitter
96
+ // Use safe token limits - the TextChunker handles token estimation internally
97
+ const maxTokens = 450; // Safe token limit for embedding models
98
+ const overlap = this.chunkConfig?.chunkOverlap || 50; // Character overlap, not token
99
+
100
+ return TextChunker.chunkText(text, maxTokens, overlap);
101
+ }
102
+
103
+ /**
104
+ * Split text and generate embeddings for chunks
105
+ */
106
+ async chunkAndEmbed(text: string): Promise<{ text: string; embedding: number[] }[]> {
107
+ this.ensureInitialized();
108
+ const chunks = await this.chunkText(text);
109
+ const embeddings = await this.embedTexts(chunks);
110
+
111
+ return chunks.map((chunk, index) => ({
112
+ text: chunk,
113
+ embedding: embeddings[index]
114
+ }));
115
+ }
116
+
117
+ /**
118
+ * Create LangChain documents from text with metadata
119
+ */
120
+ async createDocuments(text: string, metadata: Record<string, any> = {}): Promise<Document[]> {
121
+ this.ensureInitialized();
122
+ const chunks = await this.chunkText(text);
123
+
124
+ return chunks.map((chunk, index) => new Document({
125
+ pageContent: chunk,
126
+ metadata: {
127
+ ...metadata,
128
+ chunkIndex: index,
129
+ id: `${metadata.documentId || 'doc'}_chunk_${index}`
130
+ }
131
+ }));
132
+ }
133
+
134
+ /**
135
+ * Get the underlying LangChain embeddings instance
136
+ */
137
+ getEmbeddingsInstance(): Embeddings {
138
+ this.ensureInitialized();
139
+ return this.embeddings;
140
+ }
141
+
142
+ /**
143
+ * Update configuration and reinitialize
144
+ */
145
+ updateConfig(newConfig: Partial<IEmbeddingConfig>): void {
146
+ this.config = { ...this.config, ...newConfig };
147
+ this.initializeEmbeddings();
148
+ }
149
+
150
+ /**
151
+ * Calculate cosine similarity between two vectors
152
+ */
153
+ cosineSimilarity(vecA: number[], vecB: number[]): number {
154
+ if (vecA.length !== vecB.length) {
155
+ throw new Error('Vectors must have the same length');
156
+ }
157
+
158
+ let dotProduct = 0;
159
+ let normA = 0;
160
+ let normB = 0;
161
+
162
+ for (let i = 0; i < vecA.length; i++) {
163
+ dotProduct += vecA[i] * vecB[i];
164
+ normA += vecA[i] * vecA[i];
165
+ normB += vecB[i] * vecB[i];
166
+ }
167
+
168
+ return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
169
+ }
170
+
171
+ /**
172
+ * Ensure the service is initialized
173
+ */
174
+ private ensureInitialized(): void {
175
+ if (!this.isInitialized) {
176
+ throw new Error('LangChainEmbeddingService not initialized. Call initialize() first.');
177
+ }
178
+ }
179
+
180
+ /**
181
+ * Create a vector store for similarity search like the LangChain tutorial
182
+ * This allows us to use vectorStore.similaritySearch() just like in the tutorial
183
+ */
184
+ async createVectorStore(documents: Document[], config?: { type?: 'memory' | 'faiss'; persistPath?: string }): Promise<RWSVectorStore> {
185
+ this.ensureInitialized();
186
+
187
+ const vectorStoreConfig = {
188
+ type: config?.type || 'memory' as const,
189
+ persistPath: config?.persistPath
190
+ };
191
+
192
+ const vectorStore = await new RWSVectorStore(
193
+ documents,
194
+ this.embeddings,
195
+ vectorStoreConfig
196
+ ).init();
197
+
198
+ return vectorStore;
199
+ }
200
+
201
+ /**
202
+ * Perform similarity search on a vector store (tutorial-style interface)
203
+ * Usage: const results = await embeddingService.similaritySearch(vectorStore, query, k)
204
+ */
205
+ async similaritySearch(vectorStore: RWSVectorStore, query: string, k: number = 4): Promise<Document[]> {
206
+ this.ensureInitialized();
207
+
208
+ // Use RWSVectorStore's similarity search (returns documents without scores)
209
+ const resultsWithScores = await vectorStore.similaritySearchWithScore(query, k);
210
+ return resultsWithScores.map(([doc, _score]) => doc);
211
+ }
212
+
213
+ /**
214
+ * Perform similarity search with scores (tutorial-style interface)
215
+ * Usage: const results = await embeddingService.similaritySearchWithScore(vectorStore, query, k)
216
+ */
217
+ async similaritySearchWithScore(vectorStore: RWSVectorStore, query: string, k: number = 4): Promise<[Document, number][]> {
218
+ this.ensureInitialized();
219
+
220
+ return await vectorStore.similaritySearchWithScore(query, k);
221
+ }
222
+ }