@hazeljs/rag 0.2.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +379 -0
- package/dist/agentic/agentic-rag.service.d.ts +49 -0
- package/dist/agentic/agentic-rag.service.d.ts.map +1 -0
- package/dist/agentic/agentic-rag.service.js +149 -0
- package/dist/agentic/agentic-rag.service.js.map +1 -0
- package/dist/agentic/decorators/active-learning.decorator.d.ts +19 -0
- package/dist/agentic/decorators/active-learning.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/active-learning.decorator.js +98 -0
- package/dist/agentic/decorators/active-learning.decorator.js.map +1 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts +16 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.js +114 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.js.map +1 -0
- package/dist/agentic/decorators/cached.decorator.d.ts +18 -0
- package/dist/agentic/decorators/cached.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/cached.decorator.js +93 -0
- package/dist/agentic/decorators/cached.decorator.js.map +1 -0
- package/dist/agentic/decorators/context-aware.decorator.d.ts +16 -0
- package/dist/agentic/decorators/context-aware.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/context-aware.decorator.js +169 -0
- package/dist/agentic/decorators/context-aware.decorator.js.map +1 -0
- package/dist/agentic/decorators/corrective-rag.decorator.d.ts +15 -0
- package/dist/agentic/decorators/corrective-rag.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/corrective-rag.decorator.js +146 -0
- package/dist/agentic/decorators/corrective-rag.decorator.js.map +1 -0
- package/dist/agentic/decorators/hyde.decorator.d.ts +14 -0
- package/dist/agentic/decorators/hyde.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/hyde.decorator.js +90 -0
- package/dist/agentic/decorators/hyde.decorator.js.map +1 -0
- package/dist/agentic/decorators/index.d.ts +16 -0
- package/dist/agentic/decorators/index.d.ts.map +1 -0
- package/dist/agentic/decorators/index.js +32 -0
- package/dist/agentic/decorators/index.js.map +1 -0
- package/dist/agentic/decorators/multi-hop.decorator.d.ts +14 -0
- package/dist/agentic/decorators/multi-hop.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/multi-hop.decorator.js +120 -0
- package/dist/agentic/decorators/multi-hop.decorator.js.map +1 -0
- package/dist/agentic/decorators/query-planner.decorator.d.ts +19 -0
- package/dist/agentic/decorators/query-planner.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/query-planner.decorator.js +227 -0
- package/dist/agentic/decorators/query-planner.decorator.js.map +1 -0
- package/dist/agentic/decorators/query-rewriter.decorator.d.ts +15 -0
- package/dist/agentic/decorators/query-rewriter.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/query-rewriter.decorator.js +140 -0
- package/dist/agentic/decorators/query-rewriter.decorator.js.map +1 -0
- package/dist/agentic/decorators/self-reflective.decorator.d.ts +18 -0
- package/dist/agentic/decorators/self-reflective.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/self-reflective.decorator.js +210 -0
- package/dist/agentic/decorators/self-reflective.decorator.js.map +1 -0
- package/dist/agentic/decorators/source-verification.decorator.d.ts +15 -0
- package/dist/agentic/decorators/source-verification.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/source-verification.decorator.js +121 -0
- package/dist/agentic/decorators/source-verification.decorator.js.map +1 -0
- package/dist/agentic/index.d.ts +9 -0
- package/dist/agentic/index.d.ts.map +1 -0
- package/dist/agentic/index.js +25 -0
- package/dist/agentic/index.js.map +1 -0
- package/dist/agentic/types.d.ts +209 -0
- package/dist/agentic/types.d.ts.map +1 -0
- package/dist/agentic/types.js +7 -0
- package/dist/agentic/types.js.map +1 -0
- package/dist/decorators/embeddable.decorator.d.ts +31 -0
- package/dist/decorators/embeddable.decorator.d.ts.map +1 -0
- package/dist/decorators/embeddable.decorator.js +44 -0
- package/dist/decorators/embeddable.decorator.js.map +1 -0
- package/dist/decorators/rag.decorator.d.ts +58 -0
- package/dist/decorators/rag.decorator.d.ts.map +1 -0
- package/dist/decorators/rag.decorator.js +78 -0
- package/dist/decorators/rag.decorator.js.map +1 -0
- package/dist/decorators/semantic-search.decorator.d.ts +69 -0
- package/dist/decorators/semantic-search.decorator.d.ts.map +1 -0
- package/dist/decorators/semantic-search.decorator.js +116 -0
- package/dist/decorators/semantic-search.decorator.js.map +1 -0
- package/dist/embeddings/cohere-embeddings.d.ts +33 -0
- package/dist/embeddings/cohere-embeddings.d.ts.map +1 -0
- package/dist/embeddings/cohere-embeddings.js +91 -0
- package/dist/embeddings/cohere-embeddings.js.map +1 -0
- package/dist/embeddings/openai-embeddings.d.ts +21 -0
- package/dist/embeddings/openai-embeddings.d.ts.map +1 -0
- package/dist/embeddings/openai-embeddings.js +53 -0
- package/dist/embeddings/openai-embeddings.js.map +1 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +54 -0
- package/dist/index.js.map +1 -0
- package/dist/memory/index.d.ts +11 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +31 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/memory-manager.d.ts +96 -0
- package/dist/memory/memory-manager.d.ts.map +1 -0
- package/dist/memory/memory-manager.js +369 -0
- package/dist/memory/memory-manager.js.map +1 -0
- package/dist/memory/memory-store.interface.d.ts +73 -0
- package/dist/memory/memory-store.interface.d.ts.map +1 -0
- package/dist/memory/memory-store.interface.js +6 -0
- package/dist/memory/memory-store.interface.js.map +1 -0
- package/dist/memory/stores/buffer-memory.d.ts +47 -0
- package/dist/memory/stores/buffer-memory.d.ts.map +1 -0
- package/dist/memory/stores/buffer-memory.js +280 -0
- package/dist/memory/stores/buffer-memory.js.map +1 -0
- package/dist/memory/stores/hybrid-memory.d.ts +49 -0
- package/dist/memory/stores/hybrid-memory.d.ts.map +1 -0
- package/dist/memory/stores/hybrid-memory.js +194 -0
- package/dist/memory/stores/hybrid-memory.js.map +1 -0
- package/dist/memory/stores/vector-memory.d.ts +48 -0
- package/dist/memory/stores/vector-memory.d.ts.map +1 -0
- package/dist/memory/stores/vector-memory.js +312 -0
- package/dist/memory/stores/vector-memory.js.map +1 -0
- package/dist/memory/types.d.ts +119 -0
- package/dist/memory/types.d.ts.map +1 -0
- package/dist/memory/types.js +18 -0
- package/dist/memory/types.js.map +1 -0
- package/dist/rag-pipeline-with-memory.d.ts +68 -0
- package/dist/rag-pipeline-with-memory.d.ts.map +1 -0
- package/dist/rag-pipeline-with-memory.js +186 -0
- package/dist/rag-pipeline-with-memory.js.map +1 -0
- package/dist/rag-pipeline.d.ts +59 -0
- package/dist/rag-pipeline.d.ts.map +1 -0
- package/dist/rag-pipeline.js +181 -0
- package/dist/rag-pipeline.js.map +1 -0
- package/dist/rag.module.d.ts +26 -0
- package/dist/rag.module.d.ts.map +1 -0
- package/dist/rag.module.js +40 -0
- package/dist/rag.module.js.map +1 -0
- package/dist/rag.service.d.ts +95 -0
- package/dist/rag.service.d.ts.map +1 -0
- package/dist/rag.service.js +174 -0
- package/dist/rag.service.js.map +1 -0
- package/dist/retrieval/bm25.d.ts +57 -0
- package/dist/retrieval/bm25.d.ts.map +1 -0
- package/dist/retrieval/bm25.js +106 -0
- package/dist/retrieval/bm25.js.map +1 -0
- package/dist/retrieval/hybrid-search.d.ts +48 -0
- package/dist/retrieval/hybrid-search.d.ts.map +1 -0
- package/dist/retrieval/hybrid-search.js +123 -0
- package/dist/retrieval/hybrid-search.js.map +1 -0
- package/dist/retrieval/multi-query.d.ts +38 -0
- package/dist/retrieval/multi-query.d.ts.map +1 -0
- package/dist/retrieval/multi-query.js +135 -0
- package/dist/retrieval/multi-query.js.map +1 -0
- package/dist/text-splitters/recursive-text-splitter.d.ts +21 -0
- package/dist/text-splitters/recursive-text-splitter.d.ts.map +1 -0
- package/dist/text-splitters/recursive-text-splitter.js +95 -0
- package/dist/text-splitters/recursive-text-splitter.js.map +1 -0
- package/dist/types/index.d.ts +144 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +16 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +16 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +58 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/vector-stores/chroma.store.d.ts +42 -0
- package/dist/vector-stores/chroma.store.d.ts.map +1 -0
- package/dist/vector-stores/chroma.store.js +242 -0
- package/dist/vector-stores/chroma.store.js.map +1 -0
- package/dist/vector-stores/memory-vector-store.d.ts +20 -0
- package/dist/vector-stores/memory-vector-store.d.ts.map +1 -0
- package/dist/vector-stores/memory-vector-store.js +94 -0
- package/dist/vector-stores/memory-vector-store.js.map +1 -0
- package/dist/vector-stores/pinecone.store.d.ts +34 -0
- package/dist/vector-stores/pinecone.store.d.ts.map +1 -0
- package/dist/vector-stores/pinecone.store.js +146 -0
- package/dist/vector-stores/pinecone.store.js.map +1 -0
- package/dist/vector-stores/qdrant.store.d.ts +31 -0
- package/dist/vector-stores/qdrant.store.d.ts.map +1 -0
- package/dist/vector-stores/qdrant.store.js +173 -0
- package/dist/vector-stores/qdrant.store.js.map +1 -0
- package/dist/vector-stores/weaviate.store.d.ts +37 -0
- package/dist/vector-stores/weaviate.store.d.ts.map +1 -0
- package/dist/vector-stores/weaviate.store.js +226 -0
- package/dist/vector-stores/weaviate.store.js.map +1 -0
- package/package.json +87 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* RAG Service
|
|
4
|
+
* Main service for RAG operations in HazelJS
|
|
5
|
+
*/
|
|
6
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
7
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
8
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
9
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
10
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
11
|
+
};
|
|
12
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
13
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
14
|
+
};
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.RAGService = void 0;
|
|
17
|
+
const core_1 = require("@hazeljs/core");
|
|
18
|
+
const rag_pipeline_1 = require("./rag-pipeline");
|
|
19
|
+
const types_1 = require("./types");
|
|
20
|
+
let RAGService = class RAGService {
|
|
21
|
+
constructor(config) {
|
|
22
|
+
this.config = config;
|
|
23
|
+
this.pipeline = new rag_pipeline_1.RAGPipeline({
|
|
24
|
+
vectorStore: config.vectorStore,
|
|
25
|
+
embeddingProvider: config.embeddingProvider,
|
|
26
|
+
textSplitter: config.textSplitter,
|
|
27
|
+
topK: config.topK,
|
|
28
|
+
}, config.llmFunction);
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Initialize the RAG service
|
|
32
|
+
*/
|
|
33
|
+
async initialize() {
|
|
34
|
+
await this.pipeline.initialize();
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Index a document or multiple documents
|
|
38
|
+
*/
|
|
39
|
+
async index(documents) {
|
|
40
|
+
const docs = Array.isArray(documents) ? documents : [documents];
|
|
41
|
+
return this.pipeline.addDocuments(docs);
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Search for similar documents
|
|
45
|
+
*/
|
|
46
|
+
async search(query, options) {
|
|
47
|
+
const { strategy, ...queryOptions } = options || {};
|
|
48
|
+
return this.pipeline.retrieve(query, queryOptions, strategy);
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Retrieve relevant context for a query
|
|
52
|
+
*/
|
|
53
|
+
async retrieve(query, options) {
|
|
54
|
+
return this.pipeline.retrieve(query, options);
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Generate an answer using RAG
|
|
58
|
+
*/
|
|
59
|
+
async generate(query, context) {
|
|
60
|
+
if (!this.config.llmFunction) {
|
|
61
|
+
throw new Error('LLM function not configured');
|
|
62
|
+
}
|
|
63
|
+
const contextStr = typeof context === 'string'
|
|
64
|
+
? context
|
|
65
|
+
: context.map((r, idx) => `[${idx + 1}] ${r.content}`).join('\n\n');
|
|
66
|
+
const prompt = `Based on the following context, answer the question.
|
|
67
|
+
|
|
68
|
+
Context:
|
|
69
|
+
${contextStr}
|
|
70
|
+
|
|
71
|
+
Question: ${query}
|
|
72
|
+
|
|
73
|
+
Answer:`;
|
|
74
|
+
return this.config.llmFunction(prompt);
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Full RAG pipeline: retrieve + generate
|
|
78
|
+
*/
|
|
79
|
+
async ask(query, options) {
|
|
80
|
+
const sources = await this.retrieve(query, options);
|
|
81
|
+
const answer = await this.generate(query, sources);
|
|
82
|
+
return { answer, sources };
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Multi-query RAG
|
|
86
|
+
* Generates multiple search queries and combines results
|
|
87
|
+
*/
|
|
88
|
+
async multiQuery(question, _numQueries = 3) {
|
|
89
|
+
// TODO: Implement query generation using LLM
|
|
90
|
+
// For now, just use the original query
|
|
91
|
+
return this.search(question, { topK: 10 });
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Compress retrieved context
|
|
95
|
+
*/
|
|
96
|
+
async compress(documents, _query) {
|
|
97
|
+
// TODO: Implement context compression
|
|
98
|
+
// For now, return top results
|
|
99
|
+
return documents.slice(0, 5);
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* Self-query with automatic metadata extraction
|
|
103
|
+
*/
|
|
104
|
+
async selfQuery(naturalLanguageQuery) {
|
|
105
|
+
// TODO: Implement metadata extraction from natural language
|
|
106
|
+
// For now, just do regular search
|
|
107
|
+
return this.search(naturalLanguageQuery);
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Conversational RAG with session memory
|
|
111
|
+
*/
|
|
112
|
+
async chat(message, _sessionId) {
|
|
113
|
+
// TODO: Implement conversation memory
|
|
114
|
+
return this.ask(message);
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Hybrid search combining vector and keyword search
|
|
118
|
+
*/
|
|
119
|
+
async hybridSearch(query, options) {
|
|
120
|
+
return this.search(query, { ...options, strategy: types_1.RetrievalStrategy.HYBRID });
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Rerank search results
|
|
124
|
+
*/
|
|
125
|
+
async rerank(results, _query, topN) {
|
|
126
|
+
// TODO: Implement reranking with external model
|
|
127
|
+
// For now, return top N results
|
|
128
|
+
return results.slice(0, topN || 5);
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Ensemble retrieval combining multiple methods
|
|
132
|
+
*/
|
|
133
|
+
async ensemble(query, _methods, _weights) {
|
|
134
|
+
// TODO: Implement ensemble retrieval
|
|
135
|
+
return this.search(query);
|
|
136
|
+
}
|
|
137
|
+
/**
|
|
138
|
+
* Time-weighted retrieval favoring recent documents
|
|
139
|
+
*/
|
|
140
|
+
async timeWeighted(query, decayRate = 0.01) {
|
|
141
|
+
const results = await this.search(query, { includeMetadata: true });
|
|
142
|
+
// Apply time decay to scores
|
|
143
|
+
const now = Date.now();
|
|
144
|
+
return results
|
|
145
|
+
.map((result) => {
|
|
146
|
+
const timestamp = result.metadata?.timestamp || now;
|
|
147
|
+
const age = (now - Number(timestamp)) / (1000 * 60 * 60 * 24); // days
|
|
148
|
+
const timeWeight = Math.exp(-decayRate * age);
|
|
149
|
+
return {
|
|
150
|
+
...result,
|
|
151
|
+
score: result.score * timeWeight,
|
|
152
|
+
};
|
|
153
|
+
})
|
|
154
|
+
.sort((a, b) => b.score - a.score);
|
|
155
|
+
}
|
|
156
|
+
/**
|
|
157
|
+
* Delete documents by IDs
|
|
158
|
+
*/
|
|
159
|
+
async delete(ids) {
|
|
160
|
+
await this.pipeline.deleteDocuments(ids);
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Clear all documents
|
|
164
|
+
*/
|
|
165
|
+
async clear() {
|
|
166
|
+
await this.pipeline.clear();
|
|
167
|
+
}
|
|
168
|
+
};
|
|
169
|
+
exports.RAGService = RAGService;
|
|
170
|
+
exports.RAGService = RAGService = __decorate([
|
|
171
|
+
(0, core_1.Injectable)(),
|
|
172
|
+
__metadata("design:paramtypes", [Object])
|
|
173
|
+
], RAGService);
|
|
174
|
+
//# sourceMappingURL=rag.service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rag.service.js","sourceRoot":"","sources":["../src/rag.service.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;;;;;;;;;;AAEH,wCAA2C;AAC3C,iDAA0D;AAC1D,mCAQiB;AAWV,IAAM,UAAU,GAAhB,MAAM,UAAU;IAGrB,YAAoB,MAAwB;QAAxB,WAAM,GAAN,MAAM,CAAkB;QAC1C,IAAI,CAAC,QAAQ,GAAG,IAAI,0BAAW,CAC7B;YACE,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,YAAY,EAAE,MAAM,CAAC,YAAY;YACjC,IAAI,EAAE,MAAM,CAAC,IAAI;SAClB,EACD,MAAM,CAAC,WAAW,CACnB,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,CAAC;IACnC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,SAAgC;QAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAChE,OAAO,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,KAAa,EACb,OAAyD;QAEzD,MAAM,EAAE,QAAQ,EAAE,GAAG,YAAY,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QACpD,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC;IAC/D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAa,EAAE,OAAsB;QAClD,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAa,EAAE,OAAgC;QAC5D,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;QACjD,CAAC;QAED,MAAM,UAAU,GACd,OAAO,OAAO,KAAK,QAAQ;YACzB,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAExE,MAAM,MAAM,GAAG;;;EAGjB,UAAU;;YAEA,KAAK;;QAET,CAAC;QAEL,OAAO,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,GAAG,CACP,KAAa,EACb,OAAsB;QAEtB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACpD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACnD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;IAC7B,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,UAAU,CAAC,QAAgB,EAAE,cAAsB,CAAC;QACxD,6CAA6C;QAC7C,uCAAuC;QACvC,OAAO,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;
IAC7C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,SAAyB,EAAE,MAAc;QACtD,sCAAsC;QACtC,8BAA8B;QAC9B,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,oBAA4B;QAC1C,4DAA4D;QAC5D,kCAAkC;QAClC,OAAO,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAC3C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CACR,OAAe,EACf,UAAkB;QAElB,sCAAsC;QACtC,OAAO,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAChB,KAAa,EACb,OAA0E;QAE1E,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,GAAG,OAAO,EAAE,QAAQ,EAAE,yBAAiB,CAAC,MAAM,EAAE,CAAC,CAAC;IAChF,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,OAAuB,EAAE,MAAc,EAAE,IAAa;QACjE,gDAAgD;QAChD,gCAAgC;QAChC,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,IAAI,CAAC,CAAC,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAa,EACb,QAA6B,EAC7B,QAAmB;QAEnB,qCAAqC;QACrC,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,KAAa,EAAE,YAAoB,IAAI;QACxD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,CAAC;QAEpE,6BAA6B;QAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,OAAO,OAAO;aACX,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;YACd,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,EAAE,SAAS,IAAI,GAAG,CAAC;YACpD,MAAM,GAAG,GAAG,CAAC,GAAG,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,OAAO;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;YAE9C,OAAO;gBACL,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK,GAAG,UAAU;aACjC,CAAC;QACJ,CAAC,CAAC;aACD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,GAAa;QACxB,MAAM,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;IAC3C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;IAC9B,CAAC;CACF,CAAA;AA9LY,gCAAU;qBAAV,UAAU;IADtB,IAAA,iBAAU,GAAE;;GACA,UAAU,CA8LtB"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 (Best Matching 25) - Keyword-based ranking algorithm
|
|
3
|
+
* Used for hybrid search combining with vector similarity
|
|
4
|
+
*/
|
|
5
|
+
/** A document as stored in the BM25 index. */
export interface BM25Document {
    /** Identifier returned with each search hit. */
    id: string;
    /** Original text of the document. */
    content: string;
    /** Pre-tokenized terms; scoring and length statistics are computed from these. */
    tokens: string[];
}
|
|
10
|
+
/** Optional tuning parameters for the BM25 scorer. */
export interface BM25Config {
    /** Term-frequency saturation parameter. Defaults to 1.5. */
    k1?: number;
    /** Document-length normalization parameter. Defaults to 0.75. */
    b?: number;
}
|
|
14
|
+
/** Keyword ranking index that scores documents with the BM25 formula. */
export declare class BM25 {
    /** Documents added so far. */
    private documents;
    /** Per-term inverse document frequency. */
    private idf;
    /** Mean token count across the indexed documents. */
    private avgDocLength;
    /** Term-frequency saturation parameter. */
    private k1;
    /** Length-normalization parameter. */
    private b;
    /**
     * @param config Optional `k1` / `b` overrides.
     */
    constructor(config?: BM25Config);
    /**
     * Append documents to the index and recompute the IDF table and the
     * average document length.
     */
    addDocuments(documents: BM25Document[]): void;
    /**
     * Score every indexed document against `query` and return the best matches.
     *
     * @param query Free-text query; it is tokenized internally.
     * @param topK Maximum number of hits to return (defaults to 5).
     * @returns Hits sorted by score, highest first.
     */
    search(query: string, topK?: number): {
        id: string;
        score: number;
    }[];
    /** BM25 score of one document for the given query tokens. */
    private calculateScore;
    /** Recompute the inverse document frequency of every indexed term. */
    private calculateIDF;
    /** Recompute the average document length. */
    private calculateAvgDocLength;
    /** Number of occurrences of a term in a token list. */
    private termFrequency;
    /** Split text into lower-cased terms. */
    private tokenize;
    /** Remove all documents and reset the derived statistics. */
    clear(): void;
}
|
|
57
|
+
//# sourceMappingURL=bm25.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/retrieval/bm25.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED,qBAAa,IAAI;IACf,OAAO,CAAC,SAAS,CAAsB;IACvC,OAAO,CAAC,GAAG,CAAkC;IAC7C,OAAO,CAAC,YAAY,CAAa;IACjC,OAAO,CAAC,EAAE,CAAS;IACnB,OAAO,CAAC,CAAC,CAAS;gBAEN,MAAM,GAAE,UAAe;IAKnC;;OAEG;IACH,YAAY,CAAC,SAAS,EAAE,YAAY,EAAE,GAAG,IAAI;IAM7C;;OAEG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,GAAG,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAa7E;;OAEG;IACH,OAAO,CAAC,cAAc;IAkBtB;;OAEG;IACH,OAAO,CAAC,YAAY;IAoBpB;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAK7B;;OAEG;IACH,OAAO,CAAC,aAAa;IAIrB;;OAEG;IACH,OAAO,CAAC,QAAQ;IAQhB;;OAEG;IACH,KAAK,IAAI,IAAI;CAKd"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* BM25 (Best Matching 25) - Keyword-based ranking algorithm
|
|
4
|
+
* Used for hybrid search combining with vector similarity
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.BM25 = void 0;
|
|
8
|
+
class BM25 {
|
|
9
|
+
constructor(config = {}) {
|
|
10
|
+
this.documents = [];
|
|
11
|
+
this.idf = new Map();
|
|
12
|
+
this.avgDocLength = 0;
|
|
13
|
+
this.k1 = config.k1 || 1.5;
|
|
14
|
+
this.b = config.b || 0.75;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Add documents to the BM25 index
|
|
18
|
+
*/
|
|
19
|
+
addDocuments(documents) {
|
|
20
|
+
this.documents.push(...documents);
|
|
21
|
+
this.calculateIDF();
|
|
22
|
+
this.calculateAvgDocLength();
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Search documents using BM25 scoring
|
|
26
|
+
*/
|
|
27
|
+
search(query, topK = 5) {
|
|
28
|
+
const queryTokens = this.tokenize(query);
|
|
29
|
+
const scores = [];
|
|
30
|
+
for (const doc of this.documents) {
|
|
31
|
+
const score = this.calculateScore(queryTokens, doc);
|
|
32
|
+
scores.push({ id: doc.id, score });
|
|
33
|
+
}
|
|
34
|
+
// Sort by score descending and return top K
|
|
35
|
+
return scores.sort((a, b) => b.score - a.score).slice(0, topK);
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Calculate BM25 score for a document given query tokens
|
|
39
|
+
*/
|
|
40
|
+
calculateScore(queryTokens, doc) {
|
|
41
|
+
let score = 0;
|
|
42
|
+
for (const token of queryTokens) {
|
|
43
|
+
const idf = this.idf.get(token) || 0;
|
|
44
|
+
const tf = this.termFrequency(token, doc.tokens);
|
|
45
|
+
const docLength = doc.tokens.length;
|
|
46
|
+
// BM25 formula
|
|
47
|
+
const numerator = tf * (this.k1 + 1);
|
|
48
|
+
const denominator = tf + this.k1 * (1 - this.b + this.b * (docLength / this.avgDocLength));
|
|
49
|
+
score += idf * (numerator / denominator);
|
|
50
|
+
}
|
|
51
|
+
return score;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Calculate Inverse Document Frequency for all terms
|
|
55
|
+
*/
|
|
56
|
+
calculateIDF() {
|
|
57
|
+
const termDocCount = new Map();
|
|
58
|
+
const N = this.documents.length;
|
|
59
|
+
// Count documents containing each term
|
|
60
|
+
for (const doc of this.documents) {
|
|
61
|
+
const uniqueTokens = new Set(doc.tokens);
|
|
62
|
+
for (const token of uniqueTokens) {
|
|
63
|
+
termDocCount.set(token, (termDocCount.get(token) || 0) + 1);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
// Calculate IDF for each term
|
|
67
|
+
for (const [term, docCount] of termDocCount.entries()) {
|
|
68
|
+
// IDF = log((N - df + 0.5) / (df + 0.5) + 1)
|
|
69
|
+
const idf = Math.log((N - docCount + 0.5) / (docCount + 0.5) + 1);
|
|
70
|
+
this.idf.set(term, idf);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Calculate average document length
|
|
75
|
+
*/
|
|
76
|
+
calculateAvgDocLength() {
|
|
77
|
+
const totalLength = this.documents.reduce((sum, doc) => sum + doc.tokens.length, 0);
|
|
78
|
+
this.avgDocLength = totalLength / this.documents.length || 1;
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Calculate term frequency in document
|
|
82
|
+
*/
|
|
83
|
+
termFrequency(term, tokens) {
|
|
84
|
+
return tokens.filter((t) => t === term).length;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Tokenize text into terms
|
|
88
|
+
*/
|
|
89
|
+
tokenize(text) {
|
|
90
|
+
return text
|
|
91
|
+
.toLowerCase()
|
|
92
|
+
.replace(/[^\w\s]/g, ' ')
|
|
93
|
+
.split(/\s+/)
|
|
94
|
+
.filter((token) => token.length > 0);
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Clear all documents
|
|
98
|
+
*/
|
|
99
|
+
clear() {
|
|
100
|
+
this.documents = [];
|
|
101
|
+
this.idf.clear();
|
|
102
|
+
this.avgDocLength = 0;
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
exports.BM25 = BM25;
|
|
106
|
+
//# sourceMappingURL=bm25.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/retrieval/bm25.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAaH,MAAa,IAAI;IAOf,YAAY,SAAqB,EAAE;QAN3B,cAAS,GAAmB,EAAE,CAAC;QAC/B,QAAG,GAAwB,IAAI,GAAG,EAAE,CAAC;QACrC,iBAAY,GAAW,CAAC,CAAC;QAK/B,IAAI,CAAC,EAAE,GAAG,MAAM,CAAC,EAAE,IAAI,GAAG,CAAC;QAC3B,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,IAAI,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,SAAyB;QACpC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;QAClC,IAAI,CAAC,YAAY,EAAE,CAAC;QACpB,IAAI,CAAC,qBAAqB,EAAE,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAa,EAAE,OAAe,CAAC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,MAAM,GAAyC,EAAE,CAAC;QAExD,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACjC,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;QACrC,CAAC;QAED,4CAA4C;QAC5C,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACjE,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,WAAqB,EAAE,GAAiB;QAC7D,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACrC,MAAM,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;YACjD,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC;YAEpC,eAAe;YACf,MAAM,SAAS,GAAG,EAAE,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;YACrC,MAAM,WAAW,GAAG,EAAE,GAAG,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;YAE3F,KAAK,IAAI,GAAG,GAAG,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;QAC3C,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,YAAY;QAClB,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,MAAM,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;QAEhC,uCAAuC;QACvC,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACjC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACzC,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;gBACjC,YAAY,CAAC,GAAG,C
AAC,KAAK,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAC9D,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,KAAK,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;YACtD,6CAA6C;YAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YAClE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED;;OAEG;IACK,qBAAqB;QAC3B,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACpF,IAAI,CAAC,YAAY,GAAG,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,IAAI,CAAC,CAAC;IAC/D,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAY,EAAE,MAAgB;QAClD,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM,CAAC;IACjD,CAAC;IAED;;OAEG;IACK,QAAQ,CAAC,IAAY;QAC3B,OAAO,IAAI;aACR,WAAW,EAAE;aACb,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;aACxB,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC;QACpB,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC;QACjB,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;IACxB,CAAC;CACF;AAnHD,oBAmHC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Search - Combines vector similarity and keyword search (BM25)
|
|
3
|
+
*/
|
|
4
|
+
import { VectorStore, SearchResult, QueryOptions } from '../types';
|
|
5
|
+
/** Options for combining vector and keyword (BM25) retrieval. */
export interface HybridSearchConfig {
    /** Weight given to vector-similarity results. Defaults to 0.7. */
    vectorWeight?: number;
    /** Weight given to BM25 keyword results. Defaults to 0.3. */
    keywordWeight?: number;
    /** Parameters passed to the internal BM25 index. */
    bm25Config?: {
        /** BM25 term-frequency saturation parameter. */
        k1?: number;
        /** BM25 length-normalization parameter. */
        b?: number;
    };
}
|
|
13
|
+
/** Retriever that combines a vector store with a BM25 keyword index. */
export declare class HybridSearchRetrieval {
    /** Vector store queried for similarity results. */
    private vectorStore;
    /** Keyword index built by `indexDocuments`. */
    private bm25;
    /** Weight applied to vector results. */
    private vectorWeight;
    /** Weight applied to keyword results. */
    private keywordWeight;
    /** Set once `indexDocuments` has run; `search` refuses to run before that. */
    private documentsIndexed;
    /**
     * @param vectorStore Store used for the vector half of the search.
     * @param config Optional weights and BM25 parameters. The constructor throws
     *   when the two weights do not sum to 1.0 (within 0.01).
     */
    constructor(vectorStore: VectorStore, config?: HybridSearchConfig);
    /**
     * Tokenize the given documents and add them to the keyword index.
     */
    indexDocuments(documents: {
        id: string;
        content: string;
    }[]): Promise<void>;
    /**
     * Run the vector search and the keyword search for `query` and combine them.
     * Throws when no documents have been indexed yet. `options.topK` defaults to 10.
     */
    search(query: string, options?: QueryOptions): Promise<SearchResult[]>;
    /** Fuse vector and keyword results using weighted scores. */
    private fuseResults;
    /** Normalize scores to the 0-1 range using min-max normalization. */
    private normalizeScores;
    /** Tokenize text. */
    private tokenize;
    /** Clear indexed documents. */
    clear(): void;
}
|
|
48
|
+
//# sourceMappingURL=hybrid-search.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid-search.d.ts","sourceRoot":"","sources":["../../src/retrieval/hybrid-search.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAGnE,MAAM,WAAW,kBAAkB;IACjC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,CAAC,EAAE;QACX,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,CAAC,CAAC,EAAE,MAAM,CAAC;KACZ,CAAC;CACH;AAED,qBAAa,qBAAqB;IAChC,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,IAAI,CAAO;IACnB,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,gBAAgB,CAAkB;gBAE9B,WAAW,EAAE,WAAW,EAAE,MAAM,GAAE,kBAAuB;IAarE;;OAEG;IACG,cAAc,CAAC,SAAS,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAYtF;;OAEG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAuB5E;;OAEG;IACH,OAAO,CAAC,WAAW;IAyCnB;;OAEG;IACH,OAAO,CAAC,eAAe;IAsBvB;;OAEG;IACH,OAAO,CAAC,QAAQ;IAQhB;;OAEG;IACH,KAAK,IAAI,IAAI;CAId"}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Hybrid Search - Combines vector similarity and keyword search (BM25)
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.HybridSearchRetrieval = void 0;
|
|
7
|
+
const bm25_1 = require("./bm25");
|
|
8
|
+
class HybridSearchRetrieval {
|
|
9
|
+
constructor(vectorStore, config = {}) {
|
|
10
|
+
this.documentsIndexed = false;
|
|
11
|
+
this.vectorStore = vectorStore;
|
|
12
|
+
this.vectorWeight = config.vectorWeight || 0.7;
|
|
13
|
+
this.keywordWeight = config.keywordWeight || 0.3;
|
|
14
|
+
this.bm25 = new bm25_1.BM25(config.bm25Config);
|
|
15
|
+
// Validate weights sum to 1
|
|
16
|
+
const totalWeight = this.vectorWeight + this.keywordWeight;
|
|
17
|
+
if (Math.abs(totalWeight - 1.0) > 0.01) {
|
|
18
|
+
throw new Error('Vector and keyword weights must sum to 1.0');
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Index documents for hybrid search
|
|
23
|
+
*/
|
|
24
|
+
async indexDocuments(documents) {
|
|
25
|
+
// Prepare BM25 documents
|
|
26
|
+
const bm25Docs = documents.map((doc) => ({
|
|
27
|
+
id: doc.id,
|
|
28
|
+
content: doc.content,
|
|
29
|
+
tokens: this.tokenize(doc.content),
|
|
30
|
+
}));
|
|
31
|
+
this.bm25.addDocuments(bm25Docs);
|
|
32
|
+
this.documentsIndexed = true;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Perform hybrid search combining vector and keyword search
|
|
36
|
+
*/
|
|
37
|
+
async search(query, options) {
|
|
38
|
+
if (!this.documentsIndexed) {
|
|
39
|
+
throw new Error('Documents must be indexed before searching');
|
|
40
|
+
}
|
|
41
|
+
const topK = options?.topK || 10;
|
|
42
|
+
// Perform vector search
|
|
43
|
+
const vectorResults = await this.vectorStore.search(query, {
|
|
44
|
+
...options,
|
|
45
|
+
topK: topK * 2, // Get more results for fusion
|
|
46
|
+
});
|
|
47
|
+
// Perform BM25 keyword search
|
|
48
|
+
const keywordResults = this.bm25.search(query, topK * 2);
|
|
49
|
+
// Combine and re-rank results
|
|
50
|
+
const hybridResults = this.fuseResults(vectorResults, keywordResults);
|
|
51
|
+
// Return top K results
|
|
52
|
+
return hybridResults.slice(0, topK);
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Fuse vector and keyword results using weighted scores
|
|
56
|
+
*/
|
|
57
|
+
fuseResults(vectorResults, keywordResults) {
|
|
58
|
+
// Normalize scores to 0-1 range
|
|
59
|
+
const normalizedVector = this.normalizeScores(vectorResults.map((r) => ({ id: r.id, score: r.score })));
|
|
60
|
+
const normalizedKeyword = this.normalizeScores(keywordResults);
|
|
61
|
+
// Create score maps
|
|
62
|
+
const vectorScores = new Map(normalizedVector.map((r) => [r.id, r.score]));
|
|
63
|
+
const keywordScores = new Map(normalizedKeyword.map((r) => [r.id, r.score]));
|
|
64
|
+
// Get all unique document IDs
|
|
65
|
+
const allIds = new Set([...vectorResults.map((r) => r.id), ...keywordResults.map((r) => r.id)]);
|
|
66
|
+
// Calculate hybrid scores
|
|
67
|
+
const hybridScores = [];
|
|
68
|
+
for (const id of allIds) {
|
|
69
|
+
const vectorScore = vectorScores.get(id) || 0;
|
|
70
|
+
const keywordScore = keywordScores.get(id) || 0;
|
|
71
|
+
const hybridScore = this.vectorWeight * vectorScore + this.keywordWeight * keywordScore;
|
|
72
|
+
// Find the original result
|
|
73
|
+
const result = vectorResults.find((r) => r.id === id);
|
|
74
|
+
if (result) {
|
|
75
|
+
hybridScores.push({
|
|
76
|
+
id,
|
|
77
|
+
score: hybridScore,
|
|
78
|
+
result: { ...result, score: hybridScore },
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
// Sort by hybrid score
|
|
83
|
+
return hybridScores.sort((a, b) => b.score - a.score).map((item) => item.result);
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Normalize scores to 0-1 range using min-max normalization
|
|
87
|
+
*/
|
|
88
|
+
normalizeScores(results) {
|
|
89
|
+
if (results.length === 0) {
|
|
90
|
+
return [];
|
|
91
|
+
}
|
|
92
|
+
const scores = results.map((r) => r.score);
|
|
93
|
+
const min = Math.min(...scores);
|
|
94
|
+
const max = Math.max(...scores);
|
|
95
|
+
const range = max - min;
|
|
96
|
+
if (range === 0) {
|
|
97
|
+
return results.map((r) => ({ id: r.id, score: 1.0 }));
|
|
98
|
+
}
|
|
99
|
+
return results.map((r) => ({
|
|
100
|
+
id: r.id,
|
|
101
|
+
score: (r.score - min) / range,
|
|
102
|
+
}));
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Tokenize text
|
|
106
|
+
*/
|
|
107
|
+
tokenize(text) {
|
|
108
|
+
return text
|
|
109
|
+
.toLowerCase()
|
|
110
|
+
.replace(/[^\w\s]/g, ' ')
|
|
111
|
+
.split(/\s+/)
|
|
112
|
+
.filter((token) => token.length > 0);
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Clear indexed documents
|
|
116
|
+
*/
|
|
117
|
+
clear() {
|
|
118
|
+
this.bm25.clear();
|
|
119
|
+
this.documentsIndexed = false;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
exports.HybridSearchRetrieval = HybridSearchRetrieval;
|
|
123
|
+
//# sourceMappingURL=hybrid-search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid-search.js","sourceRoot":"","sources":["../../src/retrieval/hybrid-search.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAGH,iCAA4C;AAW5C,MAAa,qBAAqB;IAOhC,YAAY,WAAwB,EAAE,SAA6B,EAAE;QAF7D,qBAAgB,GAAY,KAAK,CAAC;QAGxC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC;QAC/C,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,GAAG,CAAC;QACjD,IAAI,CAAC,IAAI,GAAG,IAAI,WAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAExC,4BAA4B;QAC5B,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,aAAa,CAAC;QAC3D,IAAI,IAAI,CAAC,GAAG,CAAC,WAAW,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC;YACvC,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QAChE,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,cAAc,CAAC,SAAiD;QACpE,yBAAyB;QACzB,MAAM,QAAQ,GAAmB,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YACvD,EAAE,EAAE,GAAG,CAAC,EAAE;YACV,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC;SACnC,CAAC,CAAC,CAAC;QAEJ,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QACjC,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,OAAsB;QAChD,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QAChE,CAAC;QAED,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC;QAEjC,wBAAwB;QACxB,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,KAAK,EAAE;YACzD,GAAG,OAAO;YACV,IAAI,EAAE,IAAI,GAAG,CAAC,EAAE,8BAA8B;SAC/C,CAAC,CAAC;QAEH,8BAA8B;QAC9B,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,IAAI,GAAG,CAAC,CAAC,CAAC;QAEzD,8BAA8B;QAC9B,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,CAAC,aAAa,EAAE,cAAc,CAAC,CAAC;QAEtE,uBAAuB;QACvB,OAAO,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IAED;;OAEG;IACK,WAAW,CACjB,aAA6B,EAC7B,cAAoD;QAEpD,gCAAgC;QAChC,MAAM,gBAAgB,GAAG,IAAI,CAAC,eAAe,CAC3C,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CACzD,CAAC;QACF,MAAM,iBAAiB,GAAG,IAAI,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;QAE/D,oBAAoB;QACpB,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC
,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3E,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAE7E,8BAA8B;QAC9B,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEhG,0BAA0B;QAC1B,MAAM,YAAY,GAA+D,EAAE,CAAC;QAEpF,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;YACxB,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAC9C,MAAM,YAAY,GAAG,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAEhD,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,GAAG,WAAW,GAAG,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;YAExF,2BAA2B;YAC3B,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;YACtD,IAAI,MAAM,EAAE,CAAC;gBACX,YAAY,CAAC,IAAI,CAAC;oBAChB,EAAE;oBACF,KAAK,EAAE,WAAW;oBAClB,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE;iBAC1C,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,OAAO,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACnF,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,OAA6C;QAE7C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;QAChC,MAAM,KAAK,GAAG,GAAG,GAAG,GAAG,CAAC;QAExB,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;YAChB,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;QACxD,CAAC;QAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzB,EAAE,EAAE,CAAC,CAAC,EAAE;YACR,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,KAAK;SAC/B,CAAC,CAAC,CAAC;IACN,CAAC;IAED;;OAEG;IACK,QAAQ,CAAC,IAAY;QAC3B,OAAO,IAAI;aACR,W
AAW,EAAE;aACb,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;aACxB,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QAClB,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC;IAChC,CAAC;CACF;AApJD,sDAoJC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Query Retrieval
|
|
3
|
+
* Generates multiple search queries from a single question to improve retrieval
|
|
4
|
+
*/
|
|
5
|
+
import { VectorStore, SearchResult, QueryOptions } from '../types';
|
|
6
|
+
/**
 * Options object accepted as the optional second constructor argument of
 * MultiQueryRetrieval. Every field is optional; defaults are not visible in
 * this declaration file.
 */
export interface MultiQueryConfig {
|
|
7
|
+
/** Presumably how many search queries to generate per question — TODO confirm against the implementation. */
numQueries?: number;
|
|
8
|
+
/** One of 'openai', 'anthropic' or 'custom'; presumably selects how queries are generated — verify. */
llmProvider?: 'openai' | 'anthropic' | 'custom';
|
|
9
|
+
/** Presumably the API key for the selected provider — verify against the implementation. */
apiKey?: string;
|
|
10
|
+
/** Presumably the model identifier passed to the provider — verify against the implementation. */
model?: string;
|
|
11
|
+
/**
 * Caller-supplied async function that receives a question and a query count
 * and resolves to an array of strings. NOTE(review): presumably used when
 * llmProvider is 'custom' — confirm.
 */
customGenerator?: (question: string, numQueries: number) => Promise<string[]>;
|
|
12
|
+
}
|
|
13
|
+
/**
 * Type declaration for the multi-query retriever. It is constructed from a
 * VectorStore plus an optional MultiQueryConfig and exposes one public
 * method, retrieve(). Private members are listed by name only.
 */
export declare class MultiQueryRetrieval {
|
|
14
|
+
private vectorStore;
|
|
15
|
+
private config;
|
|
16
|
+
/** Creates a retriever for the given vector store; `config` may be omitted. */
constructor(vectorStore: VectorStore, config?: MultiQueryConfig);
|
|
17
|
+
/**
|
|
18
|
+
* Perform multi-query retrieval
|
|
19
|
+
*
* @param question The question to retrieve results for.
* @param options Optional query options.
* @returns A promise resolving to an array of SearchResult.
*/
|
|
20
|
+
retrieve(question: string, options?: QueryOptions): Promise<SearchResult[]>;
|
|
21
|
+
/**
|
|
22
|
+
* Generate multiple search queries from a single question
|
|
23
|
+
*/
|
|
24
|
+
private generateQueries;
|
|
25
|
+
/**
|
|
26
|
+
* Generate queries using OpenAI
|
|
27
|
+
*/
|
|
28
|
+
private generateWithOpenAI;
|
|
29
|
+
/**
|
|
30
|
+
* Generate simple query variations (fallback)
|
|
31
|
+
*/
|
|
32
|
+
private generateSimpleVariations;
|
|
33
|
+
/**
|
|
34
|
+
* Rank results by frequency and average score
|
|
35
|
+
*/
|
|
36
|
+
private rankResults;
|
|
37
|
+
}
|
|
38
|
+
//# sourceMappingURL=multi-query.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"multi-query.d.ts","sourceRoot":"","sources":["../../src/retrieval/multi-query.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAEnE,MAAM,WAAW,gBAAgB;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAC;IAChD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;CAC/E;AAED,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,MAAM,CAAmB;gBAErB,WAAW,EAAE,WAAW,EAAE,MAAM,GAAE,gBAAqB;IASnE;;OAEG;IACG,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IA4BjF;;OAEG;YACW,eAAe;IAiB7B;;OAEG;YACW,kBAAkB;IAmChC;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAgBhC;;OAEG;IACH,OAAO,CAAC,WAAW;CA0BpB"}
|