@operor/knowledge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +457 -0
- package/dist/index.d.ts +437 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1442 -0
- package/dist/index.js.map +1 -0
- package/package.json +42 -0
- package/src/EmbeddingService.ts +92 -0
- package/src/IngestionPipeline.ts +357 -0
- package/src/QueryNormalizer.ts +59 -0
- package/src/QueryRewriter.ts +73 -0
- package/src/RankFusion.ts +72 -0
- package/src/RetrievalPipeline.ts +388 -0
- package/src/SQLiteKnowledgeStore.ts +379 -0
- package/src/TextChunker.ts +34 -0
- package/src/__tests__/cli-integration.test.ts +134 -0
- package/src/__tests__/content-fetcher.test.ts +156 -0
- package/src/__tests__/knowledge.test.ts +493 -0
- package/src/__tests__/retrieval-layers.test.ts +672 -0
- package/src/index.ts +41 -0
- package/src/ingestors/FileIngestor.ts +85 -0
- package/src/ingestors/SiteCrawler.ts +153 -0
- package/src/ingestors/UrlIngestor.ts +106 -0
- package/src/ingestors/WatiFaqSync.ts +75 -0
- package/src/ingestors/content-fetcher.ts +142 -0
- package/src/types.ts +62 -0
- package/tsconfig.json +9 -0
- package/tsdown.config.ts +10 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
import { LanguageModelV1 } from "ai";
|
|
2
|
+
|
|
3
|
+
//#region src/types.d.ts
|
|
4
|
+
/**
 * A document record held in the knowledge base.
 *
 * Only `id`, `sourceType`, `content`, `createdAt` and `updatedAt` are required.
 */
interface KBDocument {
  /** Document identifier. */
  id: string;
  /** Kind of source the document came from. */
  sourceType: 'url' | 'file' | 'faq' | 'annotation';
  /** Source URL, when one is recorded. */
  sourceUrl?: string;
  /** Source file name, when one is recorded. */
  fileName?: string;
  /** Optional title. */
  title?: string;
  /** Document text. */
  content: string;
  /** Free-form metadata; values are unchecked (`any`). */
  metadata?: Record<string, any>;
  /** Creation time as a number — presumably an epoch timestamp; TODO confirm the unit. */
  createdAt: number;
  /** Last-update time as a number — presumably the same unit as `createdAt`; TODO confirm. */
  updatedAt: number;
  /** Optional numeric priority. NOTE(review): the meaning of each value is not stated in this declaration. */
  priority?: number;
  /** Optional content hash; the hashing algorithm is not visible here. */
  contentHash?: string;
}
|
|
17
|
+
/** One chunk of a document's content, optionally carrying an embedding vector. */
interface KBChunk {
  /** Chunk identifier. */
  id: string;
  /** Identifier of the owning document — presumably a `KBDocument.id`; confirm against the store. */
  documentId: string;
  /** Chunk text. */
  content: string;
  /** Index of the chunk — presumably its position within the document; base (0 or 1) is not visible here. */
  chunkIndex: number;
  /** Embedding vector, when present. */
  embedding?: number[];
  /** Free-form metadata; values are unchecked (`any`). */
  metadata?: Record<string, any>;
}
|
|
25
|
+
/** A single search hit: the matched chunk, its document, and two numeric measures. */
interface KBSearchResult {
  /** The matched chunk. */
  chunk: KBChunk;
  /** The document paired with the chunk. */
  document: KBDocument;
  /** NOTE(review): scale and direction are not visible here — presumably higher is better; confirm. */
  score: number;
  /** NOTE(review): presumably the raw vector distance behind `score`; metric not visible here — confirm. */
  distance: number;
}
|
|
31
|
+
/** Optional knobs accepted by the search methods; every field may be omitted. */
interface KBSearchOptions {
  /** Result-count limit; the default applied when omitted is not visible here. */
  limit?: number;
  /** Score threshold — presumably a minimum score for a hit to be returned; TODO confirm. */
  scoreThreshold?: number;
  /** Source types to consider — presumably a filter on `KBDocument.sourceType`; TODO confirm. */
  sourceTypes?: KBDocument['sourceType'][];
  /** Metadata criteria; matching semantics are not visible here. */
  metadata?: Record<string, any>;
}
|
|
37
|
+
/** Aggregate figures reported by a knowledge store's `getStats()`. */
interface KBStats {
  /** Number of documents. */
  documentCount: number;
  /** Number of chunks. */
  chunkCount: number;
  /** Embedding dimension count. */
  embeddingDimensions: number;
  /** Database size in bytes. */
  dbSizeBytes: number;
}
|
|
43
|
+
/**
 * Contract for a knowledge-base storage backend.
 *
 * Every member is asynchronous except the optional `getChunkCount`.
 * `searchByKeyword` and `getChunkCount` are optional, so callers must check
 * for their presence before calling them.
 */
interface KnowledgeStore {
  /** Prepares the store for use. */
  initialize(): Promise<void>;
  /** Releases the store. */
  close(): Promise<void>;
  /** Adds a document. */
  addDocument(doc: KBDocument): Promise<void>;
  /** Resolves to the document with the given id, or `null`. */
  getDocument(id: string): Promise<KBDocument | null>;
  /** Resolves to the stored documents. */
  listDocuments(): Promise<KBDocument[]>;
  /** Deletes the document with the given id. */
  deleteDocument(id: string): Promise<void>;
  /** Adds chunks. */
  addChunks(chunks: KBChunk[]): Promise<void>;
  /** Searches using both the query text and its embedding; how the two are combined is implementation-defined. */
  search(query: string, embedding: number[], options?: KBSearchOptions): Promise<KBSearchResult[]>;
  /** Searches using an embedding vector only. */
  searchByEmbedding(embedding: number[], options?: KBSearchOptions): Promise<KBSearchResult[]>;
  /** Optional keyword search over the query text. */
  searchByKeyword?(query: string, options?: KBSearchOptions): Promise<KBSearchResult[]>;
  /** Resolves to aggregate statistics. */
  getStats(): Promise<KBStats>;
  /** Returns the number of chunks stored for a given document. Optional — only SQLiteKnowledgeStore implements this. */
  getChunkCount?(documentId: string): number;
}
|
|
58
|
+
//#endregion
|
|
59
|
+
//#region src/EmbeddingService.d.ts
|
|
60
|
+
/** Construction options for `EmbeddingService`; only `provider` is required. */
interface EmbeddingServiceConfig {
  /** Embedding provider to use. */
  provider: 'openai' | 'google' | 'mistral' | 'cohere' | 'ollama';
  /** Provider API key, when one is needed. */
  apiKey?: string;
  /** Model name; the per-provider default is not visible here. */
  model?: string;
  /** Base URL override for the provider endpoint. */
  baseURL?: string;
  /** Embedding dimension count; the default applied when omitted is not visible here. */
  dimensions?: number;
}
|
|
67
|
+
/** Produces embedding vectors for text using the provider named in its config. */
declare class EmbeddingService {
  private config;
  /** @param config - Provider selection and optional credentials, model and dimensions. */
  constructor(config: EmbeddingServiceConfig);
  private getModel;
  /** Provider name. */
  get provider(): string;
  /** Embedding dimension count in effect. */
  get dimensions(): number;
  /**
   * Returns a default dimension count for a provider and optional model.
   * NOTE(review): the lookup values are not visible in this declaration.
   */
  static defaultDimensions(provider: string, model?: string): number;
  /** Resolves to the embedding vector for one text. */
  embed(text: string): Promise<number[]>;
  /** Resolves to one embedding vector per input text — presumably in input order; TODO confirm. */
  embedMany(texts: string[]): Promise<number[][]>;
}
|
|
77
|
+
//#endregion
|
|
78
|
+
//#region src/SQLiteKnowledgeStore.d.ts
|
|
79
|
+
/**
 * SQLite-backed `KnowledgeStore`.
 *
 * Beyond the interface it exposes the rebuild helpers (`getAllChunks`,
 * `rebuildVecTable`, `batchInsertEmbeddings`), lookups by source URL and
 * content hash, document updates, and a similar-FAQ lookup.
 */
declare class SQLiteKnowledgeStore implements KnowledgeStore {
  private db;
  private dbPath;
  private dimensions;
  private dimensionWarned;
  /**
   * @param dbPath - Database file path; the default used when omitted is not visible here.
   * @param dimensions - Embedding dimension count; the default used when omitted is not visible here.
   */
  constructor(dbPath?: string, dimensions?: number);
  /** Returns the embedding dimension count in effect. */
  getDimensions(): number;
  /** Prepares the store for use. */
  initialize(): Promise<void>;
  /** Releases the store. */
  close(): Promise<void>;
  /** Adds a document. */
  addDocument(doc: KBDocument): Promise<void>;
  /** Resolves to the document with the given id, or `null`. */
  getDocument(id: string): Promise<KBDocument | null>;
  /** Resolves to the stored documents. */
  listDocuments(): Promise<KBDocument[]>;
  /** Deletes the document with the given id. */
  deleteDocument(id: string): Promise<void>;
  /** Adds chunks. */
  addChunks(chunks: KBChunk[]): Promise<void>;
  /** Returns the chunk count for a document; synchronous, unlike most other members. */
  getChunkCount(documentId: string): number;
  /** Searches using both the query text and its embedding. */
  search(query: string, embedding: number[], options?: KBSearchOptions): Promise<KBSearchResult[]>;
  /** Searches using an embedding vector only. */
  searchByEmbedding(embedding: number[], options?: KBSearchOptions): Promise<KBSearchResult[]>;
  /** Keyword search over the query text. */
  searchByKeyword(query: string, options?: KBSearchOptions): Promise<KBSearchResult[]>;
  /**
   * Get all chunks from kb_chunks (text content only, no embeddings).
   * Used by rebuild to re-embed all content.
   *
   * Note that `metadata` is typed `string` here, not `Record<string, any>` as on `KBChunk`.
   */
  getAllChunks(): {
    id: string;
    documentId: string;
    content: string;
    chunkIndex: number;
    metadata?: string;
  }[];
  /**
   * Drop and recreate the vec_chunks virtual table with new dimensions.
   * Preserves kb_chunks, kb_documents, and fts_chunks — only vector data is affected.
   */
  rebuildVecTable(newDimensions: number): void;
  /**
   * Batch-insert embeddings into vec_chunks.
   * Expects an array of { chunkId, embedding } pairs.
   */
  batchInsertEmbeddings(items: {
    chunkId: string;
    embedding: number[];
  }[]): void;
  /** Resolves to aggregate statistics. */
  getStats(): Promise<KBStats>;
  /** Resolves to a document matching the source URL, or `null`. */
  findBySourceUrl(url: string): Promise<KBDocument | null>;
  /** Resolves to a document matching the content hash, or `null`. */
  findByContentHash(hash: string): Promise<KBDocument | null>;
  /** Applies the supplied fields to the document with the given id; every field in `updates` is optional. */
  updateDocument(id: string, updates: {
    content?: string;
    title?: string;
    contentHash?: string;
    priority?: number;
    metadata?: Record<string, any>;
  }): Promise<void>;
  /**
   * Resolves to a FAQ search result for the embedding, or `null`.
   * NOTE(review): how `threshold` is compared (score vs distance) is not visible here — confirm.
   */
  findSimilarFaq(embedding: number[], threshold: number): Promise<KBSearchResult | null>;
  private rowToDocument;
}
|
|
134
|
+
//#endregion
|
|
135
|
+
//#region src/TextChunker.d.ts
|
|
136
|
+
/** Chunking parameters; both are optional and their units and defaults are not visible here. */
interface ChunkOptions {
  /** Target chunk size — presumably in characters; TODO confirm the unit. */
  chunkSize?: number;
  /** Overlap between neighbouring chunks — presumably in the same unit as `chunkSize`; TODO confirm. */
  chunkOverlap?: number;
}
|
|
140
|
+
/** Splits text into string chunks, with a separate entry point for Markdown input. */
declare class TextChunker {
  private defaultChunkSize;
  private defaultChunkOverlap;
  /** @param options - Chunking defaults for this instance. */
  constructor(options?: ChunkOptions);
  /** Resolves to the chunks of `text`; `options` presumably overrides the instance defaults — TODO confirm. */
  chunk(text: string, options?: ChunkOptions): Promise<string[]>;
  /** Resolves to the chunks of `markdown`; how Markdown structure affects splitting is not visible here. */
  chunkMarkdown(markdown: string, options?: ChunkOptions): Promise<string[]>;
}
|
|
147
|
+
//#endregion
|
|
148
|
+
//#region src/IngestionPipeline.d.ts
|
|
149
|
+
/**
 * Minimal chat-completion contract: takes system/user messages and resolves
 * to an object carrying the generated `text`.
 */
interface ContentReformatter {
  /**
   * @param messages - Messages, each tagged with a `system` or `user` role.
   * @returns An object whose `text` holds the completion.
   */
  complete(messages: {
    role: 'system' | 'user';
    content: string;
  }[]): Promise<{
    text: string;
  }>;
}
|
|
157
|
+
/** Input accepted by `IngestionPipeline.ingest`; only `sourceType` and `content` are required. */
interface IngestInput {
  /** Kind of source, using the same union as `KBDocument.sourceType`. */
  sourceType: KBDocument['sourceType'];
  /** Content to ingest. */
  content: string;
  /** Optional title. */
  title?: string;
  /** Source URL, when known. */
  sourceUrl?: string;
  /** Source file name, when known. */
  fileName?: string;
  /** Free-form metadata; values are unchecked (`any`). */
  metadata?: Record<string, any>;
  /** Flags the content as Markdown. */
  isMarkdown?: boolean;
  /** Opt-in to LLM Q&A extraction (expensive). Default: false (chunking path). */
  extractQA?: boolean;
  /** Document priority: 1=official, 2=supplementary, 3=archived. Auto-assigned if omitted. */
  priority?: number;
}
|
|
170
|
+
/** Open-ended option bag for FAQ ingestion: one known key plus arbitrary extra keys. */
interface IngestFaqOptions {
  /** Source URL, when known. */
  sourceUrl?: string;
  /** Any other key is accepted with an unchecked (`any`) value. */
  [key: string]: any;
}
|
|
174
|
+
/** Result of `IngestionPipeline.ingestFaq`: a `KBDocument` plus an optional `existingMatch`. */
interface IngestFaqResult extends KBDocument {
  /**
   * An existing FAQ entry and its score, when present.
   * NOTE(review): presumably set when a sufficiently similar FAQ already exists; the trigger is not visible here.
   */
  existingMatch?: {
    id: string;
    question: string;
    answer: string;
    score: number;
  };
}
|
|
182
|
+
/** Summary returned by `IngestionPipeline.rebuild`. */
interface RebuildResult {
  /** Number of documents rebuilt. */
  documentsRebuilt: number;
  /** Number of chunks rebuilt. */
  chunksRebuilt: number;
  /** Embedding dimension count before the rebuild. */
  oldDimensions: number;
  /** Embedding dimension count after the rebuild. */
  newDimensions: number;
}
|
|
188
|
+
/**
 * Ingests content into a `KnowledgeStore` using an embedder and a chunker,
 * with an optional LLM provider.
 */
declare class IngestionPipeline {
  private store;
  private embedder;
  private chunker;
  private llmProvider?;
  /**
   * @param store - Destination store.
   * @param embedder - Embedding service.
   * @param chunker - Text chunker.
   * @param llmProvider - Optional LLM completion provider — presumably used for Q&A extraction; TODO confirm.
   */
  constructor(store: KnowledgeStore, embedder: EmbeddingService, chunker: TextChunker, llmProvider?: ContentReformatter);
  private cleanContent;
  private extractQAPairs;
  private computeHash;
  /** Ingests one input and resolves to the resulting document. */
  ingest(input: IngestInput): Promise<KBDocument>;
  /**
   * Ingests a question/answer pair.
   *
   * @param question - FAQ question.
   * @param answer - FAQ answer.
   * @param metadata - Free-form metadata; may carry `forceReplace`.
   *   NOTE(review): presumably `forceReplace` overwrites a matching FAQ instead of reporting it — confirm.
   * @returns The document, plus `existingMatch` when one is reported.
   */
  ingestFaq(question: string, answer: string, metadata?: Record<string, any> & {
    forceReplace?: boolean;
  }): Promise<IngestFaqResult>;
  /**
   * Rebuild all vector embeddings using the current embedding provider.
   * Preserves all document content, chunks, and FTS data — only replaces vectors.
   *
   * Requires the store to be a SQLiteKnowledgeStore (uses rebuild-specific methods).
   *
   * @param onProgress - Optional callback receiving the current index, the total, and a document title.
   */
  rebuild(onProgress?: (current: number, total: number, docTitle: string) => void): Promise<RebuildResult>;
}
|
|
209
|
+
//#endregion
|
|
210
|
+
//#region src/QueryRewriter.d.ts
|
|
211
|
+
/** Construction options for `QueryRewriter`. */
interface QueryRewriterOptions {
  /** Language model used for rewriting (`LanguageModelV1` from the `ai` package). */
  model: LanguageModelV1;
  /** Upper bound on the rewrite cache size; the default applied when omitted is not visible here. */
  maxCacheSize?: number;
}
|
|
215
|
+
/** Outcome of `QueryRewriter.rewrite`. */
interface RewriteResult {
  /** The query as passed in. */
  original: string;
  /** The rewritten query. */
  rewritten: string;
  /** Whether the result came from the cache. */
  cached: boolean;
  /** Token counts, when reported — presumably absent on cache hits; TODO confirm. */
  tokenUsage?: {
    prompt: number;
    completion: number;
  };
}
|
|
224
|
+
/** Rewrites queries with a language model and keeps a size-bounded cache of results. */
declare class QueryRewriter {
  private model;
  private cache;
  private maxCacheSize;
  /** @param options - Model and optional cache bound. */
  constructor(options: QueryRewriterOptions);
  /** Resolves to the rewrite of `query`; `cached` on the result tells whether the cache was used. */
  rewrite(query: string): Promise<RewriteResult>;
  /** Current number of cached entries. */
  get cacheSize(): number;
  /** Empties the cache. */
  clearCache(): void;
}
|
|
233
|
+
//#endregion
|
|
234
|
+
//#region src/RetrievalPipeline.d.ts
|
|
235
|
+
/** Output of `RetrievalPipeline.retrieve`. */
interface RetrievalResult {
  /** Search results. */
  results: KBSearchResult[];
  /** Context string built from the results; the exact layout is not visible here. */
  context: string;
  /** Whether the query was treated as a FAQ match. */
  isFaqMatch: boolean;
  /** Rewritten query, when one was produced. */
  rewritten?: string;
  /** Raw FAQ answer extracted from metadata (only set when isFaqMatch is true). */
  faqAnswer?: string;
  /** Raw FAQ question extracted from metadata (only set when isFaqMatch is true). */
  faqQuestion?: string;
  /** Multiple FAQ matches from compound query splitting. */
  faqMatches?: Array<{
    faqQuestion: string;
    faqAnswer: string;
    score: number;
  }>;
}
|
|
251
|
+
/**
 * Tuning options for `RetrievalPipeline`; every field is optional.
 * NOTE(review): defaults and the exact comparison each threshold takes part in
 * are not visible in this declaration — confirm against the implementation.
 */
interface RetrievalPipelineOptions {
  /** FAQ match threshold. */
  faqThreshold?: number;
  /** Lower FAQ threshold. */
  faqLowThreshold?: number;
  /** FAQ score gap — presumably the required margin between the top candidates; TODO confirm. */
  faqScoreGap?: number;
  /** Toggles hybrid search. */
  useHybridSearch?: boolean;
  /** Optional query rewriter. */
  queryRewriter?: QueryRewriter;
  /** Upper threshold related to query rewriting. */
  rewriteHighThreshold?: number;
  /** Lower threshold related to query rewriting. */
  rewriteLowThreshold?: number;
  /** Fusion strategy: `'rrf'` or `'weighted'`. */
  fusionStrategy?: 'rrf' | 'weighted';
}
|
|
261
|
+
/**
 * Heuristic splitter for compound questions. No LLM call — zero latency cost.
 * Splits on "?" followed by more text, or " and " when both sides are >3 chars.
 * Returns the original query in a single-element array if no split detected.
 * Capped at 4 sub-queries max.
 *
 * @param query - The user query to inspect.
 * @returns The sub-queries, or `[query]` when nothing was split.
 */
declare function splitCompoundQuery(query: string): string[];
|
|
268
|
+
/**
 * Retrieves knowledge-base context for a query from a `KnowledgeStore`
 * using an `EmbeddingService`.
 */
declare class RetrievalPipeline {
  private store;
  private embedder;
  private faqThreshold;
  private faqLowThreshold;
  private faqScoreGap;
  private useHybridSearch;
  private queryRewriter?;
  private rewriteHighThreshold;
  private rewriteLowThreshold;
  private fusionStrategy;
  /**
   * Overload taking a bare FAQ threshold as the third argument.
   *
   * @param store - Store to search.
   * @param embedder - Embedding service.
   * @param faqThreshold - FAQ match threshold.
   */
  constructor(store: KnowledgeStore, embedder: EmbeddingService, faqThreshold?: number);
  /**
   * Overload taking the full options object.
   *
   * @param store - Store to search.
   * @param embedder - Embedding service.
   * @param options - Tuning options.
   */
  constructor(store: KnowledgeStore, embedder: EmbeddingService, options?: RetrievalPipelineOptions);
  /** Runs retrieval for `query` and resolves to the results and derived context. */
  retrieve(query: string, options?: KBSearchOptions): Promise<RetrievalResult>;
  private retrieveSingle;
  private hybridSearch;
  /**
   * Apply freshness and priority boosts to search results, then re-sort.
   */
  private applyBoosts;
  private formatContext;
}
|
|
290
|
+
//#endregion
|
|
291
|
+
//#region src/QueryNormalizer.d.ts
|
|
292
|
+
/**
|
|
293
|
+
* Query normalization for improved KB retrieval.
|
|
294
|
+
* Expands chat abbreviations and normalizes whitespace before embedding.
|
|
295
|
+
*/
|
|
296
|
+
/**
 * Normalize a user query for better embedding similarity.
 * - Expands chat abbreviations with word-boundary awareness
 * - Lowercases
 * - Collapses whitespace
 *
 * @param query - Raw user query.
 * @returns The normalized query.
 */
declare function normalizeQuery(query: string): string;
|
|
303
|
+
//#endregion
|
|
304
|
+
//#region src/RankFusion.d.ts
|
|
305
|
+
/**
|
|
306
|
+
* Reciprocal Rank Fusion (RRF) for combining multiple ranked result sets.
|
|
307
|
+
* Standard technique for hybrid search (vector + keyword).
|
|
308
|
+
*/
|
|
309
|
+
/**
 * Fuse multiple ranked result sets using Reciprocal Rank Fusion.
 *
 * @param resultSets - Array of Maps where key = item ID, value = rank (0-based)
 * @param k - Smoothing constant (default 60, industry standard)
 * @returns Map of item ID → fused RRF score, sorted descending by score
 */
declare function reciprocalRankFusion(resultSets: Map<string, number>[], k?: number): Map<string, number>;
|
|
317
|
+
/**
 * Weighted Score Fusion: combine vector and keyword scores using weighted average.
 * BM25 scores are min-max normalized to 0-1 before combining.
 *
 * @param vectorResults - Vector-search hits as `{ id, score }` pairs.
 * @param keywordResults - Keyword-search hits as `{ id, score }` pairs.
 * @param vectorWeight - Weight applied to vector scores; the default is not visible in this declaration.
 * @param keywordWeight - Weight applied to keyword scores; the default is not visible in this declaration.
 * @returns Map of item ID → fused score, sorted descending
 */
declare function weightedScoreFusion(vectorResults: {
  id: string;
  score: number;
}[], keywordResults: {
  id: string;
  score: number;
}[], vectorWeight?: number, keywordWeight?: number): Map<string, number>;
|
|
330
|
+
//#endregion
|
|
331
|
+
//#region src/ingestors/UrlIngestor.d.ts
|
|
332
|
+
/** Limits for `UrlIngestor.ingestSitemap` and `UrlIngestor.crawl`; defaults are not visible here. */
interface CrawlOptions {
  /** Maximum number of pages. */
  maxPages?: number;
  /** Maximum depth — presumably link depth from the start URL; TODO confirm. */
  maxDepth?: number;
}
|
|
336
|
+
/** Construction options for `UrlIngestor`. */
interface UrlIngestorOptions {
  /** URL of a Crawl4AI service to fetch through, when configured. */
  crawl4aiUrl?: string;
}
|
|
339
|
+
/** Ingests web content through an `IngestionPipeline`: a single URL, a sitemap, or a crawl. */
declare class UrlIngestor {
  private pipeline;
  private fetchOptions;
  /**
   * @param pipeline - Pipeline that receives the fetched content.
   * @param options - Optional fetch configuration.
   */
  constructor(pipeline: IngestionPipeline, options?: UrlIngestorOptions);
  /**
   * Ingests one URL and resolves to the resulting document.
   *
   * @param url - Page to ingest.
   * @param options - Optional `priority` and `extractQA` settings.
   */
  ingestUrl(url: string, options?: {
    priority?: number;
    extractQA?: boolean;
  }): Promise<KBDocument>;
  /** Ingests pages from the sitemap at `sitemapUrl` and resolves to the resulting documents. */
  ingestSitemap(sitemapUrl: string, options?: CrawlOptions): Promise<KBDocument[]>;
  /** Crawls from `startUrl` and resolves to the resulting documents. */
  crawl(startUrl: string, options?: CrawlOptions): Promise<KBDocument[]>;
  private parseSitemapUrls;
}
|
|
351
|
+
//#endregion
|
|
352
|
+
//#region src/ingestors/FileIngestor.d.ts
|
|
353
|
+
/**
 * Ingests a file through an `IngestionPipeline`.
 * Private extractors exist for PDF, DOCX, XLSX and HTML content.
 */
declare class FileIngestor {
  private pipeline;
  /** @param pipeline - Pipeline that receives the extracted content. */
  constructor(pipeline: IngestionPipeline);
  /**
   * Ingests the file at `filePath` and resolves to the resulting document.
   *
   * @param filePath - Path of the file to ingest.
   * @param title - Optional title; the fallback used when omitted is not visible here.
   * @param options - Optional `priority` setting.
   */
  ingestFile(filePath: string, title?: string, options?: {
    priority?: number;
  }): Promise<KBDocument>;
  private extractContent;
  private extractPdf;
  private extractDocx;
  private extractXlsx;
  private extractHtml;
}
|
|
365
|
+
//#endregion
|
|
366
|
+
//#region src/ingestors/SiteCrawler.d.ts
|
|
367
|
+
/** Options for `SiteCrawler.crawlSite`; every field is optional and defaults are not visible here. */
interface SiteCrawlOptions {
  /** Maximum depth — presumably link depth from the start URL; TODO confirm. */
  maxDepth?: number;
  /** Maximum number of pages. */
  maxPages?: number;
  /** Whether to use the site's sitemap. */
  useSitemap?: boolean;
  /** Delay in milliseconds — presumably between page fetches; TODO confirm. */
  delayMs?: number;
  /** Progress callback receiving the crawled count, the discovered count, and a URL. */
  onProgress?: (crawled: number, discovered: number, url: string) => void;
}
|
|
374
|
+
/** Construction options for `SiteCrawler`. */
interface SiteCrawlerOptions {
  /** URL of a Crawl4AI service to fetch through, when configured. */
  crawl4aiUrl?: string;
}
|
|
377
|
+
/** Crawls a site and ingests its pages through an `IngestionPipeline`. */
declare class SiteCrawler {
  private pipeline;
  private fetchOptions;
  /**
   * @param pipeline - Pipeline that receives the crawled pages.
   * @param options - Optional fetch configuration.
   */
  constructor(pipeline: IngestionPipeline, options?: SiteCrawlerOptions);
  /** Crawls from `startUrl` and resolves to the documents that were ingested. */
  crawlSite(startUrl: string, options?: SiteCrawlOptions): Promise<KBDocument[]>;
  private tryFetchSitemap;
  private parseSitemapUrls;
  private ingestPage;
  private delay;
}
|
|
387
|
+
//#endregion
|
|
388
|
+
//#region src/ingestors/WatiFaqSync.d.ts
|
|
389
|
+
/** A question together with its answer. */
interface FaqPair {
  /** The question text. */
  question: string;
  /** The answer text. */
  answer: string;
}
|
|
393
|
+
/** Filters for `WatiFaqSync`; both are optional and their defaults are not visible here. */
interface WatiFaqSyncOptions {
  /** Minimum answer length — presumably shorter answers are skipped; unit presumably characters. TODO confirm. */
  minAnswerLength?: number;
  /** Maximum number of pairs to process. */
  maxPairs?: number;
}
|
|
397
|
+
/** Turns conversations or ready-made FAQ pairs into knowledge-base documents via an `IngestionPipeline`. */
declare class WatiFaqSync {
  private pipeline;
  private llmExtract?;
  /**
   * @param pipeline - Pipeline that receives the FAQ pairs.
   * @param llmExtract - Optional extractor that turns one conversation into FAQ pairs.
   *   NOTE(review): behaviour of `syncFromConversations` when this is omitted is not visible here.
   */
  constructor(pipeline: IngestionPipeline, llmExtract?: (conversation: string) => Promise<FaqPair[]>);
  /** Processes conversation transcripts and resolves to the resulting documents. */
  syncFromConversations(conversations: string[], options?: WatiFaqSyncOptions): Promise<KBDocument[]>;
  /** Processes the given pairs and resolves to the resulting documents. */
  syncFromPairs(pairs: FaqPair[], options?: WatiFaqSyncOptions): Promise<KBDocument[]>;
}
|
|
404
|
+
//#endregion
|
|
405
|
+
//#region src/ingestors/content-fetcher.d.ts
|
|
406
|
+
/** Options for `fetchContent`. */
interface FetchContentOptions {
  /** URL of a Crawl4AI service to try first, when configured. */
  crawl4aiUrl?: string;
}
|
|
409
|
+
/** Result of `fetchContent`. */
interface FetchContentResult {
  /** Page title. */
  title: string;
  /** Extracted content. */
  content: string;
  /** Whether `content` is Markdown. */
  isMarkdown: boolean;
}
|
|
414
|
+
/** Reset the Crawl4AI health check cache. Used in tests. */
declare function resetCrawl4aiHealthCache(): void;
|
|
416
|
+
/**
 * Fetch raw HTML from a URL. Used for link extraction, sitemaps, etc.
 *
 * @param url - Address to fetch.
 * @returns The response body as a string.
 */
declare function fetchHtml(url: string): Promise<string>;
|
|
420
|
+
/**
 * Smart content fetch: tries Crawl4AI first (if configured), falls back to Readability.
 *
 * @param url - Page to fetch.
 * @param options - Optional Crawl4AI configuration.
 * @returns The page title, its content, and whether that content is Markdown.
 */
declare function fetchContent(url: string, options?: FetchContentOptions): Promise<FetchContentResult>;
|
|
424
|
+
/**
 * Extract readable content from pre-fetched HTML using @mozilla/readability.
 *
 * @param html - HTML source that has already been fetched.
 * @param url - URL the HTML came from.
 * @returns The extracted title and content.
 */
declare function extractFromHtml(html: string, url: string): {
  title: string;
  content: string;
};
|
|
431
|
+
/**
 * Extract same-domain links from HTML.
 *
 * @param html - HTML source to scan.
 * @param baseUrl - URL that defines the domain; presumably also used to resolve relative links — TODO confirm.
 * @returns The links found.
 */
declare function extractLinks(html: string, baseUrl: string): string[];
|
|
435
|
+
//#endregion
|
|
436
|
+
export { type ChunkOptions, type ContentReformatter, type CrawlOptions, EmbeddingService, type EmbeddingServiceConfig, type FaqPair, type FetchContentOptions, type FetchContentResult, FileIngestor, type IngestFaqOptions, type IngestFaqResult, type IngestInput, IngestionPipeline, type KBChunk, type KBDocument, type KBSearchOptions, type KBSearchResult, type KBStats, type KnowledgeStore, QueryRewriter, type QueryRewriterOptions, type RebuildResult, RetrievalPipeline, type RetrievalPipelineOptions, type RetrievalResult, type RewriteResult, SQLiteKnowledgeStore, type SiteCrawlOptions, SiteCrawler, type SiteCrawlerOptions, TextChunker, UrlIngestor, type UrlIngestorOptions, WatiFaqSync, type WatiFaqSyncOptions, extractFromHtml, extractLinks, fetchContent, fetchHtml, normalizeQuery, reciprocalRankFusion, resetCrawl4aiHealthCache, splitCompoundQuery, weightedScoreFusion };
|
|
437
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","names":[],"sources":["../src/types.ts","../src/EmbeddingService.ts","../src/SQLiteKnowledgeStore.ts","../src/TextChunker.ts","../src/IngestionPipeline.ts","../src/QueryRewriter.ts","../src/RetrievalPipeline.ts","../src/QueryNormalizer.ts","../src/RankFusion.ts","../src/ingestors/UrlIngestor.ts","../src/ingestors/FileIngestor.ts","../src/ingestors/SiteCrawler.ts","../src/ingestors/WatiFaqSync.ts","../src/ingestors/content-fetcher.ts"],"mappings":";;;UAAiB,UAAA;EACf,EAAA;EACA,UAAA;EACA,SAAA;EACA,QAAA;EACA,KAAA;EACA,OAAA;EACA,QAAA,GAAW,MAAA;EACX,SAAA;EACA,SAAA;EACA,QAAA;EACA,WAAA;AAAA;AAAA,UAGe,OAAA;EACf,EAAA;EACA,UAAA;EACA,OAAA;EACA,UAAA;EACA,SAAA;EACA,QAAA,GAAW,MAAA;AAAA;AAAA,UAGI,cAAA;EACf,KAAA,EAAO,OAAA;EACP,QAAA,EAAU,UAAA;EACV,KAAA;EACA,QAAA;AAAA;AAAA,UAGe,eAAA;EACf,KAAA;EACA,cAAA;EACA,WAAA,GAAc,UAAA;EACd,QAAA,GAAW,MAAA;AAAA;AAAA,UAGI,OAAA;EACf,aAAA;EACA,UAAA;EACA,mBAAA;EACA,WAAA;AAAA;AAAA,UAGe,cAAA;EACf,UAAA,IAAc,OAAA;EACd,KAAA,IAAS,OAAA;EAET,WAAA,CAAY,GAAA,EAAK,UAAA,GAAa,OAAA;EAC9B,WAAA,CAAY,EAAA,WAAa,OAAA,CAAQ,UAAA;EACjC,aAAA,IAAiB,OAAA,CAAQ,UAAA;EACzB,cAAA,CAAe,EAAA,WAAa,OAAA;EAE5B,SAAA,CAAU,MAAA,EAAQ,OAAA,KAAY,OAAA;EAC9B,MAAA,CAAO,KAAA,UAAe,SAAA,YAAqB,OAAA,GAAU,eAAA,GAAkB,OAAA,CAAQ,cAAA;EAC/E,iBAAA,CAAkB,SAAA,YAAqB,OAAA,GAAU,eAAA,GAAkB,OAAA,CAAQ,cAAA;EAC3E,eAAA,EAAiB,KAAA,UAAe,OAAA,GAAU,eAAA,GAAkB,OAAA,CAAQ,cAAA;EACpE,QAAA,IAAY,OAAA,CAAQ,OAAA;EAzBpB;EA4BA,aAAA,EAAe,UAAA;AAAA;;;UCtDA,sBAAA;EACf,QAAA;EACA,MAAA;EACA,KAAA;EACA,OAAA;EACA,UAAA;AAAA;AAAA,cAGW,gBAAA;EAAA,QACH,MAAA;cAEI,MAAA,EAAQ,sBAAA;EAAA,QAIZ,QAAA;EAAA,IAgCJ,QAAA,CAAA;EAAA,IAIA,UAAA,CAAA;EAAA,OAKG,iBAAA,CAAkB,QAAA,UAAkB,KAAA;EAiBrC,KAAA,CAAM,IAAA,WAAe,OAAA;EAMrB,SAAA,CAAU,KAAA,aAAkB,OAAA;AAAA;;;cChFvB,oBAAA,YAAgC,cAAA;EAAA,QACnC,EAAA;EAAA,QACA,MAAA;EAAA,QACA,UAAA;EAAA,QACA,eAAA;cAEI,MAAA,WAAmC,UAAA;EAS/C,aAAA,CAAA;EAIM,UAAA,CAAA,GAAc,OAAA;EAiDd,KAAA,CAAA,GAAS,OAAA;EAIT,WAAA,CAAY,GAAA,EAAK,UAAA,GAAa,OAAA;EAmB9B,WAAA,CAAY,EAAA,WAAa,OAAA,CAAQ,UAAA;EAKjC,aAAA,CAAA,GAAi
B,OAAA,CAAQ,UAAA;EAKzB,cAAA,CAAe,EAAA,WAAa,OAAA;EAU5B,SAAA,CAAU,MAAA,EAAQ,OAAA,KAAY,OAAA;EA8CpC,aAAA,CAAc,UAAA;EAOR,MAAA,CAAO,KAAA,UAAe,SAAA,YAAqB,OAAA,GAAU,eAAA,GAAkB,OAAA,CAAQ,cAAA;EAI/E,iBAAA,CAAkB,SAAA,YAAqB,OAAA,GAAU,eAAA,GAAkB,OAAA,CAAQ,cAAA;EAiD3E,eAAA,CAAgB,KAAA,UAAe,OAAA,GAAU,eAAA,GAAkB,OAAA,CAAQ,cAAA;EFnN9D;;AAGb;;EE2QE,YAAA,CAAA;IAAkB,EAAA;IAAY,UAAA;IAAoB,OAAA;IAAiB,UAAA;IAAoB,QAAA;EAAA;EFrQvF;;;;EE+QA,eAAA,CAAgB,aAAA;EF5Qa;;;;EE4R7B,qBAAA,CAAsB,KAAA;IAAS,OAAA;IAAiB,SAAA;EAAA;EAY1C,QAAA,CAAA,GAAY,OAAA,CAAQ,OAAA;EAepB,eAAA,CAAgB,GAAA,WAAc,OAAA,CAAQ,UAAA;EAKtC,iBAAA,CAAkB,IAAA,WAAe,OAAA,CAAQ,UAAA;EAKzC,cAAA,CAAe,EAAA,UAAY,OAAA;IAAW,OAAA;IAAkB,KAAA;IAAgB,WAAA;IAAsB,QAAA;IAAmB,QAAA,GAAW,MAAA;EAAA,IAAwB,OAAA;EAapJ,cAAA,CAAe,SAAA,YAAqB,SAAA,WAAoB,OAAA,CAAQ,cAAA;EAAA,QAM9D,aAAA;AAAA;;;UCzWO,YAAA;EACf,SAAA;EACA,YAAA;AAAA;AAAA,cAGW,WAAA;EAAA,QACH,gBAAA;EAAA,QACA,mBAAA;cAEI,OAAA,GAAU,YAAA;EAKhB,KAAA,CAAM,IAAA,UAAc,OAAA,GAAU,YAAA,GAAe,OAAA;EAS7C,aAAA,CAAc,QAAA,UAAkB,OAAA,GAAU,YAAA,GAAe,OAAA;AAAA;;;UClBhD,kBAAA;EACf,QAAA,CAAS,QAAA;IAAY,IAAA;IAAyB,OAAA;EAAA,MAAsB,OAAA;IAAU,IAAA;EAAA;AAAA;AAAA,UAG/D,WAAA;EACf,UAAA,EAAY,UAAA;EACZ,OAAA;EACA,KAAA;EACA,SAAA;EACA,QAAA;EACA,QAAA,GAAW,MAAA;EACX,UAAA;EJPW;EISX,SAAA;EJNe;EIQf,QAAA;AAAA;AAAA,UAGe,gBAAA;EACf,SAAA;EAAA,CACC,GAAA;AAAA;AAAA,UAGc,eAAA,SAAwB,UAAA;EACvC,aAAA;IAAkB,EAAA;IAAY,QAAA;IAAkB,MAAA;IAAgB,KAAA;EAAA;AAAA;AAAA,UAGjD,aAAA;EACf,gBAAA;EACA,aAAA;EACA,aAAA;EACA,aAAA;AAAA;AAAA,cAGW,iBAAA;EAAA,QACH,KAAA;EAAA,QACA,QAAA;EAAA,QACA,OAAA;EAAA,QACA,WAAA;cAEI,KAAA,EAAO,cAAA,EAAgB,QAAA,EAAU,gBAAA,EAAkB,OAAA,EAAS,WAAA,EAAa,WAAA,GAAc,kBAAA;EAAA,QAO3F,YAAA;EAAA,QAUM,cAAA;EAAA,QAgCN,WAAA;EAIF,MAAA,CAAO,KAAA,EAAO,WAAA,GAAc,OAAA,CAAQ,UAAA;EAgHpC,SAAA,CAAU,QAAA,UAAkB,MAAA,UAAgB,QAAA,GAAW,MAAA;IAAwB,YAAA;EAAA,IAA2B,OAAA,CAAQ,eAAA;EJlL7G;;;AAGb;;;EIoPQ,OAAA,CAAQ,UAAA,IAAc,OAAA,UAAiB,KAAA,UAAe,QAAA,oBAA4B,OAAA,CAAQ,aAAA;AAAA;;;UCjRjF,oBAAA;EACf,KAAA,EAAO,eAAA;EACP,YAAA;AAAA;AAAA,UAGe,aAAA;EACf,QAAA;EACA,SAAA;EACA,MAAA;EACA,UAAA;IAAe,MAAA;
IAAgB,UAAA;EAAA;AAAA;AAAA,cAGpB,aAAA;EAAA,QACH,KAAA;EAAA,QACA,KAAA;EAAA,QACA,YAAA;cAEI,OAAA,EAAS,oBAAA;EAMf,OAAA,CAAQ,KAAA,WAAgB,OAAA,CAAQ,aAAA;EAAA,IAkClC,SAAA,CAAA;EAIJ,UAAA,CAAA;AAAA;;;UC/De,eAAA;EACf,OAAA,EAAS,cAAA;EACT,OAAA;EACA,UAAA;EACA,SAAA;ENRA;EMUA,SAAA;ENRA;EMUA,WAAA;ENRA;EMUA,UAAA,GAAa,KAAA;IAAQ,WAAA;IAAqB,SAAA;IAAmB,KAAA;EAAA;AAAA;AAAA,UAG9C,wBAAA;EACf,YAAA;EACA,eAAA;EACA,WAAA;EACA,eAAA;EACA,aAAA,GAAgB,aAAA;EAChB,oBAAA;EACA,mBAAA;EACA,cAAA;AAAA;;;;;;ANJF;iBMagB,kBAAA,CAAmB,KAAA;AAAA,cAiBtB,iBAAA;EAAA,QACH,KAAA;EAAA,QACA,QAAA;EAAA,QACA,YAAA;EAAA,QACA,eAAA;EAAA,QACA,WAAA;EAAA,QACA,eAAA;EAAA,QACA,aAAA;EAAA,QACA,oBAAA;EAAA,QACA,mBAAA;EAAA,QACA,cAAA;cAEI,KAAA,EAAO,cAAA,EAAgB,QAAA,EAAU,gBAAA,EAAkB,YAAA;cACnD,KAAA,EAAO,cAAA,EAAgB,QAAA,EAAU,gBAAA,EAAkB,OAAA,GAAU,wBAAA;EA8BnE,QAAA,CAAS,KAAA,UAAe,OAAA,GAAU,eAAA,GAAkB,OAAA,CAAQ,eAAA;EAAA,QAyDpD,cAAA;EAAA,QAkHA,YAAA;EN1Od;;;EAAA,QM8TQ,WAAA;EAAA,QA0BA,aAAA;AAAA;;;;;;ANzXV;;;;;;;iBO+CgB,cAAA,CAAe,KAAA;;;;;;AP/C/B;;;;;;;;iBQYgB,oBAAA,CACd,UAAA,EAAY,GAAA,oBACZ,CAAA,YACC,GAAA;;;;;;;iBAwBa,mBAAA,CACd,aAAA;EAAiB,EAAA;EAAY,KAAA;AAAA,KAC7B,cAAA;EAAkB,EAAA;EAAY,KAAA;AAAA,KAC9B,YAAA,WACA,aAAA,YACC,GAAA;;;UCvCc,YAAA;EACf,QAAA;EACA,QAAA;AAAA;AAAA,UAGe,kBAAA;EACf,WAAA;AAAA;AAAA,cAGW,WAAA;EAAA,QACH,QAAA;EAAA,QACA,YAAA;cAEI,QAAA,EAAU,iBAAA,EAAmB,OAAA,GAAU,kBAAA;EAK7C,SAAA,CAAU,GAAA,UAAa,OAAA;IAAY,QAAA;IAAmB,SAAA;EAAA,IAAwB,OAAA,CAAQ,UAAA;EActF,aAAA,CAAc,UAAA,UAAoB,OAAA,GAAU,YAAA,GAAe,OAAA,CAAQ,UAAA;EAiBnE,KAAA,CAAM,QAAA,UAAkB,OAAA,GAAU,YAAA,GAAe,OAAA,CAAQ,UAAA;EAAA,QA0CvD,gBAAA;AAAA;;;cC3FG,YAAA;EAAA,QACH,QAAA;cAEI,QAAA,EAAU,iBAAA;EAIhB,UAAA,CAAW,QAAA,UAAkB,KAAA,WAAgB,OAAA;IAAY,QAAA;EAAA,IAAsB,OAAA,CAAQ,UAAA;EAAA,QAc/E,cAAA;EAAA,QAqBA,UAAA;EAAA,QAQA,WAAA;EAAA,QAOA,WAAA;EAAA,QAaA,WAAA;AAAA;;;UCtEC,gBAAA;EACf,QAAA;EACA,QAAA;EACA,UAAA;EACA,OAAA;EACA,UAAA,IAAc,OAAA,UAAiB,UAAA,UAAoB,GAAA;AAAA;AAAA,UAGpC,kBAAA;EACf,WAAA;AAAA;AAAA,cAGW,WAAA;EAAA,QACH,QAAA;EAAA,QACA,YAAA;cAEI,QAAA,EAAU,iBAAA,EAAmB,OAAA,GAAU,kBAAA;EAK7C,SAAA,CAAU,QAAA,UAAk
B,OAAA,GAAS,gBAAA,GAAwB,OAAA,CAAQ,UAAA;EAAA,QAuE7D,eAAA;EAAA,QAWA,gBAAA;EAAA,QA8BA,UAAA;EAAA,QAWN,KAAA;AAAA;;;UClJO,OAAA;EACf,QAAA;EACA,MAAA;AAAA;AAAA,UAGe,kBAAA;EACf,eAAA;EACA,QAAA;AAAA;AAAA,cAGW,WAAA;EAAA,QACH,QAAA;EAAA,QACA,UAAA;cAGN,QAAA,EAAU,iBAAA,EACV,UAAA,IAAc,YAAA,aAAyB,OAAA,CAAQ,OAAA;EAM3C,qBAAA,CACJ,aAAA,YACA,OAAA,GAAU,kBAAA,GACT,OAAA,CAAQ,UAAA;EA+BL,aAAA,CAAc,KAAA,EAAO,OAAA,IAAW,OAAA,GAAU,kBAAA,GAAqB,OAAA,CAAQ,UAAA;AAAA;;;UCxD9D,mBAAA;EACf,WAAA;AAAA;AAAA,UAGe,kBAAA;EACf,KAAA;EACA,OAAA;EACA,UAAA;AAAA;;iBASc,wBAAA,CAAA;;;;iBAQM,SAAA,CAAU,GAAA,WAAc,OAAA;;;;iBAWxB,YAAA,CAAa,GAAA,UAAa,OAAA,GAAU,mBAAA,GAAsB,OAAA,CAAQ,kBAAA;;;;iBAmBxE,eAAA,CAAgB,IAAA,UAAc,GAAA;EAAgB,KAAA;EAAe,OAAA;AAAA;;;;iBAa7D,YAAA,CAAa,IAAA,UAAc,OAAA"}
|