log-search 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +226 -0
- package/bin/log-search.js +45 -0
- package/dist/cli/commands/clear.d.ts +9 -0
- package/dist/cli/commands/clear.d.ts.map +1 -0
- package/dist/cli/commands/clear.js +116 -0
- package/dist/cli/commands/clear.js.map +1 -0
- package/dist/cli/commands/index.d.ts +8 -0
- package/dist/cli/commands/index.d.ts.map +1 -0
- package/dist/cli/commands/index.js +118 -0
- package/dist/cli/commands/index.js.map +1 -0
- package/dist/cli/commands/info.d.ts +8 -0
- package/dist/cli/commands/info.d.ts.map +1 -0
- package/dist/cli/commands/info.js +96 -0
- package/dist/cli/commands/info.js.map +1 -0
- package/dist/cli/commands/search.d.ts +9 -0
- package/dist/cli/commands/search.d.ts.map +1 -0
- package/dist/cli/commands/search.js +178 -0
- package/dist/cli/commands/search.js.map +1 -0
- package/dist/cli/commands/watch.d.ts +8 -0
- package/dist/cli/commands/watch.d.ts.map +1 -0
- package/dist/cli/commands/watch.js +141 -0
- package/dist/cli/commands/watch.js.map +1 -0
- package/dist/cli/main.d.ts +6 -0
- package/dist/cli/main.d.ts.map +1 -0
- package/dist/cli/main.js +57 -0
- package/dist/cli/main.js.map +1 -0
- package/dist/cli/output/colorizer.d.ts +36 -0
- package/dist/cli/output/colorizer.d.ts.map +1 -0
- package/dist/cli/output/colorizer.js +101 -0
- package/dist/cli/output/colorizer.js.map +1 -0
- package/dist/cli/output/formatter.d.ts +31 -0
- package/dist/cli/output/formatter.d.ts.map +1 -0
- package/dist/cli/output/formatter.js +146 -0
- package/dist/cli/output/formatter.js.map +1 -0
- package/dist/cli/output/table.d.ts +12 -0
- package/dist/cli/output/table.d.ts.map +1 -0
- package/dist/cli/output/table.js +48 -0
- package/dist/cli/output/table.js.map +1 -0
- package/dist/core/indexer/ChunkProcessor.d.ts +21 -0
- package/dist/core/indexer/ChunkProcessor.d.ts.map +1 -0
- package/dist/core/indexer/ChunkProcessor.js +105 -0
- package/dist/core/indexer/ChunkProcessor.js.map +1 -0
- package/dist/core/indexer/IndexBuilder.d.ts +47 -0
- package/dist/core/indexer/IndexBuilder.d.ts.map +1 -0
- package/dist/core/indexer/IndexBuilder.js +274 -0
- package/dist/core/indexer/IndexBuilder.js.map +1 -0
- package/dist/core/indexer/IndexSerializer.d.ts +30 -0
- package/dist/core/indexer/IndexSerializer.d.ts.map +1 -0
- package/dist/core/indexer/IndexSerializer.js +142 -0
- package/dist/core/indexer/IndexSerializer.js.map +1 -0
- package/dist/core/indexer/OffsetMapper.d.ts +55 -0
- package/dist/core/indexer/OffsetMapper.d.ts.map +1 -0
- package/dist/core/indexer/OffsetMapper.js +94 -0
- package/dist/core/indexer/OffsetMapper.js.map +1 -0
- package/dist/core/indexer/TokenExtractor.d.ts +27 -0
- package/dist/core/indexer/TokenExtractor.d.ts.map +1 -0
- package/dist/core/indexer/TokenExtractor.js +92 -0
- package/dist/core/indexer/TokenExtractor.js.map +1 -0
- package/dist/core/searcher/IndexSearcher.d.ts +37 -0
- package/dist/core/searcher/IndexSearcher.d.ts.map +1 -0
- package/dist/core/searcher/IndexSearcher.js +360 -0
- package/dist/core/searcher/IndexSearcher.js.map +1 -0
- package/dist/core/searcher/RankEngine.d.ts +29 -0
- package/dist/core/searcher/RankEngine.d.ts.map +1 -0
- package/dist/core/searcher/RankEngine.js +90 -0
- package/dist/core/searcher/RankEngine.js.map +1 -0
- package/dist/core/searcher/ResultFetcher.d.ts +37 -0
- package/dist/core/searcher/ResultFetcher.d.ts.map +1 -0
- package/dist/core/searcher/ResultFetcher.js +118 -0
- package/dist/core/searcher/ResultFetcher.js.map +1 -0
- package/dist/core/streaming/ChunkSplitter.d.ts +25 -0
- package/dist/core/streaming/ChunkSplitter.d.ts.map +1 -0
- package/dist/core/streaming/ChunkSplitter.js +71 -0
- package/dist/core/streaming/ChunkSplitter.js.map +1 -0
- package/dist/core/streaming/FileStreamer.d.ts +39 -0
- package/dist/core/streaming/FileStreamer.d.ts.map +1 -0
- package/dist/core/streaming/FileStreamer.js +138 -0
- package/dist/core/streaming/FileStreamer.js.map +1 -0
- package/dist/core/streaming/LineBuffer.d.ts +26 -0
- package/dist/core/streaming/LineBuffer.d.ts.map +1 -0
- package/dist/core/streaming/LineBuffer.js +52 -0
- package/dist/core/streaming/LineBuffer.js.map +1 -0
- package/dist/core/workers/IndexWorker.d.ts +8 -0
- package/dist/core/workers/IndexWorker.d.ts.map +1 -0
- package/dist/core/workers/IndexWorker.js +41 -0
- package/dist/core/workers/IndexWorker.js.map +1 -0
- package/dist/core/workers/SearchWorker.d.ts +7 -0
- package/dist/core/workers/SearchWorker.d.ts.map +1 -0
- package/dist/core/workers/SearchWorker.js +63 -0
- package/dist/core/workers/SearchWorker.js.map +1 -0
- package/dist/core/workers/WorkerPool.d.ts +30 -0
- package/dist/core/workers/WorkerPool.d.ts.map +1 -0
- package/dist/core/workers/WorkerPool.js +132 -0
- package/dist/core/workers/WorkerPool.js.map +1 -0
- package/dist/formats/LogFormatDetector.d.ts +22 -0
- package/dist/formats/LogFormatDetector.d.ts.map +1 -0
- package/dist/formats/LogFormatDetector.js +123 -0
- package/dist/formats/LogFormatDetector.js.map +1 -0
- package/dist/formats/parsers/ApacheParser.d.ts +10 -0
- package/dist/formats/parsers/ApacheParser.d.ts.map +1 -0
- package/dist/formats/parsers/ApacheParser.js +54 -0
- package/dist/formats/parsers/ApacheParser.js.map +1 -0
- package/dist/formats/parsers/GenericParser.d.ts +11 -0
- package/dist/formats/parsers/GenericParser.d.ts.map +1 -0
- package/dist/formats/parsers/GenericParser.js +61 -0
- package/dist/formats/parsers/GenericParser.js.map +1 -0
- package/dist/formats/parsers/JsonParser.d.ts +12 -0
- package/dist/formats/parsers/JsonParser.d.ts.map +1 -0
- package/dist/formats/parsers/JsonParser.js +92 -0
- package/dist/formats/parsers/JsonParser.js.map +1 -0
- package/dist/formats/parsers/NginxParser.d.ts +15 -0
- package/dist/formats/parsers/NginxParser.d.ts.map +1 -0
- package/dist/formats/parsers/NginxParser.js +78 -0
- package/dist/formats/parsers/NginxParser.js.map +1 -0
- package/dist/formats/parsers/SyslogParser.d.ts +10 -0
- package/dist/formats/parsers/SyslogParser.d.ts.map +1 -0
- package/dist/formats/parsers/SyslogParser.js +73 -0
- package/dist/formats/parsers/SyslogParser.js.map +1 -0
- package/dist/formats/schemas/LogSchema.d.ts +70 -0
- package/dist/formats/schemas/LogSchema.d.ts.map +1 -0
- package/dist/formats/schemas/LogSchema.js +7 -0
- package/dist/formats/schemas/LogSchema.js.map +1 -0
- package/dist/index-store/CacheManager.d.ts +45 -0
- package/dist/index-store/CacheManager.d.ts.map +1 -0
- package/dist/index-store/CacheManager.js +84 -0
- package/dist/index-store/CacheManager.js.map +1 -0
- package/dist/index-store/FileWatcher.d.ts +39 -0
- package/dist/index-store/FileWatcher.d.ts.map +1 -0
- package/dist/index-store/FileWatcher.js +121 -0
- package/dist/index-store/FileWatcher.js.map +1 -0
- package/dist/index-store/IncrementalUpdater.d.ts +19 -0
- package/dist/index-store/IncrementalUpdater.d.ts.map +1 -0
- package/dist/index-store/IncrementalUpdater.js +62 -0
- package/dist/index-store/IncrementalUpdater.js.map +1 -0
- package/dist/index-store/IndexStore.d.ts +66 -0
- package/dist/index-store/IndexStore.d.ts.map +1 -0
- package/dist/index-store/IndexStore.js +183 -0
- package/dist/index-store/IndexStore.js.map +1 -0
- package/dist/index.d.ts +47 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +99 -0
- package/dist/index.js.map +1 -0
- package/dist/pro/alerts/AlertEngine.d.ts +35 -0
- package/dist/pro/alerts/AlertEngine.d.ts.map +1 -0
- package/dist/pro/alerts/AlertEngine.js +162 -0
- package/dist/pro/alerts/AlertEngine.js.map +1 -0
- package/dist/pro/alerts/Notifier.d.ts +23 -0
- package/dist/pro/alerts/Notifier.d.ts.map +1 -0
- package/dist/pro/alerts/Notifier.js +173 -0
- package/dist/pro/alerts/Notifier.js.map +1 -0
- package/dist/pro/alerts/RuleParser.d.ts +32 -0
- package/dist/pro/alerts/RuleParser.d.ts.map +1 -0
- package/dist/pro/alerts/RuleParser.js +86 -0
- package/dist/pro/alerts/RuleParser.js.map +1 -0
- package/dist/pro/auth/LicenseValidator.d.ts +29 -0
- package/dist/pro/auth/LicenseValidator.d.ts.map +1 -0
- package/dist/pro/auth/LicenseValidator.js +122 -0
- package/dist/pro/auth/LicenseValidator.js.map +1 -0
- package/dist/pro/auth/TokenManager.d.ts +27 -0
- package/dist/pro/auth/TokenManager.d.ts.map +1 -0
- package/dist/pro/auth/TokenManager.js +98 -0
- package/dist/pro/auth/TokenManager.js.map +1 -0
- package/dist/pro/webui/UIServer.d.ts +34 -0
- package/dist/pro/webui/UIServer.d.ts.map +1 -0
- package/dist/pro/webui/UIServer.js +353 -0
- package/dist/pro/webui/UIServer.js.map +1 -0
- package/dist/query/QueryEngine.d.ts +34 -0
- package/dist/query/QueryEngine.d.ts.map +1 -0
- package/dist/query/QueryEngine.js +187 -0
- package/dist/query/QueryEngine.js.map +1 -0
- package/dist/query/operators/AndOperator.d.ts +18 -0
- package/dist/query/operators/AndOperator.d.ts.map +1 -0
- package/dist/query/operators/AndOperator.js +55 -0
- package/dist/query/operators/AndOperator.js.map +1 -0
- package/dist/query/operators/NotOperator.d.ts +19 -0
- package/dist/query/operators/NotOperator.d.ts.map +1 -0
- package/dist/query/operators/NotOperator.js +43 -0
- package/dist/query/operators/NotOperator.js.map +1 -0
- package/dist/query/operators/OrOperator.d.ts +17 -0
- package/dist/query/operators/OrOperator.d.ts.map +1 -0
- package/dist/query/operators/OrOperator.js +54 -0
- package/dist/query/operators/OrOperator.js.map +1 -0
- package/dist/query/operators/RangeOperator.d.ts +23 -0
- package/dist/query/operators/RangeOperator.d.ts.map +1 -0
- package/dist/query/operators/RangeOperator.js +63 -0
- package/dist/query/operators/RangeOperator.js.map +1 -0
- package/dist/query/regex/FuzzyMatcher.d.ts +29 -0
- package/dist/query/regex/FuzzyMatcher.d.ts.map +1 -0
- package/dist/query/regex/FuzzyMatcher.js +89 -0
- package/dist/query/regex/FuzzyMatcher.js.map +1 -0
- package/dist/query/regex/RegexMatcher.d.ts +31 -0
- package/dist/query/regex/RegexMatcher.d.ts.map +1 -0
- package/dist/query/regex/RegexMatcher.js +73 -0
- package/dist/query/regex/RegexMatcher.js.map +1 -0
- package/dist/types/ConfigTypes.d.ts +78 -0
- package/dist/types/ConfigTypes.d.ts.map +1 -0
- package/dist/types/ConfigTypes.js +7 -0
- package/dist/types/ConfigTypes.js.map +1 -0
- package/dist/types/IndexTypes.d.ts +86 -0
- package/dist/types/IndexTypes.d.ts.map +1 -0
- package/dist/types/IndexTypes.js +7 -0
- package/dist/types/IndexTypes.js.map +1 -0
- package/dist/types/SearchTypes.d.ts +102 -0
- package/dist/types/SearchTypes.d.ts.map +1 -0
- package/dist/types/SearchTypes.js +7 -0
- package/dist/types/SearchTypes.js.map +1 -0
- package/dist/types/index.d.ts +8 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +24 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/fs-helpers.d.ts +9 -0
- package/dist/utils/fs-helpers.d.ts.map +1 -0
- package/dist/utils/fs-helpers.js +76 -0
- package/dist/utils/fs-helpers.js.map +1 -0
- package/dist/utils/hash.d.ts +7 -0
- package/dist/utils/hash.d.ts.map +1 -0
- package/dist/utils/hash.js +49 -0
- package/dist/utils/hash.js.map +1 -0
- package/dist/utils/logger.d.ts +19 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +41 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/performance.d.ts +14 -0
- package/dist/utils/performance.d.ts.map +1 -0
- package/dist/utils/performance.js +32 -0
- package/dist/utils/performance.js.map +1 -0
- package/dist/utils/progress.d.ts +7 -0
- package/dist/utils/progress.d.ts.map +1 -0
- package/dist/utils/progress.js +22 -0
- package/dist/utils/progress.js.map +1 -0
- package/package.json +105 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* TokenExtractor.ts
|
|
4
|
+
* Extracts normalized tokens (terms) from a log line for indexing.
|
|
5
|
+
* Handles deduplication, normalization, and stop-word filtering.
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.TokenExtractor = void 0;
|
|
9
|
+
/**
 * Common English words that carry no search value on their own.
 * A whole token equal to one of these is never indexed.
 */
const DEFAULT_STOP_WORDS = new Set([
    'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
    'of', 'with', 'by', 'from', 'is', 'are', 'was', 'were', 'be', 'been',
    'has', 'have', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
    'should', 'may', 'might', 'must', 'can', 'it', 'its', 'that', 'this',
]);
// Matches tokens: words, IP-like segments, version numbers, HTTP codes, etc.
const TOKEN_REGEX = /[\w.\-/]+/g;
// A token made only of separator characters has no searchable text in it.
const PUNCTUATION_ONLY_REGEX = /^[.\-/\\]+$/;
// Separator characters attached to either end of a token (e.g. the full stop in "refused.").
const EDGE_PUNCTUATION_REGEX = /^[.\-/]+|[.\-/]+$/g;
// Purely numeric fragments (such as IP octets) are not indexed as dotted sub-tokens.
const DIGITS_ONLY_REGEX = /^\d+$/;
/**
 * Turns a log line into the set of lowercase terms that should be indexed for it.
 */
class TokenExtractor {
    // Tokens and sub-tokens shorter than this are ignored.
    minLength;
    // Lowercase words that are never indexed as whole tokens.
    stopWords;
    // Copied from the options (default true); not consulted anywhere in this class.
    includeTimestamps;
    /**
     * @param {object} [options]
     * @param {number} [options.minLength=2] - Minimum token length.
     * @param {Iterable<string>} [options.stopWords] - Extra stop words, matched case-insensitively.
     * @param {boolean} [options.includeTimestamps=true] - Stored on the instance only.
     */
    constructor(options = {}) {
        this.minLength = options.minLength ?? 2;
        // Tokens are lowercased before the stop-word check, so custom words must be
        // lowercased too or an entry such as "DEBUG" could never match.
        const extraStopWords = [...(options.stopWords ?? [])].map((word) => String(word).toLowerCase());
        this.stopWords = new Set([
            ...DEFAULT_STOP_WORDS,
            ...extraStopWords,
        ]);
        this.includeTimestamps = options.includeTimestamps ?? true;
    }
    /**
     * Extract all unique tokens from a single line.
     *
     * @param {string} line - One log line.
     * @returns {Set<string>} Normalized lowercase tokens, including sub-tokens.
     */
    extract(line) {
        const tokens = new Set();
        for (const [raw] of line.matchAll(TOKEN_REGEX)) {
            // Skip very short tokens
            if (raw.length < this.minLength) {
                continue;
            }
            const token = raw.toLowerCase();
            // Skip stop words and tokens that consist of separators only
            if (this.stopWords.has(token) || PUNCTUATION_ONLY_REGEX.test(token)) {
                continue;
            }
            tokens.add(token);
            // "refused." at the end of a sentence must also be findable as "refused".
            const bare = token.replace(EDGE_PUNCTUATION_REGEX, '');
            if (bare.length > 0 && bare.length >= this.minLength && !this.stopWords.has(bare)) {
                tokens.add(bare);
            }
            // Also index sub-tokens split by common delimiters for partial matching
            // e.g. "/api/users" → also indexes "api", "users"
            this.addSubTokens(raw, tokens);
        }
        return tokens;
    }
    /**
     * Break compound tokens into sub-tokens for better searchability.
     * e.g. "db.internal.example.com" → "db", "internal", "example", "com"
     *      "/api/v2/users" → "api", "v2", "users"
     *      "user-service-api" → "user", "service", "api"
     * All-digit dotted parts (the octets of "192.168.1.1") are not added.
     *
     * @param {string} raw - Token exactly as matched in the line.
     * @param {Set<string>} tokens - Set that receives the sub-tokens.
     */
    addSubTokens(raw, tokens) {
        // Split on / for URLs and paths
        if (raw.includes('/')) {
            for (const part of raw.split('/')) {
                this.addPart(part, tokens);
            }
        }
        // Split on . for IPs and dotted names (but not names starting with a dot)
        if (raw.includes('.') && !raw.startsWith('.')) {
            const parts = raw.split('.');
            // Only index sub-tokens if it looks like a dotted path (not "name.ext")
            if (parts.length >= 3 && parts.every((p) => p.length >= 1)) {
                for (const part of parts) {
                    this.addPart(part, tokens, (form) => DIGITS_ONLY_REGEX.test(form));
                }
            }
        }
        // Split on - for kebab-case identifiers
        if (raw.includes('-') && raw.length > 4) {
            for (const part of raw.split('-')) {
                this.addPart(part, tokens, (form) => this.stopWords.has(form));
            }
        }
    }
    /**
     * Add one sub-token, plus its form without leading/trailing separators,
     * skipping forms that are too short, separator-only, or excluded.
     *
     * @param {string} part - Fragment produced by splitting a compound token.
     * @param {Set<string>} tokens - Set that receives the accepted forms.
     * @param {(form: string) => boolean} [isExcluded] - Extra rejection rule.
     */
    addPart(part, tokens, isExcluded = () => false) {
        const lowered = part.toLowerCase();
        const bare = lowered.replace(EDGE_PUNCTUATION_REGEX, '');
        for (const form of [lowered, bare]) {
            if (form.length === 0 || form.length < this.minLength) {
                continue;
            }
            if (PUNCTUATION_ONLY_REGEX.test(form) || isExcluded(form)) {
                continue;
            }
            tokens.add(form);
        }
    }
}
|
|
91
|
+
exports.TokenExtractor = TokenExtractor;
|
|
92
|
+
//# sourceMappingURL=TokenExtractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TokenExtractor.js","sourceRoot":"","sources":["../../../src/core/indexer/TokenExtractor.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAEH,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC;IACjC,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK;IACnE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM;IACpE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO;IACnE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM;CACrE,CAAC,CAAC;AAEH,6EAA6E;AAC7E,MAAM,WAAW,GAAG,YAAY,CAAC;AAEjC,MAAa,cAAc;IACjB,SAAS,CAAS;IAClB,SAAS,CAAc;IACvB,iBAAiB,CAAU;IAEnC,YAAY,UAIR,EAAE;QACJ,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,CAAC,CAAC;QACxC,IAAI,CAAC,SAAS,GAAG,IAAI,GAAG,CAAC;YACvB,GAAG,kBAAkB;YACrB,GAAG,CAAC,OAAO,CAAC,SAAS,IAAI,EAAE,CAAC;SAC7B,CAAC,CAAC;QACH,IAAI,CAAC,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC;IAC7D,CAAC;IAED;;;OAGG;IACH,OAAO,CAAC,IAAY;QAClB,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;QACjC,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;QAE3C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAErB,yBAAyB;YACzB,IAAI,GAAG,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS;gBAAE,SAAS;YAE1C,MAAM,KAAK,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YAEhC,kDAAkD;YAClD,IAAI,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC;gBAAE,SAAS;YAExC,uCAAuC;YACvC,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC;gBAAE,SAAS;YAExC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAElB,wEAAwE;YACxE,sDAAsD;YACtD,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QACjC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;OAIG;IACK,YAAY,CAAC,GAAW,EAAE,MAAmB;QACnD,gCAAgC;QAChC,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACtB,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;gBAClC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oBAClC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC;QACH,CAAC;QAED,gEAAgE;QAChE,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC
,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC7B,6EAA6E;YAC7E,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,EAAE,CAAC;gBAC3D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;wBACzD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;oBACjC,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,wCAAwC;QACxC,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxC,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;gBAClC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,SAAS,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;oBAC7E,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;gBACjC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;CACF;AAvFD,wCAuFC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* IndexSearcher.ts
|
|
3
|
+
* Executes queries against a loaded InvertedIndex in milliseconds.
|
|
4
|
+
* Supports AND, OR, NOT, phrase, regex, fuzzy, level, and field queries.
|
|
5
|
+
*/
|
|
6
|
+
import type { SearchOptions, SearchResult } from '../../types/SearchTypes';
|
|
7
|
+
/**
 * Query front end for one log file: loadIndex() reads a serialized index and
 * search() resolves a query against it and fetches the matching lines.
 */
export declare class IndexSearcher {
|
|
8
|
+
/** Absolute form of the log file path passed to the constructor. */
private filePath;
|
|
9
|
+
/** Index data returned by the serializer; null until loadIndex() has completed. */
private indexData;
|
|
10
|
+
/** ResultFetcher created for filePath; used to read line content for a list of offsets. */
private fetcher;
|
|
11
|
+
/** RankEngine used for match scores; loadIndex() replaces it with one built from the average line length. */
private rankEngine;
|
|
12
|
+
/** IndexSerializer used by loadIndex() to read the index. */
private serializer;
|
|
13
|
+
/** QueryParser that turns the query string into a parsed query object. */
private parser;
|
|
14
|
+
/** @param filePath Path of the log file to search; resolved to an absolute path. */
constructor(filePath: string);
|
|
15
|
+
/** Loads the index stored at indexPath; search() throws until this has completed. */
loadIndex(indexPath: string): Promise<void>;
|
|
16
|
+
/** True once index data has been loaded. */
isIndexLoaded(): boolean;
|
|
17
|
+
/**
|
|
18
|
+
* Execute a query and return matching SearchResults.
 * Throws an Error when no index has been loaded.
|
|
19
|
+
*/
|
|
20
|
+
search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
|
|
21
|
+
/** Maps a parsed query node (by its type) to the list of matching line offsets. */
private resolveQuery;
|
|
22
|
+
/** Returns the offsets stored in the inverted index for one term. */
private lookupTerm;
|
|
23
|
+
/** Returns offsets of lines containing all phrase terms; word order is not checked here. */
private lookupPhrase;
|
|
24
|
+
/** Returns offsets of every indexed term whose Jaro-Winkler similarity reaches the threshold. */
private lookupFuzzy;
|
|
25
|
+
/** Offsets present in every given list. */
private intersect;
|
|
26
|
+
/** Distinct offsets present in any given list. */
private union;
|
|
27
|
+
/** Offsets of the first list that are absent from the second. */
private subtract;
|
|
28
|
+
/** Returns [start, end] positions of query terms inside a line, sorted by start. */
private findHighlights;
|
|
29
|
+
/** Binary-searches the line offsets and returns a 1-based line number. */
private findLineNumber;
|
|
30
|
+
/** Returns the offsets of up to N lines before and after the line at the given offset. */
private getContextOffsets;
|
|
31
|
+
/** Keeps offsets whose line has a timestamp inside [since, until]; lines without a recognizable timestamp are kept. */
private filterByTimeRange;
|
|
32
|
+
private filterByLevel;
|
|
33
|
+
private extractTermsFromQuery;
|
|
34
|
+
/** Jaro-Winkler similarity (0–1) for fuzzy term matching */
|
|
35
|
+
private jaroWinkler;
|
|
36
|
+
}
|
|
37
|
+
//# sourceMappingURL=IndexSearcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IndexSearcher.d.ts","sourceRoot":"","sources":["../../../src/core/searcher/IndexSearcher.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAQH,OAAO,KAAK,EACV,aAAa,EACb,YAAY,EAEb,MAAM,yBAAyB,CAAC;AAEjC,qBAAa,aAAa;IACxB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,SAAS,CAA8B;IAC/C,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,UAAU,CAAkB;IACpC,OAAO,CAAC,MAAM,CAAc;gBAEhB,QAAQ,EAAE,MAAM;IAUtB,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAWjD,aAAa,IAAI,OAAO;IAMxB;;OAEG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAmGjF,OAAO,CAAC,YAAY;IAuCpB,OAAO,CAAC,UAAU;IAKlB,OAAO,CAAC,YAAY;IAUpB,OAAO,CAAC,WAAW;IAenB,OAAO,CAAC,SAAS;IAUjB,OAAO,CAAC,KAAK;IAQb,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,cAAc;IAsBtB,OAAO,CAAC,cAAc;IActB,OAAO,CAAC,iBAAiB;YAeX,iBAAiB;YAuBjB,aAAa;IAW3B,OAAO,CAAC,qBAAqB;IAa7B,4DAA4D;IAC5D,OAAO,CAAC,WAAW;CA8CpB"}
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* IndexSearcher.ts
|
|
4
|
+
* Executes queries against a loaded InvertedIndex in milliseconds.
|
|
5
|
+
* Supports AND, OR, NOT, phrase, regex, fuzzy, level, and field queries.
|
|
6
|
+
*/
|
|
7
|
+
// Compiler-generated module interop helper: exposes property `k` of module `m` on
// object `o` under the name `k2` (which defaults to `k`). An existing
// `this.__createBinding` is reused when one is defined.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
// Use a live getter when `m` has no own descriptor for `k`, when an accessor comes
// from a module without __esModule, or when a data property is writable/configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
// Fallback when Object.create is unavailable: copy the current value by plain assignment.
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
// Compiler-generated module interop helper: stores `v` as the enumerable "default"
// property of `o`. An existing `this.__setModuleDefault` is reused when one is defined.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
// Fallback when Object.create is unavailable: plain assignment.
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
// Compiler-generated module interop helper behind `import * as x`: returns `mod`
// unchanged when it is flagged __esModule, otherwise builds a namespace object that
// re-exposes its own properties and carries `mod` itself as "default".
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
// Resolves the key-listing function on first use and replaces itself with it.
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
// Bind every own key except "default"; "default" is set to the module itself below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
|
+
exports.IndexSearcher = void 0;
|
|
42
|
+
const path = __importStar(require("path"));
|
|
43
|
+
const IndexSerializer_1 = require("../indexer/IndexSerializer");
|
|
44
|
+
const ResultFetcher_1 = require("./ResultFetcher");
|
|
45
|
+
const RankEngine_1 = require("./RankEngine");
|
|
46
|
+
const QueryEngine_1 = require("../../query/QueryEngine");
|
|
47
|
+
class IndexSearcher {
|
|
48
|
+
filePath;
|
|
49
|
+
indexData = null;
|
|
50
|
+
fetcher;
|
|
51
|
+
rankEngine;
|
|
52
|
+
serializer;
|
|
53
|
+
parser;
|
|
54
|
+
constructor(filePath) {
|
|
55
|
+
this.filePath = path.resolve(filePath);
|
|
56
|
+
this.fetcher = new ResultFetcher_1.ResultFetcher(this.filePath);
|
|
57
|
+
this.rankEngine = new RankEngine_1.RankEngine();
|
|
58
|
+
this.serializer = new IndexSerializer_1.IndexSerializer();
|
|
59
|
+
this.parser = new QueryEngine_1.QueryParser();
|
|
60
|
+
}
|
|
61
|
+
// ─── Index Loading ─────────────────────────────────────────────────────────
|
|
62
|
+
async loadIndex(indexPath) {
|
|
63
|
+
this.indexData = await this.serializer.load(indexPath);
|
|
64
|
+
// Calibrate rank engine with average line length from index metadata
|
|
65
|
+
const avgLen = this.indexData.metadata.totalLines > 0
|
|
66
|
+
? this.indexData.metadata.fileSize / this.indexData.metadata.totalLines
|
|
67
|
+
: 200;
|
|
68
|
+
this.rankEngine = new RankEngine_1.RankEngine(avgLen);
|
|
69
|
+
}
|
|
70
|
+
isIndexLoaded() {
|
|
71
|
+
return this.indexData !== null;
|
|
72
|
+
}
|
|
73
|
+
// ─── Main Search Entry Point ───────────────────────────────────────────────
|
|
74
|
+
/**
|
|
75
|
+
* Execute a query and return matching SearchResults.
|
|
76
|
+
*/
|
|
77
|
+
async search(query, options = {}) {
|
|
78
|
+
if (!this.indexData) {
|
|
79
|
+
throw new Error('Index not loaded. Call loadIndex() first.');
|
|
80
|
+
}
|
|
81
|
+
const opts = {
|
|
82
|
+
limit: options.limit ?? 1000,
|
|
83
|
+
offset: options.offset ?? 0,
|
|
84
|
+
ignoreCase: options.ignoreCase ?? true,
|
|
85
|
+
isRegex: options.isRegex ?? false,
|
|
86
|
+
context: options.context ?? 0,
|
|
87
|
+
since: options.since ?? '',
|
|
88
|
+
until: options.until ?? '',
|
|
89
|
+
level: options.level ?? undefined,
|
|
90
|
+
sortBy: options.sortBy ?? 'line',
|
|
91
|
+
sortDir: options.sortDir ?? 'asc',
|
|
92
|
+
};
|
|
93
|
+
// 1. Parse query
|
|
94
|
+
const parsedQuery = this.parser.parse(query, {
|
|
95
|
+
ignoreCase: opts.ignoreCase,
|
|
96
|
+
isRegex: opts.isRegex,
|
|
97
|
+
});
|
|
98
|
+
// 2. Resolve to byte offsets via index
|
|
99
|
+
let matchingOffsets = this.resolveQuery(parsedQuery, opts.ignoreCase);
|
|
100
|
+
// 3. Apply time-range filter if requested
|
|
101
|
+
if (opts.since || opts.until) {
|
|
102
|
+
matchingOffsets = await this.filterByTimeRange(matchingOffsets, opts.since, opts.until);
|
|
103
|
+
}
|
|
104
|
+
// 4. Apply level filter if requested
|
|
105
|
+
if (opts.level) {
|
|
106
|
+
matchingOffsets = await this.filterByLevel(matchingOffsets, opts.level);
|
|
107
|
+
}
|
|
108
|
+
if (matchingOffsets.length === 0)
|
|
109
|
+
return [];
|
|
110
|
+
// 5. Sort and paginate offsets
|
|
111
|
+
const sorted = opts.sortBy === 'line'
|
|
112
|
+
? [...matchingOffsets].sort((a, b) => (opts.sortDir === 'asc' ? a - b : b - a))
|
|
113
|
+
: [...matchingOffsets].sort((a, b) => a - b); // default sort for score pass
|
|
114
|
+
const paginated = sorted.slice(opts.offset, opts.offset + opts.limit);
|
|
115
|
+
// 6. Fetch line content
|
|
116
|
+
await this.fetcher.open();
|
|
117
|
+
const lines = await this.fetcher.fetchLines(paginated);
|
|
118
|
+
// 7. Optionally fetch context lines
|
|
119
|
+
let results = await Promise.all(lines.map(async (content, i) => {
|
|
120
|
+
const offset = paginated[i];
|
|
121
|
+
const lineNumber = this.findLineNumber(offset);
|
|
122
|
+
const highlights = this.findHighlights(content, parsedQuery, opts.ignoreCase);
|
|
123
|
+
let contextBefore;
|
|
124
|
+
let contextAfter;
|
|
125
|
+
if (opts.context > 0) {
|
|
126
|
+
const { beforeOffsets, afterOffsets } = this.getContextOffsets(offset, opts.context);
|
|
127
|
+
contextBefore = await this.fetcher.fetchLines(beforeOffsets);
|
|
128
|
+
contextAfter = await this.fetcher.fetchLines(afterOffsets);
|
|
129
|
+
}
|
|
130
|
+
return {
|
|
131
|
+
lineNumber,
|
|
132
|
+
offset,
|
|
133
|
+
content,
|
|
134
|
+
highlights,
|
|
135
|
+
matchScore: 0,
|
|
136
|
+
contextBefore,
|
|
137
|
+
contextAfter,
|
|
138
|
+
};
|
|
139
|
+
}));
|
|
140
|
+
await this.fetcher.close();
|
|
141
|
+
// 8. Score and optionally re-sort by relevance
|
|
142
|
+
if (opts.sortBy === 'score') {
|
|
143
|
+
results = this.rankEngine.rankResults(results, parsedQuery);
|
|
144
|
+
if (opts.sortDir === 'asc')
|
|
145
|
+
results.reverse();
|
|
146
|
+
}
|
|
147
|
+
else {
|
|
148
|
+
// Just compute scores for display without re-sorting
|
|
149
|
+
const terms = this.extractTermsFromQuery(parsedQuery);
|
|
150
|
+
results = results.map((r) => ({
|
|
151
|
+
...r,
|
|
152
|
+
matchScore: this.rankEngine.score(r.content, terms),
|
|
153
|
+
}));
|
|
154
|
+
}
|
|
155
|
+
return results;
|
|
156
|
+
}
|
|
157
|
+
// ─── Query Resolution ──────────────────────────────────────────────────────
|
|
158
|
+
resolveQuery(query, ignoreCase) {
|
|
159
|
+
switch (query.type) {
|
|
160
|
+
case 'simple':
|
|
161
|
+
return this.lookupTerm(query.term, ignoreCase);
|
|
162
|
+
case 'phrase':
|
|
163
|
+
return this.lookupPhrase(query.terms, ignoreCase);
|
|
164
|
+
case 'and':
|
|
165
|
+
return this.intersect(query.operands.map((op) => this.resolveQuery(op, ignoreCase)));
|
|
166
|
+
case 'or':
|
|
167
|
+
return this.union(query.operands.map((op) => this.resolveQuery(op, ignoreCase)));
|
|
168
|
+
case 'not':
|
|
169
|
+
return this.subtract(this.indexData.lineOffsets, this.resolveQuery(query.operand, ignoreCase));
|
|
170
|
+
case 'regex':
|
|
171
|
+
// Regex falls back to full streaming scan (handled in CLI layer)
|
|
172
|
+
// Return all offsets as candidates — will be filtered by ResultFetcher
|
|
173
|
+
return [...this.indexData.lineOffsets];
|
|
174
|
+
case 'fuzzy':
|
|
175
|
+
return this.lookupFuzzy(query.term, query.threshold, ignoreCase);
|
|
176
|
+
case 'level':
|
|
177
|
+
return this.lookupTerm(query.level, ignoreCase);
|
|
178
|
+
case 'field':
|
|
179
|
+
return this.lookupTerm(query.value, ignoreCase);
|
|
180
|
+
default:
|
|
181
|
+
return [];
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
lookupTerm(term, ignoreCase) {
|
|
185
|
+
const t = ignoreCase ? term.toLowerCase() : term;
|
|
186
|
+
return this.indexData.invertedIndex[t] ?? [];
|
|
187
|
+
}
|
|
188
|
+
lookupPhrase(terms, ignoreCase) {
|
|
189
|
+
if (terms.length === 0)
|
|
190
|
+
return [];
|
|
191
|
+
if (terms.length === 1)
|
|
192
|
+
return this.lookupTerm(terms[0], ignoreCase);
|
|
193
|
+
// Get candidates: lines containing ALL terms
|
|
194
|
+
const candidates = this.intersect(terms.map((t) => this.lookupTerm(t, ignoreCase)));
|
|
195
|
+
// Phrase order verification happens at the ResultFetcher level (content check)
|
|
196
|
+
return candidates;
|
|
197
|
+
}
|
|
198
|
+
lookupFuzzy(term, threshold, ignoreCase) {
|
|
199
|
+
const t = ignoreCase ? term.toLowerCase() : term;
|
|
200
|
+
const results = new Set();
|
|
201
|
+
for (const [indexedTerm, offsets] of Object.entries(this.indexData.invertedIndex)) {
|
|
202
|
+
if (this.jaroWinkler(t, indexedTerm) >= threshold) {
|
|
203
|
+
for (const o of offsets)
|
|
204
|
+
results.add(o);
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
return [...results];
|
|
208
|
+
}
|
|
209
|
+
// ─── Set Operations ────────────────────────────────────────────────────────
|
|
210
|
+
intersect(lists) {
|
|
211
|
+
if (lists.length === 0)
|
|
212
|
+
return [];
|
|
213
|
+
if (lists.length === 1)
|
|
214
|
+
return lists[0];
|
|
215
|
+
// Start with the smallest list for efficiency
|
|
216
|
+
const sorted = [...lists].sort((a, b) => a.length - b.length);
|
|
217
|
+
const sets = sorted.slice(1).map((l) => new Set(l));
|
|
218
|
+
return sorted[0].filter((offset) => sets.every((s) => s.has(offset)));
|
|
219
|
+
}
|
|
220
|
+
union(lists) {
|
|
221
|
+
const combined = new Set();
|
|
222
|
+
for (const list of lists) {
|
|
223
|
+
for (const offset of list)
|
|
224
|
+
combined.add(offset);
|
|
225
|
+
}
|
|
226
|
+
return [...combined];
|
|
227
|
+
}
|
|
228
|
+
subtract(all, excluded) {
|
|
229
|
+
const excludedSet = new Set(excluded);
|
|
230
|
+
return all.filter((o) => !excludedSet.has(o));
|
|
231
|
+
}
|
|
232
|
+
// ─── Highlight & Line Number ───────────────────────────────────────────────
|
|
233
|
+
findHighlights(line, query, ignoreCase) {
|
|
234
|
+
const terms = this.extractTermsFromQuery(query);
|
|
235
|
+
const positions = [];
|
|
236
|
+
const searchLine = ignoreCase ? line.toLowerCase() : line;
|
|
237
|
+
for (const term of terms) {
|
|
238
|
+
const t = ignoreCase ? term.toLowerCase() : term;
|
|
239
|
+
let idx = 0;
|
|
240
|
+
while ((idx = searchLine.indexOf(t, idx)) !== -1) {
|
|
241
|
+
positions.push([idx, idx + t.length]);
|
|
242
|
+
idx += t.length;
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
// Sort by start position, merge overlapping ranges
|
|
246
|
+
return positions.sort((a, b) => a[0] - b[0]);
|
|
247
|
+
}
|
|
248
|
+
findLineNumber(offset) {
|
|
249
|
+
const offsets = this.indexData.lineOffsets;
|
|
250
|
+
let lo = 0;
|
|
251
|
+
let hi = offsets.length - 1;
|
|
252
|
+
while (lo <= hi) {
|
|
253
|
+
const mid = (lo + hi) >>> 1;
|
|
254
|
+
if (offsets[mid] === offset)
|
|
255
|
+
return mid + 1;
|
|
256
|
+
if (offsets[mid] < offset)
|
|
257
|
+
lo = mid + 1;
|
|
258
|
+
else
|
|
259
|
+
hi = mid - 1;
|
|
260
|
+
}
|
|
261
|
+
return Math.min(lo + 1, offsets.length);
|
|
262
|
+
}
|
|
263
|
+
getContextOffsets(offset, contextLines) {
|
|
264
|
+
const offsets = this.indexData.lineOffsets;
|
|
265
|
+
const idx = offsets.indexOf(offset);
|
|
266
|
+
if (idx === -1)
|
|
267
|
+
return { beforeOffsets: [], afterOffsets: [] };
|
|
268
|
+
const beforeOffsets = offsets.slice(Math.max(0, idx - contextLines), idx);
|
|
269
|
+
const afterOffsets = offsets.slice(idx + 1, idx + 1 + contextLines);
|
|
270
|
+
return { beforeOffsets, afterOffsets };
|
|
271
|
+
}
|
|
272
|
+
// ─── Filters ──────────────────────────────────────────────────────────────
|
|
273
|
+
async filterByTimeRange(offsets, since, until) {
|
|
274
|
+
// Simple timestamp filter — fetch lines and check timestamps
|
|
275
|
+
const sinceDate = since ? new Date(since).getTime() : 0;
|
|
276
|
+
const untilDate = until ? new Date(until).getTime() : Infinity;
|
|
277
|
+
await this.fetcher.open();
|
|
278
|
+
const lines = await this.fetcher.fetchLines(offsets);
|
|
279
|
+
await this.fetcher.close();
|
|
280
|
+
const ISO_RE = /\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/;
|
|
281
|
+
return offsets.filter((_, i) => {
|
|
282
|
+
const match = lines[i].match(ISO_RE);
|
|
283
|
+
if (!match)
|
|
284
|
+
return true; // Can't parse timestamp — include by default
|
|
285
|
+
const ts = new Date(match[0]).getTime();
|
|
286
|
+
return ts >= sinceDate && ts <= untilDate;
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
async filterByLevel(offsets, level) {
|
|
290
|
+
await this.fetcher.open();
|
|
291
|
+
const lines = await this.fetcher.fetchLines(offsets);
|
|
292
|
+
await this.fetcher.close();
|
|
293
|
+
const levelUpper = level.toUpperCase();
|
|
294
|
+
return offsets.filter((_, i) => lines[i].toUpperCase().includes(levelUpper));
|
|
295
|
+
}
|
|
296
|
+
// ─── Helpers ───────────────────────────────────────────────────────────────
|
|
297
|
+
extractTermsFromQuery(query) {
|
|
298
|
+
switch (query.type) {
|
|
299
|
+
case 'simple': return [query.term];
|
|
300
|
+
case 'phrase': return query.terms;
|
|
301
|
+
case 'and':
|
|
302
|
+
case 'or': return query.operands.flatMap((op) => this.extractTermsFromQuery(op));
|
|
303
|
+
case 'not': return this.extractTermsFromQuery(query.operand);
|
|
304
|
+
case 'fuzzy': return [query.term];
|
|
305
|
+
case 'level': return [query.level];
|
|
306
|
+
case 'field': return [query.value];
|
|
307
|
+
default: return [];
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
/** Jaro-Winkler similarity (0–1) for fuzzy term matching */
|
|
311
|
+
jaroWinkler(s1, s2) {
|
|
312
|
+
if (s1 === s2)
|
|
313
|
+
return 1;
|
|
314
|
+
const len1 = s1.length;
|
|
315
|
+
const len2 = s2.length;
|
|
316
|
+
const matchDist = Math.floor(Math.max(len1, len2) / 2) - 1;
|
|
317
|
+
if (matchDist < 0)
|
|
318
|
+
return 0;
|
|
319
|
+
const s1Matches = new Array(len1).fill(false);
|
|
320
|
+
const s2Matches = new Array(len2).fill(false);
|
|
321
|
+
let matches = 0;
|
|
322
|
+
let transpositions = 0;
|
|
323
|
+
for (let i = 0; i < len1; i++) {
|
|
324
|
+
const start = Math.max(0, i - matchDist);
|
|
325
|
+
const end = Math.min(i + matchDist + 1, len2);
|
|
326
|
+
for (let j = start; j < end; j++) {
|
|
327
|
+
if (s2Matches[j] || s1[i] !== s2[j])
|
|
328
|
+
continue;
|
|
329
|
+
s1Matches[i] = true;
|
|
330
|
+
s2Matches[j] = true;
|
|
331
|
+
matches++;
|
|
332
|
+
break;
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
if (matches === 0)
|
|
336
|
+
return 0;
|
|
337
|
+
let k = 0;
|
|
338
|
+
for (let i = 0; i < len1; i++) {
|
|
339
|
+
if (!s1Matches[i])
|
|
340
|
+
continue;
|
|
341
|
+
while (!s2Matches[k])
|
|
342
|
+
k++;
|
|
343
|
+
if (s1[i] !== s2[k])
|
|
344
|
+
transpositions++;
|
|
345
|
+
k++;
|
|
346
|
+
}
|
|
347
|
+
const jaro = (matches / len1 + matches / len2 + (matches - transpositions / 2) / matches) / 3;
|
|
348
|
+
// Winkler prefix bonus
|
|
349
|
+
let prefix = 0;
|
|
350
|
+
for (let i = 0; i < Math.min(4, Math.min(len1, len2)); i++) {
|
|
351
|
+
if (s1[i] === s2[i])
|
|
352
|
+
prefix++;
|
|
353
|
+
else
|
|
354
|
+
break;
|
|
355
|
+
}
|
|
356
|
+
return jaro + prefix * 0.1 * (1 - jaro);
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
exports.IndexSearcher = IndexSearcher;
|
|
360
|
+
//# sourceMappingURL=IndexSearcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IndexSearcher.js","sourceRoot":"","sources":["../../../src/core/searcher/IndexSearcher.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,2CAA6B;AAC7B,gEAA6D;AAC7D,mDAAgD;AAChD,6CAA0C;AAC1C,yDAAsD;AAQtD,MAAa,aAAa;IAChB,QAAQ,CAAS;IACjB,SAAS,GAAyB,IAAI,CAAC;IACvC,OAAO,CAAgB;IACvB,UAAU,CAAa;IACvB,UAAU,CAAkB;IAC5B,MAAM,CAAc;IAE5B,YAAY,QAAgB;QAC1B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACvC,IAAI,CAAC,OAAO,GAAG,IAAI,6BAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,GAAG,IAAI,uBAAU,EAAE,CAAC;QACnC,IAAI,CAAC,UAAU,GAAG,IAAI,iCAAe,EAAE,CAAC;QACxC,IAAI,CAAC,MAAM,GAAG,IAAI,yBAAW,EAAE,CAAC;IAClC,CAAC;IAED,8EAA8E;IAE9E,KAAK,CAAC,SAAS,CAAC,SAAiB;QAC/B,IAAI,CAAC,SAAS,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEvD,qEAAqE;QACrE,MAAM,MAAM,GACV,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,UAAU,GAAG,CAAC;YACpC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,UAAU;YACvE,CAAC,CAAC,GAAG,CAAC;QACV,IAAI,CAAC,UAAU,GAAG,IAAI,uBAAU,CAAC,MAAM,CAAC,CAAC;IAC3C,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC;IACjC,CAAC;IAED,8EAA8E;IAE9E;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;QAC/D,CAAC;QAED,MAAM,IAAI,GAA4B;YACpC,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,IAAI;YAC5B,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,CAAC;YAC3B,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;YACtC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;YACjC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,CAAC;YAC7B,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAgB;YACxC,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,MAAM;YAChC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;SAClC,CAAC;QAEF,iBAAiB;QACjB,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE;YAC3C,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;QAEH,uCAAuC;QACvC,IAAI,eAAe,GAAG,IAAI,CAAC,YAAY,CAAC,WAAW,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAEtE,0CAA0C;QAC1C,IAAI,IAAI,CAAC,KAAK
,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC7B,eAAe,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,eAAe,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAC1F,CAAC;QAED,qCAAqC;QACrC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,eAAe,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAC1E,CAAC;QAED,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE5C,+BAA+B;QAC/B,MAAM,MAAM,GACV,IAAI,CAAC,MAAM,KAAK,MAAM;YACpB,CAAC,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAC/E,CAAC,CAAC,CAAC,GAAG,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,8BAA8B;QAEhF,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QAEtE,wBAAwB;QACxB,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QAC1B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAEvD,oCAAoC;QACpC,IAAI,OAAO,GAAmB,MAAM,OAAO,CAAC,GAAG,CAC7C,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE;YAC7B,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;YAC/C,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,WAAW,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;YAE9E,IAAI,aAAmC,CAAC;YACxC,IAAI,YAAkC,CAAC;YAEvC,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;gBACrB,MAAM,EAAE,aAAa,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;gBACrF,aAAa,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;gBAC7D,YAAY,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;YAC7D,CAAC;YAED,OAAO;gBACL,UAAU;gBACV,MAAM;gBACN,OAAO;gBACP,UAAU;gBACV,UAAU,EAAE,CAAC;gBACb,aAAa;gBACb,YAAY;aACb,CAAC;QACJ,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAE3B,+CAA+C;QAC/C,IAAI,IAAI,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;YAC5B,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;YAC5D,IAAI,IAAI,CAAC,OAAO,KAAK,KAAK;gBAAE,OAAO,CAAC,OAAO,EAAE,CAAC;QAChD,CAAC;aAAM,CAAC;YACN,qDAAqD;YACrD,MAAM,KAAK,GAAG,I
AAI,CAAC,qBAAqB,CAAC,WAAW,CAAC,CAAC;YACtD,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC5B,GAAG,CAAC;gBACJ,UAAU,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,EAAE,KAAK,CAAC;aACpD,CAAC,CAAC,CAAC;QACN,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,8EAA8E;IAEtE,YAAY,CAAC,KAAkB,EAAE,UAAmB;QAC1D,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,QAAQ;gBACX,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;YAEjD,KAAK,QAAQ;gBACX,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;YAEpD,KAAK,KAAK;gBACR,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;YAEvF,KAAK,IAAI;gBACP,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;YAEnF,KAAK,KAAK;gBACR,OAAO,IAAI,CAAC,QAAQ,CAClB,IAAI,CAAC,SAAU,CAAC,WAAW,EAC3B,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,EAAE,UAAU,CAAC,CAC7C,CAAC;YAEJ,KAAK,OAAO;gBACV,iEAAiE;gBACjE,uEAAuE;gBACvE,OAAO,CAAC,GAAG,IAAI,CAAC,SAAU,CAAC,WAAW,CAAC,CAAC;YAE1C,KAAK,OAAO;gBACV,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;YAEnE,KAAK,OAAO;gBACV,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;YAElD,KAAK,OAAO;gBACV,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;YAElD;gBACE,OAAO,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IAEO,UAAU,CAAC,IAAY,EAAE,UAAmB;QAClD,MAAM,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACjD,OAAO,IAAI,CAAC,SAAU,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAChD,CAAC;IAEO,YAAY,CAAC,KAAe,EAAE,UAAmB;QACvD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAClC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;QAErE,6CAA6C;QAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;QACpF,+EAA+E;QAC/E,OAAO,UAAU,CAAC;IACpB,CAAC;IAEO,WAAW,CAAC,IAAY,EAAE,SAAiB,EAAE,UAAmB;QACt
E,MAAM,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACjD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;QAElC,KAAK,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,SAAU,CAAC,aAAa,CAAC,EAAE,CAAC;YACnF,IAAI,IAAI,CAAC,WAAW,CAAC,CAAC,EAAE,WAAW,CAAC,IAAI,SAAS,EAAE,CAAC;gBAClD,KAAK,MAAM,CAAC,IAAI,OAAO;oBAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,OAAO,CAAC,CAAC;IACtB,CAAC;IAED,8EAA8E;IAEtE,SAAS,CAAC,KAAiB;QACjC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAClC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC;QAExC,8CAA8C;QAC9C,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;QAC9D,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC;IAEO,KAAK,CAAC,KAAiB;QAC7B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;QACnC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,KAAK,MAAM,MAAM,IAAI,IAAI;gBAAE,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAClD,CAAC;QACD,OAAO,CAAC,GAAG,QAAQ,CAAC,CAAC;IACvB,CAAC;IAEO,QAAQ,CAAC,GAAa,EAAE,QAAkB;QAChD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;QACtC,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC;IAED,8EAA8E;IAEtE,cAAc,CACpB,IAAY,EACZ,KAAkB,EAClB,UAAmB;QAEnB,MAAM,KAAK,GAAG,IAAI,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAChD,MAAM,SAAS,GAA4B,EAAE,CAAC;QAC9C,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAE1D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YACjD,IAAI,GAAG,GAAG,CAAC,CAAC;YACZ,OAAO,CAAC,GAAG,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBACjD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,EAA
E,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;gBACtC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC;YAClB,CAAC;QACH,CAAC;QAED,mDAAmD;QACnD,OAAO,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC;IAEO,cAAc,CAAC,MAAc;QACnC,MAAM,OAAO,GAAG,IAAI,CAAC,SAAU,CAAC,WAAW,CAAC;QAC5C,IAAI,EAAE,GAAG,CAAC,CAAC;QACX,IAAI,EAAE,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QAE5B,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC;YAChB,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;YAC5B,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM;gBAAE,OAAO,GAAG,GAAG,CAAC,CAAC;YAC5C,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM;gBAAE,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;;gBACnC,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;QACpB,CAAC;QACD,OAAO,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IAC1C,CAAC;IAEO,iBAAiB,CACvB,MAAc,EACd,YAAoB;QAEpB,MAAM,OAAO,GAAG,IAAI,CAAC,SAAU,CAAC,WAAW,CAAC;QAC5C,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QACpC,IAAI,GAAG,KAAK,CAAC,CAAC;YAAE,OAAO,EAAE,aAAa,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;QAE/D,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,YAAY,CAAC,EAAE,GAAG,CAAC,CAAC;QAC1E,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,GAAG,YAAY,CAAC,CAAC;QACpE,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,CAAC;IACzC,CAAC;IAED,6EAA6E;IAErE,KAAK,CAAC,iBAAiB,CAC7B,OAAiB,EACjB,KAAa,EACb,KAAa;QAEb,6DAA6D;QAC7D,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxD,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE/D,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QAC1B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAE3B,MAAM,MAAM,GAAG,wCAAwC,CAAC;QAExD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAC7B,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACrC,IAAI,CAAC,KAAK;gBAAE,OAAO,IAAI,CAAC,CAAC,6CAA6C;YACtE,MAAM,EAAE,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CA
AC,CAAC,OAAO,EAAE,CAAC;YACxC,OAAO,EAAE,IAAI,SAAS,IAAI,EAAE,IAAI,SAAS,CAAC;QAC5C,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,OAAiB,EAAE,KAAa;QAC1D,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QAC1B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAE3B,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QACvC,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC;IAC/E,CAAC;IAED,8EAA8E;IAEtE,qBAAqB,CAAC,KAAkB;QAC9C,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,QAAQ,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACnC,KAAK,QAAQ,CAAC,CAAC,OAAO,KAAK,CAAC,KAAK,CAAC;YAClC,KAAK,KAAK,CAAC;YAAC,KAAK,IAAI,CAAC,CAAC,OAAO,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,CAAC;YAC7F,KAAK,KAAK,CAAC,CAAC,OAAO,IAAI,CAAC,qBAAqB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC7D,KAAK,OAAO,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAClC,KAAK,OAAO,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACnC,KAAK,OAAO,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACnC,OAAO,CAAC,CAAC,OAAO,EAAE,CAAC;QACrB,CAAC;IACH,CAAC;IAED,4DAA4D;IACpD,WAAW,CAAC,EAAU,EAAE,EAAU;QACxC,IAAI,EAAE,KAAK,EAAE;YAAE,OAAO,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC;QACvB,MAAM,IAAI,GAAG,EAAE,CAAC,MAAM,CAAC;QACvB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3D,IAAI,SAAS,GAAG,CAAC;YAAE,OAAO,CAAC,CAAC;QAE5B,MAAM,SAAS,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC9C,MAAM,SAAS,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC9C,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,cAAc,GAAG,CAAC,CAAC;QAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;YACzC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;YAC9C,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EA
AE,CAAC;gBACjC,IAAI,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;oBAAE,SAAS;gBAC9C,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;gBACpB,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;gBACpB,OAAO,EAAE,CAAC;gBACV,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,OAAO,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAE5B,IAAI,CAAC,GAAG,CAAC,CAAC;QACV,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9B,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;gBAAE,SAAS;YAC5B,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;gBAAE,CAAC,EAAE,CAAC;YAC1B,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAAE,cAAc,EAAE,CAAC;YACtC,CAAC,EAAE,CAAC;QACN,CAAC;QAED,MAAM,IAAI,GACR,CAAC,OAAO,GAAG,IAAI,GAAG,OAAO,GAAG,IAAI,GAAG,CAAC,OAAO,GAAG,cAAc,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;QAEnF,uBAAuB;QACvB,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3D,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAAE,MAAM,EAAE,CAAC;;gBACzB,MAAM;QACb,CAAC;QAED,OAAO,IAAI,GAAG,MAAM,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;IAC1C,CAAC;CACF;AAxXD,sCAwXC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RankEngine.ts
|
|
3
|
+
* Scores and ranks search results using BM25-simplified scoring.
|
|
4
|
+
* Higher score = more relevant result.
|
|
5
|
+
*/
|
|
6
|
+
import type { SearchResult, ParsedQuery } from '../../types/SearchTypes';
|
|
7
|
+
export declare class RankEngine {
|
|
8
|
+
/** Average line length that the score's length normalization divides by. */
private avgLineLength;
|
|
9
|
+
/**
* @param avgLineLength Average line length of the indexed file; the compiled implementation defaults it to 200 when omitted.
*/
constructor(avgLineLength?: number);
|
|
10
|
+
/**
|
|
11
|
+
* Score a single line for BM25 relevance.
|
|
12
|
+
* @param line The line content
|
|
13
|
+
* @param terms The search terms
|
|
14
|
+
*/
|
|
15
|
+
score(line: string, terms: string[]): number;
|
|
16
|
+
/**
|
|
17
|
+
* Re-score and sort a list of SearchResults by relevance.
|
|
18
|
+
*/
|
|
19
|
+
rankResults(results: SearchResult[], query: ParsedQuery): SearchResult[];
|
|
20
|
+
/**
|
|
21
|
+
* Count non-overlapping occurrences of `term` in `text`.
|
|
22
|
+
*/
|
|
23
|
+
private countOccurrences;
|
|
24
|
+
/**
|
|
25
|
+
* Extract all search terms from a parsed query tree.
|
|
26
|
+
*/
|
|
27
|
+
private extractTerms;
|
|
28
|
+
}
|
|
29
|
+
//# sourceMappingURL=RankEngine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RankEngine.d.ts","sourceRoot":"","sources":["../../../src/core/searcher/RankEngine.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAMzE,qBAAa,UAAU;IACrB,OAAO,CAAC,aAAa,CAAS;gBAElB,aAAa,GAAE,MAAY;IAIvC;;;;OAIG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM;IAsB5C;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,YAAY,EAAE,EAAE,KAAK,EAAE,WAAW,GAAG,YAAY,EAAE;IASxE;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAWxB;;OAEG;IACH,OAAO,CAAC,YAAY;CAqBrB"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* RankEngine.ts
|
|
4
|
+
* Scores and ranks search results using BM25-simplified scoring.
|
|
5
|
+
* Higher score = more relevant result.
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.RankEngine = void 0;
|
|
9
|
+
// BM25 tuning constants
const K1 = 1.5; // term frequency saturation
const B = 0.75; // length normalization
// Used when no usable average line length is supplied.
const DEFAULT_AVG_LINE_LENGTH = 200;
/**
 * Scores and ranks search results with a simplified BM25 formula
 * (term-frequency saturation plus line-length normalization, no IDF).
 */
class RankEngine {
    avgLineLength;
    /**
     * @param {number} [avgLineLength=200] Average line length used for length
     *   normalization. Zero, negative or NaN values would make every score
     *   0, negative or NaN, so they fall back to the default.
     */
    constructor(avgLineLength = DEFAULT_AVG_LINE_LENGTH) {
        this.avgLineLength = avgLineLength > 0 ? avgLineLength : DEFAULT_AVG_LINE_LENGTH;
    }
    /**
     * Score a single line for BM25 relevance.
     * Matching is case-insensitive substring counting.
     * @param {string} line The line content
     * @param {string[]} terms The search terms
     * @returns {number} Sum of the per-term scores; 0 when nothing matches.
     */
    score(line, terms) {
        if (terms.length === 0) {
            return 0;
        }
        const lower = line.toLowerCase();
        const lineLength = line.length;
        let totalScore = 0;
        for (const term of terms) {
            const tf = this.countOccurrences(lower, term.toLowerCase());
            if (tf === 0) {
                continue;
            }
            // BM25 term frequency component: saturates with tf and penalizes
            // lines longer than the average.
            const tfNorm = (tf * (K1 + 1)) /
                (tf + K1 * (1 - B + B * (lineLength / this.avgLineLength)));
            totalScore += tfNorm;
        }
        return totalScore;
    }
    /**
     * Re-score and sort a list of SearchResults by relevance.
     * Returns `results` unchanged when the query yields no terms; otherwise
     * returns new result objects with `matchScore` set, highest score first.
     * @param {object[]} results Results whose `content` is scored.
     * @param {object} query Parsed query tree.
     * @returns {object[]} Ranked results.
     */
    rankResults(results, query) {
        const terms = this.extractTerms(query);
        if (terms.length === 0) {
            return results;
        }
        return results
            .map((r) => ({ ...r, matchScore: this.score(r.content, terms) }))
            .sort((a, b) => b.matchScore - a.matchScore);
    }
    /**
     * Count non-overlapping occurrences of `term` in `text`.
     * An empty term counts as zero occurrences.
     */
    countOccurrences(text, term) {
        if (!term) {
            return 0;
        }
        let count = 0;
        let idx = 0;
        while ((idx = text.indexOf(term, idx)) !== -1) {
            count++;
            idx += term.length;
        }
        return count;
    }
    /**
     * Extract all search terms from a parsed query tree.
     * Unknown node types contribute no terms.
     */
    extractTerms(query) {
        switch (query.type) {
            case 'simple':
                return [query.term];
            case 'phrase':
                return query.terms;
            case 'and':
            case 'or':
                return query.operands.flatMap((op) => this.extractTerms(op));
            case 'not':
                return this.extractTerms(query.operand);
            case 'fuzzy':
                return [query.term];
            case 'level':
                return [query.level];
            case 'field':
                return [query.value];
            default:
                return [];
        }
    }
}
|
|
89
|
+
exports.RankEngine = RankEngine;
|
|
90
|
+
//# sourceMappingURL=RankEngine.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RankEngine.js","sourceRoot":"","sources":["../../../src/core/searcher/RankEngine.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAIH,wBAAwB;AACxB,MAAM,EAAE,GAAG,GAAG,CAAC,CAAE,4BAA4B;AAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,CAAE,uBAAuB;AAExC,MAAa,UAAU;IACb,aAAa,CAAS;IAE9B,YAAY,gBAAwB,GAAG;QACrC,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,IAAY,EAAE,KAAe;QACjC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEjC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACjC,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC;QAC/B,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,EAAE,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;YAC5D,IAAI,EAAE,KAAK,CAAC;gBAAE,SAAS;YAEvB,gCAAgC;YAChC,MAAM,MAAM,GACV,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;gBACf,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;YAE9D,UAAU,IAAI,MAAM,CAAC;QACvB,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,OAAuB,EAAE,KAAkB;QACrD,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QACvC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,OAAO,CAAC;QAEvC,OAAO,OAAO;aACX,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,CAAC;aAChE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;IACjD,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,IAAY,EAAE,IAAY;QACjD,IAAI,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC;QACpB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;YAC9C,KAAK,EAAE,CAAC;YACR,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC;QACrB,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,KAAkB;QACrC,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,QAAQ;gBACX,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACtB,KAAK,QAAQ;gBACX,OAAO,KAAK,CAAC,KAAK,CAAC;YACrB,KAAK,KAAK,CAAC;YACX,KAAK,IAAI;gBACP,OAAO,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,EAAE,
EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC;YAC/D,KAAK,KAAK;gBACR,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,OAAO;gBACV,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACtB,KAAK,OAAO;gBACV,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACvB,KAAK,OAAO;gBACV,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACvB;gBACE,OAAO,EAAE,CAAC;QACd,CAAC;IACH,CAAC;CACF;AApFD,gCAoFC"}
|