@lucern/contracts 0.1.1-alpha.1 → 0.1.2-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +3 -0
- package/README.md +3 -0
- package/dist/agents/v1.d.ts +2 -0
- package/dist/agents/v1.js +3 -0
- package/dist/agents/v1.js.map +1 -0
- package/dist/api-enums.contract.d.ts +60 -0
- package/dist/api-enums.contract.js +174 -0
- package/dist/api-enums.contract.js.map +1 -0
- package/dist/auth-context.contract.d.ts +2 -0
- package/dist/auth-context.contract.js +48 -0
- package/dist/auth-context.contract.js.map +1 -0
- package/dist/auth-session.contract.d.ts +2 -0
- package/dist/auth-session.contract.js +48 -0
- package/dist/auth-session.contract.js.map +1 -0
- package/dist/auth.contract.d.ts +92 -0
- package/dist/auth.contract.js +48 -0
- package/dist/auth.contract.js.map +1 -0
- package/dist/beliefs/v1.d.ts +2 -0
- package/dist/beliefs/v1.js +3 -0
- package/dist/beliefs/v1.js.map +1 -0
- package/dist/context-pack.contract.d.ts +496 -0
- package/dist/context-pack.contract.js +98 -0
- package/dist/context-pack.contract.js.map +1 -0
- package/dist/convex-admin.contract.d.ts +7 -0
- package/dist/convex-admin.contract.js +3 -0
- package/dist/convex-admin.contract.js.map +1 -0
- package/dist/events-types.contract.d.ts +1 -0
- package/dist/events-types.contract.js +136 -0
- package/dist/events-types.contract.js.map +1 -0
- package/dist/events.contract.d.ts +178 -0
- package/dist/events.contract.js +136 -0
- package/dist/events.contract.js.map +1 -0
- package/dist/evidence/v1.d.ts +2 -0
- package/dist/evidence/v1.js +3 -0
- package/dist/evidence/v1.js.map +1 -0
- package/dist/gateway.contract.d.ts +79 -0
- package/dist/gateway.contract.js +12 -0
- package/dist/gateway.contract.js.map +1 -0
- package/dist/graph/v1.d.ts +2 -0
- package/dist/graph/v1.js +3 -0
- package/dist/graph/v1.js.map +1 -0
- package/dist/ids.contract.d.ts +9 -0
- package/{src/ids.contract.ts → dist/ids.contract.js} +10 -17
- package/dist/ids.contract.js.map +1 -0
- package/dist/index.d.ts +15 -2004
- package/dist/index.js +61 -118
- package/dist/index.js.map +1 -0
- package/dist/lens-filter.contract.d.ts +72 -0
- package/dist/lens-filter.contract.js +71 -0
- package/dist/lens-filter.contract.js.map +1 -0
- package/dist/lens-workflow.contract.d.ts +87 -0
- package/dist/lens-workflow.contract.js +123 -0
- package/dist/lens-workflow.contract.js.map +1 -0
- package/dist/mcp-tools.contract-D8kXcP6d.d.ts +254 -0
- package/dist/mcp-tools.contract.d.ts +1 -0
- package/dist/mcp-tools.contract.js +3016 -0
- package/dist/mcp-tools.contract.js.map +1 -0
- package/dist/ontologies/v1.d.ts +2 -0
- package/dist/ontologies/v1.js +3 -0
- package/dist/ontologies/v1.js.map +1 -0
- package/dist/ontology-matching.contract.d.ts +1 -0
- package/dist/ontology-matching.contract.js +346 -0
- package/dist/ontology-matching.contract.js.map +1 -0
- package/dist/prompt.contract.d.ts +26 -0
- package/dist/prompt.contract.js +12 -0
- package/dist/prompt.contract.js.map +1 -0
- package/dist/questions/v1.d.ts +2 -0
- package/dist/questions/v1.js +3 -0
- package/dist/questions/v1.js.map +1 -0
- package/dist/sdk-methods.contract.d.ts +376 -0
- package/dist/sdk-methods.contract.js +3 -0
- package/dist/sdk-methods.contract.js.map +1 -0
- package/dist/sdk-tools.contract-BnV0hKLp.d.ts +150 -0
- package/dist/sdk-tools.contract.d.ts +2 -0
- package/dist/sdk-tools.contract.js +4252 -0
- package/dist/sdk-tools.contract.js.map +1 -0
- package/dist/text-matching.contract.d.ts +55 -0
- package/{src/text-matching.contract.ts → dist/text-matching.contract.js} +36 -137
- package/dist/text-matching.contract.js.map +1 -0
- package/dist/topic-scope.contract.d.ts +1 -0
- package/{src/v1/topics/v1.ts → dist/topic-scope.contract.js} +13 -38
- package/dist/topic-scope.contract.js.map +1 -0
- package/dist/topics/v1.d.ts +2 -0
- package/dist/topics/v1.js +3 -0
- package/dist/topics/v1.js.map +1 -0
- package/dist/v1/agents/v1.d.ts +2 -0
- package/dist/v1/agents/v1.js +3 -0
- package/dist/v1/agents/v1.js.map +1 -0
- package/dist/v1/beliefs/v1.d.ts +2 -0
- package/dist/v1/beliefs/v1.js +3 -0
- package/dist/v1/beliefs/v1.js.map +1 -0
- package/dist/v1/evidence/v1.d.ts +2 -0
- package/dist/v1/evidence/v1.js +3 -0
- package/dist/v1/evidence/v1.js.map +1 -0
- package/dist/v1/graph/v1.d.ts +2 -0
- package/dist/v1/graph/v1.js +3 -0
- package/dist/v1/graph/v1.js.map +1 -0
- package/dist/v1/ontologies/v1.d.ts +78 -0
- package/dist/v1/ontologies/v1.js +346 -0
- package/dist/v1/ontologies/v1.js.map +1 -0
- package/dist/v1/questions/v1.d.ts +2 -0
- package/dist/v1/questions/v1.js +3 -0
- package/dist/v1/questions/v1.js.map +1 -0
- package/dist/v1/topics/v1.d.ts +21 -0
- package/dist/v1/topics/v1.js +54 -0
- package/dist/v1/topics/v1.js.map +1 -0
- package/dist/v1/worktrees/v1.d.ts +2 -0
- package/dist/v1/worktrees/v1.js +3 -0
- package/dist/v1/worktrees/v1.js.map +1 -0
- package/dist/workflow-runtime.contract.d.ts +163 -0
- package/dist/workflow-runtime.contract.js +245 -0
- package/dist/workflow-runtime.contract.js.map +1 -0
- package/dist/worktrees/v1.d.ts +2 -0
- package/dist/worktrees/v1.js +3 -0
- package/dist/worktrees/v1.js.map +1 -0
- package/package.json +23 -7
- package/src/agents/v1.ts +0 -8
- package/src/api-enums.contract.ts +0 -183
- package/src/auth-context.contract.ts +0 -9
- package/src/auth-session.contract.ts +0 -9
- package/src/auth.contract.ts +0 -162
- package/src/beliefs/v1.ts +0 -8
- package/src/context-pack.contract.ts +0 -704
- package/src/convex-admin.contract.ts +0 -14
- package/src/events-types.contract.ts +0 -9
- package/src/events.contract.ts +0 -376
- package/src/evidence/v1.ts +0 -8
- package/src/gateway.contract.ts +0 -151
- package/src/graph/v1.ts +0 -8
- package/src/index.ts +0 -30
- package/src/lens-filter.contract.ts +0 -183
- package/src/lens-workflow.contract.ts +0 -162
- package/src/mcp-tools.contract.ts +0 -3636
- package/src/ontologies/v1.ts +0 -8
- package/src/ontology-matching.contract.ts +0 -9
- package/src/prompt.contract.ts +0 -50
- package/src/questions/v1.ts +0 -8
- package/src/sdk-methods.contract.ts +0 -522
- package/src/sdk-tools.contract.ts +0 -1545
- package/src/topic-scope.contract.ts +0 -9
- package/src/topics/v1.ts +0 -8
- package/src/v1/agents/v1.ts +0 -8
- package/src/v1/beliefs/v1.ts +0 -8
- package/src/v1/evidence/v1.ts +0 -8
- package/src/v1/graph/v1.ts +0 -8
- package/src/v1/ontologies/v1.ts +0 -276
- package/src/v1/questions/v1.ts +0 -8
- package/src/v1/worktrees/v1.ts +0 -8
- package/src/workflow-runtime.contract.ts +0 -440
- package/src/worktrees/v1.ts +0 -8
- package/tsconfig.json +0 -9
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared lexical matching primitives used across MCP handlers and graph utilities.
|
|
3
|
+
*
|
|
4
|
+
* The goal is not to replace downstream LLM scoring. It provides a fast,
|
|
5
|
+
* deterministic substrate for candidate generation, reranking, and light
|
|
6
|
+
* classification across belief/question/evidence/entity surfaces.
|
|
7
|
+
*/
|
|
8
|
+
type LexicalStrategy = "tokenOverlap" | "bigramJaccard" | "wordOverlap";
|
|
9
|
+
type PreparedLexicalQuery = {
|
|
10
|
+
raw: string;
|
|
11
|
+
tokens: string[];
|
|
12
|
+
words: string[];
|
|
13
|
+
bigrams: Set<string>;
|
|
14
|
+
};
|
|
15
|
+
type LexicalSignal = {
|
|
16
|
+
strategy?: LexicalStrategy;
|
|
17
|
+
text: string | null | undefined;
|
|
18
|
+
weight: number;
|
|
19
|
+
};
|
|
20
|
+
type LexicalRerankOptions = {
|
|
21
|
+
lexicalWeight?: number;
|
|
22
|
+
rankWeight?: number;
|
|
23
|
+
};
|
|
24
|
+
/** Tokenize a string into lowercase words, removing stop words. */
|
|
25
|
+
declare function tokenizeSearchText(text: string): string[];
|
|
26
|
+
/** Simple stemmer: strip common English suffixes for fuzzy matching. */
|
|
27
|
+
declare function stemToken(word: string): string;
|
|
28
|
+
/** Compute token overlap score between query tokens and text tokens. */
|
|
29
|
+
declare function tokenOverlapScore(queryTokens: string[], textTokens: string[]): number;
|
|
30
|
+
/**
|
|
31
|
+
* Extract character bigrams from text. Normalizes to lowercase, removes
|
|
32
|
+
* non-alphanumeric characters, and generates overlapping pairs.
|
|
33
|
+
*/
|
|
34
|
+
declare function bigramTokenize(text: string): Set<string>;
|
|
35
|
+
/**
|
|
36
|
+
* Extract word-level tokens from text (for coarser matching).
|
|
37
|
+
* Normalizes to lowercase, splits on non-alphanumeric.
|
|
38
|
+
*/
|
|
39
|
+
declare function wordTokenize(text: string): string[];
|
|
40
|
+
/** Jaccard similarity between two sets: |A ∩ B| / |A ∪ B|. */
|
|
41
|
+
declare function jaccardSimilarity(setA: Set<string>, setB: Set<string>): number;
|
|
42
|
+
/** Exact word overlap score: fraction of type words found in input text. */
|
|
43
|
+
declare function wordOverlapScore(inputWords: string[], typeWords: string[]): number;
|
|
44
|
+
/** Pre-compute reusable lexical structures for a query. */
|
|
45
|
+
declare function prepareLexicalQuery(query: string): PreparedLexicalQuery;
|
|
46
|
+
/** Score a single lexical signal against a prepared query. */
|
|
47
|
+
declare function scoreLexicalSignal(query: PreparedLexicalQuery, signal: LexicalSignal): number;
|
|
48
|
+
/** Weighted lexical score across multiple textual signals. */
|
|
49
|
+
declare function scoreLexicalSignals(query: PreparedLexicalQuery, signals: LexicalSignal[]): number;
|
|
50
|
+
/** Map a candidate's original rank position into a 0..1 prior. */
|
|
51
|
+
declare function rankWindowScore(index: number, total: number): number;
|
|
52
|
+
/** Rerank a candidate window by lexical overlap while preserving original-rank prior. */
|
|
53
|
+
declare function rerankLexicalWindow<T>(query: string, items: T[], getText: (item: T) => string | null | undefined, options?: LexicalRerankOptions): T[];
|
|
54
|
+
|
|
55
|
+
export { type LexicalRerankOptions, type LexicalSignal, type LexicalStrategy, type PreparedLexicalQuery, bigramTokenize, jaccardSimilarity, prepareLexicalQuery, rankWindowScore, rerankLexicalWindow, scoreLexicalSignal, scoreLexicalSignals, stemToken, tokenOverlapScore, tokenizeSearchText, wordOverlapScore, wordTokenize };
|
|
@@ -1,36 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
* deterministic substrate for candidate generation, reranking, and light
|
|
6
|
-
* classification across belief/question/evidence/entity surfaces.
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
export type LexicalStrategy = "tokenOverlap" | "bigramJaccard" | "wordOverlap";
|
|
10
|
-
|
|
11
|
-
export type PreparedLexicalQuery = {
|
|
12
|
-
raw: string;
|
|
13
|
-
tokens: string[];
|
|
14
|
-
words: string[];
|
|
15
|
-
bigrams: Set<string>;
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
export type LexicalSignal = {
|
|
19
|
-
strategy?: LexicalStrategy;
|
|
20
|
-
text: string | null | undefined;
|
|
21
|
-
weight: number;
|
|
22
|
-
};
|
|
23
|
-
|
|
24
|
-
export type LexicalRerankOptions = {
|
|
25
|
-
lexicalWeight?: number;
|
|
26
|
-
rankWeight?: number;
|
|
27
|
-
};
|
|
28
|
-
|
|
29
|
-
const TOKEN_SPLIT_REGEX = /[^a-z0-9]+/;
|
|
30
|
-
const NON_ALPHANUMERIC_REGEX = /[^a-z0-9]/g;
|
|
31
|
-
|
|
32
|
-
/** Stop words that add noise to scoring. */
|
|
33
|
-
const STOP_WORDS = new Set([
|
|
1
|
+
// src/text-matching.contract.ts
|
|
2
|
+
var TOKEN_SPLIT_REGEX = /[^a-z0-9]+/;
|
|
3
|
+
var NON_ALPHANUMERIC_REGEX = /[^a-z0-9]/g;
|
|
4
|
+
var STOP_WORDS = /* @__PURE__ */ new Set([
|
|
34
5
|
"the",
|
|
35
6
|
"a",
|
|
36
7
|
"an",
|
|
@@ -86,19 +57,12 @@ const STOP_WORDS = new Set([
|
|
|
86
57
|
"their",
|
|
87
58
|
"we",
|
|
88
59
|
"our",
|
|
89
|
-
"so"
|
|
60
|
+
"so"
|
|
90
61
|
]);
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
export function tokenizeSearchText(text: string): string[] {
|
|
94
|
-
return text
|
|
95
|
-
.toLowerCase()
|
|
96
|
-
.split(TOKEN_SPLIT_REGEX)
|
|
97
|
-
.filter((token) => token.length >= 2 && !STOP_WORDS.has(token));
|
|
62
|
+
function tokenizeSearchText(text) {
|
|
63
|
+
return text.toLowerCase().split(TOKEN_SPLIT_REGEX).filter((token) => token.length >= 2 && !STOP_WORDS.has(token));
|
|
98
64
|
}
|
|
99
|
-
|
|
100
|
-
/** Simple stemmer: strip common English suffixes for fuzzy matching. */
|
|
101
|
-
export function stemToken(word: string): string {
|
|
65
|
+
function stemToken(word) {
|
|
102
66
|
if (word.length <= 4) {
|
|
103
67
|
return word;
|
|
104
68
|
}
|
|
@@ -158,93 +122,54 @@ export function stemToken(word: string): string {
|
|
|
158
122
|
}
|
|
159
123
|
return word;
|
|
160
124
|
}
|
|
161
|
-
|
|
162
|
-
/** Compute token overlap score between query tokens and text tokens. */
|
|
163
|
-
export function tokenOverlapScore(
|
|
164
|
-
queryTokens: string[],
|
|
165
|
-
textTokens: string[]
|
|
166
|
-
): number {
|
|
125
|
+
function tokenOverlapScore(queryTokens, textTokens) {
|
|
167
126
|
if (queryTokens.length === 0 || textTokens.length === 0) {
|
|
168
127
|
return 0;
|
|
169
128
|
}
|
|
170
|
-
|
|
171
129
|
const stemmedText = new Set(textTokens.map(stemToken));
|
|
172
130
|
let matchCount = 0;
|
|
173
|
-
|
|
174
131
|
for (const queryToken of queryTokens) {
|
|
175
132
|
const stemmedQuery = stemToken(queryToken);
|
|
176
|
-
|
|
177
133
|
if (stemmedText.has(stemmedQuery)) {
|
|
178
134
|
matchCount += 1;
|
|
179
135
|
continue;
|
|
180
136
|
}
|
|
181
|
-
|
|
182
137
|
for (const textToken of stemmedText) {
|
|
183
|
-
if (
|
|
184
|
-
textToken.startsWith(stemmedQuery) ||
|
|
185
|
-
stemmedQuery.startsWith(textToken)
|
|
186
|
-
) {
|
|
138
|
+
if (textToken.startsWith(stemmedQuery) || stemmedQuery.startsWith(textToken)) {
|
|
187
139
|
matchCount += 0.5;
|
|
188
140
|
break;
|
|
189
141
|
}
|
|
190
142
|
}
|
|
191
143
|
}
|
|
192
|
-
|
|
193
144
|
return matchCount / queryTokens.length;
|
|
194
145
|
}
|
|
195
|
-
|
|
196
|
-
/**
|
|
197
|
-
* Extract character bigrams from text. Normalizes to lowercase, removes
|
|
198
|
-
* non-alphanumeric characters, and generates overlapping pairs.
|
|
199
|
-
*/
|
|
200
|
-
export function bigramTokenize(text: string): Set<string> {
|
|
146
|
+
function bigramTokenize(text) {
|
|
201
147
|
const normalized = text.toLowerCase().replace(NON_ALPHANUMERIC_REGEX, "");
|
|
202
|
-
const bigrams = new Set
|
|
148
|
+
const bigrams = /* @__PURE__ */ new Set();
|
|
203
149
|
for (let i = 0; i < normalized.length - 1; i++) {
|
|
204
150
|
bigrams.add(normalized.slice(i, i + 2));
|
|
205
151
|
}
|
|
206
152
|
return bigrams;
|
|
207
153
|
}
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
* Extract word-level tokens from text (for coarser matching).
|
|
211
|
-
* Normalizes to lowercase, splits on non-alphanumeric.
|
|
212
|
-
*/
|
|
213
|
-
export function wordTokenize(text: string): string[] {
|
|
214
|
-
return text
|
|
215
|
-
.toLowerCase()
|
|
216
|
-
.split(TOKEN_SPLIT_REGEX)
|
|
217
|
-
.filter((token) => token.length > 1);
|
|
154
|
+
function wordTokenize(text) {
|
|
155
|
+
return text.toLowerCase().split(TOKEN_SPLIT_REGEX).filter((token) => token.length > 1);
|
|
218
156
|
}
|
|
219
|
-
|
|
220
|
-
/** Jaccard similarity between two sets: |A ∩ B| / |A ∪ B|. */
|
|
221
|
-
export function jaccardSimilarity(
|
|
222
|
-
setA: Set<string>,
|
|
223
|
-
setB: Set<string>
|
|
224
|
-
): number {
|
|
157
|
+
function jaccardSimilarity(setA, setB) {
|
|
225
158
|
if (setA.size === 0 && setB.size === 0) {
|
|
226
159
|
return 0;
|
|
227
160
|
}
|
|
228
|
-
|
|
229
161
|
let intersectionSize = 0;
|
|
230
162
|
const smaller = setA.size <= setB.size ? setA : setB;
|
|
231
163
|
const larger = setA.size <= setB.size ? setB : setA;
|
|
232
|
-
|
|
233
164
|
for (const item of smaller) {
|
|
234
165
|
if (larger.has(item)) {
|
|
235
166
|
intersectionSize++;
|
|
236
167
|
}
|
|
237
168
|
}
|
|
238
|
-
|
|
239
169
|
const unionSize = setA.size + setB.size - intersectionSize;
|
|
240
170
|
return unionSize === 0 ? 0 : intersectionSize / unionSize;
|
|
241
171
|
}
|
|
242
|
-
|
|
243
|
-
/** Exact word overlap score: fraction of type words found in input text. */
|
|
244
|
-
export function wordOverlapScore(
|
|
245
|
-
inputWords: string[],
|
|
246
|
-
typeWords: string[]
|
|
247
|
-
): number {
|
|
172
|
+
function wordOverlapScore(inputWords, typeWords) {
|
|
248
173
|
if (typeWords.length === 0) {
|
|
249
174
|
return 0;
|
|
250
175
|
}
|
|
@@ -256,27 +181,19 @@ export function wordOverlapScore(
|
|
|
256
181
|
}
|
|
257
182
|
return matches / typeWords.length;
|
|
258
183
|
}
|
|
259
|
-
|
|
260
|
-
/** Pre-compute reusable lexical structures for a query. */
|
|
261
|
-
export function prepareLexicalQuery(query: string): PreparedLexicalQuery {
|
|
184
|
+
function prepareLexicalQuery(query) {
|
|
262
185
|
return {
|
|
263
186
|
raw: query,
|
|
264
187
|
tokens: tokenizeSearchText(query),
|
|
265
188
|
words: wordTokenize(query),
|
|
266
|
-
bigrams: bigramTokenize(query)
|
|
189
|
+
bigrams: bigramTokenize(query)
|
|
267
190
|
};
|
|
268
191
|
}
|
|
269
|
-
|
|
270
|
-
/** Score a single lexical signal against a prepared query. */
|
|
271
|
-
export function scoreLexicalSignal(
|
|
272
|
-
query: PreparedLexicalQuery,
|
|
273
|
-
signal: LexicalSignal
|
|
274
|
-
): number {
|
|
192
|
+
function scoreLexicalSignal(query, signal) {
|
|
275
193
|
const text = signal.text?.trim();
|
|
276
194
|
if (!text) {
|
|
277
195
|
return 0;
|
|
278
196
|
}
|
|
279
|
-
|
|
280
197
|
switch (signal.strategy ?? "tokenOverlap") {
|
|
281
198
|
case "bigramJaccard":
|
|
282
199
|
return jaccardSimilarity(query.bigrams, bigramTokenize(text));
|
|
@@ -286,15 +203,9 @@ export function scoreLexicalSignal(
|
|
|
286
203
|
return tokenOverlapScore(query.tokens, tokenizeSearchText(text));
|
|
287
204
|
}
|
|
288
205
|
}
|
|
289
|
-
|
|
290
|
-
/** Weighted lexical score across multiple textual signals. */
|
|
291
|
-
export function scoreLexicalSignals(
|
|
292
|
-
query: PreparedLexicalQuery,
|
|
293
|
-
signals: LexicalSignal[]
|
|
294
|
-
): number {
|
|
206
|
+
function scoreLexicalSignals(query, signals) {
|
|
295
207
|
let weightedScore = 0;
|
|
296
208
|
let totalWeight = 0;
|
|
297
|
-
|
|
298
209
|
for (const signal of signals) {
|
|
299
210
|
if (!signal.text?.trim() || signal.weight <= 0) {
|
|
300
211
|
continue;
|
|
@@ -302,46 +213,34 @@ export function scoreLexicalSignals(
|
|
|
302
213
|
weightedScore += scoreLexicalSignal(query, signal) * signal.weight;
|
|
303
214
|
totalWeight += signal.weight;
|
|
304
215
|
}
|
|
305
|
-
|
|
306
216
|
return totalWeight === 0 ? 0 : weightedScore / totalWeight;
|
|
307
217
|
}
|
|
308
|
-
|
|
309
|
-
/** Map a candidate's original rank position into a 0..1 prior. */
|
|
310
|
-
export function rankWindowScore(index: number, total: number): number {
|
|
218
|
+
function rankWindowScore(index, total) {
|
|
311
219
|
if (total <= 1) {
|
|
312
220
|
return 1;
|
|
313
221
|
}
|
|
314
222
|
const clampedIndex = Math.max(0, Math.min(index, total - 1));
|
|
315
223
|
return 1 - clampedIndex / (total - 1);
|
|
316
224
|
}
|
|
317
|
-
|
|
318
|
-
/** Rerank a candidate window by lexical overlap while preserving original-rank prior. */
|
|
319
|
-
export function rerankLexicalWindow<T>(
|
|
320
|
-
query: string,
|
|
321
|
-
items: T[],
|
|
322
|
-
getText: (item: T) => string | null | undefined,
|
|
323
|
-
options?: LexicalRerankOptions
|
|
324
|
-
): T[] {
|
|
225
|
+
function rerankLexicalWindow(query, items, getText, options) {
|
|
325
226
|
const preparedQuery = prepareLexicalQuery(query);
|
|
326
227
|
if (preparedQuery.tokens.length === 0 || items.length <= 1) {
|
|
327
228
|
return items;
|
|
328
229
|
}
|
|
329
|
-
|
|
330
230
|
const lexicalWeight = options?.lexicalWeight ?? 0.65;
|
|
331
231
|
const rankWeight = options?.rankWeight ?? 0.35;
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
combinedScore: lexicalScore * lexicalWeight + rankScore * rankWeight,
|
|
343
|
-
};
|
|
344
|
-
})
|
|
345
|
-
.sort((left, right) => right.combinedScore - left.combinedScore)
|
|
346
|
-
.map(({ item }) => item);
|
|
232
|
+
return items.map((item, index) => {
|
|
233
|
+
const lexicalScore = scoreLexicalSignals(preparedQuery, [
|
|
234
|
+
{ text: getText(item) ?? "", weight: 1, strategy: "tokenOverlap" }
|
|
235
|
+
]);
|
|
236
|
+
const rankScore = rankWindowScore(index, items.length);
|
|
237
|
+
return {
|
|
238
|
+
item,
|
|
239
|
+
combinedScore: lexicalScore * lexicalWeight + rankScore * rankWeight
|
|
240
|
+
};
|
|
241
|
+
}).sort((left, right) => right.combinedScore - left.combinedScore).map(({ item }) => item);
|
|
347
242
|
}
|
|
243
|
+
|
|
244
|
+
export { bigramTokenize, jaccardSimilarity, prepareLexicalQuery, rankWindowScore, rerankLexicalWindow, scoreLexicalSignal, scoreLexicalSignals, stemToken, tokenOverlapScore, tokenizeSearchText, wordOverlapScore, wordTokenize };
|
|
245
|
+
//# sourceMappingURL=text-matching.contract.js.map
|
|
246
|
+
//# sourceMappingURL=text-matching.contract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/text-matching.contract.ts"],"names":[],"mappings":";AA4BA,IAAM,iBAAA,GAAoB,YAAA;AAC1B,IAAM,sBAAA,GAAyB,YAAA;AAG/B,IAAM,UAAA,uBAAiB,GAAA,CAAI;AAAA,EACzB,KAAA;AAAA,EACA,GAAA;AAAA,EACA,IAAA;AAAA,EACA,KAAA;AAAA,EACA,IAAA;AAAA,EACA,KAAA;AAAA,EACA,IAAA;AAAA,EACA,IAAA;AAAA,EACA,IAAA;AAAA,EACA,IAAA;AAAA,EACA,KAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,IAAA;AAAA,EACA,MAAA;AAAA,EACA,IAAA;AAAA,EACA,IAAA;AAAA,EACA,IAAA;AAAA,EACA,IAAA;AAAA,EACA,KAAA;AAAA,EACA,KAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA;AAAA,EACA,KAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA;AAAA,EACA,KAAA;AAAA,EACA,KAAA;AAAA,EACA,IAAA;AAAA,EACA,KAAA;AAAA,EACA,KAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,KAAA;AAAA,EACA,MAAA;AAAA,EACA,OAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA;AAAA,EACA,MAAA;AAAA,EACA,IAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,QAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,OAAA;AAAA,EACA,IAAA;AAAA,EACA,KAAA;AAAA,EACA;AACF,CAAC,CAAA;AAGM,SAAS,mBAAmB,IAAA,EAAwB;AACzD,EAAA,OAAO,KACJ,WAAA,EAAY,CACZ,KAAA,CAAM,iBAAiB,EACvB,MAAA,CAAO,CAAC,KAAA,KAAU,KAAA,CAAM,UAAU,CAAA,IAAK,CAAC,UAAA,CAAW,GAAA,CAAI,KAAK,CAAC,CAAA;AAClE;AAGO,SAAS,UAAU,IAAA,EAAsB;AAC9C,EAAA,IAAI,IAAA,CAAK,UAAU,CAAA,EAAG;AACpB,IAAA,OAAO,IAAA;AAAA,EACT;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,OAAO,CAAA,EAAG;AAC1B,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAM,CAAA,EAAG;AACzB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAM,CAAA,EAAG;AACzB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAM,CAAA,EAAG;AACzB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAM,CAAA,EAAG;AACzB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,MAAM,CAAA,EAAG;AACzB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,KAAK,CAAA,EAAG;AACxB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,KAAK,CAAA,EAAG;AACxB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,KAAK,CAAA,EAAG;AACxB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,KAAK,CAAA,EAAG;AACxB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,KAAK,CAAA,EAAG;AACxB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AACvB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AACvB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AACvB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AACvB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AACvB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AACvB,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,IAAI,IAAA,CAAK,SAAS,GAAG,CAAA,IAAK,CAAC,IAAA,CAAK,QAAA,CAAS,IAAI,CAAA,EAAG;AAC9C,IAAA,OAAO,IAAA,CAAK,KAAA,CAAM,CAAA,EAAG,EAAE,CAAA;AAAA,EACzB;AACA,EAAA,OAAO,IAAA;AACT;AAGO,SAAS,iBAAA,CACd,aACA,UAAA,EACQ;AACR,EAAA,IAAI,WAAA,CAAY,MAAA,KAAW,CAAA,IAAK,UAAA,CAAW,WAAW,CAAA,EAAG;AACvD,IAAA,OAAO,CAAA;AAAA,EACT;AAEA,EAAA,MAAM,cAAc,IAAI,GAAA,CAAI,UAAA,CAAW,GAAA,CAAI,SAAS,CAAC,CAAA;AACrD,EAAA,IAAI,UAAA,GAAa,CAAA;AAEjB,EAAA,KAAA,MAAW,cAAc,WAAA,EAAa;AACpC,IAAA,MAAM,YAAA,GAAe,UAAU,UAAU,CAAA;AAEzC,IAAA,IAAI,WAAA,CAAY,GAAA,CAAI,YAAY,CAAA,EAAG;AACjC,MAAA,UAAA,IAAc,CAAA;AACd,MAAA;AAAA,IACF;AAEA,IAAA,KAAA,MAAW,aAAa,WAAA,EAAa;AACnC,MAAA,IACE,UAAU,UAAA,CAAW,YAAY,KACjC,YAAA,CAAa,UAAA,CAAW,SAAS,CAAA,EACjC;AACA,QAAA,UAAA,IAAc,GAAA;AACd,QAAA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,OAAO,aAAa,WAAA,CAAY,MAAA;AAClC;AAMO,SAAS,eAAe,IAAA,EAA2B;AACxD,EAAA,MAAM,aAAa,IAAA,CAAK,WAAA,EAAY,CAAE,OAAA,CAAQ,wBAAwB,EAAE,CAAA;AACxE,EAAA,MAAM,OAAA,uBAAc,GAAA,EAAY;AAChC,EAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,UAAA,CAAW,MAAA,GAAS,GAAG,CAAA,EAAA,EAAK;AAC9C,IAAA,OAAA,CAAQ,IAAI,UAAA,CAAW,KAAA,CAAM,CAAA,EAAG,CAAA,GAAI,CAAC,CAAC,CAAA;AAAA,EACxC;AACA,EAAA,OAAO,OAAA;AACT;AAMO,SAAS,aAAa,IAAA,EAAwB;AACnD,EAAA,OAAO,IAAA,CACJ,WAAA,EAAY,CACZ,KAAA,CAAM,iBAAiB,CAAA,CACvB,MAAA,CAAO,CAAC,KAAA,KAAU,KAAA,CAAM,MAAA,GAAS,CAAC,CAAA;AACvC;AAGO,SAAS,iBAAA,CACd,MACA,IAAA,EACQ;AACR,EAAA,IAAI,IAAA,CAAK,IAAA,KAAS,CAAA,IAAK,IAAA,CAAK,SAAS,CAAA,EAAG;AACtC,IAAA,OAAO,CAAA;AAAA,EACT;AAEA,EAAA,IAAI,gBAAA,GAAmB,CAAA;AACvB,EAAA,MAAM,OAAA,GAAU,IAAA,CAAK,IAAA,IAAQ,IAAA,CAAK,OAAO,IAAA,GAAO,IAAA;AAChD,EAAA,MAAM,MAAA,GAAS,IAAA,CAAK,IAAA,IAAQ,IAAA,CAAK,OAAO,IAAA,GAAO,IAAA;AAE/C,EAAA,KAAA,MAAW,QAAQ,OAAA,EAAS;AAC1B,IAAA,IAAI,MAAA,CAAO,GAAA,CAAI,IAAI,CAAA,EAAG;AACpB,MAAA,gBAAA,EAAA;AAAA,IACF;AAAA,EACF;AAEA,EAAA,MAAM,SAAA,GAAY,IAAA,CAAK,IAAA,GAAO,IAAA,CAAK,IAAA,GAAO,gBAAA;AAC1C,EAAA,OAAO,SAAA,KAAc,CAAA,GAAI,CAAA,GAAI,gBAAA,GAAmB,SAAA;AAClD;AAGO,SAAS,gBAAA,CACd,YACA,SAAA,EACQ;AACR,EAAA,IAAI,SAAA,CAAU,WAAW,CAAA,EAAG;AAC1B,IAAA,OAAO,CAAA;AAAA,EACT;AACA,EAAA,IAAI,OAAA,GAAU,CAAA;AACd,EAAA,KAAA,MAAW,QAAQ,SAAA,EAAW;AAC5B,IAAA,IAAI,UAAA,CAAW,QAAA,CAAS,IAAI,CAAA,EAAG;AAC7B,MAAA,OAAA,EAAA;AAAA,IACF;AAAA,EACF;AACA,EAAA,OAAO,UAAU,SAAA,CAAU,MAAA;AAC7B;AAGO,SAAS,oBAAoB,KAAA,EAAqC;AACvE,EAAA,OAAO;AAAA,IACL,GAAA,EAAK,KAAA;AAAA,IACL,MAAA,EAAQ,mBAAmB,KAAK,CAAA;AAAA,IAChC,KAAA,EAAO,aAAa,KAAK,CAAA;AAAA,IACzB,OAAA,EAAS,eAAe,KAAK;AAAA,GAC/B;AACF;AAGO,SAAS,kBAAA,CACd,OACA,MAAA,EACQ;AACR,EAAA,MAAM,IAAA,GAAO,MAAA,CAAO,IAAA,EAAM,IAAA,EAAK;AAC/B,EAAA,IAAI,CAAC,IAAA,EAAM;AACT,IAAA,OAAO,CAAA;AAAA,EACT;AAEA,EAAA,QAAQ,MAAA,CAAO,YAAY,cAAA;AAAgB,IACzC,KAAK,eAAA;AACH,MAAA,OAAO,iBAAA,CAAkB,KAAA,CAAM,OAAA,EAAS,cAAA,CAAe,IAAI,CAAC,CAAA;AAAA,IAC9D,KAAK,aAAA;AACH,MAAA,OAAO,gBAAA,CAAiB,KAAA,CAAM,KAAA,EAAO,YAAA,CAAa,IAAI,CAAC,CAAA;AAAA,IACzD;AACE,MAAA,OAAO,iBAAA,CAAkB,KAAA,CAAM,MAAA,EAAQ,kBAAA,CAAmB,IAAI,CAAC,CAAA;AAAA;AAErE;AAGO,SAAS,mBAAA,CACd,OACA,OAAA,EACQ;AACR,EAAA,IAAI,aAAA,GAAgB,CAAA;AACpB,EAAA,IAAI,WAAA,GAAc,CAAA;AAElB,EAAA,KAAA,MAAW,UAAU,OAAA,EAAS;AAC5B,IAAA,IAAI,CAAC,MAAA,CAAO,IAAA,EAAM,MAAK,IAAK,MAAA,CAAO,UAAU,CAAA,EAAG;AAC9C,MAAA;AAAA,IACF;AACA,IAAA,aAAA,IAAiB,kBAAA,CAAmB,KAAA,EAAO,MAAM,CAAA,GAAI,MAAA,CAAO,MAAA;AAC5D,IAAA,WAAA,IAAe,MAAA,CAAO,MAAA;AAAA,EACxB;AAEA,EAAA,OAAO,WAAA,KAAgB,CAAA,GAAI,CAAA,GAAI,aAAA,GAAgB,WAAA;AACjD;AAGO,SAAS,eAAA,CAAgB,OAAe,KAAA,EAAuB;AACpE,EAAA,IAAI,SAAS,CAAA,EAAG;AACd,IAAA,OAAO,CAAA;AAAA,EACT;AACA,EAAA,MAAM,YAAA,GAAe,KAAK,GAAA,CAAI,CAAA,EAAG,KAAK,GAAA,CAAI,KAAA,EAAO,KAAA,GAAQ,CAAC,CAAC,CAAA;AAC3D,EAAA,OAAO,CAAA,GAAI,gBAAgB,KAAA,GAAQ,CAAA,CAAA;AACrC;AAGO,SAAS,mBAAA,CACd,KAAA,EACA,KAAA,EACA,OAAA,EACA,OAAA,EACK;AACL,EAAA,MAAM,aAAA,GAAgB,oBAAoB,KAAK,CAAA;AAC/C,EAAA,IAAI,cAAc,MAAA,CAAO,MAAA,KAAW,CAAA,IAAK,KAAA,CAAM,UAAU,CAAA,EAAG;AAC1D,IAAA,OAAO,KAAA;AAAA,EACT;AAEA,EAAA,MAAM,aAAA,GAAgB,SAAS,aAAA,IAAiB,IAAA;AAChD,EAAA,MAAM,UAAA,GAAa,SAAS,UAAA,IAAc,IAAA;AAE1C,EAAA,OAAO,KAAA,CACJ,GAAA,CAAI,CAAC,IAAA,EAAM,KAAA,KAAU;AACpB,IAAA,MAAM,YAAA,GAAe,oBAAoB,aAAA,EAAe;AAAA,MACtD,EAAE,MAAM,OAAA,CAAQ,IAAI,KAAK,EAAA,EAAI,MAAA,EAAQ,CAAA,EAAG,QAAA,EAAU,cAAA;AAAe,KAClE,CAAA;AACD,IAAA,MAAM,SAAA,GAAY,eAAA,CAAgB,KAAA,EAAO,KAAA,CAAM,MAAM,CAAA;AAErD,IAAA,OAAO;AAAA,MACL,IAAA;AAAA,MACA,aAAA,EAAe,YAAA,GAAe,aAAA,GAAgB,SAAA,GAAY;AAAA,KAC5D;AAAA,EACF,CAAC,CAAA,CACA,IAAA,CAAK,CAAC,IAAA,EAAM,UAAU,KAAA,CAAM,aAAA,GAAgB,IAAA,CAAK,aAAa,EAC9D,GAAA,CAAI,CAAC,EAAE,IAAA,OAAW,IAAI,CAAA;AAC3B","file":"text-matching.contract.js","sourcesContent":["/**\n * Shared lexical matching primitives used across MCP handlers and graph utilities.\n *\n * The goal is not to replace downstream LLM scoring. It provides a fast,\n * deterministic substrate for candidate generation, reranking, and light\n * classification across belief/question/evidence/entity surfaces.\n */\n\nexport type LexicalStrategy = \"tokenOverlap\" | \"bigramJaccard\" | \"wordOverlap\";\n\nexport type PreparedLexicalQuery = {\n raw: string;\n tokens: string[];\n words: string[];\n bigrams: Set<string>;\n};\n\nexport type LexicalSignal = {\n strategy?: LexicalStrategy;\n text: string | null | undefined;\n weight: number;\n};\n\nexport type LexicalRerankOptions = {\n lexicalWeight?: number;\n rankWeight?: number;\n};\n\nconst TOKEN_SPLIT_REGEX = /[^a-z0-9]+/;\nconst NON_ALPHANUMERIC_REGEX = /[^a-z0-9]/g;\n\n/** Stop words that add noise to scoring. */\nconst STOP_WORDS = new Set([\n \"the\",\n \"a\",\n \"an\",\n \"and\",\n \"or\",\n \"but\",\n \"in\",\n \"on\",\n \"at\",\n \"to\",\n \"for\",\n \"of\",\n \"with\",\n \"by\",\n \"from\",\n \"is\",\n \"it\",\n \"as\",\n \"be\",\n \"was\",\n \"are\",\n \"this\",\n \"that\",\n \"has\",\n \"had\",\n \"have\",\n \"not\",\n \"all\",\n \"can\",\n \"do\",\n \"its\",\n \"may\",\n \"will\",\n \"how\",\n \"what\",\n \"which\",\n \"who\",\n \"when\",\n \"where\",\n \"than\",\n \"then\",\n \"each\",\n \"into\",\n \"such\",\n \"any\",\n \"been\",\n \"if\",\n \"would\",\n \"about\",\n \"should\",\n \"these\",\n \"those\",\n \"their\",\n \"we\",\n \"our\",\n \"so\",\n]);\n\n/** Tokenize a string into lowercase words, removing stop words. */\nexport function tokenizeSearchText(text: string): string[] {\n return text\n .toLowerCase()\n .split(TOKEN_SPLIT_REGEX)\n .filter((token) => token.length >= 2 && !STOP_WORDS.has(token));\n}\n\n/** Simple stemmer: strip common English suffixes for fuzzy matching. */\nexport function stemToken(word: string): string {\n if (word.length <= 4) {\n return word;\n }\n if (word.endsWith(\"ation\")) {\n return word.slice(0, -5);\n }\n if (word.endsWith(\"ment\")) {\n return word.slice(0, -4);\n }\n if (word.endsWith(\"ness\")) {\n return word.slice(0, -4);\n }\n if (word.endsWith(\"ical\")) {\n return word.slice(0, -4);\n }\n if (word.endsWith(\"tion\")) {\n return word.slice(0, -4);\n }\n if (word.endsWith(\"sion\")) {\n return word.slice(0, -4);\n }\n if (word.endsWith(\"ing\")) {\n return word.slice(0, -3);\n }\n if (word.endsWith(\"ous\")) {\n return word.slice(0, -3);\n }\n if (word.endsWith(\"ive\")) {\n return word.slice(0, -3);\n }\n if (word.endsWith(\"ity\")) {\n return word.slice(0, -3);\n }\n if (word.endsWith(\"ics\")) {\n return word.slice(0, -3);\n }\n if (word.endsWith(\"ly\")) {\n return word.slice(0, -2);\n }\n if (word.endsWith(\"ed\")) {\n return word.slice(0, -2);\n }\n if (word.endsWith(\"er\")) {\n return word.slice(0, -2);\n }\n if (word.endsWith(\"es\")) {\n return word.slice(0, -2);\n }\n if (word.endsWith(\"al\")) {\n return word.slice(0, -2);\n }\n if (word.endsWith(\"ic\")) {\n return word.slice(0, -2);\n }\n if (word.endsWith(\"s\") && !word.endsWith(\"ss\")) {\n return word.slice(0, -1);\n }\n return word;\n}\n\n/** Compute token overlap score between query tokens and text tokens. */\nexport function tokenOverlapScore(\n queryTokens: string[],\n textTokens: string[]\n): number {\n if (queryTokens.length === 0 || textTokens.length === 0) {\n return 0;\n }\n\n const stemmedText = new Set(textTokens.map(stemToken));\n let matchCount = 0;\n\n for (const queryToken of queryTokens) {\n const stemmedQuery = stemToken(queryToken);\n\n if (stemmedText.has(stemmedQuery)) {\n matchCount += 1;\n continue;\n }\n\n for (const textToken of stemmedText) {\n if (\n textToken.startsWith(stemmedQuery) ||\n stemmedQuery.startsWith(textToken)\n ) {\n matchCount += 0.5;\n break;\n }\n }\n }\n\n return matchCount / queryTokens.length;\n}\n\n/**\n * Extract character bigrams from text. Normalizes to lowercase, removes\n * non-alphanumeric characters, and generates overlapping pairs.\n */\nexport function bigramTokenize(text: string): Set<string> {\n const normalized = text.toLowerCase().replace(NON_ALPHANUMERIC_REGEX, \"\");\n const bigrams = new Set<string>();\n for (let i = 0; i < normalized.length - 1; i++) {\n bigrams.add(normalized.slice(i, i + 2));\n }\n return bigrams;\n}\n\n/**\n * Extract word-level tokens from text (for coarser matching).\n * Normalizes to lowercase, splits on non-alphanumeric.\n */\nexport function wordTokenize(text: string): string[] {\n return text\n .toLowerCase()\n .split(TOKEN_SPLIT_REGEX)\n .filter((token) => token.length > 1);\n}\n\n/** Jaccard similarity between two sets: |A ∩ B| / |A ∪ B|. */\nexport function jaccardSimilarity(\n setA: Set<string>,\n setB: Set<string>\n): number {\n if (setA.size === 0 && setB.size === 0) {\n return 0;\n }\n\n let intersectionSize = 0;\n const smaller = setA.size <= setB.size ? setA : setB;\n const larger = setA.size <= setB.size ? setB : setA;\n\n for (const item of smaller) {\n if (larger.has(item)) {\n intersectionSize++;\n }\n }\n\n const unionSize = setA.size + setB.size - intersectionSize;\n return unionSize === 0 ? 0 : intersectionSize / unionSize;\n}\n\n/** Exact word overlap score: fraction of type words found in input text. */\nexport function wordOverlapScore(\n inputWords: string[],\n typeWords: string[]\n): number {\n if (typeWords.length === 0) {\n return 0;\n }\n let matches = 0;\n for (const word of typeWords) {\n if (inputWords.includes(word)) {\n matches++;\n }\n }\n return matches / typeWords.length;\n}\n\n/** Pre-compute reusable lexical structures for a query. */\nexport function prepareLexicalQuery(query: string): PreparedLexicalQuery {\n return {\n raw: query,\n tokens: tokenizeSearchText(query),\n words: wordTokenize(query),\n bigrams: bigramTokenize(query),\n };\n}\n\n/** Score a single lexical signal against a prepared query. */\nexport function scoreLexicalSignal(\n query: PreparedLexicalQuery,\n signal: LexicalSignal\n): number {\n const text = signal.text?.trim();\n if (!text) {\n return 0;\n }\n\n switch (signal.strategy ?? \"tokenOverlap\") {\n case \"bigramJaccard\":\n return jaccardSimilarity(query.bigrams, bigramTokenize(text));\n case \"wordOverlap\":\n return wordOverlapScore(query.words, wordTokenize(text));\n default:\n return tokenOverlapScore(query.tokens, tokenizeSearchText(text));\n }\n}\n\n/** Weighted lexical score across multiple textual signals. */\nexport function scoreLexicalSignals(\n query: PreparedLexicalQuery,\n signals: LexicalSignal[]\n): number {\n let weightedScore = 0;\n let totalWeight = 0;\n\n for (const signal of signals) {\n if (!signal.text?.trim() || signal.weight <= 0) {\n continue;\n }\n weightedScore += scoreLexicalSignal(query, signal) * signal.weight;\n totalWeight += signal.weight;\n }\n\n return totalWeight === 0 ? 0 : weightedScore / totalWeight;\n}\n\n/** Map a candidate's original rank position into a 0..1 prior. */\nexport function rankWindowScore(index: number, total: number): number {\n if (total <= 1) {\n return 1;\n }\n const clampedIndex = Math.max(0, Math.min(index, total - 1));\n return 1 - clampedIndex / (total - 1);\n}\n\n/** Rerank a candidate window by lexical overlap while preserving original-rank prior. */\nexport function rerankLexicalWindow<T>(\n query: string,\n items: T[],\n getText: (item: T) => string | null | undefined,\n options?: LexicalRerankOptions\n): T[] {\n const preparedQuery = prepareLexicalQuery(query);\n if (preparedQuery.tokens.length === 0 || items.length <= 1) {\n return items;\n }\n\n const lexicalWeight = options?.lexicalWeight ?? 0.65;\n const rankWeight = options?.rankWeight ?? 0.35;\n\n return items\n .map((item, index) => {\n const lexicalScore = scoreLexicalSignals(preparedQuery, [\n { text: getText(item) ?? \"\", weight: 1, strategy: \"tokenOverlap\" },\n ]);\n const rankScore = rankWindowScore(index, items.length);\n\n return {\n item,\n combinedScore: lexicalScore * lexicalWeight + rankScore * rankWeight,\n };\n })\n .sort((left, right) => right.combinedScore - left.combinedScore)\n .map(({ item }) => item);\n}\n"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { ROOT_TOPIC_ID, TopicDoc, collectTopicNeighborhood } from './v1/topics/v1.js';
|
|
@@ -1,31 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
export const ROOT_TOPIC_ID = "n17tm38rwet7wqgzrmwahyt1z582590y";
|
|
9
|
-
|
|
10
|
-
export type TopicDoc = {
|
|
11
|
-
_id: string;
|
|
12
|
-
name?: string;
|
|
13
|
-
parentTopicId?: string;
|
|
14
|
-
depth?: number;
|
|
15
|
-
type?: string;
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
/**
|
|
19
|
-
* BFS traversal collecting a topic and its neighborhood:
|
|
20
|
-
* ancestors up to root + descendants down to maxDescendantDepth.
|
|
21
|
-
*/
|
|
22
|
-
export function collectTopicNeighborhood(
|
|
23
|
-
topics: TopicDoc[],
|
|
24
|
-
rootTopicId: string,
|
|
25
|
-
maxDescendantDepth = 2
|
|
26
|
-
): string[] {
|
|
27
|
-
const byId = new Map<string, TopicDoc>();
|
|
28
|
-
const children = new Map<string, string[]>();
|
|
1
|
+
// src/v1/topics/v1.ts
|
|
2
|
+
var ROOT_TOPIC_ID = "n17tm38rwet7wqgzrmwahyt1z582590y";
|
|
3
|
+
function collectTopicNeighborhood(topics, rootTopicId, maxDescendantDepth = 2) {
|
|
4
|
+
const byId = /* @__PURE__ */ new Map();
|
|
5
|
+
const children = /* @__PURE__ */ new Map();
|
|
29
6
|
for (const topic of topics) {
|
|
30
7
|
const id = String(topic._id);
|
|
31
8
|
byId.set(id, topic);
|
|
@@ -43,11 +20,8 @@ export function collectTopicNeighborhood(
|
|
|
43
20
|
list.push(id);
|
|
44
21
|
children.set(parent, list);
|
|
45
22
|
}
|
|
46
|
-
|
|
47
|
-
const selected = new Set<string>();
|
|
23
|
+
const selected = /* @__PURE__ */ new Set();
|
|
48
24
|
selected.add(rootTopicId);
|
|
49
|
-
|
|
50
|
-
// Ancestors
|
|
51
25
|
let cursor = byId.get(rootTopicId);
|
|
52
26
|
while (cursor?.parentTopicId) {
|
|
53
27
|
const parentId = String(cursor.parentTopicId);
|
|
@@ -57,13 +31,11 @@ export function collectTopicNeighborhood(
|
|
|
57
31
|
selected.add(parentId);
|
|
58
32
|
cursor = byId.get(parentId);
|
|
59
33
|
}
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
const queue: Array<{ id: string; depth: number }> = [
|
|
63
|
-
{ id: rootTopicId, depth: 0 },
|
|
34
|
+
const queue = [
|
|
35
|
+
{ id: rootTopicId, depth: 0 }
|
|
64
36
|
];
|
|
65
37
|
while (queue.length > 0) {
|
|
66
|
-
const current = queue.shift()
|
|
38
|
+
const current = queue.shift();
|
|
67
39
|
if (current.depth >= maxDescendantDepth) {
|
|
68
40
|
continue;
|
|
69
41
|
}
|
|
@@ -74,6 +46,9 @@ export function collectTopicNeighborhood(
|
|
|
74
46
|
queue.push({ id: childId, depth: current.depth + 1 });
|
|
75
47
|
}
|
|
76
48
|
}
|
|
77
|
-
|
|
78
49
|
return Array.from(selected);
|
|
79
50
|
}
|
|
51
|
+
|
|
52
|
+
export { ROOT_TOPIC_ID, collectTopicNeighborhood };
|
|
53
|
+
//# sourceMappingURL=topic-scope.contract.js.map
|
|
54
|
+
//# sourceMappingURL=topic-scope.contract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/v1/topics/v1.ts"],"names":[],"mappings":";AAOO,IAAM,aAAA,GAAgB;AActB,SAAS,wBAAA,CACd,MAAA,EACA,WAAA,EACA,kBAAA,GAAqB,CAAA,EACX;AACV,EAAA,MAAM,IAAA,uBAAW,GAAA,EAAsB;AACvC,EAAA,MAAM,QAAA,uBAAe,GAAA,EAAsB;AAC3C,EAAA,KAAA,MAAW,SAAS,MAAA,EAAQ;AAC1B,IAAA,MAAM,EAAA,GAAK,MAAA,CAAO,KAAA,CAAM,GAAG,CAAA;AAC3B,IAAA,IAAA,CAAK,GAAA,CAAI,IAAI,KAAK,CAAA;AAClB,IAAA,IAAI,CAAC,QAAA,CAAS,GAAA,CAAI,EAAE,CAAA,EAAG;AACrB,MAAA,QAAA,CAAS,GAAA,CAAI,EAAA,EAAI,EAAE,CAAA;AAAA,IACrB;AAAA,EACF;AACA,EAAA,KAAA,MAAW,SAAS,MAAA,EAAQ;AAC1B,IAAA,IAAI,CAAC,MAAM,aAAA,EAAe;AACxB,MAAA;AAAA,IACF;AACA,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,KAAA,CAAM,aAAa,CAAA;AACzC,IAAA,MAAM,EAAA,GAAK,MAAA,CAAO,KAAA,CAAM,GAAG,CAAA;AAC3B,IAAA,MAAM,IAAA,GAAO,QAAA,CAAS,GAAA,CAAI,MAAM,KAAK,EAAC;AACtC,IAAA,IAAA,CAAK,KAAK,EAAE,CAAA;AACZ,IAAA,QAAA,CAAS,GAAA,CAAI,QAAQ,IAAI,CAAA;AAAA,EAC3B;AAEA,EAAA,MAAM,QAAA,uBAAe,GAAA,EAAY;AACjC,EAAA,QAAA,CAAS,IAAI,WAAW,CAAA;AAGxB,EAAA,IAAI,MAAA,GAAS,IAAA,CAAK,GAAA,CAAI,WAAW,CAAA;AACjC,EAAA,OAAO,QAAQ,aAAA,EAAe;AAC5B,IAAA,MAAM,QAAA,GAAW,MAAA,CAAO,MAAA,CAAO,aAAa,CAAA;AAC5C,IAAA,IAAI,QAAA,CAAS,GAAA,CAAI,QAAQ,CAAA,EAAG;AAC1B,MAAA;AAAA,IACF;AACA,IAAA,QAAA,CAAS,IAAI,QAAQ,CAAA;AACrB,IAAA,MAAA,GAAS,IAAA,CAAK,IAAI,QAAQ,CAAA;AAAA,EAC5B;AAGA,EAAA,MAAM,KAAA,GAA8C;AAAA,IAClD,EAAE,EAAA,EAAI,WAAA,EAAa,KAAA,EAAO,CAAA;AAAE,GAC9B;AACA,EAAA,OAAO,KAAA,CAAM,SAAS,CAAA,EAAG;AACvB,IAAA,MAAM,OAAA,GAAU,MAAM,KAAA,EAAM;AAC5B,IAAA,IAAI,OAAA,CAAQ,SAAS,kBAAA,EAAoB;AACvC,MAAA;AAAA,IACF;AACA,IAAA,KAAA,MAAW,WAAW,QAAA,CAAS,GAAA,CAAI,QAAQ,EAAE,CAAA,IAAK,EAAC,EAAG;AACpD,MAAA,IAAI,CAAC,QAAA,CAAS,GAAA,CAAI,OAAO,CAAA,EAAG;AAC1B,QAAA,QAAA,CAAS,IAAI,OAAO,CAAA;AAAA,MACtB;AACA,MAAA,KAAA,CAAM,IAAA,CAAK,EAAE,EAAA,EAAI,OAAA,EAAS,OAAO,OAAA,CAAQ,KAAA,GAAQ,GAAG,CAAA;AAAA,IACtD;AAAA,EACF;AAEA,EAAA,OAAO,KAAA,CAAM,KAAK,QAAQ,CAAA;AAC5B","file":"topic-scope.contract.js","sourcesContent":["/**\n * @lucern/contracts — TopicsV1 namespace (resource contracts)\n *\n * Moved from src/topic-scope.contract.ts in EK-16 T1 PR 2.\n * Compat shim remains at the old path until the Lucern 1.0.0 cut.\n */\n\nexport const ROOT_TOPIC_ID = \"n17tm38rwet7wqgzrmwahyt1z582590y\";\n\nexport type TopicDoc = {\n _id: string;\n name?: string;\n parentTopicId?: string;\n depth?: number;\n type?: string;\n};\n\n/**\n * BFS traversal collecting a topic and its neighborhood:\n * ancestors up to root + descendants down to maxDescendantDepth.\n */\nexport function collectTopicNeighborhood(\n topics: TopicDoc[],\n rootTopicId: string,\n maxDescendantDepth = 2\n): string[] {\n const byId = new Map<string, TopicDoc>();\n const children = new Map<string, string[]>();\n for (const topic of topics) {\n const id = String(topic._id);\n byId.set(id, topic);\n if (!children.has(id)) {\n children.set(id, []);\n }\n }\n for (const topic of topics) {\n if (!topic.parentTopicId) {\n continue;\n }\n const parent = String(topic.parentTopicId);\n const id = String(topic._id);\n const list = children.get(parent) || [];\n list.push(id);\n children.set(parent, list);\n }\n\n const selected = new Set<string>();\n selected.add(rootTopicId);\n\n // Ancestors\n let cursor = byId.get(rootTopicId);\n while (cursor?.parentTopicId) {\n const parentId = String(cursor.parentTopicId);\n if (selected.has(parentId)) {\n break;\n }\n selected.add(parentId);\n cursor = byId.get(parentId);\n }\n\n // Descendants\n const queue: Array<{ id: string; depth: number }> = [\n { id: rootTopicId, depth: 0 },\n ];\n while (queue.length > 0) {\n const current = queue.shift()!;\n if (current.depth >= maxDescendantDepth) {\n continue;\n }\n for (const childId of children.get(current.id) || []) {\n if (!selected.has(childId)) {\n selected.add(childId);\n }\n queue.push({ id: childId, depth: current.depth + 1 });\n }\n }\n\n return Array.from(selected);\n}\n"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"names":[],"mappings":"","file":"v1.js"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"names":[],"mappings":"","file":"v1.js"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"names":[],"mappings":"","file":"v1.js"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"names":[],"mappings":"","file":"v1.js"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"names":[],"mappings":"","file":"v1.js"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @lucern/contracts — OntologiesV1 namespace (resource contracts)
|
|
3
|
+
*
|
|
4
|
+
* Ontology Matching Engine — L0 entity type classification and similarity scoring.
|
|
5
|
+
* Provides bigram-based text similarity for matching free text against
|
|
6
|
+
* ontology entity types. Domain-agnostic: works identically for companies,
|
|
7
|
+
* molecules, code modules, or any tenant-defined entity vocabulary.
|
|
8
|
+
*
|
|
9
|
+
* Moved from src/ontology-matching.contract.ts in EK-16 T1 PR 2.
|
|
10
|
+
* Compat shim remains at the old path until the Lucern 1.0.0 cut.
|
|
11
|
+
*/
|
|
12
|
+
/** An entity type definition from a resolved ontology version. */
|
|
13
|
+
type OntologyEntityType = {
|
|
14
|
+
value: string;
|
|
15
|
+
label: string;
|
|
16
|
+
description?: string;
|
|
17
|
+
subtypes?: Array<{
|
|
18
|
+
value: string;
|
|
19
|
+
label: string;
|
|
20
|
+
description?: string;
|
|
21
|
+
}>;
|
|
22
|
+
};
|
|
23
|
+
/** A scored match between input text and an entity type. */
|
|
24
|
+
type EntityTypeMatch = {
|
|
25
|
+
entityType: string;
|
|
26
|
+
label: string;
|
|
27
|
+
score: number;
|
|
28
|
+
reason: string;
|
|
29
|
+
};
|
|
30
|
+
/** A candidate entity node that can be matched against a target node. */
|
|
31
|
+
type EntityMatchCandidate = {
|
|
32
|
+
nodeId: string;
|
|
33
|
+
entityType: string;
|
|
34
|
+
title: string;
|
|
35
|
+
canonicalText: string;
|
|
36
|
+
connectedBeliefCount: number;
|
|
37
|
+
connectedEvidenceCount: number;
|
|
38
|
+
};
|
|
39
|
+
/** A scored entity match with suggested bridge edge type. */
|
|
40
|
+
type EntityConnectionMatch = {
|
|
41
|
+
entityNodeId: string;
|
|
42
|
+
entityType: string;
|
|
43
|
+
title: string;
|
|
44
|
+
score: number;
|
|
45
|
+
suggestedEdgeType: string;
|
|
46
|
+
reason: string;
|
|
47
|
+
};
|
|
48
|
+
/**
|
|
49
|
+
* Score how well input text matches a single entity type definition.
|
|
50
|
+
* Combines bigram Jaccard similarity, word overlap, and description matching.
|
|
51
|
+
*/
|
|
52
|
+
declare function scoreEntityTypeMatch(inputText: string, entityType: OntologyEntityType): EntityTypeMatch;
|
|
53
|
+
/**
|
|
54
|
+
* Rank all entity types in an ontology against input text.
|
|
55
|
+
* Returns matches sorted by score (descending), filtered to score > minScore.
|
|
56
|
+
*/
|
|
57
|
+
declare function rankEntityTypeMatches(inputText: string, entityTypes: OntologyEntityType[], options?: {
|
|
58
|
+
minScore?: number;
|
|
59
|
+
limit?: number;
|
|
60
|
+
}): EntityTypeMatch[];
|
|
61
|
+
/**
|
|
62
|
+
* Score how well a node's text matches an entity candidate.
|
|
63
|
+
* Used by discover_entity_connections to suggest missing bridge edges.
|
|
64
|
+
*/
|
|
65
|
+
declare function scoreEntityConnection(nodeText: string, candidate: EntityMatchCandidate, options?: {
|
|
66
|
+
connectivityWeight?: number;
|
|
67
|
+
}): EntityConnectionMatch;
|
|
68
|
+
/**
|
|
69
|
+
* Rank entity candidates against a node's text.
|
|
70
|
+
* Returns sorted matches above the minimum score threshold.
|
|
71
|
+
*/
|
|
72
|
+
declare function rankEntityConnections(nodeText: string, candidates: EntityMatchCandidate[], options?: {
|
|
73
|
+
minScore?: number;
|
|
74
|
+
limit?: number;
|
|
75
|
+
connectivityWeight?: number;
|
|
76
|
+
}): EntityConnectionMatch[];
|
|
77
|
+
|
|
78
|
+
export { type EntityConnectionMatch, type EntityMatchCandidate, type EntityTypeMatch, type OntologyEntityType, rankEntityConnections, rankEntityTypeMatches, scoreEntityConnection, scoreEntityTypeMatch };
|