n2-qln 3.3.4 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.ko.md CHANGED
@@ -29,7 +29,9 @@
29
29
 
30
30
  πŸ” **ν•˜λ‚˜μ˜ λ„κ΅¬λ‘œ λͺ¨λ“  것을** β€” AIλŠ” `n2_qln_call` (~200 토큰)만 λ΄…λ‹ˆλ‹€. 1,000개의 κ°œλ³„ 도ꡬ가 μ•„λ‹™λ‹ˆλ‹€. 99.6% μ»¨ν…μŠ€νŠΈ 절감.
31
31
 
32
- ⚡ **5ms 이하 검색** — 3단계 검색 엔진 (트리거 + 키워드 + 시맨틱)이 1,000개 이상의 도구에서도 5ms 이내에 최적 도구를 찾습니다.
32
+ ⚡ **5ms 이하 검색** — 3단계 검색 엔진 (트리거 + BM25 키워드 + 시맨틱)이 1,000개 이상의 도구에서도 5ms 이내에 최적 도구를 찾습니다.
33
+
34
+ 🎯 **BM25 키워드 랭킹** *(v3.4)* — Stage 2에 [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) 알고리즘 적용. 희귀한 단어일수록 높은 점수, 문서 길이 정규화. Google, Elasticsearch, Wikipedia 검색의 핵심 알고리즘.
33
35
 
34
36
  📈 **자동 학습 랭킹** — 많이 사용되고 성공률이 높은 도구는 자동으로 상위에 랭크됩니다. 수동 튜닝 불필요.
35
37
 
@@ -184,14 +186,14 @@ QLN은 세 단계의 검색으로 적합한 도구를 찾습니다:
184
186
  | 단계 | 방식 | 속도 | 작동 원리 |
185
187
  |:---:|--------|:---:|---------|
186
188
  | **1** | 트리거 매칭 | ⚡ <1ms | 도구 이름과 트리거 키워드 정확 매칭 |
187
- | **2** | 키워드 검색 | ⚡ 1-3ms | 설명, 태그, 예제 전문 검색 |
189
+ | **2** | BM25 키워드 | ⚡ 1-3ms | [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) 랭킹 검색 — IDF 가중치 + 문서 길이 정규화 *(v3.4)* |
188
190
  | **3** | 시맨틱 검색 | 🧠 5-15ms | 임베딩 벡터 유사도 검색 *(선택, Ollama 필요)* |
189
191
 
190
192
  모든 단계의 결과를 병합 후 랭킹:
191
193
 
192
194
  ```
193
195
  final_score = trigger_score × 3.0
194
- + keyword_score × 1.0
196
+ + bm25_keyword_score × 1.0
195
197
  + semantic_score × 2.0
196
198
  + log2(usage_count + 1) × 0.5
197
199
  + success_rate × 1.0
@@ -397,7 +399,7 @@ n2-qln/
397
399
  │ ├── schema.js # 도구 스키마 정규화 + 검색 텍스트 빌더
398
400
  │ ├── validator.js # 강제 검증 (이름, 설명, 카테고리)
399
401
  │ ├── registry.js # 도구 CRUD + 사용량 추적 + 임베딩 캐시
400
- │ ├── router.js # 3단계 검색 엔진
402
+ │ ├── router.js # 3단계 검색 엔진 (BM25 v3.4)
401
403
  │ ├── vector-index.js # Float32 벡터 인덱스 (centroid hierarchy)
402
404
  │ ├── embedding.js # Ollama 임베딩 클라이언트 (nomic-embed-text)
403
405
  │ ├── executor.js # HTTP/함수 도구 실행기
package/README.md CHANGED
@@ -29,7 +29,9 @@
29
29
 
30
30
  πŸ” **One tool to rule them all** β€” Your AI sees `n2_qln_call` (~200 tokens), not 1,000 individual tools. 99.6% context reduction.
31
31
 
32
- ⚡ **Sub-5ms search** — 3-stage search engine (trigger + keyword + semantic) finds the right tool in under 5ms, even with 1,000+ tools indexed.
32
+ ⚡ **Sub-5ms search** — 3-stage search engine (trigger + BM25 keyword + semantic) finds the right tool in under 5ms, even with 1,000+ tools indexed.
33
+
34
+ 🎯 **BM25 keyword ranking** *(v3.4)* — Stage 2 uses [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) for keyword search. Rare terms score higher, document length is normalized. The same algorithm behind Google, Elasticsearch, and Wikipedia search.
33
35
 
34
36
  📈 **Self-learning ranking** — Tools that get used more and succeed more are automatically ranked higher over time. No manual tuning needed.
35
37
 
@@ -184,14 +186,14 @@ QLN finds the right tool using three parallel search stages:
184
186
  | Stage | Method | Speed | How it works |
185
187
  |:---:|--------|:---:|------|
186
188
  | **1** | Trigger Match | ⚡ <1ms | Matches exact words in tool names and trigger keywords |
187
- | **2** | Keyword Search | ⚡ 1-3ms | Full-text search across descriptions, tags, and examples |
189
+ | **2** | BM25 Keyword | ⚡ 1-3ms | [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) ranked search — IDF weighting + document length normalization *(v3.4)* |
188
190
  | **3** | Semantic Search | 🧠 5-15ms | Vector similarity using embeddings *(optional, requires Ollama)* |
189
191
 
190
192
  Results from all stages are merged and ranked:
191
193
 
192
194
  ```
193
195
  final_score = trigger_score × 3.0
194
- + keyword_score × 1.0
196
+ + bm25_keyword_score × 1.0
195
197
  + semantic_score × 2.0
196
198
  + log2(usage_count + 1) × 0.5
197
199
  + success_rate × 1.0
@@ -417,7 +419,7 @@ n2-qln/
417
419
  │ ├── schema.js # Tool schema normalization + search text builder
418
420
  │ ├── validator.js # Enforced validation (name, description, category)
419
421
  │ ├── registry.js # Tool CRUD + usage tracking + embedding cache
420
- │ ├── router.js # 3-stage parallel search engine
422
+ │ ├── router.js # 3-stage parallel search engine (BM25 v3.4)
421
423
  │ ├── vector-index.js # Float32 vector index with centroid hierarchy
422
424
  │ ├── embedding.js # Ollama embedding client (nomic-embed-text)
423
425
  │ ├── executor.js # HTTP/function tool executor
package/lib/router.js CHANGED
@@ -1,12 +1,12 @@
1
1
  // QLN — L1 Router (3-Stage parallel search engine)
2
- // Query → Stage1(Trigger) + Stage2(Keyword) + Stage3(Semantic) → Merge → Top-K
2
+ // Query → Stage1(Trigger) + Stage2(BM25 Keyword) + Stage3(Semantic) → Merge → Top-K
3
3
  const { buildSearchText } = require('./schema');
4
4
 
5
5
  /**
6
6
  * 3-Stage search engine.
7
7
  *
8
8
  * Score formula:
9
- * final = trigger×3.0 + keyword×1.0 + semantic×2.0
9
+ * final = trigger×3.0 + bm25_keyword×1.0 + semantic×2.0
10
10
  * + log2(usageCount+1)×0.5 + successRate×1.0
11
11
  */
12
12
  class Router {
@@ -19,6 +19,15 @@ class Router {
19
19
  this._registry = registry;
20
20
  this._vectorIndex = vectorIndex;
21
21
  this._embedding = embedding;
22
+
23
+ // BM25 parameters (standard Okapi BM25 defaults)
24
+ this._k1 = 1.2; // Term frequency saturation
25
+ this._b = 0.75; // Document length normalization
26
+
27
+ // IDF cache (rebuilt when tools change)
28
+ this._idfCache = new Map();
29
+ this._avgDocLen = 0;
30
+ this._idfDirty = true;
22
31
  }
23
32
 
24
33
  /**
@@ -34,14 +43,17 @@ class Router {
34
43
  const timing = { stage1: 0, stage2: 0, stage3: 0, merge: 0, total: 0 };
35
44
  const t0 = Date.now();
36
45
 
46
+ // Rebuild IDF if registry changed
47
+ if (this._idfDirty) this._buildIDF();
48
+
37
49
  // Stage 1: Trigger exact match (fastest)
38
50
  const t1 = Date.now();
39
51
  this._stage1TriggerMatch(query, scores);
40
52
  timing.stage1 = Date.now() - t1;
41
53
 
42
- // Stage 2: Keyword match (search_text LIKE)
54
+ // Stage 2: BM25 keyword search
43
55
  const t2 = Date.now();
44
- this._stage2KeywordMatch(query, scores);
56
+ this._stage2BM25(query, scores);
45
57
  timing.stage2 = Date.now() - t2;
46
58
 
47
59
  // Stage 3: Semantic vector search (when embedding available)
@@ -73,20 +85,101 @@ class Router {
73
85
  }
74
86
  }
75
87
 
76
- /** Stage 2: search_text keyword match. Weight: 1.0 */
77
- _stage2KeywordMatch(query, scores) {
78
- const queryWords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2);
88
+ /** Stage 2: BM25 keyword search. Weight: 1.0 */
89
+ _stage2BM25(query, scores) {
90
+ const queryTerms = this._tokenize(query);
91
+ if (queryTerms.length === 0) return;
92
+
79
93
  for (const tool of this._registry.getAll()) {
80
94
  const text = (tool.searchText || buildSearchText(tool)).toLowerCase();
81
- let matchCount = 0;
82
- for (const word of queryWords) {
83
- if (text.includes(word)) matchCount++;
95
+ const bm25 = this._bm25Score(queryTerms, text);
96
+ if (bm25 > 0) {
97
+ this._getOrCreate(scores, tool.name).stage2 = bm25 * 1.0;
84
98
  }
85
- if (matchCount > 0) {
86
- this._getOrCreate(scores, tool.name).stage2 =
87
- (matchCount / Math.max(queryWords.length, 1)) * 1.0;
99
+ }
100
+ }
101
+
102
+ /**
103
+ * Calculate BM25 score for a query against a document.
104
+ * @param {string[]} queryTerms - Tokenized query terms
105
+ * @param {string} docText - Document text (lowercased)
106
+ * @returns {number} BM25 score
107
+ */
108
+ _bm25Score(queryTerms, docText) {
109
+ const docTerms = docText.split(/[\s_\-./]+/).filter(w => w.length > 1);
110
+ const docLen = docTerms.length;
111
+ if (docLen === 0) return 0;
112
+
113
+ // Build term frequency map for this document
114
+ const tf = new Map();
115
+ for (const term of docTerms) {
116
+ tf.set(term, (tf.get(term) || 0) + 1);
117
+ }
118
+
119
+ let score = 0;
120
+ for (const term of queryTerms) {
121
+ const idf = this._idfCache.get(term) || 0;
122
+ const freq = tf.get(term) || 0;
123
+ if (freq === 0) continue;
124
+
125
+ // BM25 formula: IDF × (f × (k1+1)) / (f + k1 × (1 - b + b × |d|/avgDL))
126
+ const numerator = freq * (this._k1 + 1);
127
+ const denominator = freq + this._k1 * (1 - this._b + this._b * (docLen / this._avgDocLen));
128
+ score += idf * (numerator / denominator);
129
+ }
130
+
131
+ return score;
132
+ }
133
+
134
+ /**
135
+ * Build IDF cache from all registered tools.
136
+ * IDF(term) = ln((N - n(t) + 0.5) / (n(t) + 0.5) + 1)
137
+ * where N = total docs, n(t) = docs containing term
138
+ */
139
+ _buildIDF() {
140
+ const tools = this._registry.getAll();
141
+ const N = tools.length;
142
+ if (N === 0) {
143
+ this._idfDirty = false;
144
+ return;
145
+ }
146
+
147
+ // Tokenize all documents and count document frequencies
148
+ const docFreq = new Map();
149
+ let totalLen = 0;
150
+
151
+ for (const tool of tools) {
152
+ const text = (tool.searchText || buildSearchText(tool)).toLowerCase();
153
+ const terms = text.split(/[\s_\-./]+/).filter(w => w.length > 1);
154
+ totalLen += terms.length;
155
+
156
+ // Unique terms per document
157
+ const uniqueTerms = new Set(terms);
158
+ for (const term of uniqueTerms) {
159
+ docFreq.set(term, (docFreq.get(term) || 0) + 1);
88
160
  }
89
161
  }
162
+
163
+ this._avgDocLen = totalLen / N;
164
+
165
+ // Calculate IDF for each term
166
+ this._idfCache.clear();
167
+ for (const [term, df] of docFreq) {
168
+ // BM25 IDF: ln((N - df + 0.5) / (df + 0.5) + 1)
169
+ const idf = Math.log((N - df + 0.5) / (df + 0.5) + 1);
170
+ this._idfCache.set(term, idf);
171
+ }
172
+
173
+ this._idfDirty = false;
174
+ }
175
+
176
+ /**
177
+ * Tokenize query string into search terms.
178
+ * @param {string} query
179
+ * @returns {string[]}
180
+ */
181
+ _tokenize(query) {
182
+ return query.toLowerCase().split(/[\s_\-./]+/).filter(w => w.length > 2);
90
183
  }
91
184
 
92
185
  /** Stage 3: Semantic vector search. Weight: 2.0 */
@@ -167,11 +260,17 @@ class Router {
167
260
  return ranked;
168
261
  }
169
262
 
170
- /** Build vector index */
263
+ /** Build vector index and refresh IDF cache */
171
264
  buildIndex() {
265
+ this._idfDirty = true;
172
266
  return this._vectorIndex.build(this._registry.getAll());
173
267
  }
174
268
 
269
+ /** Mark IDF cache as dirty (call after tool registration changes) */
270
+ invalidateIDF() {
271
+ this._idfDirty = true;
272
+ }
273
+
175
274
  /** @private */
176
275
  _getOrCreate(scores, name) {
177
276
  if (!scores.has(name)) scores.set(name, { stage1: 0, stage2: 0, stage3: 0 });
@@ -184,8 +283,15 @@ class Router {
184
283
  registrySize: this._registry.size,
185
284
  vectorIndex: this._vectorIndex.stats(),
186
285
  embeddingAvailable: !!this._embedding,
286
+ bm25: {
287
+ idfTerms: this._idfCache.size,
288
+ avgDocLen: Math.round(this._avgDocLen * 10) / 10,
289
+ k1: this._k1,
290
+ b: this._b,
291
+ },
187
292
  };
188
293
  }
189
294
  }
190
295
 
191
296
  module.exports = { Router };
297
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "n2-qln",
3
- "version": "3.3.4",
3
+ "version": "3.4.0",
4
4
  "description": "Query Layer Network — Semantic tool dispatcher for MCP. Route 1000 tools through 1 router.",
5
5
  "main": "index.js",
6
6
  "bin": {