@alaarab/cortex 1.13.5 → 1.13.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,26 @@ let dfCacheGeneration = 0;
14
14
  export function invalidateDfCache() {
15
15
  dfCacheGeneration++;
16
16
  dfCache.clear();
17
+ tokenCache.clear();
18
+ }
19
+ // Module-level cache for tokenized document content.
20
+ // Keyed by the content length plus its first 32 characters (a cheap prefix key, not a true hash), so the same document content is tokenized only once until the cache is cleared or the entry is evicted.
21
+ // Cleared on full rebuild (same lifecycle as dfCache). Max 2000 entries to bound memory.
22
+ const MAX_TOKEN_CACHE = 2000;
23
+ const tokenCache = new Map();
24
+ function cachedTokenize(text) {
25
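+ // Use length + first 32 chars as a cheap key (avoids hashing cost for most cases). NOTE: two texts with the same length and the same first 32 chars share a key, so the second one receives the first one's cached tokens.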
26
+ const key = `${text.length}:${text.slice(0, 32)}`;
27
+ const hit = tokenCache.get(key);
28
+ if (hit)
29
+ return hit;
30
+ const tokens = tokenize(text);
31
+ if (tokenCache.size >= MAX_TOKEN_CACHE) {
32
+ // Evict oldest entry
33
+ tokenCache.delete(tokenCache.keys().next().value ?? "");
34
+ }
35
+ tokenCache.set(key, tokens);
36
+ return tokens;
17
37
  }
18
38
  /**
19
39
  * Tokenize text into non-stop-word tokens for TF-IDF computation, with stemming.
@@ -34,10 +54,10 @@ function tfidfCosine(docs, query) {
34
54
  const queryTokens = tokenize(query);
35
55
  if (queryTokens.length === 0)
36
56
  return docs.map(() => 0);
37
- // Collect all unique terms from query + all docs
57
+ // Collect all unique terms from query + all docs (use cached tokenization for repeated content)
38
58
  const allTokens = new Set(queryTokens);
39
59
  const docTokenLists = docs.map(d => {
40
- const tokens = tokenize(d);
60
+ const tokens = cachedTokenize(d);
41
61
  for (const t of tokens)
42
62
  allTokens.add(t);
43
63
  return tokens;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@alaarab/cortex",
3
- "version": "1.13.5",
3
+ "version": "1.13.6",
4
4
  "description": "Long-term memory for AI agents — stored as markdown in a git repo you own.",
5
5
  "type": "module",
6
6
  "bin": {