mumpix 1.0.19 → 1.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -14
- package/README.md +185 -8
- package/bin/mumpix.js +1 -405
- package/examples/agent-memory.js +1 -1
- package/examples/basic.js +1 -1
- package/examples/behavioral-primitives.js +50 -0
- package/examples/verified-mode.js +1 -1
- package/package.json +17 -13
- package/scripts/test-license-modes.cjs +87 -0
- package/src/brp/index.js +1 -0
- package/src/collapse/index.js +1 -0
- package/src/core/MumpixDB.js +210 -322
- package/src/core/audit.js +1 -173
- package/src/core/auth.js +1 -232
- package/src/core/inverted-index.js +144 -0
- package/src/core/license.js +1 -267
- package/src/core/ml-dsa.mjs +1 -25
- package/src/core/ml-kem.mjs +1 -32
- package/src/core/recall.js +1 -176
- package/src/core/store.js +335 -286
- package/src/core/wal-writer.js +83 -0
- package/src/index.js +20 -34
- package/src/integrations/developer-sdk.js +1 -165
- package/src/integrations/langchain-official.js +1 -0
- package/src/integrations/langchain.js +1 -131
- package/src/integrations/llamaindex-official.js +1 -0
- package/src/integrations/llamaindex.js +1 -86
- package/src/integrations/vector-sidecar.js +325 -0
- package/src/rlp/index.js +1 -0
- package/src/temporal/engine.js +1 -1894
- package/src/temporal/indexes.js +1 -178
- package/src/temporal/operators.js +1 -186
- package/scripts/postinstall-auth.js +0 -101
package/src/core/recall.js
CHANGED
|
@@ -1,176 +1 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* MumpixRecall — hybrid retrieval engine
|
|
5
|
-
*
|
|
6
|
-
* Strategy (in order):
|
|
7
|
-
* 1. Exact substring match (zero-latency)
|
|
8
|
-
* 2. TF-IDF cosine similarity (local semantic approximation, no API needed)
|
|
9
|
-
* 3. Token overlap fallback (always produces a result)
|
|
10
|
-
*
|
|
11
|
-
* Optional: pass embedFn to use your own embeddings (OpenAI, Cohere, etc.)
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
// ── Stopwords ────────────────────────────────────
|
|
15
|
-
const STOPWORDS = new Set([
|
|
16
|
-
'a','an','the','is','are','was','were','be','been','being',
|
|
17
|
-
'have','has','had','do','does','did','will','would','could',
|
|
18
|
-
'should','may','might','i','you','he','she','it','we','they',
|
|
19
|
-
'my','your','his','her','its','our','their','what','which',
|
|
20
|
-
'who','whom','that','this','these','those','and','but','or',
|
|
21
|
-
'nor','for','so','yet','in','on','at','to','of','up','by',
|
|
22
|
-
'with','about','into','through','during','before','after',
|
|
23
|
-
'above','below','from','out','off','over','under','again',
|
|
24
|
-
'then','once','here','there','when','where','why','how','all',
|
|
25
|
-
'both','each','few','more','most','other','some','such','no',
|
|
26
|
-
'not','only','own','same','than','too','very','just','can',
|
|
27
|
-
'me','him','us','them','am','get','got','put','set','let',
|
|
28
|
-
'if','as','also','even','still','already','now', 'do', 'did',
|
|
29
|
-
]);
|
|
30
|
-
|
|
31
|
-
// ── TF-IDF utilities ─────────────────────────────
|
|
32
|
-
|
|
33
|
-
function tokenize(text) {
|
|
34
|
-
return text
|
|
35
|
-
.toLowerCase()
|
|
36
|
-
.replace(/[^a-z0-9\s'-]/g, ' ')
|
|
37
|
-
.split(/\s+/)
|
|
38
|
-
.filter(t => t.length > 1 && !STOPWORDS.has(t));
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
function tf(tokens) {
|
|
42
|
-
const freq = {};
|
|
43
|
-
for (const t of tokens) freq[t] = (freq[t] || 0) + 1;
|
|
44
|
-
const len = tokens.length || 1;
|
|
45
|
-
const out = {};
|
|
46
|
-
for (const [t, c] of Object.entries(freq)) out[t] = c / len;
|
|
47
|
-
return out;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
function buildIDF(corpus) {
|
|
51
|
-
const df = {};
|
|
52
|
-
const N = corpus.length;
|
|
53
|
-
for (const doc of corpus) {
|
|
54
|
-
const seen = new Set(doc);
|
|
55
|
-
for (const t of seen) df[t] = (df[t] || 0) + 1;
|
|
56
|
-
}
|
|
57
|
-
const idf = {};
|
|
58
|
-
for (const [t, c] of Object.entries(df)) {
|
|
59
|
-
idf[t] = Math.log((N + 1) / (c + 1)) + 1;
|
|
60
|
-
}
|
|
61
|
-
return idf;
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
function tfidfVec(tfMap, idf) {
|
|
65
|
-
const vec = {};
|
|
66
|
-
for (const [t, w] of Object.entries(tfMap)) {
|
|
67
|
-
vec[t] = w * (idf[t] || 1);
|
|
68
|
-
}
|
|
69
|
-
return vec;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
function cosine(a, b) {
|
|
73
|
-
let dot = 0, normA = 0, normB = 0;
|
|
74
|
-
const keys = new Set([...Object.keys(a), ...Object.keys(b)]);
|
|
75
|
-
for (const k of keys) {
|
|
76
|
-
const va = a[k] || 0;
|
|
77
|
-
const vb = b[k] || 0;
|
|
78
|
-
dot += va * vb;
|
|
79
|
-
normA += va * va;
|
|
80
|
-
normB += vb * vb;
|
|
81
|
-
}
|
|
82
|
-
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
83
|
-
return denom === 0 ? 0 : dot / denom;
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
// ── Token overlap (tie-breaker / fallback) ───────
|
|
87
|
-
|
|
88
|
-
function tokenOverlap(queryTokens, docTokens) {
|
|
89
|
-
if (!queryTokens.length) return 0;
|
|
90
|
-
const docSet = new Set(docTokens);
|
|
91
|
-
const hits = queryTokens.filter(t => docSet.has(t)).length;
|
|
92
|
-
return hits / queryTokens.length;
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
// ── Main recall function ──────────────────────────
|
|
96
|
-
|
|
97
|
-
/**
|
|
98
|
-
* recall(query, records, opts) → Record | null
|
|
99
|
-
*
|
|
100
|
-
* opts.k — number of results to return (default 1)
|
|
101
|
-
* opts.embedFn — async fn(texts[]) → number[][] for custom embeddings
|
|
102
|
-
* opts.filter — fn(record) → bool for pre-filtering
|
|
103
|
-
* opts.since — timestamp: only consider records newer than this
|
|
104
|
-
* opts.mode — "exact" | "semantic" | "hybrid" (default "hybrid")
|
|
105
|
-
*/
|
|
106
|
-
async function recall(query, records, opts = {}) {
|
|
107
|
-
const results = await recallMany(query, records, { ...opts, k: opts.k || 1 });
|
|
108
|
-
return results.length ? results[0] : null;
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
async function recallMany(query, records, opts = {}) {
|
|
112
|
-
const k = opts.k || 5;
|
|
113
|
-
const mode = opts.mode || 'hybrid';
|
|
114
|
-
const filter = opts.filter || null;
|
|
115
|
-
const since = opts.since || null;
|
|
116
|
-
|
|
117
|
-
let pool = records;
|
|
118
|
-
if (filter) pool = pool.filter(filter);
|
|
119
|
-
if (since) pool = pool.filter(r => r.ts >= since);
|
|
120
|
-
if (!pool.length) return [];
|
|
121
|
-
|
|
122
|
-
// 1. Exact match shortcut
|
|
123
|
-
const queryLower = query.toLowerCase();
|
|
124
|
-
if (mode !== 'semantic') {
|
|
125
|
-
const exact = pool.filter(r => r.content.toLowerCase().includes(queryLower));
|
|
126
|
-
if (exact.length >= k && mode === 'exact') return exact.slice(0, k);
|
|
127
|
-
if (exact.length && mode === 'exact') return exact;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
// 2. Custom embeddings
|
|
131
|
-
if (opts.embedFn && mode !== 'exact') {
|
|
132
|
-
try {
|
|
133
|
-
const texts = [query, ...pool.map(r => r.content)];
|
|
134
|
-
const vectors = await opts.embedFn(texts);
|
|
135
|
-
const qVec = vectors[0];
|
|
136
|
-
const scored = pool.map((r, i) => ({ r, score: cosineArrays(qVec, vectors[i + 1]) }));
|
|
137
|
-
scored.sort((a, b) => b.score - a.score);
|
|
138
|
-
return scored.slice(0, k).map(s => ({ ...s.r, _score: s.score }));
|
|
139
|
-
} catch (_) { /* fall through to TF-IDF */ }
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
// 3. TF-IDF semantic
|
|
143
|
-
const qTokens = tokenize(query);
|
|
144
|
-
const docTokens = pool.map(r => tokenize(r.content));
|
|
145
|
-
const corpus = [qTokens, ...docTokens];
|
|
146
|
-
const idf = buildIDF(corpus);
|
|
147
|
-
|
|
148
|
-
const qTF = tf(qTokens);
|
|
149
|
-
const qVec = tfidfVec(qTF, idf);
|
|
150
|
-
|
|
151
|
-
const scored = pool.map((r, i) => {
|
|
152
|
-
const dVec = tfidfVec(tf(docTokens[i]), idf);
|
|
153
|
-
const sem = cosine(qVec, dVec);
|
|
154
|
-
const over = tokenOverlap(qTokens, docTokens[i]);
|
|
155
|
-
// Blend: 70% semantic + 20% token overlap + 10% recency boost
|
|
156
|
-
const recency = Math.exp(-(Date.now() - r.ts) / (1000 * 60 * 60 * 24 * 7)); // 7-day half-life
|
|
157
|
-
const score = (sem * 0.70) + (over * 0.20) + (recency * 0.10);
|
|
158
|
-
return { r, score, _debug: { sem, over, recency } };
|
|
159
|
-
});
|
|
160
|
-
|
|
161
|
-
scored.sort((a, b) => b.score - a.score);
|
|
162
|
-
return scored.slice(0, k).map(s => ({ ...s.r, _score: s.score }));
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
function cosineArrays(a, b) {
|
|
166
|
-
let dot = 0, normA = 0, normB = 0;
|
|
167
|
-
for (let i = 0; i < a.length; i++) {
|
|
168
|
-
dot += a[i] * b[i];
|
|
169
|
-
normA += a[i] * a[i];
|
|
170
|
-
normB += b[i] * b[i];
|
|
171
|
-
}
|
|
172
|
-
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
173
|
-
return denom === 0 ? 0 : dot / denom;
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
module.exports = { recall, recallMany, tokenize };
|
|
1
|
+
"use strict";

/**
 * MumpixRecall — hybrid retrieval engine.
 *
 * Ranking strategy:
 *   - mode "exact": case-insensitive substring match; falls through to the
 *     scored ranking below when nothing matches.
 *   - opts.embedFn (any mode except "exact"): cosine similarity over the
 *     caller-supplied embeddings; on any failure falls through to TF-IDF.
 *   - otherwise: a blend of TF-IDF cosine similarity (70%), query-token
 *     overlap (20%) and a recency boost (10%).
 *
 * Term tables are Maps rather than plain objects so that a token such as
 * "constructor" cannot collide with Object.prototype members and poison the
 * scores with NaN.
 */

// Words ignored by tokenize() because they carry no retrieval signal.
const STOPWORDS = new Set([
  "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could",
  "should", "may", "might", "i", "you", "he", "she", "it", "we", "they",
  "my", "your", "his", "her", "its", "our", "their", "what", "which",
  "who", "whom", "that", "this", "these", "those", "and", "but", "or",
  "nor", "for", "so", "yet", "in", "on", "at", "to", "of", "up", "by",
  "with", "about", "into", "through", "during", "before", "after",
  "above", "below", "from", "out", "off", "over", "under", "again",
  "then", "once", "here", "there", "when", "where", "why", "how", "all",
  "both", "each", "few", "more", "most", "other", "some", "such", "no",
  "not", "only", "own", "same", "than", "too", "very", "just", "can",
  "me", "him", "us", "them", "am", "get", "got", "put", "set", "let",
  "if", "as", "also", "even", "still", "already", "now",
]);

// Recency decays as exp(-age / 7 days): an e-folding time, not a half-life.
const RECENCY_TIME_CONSTANT_MS = 7 * 24 * 60 * 60 * 1000;

// Blend weights for the TF-IDF ranking path.
const SEMANTIC_WEIGHT = 0.7;
const OVERLAP_WEIGHT = 0.2;
const RECENCY_WEIGHT = 0.1;

/**
 * Split text into lowercase tokens, dropping stopwords and 1-char tokens.
 * Only a-z, 0-9, apostrophes and hyphens survive inside a token.
 *
 * @param {string} text
 * @returns {string[]}
 */
function tokenize(text) {
  return text
    .toLowerCase()
    .replace(/[^a-z0-9\s'-]/g, " ")
    .split(/\s+/)
    .filter((token) => token.length > 1 && !STOPWORDS.has(token));
}

/**
 * Relative term frequency of each token (count / total tokens).
 *
 * @param {string[]} tokens
 * @returns {Map<string, number>}
 */
function termFrequencies(tokens) {
  const counts = new Map();
  for (const token of tokens) {
    counts.set(token, (counts.get(token) || 0) + 1);
  }
  const total = tokens.length || 1;
  const frequencies = new Map();
  for (const [token, count] of counts) {
    frequencies.set(token, count / total);
  }
  return frequencies;
}

/**
 * Smoothed inverse document frequency: log((N + 1) / (df + 1)) + 1.
 *
 * @param {string[][]} corpus - one token list per document
 * @returns {Map<string, number>}
 */
function inverseDocumentFrequencies(corpus) {
  const documentCounts = new Map();
  for (const tokens of corpus) {
    for (const token of new Set(tokens)) {
      documentCounts.set(token, (documentCounts.get(token) || 0) + 1);
    }
  }
  const total = corpus.length;
  const idf = new Map();
  for (const [token, count] of documentCounts) {
    idf.set(token, Math.log((total + 1) / (count + 1)) + 1);
  }
  return idf;
}

/**
 * Weight a term-frequency map by IDF (terms without an IDF entry weigh 1).
 *
 * @param {Map<string, number>} frequencies
 * @param {Map<string, number>} idf
 * @returns {Map<string, number>}
 */
function tfidfVector(frequencies, idf) {
  const vector = new Map();
  for (const [token, weight] of frequencies) {
    vector.set(token, weight * (idf.get(token) || 1));
  }
  return vector;
}

/**
 * Cosine similarity of two sparse vectors; 0 when either has zero norm.
 *
 * @param {Map<string, number>} a
 * @param {Map<string, number>} b
 * @returns {number}
 */
function cosineSparse(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  const terms = new Set([...a.keys(), ...b.keys()]);
  for (const term of terms) {
    const va = a.get(term) || 0;
    const vb = b.get(term) || 0;
    dot += va * vb;
    normA += va * va;
    normB += vb * vb;
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}

/**
 * Cosine similarity of two dense numeric arrays; 0 when either has zero norm.
 * Iterates over the length of `a`.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
function cosineArrays(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}

/**
 * Fraction of query tokens that also occur in the document.
 *
 * @param {string[]} queryTokens
 * @param {string[]} docTokens
 * @returns {number} value in [0, 1]
 */
function tokenOverlap(queryTokens, docTokens) {
  if (!queryTokens.length) return 0;
  const docSet = new Set(docTokens);
  const hits = queryTokens.filter((token) => docSet.has(token)).length;
  return hits / queryTokens.length;
}

/**
 * Recency weight in [0, 1].
 *
 * A record without a usable timestamp gets 0 instead of NaN, which would
 * otherwise turn every blended score into NaN and disable ranking. A
 * timestamp in the future (clock skew) is clamped to 1 so it cannot outweigh
 * the semantic part of the score.
 *
 * @param {*} ts - record timestamp (epoch milliseconds, or anything that
 *   coerces to it under subtraction)
 * @param {number} now - current time in epoch milliseconds
 * @returns {number}
 */
function recencyWeight(ts, now) {
  const age = now - ts;
  if (!Number.isFinite(age)) return 0;
  if (age <= 0) return 1;
  return Math.exp(-age / RECENCY_TIME_CONSTANT_MS);
}

/**
 * Sort scored entries best-first and return copies of the top k records,
 * each annotated with `_score`.
 *
 * @param {{ record: object, score: number }[]} scored
 * @param {number} k
 * @returns {object[]}
 */
function topRecords(scored, k) {
  scored.sort((left, right) => right.score - left.score);
  return scored
    .slice(0, k)
    .map((entry) => ({ ...entry.record, _score: entry.score }));
}

/**
 * recallMany(query, records, opts) → Record[]
 *
 * @param {string} query
 * @param {object[]} records - each record has a string `content`; `ts` is
 *   used for the `since` filter and the recency boost
 * @param {object} [opts]
 * @param {number} [opts.k=5] - maximum number of results
 * @param {Function} [opts.embedFn] - async fn(texts[]) → number[][]; receives
 *   the query followed by every candidate's content
 * @param {Function} [opts.filter] - fn(record) → bool pre-filter
 * @param {number} [opts.since] - keep only records with ts >= since
 * @param {string} [opts.mode="hybrid"] - "exact" | "semantic" | "hybrid"
 * @returns {Promise<object[]>} Exact-mode hits are the original record
 *   objects; scored hits are shallow copies carrying `_score`.
 */
async function recallMany(query, records, opts = {}) {
  const k = opts.k || 5;
  const mode = opts.mode || "hybrid";
  const filter = opts.filter || null;
  const since = opts.since || null;

  let pool = records;
  if (filter) pool = pool.filter(filter);
  if (since) pool = pool.filter((record) => record.ts >= since);
  if (!pool.length) return [];

  // 1. Exact substring match. Only "exact" mode returns these hits, so the
  //    scan is skipped for the other modes.
  if (mode === "exact") {
    const needle = query.toLowerCase();
    const exact = pool.filter((record) =>
      record.content.toLowerCase().includes(needle)
    );
    if (exact.length) return exact.slice(0, k);
  }

  // 2. Caller-supplied embeddings.
  if (opts.embedFn && mode !== "exact") {
    try {
      const texts = [query, ...pool.map((record) => record.content)];
      const vectors = await opts.embedFn(texts);
      const queryVector = vectors[0];
      const scored = pool.map((record, i) => ({
        record,
        score: cosineArrays(queryVector, vectors[i + 1]),
      }));
      return topRecords(scored, k);
    } catch (_) {
      // Deliberate best-effort: an embedding failure must not break recall,
      // so fall through to the local TF-IDF ranking.
    }
  }

  // 3. TF-IDF blend.
  const queryTokens = tokenize(query);
  const docTokens = pool.map((record) => tokenize(record.content));
  const idf = inverseDocumentFrequencies([queryTokens, ...docTokens]);
  const queryVector = tfidfVector(termFrequencies(queryTokens), idf);
  const now = Date.now();

  const scored = pool.map((record, i) => {
    const docVector = tfidfVector(termFrequencies(docTokens[i]), idf);
    const semantic = cosineSparse(queryVector, docVector);
    const overlap = tokenOverlap(queryTokens, docTokens[i]);
    const recency = recencyWeight(record.ts, now);
    const score =
      semantic * SEMANTIC_WEIGHT +
      overlap * OVERLAP_WEIGHT +
      recency * RECENCY_WEIGHT;
    return { record, score };
  });

  return topRecords(scored, k);
}

/**
 * recall(query, records, opts) → Record | null
 *
 * Same options as recallMany, except that opts.k defaults to 1. Returns the
 * single best hit, or null when nothing is available.
 *
 * @param {string} query
 * @param {object[]} records
 * @param {object} [opts]
 * @returns {Promise<object|null>}
 */
async function recall(query, records, opts = {}) {
  const results = await recallMany(query, records, { ...opts, k: opts.k || 1 });
  return results.length ? results[0] : null;
}

module.exports = { recall, recallMany, tokenize };
|