mumpix 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mumpix might be problematic. Click here for more details.

@@ -0,0 +1,176 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * MumpixRecall — hybrid retrieval engine
5
+ *
6
+ * Strategy (in order):
7
+ * 1. Exact substring match (zero-latency)
8
+ * 2. TF-IDF cosine similarity (local semantic approximation, no API needed)
9
+ * 3. Token overlap fallback (always produces a result)
10
+ *
11
+ * Optional: pass embedFn to use your own embeddings (OpenAI, Cohere, etc.)
12
+ */
13
+
14
+ // ── Stopwords ────────────────────────────────────
15
/**
 * Common English function words excluded during tokenization so that
 * TF-IDF and overlap scoring focus on content-bearing terms.
 * Fix: the original list declared 'do' and 'did' twice; a Set ignores
 * duplicates at runtime, but the redundant entries are removed here.
 */
const STOPWORDS = new Set([
  'a','an','the','is','are','was','were','be','been','being',
  'have','has','had','do','does','did','will','would','could',
  'should','may','might','i','you','he','she','it','we','they',
  'my','your','his','her','its','our','their','what','which',
  'who','whom','that','this','these','those','and','but','or',
  'nor','for','so','yet','in','on','at','to','of','up','by',
  'with','about','into','through','during','before','after',
  'above','below','from','out','off','over','under','again',
  'then','once','here','there','when','where','why','how','all',
  'both','each','few','more','most','other','some','such','no',
  'not','only','own','same','than','too','very','just','can',
  'me','him','us','them','am','get','got','put','set','let',
  'if','as','also','even','still','already','now',
]);
30
+
31
+ // ── TF-IDF utilities ─────────────────────────────
32
+
33
/**
 * Split free text into lowercase content tokens.
 * Keeps letters, digits, apostrophes and hyphens; everything else becomes
 * whitespace. Drops one-character tokens and anything in STOPWORDS.
 */
function tokenize(text) {
  const normalized = text.toLowerCase().replace(/[^a-z0-9\s'-]/g, ' ');
  const tokens = [];
  for (const candidate of normalized.split(/\s+/)) {
    if (candidate.length > 1 && !STOPWORDS.has(candidate)) {
      tokens.push(candidate);
    }
  }
  return tokens;
}
40
+
41
/**
 * Term frequency: maps each token to count / total tokens.
 * An empty token list yields an empty map (the `|| 1` guard only
 * prevents division by zero; it never produces entries).
 */
function tf(tokens) {
  const total = tokens.length || 1;
  const counts = new Map();
  for (const token of tokens) {
    counts.set(token, (counts.get(token) || 0) + 1);
  }
  const frequencies = {};
  for (const [token, count] of counts) {
    frequencies[token] = count / total;
  }
  return frequencies;
}
49
+
50
/**
 * Smoothed inverse document frequency over a corpus of token arrays:
 *   idf(t) = ln((N + 1) / (df(t) + 1)) + 1
 * where df(t) counts documents containing t at least once.
 */
function buildIDF(corpus) {
  const documentCount = corpus.length;
  const documentFrequency = {};
  for (const tokens of corpus) {
    for (const term of new Set(tokens)) {
      documentFrequency[term] = (documentFrequency[term] || 0) + 1;
    }
  }
  const idf = {};
  for (const [term, df] of Object.entries(documentFrequency)) {
    idf[term] = Math.log((documentCount + 1) / (df + 1)) + 1;
  }
  return idf;
}
63
+
64
/** Weight a TF map by IDF; terms absent from the IDF table default to 1. */
function tfidfVec(tfMap, idf) {
  return Object.fromEntries(
    Object.entries(tfMap).map(([term, weight]) => [term, weight * (idf[term] || 1)])
  );
}
71
+
72
/**
 * Cosine similarity between two sparse vectors (term → weight maps).
 * Missing terms count as 0; returns 0 when either vector has zero norm.
 */
function cosine(a, b) {
  const keys = new Set([...Object.keys(a), ...Object.keys(b)]);
  let dot = 0;
  let sqA = 0;
  let sqB = 0;
  keys.forEach((key) => {
    const x = a[key] || 0;
    const y = b[key] || 0;
    dot += x * y;
    sqA += x * x;
    sqB += y * y;
  });
  const denom = Math.sqrt(sqA) * Math.sqrt(sqB);
  return denom === 0 ? 0 : dot / denom;
}
85
+
86
+ // ── Token overlap (tie-breaker / fallback) ───────
87
+
88
/**
 * Fraction of query tokens that also appear in the document's tokens.
 * Duplicated query tokens each count; an empty query scores 0.
 */
function tokenOverlap(queryTokens, docTokens) {
  if (queryTokens.length === 0) return 0;
  const docSet = new Set(docTokens);
  let hits = 0;
  for (const token of queryTokens) {
    if (docSet.has(token)) hits += 1;
  }
  return hits / queryTokens.length;
}
94
+
95
+ // ── Main recall function ──────────────────────────
96
+
97
/**
 * recall(query, records, opts) → Record | null
 *
 * Convenience wrapper around recallMany that returns the single best
 * match, or null when nothing qualifies.
 *
 * opts.k       — number of results to consider (default 1)
 * opts.embedFn — async fn(texts[]) → number[][] for custom embeddings
 * opts.filter  — fn(record) → bool for pre-filtering
 * opts.since   — timestamp: only consider records newer than this
 * opts.mode    — "exact" | "semantic" | "hybrid" (default "hybrid")
 */
async function recall(query, records, opts = {}) {
  const matches = await recallMany(query, records, { ...opts, k: opts.k || 1 });
  return matches.length > 0 ? matches[0] : null;
}
110
+
111
/**
 * recallMany(query, records, opts) → Record[] (ranked, best first)
 *
 * Retrieval strategy, in the order the module docstring promises:
 *   1. Exact substring match (case-insensitive, zero-latency shortcut)
 *   2. Custom embeddings via opts.embedFn (best-effort)
 *   3. TF-IDF + token overlap + recency blend (always produces a result)
 *
 * opts.k       — number of results to return (default 5)
 * opts.mode    — "exact" | "semantic" | "hybrid" (default "hybrid")
 * opts.filter  — fn(record) → bool pre-filter
 * opts.since   — timestamp: only records with r.ts >= since are considered
 * opts.embedFn — async fn(texts[]) → number[][]
 *
 * Fixes over the original implementation:
 *   - In "hybrid" mode exact matches were computed but never returned
 *     (the shortcut only fired for mode === "exact"); they now
 *     short-circuit as documented.
 *   - In "exact" mode a miss used to fall through to TF-IDF scoring,
 *     violating the mode contract; it now returns [].
 *   - The blend comment claimed 70/30 but the weights are 70/20/10.
 */
async function recallMany(query, records, opts = {}) {
  const k = opts.k || 5;
  const mode = opts.mode || 'hybrid';
  const filter = opts.filter || null;
  const since = opts.since || null;

  let pool = records;
  if (filter) pool = pool.filter(filter);
  if (since) pool = pool.filter(r => r.ts >= since);
  if (!pool.length) return [];

  // 1. Exact substring match — short-circuits in both exact and hybrid mode.
  if (mode !== 'semantic') {
    const queryLower = query.toLowerCase();
    const exact = pool.filter(r => r.content.toLowerCase().includes(queryLower));
    if (exact.length) return exact.slice(0, k).map(r => ({ ...r, _score: 1 }));
    // Exact mode never falls back to semantic scoring.
    if (mode === 'exact') return [];
  }

  // 2. Custom embeddings — best-effort: any failure falls through to TF-IDF.
  if (opts.embedFn && mode !== 'exact') {
    try {
      const texts = [query, ...pool.map(r => r.content)];
      const vectors = await opts.embedFn(texts);
      const qVec = vectors[0];
      const scored = pool.map((r, i) => ({ r, score: cosineArrays(qVec, vectors[i + 1]) }));
      scored.sort((a, b) => b.score - a.score);
      return scored.slice(0, k).map(s => ({ ...s.r, _score: s.score }));
    } catch (_) { /* fall through to TF-IDF */ }
  }

  // 3. TF-IDF semantic scoring, blended with token overlap and recency.
  const qTokens = tokenize(query);
  const docTokens = pool.map(r => tokenize(r.content));
  const idf = buildIDF([qTokens, ...docTokens]);
  const qVec = tfidfVec(tf(qTokens), idf);

  const scored = pool.map((r, i) => {
    const sem = cosine(qVec, tfidfVec(tf(docTokens[i]), idf));
    const over = tokenOverlap(qTokens, docTokens[i]);
    // Blend: 70% semantic + 20% overlap + 10% recency (exponential ~7-day decay).
    const recency = Math.exp(-(Date.now() - r.ts) / (1000 * 60 * 60 * 24 * 7));
    const score = (sem * 0.70) + (over * 0.20) + (recency * 0.10);
    return { r, score, _debug: { sem, over, recency } };
  });

  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, k).map(s => ({ ...s.r, _score: s.score }));
}
164
+
165
/**
 * Cosine similarity between two equal-length dense numeric arrays
 * (used for caller-supplied embedding vectors).
 * Returns 0 when either vector has zero norm.
 */
function cosineArrays(a, b) {
  let dot = 0;
  let sqA = 0;
  let sqB = 0;
  a.forEach((x, i) => {
    const y = b[i];
    dot += x * y;
    sqA += x * x;
    sqB += y * y;
  });
  const denom = Math.sqrt(sqA) * Math.sqrt(sqB);
  return denom === 0 ? 0 : dot / denom;
}
175
+
176
// Public API of the recall module; tokenize is exported for custom pipelines.
module.exports = { recall, recallMany, tokenize };
@@ -0,0 +1,230 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * MumpixStore — crash-safe, append-only storage engine
5
+ *
6
+ * File format (.mumpix):
7
+ * Line 0: JSON header {"v":1,"consistency":"strict","created":ts}
8
+ * Line N: JSON record {"id":1,"content":"...","ts":ts,"h":"0xabc"}
9
+ *
10
+ * WAL (.mumpix.wal):
11
+ * Each line: {"op":"write"|"clear","entry"?:{...},"ts":ts}
12
+ * Replayed on open if present, then merged and deleted.
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+ const os = require('os');
18
+
19
// On-disk format version stamped into every file header; files carrying
// a different version are rejected by _load().
const MAGIC_VERSION = 1;

/**
 * MumpixStore — crash-safe, append-only storage engine.
 *
 * Main file (.mumpix):
 *   Line 0: JSON header {"v":1,"consistency":"strict","created":ts}
 *   Line N: JSON record {"id":1,"content":"...","ts":ts,"h":"0xabc"}
 *
 * WAL (.mumpix.wal):
 *   One JSON line per pending op: {"op":"write"|"clear","entry"?:{...},"ts":ts}
 *   Replayed on open if present, merged into the main file, then deleted.
 */
class MumpixStore {
  /** @param {string} filePath — path of the .mumpix file (created on open). */
  constructor(filePath) {
    this.filePath = path.resolve(filePath);
    this.walPath = this.filePath + '.wal';
    this.header = null;   // parsed/constructed header object
    this.records = [];    // in-memory mirror: { id, content, ts, h }
    this._nextId = 1;     // next record id to assign
    this._fd = null;      // open file descriptor for appends
  }

  // ── Public ──────────────────────────────────────

  /**
   * Open (or create) the store. Replays any leftover WAL from a previous
   * crash, then keeps an append-mode fd for writes.
   * opts.consistency — 'eventual' (default) | 'strict' | 'verified';
   * strict/verified fdatasync() after every write.
   * @returns {MumpixStore} this, for chaining
   */
  open(opts = {}) {
    const consistency = opts.consistency || 'eventual';

    if (fs.existsSync(this.filePath)) {
      this._load();
      // Recover any writes that never reached the main file.
      this._replayWAL();
    } else {
      this.header = {
        v: MAGIC_VERSION,
        consistency,
        created: Date.now(),
        path: path.basename(this.filePath),
      };
      this._writeHeader();
    }

    // Persist a consistency change requested by the caller.
    if (this.header.consistency !== consistency) {
      this.header.consistency = consistency;
      this._rewriteFull();
    }

    this._fd = fs.openSync(this.filePath, 'a');
    return this;
  }

  /** Close the append fd; the store may be re-open()ed afterwards. */
  close() {
    if (this._fd !== null) {
      fs.closeSync(this._fd);
      this._fd = null;
    }
  }

  /**
   * Append a record. WAL-first for crash safety: the entry hits the WAL,
   * then the main file, then (strict/verified) the disk; only then is the
   * WAL cleared.
   *
   * Fixes vs. original: validates input, refuses writes on a closed store,
   * and hashes the *trimmed* content that is actually stored so record.h
   * can later be re-verified against record.content.
   *
   * @param {string} content — text to store (leading/trailing space trimmed)
   * @returns the new record { id, content, ts, h }
   * @throws {TypeError} when content is not a string
   * @throws {Error} when the store has not been open()ed
   */
  write(content) {
    if (typeof content !== 'string') {
      throw new TypeError('mumpix: write() expects a string');
    }
    if (this._fd === null) {
      throw new Error('mumpix: store is not open — call open() first');
    }

    const trimmed = content.trim();
    const record = {
      id: this._nextId++,
      content: trimmed,
      ts: Date.now(),
      h: this._hash(trimmed),
    };

    // 1. Write-ahead log entry.
    const walEntry = JSON.stringify({ op: 'write', entry: record, ts: Date.now() }) + '\n';
    fs.appendFileSync(this.walPath, walEntry, 'utf8');

    // 2. Commit to main file.
    fs.writeSync(this._fd, JSON.stringify(record) + '\n', null, 'utf8');

    // 3. Flush to disk when the caller asked for durability.
    const consistency = this.header.consistency;
    if (consistency === 'strict' || consistency === 'verified') {
      fs.fdatasyncSync(this._fd);
    }

    // 4. Commit succeeded — drop the WAL entry.
    this._clearWAL();

    this.records.push(record);
    return record;
  }

  /**
   * Clear all records — WAL-first, like write().
   * @returns {number} how many records were removed
   */
  clear() {
    const count = this.records.length;

    fs.appendFileSync(this.walPath, JSON.stringify({ op: 'clear', count, ts: Date.now() }) + '\n', 'utf8');

    this.records = [];
    this._nextId = 1;
    this._rewriteFull();
    this._clearWAL();

    return count;
  }

  /** Return all records as shallow copies (callers cannot mutate the store). */
  all() {
    return this.records.map(r => ({ ...r }));
  }

  /** Return store metadata: path, consistency, record count, size, version. */
  stats() {
    const stat = fs.existsSync(this.filePath) ? fs.statSync(this.filePath) : null;
    return {
      path: this.filePath,
      consistency: this.header.consistency,
      records: this.records.length,
      created: this.header.created,
      sizeBytes: stat ? stat.size : 0,
      version: this.header.v,
    };
  }

  // ── Private ─────────────────────────────────────

  // Parse the main file: line 0 is the header, the rest are records.
  // Corrupt record lines are skipped; _nextId advances past the max id seen.
  _load() {
    const raw = fs.readFileSync(this.filePath, 'utf8');
    const lines = raw.split('\n').filter(l => l.trim());

    if (!lines.length) throw new Error(`mumpix: corrupt or empty file: ${this.filePath}`);

    this.header = JSON.parse(lines[0]);
    if (this.header.v !== MAGIC_VERSION) {
      throw new Error(`mumpix: unsupported file version ${this.header.v}`);
    }

    this.records = [];
    for (let i = 1; i < lines.length; i++) {
      try {
        const r = JSON.parse(lines[i]);
        if (r && r.id && r.content) {
          this.records.push(r);
          if (r.id >= this._nextId) this._nextId = r.id + 1;
        }
      } catch (_) { /* skip corrupt lines */ }
    }
  }

  // Replay a leftover WAL (crash between WAL append and main-file commit):
  // apply uncommitted writes/clears, rewrite the main file if anything
  // changed, then delete the WAL.
  _replayWAL() {
    if (!fs.existsSync(this.walPath)) return;

    const raw = fs.readFileSync(this.walPath, 'utf8');
    const lines = raw.split('\n').filter(l => l.trim());
    let dirty = false;

    for (const line of lines) {
      try {
        const entry = JSON.parse(line);
        if (entry.op === 'write' && entry.entry) {
          const exists = this.records.find(r => r.id === entry.entry.id);
          if (!exists) {
            this.records.push(entry.entry);
            if (entry.entry.id >= this._nextId) this._nextId = entry.entry.id + 1;
            dirty = true;
          }
        } else if (entry.op === 'clear') {
          this.records = [];
          this._nextId = 1;
          dirty = true;
        }
      } catch (_) { /* skip corrupt WAL lines */ }
    }

    if (dirty) this._rewriteFull();
    this._clearWAL();
  }

  // Create the file (and parent directory) containing only the header line.
  _writeHeader() {
    const dir = path.dirname(this.filePath);
    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
    fs.writeFileSync(this.filePath, JSON.stringify(this.header) + '\n', 'utf8');
  }

  // Atomically rewrite header + all records via temp-file rename, then
  // re-open the append fd (the old fd points at the replaced inode).
  _rewriteFull() {
    const tmp = this.filePath + '.tmp.' + process.pid;
    const lines = [JSON.stringify(this.header)];
    for (const r of this.records) lines.push(JSON.stringify(r));
    fs.writeFileSync(tmp, lines.join('\n') + '\n', 'utf8');
    fs.renameSync(tmp, this.filePath);

    if (this._fd !== null) {
      fs.closeSync(this._fd);
      this._fd = fs.openSync(this.filePath, 'a');
    }
  }

  // Delete the WAL file if present (all its entries are committed).
  _clearWAL() {
    if (fs.existsSync(this.walPath)) {
      fs.unlinkSync(this.walPath);
    }
  }

  // 32-bit FNV-1a hash, rendered as a zero-padded hex string.
  _hash(s) {
    let h = 0x811c9dc5;
    for (let i = 0; i < s.length; i++) {
      h ^= s.charCodeAt(i);
      h = Math.imul(h, 0x01000193);
    }
    return '0x' + (h >>> 0).toString(16).padStart(8, '0');
  }
}
229
+
230
// Sole export: the crash-safe append-only storage engine.
module.exports = { MumpixStore };
package/src/index.js ADDED
@@ -0,0 +1,38 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * mumpix — SQLite for AI
5
+ *
6
+ * Quick start:
7
+ * const { Mumpix } = require('mumpix')
8
+ * const db = Mumpix.open('./agent.mumpix', { consistency: 'strict' })
9
+ *
10
+ * await db.remember('User prefers TypeScript')
11
+ * const ans = await db.recall('What language do they prefer?')
12
+ * console.log(ans) // → 'User prefers TypeScript'
13
+ *
14
+ * await db.close()
15
+ */
16
+
17
const { MumpixDB } = require('./core/MumpixDB');
const { MumpixStore } = require('./core/store');
const { MumpixAudit } = require('./core/audit');
const { recall, recallMany, tokenize } = require('./core/recall');

// Convenience alias so callers can write Mumpix.open(...) — identical object
// to MumpixDB, not a wrapper.
const Mumpix = MumpixDB;

module.exports = {
  // Primary export — the high-level facade most callers should use.
  Mumpix,
  MumpixDB,

  // Lower-level building blocks for advanced use: MumpixStore is the
  // append-only file engine, MumpixAudit the audit log.
  MumpixStore,
  MumpixAudit,

  // Recall utilities — useful for custom retrieval pipelines.
  recall,
  recallMany,
  tokenize,
};
@@ -0,0 +1,131 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Mumpix × LangChain integration
5
+ *
6
+ * Usage:
7
+ * const { MumpixVectorStore } = require('mumpix/src/integrations/langchain')
8
+ * const db = Mumpix.open('./agent.mumpix', { consistency: 'strict' })
9
+ * const store = new MumpixVectorStore(db)
10
+ *
11
+ * // Use as a LangChain Memory:
12
+ * const memory = new MumpixChatMemory({ db })
13
+ */
14
+
15
+ const { recallMany } = require('../core/recall');
16
+
17
/**
 * MumpixVectorStore — drop-in VectorStore adapter for LangChain.
 *
 * Works with any LangChain retriever that accepts .similaritySearch().
 * No external vector library required — retrieval is delegated to the
 * wrapped MumpixDB instance.
 */
class MumpixVectorStore {
  /** @param db — an opened MumpixDB (must provide recallMany/remember). */
  constructor(db) {
    this.db = db;
  }

  /**
   * LangChain VectorStore interface: top-k documents for a query.
   * NOTE(review): the core recall module attaches the score as `_score`,
   * but this adapter originally read `r.score` (undefined in that case).
   * We now prefer `score` and fall back to `_score` — confirm which field
   * MumpixDB.recallMany actually emits.
   */
  async similaritySearch(query, k = 4) {
    const results = await this.db.recallMany(query, k);
    return results.map(r => ({
      pageContent: r.content,
      metadata: {
        id: r.id,
        ts: r.ts,
        score: r.score !== undefined ? r.score : r._score,
      },
    }));
  }

  /** Like similaritySearch, but returns [document, score] pairs. */
  async similaritySearchWithScore(query, k = 4) {
    const results = await this.db.recallMany(query, k);
    return results.map(r => ([
      { pageContent: r.content, metadata: { id: r.id, ts: r.ts } },
      r.score !== undefined ? r.score : r._score,
    ]));
  }

  /**
   * Store LangChain documents; returns the new record ids as strings.
   * Delegates to addTexts so both insert paths share one implementation.
   */
  async addDocuments(docs) {
    return this.addTexts(docs.map(doc => doc.pageContent));
  }

  /**
   * Store raw texts; returns the new record ids as strings.
   * NOTE(review): `metadatas` is accepted for interface compatibility but
   * is not persisted — MumpixDB.remember() only receives the text.
   */
  async addTexts(texts, metadatas = []) {
    const ids = [];
    for (const text of texts) {
      const r = await this.db.remember(text);
      ids.push(r.id.toString());
    }
    return ids;
  }
}
65
+
66
/**
 * MumpixChatMemory — LangChain BaseChatMemory compatible adapter.
 *
 * Persists conversation turns as memories and retrieves the k most
 * relevant ones for each new input.
 *
 *   const memory = new MumpixChatMemory({ db, k: 3 })
 *   // Then pass as `memory` to LLMChain, ConversationChain, etc.
 */
class MumpixChatMemory {
  constructor({ db, k = 4, inputKey = 'input', outputKey = 'output' } = {}) {
    this.db = db;
    this.k = k;
    this.inputKey = inputKey;
    this.outputKey = outputKey;
    this.memoryKey = 'history';
  }

  // LangChain inspects this to learn which variables the memory provides.
  get memoryKeys() {
    return [this.memoryKey];
  }

  // Look up the k memories most relevant to the incoming input and expose
  // them, newline-joined, under the `history` key.
  async loadMemoryVariables(values) {
    const query = values[this.inputKey] || '';
    const matches = await this.db.recallMany(query, this.k);
    const lines = matches.map(m => m.content);
    return { [this.memoryKey]: lines.join('\n') };
  }

  // Persist one human/AI exchange as two separate memories; empty turns
  // are skipped.
  async saveContext(inputs, outputs) {
    const humanTurn = inputs[this.inputKey] || '';
    const aiTurn = outputs[this.outputKey] || '';
    if (humanTurn) await this.db.remember(`Human: ${humanTurn}`);
    if (aiTurn) await this.db.remember(`AI: ${aiTurn}`);
  }

  // Wipe all stored memories.
  async clear() {
    await this.db.clear();
  }
}
105
+
106
/**
 * MumpixRetriever — LangChain BaseRetriever compatible adapter.
 *
 *   const retriever = new MumpixRetriever(db, { k: 5 })
 *   const results = await retriever.getRelevantDocuments("query")
 */
class MumpixRetriever {
  /** @param db — an opened MumpixDB; opts.k — number of documents (default 4). */
  constructor(db, opts = {}) {
    this.db = db;
    this.k = opts.k || 4;
  }

  /**
   * Return the top-k records as LangChain-style documents.
   * NOTE(review): the core recall module attaches the score as `_score`,
   * but this adapter originally read `r.score` (undefined in that case);
   * we now fall back to `_score` — confirm which field
   * MumpixDB.recallMany actually emits.
   */
  async getRelevantDocuments(query) {
    const results = await this.db.recallMany(query, this.k);
    return results.map(r => ({
      pageContent: r.content,
      metadata: {
        id: r.id,
        ts: r.ts,
        score: r.score !== undefined ? r.score : r._score,
      },
    }));
  }
}
126
+
127
// LangChain adapters: vector store, chat memory, and retriever.
module.exports = {
  MumpixVectorStore,
  MumpixChatMemory,
  MumpixRetriever,
};
@@ -0,0 +1,86 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Mumpix × LlamaIndex integration
5
+ *
6
+ * Usage:
7
+ * const { MumpixReader, MumpixIndex } = require('mumpix/src/integrations/llamaindex')
8
+ * const db = Mumpix.open('./agent.mumpix', { consistency: 'strict' })
9
+ * const index = new MumpixIndex(db)
10
+ * const retriever = index.asRetriever({ topK: 5 })
11
+ */
12
+
13
/**
 * MumpixIndex — wraps a MumpixDB as a LlamaIndex-style index.
 */
class MumpixIndex {
  constructor(db) {
    this.db = db;
  }

  // Build a LlamaIndex-compatible retriever over this index.
  asRetriever(opts = {}) {
    return new MumpixIndexRetriever(this.db, opts);
  }

  /**
   * Insert a document / text node. Accepts a plain string or a node
   * object exposing `.text` or `.content`.
   */
  async insert(node) {
    let text;
    if (typeof node === 'string') {
      text = node;
    } else {
      text = node.text || node.content || '';
    }
    return this.db.remember(text);
  }

  // Insert several nodes sequentially, preserving result order.
  async insertMany(nodes) {
    const inserted = [];
    for (const node of nodes) {
      inserted.push(await this.insert(node));
    }
    return inserted;
  }
}
39
+
40
/**
 * MumpixIndexRetriever — LlamaIndex-compatible retriever.
 */
class MumpixIndexRetriever {
  /** @param db — an opened MumpixDB; opts.topK — result count (default 5). */
  constructor(db, opts = {}) {
    this.db = db;
    this.topK = opts.topK || 5;
  }

  /**
   * Retrieve the topK most relevant records as LlamaIndex node-with-score
   * objects. Accepts a raw query string or a { queryStr } bundle.
   * NOTE(review): the core recall module attaches the score as `_score`,
   * but this adapter originally read `r.score` (undefined in that case);
   * we now fall back to `_score` — confirm which field
   * MumpixDB.recallMany actually emits.
   */
  async retrieve(queryBundle) {
    const query = typeof queryBundle === 'string' ? queryBundle : queryBundle.queryStr;
    const results = await this.db.recallMany(query, this.topK);
    return results.map(r => ({
      node: {
        id_: r.id.toString(),
        text: r.content,
        metadata: { ts: r.ts },
        getContent: () => r.content,
      },
      score: r.score !== undefined ? r.score : r._score,
    }));
  }
}
63
+
64
/**
 * MumpixReader — load .mumpix file contents as LlamaIndex documents.
 */
class MumpixReader {
  constructor(db) {
    this.db = db;
  }

  // Fetch every stored memory and shape it as a LlamaIndex document.
  async loadData() {
    const memories = await this.db.list();
    const documents = [];
    for (const memory of memories) {
      documents.push({
        id_: memory.id.toString(),
        text: memory.content,
        metadata: { source: 'mumpix', ts: memory.ts },
      });
    }
    return documents;
  }
}
81
+
82
// LlamaIndex adapters: index wrapper, retriever, and document reader.
module.exports = {
  MumpixIndex,
  MumpixIndexRetriever,
  MumpixReader,
};