npm - @geravant/sinain - Versions diffs - 1.10.0 → 1.10.1 - Mend

@geravant/sinain 1.10.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/package.json +1 -1
package/sinain-agent/CLAUDE.md +50 -0
package/sinain-agent/run.sh +7 -3
package/sinain-core/src/index.ts +297 -26
package/sinain-core/src/learning/local-curation.ts +373 -0
package/sinain-core/src/server.ts +197 -0
package/sinain-mcp-server/index.ts +34 -4
package/sinain-memory/__pycache__/common.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
package/sinain-memory/eval/retrieval_benchmark.jsonl +12 -0
package/sinain-memory/eval/retrieval_evaluator.py +186 -0
package/sinain-memory/graph_query.py +34 -1
package/sinain-memory/knowledge_integrator.py +54 -0
package/sinain-memory/triplestore.py +76 -5

package/sinain-memory/eval/retrieval_evaluator.py ADDED Viewed

@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+"""Retrieval Quality Evaluator — Recall@k and NDCG@k for knowledge graph queries.
+Inspired by mempalace's LongMemEval benchmark infrastructure. Measures whether the
+right knowledge surfaces when the agent needs it, complementing sinain's existing
+output quality evaluation (schemas + assertions + LLM judges).
+Usage:
+    python3 eval/retrieval_evaluator.py \
+        --db memory/knowledge-graph.db \
+        --benchmark eval/retrieval_benchmark.jsonl \
+        [--k 1,3,5] [--format json|text]
+Benchmark dataset format (JSONL):
+    {"query": "OCR pipeline stalls on macOS 14", "expected_entities": ["fact:sck-capture-fix"], "category": "error-resolution"}
+"""
+import argparse
+import json
+import math
+import sys
+from collections import defaultdict
+from pathlib import Path
+def load_benchmark(path: str) -> list[dict]:
+    """Load benchmark QA pairs from a JSONL file.
+    Args:
+        path: Path to a file holding one JSON object per line; blank lines are skipped.
+    Returns:
+        The parsed objects, in file order.
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If a non-blank line is not valid JSON.
+    """
+    # Read as UTF-8 explicitly: benchmark queries may contain non-ASCII text and the
+    # platform default encoding is not guaranteed to be UTF-8.
+    with open(path, encoding="utf-8") as f:
+        return [json.loads(line) for line in f if line.strip()]
+def extract_keywords(query: str) -> list[str]:
+    """Pull search keywords out of a natural language query.
+    The query is lowercased and split into tokens that start with a letter and
+    continue with letters, digits or hyphens. Tokens of one or two characters and
+    common stopwords are dropped; order and duplicates are preserved.
+    """
+    import re
+    ignored = {"the", "is", "in", "on", "for", "and", "or", "of", "to", "a", "an", "it", "was", "not", "how", "what", "when", "does"}
+    keywords = []
+    for token in re.findall(r"[a-zA-Z][a-zA-Z0-9-]+", query.lower()):
+        if len(token) <= 2 or token in ignored:
+            continue
+        keywords.append(token)
+    return keywords
+def dcg_at_k(relevant_positions: list[int], k: int) -> float:
+    """Compute Discounted Cumulative Gain at k with binary relevance.
+    Args:
+        relevant_positions: 0-indexed ranks at which relevant results were found.
+        k: Cut-off; ranks at or beyond k contribute nothing.
+    Returns:
+        Sum of 1 / log2(rank + 2) over the ranks below k (rank 0 is worth 1.0).
+    """
+    total = 0.0
+    for rank in relevant_positions:
+        if rank >= k:
+            continue
+        # rank is 0-indexed, hence +2 rather than the textbook +1
+        total += 1.0 / math.log2(rank + 2)
+    return total
+def ndcg_at_k(relevant_positions: list[int], num_relevant: int, k: int) -> float:
+    """Compute Normalized DCG at k.
+    Args:
+        relevant_positions: 0-indexed ranks at which relevant results were found.
+        num_relevant: Total number of relevant items for the query.
+        k: Cut-off rank.
+    Returns:
+        DCG@k divided by the ideal DCG@k, or 0.0 when the ideal DCG is zero.
+    """
+    # Best achievable ranking: every relevant item packed into the leading slots.
+    best_case = list(range(min(num_relevant, k)))
+    ideal = dcg_at_k(best_case, k)
+    if ideal <= 0:
+        return 0.0
+    return dcg_at_k(relevant_positions, k) / ideal
+def evaluate_retrieval(
+    benchmark_path: str,
+    db_path: str,
+    k_values: list[int] | None = None,
+) -> dict:
+    """Run benchmark queries against graph_query.py, compute Recall@k and NDCG@k.
+    "Recall@k" here is a hit rate: a query scores 1.0 when at least one expected
+    entity appears in the top k results, else 0.0. Items without usable keywords or
+    without expected entities are skipped and not counted as evaluated.
+    Args:
+        benchmark_path: JSONL benchmark file (see load_benchmark).
+        db_path: Knowledge-graph database path, passed through to graph_query.
+        k_values: Cut-offs to evaluate; defaults to [1, 3, 5].
+    Returns:
+        {"summary": ..., "categories": ..., "details": ...}, or {"error": ...} when
+        the benchmark file contains no items.
+    Raises:
+        ValueError: If k_values is empty.
+    """
+    # None sentinel instead of a list literal: a mutable default is shared between calls.
+    k_values = [1, 3, 5] if k_values is None else list(k_values)
+    # Import graph_query from parent dir (guarded so repeated calls don't grow sys.path)
+    parent_dir = str(Path(__file__).parent.parent)
+    if parent_dir not in sys.path:
+        sys.path.insert(0, parent_dir)
+    from graph_query import query_facts_by_entities
+    items = load_benchmark(benchmark_path)
+    if not items:
+        return {"error": "Empty benchmark dataset"}
+    if not k_values:
+        raise ValueError("k_values must contain at least one cut-off")
+    max_k = max(k_values)
+    metrics: dict[str, list[float]] = defaultdict(list)
+    category_metrics: dict[str, dict[str, list[float]]] = defaultdict(lambda: defaultdict(list))
+    details: list[dict] = []
+    for item in items:
+        query = item["query"]
+        expected = set(item.get("expected_entities", []))
+        category = item.get("category", "general")
+        keywords = extract_keywords(query)
+        if not keywords or not expected:
+            continue
+        results = query_facts_by_entities(db_path, keywords, max_facts=max_k)
+        result_ids = [r["entityId"] for r in results]
+        # Rank of the first occurrence of each expected entity. Counting repeats of the
+        # same entity would let DCG exceed the ideal DCG and push NDCG above 1.0.
+        seen = set()
+        relevant_positions = []
+        for i, rid in enumerate(result_ids):
+            if rid in expected and rid not in seen:
+                seen.add(rid)
+                relevant_positions.append(i)
+        for k in k_values:
+            hit = any(pos < k for pos in relevant_positions)
+            recall = 1.0 if hit else 0.0
+            ndcg = ndcg_at_k(relevant_positions, len(expected), k)
+            metrics[f"recall@{k}"].append(recall)
+            metrics[f"ndcg@{k}"].append(ndcg)
+            category_metrics[category][f"recall@{k}"].append(recall)
+            category_metrics[category][f"ndcg@{k}"].append(ndcg)
+        details.append({
+            "query": query,
+            "category": category,
+            # Sorted so the report is stable across runs (set order is not).
+            "expected": sorted(expected, key=str),
+            "retrieved": result_ids[:max_k],
+            "hit@1": any(pos < 1 for pos in relevant_positions),
+            "hit@5": any(pos < 5 for pos in relevant_positions),
+        })
+    # Aggregate
+    summary = {
+        "total_queries": len(items),
+        "evaluated": len(details),
+    }
+    for metric_name, values in sorted(metrics.items()):
+        summary[metric_name] = round(sum(values) / len(values), 4) if values else 0.0
+    # Per-category breakdown
+    categories = {}
+    for cat, cat_metrics in sorted(category_metrics.items()):
+        categories[cat] = {
+            # Every metric list in a category has one entry per evaluated query.
+            "count": len(next(iter(cat_metrics.values()))),
+        }
+        for metric_name, values in sorted(cat_metrics.items()):
+            categories[cat][metric_name] = round(sum(values) / len(values), 4) if values else 0.0
+    return {
+        "summary": summary,
+        "categories": categories,
+        "details": details,
+    }
+def format_report_text(result: dict) -> str:
+    """Format evaluation result as human-readable text for daily report injection.
+    Args:
+        result: Output of evaluate_retrieval — either the summary/categories/details
+            dict or the {"error": ...} dict returned for an empty benchmark.
+    Returns:
+        A Markdown fragment headed "## Retrieval Quality".
+    """
+    lines = ["## Retrieval Quality"]
+    # An error result carries no "summary"; report it instead of raising KeyError.
+    if "error" in result:
+        lines.append(f"- error: {result['error']}")
+        return "\n".join(lines)
+    s = result.get("summary", {})
+    for key in sorted(s):
+        if key.startswith(("recall@", "ndcg@")):
+            lines.append(f"- {key}: {s[key]:.2%}")
+    cats = result.get("categories") or {}
+    if cats:
+        lines.append("")
+        lines.append("**By category:**")
+        for cat, cm in sorted(cats.items()):
+            r5 = cm.get("recall@5", 0)
+            lines.append(f"- {cat} (n={cm['count']}): recall@5={r5:.0%}")
+        # Weakest category: only called out when its recall@5 is below 80%.
+        weakest_name, weakest = min(cats.items(), key=lambda x: x[1].get("recall@5", 1.0))
+        if weakest.get("recall@5", 1.0) < 0.8:
+            lines.append(f"\n**Weakest**: {weakest_name} ({weakest.get('recall@5', 0):.0%})")
+    return "\n".join(lines)
+def main() -> None:
+    """CLI entry point: parse arguments, run the evaluation, print the report to stdout."""
+    parser = argparse.ArgumentParser(description="Retrieval Quality Evaluator")
+    parser.add_argument("--db", required=True, help="Path to knowledge-graph.db")
+    parser.add_argument("--benchmark", required=True, help="Path to retrieval_benchmark.jsonl")
+    parser.add_argument("--k", default="1,3,5", help="Comma-separated k values for Recall@k")
+    parser.add_argument("--format", choices=["json", "text"], default="json", help="Output format")
+    args = parser.parse_args()
+    try:
+        # Tolerate spaces and stray commas ("1, 3,5,"); anything non-numeric is a usage error.
+        k_values = [int(k) for k in args.k.split(",") if k.strip()]
+    except ValueError:
+        parser.error(f"--k must be a comma-separated list of integers, got {args.k!r}")
+    if not k_values or min(k_values) < 1:
+        parser.error(f"--k needs at least one value and all values must be >= 1, got {args.k!r}")
+    result = evaluate_retrieval(args.benchmark, args.db, k_values)
+    if args.format == "text":
+        print(format_report_text(result))
+    else:
+        print(json.dumps(result, indent=2, ensure_ascii=False))
+if __name__ == "__main__":
+    main()

package/sinain-memory/graph_query.py CHANGED Viewed

@@ -154,6 +154,37 @@ def format_facts_text(facts: list[dict], max_chars: int = 500) -> str:
     return "\n".join(lines)
+def format_facts_compact(facts: list[dict], max_chars: int = 400) -> str:
+    """Encode facts for efficient escalation context injection.
+    Compact format: domain/entity: value (conf,Nx), entries joined by "; ".
+    Inspired by mempalace AAAK compression — fits 3-5x more facts per token budget.
+    Args:
+        facts: Fact dicts; reads "entityId", "value", "confidence", "reinforce_count"
+            and "domain", all optional.
+        max_chars: Output budget. Encoding stops at the first fact that would not fit.
+    Returns:
+        The encoded facts, or "" when there are none or the first one does not fit.
+    """
+    if not facts:
+        return ""
+    lines = []
+    total = 0
+    for f in facts:
+        # Fields may be missing, None or non-string; coerce before slicing so one
+        # odd row cannot raise TypeError and lose the whole context block.
+        entity = str(f.get("entityId") or "").split(":")[-1][:20]
+        raw_value = f.get("value")
+        value = ("" if raw_value is None else str(raw_value))[:60]
+        conf = f.get("confidence", "?")
+        count = f.get("reinforce_count", "1")
+        domain = f.get("domain", "")
+        if domain:
+            line = f"{domain}/{entity}: {value} ({conf},{count}x)"
+        else:
+            line = f"{entity}: {value} ({conf},{count}x)"
+        # Budget is conservative: a separator is charged for every entry, even the last.
+        if total + len(line) + 2 > max_chars:
+            break
+        lines.append(line)
+        total += len(line) + 2  # account for "; " separator
+    return "; ".join(lines)
 def domain_fact_counts(db_path: str) -> dict[str, int]:
     """Count facts per domain for module emergence detection."""
     if not Path(db_path).exists():
@@ -184,7 +215,7 @@ def main() -> None:
     parser.add_argument("--top", type=int, default=None, help="Query top-N facts by confidence")
     parser.add_argument("--domain-counts", action="store_true", help="Show fact counts per domain")
     parser.add_argument("--max-facts", type=int, default=5, help="Maximum facts to return")
-    parser.add_argument("--format", choices=["text", "json"], default="json", help="Output format")
+    parser.add_argument("--format", choices=["text", "json", "compact"], default="json", help="Output format")
     args = parser.parse_args()
     if args.domain_counts:
@@ -202,6 +233,8 @@ def main() -> None:
     if args.format == "text":
         print(format_facts_text(facts))
+    elif args.format == "compact":
+        print(format_facts_compact(facts))
     else:
         print(json.dumps({"facts": facts, "count": len(facts)}, indent=2, ensure_ascii=False))

package/sinain-memory/knowledge_integrator.py CHANGED Viewed

@@ -117,6 +117,55 @@ def _fact_id(entity: str, attribute: str, value: str) -> str:
     return f"fact:{slug}-{h}"
+def _normalize_entity(name: str) -> str:
+    """Return the canonical form of an entity name.
+    Lowercases the name, turns spaces and underscores into hyphens, then removes
+    every character that is not a-z, 0-9 or a hyphen.
+    """
+    hyphenated = name.lower().replace(" ", "-").replace("_", "-")
+    return re.sub(r"[^a-z0-9-]", "", hyphenated)
+def _canonicalize_ops(ops: list[dict], existing_entities: list[str]) -> list[dict]:
+    """Map variant entity names to canonical forms before graph execution.
+    Inspired by mempalace entity detection — uses a simple heuristic instead of
+    rule-based signal detection: normalize names, then merge on exact or substring match.
+    Converts duplicate assert → reinforce when a near-match exists.
+    Args:
+        ops: Graph operations; only those with op == "assert" are examined.
+        existing_entities: Entity ids already present in the graph.
+    Returns:
+        A new list with matched asserts replaced by {"op": "reinforce", "entityId": ...};
+        all other ops are passed through unchanged and in order.
+    """
+    min_substring_len = 4  # shorter names are too ambiguous for substring merging
+    canonical_map: dict[str, str] = {}  # normalized → existing entity id
+    for eid in existing_entities:
+        # NOTE(review): this normalizes the entity *id* itself, not the value of an
+        # "entity" attribute — confirm that ids embed the entity name.
+        existing_norm = _normalize_entity(eid)
+        if existing_norm:
+            canonical_map[existing_norm] = eid
+    result = []
+    for op in ops:
+        if op.get("op") != "assert":
+            result.append(op)
+            continue
+        entity = op.get("entity") or ""
+        normalized = _normalize_entity(entity)
+        # An exact match always wins over a substring match, regardless of map order.
+        # Names that normalize to "" carry no signal and are never merged.
+        matched_id = canonical_map.get(normalized) if normalized else None
+        if matched_id is None and len(normalized) >= min_substring_len:
+            for existing_norm, existing_eid in canonical_map.items():
+                # Both sides must be long enough: otherwise a short existing name
+                # such as "ui" would swallow every entity containing those letters.
+                if len(existing_norm) < min_substring_len:
+                    continue
+                # Substring match: "react-router" matches "react-router-dom"
+                if normalized in existing_norm or existing_norm in normalized:
+                    matched_id = existing_eid
+                    break
+        if matched_id:
+            # Convert assert → reinforce (entity already exists under different name)
+            result.append({"op": "reinforce", "entityId": matched_id})
+            print(f"  [canon] merged '{entity}' → existing '{matched_id}'", file=sys.stderr)
+        else:
+            result.append(op)
+            # Register the new canonical form so later ops in this batch merge into it
+            if normalized:
+                canonical_map[normalized] = _fact_id(entity, op.get("attribute", ""), op.get("value", ""))
+    return result
 def _load_graph_facts(db_path: str, entities: list[str] | None = None, limit: int = 50) -> list[dict]:
     """Load relevant facts from the knowledge graph for LLM context."""
     if not Path(db_path).exists():
@@ -180,6 +229,11 @@ def _execute_graph_ops(db_path: str, ops: list[dict], digest_ts: str) -> dict:
     try:
         from triplestore import TripleStore
         store = TripleStore(db_path)
+        # Canonicalize entity names to prevent fragmentation
+        existing_ids = [r[0] for r in store.entities_with_attr("entity")]
+        ops = _canonicalize_ops(ops, existing_ids)
         stats = {"asserted": 0, "reinforced": 0, "retracted": 0}
         for op_data in ops:

package/sinain-memory/triplestore.py CHANGED Viewed

@@ -21,6 +21,7 @@ Self-test:
 """
 import json
+import math
 import os
 import sqlite3
 import sys
@@ -51,6 +52,7 @@ CREATE TABLE IF NOT EXISTS triples (
     value_type    TEXT    NOT NULL DEFAULT 'string',
     retracted     INTEGER NOT NULL DEFAULT 0,
     retracted_tx  INTEGER,
+    valid_to      TEXT,
     created_at    TEXT    NOT NULL
 );
@@ -88,6 +90,24 @@ def _entity_type(entity_id: str) -> str:
     return entity_id[:colon] if colon > 0 else "unknown"
+def decayed_confidence(
+    confidence: float, created_at: str, half_life_days: int = 60
+) -> float:
+    """Apply exponential time-decay to a confidence score.
+    Facts lose half their confidence every `half_life_days` without reinforcement.
+    Inspired by mempalace's temporal validity model.
+    Age is counted in whole days, so a fact younger than one day is not decayed.
+    Args:
+        confidence: Stored confidence score.
+        created_at: ISO-8601 timestamp; a trailing "Z" is accepted and a timestamp
+            without an offset is taken to be UTC.
+        half_life_days: Days after which the score has halved.
+    Returns:
+        The decayed score, or `confidence` unchanged when the timestamp is missing,
+        malformed or not in the past.
+    """
+    try:
+        created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
+        if created.tzinfo is None:
+            # Aware minus naive raises TypeError, which used to skip decay silently.
+            created = created.replace(tzinfo=timezone.utc)
+        age_days = (datetime.now(timezone.utc) - created).days
+        if age_days <= 0:
+            return confidence
+        # Exact halving per half-life (the constant 0.693 only approximates ln 2).
+        return confidence * 0.5 ** (age_days / half_life_days)
+    except (AttributeError, TypeError, ValueError):
+        # Best effort: a bad timestamp must not break retrieval, so keep the raw score.
+        return confidence
 class TripleStore:
     """SQLite-backed EAV triple store with WAL mode and 4 covering indexes."""
@@ -99,8 +119,16 @@ class TripleStore:
         self._conn.execute("PRAGMA journal_mode=WAL")
         self._conn.execute("PRAGMA busy_timeout=10000")
         self._conn.executescript(_SCHEMA_SQL)
+        self._migrate()
         self._conn.commit()
+    def _migrate(self) -> None:
+        """Bring an existing database up to the current schema.
+        Currently one step: add the nullable TEXT column `valid_to` to `triples`
+        when the table does not have it yet (added in memory-improvements).
+        """
+        table_info = self._conn.execute("PRAGMA table_info(triples)").fetchall()
+        # Index 1 of each table_info row is the column name.
+        has_valid_to = any(column[1] == "valid_to" for column in table_info)
+        if has_valid_to:
+            return
+        self._conn.execute("ALTER TABLE triples ADD COLUMN valid_to TEXT")
     def close(self) -> None:
         self._conn.close()
@@ -173,21 +201,22 @@ class TripleStore:
     ) -> int:
         """Retract triples matching entity+attribute (and optionally value).
-        Sets retracted=1 and retracted_tx to the retraction transaction.
+        Sets retracted=1, retracted_tx, and valid_to (temporal closure).
         The original tx_id is preserved for temporal (as_of_tx) queries.
         Returns the count of triples retracted.
         """
+        now = _now_iso()
         if value is not None:
             cur = self._conn.execute(
-                "UPDATE triples SET retracted = 1, retracted_tx = ? "
+                "UPDATE triples SET retracted = 1, retracted_tx = ?, valid_to = ? "
                 "WHERE entity_id = ? AND attribute = ? AND value = ? AND retracted = 0",
-                (tx_id, entity_id, attribute, value),
+                (tx_id, now, entity_id, attribute, value),
             )
         else:
             cur = self._conn.execute(
-                "UPDATE triples SET retracted = 1, retracted_tx = ? "
+                "UPDATE triples SET retracted = 1, retracted_tx = ?, valid_to = ? "
                 "WHERE entity_id = ? AND attribute = ? AND retracted = 0",
-                (tx_id, entity_id, attribute),
+                (tx_id, now, entity_id, attribute),
             )
         self._conn.commit()
         return cur.rowcount
@@ -220,6 +249,26 @@ class TripleStore:
             result.setdefault(row["attribute"], []).append(row["value"])
         return result
+    def entity_as_of(self, entity_id: str, date: datetime) -> dict[str, list[str]]:
+        """Return entity attributes as they were on a specific date.
+        Uses created_at and valid_to for date-based temporal queries
+        (vs as_of_tx which uses transaction ordering).
+        A triple is visible when it was created on or before `date` and either is
+        still active with no end date, or has a valid_to later than `date`. Retracted
+        triples with no valid_to are excluded because their end date is unknown.
+        Args:
+            entity_id: Entity to look up.
+            date: Point in time; timezone-aware values are converted to UTC, naive
+                values are used as given.
+        Returns:
+            Mapping of attribute name to its values, ordered by attribute then row id.
+        """
+        if date.tzinfo is not None:
+            # The "Z" suffix below claims UTC, so the clock time must actually be UTC.
+            date = date.astimezone(timezone.utc)
+        # NOTE(review): assumes stored timestamps use this same format so that string
+        # comparison orders them chronologically — confirm against _now_iso().
+        date_iso = date.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+        rows = self._conn.execute(
+            "SELECT attribute, value FROM triples "
+            "WHERE entity_id = ? AND created_at <= ? "
+            # Filtering on retracted = 0 alone would hide triples that were still valid
+            # on `date` but retracted afterwards, defeating the valid_to comparison.
+            "AND ((valid_to IS NULL AND retracted = 0) OR valid_to > ?) "
+            "ORDER BY attribute, id",
+            (entity_id, date_iso, date_iso),
+        ).fetchall()
+        result: dict[str, list[str]] = {}
+        for row in rows:
+            result.setdefault(row["attribute"], []).append(row["value"])
+        return result
     # ----- Query: AEVT (attribute scan) -----
     def entities_with_attr(
@@ -473,6 +522,28 @@ def _self_test() -> None:
         assert "priority" in ent_before, "as_of_tx should see pre-retraction state"
         print("  [OK] as_of_tx isolation")
+        # valid_to set on retraction
+        retracted_row = store._conn.execute(
+            "SELECT valid_to FROM triples WHERE entity_id = 'signal:2026-03-01' AND attribute = 'priority'"
+        ).fetchone()
+        assert retracted_row and retracted_row["valid_to"] is not None, "valid_to should be set on retraction"
+        print("  [OK] valid_to set on retraction")
+        # entity_as_of
+        future = datetime.now(timezone.utc) + timedelta(days=1)
+        ent_future = store.entity_as_of("signal:2026-03-01", future)
+        assert "description" in ent_future, "entity_as_of should find active triples"
+        assert "priority" not in ent_future, "entity_as_of should exclude retracted triples"
+        print("  [OK] entity_as_of temporal query")
+        # Confidence decay utility
+        fresh_conf = decayed_confidence(0.8, _now_iso())
+        assert abs(fresh_conf - 0.8) < 0.01, f"Fresh fact should keep confidence: {fresh_conf}"
+        old_date = (datetime.now(timezone.utc) - timedelta(days=60)).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+        old_conf = decayed_confidence(0.8, old_date)
+        assert 0.35 < old_conf < 0.45, f"60-day-old fact should decay to ~0.4: {old_conf}"
+        print(f"  [OK] Confidence decay: fresh=0.8→{fresh_conf:.2f}, 60d=0.8→{old_conf:.2f}")
         # GC (retracted triples are fresh, so gc with 0 days should get them)
         gc_count = store.gc(older_than_days=0)
         assert gc_count >= 1