npm - @geravant/sinain - Versions diffs - 1.13.0 → 1.15.0 - Mend

@geravant/sinain 1.13.0 → 1.15.0

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry where they were published. It is provided for informational purposes only.

Files changed (78)

package/.env.example +33 -27
package/cli.js +30 -14
package/config-shared.js +173 -30
package/launcher.js +38 -21
package/onboard.js +36 -20
package/package.json +4 -1
package/sinain-agent/run.sh +600 -127
package/sinain-core/src/agents-loader.ts +254 -0
package/sinain-core/src/buffers/feed-buffer.ts +6 -4
package/sinain-core/src/config.ts +77 -15
package/sinain-core/src/escalation/escalator.ts +178 -18
package/sinain-core/src/index.ts +218 -31
package/sinain-core/src/learning/local-curation.ts +81 -27
package/sinain-core/src/overlay/commands.ts +25 -0
package/sinain-core/src/overlay/ws-handler.ts +3 -0
package/sinain-core/src/server.ts +101 -10
package/sinain-core/src/types.ts +29 -3
package/sinain-memory/graph_query.py +12 -3
package/sinain-memory/knowledge_integrator.py +194 -10
package/sinain-memory/__pycache__/common.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/embed_client.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
package/sinain-memory/eval/__init__.py +0 -0
package/sinain-memory/eval/__pycache__/__init__.cpython-312.pyc +0 -0
package/sinain-memory/eval/assertions.py +0 -267
package/sinain-memory/eval/benchmarks/__init__.py +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/__init__.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/base_adapter.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/config.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/evaluate.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/ingest.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/longmemeval_adapter.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/meeting_adapter.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/meeting_runner.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/report.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/base_adapter.py +0 -43
package/sinain-memory/eval/benchmarks/config.py +0 -23
package/sinain-memory/eval/benchmarks/evaluate.py +0 -146
package/sinain-memory/eval/benchmarks/ingest.py +0 -152
package/sinain-memory/eval/benchmarks/judges/__init__.py +0 -0
package/sinain-memory/eval/benchmarks/judges/__pycache__/__init__.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/judges/__pycache__/qa_judge.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/judges/qa_judge.py +0 -81
package/sinain-memory/eval/benchmarks/longmemeval_adapter.py +0 -177
package/sinain-memory/eval/benchmarks/meeting_adapter.py +0 -81
package/sinain-memory/eval/benchmarks/meeting_runner.py +0 -230
package/sinain-memory/eval/benchmarks/query.py +0 -193
package/sinain-memory/eval/benchmarks/report.py +0 -87
package/sinain-memory/eval/benchmarks/run_meeting_bench.sh +0 -318
package/sinain-memory/eval/benchmarks/runner.py +0 -283
package/sinain-memory/eval/judges/__init__.py +0 -0
package/sinain-memory/eval/judges/base_judge.py +0 -61
package/sinain-memory/eval/judges/curation_judge.py +0 -46
package/sinain-memory/eval/judges/insight_judge.py +0 -48
package/sinain-memory/eval/judges/mining_judge.py +0 -42
package/sinain-memory/eval/judges/signal_judge.py +0 -45
package/sinain-memory/eval/retrieval_benchmark.jsonl +0 -12
package/sinain-memory/eval/retrieval_evaluator.py +0 -186
package/sinain-memory/eval/schemas.py +0 -247
package/sinain-memory/tests/__init__.py +0 -0
package/sinain-memory/tests/conftest.py +0 -189
package/sinain-memory/tests/test_curator_helpers.py +0 -94
package/sinain-memory/tests/test_embedder.py +0 -210
package/sinain-memory/tests/test_extract_json.py +0 -124
package/sinain-memory/tests/test_feedback_computation.py +0 -121
package/sinain-memory/tests/test_miner_helpers.py +0 -71
package/sinain-memory/tests/test_module_management.py +0 -458
package/sinain-memory/tests/test_parsers.py +0 -96
package/sinain-memory/tests/test_tick_evaluator.py +0 -430
package/sinain-memory/tests/test_triple_extractor.py +0 -255
package/sinain-memory/tests/test_triple_ingest.py +0 -191
package/sinain-memory/tests/test_triple_migrate.py +0 -138
package/sinain-memory/tests/test_triplestore.py +0 -248

package/sinain-memory/knowledge_integrator.py CHANGED Viewed

@@ -21,7 +21,9 @@ import json
 import re
 import shutil
 import sys
+import unicodedata
 from datetime import datetime, timezone
+from difflib import SequenceMatcher
 from pathlib import Path
 from common import (
@@ -121,9 +123,50 @@ def _fact_id(entity: str, attribute: str, value: str) -> str:
     return f"fact:{slug}-{h}"
+_UNICODE_PRE_MAP = str.maketrans({"ß": "ss", "ẞ": "SS"})
 def _normalize_entity(name: str) -> str:
-    """Normalize entity name to canonical form: lowercase, hyphenated, no punctuation."""
-    return re.sub(r"[^a-z0-9-]", "", name.lower().replace(" ", "-").replace("_", "-"))
+    """Normalize entity name to canonical form: lowercase, hyphenated, ASCII-transliterated."""
+    s = name.translate(_UNICODE_PRE_MAP)
+    s = unicodedata.normalize("NFKD", s)
+    s = s.encode("ascii", "ignore").decode("ascii")
+    s = s.lower().replace(" ", "-").replace("_", "-")
+    s = re.sub(r"[^a-z0-9-]", "", s)
+    s = re.sub(r"-{2,}", "-", s)
+    return s.strip("-")
+def _find_matching_entity(
+    name: str,
+    existing_names: dict[str, str],
+) -> str | None:
+    """Find an existing entity that fuzzy-matches `name`. Returns entity_node_id or None."""
+    if name in existing_names:
+        return existing_names[name]
+    # Hyphen-insensitive exact match (chatgpt == chat-gpt)
+    name_compact = name.replace("-", "")
+    for existing_name, node_id in existing_names.items():
+        if existing_name.replace("-", "") == name_compact:
+            return node_id
+    # Edit-distance fuzzy match
+    if len(name) < 3:
+        return None
+    threshold = 0.90
+    best_match = None
+    best_ratio = threshold
+    for existing_name, node_id in existing_names.items():
+        if len(existing_name) < 3:
+            continue
+        if frozenset({name, existing_name}) in _DEDUP_SKIP_PAIRS:
+            continue
+        ratio = SequenceMatcher(None, name, existing_name).ratio()
+        if ratio >= best_ratio:
+            best_ratio = ratio
+            best_match = node_id
+    return best_match
 def _canonicalize_ops(ops: list[dict], existing_entities: list[str], existing_facts: list[dict]) -> list[dict]:
@@ -528,7 +571,14 @@ def _execute_graph_ops(db_path: str, ops: list[dict], digest_ts: str, digest_ent
         # --- Build entity graph layer (two-layer model) ---
         if digest_entities and stats["asserted"] > 0:
             try:
-                # Create entity:* nodes from digest entities
+                # Load existing entity names for fuzzy matching
+                all_entity_nodes: dict[str, str] = {}  # {name: entity_node_id}
+                for r in store.entities_with_attr("name"):
+                    if r[0].startswith("entity:"):
+                        all_entity_nodes[r[1]] = r[0]
+                # Create entity:* nodes from digest entities (with fuzzy dedup)
+                entity_resolve: dict[str, str] = {}  # {normalized_name: resolved_node_id}
                 for ent in (digest_entities or []):
                     if isinstance(ent, dict):
                         ename = _normalize_entity(ent.get("name", ""))
@@ -539,12 +589,22 @@ def _execute_graph_ops(db_path: str, ops: list[dict], digest_ts: str, digest_ent
                     if not ename or len(ename) < 2:
                         continue
+                    # Check for fuzzy match against existing entities
+                    matched_id = _find_matching_entity(ename, all_entity_nodes)
+                    if matched_id:
+                        entity_resolve[ename] = matched_id
+                        if matched_id != f"entity:{ename}":
+                            print(f"  [graph] alias: \"{ename}\" → {matched_id}", file=sys.stderr)
+                        continue
                     entity_node_id = f"entity:{ename}"
                     existing = store.entity(entity_node_id)
                     if not existing:
                         tx = store.begin_tx("entity_graph")
                         store.assert_triple(tx, entity_node_id, "name", ename)
                         store.assert_triple(tx, entity_node_id, "type", etype)
+                    all_entity_nodes[ename] = entity_node_id
+                    entity_resolve[ename] = entity_node_id
                 # Link facts to their entity nodes via "about" ref edges
                 for op_data in ops:
@@ -554,18 +614,13 @@ def _execute_graph_ops(db_path: str, ops: list[dict], digest_ts: str, digest_ent
                     value = op_data.get("value", "")
                     attribute = op_data.get("attribute", "")
                     fact_eid = _fact_id(entity, attribute, value)
-                    entity_node_id = f"entity:{_normalize_entity(entity)}"
+                    norm_entity = _normalize_entity(entity)
+                    entity_node_id = entity_resolve.get(norm_entity, f"entity:{norm_entity}")
                     # Only link if entity node exists
                     if store.entity(entity_node_id):
                         tx = store.begin_tx("entity_graph")
                         store.assert_triple(tx, fact_eid, "about", entity_node_id, value_type="ref")
-                # Infer cross-entity refs from fact content
-                all_entity_nodes = {}
-                for r in store.entities_with_attr("name"):
-                    if r[0].startswith("entity:"):
-                        all_entity_nodes[r[1]] = r[0]  # {name: entity_id}
                 ref_count = 0
                 for fact_eid_row in store.entities_with_attr("value"):
                     fact_eid = fact_eid_row[0]
@@ -695,17 +750,146 @@ def _bootstrap_graph(memory_dir: str, db_path: str) -> dict:
     return {"bootstrapped": stats.get("asserted", 0)}
+# Pairs that fuzzy matching incorrectly clusters — reviewed and confirmed distinct.
+_DEDUP_SKIP_PAIRS = {
+    frozenset({"ai-driven-development", "spac-driven-development"}),
+    frozenset({"german", "germany"}),
+    frozenset({"llama", "ollama"}),
+    frozenset({"gemma", "gemma4"}),
+}
+def merge_entity_duplicates(db_path: str, dry_run: bool = True) -> dict:
+    """Merge fragmented entity nodes using fuzzy matching.
+    Idempotent: checks for migration:entity-dedup-v1 stamp.
+    """
+    from triplestore import TripleStore
+    store = TripleStore(db_path)
+    # Idempotency check
+    stamp = store.entity("migration:entity-dedup-v1")
+    if stamp:
+        print("migration:entity-dedup-v1 already applied — skipping", file=sys.stderr)
+        return {"status": "already_applied"}
+    # Load all entity nodes
+    all_entities: dict[str, str] = {}  # {name: entity_node_id}
+    for entity_id, name in store.entities_with_attr("name"):
+        if entity_id.startswith("entity:"):
+            all_entities[name] = entity_id
+    print(f"Total entity nodes: {len(all_entities)}", file=sys.stderr)
+    # Build clusters via greedy matching
+    remaining = dict(all_entities)  # copy
+    clusters: list[list[tuple[str, str]]] = []  # [[( name, node_id ), ...], ...]
+    while remaining:
+        seed_name, seed_id = next(iter(remaining.items()))
+        cluster = [(seed_name, seed_id)]
+        del remaining[seed_name]
+        # Find all matches for this seed
+        to_remove = []
+        for other_name, other_id in remaining.items():
+            matched = _find_matching_entity(other_name, {seed_name: seed_id})
+            if matched:
+                cluster.append((other_name, other_id))
+                to_remove.append(other_name)
+        for name in to_remove:
+            del remaining[name]
+        if len(cluster) > 1:
+            # Filter out known false-positive pairs
+            names_set = {n for n, _ in cluster}
+            if any(pair <= names_set for pair in _DEDUP_SKIP_PAIRS):
+                continue
+            clusters.append(cluster)
+    print(f"Found {len(clusters)} duplicate clusters", file=sys.stderr)
+    merge_count = 0
+    repoint_count = 0
+    for cluster in clusters:
+        # Canonical selection: if any entity has significantly more backrefs (5+),
+        # use it. Otherwise prefer longest name (most complete spelling).
+        max_refs = max(len(store.backrefs(nid)) for _, nid in cluster)
+        if max_refs >= 5:
+            cluster.sort(key=lambda x: (-len(store.backrefs(x[1])), -len(x[0]), x[0]))
+        else:
+            cluster.sort(key=lambda x: (-len(x[0]), x[0]))
+        canonical_name, canonical_id = cluster[0]
+        duplicates = cluster[1:]
+        dup_names = [d[0] for d in duplicates]
+        print(f"  cluster: {canonical_name} ← {dup_names}", file=sys.stderr)
+        if dry_run:
+            merge_count += len(duplicates)
+            continue
+        for dup_name, dup_id in duplicates:
+            # Re-point all refs pointing to this duplicate
+            refs = store.backrefs(dup_id)
+            for src_entity, attr in refs:
+                tx = store.begin_tx("entity_dedup")
+                store.retract_triple(tx, src_entity, attr, dup_id)
+                store.assert_triple(tx, src_entity, attr, canonical_id, value_type="ref")
+                repoint_count += 1
+            # Retract all triples of the duplicate entity itself
+            dup_attrs = store.entity(dup_id)
+            tx = store.begin_tx("entity_dedup")
+            for attr, values in dup_attrs.items():
+                if not isinstance(values, list):
+                    values = [values]
+                for val in values:
+                    store.retract_triple(tx, dup_id, attr, str(val))
+            merge_count += 1
+    # Stamp migration
+    if not dry_run and clusters:
+        tx = store.begin_tx("entity_dedup")
+        store.assert_triple(tx, "migration:entity-dedup-v1", "applied_at",
+                            datetime.now(timezone.utc).isoformat())
+        store.assert_triple(tx, "migration:entity-dedup-v1", "clusters_merged",
+                            str(len(clusters)))
+    result = {
+        "status": "dry_run" if dry_run else "applied",
+        "clusters": len(clusters),
+        "entities_merged": merge_count,
+        "refs_repointed": repoint_count,
+    }
+    print(json.dumps(result, indent=2), file=sys.stderr)
+    return result
 def main() -> None:
     parser = argparse.ArgumentParser(description="Knowledge Integrator")
     parser.add_argument("--memory-dir", required=True, help="Path to memory/ directory")
     parser.add_argument("--digest", default=None, help="SessionDigest JSON string")
     parser.add_argument("--bootstrap", action="store_true", help="One-time: seed graph from playbook")
     parser.add_argument("--retag", action="store_true", help="Re-extract tags for all existing facts")
+    parser.add_argument("--dedup-entities", action="store_true", help="Merge fragmented entity nodes")
+    parser.add_argument("--dry-run", action="store_true", help="Preview changes without applying")
     args = parser.parse_args()
     memory_dir = args.memory_dir
     db_path = str(Path(memory_dir) / "knowledge-graph.db")
+    # Entity dedup mode: merge fragmented entity nodes
+    if args.dedup_entities:
+        if not Path(db_path).exists():
+            output_json({"error": "knowledge-graph.db not found"})
+            return
+        result = merge_entity_duplicates(db_path, dry_run=args.dry_run)
+        output_json(result)
+        return
     # Bootstrap mode: seed graph from current playbook
     if args.bootstrap:
         result = _bootstrap_graph(memory_dir, db_path)

package/sinain-memory/__pycache__/common.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/__pycache__/embed_client.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/__pycache__/graph_query.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/__pycache__/triplestore.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/__init__.py DELETED Viewed

File without changes

package/sinain-memory/eval/__pycache__/__init__.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/assertions.py DELETED Viewed

@@ -1,267 +0,0 @@
-"""Behavioral assertion library for sinain-koog tick evaluation.
-Each assertion function validates a runtime invariant of the pipeline.
-Returns ``{"name": str, "passed": bool, "detail": str}``.
-"""
-from __future__ import annotations
-def _result(name: str, passed: bool, detail: str) -> dict:
-    return {"name": name, "passed": passed, "detail": detail}
-# ---------------------------------------------------------------------------
-# Playbook curator assertions
-# ---------------------------------------------------------------------------
-def assert_playbook_under_limit(curator_result: dict, limit: int = 50) -> dict:
-    """Verify playbook body stays under the line limit."""
-    lines = curator_result.get("playbookLines", 0)
-    if lines <= limit:
-        return _result("playbook_under_limit", True, f"body has {lines} lines (limit {limit})")
-    return _result("playbook_under_limit", False, f"body has {lines} lines, exceeds limit of {limit}")
-def assert_curator_respected_directive(curator_result: dict, directive: str) -> dict:
-    """Check that curator changes align with the curate directive."""
-    changes = curator_result.get("changes", {})
-    added = len(changes.get("added", []))
-    pruned = len(changes.get("pruned", []))
-    if directive == "aggressive_prune":
-        # Should have pruned items
-        if pruned > 0:
-            return _result("curator_respected_directive", True,
-                           f"aggressive_prune: pruned {pruned} items")
-        if added == 0 and pruned == 0:
-            return _result("curator_respected_directive", True,
-                           "aggressive_prune: no changes (acceptable if playbook already lean)")
-        return _result("curator_respected_directive", False,
-                       f"aggressive_prune: added {added} but pruned {pruned} — expected pruning")
-    if directive == "stability":
-        # Should not aggressively prune established patterns
-        if pruned > added + 2:
-            return _result("curator_respected_directive", False,
-                           f"stability: pruned {pruned} items (only added {added}) — too aggressive for stability mode")
-        return _result("curator_respected_directive", True,
-                       f"stability: added {added}, pruned {pruned} — conservative")
-    # normal / insufficient_data — any reasonable mix is fine
-    return _result("curator_respected_directive", True,
-                   f"{directive}: added {added}, pruned {pruned}")
-# ---------------------------------------------------------------------------
-# Signal analyzer assertions
-# ---------------------------------------------------------------------------
-def assert_no_repeat_action(signal_result: dict, recent_logs: list[dict], window: int = 3) -> dict:
-    """Verify recommendedAction doesn't repeat the last N ticks' actions."""
-    action = signal_result.get("recommendedAction")
-    if action is None or action.get("action") == "skip":
-        return _result("no_repeat_action", True, "no action recommended (skip/null)")
-    task = (action.get("task") or "").lower().strip()
-    if not task:
-        return _result("no_repeat_action", True, "no task description to compare")
-    # Collect recent action tasks
-    recent_tasks: list[str] = []
-    for log in recent_logs[:window]:
-        log_actions = log.get("actionsConsidered", [])
-        for a in log_actions:
-            if a.get("chosen"):
-                recent_tasks.append((a.get("reason") or a.get("task") or "").lower().strip())
-    # Check for near-duplicate (substring match to catch rephrasing)
-    for prev_task in recent_tasks:
-        if not prev_task:
-            continue
-        # If >60% of words overlap, consider it a repeat
-        task_words = set(task.split())
-        prev_words = set(prev_task.split())
-        if not task_words or not prev_words:
-            continue
-        overlap = len(task_words & prev_words) / max(len(task_words), len(prev_words))
-        if overlap > 0.6:
-            return _result("no_repeat_action", False,
-                           f"action task '{task[:60]}' overlaps with recent '{prev_task[:60]}' ({overlap:.0%} word overlap)")
-    return _result("no_repeat_action", True,
-                   f"action task is distinct from last {window} ticks")
-def assert_signal_confidence_threshold(signal_result: dict, threshold: float = 0.5) -> dict:
-    """Verify actions are only recommended above the confidence threshold."""
-    action = signal_result.get("recommendedAction")
-    if action is None or action.get("action") == "skip":
-        return _result("signal_confidence_threshold", True, "no action recommended")
-    confidence = action.get("confidence")
-    if confidence is None:
-        return _result("signal_confidence_threshold", False,
-                       "action recommended but no confidence value provided")
-    if confidence >= threshold:
-        return _result("signal_confidence_threshold", True,
-                       f"confidence {confidence:.2f} >= threshold {threshold}")
-    return _result("signal_confidence_threshold", False,
-                   f"confidence {confidence:.2f} < threshold {threshold}")
-# ---------------------------------------------------------------------------
-# Insight synthesizer assertions
-# ---------------------------------------------------------------------------
-def assert_insight_char_limit(synth_result: dict, limit: int = 500) -> dict:
-    """Verify suggestion+insight stays under the character limit."""
-    if synth_result.get("skip", False):
-        return _result("insight_char_limit", True, "output skipped")
-    suggestion = synth_result.get("suggestion", "")
-    insight = synth_result.get("insight", "")
-    total = len(suggestion) + len(insight)
-    if total <= limit:
-        return _result("insight_char_limit", True, f"total {total} chars (limit {limit})")
-    return _result("insight_char_limit", False, f"total {total} chars exceeds limit of {limit}")
-def assert_skip_reason_specific(synth_result: dict) -> dict:
-    """If skip=true, verify the reason is specific (not generic boilerplate)."""
-    if not synth_result.get("skip", False):
-        return _result("skip_reason_specific", True, "output not skipped")
-    reason = (synth_result.get("skipReason") or "").strip()
-    if not reason:
-        return _result("skip_reason_specific", False, "skip=true but no skipReason provided")
-    # Check against known-generic patterns
-    generic_phrases = [
-        "no new data",
-        "nothing new",
-        "no updates",
-        "insufficient data",
-        "not enough information",
-        "no changes",
-    ]
-    reason_lower = reason.lower()
-    for phrase in generic_phrases:
-        if reason_lower == phrase or (len(reason_lower) < 30 and phrase in reason_lower):
-            return _result("skip_reason_specific", False,
-                           f"skipReason is too generic: '{reason}'")
-    return _result("skip_reason_specific", True, f"skipReason is specific ({len(reason)} chars)")
-# ---------------------------------------------------------------------------
-# Memory miner assertions
-# ---------------------------------------------------------------------------
-def assert_miner_references_sources(miner_result: dict, daily_files: list[str]) -> dict:
-    """Verify mining findings reference actual source files that were provided."""
-    mined = miner_result.get("minedSources", [])
-    if not mined:
-        return _result("miner_references_sources", True, "no sources mined (early return)")
-    # daily_files contains basenames like "2026-02-21.md"
-    known_basenames = set(daily_files)
-    unknown = [s for s in mined if s not in known_basenames]
-    if unknown:
-        return _result("miner_references_sources", False,
-                       f"minedSources references unknown files: {unknown}")
-    return _result("miner_references_sources", True,
-                   f"all {len(mined)} mined sources are valid")
-# ---------------------------------------------------------------------------
-# Cross-script / structural assertions
-# ---------------------------------------------------------------------------
-def assert_schema_valid(script_name: str, output: dict, schema_errors: list[str]) -> dict:
-    """Wrap schema validation result as an assertion."""
-    if not schema_errors:
-        return _result(f"schema_valid_{script_name}", True, "output matches schema")
-    return _result(f"schema_valid_{script_name}", False,
-                   f"{len(schema_errors)} schema errors: {'; '.join(schema_errors[:3])}")
-def assert_playbook_header_footer_intact(playbook_text: str) -> dict:
-    """Verify the playbook still has its mining-index header and effectiveness footer."""
-    has_header = "<!-- mining-index:" in playbook_text
-    has_footer = "<!-- effectiveness:" in playbook_text
-    if has_header and has_footer:
-        return _result("playbook_header_footer_intact", True,
-                       "both mining-index and effectiveness comments present")
-    missing = []
-    if not has_header:
-        missing.append("mining-index")
-    if not has_footer:
-        missing.append("effectiveness")
-    return _result("playbook_header_footer_intact", False,
-                   f"missing playbook comments: {', '.join(missing)}")
-# ---------------------------------------------------------------------------
-# Runner: execute all applicable assertions for a tick
-# ---------------------------------------------------------------------------
-def run_tick_assertions(
-    log_entry: dict,
-    recent_logs: list[dict],
-    playbook_text: str,
-    daily_files: list[str],
-) -> list[dict]:
-    """Run all applicable assertions against a single tick's log entry.
-    Returns a list of assertion result dicts.
-    """
-    results: list[dict] = []
-    # Signal analyzer assertions
-    signals = log_entry.get("signals")
-    if signals is not None:
-        results.append(assert_signal_confidence_threshold(
-            {"signals": signals, "recommendedAction": log_entry.get("recommendedAction")},
-        ))
-        results.append(assert_no_repeat_action(
-            {"signals": signals, "recommendedAction": log_entry.get("recommendedAction")},
-            recent_logs,
-        ))
-    # Curator assertions — playbookChanges can be {"note": "skipped"} or full output
-    curator = log_entry.get("playbookChanges")
-    if isinstance(curator, dict) and "changes" in curator:
-        curator_with_lines = {**curator}
-        if "playbookLines" not in curator_with_lines:
-            curator_with_lines["playbookLines"] = curator.get("playbookLines", 0)
-        results.append(assert_playbook_under_limit(curator_with_lines))
-        directive = log_entry.get("curateDirective", "normal")
-        results.append(assert_curator_respected_directive(curator_with_lines, directive))
-    # Insight synthesizer assertions — output can be null (pipeline-level skip)
-    output = log_entry.get("output")
-    if isinstance(output, dict):
-        results.append(assert_insight_char_limit(output))
-        results.append(assert_skip_reason_specific(output))
-    # Mining assertions — log uses miningFindings (str) and minedSources (list)
-    mining = log_entry.get("miningResult")
-    if mining is not None:
-        results.append(assert_miner_references_sources(mining, daily_files))
-    elif log_entry.get("minedSources"):
-        # Reconstruct mining result from flat log fields
-        results.append(assert_miner_references_sources(
-            {"minedSources": log_entry.get("minedSources", [])}, daily_files
-        ))
-    # Playbook health (if we have playbook text)
-    if playbook_text:
-        results.append(assert_playbook_header_footer_intact(playbook_text))
-    return results

package/sinain-memory/eval/benchmarks/__init__.py DELETED Viewed

File without changes

package/sinain-memory/eval/benchmarks/__pycache__/__init__.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/base_adapter.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/config.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/evaluate.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/ingest.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/longmemeval_adapter.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/meeting_adapter.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/meeting_runner.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/report.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc DELETED Viewed

Binary file

package/sinain-memory/eval/benchmarks/base_adapter.py DELETED Viewed

@@ -1,43 +0,0 @@
-"""Base adapter and data classes for benchmark evaluation."""
-from __future__ import annotations
-from abc import ABC, abstractmethod
-from dataclasses import dataclass, field
-@dataclass
-class BenchmarkQuestion:
-    id: str
-    text: str
-    gold_answer: str
-    category: str  # single-session, multi-session, temporal, etc.
-    evidence_session_ids: list[str] = field(default_factory=list)
-    metadata: dict = field(default_factory=dict)
-@dataclass
-class BenchmarkInstance:
-    """A set of conversations + questions that share the same context."""
-    id: str
-    sessions: list[list[dict]]  # list of sessions, each a list of feed items {source, text, ts}
-    questions: list[BenchmarkQuestion] = field(default_factory=list)
-    raw_sessions: list[dict] = field(default_factory=list)  # original benchmark format (for full-context condition)
-    metadata: dict = field(default_factory=dict)
-class BenchmarkAdapter(ABC):
-    """Abstract adapter: converts a published benchmark into sinain's format."""
-    @property
-    @abstractmethod
-    def name(self) -> str:
-        """Benchmark name (e.g. 'longmemeval', 'locomo')."""
-    @abstractmethod
-    def load_dataset(self, data_dir: str) -> list[BenchmarkInstance]:
-        """Download (if needed) and parse the benchmark dataset."""
-    @abstractmethod
-    def format_full_context(self, instance: BenchmarkInstance) -> str:
-        """Render the full conversation history as a text string for the baseline condition."""

package/sinain-memory/eval/benchmarks/config.py DELETED Viewed

@@ -1,23 +0,0 @@
-"""Benchmark configuration — models, paths, thresholds."""
-from pathlib import Path
-BENCHMARKS_DIR = Path(__file__).resolve().parent
-DATA_DIR = BENCHMARKS_DIR / "data"
-RESULTS_DIR = BENCHMARKS_DIR / "results"
-# LLM models (via OpenRouter)
-QA_MODEL = "google/gemini-2.5-flash"
-JUDGE_MODEL = "openai/gpt-4o"
-# Retrieval
-K_VALUES = [1, 3, 5, 10]
-MAX_FACTS_PER_QUERY = 10
-# Ingestion
-DISTILLER_TIMEOUT_S = 30
-INTEGRATOR_TIMEOUT_S = 60
-# Dataset URLs
-LONGMEMEVAL_HF = "xiaowu0162/longmemeval-cleaned"
-LOCOMO_GITHUB = "https://raw.githubusercontent.com/snap-research/locomo/main/data/locomo10.json"