npm - @agentikos/omega-os - Versions diffs - 0.1.0 → 0.2.0 - Mend

@agentikos/omega-os 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

package/omega/Agentik_Engine/tests/test_rag.py ADDED Viewed

@@ -0,0 +1,287 @@
+"""Tests for the multi-RAG subsystem — five real retrievers + router.
+Every retriever is exercised end-to-end on real data:
+* HybridRetriever indexes a small corpus into a temp SQLite WAL store and
+  the right doc surfaces for a known-good query.
+* GraphRetriever adds typed edges and asserts depth-limited expansion.
+* AgenticRetriever multi-hops on top of HybridRetriever, terminates within
+  `max_hops`, and accumulates docs without duplicates.
+* CorrectiveRetriever exercises the refine-on-low-score path (the
+  MockProvider returns 40 then 90, so a retry MUST happen).
+* RAGRouter classifies, picks an inner strategy, wraps it in CRAG, and
+  returns a `RetrievalResult` with a strategy string that names the pick.
+Standalone runner: `python3 tests/test_rag.py`. Temp dir cleans up at end.
+"""
+from __future__ import annotations
+import shutil
+import sys
+import tempfile
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+from omega_engine.provider import MockProvider                     # noqa: E402
+from omega_engine.rag import (                                    # noqa: E402
+    AgenticRetriever,
+    CorrectiveRetriever,
+    Document,
+    GraphRetriever,
+    HybridRetriever,
+    RAGRouter,
+    RetrievalResult,
+)
+# Shared fixture corpus for the retriever tests in this module.
+# Three docs whose lexical and semantic separation is clear enough that a
+# simple BM25+cosine blend should rank the right one first for a focused query.
+# Each entry has a distinct `id` ("payments", "auth", "telemetry") that the
+# tests compare against when asserting which document ranked first.
+_CORPUS: list[Document] = [
+    Document(
+        id="payments",
+        text=("The pricing service computes invoice totals and applies "
+              "promotional discounts before sending receipts."),
+        metadata={"topic": "billing"},
+    ),
+    Document(
+        id="auth",
+        text=("The authentication module validates JWT tokens, manages "
+              "session refresh, and enforces role-based access control."),
+        metadata={"topic": "auth"},
+    ),
+    Document(
+        id="telemetry",
+        text=("The telemetry layer emits structured events to the SQLite "
+              "WAL store and exposes a Prometheus endpoint."),
+        metadata={"topic": "ops"},
+    ),
+]
+def _hybrid(tmp_dir: Path, alpha: float = 0.5) -> HybridRetriever:
+    """Build a HybridRetriever at ``tmp_dir / "rag.db"`` with `_CORPUS` indexed.
+    Args:
+        tmp_dir: directory that will hold the retriever's database file.
+        alpha: blend weight forwarded to ``HybridRetriever``.
+    Returns:
+        The open retriever; the caller is responsible for ``close()``.
+    Raises:
+        AssertionError: if ``index`` reports a count other than
+            ``len(_CORPUS)``.
+    """
+    h = HybridRetriever(tmp_dir / "rag.db", alpha=alpha)
+    try:
+        n = h.index(_CORPUS)
+        assert n == len(_CORPUS), f"indexed {n}, expected {len(_CORPUS)}"
+    except Exception:
+        # The caller never receives `h` on failure, so close it here or the
+        # handle would stay open while the caller removes the temp dir.
+        h.close()
+        raise
+    return h
+def test_hybrid_surfaces_the_right_doc():
+    """A JWT/session query must rank the `auth` doc first, with a score."""
+    tmp = Path(tempfile.mkdtemp(prefix="omega_rag_"))
+    h = None
+    try:
+        # Build inside the try so a setup failure still removes the temp dir.
+        h = _hybrid(tmp)
+        result = h.retrieve("how is the JWT session refresh handled?", k=3)
+        assert isinstance(result, RetrievalResult)
+        assert result.strategy == "hybrid"
+        assert result.documents, "expected at least one document"
+        # The auth doc is the only one that talks about JWT/session.
+        top_ids = [d.id for d in result.documents]
+        assert top_ids[0] == "auth", f"got {top_ids}"
+        # And every result carries its blended score in metadata.
+        assert "score" in result.documents[0].metadata
+        # k=3 means we should see up to 3 — never more.
+        assert len(result.documents) <= 3
+    finally:
+        if h is not None:
+            h.close()
+        shutil.rmtree(tmp, ignore_errors=True)
+def test_hybrid_dense_and_sparse_both_contribute():
+    """At alpha=1 (dense only) and alpha=0 (sparse only) the top doc for an
+    auth-flavoured query stays the auth doc — both legs are real.
+    Uses dim=4096 because the hashing-trick collision rate at the default
+    dim=256 is too high for short documents to reliably separate.  In a
+    real corpus you'd pick dim per the expected vocabulary size; for a
+    3-doc test we want headroom.
+    """
+    tmp = Path(tempfile.mkdtemp(prefix="omega_rag_"))
+    try:
+        for a in (0.0, 1.0):
+            h = HybridRetriever(tmp / f"rag_{a}.db", alpha=a, dim=4096)
+            try:
+                h.index(_CORPUS)
+                r = h.retrieve("JWT token validation", k=1)
+                assert r.documents, f"alpha={a}: empty result"
+                assert r.documents[0].id == "auth", (
+                    f"alpha={a}: top doc was {r.documents[0].id}"
+                )
+            finally:
+                # Close per iteration even when an assertion fails, so the
+                # database file is released before the temp dir is removed.
+                h.close()
+    finally:
+        shutil.rmtree(tmp, ignore_errors=True)
+def test_graph_expansion_and_persistence():
+    """Depth-limited neighbour expansion, query retrieval, and reload from disk.
+    Builds the chain auth -> payments -> billing -> telemetry and checks
+    that each extra unit of `depth` reaches exactly one more hop.
+    """
+    workdir = Path(tempfile.mkdtemp(prefix="omega_rag_"))
+    try:
+        graph_path = workdir / "graph.json"
+        graph = GraphRetriever(graph_path)
+        node_specs = (
+            ("auth", {"text": "authentication service",
+                      "metadata": {"owner": "alice"}}),
+            ("payments", {"text": "payments service",
+                          "metadata": {"owner": "bob"}}),
+            ("billing", {"text": "billing dashboard"}),
+            ("telemetry", {"text": "metrics + tracing"}),
+        )
+        for node_id, node_kwargs in node_specs:
+            graph.add_node(node_id, **node_kwargs)
+        edge_specs = (
+            ("auth", "payments", "depends_on"),
+            ("payments", "billing", "renders_into"),
+            ("billing", "telemetry", "emits_to"),
+        )
+        for src, dst, relation in edge_specs:
+            graph.add_edge(src, dst, relation)
+        # One hop from auth: only the direct neighbour.
+        one_hop = graph.neighbors("auth", depth=1)
+        assert "payments" in one_hop and "billing" not in one_hop, one_hop
+        # Two hops: billing becomes reachable, telemetry does not.
+        two_hops = graph.neighbors("auth", depth=2)
+        assert "billing" in two_hops and "telemetry" not in two_hops, two_hops
+        # Three hops: the whole chain.
+        three_hops = graph.neighbors("auth", depth=3)
+        assert {"payments", "billing", "telemetry"} <= set(three_hops), three_hops
+        # Query path: seed picks `auth` (token match), depth=2 expansion.
+        found = graph.retrieve("what does auth depend on?", k=4, depth=2)
+        found_ids = [doc.id for doc in found.documents]
+        assert "auth" in found_ids and "payments" in found_ids, found_ids
+        assert found.strategy == "graph"
+        # Persistence: a second retriever opened on the same file must see
+        # the edges written above.
+        reloaded = GraphRetriever(graph_path)
+        assert "billing" in reloaded.neighbors("payments", depth=1)
+    finally:
+        shutil.rmtree(workdir, ignore_errors=True)
+def test_agentic_multihop_terminates():
+    """Agentic retrieval stays within `max_hops` and returns unique docs."""
+    max_hops = 3
+    tmp = Path(tempfile.mkdtemp(prefix="omega_rag_"))
+    h = None
+    try:
+        # Build inside the try so a setup failure still removes the temp dir.
+        h = _hybrid(tmp)
+        provider = MockProvider()  # rag-agent terminates after 2 calls
+        a = AgenticRetriever(h, provider, max_hops=max_hops, k_per_hop=2)
+        r = a.retrieve("explain authentication", k=4)
+        assert r.strategy == "agentic"
+        assert r.documents, "agentic returned no docs"
+        # Hop metadata stamped on every doc.
+        hops = {d.metadata.get("hop") for d in r.documents}
+        assert hops, "no hop metadata"
+        # A doc without the key would put None in the set; fail clearly
+        # instead of letting max() raise a TypeError on mixed types.
+        assert None not in hops, f"doc missing hop metadata: hops={hops}"
+        # Hop indices are 0-based, so the largest allowed is max_hops - 1.
+        assert max(hops) <= max_hops - 1, f"hops={hops}"
+        # De-duplication: every id appears once.
+        ids = [d.id for d in r.documents]
+        assert len(ids) == len(set(ids)), f"duplicates in {ids}"
+    finally:
+        if h is not None:
+            h.close()
+        shutil.rmtree(tmp, ignore_errors=True)
+def test_corrective_grades_and_retries_on_low_score():
+    """MockProvider returns 40 (below threshold) then 90 — the Corrective
+    envelope MUST call the inner retriever twice."""
+    tmp = Path(tempfile.mkdtemp(prefix="omega_rag_"))
+    h = None
+    try:
+        # Build inside the try so a setup failure still removes the temp dir.
+        h = _hybrid(tmp)
+        provider = MockProvider()
+        c = CorrectiveRetriever(h, provider, threshold=70.0, max_retries=2)
+        r = c.retrieve("auth", k=3)
+        assert r.documents, "corrective returned no docs"
+        assert r.strategy.startswith("corrective+"), r.strategy
+        # The grader stamps `grader_avg` on each document; final value must
+        # be the SECOND call's score (90), proving the retry happened.
+        avgs = {d.metadata.get("grader_avg") for d in r.documents}
+        assert avgs == {90.0}, f"grader_avg={avgs} (retry never fired?)"
+        # Provider call counter is the most direct proof.
+        assert provider._grader_calls == 2, provider._grader_calls
+    finally:
+        if h is not None:
+            h.close()
+        shutil.rmtree(tmp, ignore_errors=True)
+def test_corrective_settles_quickly_when_score_is_already_high():
+    """If the first grade is already above threshold, no retry happens."""
+    tmp = Path(tempfile.mkdtemp(prefix="omega_rag_"))
+    h = None
+    try:
+        # Build inside the try so a setup failure still removes the temp dir.
+        h = _hybrid(tmp)
+        provider = MockProvider()
+        # threshold below the FIRST grade (40) → corrective is satisfied
+        # on the first try and never refines.
+        c = CorrectiveRetriever(h, provider, threshold=30.0, max_retries=2)
+        r = c.retrieve("auth", k=3)
+        assert r.documents
+        assert provider._grader_calls == 1, provider._grader_calls
+        assert r.strategy.startswith("corrective+"), r.strategy
+    finally:
+        if h is not None:
+            h.close()
+        shutil.rmtree(tmp, ignore_errors=True)
+def test_router_classifies_and_wraps_in_corrective():
+    """The router's strategy string names both the pick and the CRAG wrap."""
+    tmp = Path(tempfile.mkdtemp(prefix="omega_rag_"))
+    h = None
+    try:
+        # Build inside the try so a setup failure still removes the temp dir.
+        h = _hybrid(tmp)
+        g = GraphRetriever(tmp / "graph.json")
+        g.add_edge("auth", "payments", "depends_on")
+        provider = MockProvider()
+        router = RAGRouter(
+            strategies={"hybrid": h, "graph": g},
+            provider=provider,
+            default="hybrid",
+            threshold=70.0,
+            max_retries=2,
+        )
+        r = router.retrieve("JWT session refresh", k=3)
+        assert isinstance(r, RetrievalResult)
+        assert r.documents, "router returned no docs"
+        # Strategy string names the inner pick AND the corrective wrap.
+        assert r.strategy.startswith("router("), r.strategy
+        assert "corrective+" in r.strategy, r.strategy
+        # Provider override returned "hybrid", and the heuristic also
+        # defaults to hybrid for this query — so the inner pick is hybrid.
+        assert "router(hybrid)" in r.strategy, r.strategy
+    finally:
+        if h is not None:
+            h.close()
+        shutil.rmtree(tmp, ignore_errors=True)
+def test_router_heuristic_picks_graph_for_relational_query():
+    """With no provider override, a relational query classifies as `graph`."""
+    # Imported here because only this test's stub provider needs it.
+    from omega_engine.provider import AgentResult
+    tmp = Path(tempfile.mkdtemp(prefix="omega_rag_"))
+    h = None
+    try:
+        # Build inside the try so a setup failure still removes the temp dir.
+        h = _hybrid(tmp)
+        g = GraphRetriever(tmp / "graph.json")
+        g.add_edge("auth", "payments", "depends_on")
+        # Stub provider that returns no override → heuristic decides.
+        class HeuristicOnlyProvider(MockProvider):
+            def run(self, req):  # type: ignore[override]
+                if req.role == "rag-route":
+                    # Return empty artifact so router falls back to heuristic.
+                    return AgentResult(text="no override", claimed_done=True,
+                                       artifacts={})
+                return super().run(req)
+        provider = HeuristicOnlyProvider()
+        router = RAGRouter(
+            strategies={"hybrid": h, "graph": g},
+            provider=provider,
+            corrective=False,    # disable corrective for cleaner assertion
+        )
+        # The query contains "depend" — the graph heuristic must fire.
+        chosen = router.classify("what does auth depend on?")
+        assert chosen == "graph", chosen
+    finally:
+        if h is not None:
+            h.close()
+        shutil.rmtree(tmp, ignore_errors=True)
+def _run_all() -> bool:
+    """Run every module-level ``test_*`` callable and report PASS/FAIL.
+    Tests run in name order. A failing test is reported with its traceback
+    and the run continues, so the summary line always reflects all tests.
+    Returns:
+        True when every test passed, False otherwise.
+    """
+    import traceback  # local import keeps the runner self-contained
+    tests = [v for k, v in sorted(globals().items())
+             if k.startswith("test_") and callable(v)]
+    passed = 0
+    for t in tests:
+        try:
+            t()
+        except Exception:  # runner boundary: report and keep going
+            print(f"  FAIL  {t.__name__}")
+            traceback.print_exc()
+            continue
+        print(f"  PASS  {t.__name__}")
+        passed += 1
+    print(f"\n{passed}/{len(tests)} rag tests passed")
+    return passed == len(tests)
+if __name__ == "__main__":
+    sys.exit(0 if _run_all() else 1)

package/omega/Agentik_Engine/tests/test_snapshot_partial.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""Snapshotting (bounded reduction) + PARTIAL policy (per-topology join handling).
+Standalone: python3 tests/test_snapshot_partial.py
+"""
+import os
+import sys
+import tempfile
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+from omega_engine.audit import AuditGate                        # noqa: E402
+from omega_engine.bus import EventBus                           # noqa: E402
+from omega_engine.events import Event, EventType                # noqa: E402
+from omega_engine.executor import Executor                      # noqa: E402
+from omega_engine.provider import AgentRequest, AgentResult     # noqa: E402
+from omega_engine.reducer import reduce_task, reduce_task_fast  # noqa: E402
+from omega_engine.router import ModelRouter                     # noqa: E402
+from omega_engine.store import SQLiteStore                      # noqa: E402
+from omega_engine.task import Kind, TaskState                   # noqa: E402
+# ----- snapshotting ---------------------------------------------------------
+def test_snapshot_round_trip():
+    """A snapshot captures the reduced state; latest_snapshot returns it."""
+    import shutil  # local import keeps this test self-contained
+    # A private temp directory replaces the deprecated, race-prone
+    # tempfile.mktemp and lets one rmtree clean up everything inside it.
+    tmp_dir = tempfile.mkdtemp(prefix="omega_snap_")
+    store = None
+    try:
+        store = SQLiteStore(os.path.join(tmp_dir, "store.db"))
+        for et in (EventType.CREATED, EventType.DISPATCHED, EventType.STARTED):
+            store.append(Event(task_id="t1", type=et))
+        snap = store.snapshot("t1")
+        assert snap is not None
+        assert snap["state"] is TaskState.RUNNING
+        again = store.latest_snapshot("t1")
+        assert again is not None and again["state"] is TaskState.RUNNING
+    finally:
+        # Runs on failure too, so a failed assertion no longer leaks the db.
+        if store is not None:
+            store.close()
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+def test_snapshot_makes_reduce_correct_and_short():
+    """reduce_task_fast skips events covered by the snapshot."""
+    import shutil  # local import keeps this test self-contained
+    # A private temp directory replaces the deprecated, race-prone
+    # tempfile.mktemp and lets one rmtree clean up everything inside it.
+    tmp_dir = tempfile.mkdtemp(prefix="omega_snap_")
+    store = None
+    try:
+        store = SQLiteStore(os.path.join(tmp_dir, "store.db"))
+        seq = [EventType.CREATED, EventType.DISPATCHED, EventType.STARTED]
+        for et in seq:
+            store.append(Event(task_id="t1", type=et))
+        store.snapshot("t1")  # state == RUNNING, captured at the 3rd event
+        # 0 events after the snapshot -> fast path returns the snapshot state
+        assert reduce_task_fast(store, "t1") is TaskState.RUNNING
+        # add more events; fast == full
+        for et in (EventType.CLAIMED_DONE, EventType.VERIFYING,
+                   EventType.VERIFIED, EventType.COMPLETED):
+            store.append(Event(task_id="t1", type=et))
+        full = reduce_task(store.events_for("t1"))
+        fast = reduce_task_fast(store, "t1")
+        assert full is TaskState.COMPLETED
+        assert fast is TaskState.COMPLETED
+        # only events AFTER the snapshot should be folded by the fast path
+        assert len(store.events_since_snapshot("t1")) == 4
+    finally:
+        # Runs on failure too, so a failed assertion no longer leaks the db.
+        if store is not None:
+            store.close()
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+def test_snapshot_on_unknown_task_returns_none():
+    """Snapshot calls for a task with no events return None."""
+    import shutil  # local import keeps this test self-contained
+    # A private temp directory replaces the deprecated, race-prone
+    # tempfile.mktemp and lets one rmtree clean up everything inside it.
+    tmp_dir = tempfile.mkdtemp(prefix="omega_snap_")
+    store = None
+    try:
+        store = SQLiteStore(os.path.join(tmp_dir, "store.db"))
+        assert store.snapshot("ghost") is None
+        assert store.latest_snapshot("ghost") is None
+    finally:
+        # Runs on failure too, so a failed assertion no longer leaks the db.
+        if store is not None:
+            store.close()
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+# ----- PARTIAL policy -------------------------------------------------------
+class _PartialProvider:
+    """Test double that plans three workers, some of which fail their audit.
+    Planner roles get a 3-item plan; a worker whose index is in
+    `fail_indices` is given ``verify_cmd="false"``, the others ``"true"``.
+    Worker and verifier/audit roles get fixed canned results.
+    """
+    id = "test-partial"
+    def __init__(self, fail_indices) -> None:
+        # Stored as a set for membership tests while building the plan.
+        self._fail = set(fail_indices)
+    def run(self, req: AgentRequest) -> AgentResult:
+        """Return the canned AgentResult matching ``req.role``."""
+        if req.role in ("oracle", "manager", "aisb"):
+            steps = [
+                {"role": "worker",
+                 "spec": {"task": f"t{idx}",
+                          "verify_cmd": "false" if idx in self._fail else "true"}}
+                for idx in range(3)
+            ]
+            return AgentResult(text="planned", claimed_done=True, plan=steps)
+        if req.role == "worker":
+            worker_artifacts = {"files": ["x.py"], "summary": "done"}
+            return AgentResult(text="done", claimed_done=True,
+                               artifacts=worker_artifacts)
+        if req.role in ("verifier", "audit"):
+            verdict = {
+                "score": 95,
+                "verified": True,
+                "confidence": "high",
+                "summary": "ok",
+                "findings": [],
+                "fix_plan": [],
+            }
+            return AgentResult(text="ok", claimed_done=True,
+                               artifacts={"verdict": verdict})
+        # Any other role: a bare success result.
+        return AgentResult(text="ok", claimed_done=True)
+def _engine(provider, partial_policy="fail_up"):
+    """Wire a store, bus, router and Executor around `provider` for a test.
+    Args:
+        provider: object passed to ``ModelRouter.single``.
+        partial_policy: forwarded to ``Executor(partial_policy=...)``.
+    Returns:
+        ``(store, executor, db)``; the caller must ``store.close()`` and
+        ``os.remove(db)`` when finished.
+    Raises:
+        Whatever the wired constructors raise. The store is closed and the
+        db file removed first, so a rejected configuration leaves nothing
+        behind.
+    """
+    # NOTE(review): tempfile.mktemp is deprecated and race-prone. It is kept
+    # because callers remove `db` as a plain file path; confirm SQLiteStore
+    # accepts a pre-created empty file before switching to mkstemp.
+    db = tempfile.mktemp(suffix=".db")
+    store = SQLiteStore(db)
+    try:
+        bus = EventBus(store)
+        router = ModelRouter.single(provider)
+        executor = Executor(store, bus, router, AuditGate(),
+                            partial_policy=partial_policy)
+    except Exception:
+        # The caller never receives `store`/`db` on failure, so clean up here.
+        store.close()
+        if os.path.exists(db):
+            os.remove(db)
+        raise
+    return store, executor, db
+def test_partial_fail_up_default():
+    """Default policy: a PARTIAL scope fails the dispatcher."""
+    store, ex, db = _engine(_PartialProvider(fail_indices=[1]))
+    try:
+        result = ex.run_mission("partial mission")
+        assert result.final_state is TaskState.FAILED, result.final_state
+    finally:
+        # Runs on failure too, so a failed assertion no longer leaks the db.
+        store.close()
+        os.remove(db)
+def test_partial_accept_partial():
+    """accept_partial: the dispatcher completes despite a failed child."""
+    store, ex, db = _engine(_PartialProvider(fail_indices=[1]),
+                            partial_policy="accept_partial")
+    try:
+        result = ex.run_mission("partial mission")
+        assert result.final_state is TaskState.COMPLETED, result.final_state
+    finally:
+        # Runs on failure too, so a failed assertion no longer leaks the db.
+        store.close()
+        os.remove(db)
+def test_partial_retry_failed_spawns_extra_children():
+    """retry_failed: a replacement task is spawned for each failed child."""
+    store, ex, db = _engine(_PartialProvider(fail_indices=[1]),
+                            partial_policy="retry_failed")
+    try:
+        result = ex.run_mission("partial mission")
+        workers = [t for t in result.tasks.values() if t.kind is Kind.EXECUTOR]
+        # original 3 + 1 retry replacement = 4 worker tasks (the failing index
+        # was retried once; under this provider it fails again, so the final
+        # state is FAILED — but the retry attempt is observable)
+        assert len(workers) == 4, (
+            f"retry did not spawn a replacement: {len(workers)}"
+        )
+        # retry exhausted -> fail_up
+        assert result.final_state is TaskState.FAILED, result.final_state
+    finally:
+        # Runs on failure too, so a failed assertion no longer leaks the db.
+        store.close()
+        os.remove(db)
+def test_unknown_partial_policy_raises():
+    """An unrecognised `partial_policy` must be rejected with ValueError."""
+    try:
+        store, _ex, db = _engine(_PartialProvider([]), partial_policy="ignore")
+    except ValueError:
+        return  # expected: the bad policy was rejected
+    # Reaching here means the engine was built; release it before failing
+    # so the regression does not also leak an open store and a temp file.
+    store.close()
+    os.remove(db)
+    raise AssertionError("unknown partial_policy 'ignore' was accepted")
+def _run_all() -> bool:
+    """Run every module-level ``test_*`` callable and report PASS/FAIL.
+    Tests run in name order. A failing test is reported with its traceback
+    and the run continues, so the summary line always reflects all tests.
+    Returns:
+        True when every test passed, False otherwise.
+    """
+    import traceback  # local import keeps the runner self-contained
+    tests = [v for k, v in sorted(globals().items())
+             if k.startswith("test_") and callable(v)]
+    passed = 0
+    for t in tests:
+        try:
+            t()
+        except Exception:  # runner boundary: report and keep going
+            print(f"  FAIL  {t.__name__}")
+            traceback.print_exc()
+            continue
+        print(f"  PASS  {t.__name__}")
+        passed += 1
+    print(f"\n{passed}/{len(tests)} snapshot+partial tests passed")
+    return passed == len(tests)
+if __name__ == "__main__":
+    sys.exit(0 if _run_all() else 1)