PyPI - flowscript-agents - Versions diffs - 0.4.0__tar.gz → 0.4.1__tar.gz - Mend

flowscript-agents 0.4.0 (tar.gz) → 0.4.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flowscript-agents
-Version: 0.4.0
+Version: 0.4.1
 Summary: Complete agent memory: reasoning queries + vector search + auto-extraction. Decision intelligence for LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Pydantic AI, smolagents, LlamaIndex, Haystack, and CAMEL-AI.
 Project-URL: Homepage, https://flowscript.org
 Project-URL: Repository, https://github.com/phillipclapham/flowscript-agents

{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/__init__.py RENAMED Viewed

@@ -47,7 +47,7 @@ from .memory import (
 from .unified import UnifiedMemory
 from .explain import explain, explain_counterfactual
-__version__ = "0.4.0"
+__version__ = "0.4.1"
 __all__ = [
     "explain",
     "explain_counterfactual",

{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/continuity.py RENAMED Viewed

@@ -39,6 +39,16 @@ def _log(msg: str) -> None:
     sys.stderr.flush()
+# Matches graduated patterns (2x or 3x) with [evidence: <id> "explanation"] citations.
+# Captures: (1) level, (2) date, (3) cited IDs, (4) optional explanation in quotes.
+# Used by _validate_graduations to verify citations against actual session nodes.
+_GRADUATION_RE = re.compile(
+    r'\|\s*([23])x\s*\((\d{4}-\d{2}-\d{2})\)\s*\[evidence:\s*'
+    r'([a-fA-F0-9][a-fA-F0-9, ]*)'  # one or more hex IDs
+    r'(?:\s+"([^"]*)")?\s*\]'  # optional quoted explanation
+)
 # =============================================================================
 # Result types
 # =============================================================================
@@ -54,6 +64,9 @@ class ContinuityResult:
     truncated: bool  # whether LLM output exceeded max_chars
     session_nodes_count: int  # how many nodes were in this session
     patterns_extracted: int  # estimated from output (best-effort)
+    graduations_validated: int = 0  # citations that checked out
+    graduations_demoted: int = 0  # citations that failed → demoted
+    citation_reuse_max: int = 0  # max times any single node was cited (>2 = suspicious)
 # =============================================================================
@@ -133,8 +146,21 @@ This is where learning happens. Use these markers for density:
 **Temporal graduation (CRITICAL — this is what makes the system learn):**
 - Mark each pattern with `| Nx (date)` where N = validation count, date = last validated
 - New observation from THIS session not in existing patterns → add at `| 1x ({today})`
-- Observation that VALIDATES an existing 1x pattern → increment to `| 2x ({today})`
-- Observation that VALIDATES an existing 2x pattern → graduate to `| 3x ({today})`
+- Observation that VALIDATES an existing 1x pattern → increment to:
+  `| 2x ({today}) [evidence: <node_id> "brief explanation of how node validates pattern"]`
+  where `<node_id>` is the 8-char ID prefix (e.g., `abc12345`) from the session data above.
+  The explanation MUST reference specific content from the cited node.
+- Observation that VALIDATES an existing 2x pattern → graduate to:
+  `| 3x ({today}) [evidence: <node_id> "explanation"]`
+- Evidence citations with explanations are REQUIRED for all graduations (2x and 3x).
+  Cite the specific session node AND explain how it validates the pattern.
+  Without a valid citation, the graduation will be rejected.
+- For patterns you are NOT graduating (carrying forward at the same level), drop the
+  `[evidence:]` tag — evidence only appears on the graduation that created it.
+- Patterns marked `(ungrounded)` were demoted in a previous session due to invalid evidence.
+  They need FRESH validating evidence from THIS session to be re-graduated. Do not re-graduate
+  without new evidence — remove the `(ungrounded)` marker only when providing a valid citation.
+  Patterns marked `(ungrounded)` that you cannot provide fresh evidence for should be removed.
 - Patterns at 3x: extract the PRINCIPLE underneath, not the surface observations.
   Multiple related observations → single meta-pattern. This is compression-as-cognition.
 - Patterns with dates older than 7 days and no new validation → remove (they're stale)
@@ -144,7 +170,7 @@ Group related patterns in FlowScript blocks: `{{topic: ... }}`
 **Example Patterns section:**
 ```
 {{database_architecture:
-  thought: ACID compliance outweighs raw speed for financial data | 2x (2026-03-30)
+  thought: ACID compliance outweighs raw speed | 2x (2026-03-30) [evidence: 4931b6a8 "PostgreSQL chosen for ACID compliance"]
   thought: connection pooling is the real performance bottleneck | 1x (2026-03-30)
   ? horizontal scaling strategy ><[single-writer vs multi-writer] | 1x (2026-03-29)
 }}
@@ -304,6 +330,7 @@ class ContinuityManager:
         self,
         memory: Any,
         existing_continuity: str | None = None,
+        citations_seen: bool = False,
     ) -> ContinuityResult:
         """Produce a compressed continuity file from session memory.
@@ -311,6 +338,7 @@ class ContinuityManager:
             memory: A Memory instance containing the session's nodes.
             existing_continuity: The current continuity file text (if any).
                                  Pass None for first session.
+            citations_seen: If True, enforces citation requirement (fail-safe sunset).
         Returns:
             ContinuityResult with the compressed continuity text and metadata.
@@ -325,7 +353,8 @@ class ContinuityManager:
         temporal_map = dict(memory._temporal_map)
         return self.produce_from_nodes(
-            nodes, relationships, states, existing_continuity, temporal_map
+            nodes, relationships, states, existing_continuity, temporal_map,
+            citations_seen=citations_seen,
         )
     def produce_from_nodes(
@@ -335,12 +364,20 @@ class ContinuityManager:
         states: list[Any],
         existing_continuity: str | None = None,
         temporal_map: dict[str, Any] | None = None,
+        citations_seen: bool = False,
     ) -> ContinuityResult:
         """Produce continuity from raw node lists (alternative to Memory instance).
         Useful when you have nodes but not a full Memory object, e.g.,
         from a filtered set or from deserialized data.
+        Args:
+            citations_seen: If True, enforces citation requirement on all today's
+                           graduations. Set from metadata after first successful citation.
         """
+        import datetime
+        today = datetime.date.today().isoformat()
         session_summary = _format_session_nodes(
             nodes, relationships, states, temporal_map
         )
@@ -350,6 +387,7 @@ class ContinuityManager:
             existing_continuity=existing_continuity,
             project_name=self._project_name,
             max_chars=self._max_chars,
+            today=today,
         )
         _log(f"Producing continuity ({len(nodes)} nodes, max {self._max_chars} chars)")
@@ -374,6 +412,25 @@ class ContinuityManager:
             # (first session, something is better than nothing)
             _log("WARNING: No existing continuity to fall back to — using LLM output as-is")
+        # Validate graduation citations against actual session nodes.
+        # Only checks citations from today (carried-forward patterns are trusted).
+        valid_ids = {n.id[:8].lower() for n in nodes}
+        node_content_map = {n.id[:8].lower(): n.content for n in nodes}
+        text, grad_validated, grad_demoted, reuse_max = self._validate_graduations(
+            text, valid_ids, today=today, node_content_map=node_content_map,
+            citations_seen=citations_seen,
+        )
+        if grad_demoted:
+            _log(
+                f"Graduation validation: {grad_validated} validated, "
+                f"{grad_demoted} demoted (ungrounded)"
+            )
+        if reuse_max > 2:
+            _log(
+                f"Graduation warning: single node cited {reuse_max} times "
+                f"(possible citation gaming)"
+            )
         truncated = False
         if len(text) > self._max_chars:
             truncated = True
@@ -390,6 +447,9 @@ class ContinuityManager:
             truncated=truncated,
             session_nodes_count=len(nodes),
             patterns_extracted=patterns_extracted,
+            graduations_validated=grad_validated,
+            graduations_demoted=grad_demoted,
+            citation_reuse_max=reuse_max,
         )
     # -- File I/O --
@@ -404,6 +464,54 @@ class ContinuityManager:
         p = Path(memory_path)
         return str(p.parent / f"{p.stem}.continuity.md")
+    @staticmethod
+    def meta_path(memory_path: str) -> str:
+        """Get the metadata sidecar path. ./agent.json → ./agent.continuity.meta.json"""
+        p = Path(memory_path)
+        return str(p.parent / f"{p.stem}.continuity.meta.json")
+    @staticmethod
+    def load_meta(memory_path: str) -> dict:
+        """Load continuity metadata from the JSON sidecar.
+        Returns a dict with keys: sessions_produced, citations_seen, format_version.
+        Returns defaults if the file doesn't exist.
+        """
+        import json
+        path = ContinuityManager.meta_path(memory_path)
+        defaults = {"sessions_produced": 0, "citations_seen": False, "format_version": 1}
+        if not os.path.exists(path):
+            return defaults
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            # Merge with defaults for forward compatibility
+            return {**defaults, **data}
+        except (json.JSONDecodeError, OSError):
+            _log(f"WARNING: corrupt continuity meta at {path} — using defaults")
+            return defaults
+    @staticmethod
+    def save_meta(meta: dict, memory_path: str) -> str:
+        """Save continuity metadata to the JSON sidecar. Atomic write."""
+        import json
+        path = ContinuityManager.meta_path(memory_path)
+        tmp_path = path + ".tmp"
+        try:
+            with open(tmp_path, "w", encoding="utf-8") as f:
+                json.dump(meta, f, indent=2, sort_keys=True)
+                f.write("\n")
+                f.flush()
+                os.fsync(f.fileno())
+            os.replace(tmp_path, path)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
+        return path
     # -- Validation --
     _REQUIRED_SECTIONS = {"state", "patterns", "decisions", "context"}
@@ -424,6 +532,166 @@ class ContinuityManager:
                         found.add(section)
         return found == cls._REQUIRED_SECTIONS
+    # Minimum meaningful words for explanation-to-node content overlap check.
+    # Short/common words are excluded to avoid false positives.
+    _STOP_WORDS = frozenset(
+        "a an the is are was were be been being have has had do does did "
+        "will would shall should may might can could this that these those "
+        "it its he she they we you i me my our his her their in on at to "
+        "for of by with from and or but not no nor so if as".split()
+    )
+    # Matches bare graduations (2x or 3x) WITHOUT [evidence:] tags.
+    # Used to enforce citation requirement after fail-safe sunset.
+    _BARE_GRADUATION_RE = re.compile(
+        r"\|\s*([23])x\s*\((\d{4}-\d{2}-\d{2})\)\s*(?!\[evidence:)"
+    )
+    @staticmethod
+    def _validate_graduations(
+        text: str,
+        valid_ids: set[str],
+        today: str | None = None,
+        node_content_map: dict[str, str] | None = None,
+        citations_seen: bool = False,
+    ) -> tuple[str, int, int, int]:
+        """Validate evidence citations on graduated patterns.
+        Scans the ## Patterns section for 2x/3x lines with [evidence: <id> "explanation"].
+        Only validates citations whose date matches today (newly graduated this
+        session). Carried-forward patterns from previous sessions pass through
+        unchanged — their evidence was valid when originally graduated.
+        Validation checks (all must pass for a citation to be accepted):
+        1. At least one cited ID exists in the current session's node set
+        2. If an explanation is provided and node_content_map is available,
+           the explanation must reference actual content from the cited node
+           (word overlap check — prevents citation of irrelevant nodes)
+        If validation fails, demotes the graduation (3x→2x, 2x→1x).
+        Fail-safe sunset: when citations_seen=True, today's graduations WITHOUT
+        [evidence:] tags are also demoted. Before citations_seen, they pass through
+        (migration grace period). Once the LLM demonstrates citation ability, it
+        must always cite.
+        Returns:
+            (possibly_modified_text, validated_count, demoted_count, citation_reuse_max)
+        """
+        if today is None:
+            import datetime
+            today = datetime.date.today().isoformat()
+        lines = text.split("\n")
+        in_patterns = False
+        validated = 0
+        demoted = 0
+        citation_counts: dict[str, int] = {}  # track per-node citation frequency
+        for i, line in enumerate(lines):
+            # Track section boundaries (substring match, consistent with _validate_structure)
+            if line.startswith("## "):
+                in_patterns = "pattern" in line.lower()
+                continue
+            if not in_patterns:
+                continue
+            match = _GRADUATION_RE.search(line)
+            if match:
+                level = int(match.group(1))  # 2 or 3
+                date_str = match.group(2)    # YYYY-MM-DD
+                cited_raw = match.group(3)
+                explanation = match.group(4)  # may be None if no quotes
+                # Only validate citations from THIS session (today's date).
+                # Carried-forward patterns retain their evidence unchecked.
+                if date_str != today:
+                    continue
+                # Normalize cited IDs: lowercase, truncate to 8 chars, filter empties
+                cited_ids = {
+                    cid.strip().lower()[:8]
+                    for cid in re.split(r"[,\s]+", cited_raw)
+                    if cid.strip()
+                }
+                # Track citation frequency
+                for cid in cited_ids & valid_ids:
+                    citation_counts[cid] = citation_counts.get(cid, 0) + 1
+                # Check 1: at least one cited ID exists in session nodes
+                ids_valid = bool(cited_ids & valid_ids)
+                # Check 2: explanation references cited node content (if available)
+                explanation_valid = True
+                if ids_valid and explanation and node_content_map:
+                    matched_id = next(iter(cited_ids & valid_ids))
+                    node_content = node_content_map.get(matched_id, "")
+                    if node_content:
+                        explanation_valid = ContinuityManager._check_explanation_overlap(
+                            explanation, node_content
+                        )
+                if ids_valid and explanation_valid:
+                    validated += 1
+                else:
+                    demoted += 1
+                    demoted_level = level - 1
+                    old_marker = match.group(0)
+                    new_marker = old_marker.replace(
+                        f"| {level}x", f"| {demoted_level}x"
+                    )
+                    new_marker = re.sub(
+                        r'\[evidence:\s*[a-fA-F0-9][a-fA-F0-9, ]*(?:\s+"[^"]*")?\s*\]',
+                        "(ungrounded)", new_marker
+                    )
+                    lines[i] = line.replace(old_marker, new_marker)
+                continue
+            # Fail-safe sunset: once the LLM has demonstrated citation ability,
+            # today's graduations WITHOUT [evidence:] are demoted.
+            if not citations_seen:
+                continue
+            bare_match = ContinuityManager._BARE_GRADUATION_RE.search(line)
+            if not bare_match:
+                continue
+            bare_level = int(bare_match.group(1))
+            bare_date = bare_match.group(2)
+            if bare_date != today:
+                continue
+            demoted += 1
+            demoted_level = bare_level - 1
+            old_marker = bare_match.group(0)
+            new_marker = old_marker.replace(
+                f"| {bare_level}x", f"| {demoted_level}x"
+            )
+            lines[i] = line.replace(old_marker, new_marker + " (needs-evidence)")
+        reuse_max = max(citation_counts.values()) if citation_counts else 0
+        return "\n".join(lines), validated, demoted, reuse_max
+    @classmethod
+    def _check_explanation_overlap(cls, explanation: str, node_content: str) -> bool:
+        """Check if an explanation references actual content from the cited node.
+        Uses word overlap (excluding stop words). At least one meaningful word
+        from the explanation must appear in the node content. This prevents
+        generic explanations like "confirms pattern" while allowing legitimate
+        paraphrasing.
+        """
+        def meaningful_words(text: str) -> set[str]:
+            return {
+                w for w in re.split(r"[^a-zA-Z0-9]+", text.lower())
+                if len(w) > 2 and w not in cls._STOP_WORDS
+            }
+        explanation_words = meaningful_words(explanation)
+        node_words = meaningful_words(node_content)
+        return bool(explanation_words & node_words)
     # -- File I/O --
     def save(self, text: str, memory_path: str) -> str:

{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/mcp.py RENAMED Viewed

@@ -579,8 +579,8 @@ _TOOL_DEFS_RAW = [
             "returned framework to analyze your problem thoroughly — deconstruct to "
             "fundamentals, trace consequences across multiple orders, verify "
             "assumptions explicitly, and hold contradictions without premature "
-            "resolution. Key insights from your analysis will be saved to memory "
-            "as typed reasoning nodes."
+            "resolution. After analysis, call add_memory to save key insights — "
+            "without this, your analysis is lost between sessions."
         ),
         "inputSchema": {
             "type": "object",
@@ -607,7 +607,8 @@ _TOOL_DEFS_RAW = [
             "fundamentally different angle. Returns a creative exploration framework. "
             "After calling, challenge every assumption — what constraints are real vs "
             "inherited? What would the opposite approach look like? What patterns from "
-            "unrelated domains apply? Insights will be saved to memory."
+            "unrelated domains apply? After exploration, call add_memory to save "
+            "breakthrough insights — without this, your exploration is lost between sessions."
         ),
         "inputSchema": {
             "type": "object",
@@ -634,8 +635,9 @@ _TOOL_DEFS_RAW = [
             "with assumption-breaking for a two-pronged attack: systematic depth AND "
             "lateral thinking simultaneously. Use when the problem requires both "
             "understanding WHY current approaches fail AND imagining fundamentally "
-            "different solutions. Returns a comprehensive framework. Key insights "
-            "saved to memory."
+            "different solutions. Returns a comprehensive framework. After analysis, "
+            "call add_memory to save key findings — without this, your analysis is "
+            "lost between sessions."
         ),
         "inputSchema": {
             "type": "object",
@@ -921,12 +923,19 @@ class MCPHandler:
         continuity_result = None
         if self._continuity_mgr and self._memory_path:
             try:
+                meta = ContinuityManager.load_meta(self._memory_path)
                 existing = ContinuityManager.load(self._memory_path)
                 continuity_result = self._continuity_mgr.produce(
                     self._umem.memory,
                     existing_continuity=existing,
+                    citations_seen=meta.get("citations_seen", False),
                 )
                 self._continuity_mgr.save(continuity_result.text, self._memory_path)
+                # Update metadata
+                meta["sessions_produced"] = meta.get("sessions_produced", 0) + 1
+                if continuity_result.graduations_validated > 0:
+                    meta["citations_seen"] = True
+                ContinuityManager.save_meta(meta, self._memory_path)
             except Exception as e:
                 _log(f"Continuity production failed: {e}")
                 # Non-fatal — session_wrap still proceeds
@@ -949,7 +958,7 @@ class MCPHandler:
             "path": result.path,
         }
-        # Include continuity metadata in response
+        # Always include continuity key so callers can distinguish disabled/error/success.
         if continuity_result:
             response["continuity"] = {
                 "produced": True,
@@ -959,6 +968,10 @@ class MCPHandler:
                 "truncated": continuity_result.truncated,
                 "path": ContinuityManager.continuity_path(self._memory_path),
             }
+        elif self._continuity_mgr:
+            response["continuity"] = {"produced": False, "reason": "error"}
+        else:
+            response["continuity"] = {"produced": False, "reason": "disabled"}
         return response
@@ -1647,11 +1660,17 @@ def run_server(
                 current_nodes = umem.memory.size
                 if current_nodes > _last_node_count[0]:
                     try:
+                        meta = ContinuityManager.load_meta(memory_path)
                         existing = ContinuityManager.load(memory_path)
                         cont_result = continuity_mgr.produce(
                             umem.memory, existing_continuity=existing,
+                            citations_seen=meta.get("citations_seen", False),
                         )
                         continuity_mgr.save(cont_result.text, memory_path)
+                        meta["sessions_produced"] = meta.get("sessions_produced", 0) + 1
+                        if cont_result.graduations_validated > 0:
+                            meta["citations_seen"] = True
+                        ContinuityManager.save_meta(meta, memory_path)
                         _continuity_produced[0] = True
                         _last_node_count[0] = current_nodes
                         _log(f"Auto-wrap: continuity produced ({cont_result.char_count} chars)")

{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/tool-integrity.json RENAMED Viewed

@@ -14,8 +14,8 @@
   "remove_memory": "ee604c8f87855e32b4509162048168d0c941da79339f907d7d921a55780de830",
   "search_memory": "7e91e30bc03b5a2c990b83a33c00cf512c5c7c2a2e204c546206ffe606010064",
   "session_wrap": "ea1e2b2048ef4854de595601105375cfda91856a11851300e864c1e5358894b4",
-  "think_breakthrough": "8e0734bd5273943395a762a5c138882441a8a345e2b7b7bd4acdf81c1a94bb52",
-  "think_creative": "1347c8687847d6d2bf263bd7ae8d1d2bf09fa72ef213d185c049b67863e138fa",
-  "think_deeper": "f403bf30f55530674aeb31d4c3a5d9f58b9817fa0b7bed9ad61772b37a191163",
+  "think_breakthrough": "2e2b86d8e4d1c10c80cd9dcc0e55a5ec5642f0633e4e42bd8bf5dfd87c59b4c3",
+  "think_creative": "f648d9dd59e4c1901fd532d92568b3f102dc130b5f6383bfad64c00afce2ca0a",
+  "think_deeper": "4e190d9c344323be20fea2243830732247ec580b34fd936a837c082f0d6d8b76",
   "verify_audit": "2e93d3118ebeed1a1113e423ec915b8dd987c5d2c4adf6fefcd93fa0c931483f"
 }

{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "flowscript-agents"
-version = "0.4.0"
+version = "0.4.1"
 description = "Complete agent memory: reasoning queries + vector search + auto-extraction. Decision intelligence for LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Pydantic AI, smolagents, LlamaIndex, Haystack, and CAMEL-AI."
 readme = "README.md"
 license = "MIT"

{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_continuity.py RENAMED Viewed

@@ -489,3 +489,451 @@ class TestTruncation:
         assert result.text.startswith("# Agent")
         # Should have at least State section
         assert "## State" in result.text
+class TestGraduationValidation:
+    """Tests for graph-grounded graduation — anti-semantic-inbreeding defense."""
+    def test_valid_citation_kept(self):
+        text = (
+            "## Patterns\n"
+            "thought: caching helps | 2x (2026-03-30) [evidence: abc12345]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345", "def67890"}, today="2026-03-30"
+        )
+        assert "| 2x" in result_text
+        assert "ungrounded" not in result_text
+        assert validated == 1
+        assert demoted == 0
+    def test_invalid_citation_demoted(self):
+        text = (
+            "## Patterns\n"
+            "thought: caching helps | 2x (2026-03-30) [evidence: ffffffff]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert "| 1x" in result_text
+        assert "| 2x" not in result_text
+        assert "(ungrounded)" in result_text
+        assert validated == 0
+        assert demoted == 1
+    def test_3x_demoted_to_2x(self):
+        text = (
+            "## Patterns\n"
+            "thought: principle | 3x (2026-03-30) [evidence: badbadba]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert "| 2x" in result_text
+        assert "| 3x" not in result_text
+        assert "(ungrounded)" in result_text
+        assert demoted == 1
+    def test_no_citations_passthrough(self):
+        """Old-format patterns without [evidence:] pass through unchanged."""
+        text = (
+            "## Patterns\n"
+            "thought: caching helps | 2x (2026-03-30)\n"
+            "thought: pooling matters | 3x (2026-03-30)\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert result_text == text
+        assert validated == 0
+        assert demoted == 0
+    def test_mixed_valid_and_invalid(self):
+        text = (
+            "## Patterns\n"
+            "thought: good pattern | 2x (2026-03-30) [evidence: abc12345]\n"
+            "thought: hallucinated | 2x (2026-03-30) [evidence: ffffffff]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert validated == 1
+        assert demoted == 1
+        # First pattern kept at 2x, second demoted to 1x
+        lines = result_text.split("\n")
+        assert "| 2x" in lines[1]
+        assert "| 1x" in lines[2]
+        assert "(ungrounded)" in lines[2]
+    def test_multiple_citations_one_valid_sufficient(self):
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 2x (2026-03-30) [evidence: bad00000, abc12345]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert validated == 1
+        assert demoted == 0
+        assert "| 2x" in result_text
+    def test_1x_not_affected(self):
+        """1x patterns are new observations — never checked for citations."""
+        text = (
+            "## Patterns\n"
+            "thought: new observation | 1x (2026-03-30)\n"
+            "thought: also new | 1x (2026-03-30) [evidence: ffffffff]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        # 1x lines are never matched by _GRADUATION_RE (only matches 2x/3x)
+        assert validated == 0
+        assert demoted == 0
+    def test_outside_patterns_section_ignored(self):
+        """Citations in non-Patterns sections should not be validated."""
+        text = (
+            "## State\n"
+            "some state | 2x (2026-03-30) [evidence: ffffffff]\n"
+            "## Patterns\n"
+            "thought: real pattern | 2x (2026-03-30) [evidence: abc12345]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        # Only the Patterns section line is checked
+        assert validated == 1
+        assert demoted == 0
+        # State section line unchanged (still has ffffffff)
+        assert "ffffffff" in result_text
+    def test_uppercase_citation_normalized(self):
+        """LLMs may uppercase hex — citations should be case-insensitive."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 2x (2026-03-30) [evidence: ABC12345]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert validated == 1
+        assert demoted == 0
+    def test_space_separated_citations(self):
+        """LLMs might use spaces instead of commas between IDs."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 2x (2026-03-30) [evidence: bad00000 abc12345]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert validated == 1
+        assert demoted == 0
+    def test_long_id_truncated_to_8_chars(self):
+        """LLM might cite full 64-char ID — should be truncated to 8 for matching."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 2x (2026-03-30) [evidence: abc12345ffffffffffffffff]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert validated == 1
+        assert demoted == 0
+    def test_carried_forward_evidence_not_demoted(self):
+        """Patterns from previous sessions (old dates) should pass through unchanged."""
+        # Two graduations: one dated before `today`, one dated `today`.
+        text = (
+            "## Patterns\n"
+            "thought: old pattern | 2x (2026-03-28) [evidence: abc12345]\n"
+            "thought: new pattern | 2x (2026-03-30) [evidence: def67890]\n"
+            "## Decisions\n"
+        )
+        # abc12345 is NOT in valid_ids, but its date is old → should pass through
+        # def67890 IS in valid_ids and its date matches today → validated
+        result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
+            text, {"def67890"}, today="2026-03-30"
+        )
+        # Only the today-dated line is counted; the older line is expected to be
+        # neither validated nor demoted.
+        assert validated == 1
+        assert demoted == 0
+        # Old pattern still at 2x (not demoted despite abc12345 not in current nodes)
+        assert "2026-03-28" in result_text
+        assert "ungrounded" not in result_text
+    def test_graduation_validation_through_produce(self):
+        """Integration: graduation validation works through the full produce() pipeline."""
+        import datetime
+        # NOTE(review): `today` is computed here and presumably again inside
+        # produce(); a run that straddles midnight could disagree — confirm.
+        today = datetime.date.today().isoformat()
+        # Node ID 50d7c6fd = "Connection pooling will be the real bottleneck"
+        # from _make_session_memory(). Use today's date so validation fires.
+        # NOTE: despite the variable name, this canned response carries one
+        # valid citation (50d7c6fd) and one invalid citation (ffffffff).
+        # The doubled braces below are f-string escapes for literal { and }.
+        response_with_valid_citation = f"""# Agent — Memory (v1)
+## State
+Working on database selection.
+## Patterns
+{{database_architecture:
+  thought: connection pooling is critical | 2x ({today}) [evidence: 50d7c6fd]
+  thought: ACID compliance matters | 2x ({today}) [evidence: ffffffff]
+}}
+## Decisions
+[decided(rationale: "ACID required", on: "{today}")] Use PostgreSQL
+## Context
+Selected PostgreSQL, investigating pooling."""
+        # The mocked LLM is given the canned text above as its response.
+        mgr = ContinuityManager(
+            llm=_make_mock_llm(response_with_valid_citation),
+        )
+        mem = _make_session_memory()
+        result = mgr.produce(mem)
+        # One citation valid (50d7c6fd exists), one invalid (ffffffff doesn't)
+        assert result.graduations_validated == 1
+        assert result.graduations_demoted == 1
+        assert "(ungrounded)" in result.text
+        # The valid graduation should still be 2x
+        assert "| 2x" in result.text
+        # The invalid one should be demoted to 1x
+        assert "| 1x" in result.text
+class TestExplanationValidation:
+    """Tests for explain-your-evidence — citation relevance checking."""
+    def test_explanation_with_node_content_overlap_passes(self):
+        """An explanation that shares wording with the cited node counts as validated."""
+        text = (
+            '## Patterns\n'
+            'thought: pooling matters | 2x (2026-03-30) '
+            '[evidence: abc12345 "connection pooling identified as bottleneck"]\n'
+            '## Decisions\n'
+        )
+        # Maps the cited ID to the node content the quoted explanation is compared with.
+        node_map = {"abc12345": "Connection pooling will be the real bottleneck"}
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", node_content_map=node_map
+        )
+        assert validated == 1
+        assert demoted == 0
+    def test_explanation_without_overlap_demoted(self):
+        """A valid ID with a generic, non-overlapping explanation is demoted as ungrounded."""
+        text = (
+            '## Patterns\n'
+            'thought: pooling matters | 2x (2026-03-30) '
+            '[evidence: abc12345 "confirms the pattern"]\n'
+            '## Decisions\n'
+        )
+        node_map = {"abc12345": "Connection pooling will be the real bottleneck"}
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", node_content_map=node_map
+        )
+        # "confirms the pattern" has no meaningful overlap with node content
+        assert validated == 0
+        assert demoted == 1
+        assert "(ungrounded)" in result_text
+    def test_no_explanation_still_passes_id_check(self):
+        """Citations without explanations pass on ID alone (backward compat)."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 2x (2026-03-30) [evidence: abc12345]\n"
+            "## Decisions\n"
+        )
+        node_map = {"abc12345": "Some node content"}
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", node_content_map=node_map
+        )
+        # No explanation = no overlap check, just ID validation
+        assert validated == 1
+        assert demoted == 0
+    def test_no_node_map_skips_explanation_check(self):
+        """Without node_content_map, explanation check is skipped."""
+        text = (
+            '## Patterns\n'
+            'thought: pattern | 2x (2026-03-30) '
+            '[evidence: abc12345 "totally irrelevant words"]\n'
+            '## Decisions\n'
+        )
+        # node_content_map=None: the irrelevant explanation must not cause a demotion.
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", node_content_map=None
+        )
+        assert validated == 1
+        assert demoted == 0
+class TestCitationReuse:
+    """Tests for citation gaming detection."""
+    def test_reuse_count_tracked(self):
+        """Citing the same ID on three graduations reports a reuse maximum of 3."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern A | 2x (2026-03-30) [evidence: abc12345]\n"
+            "thought: pattern B | 2x (2026-03-30) [evidence: abc12345]\n"
+            "thought: pattern C | 2x (2026-03-30) [evidence: abc12345]\n"
+            "## Decisions\n"
+        )
+        # Only the fourth element of the returned tuple (the reuse maximum) is checked here.
+        _text, _v, _d, reuse_max = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30"
+        )
+        assert reuse_max == 3
+class TestContinuityMeta:
+    """Tests for continuity metadata sidecar (session tracking, fail-safe sunset)."""
+    def test_meta_defaults_when_missing(self):
+        """load_meta returns default values when no sidecar file has been written."""
+        import tempfile
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Nothing is written into tmpdir before load_meta is called.
+            mem_path = os.path.join(tmpdir, "agent.json")
+            meta = ContinuityManager.load_meta(mem_path)
+            assert meta["sessions_produced"] == 0
+            assert meta["citations_seen"] is False
+            assert meta["format_version"] == 1
+    def test_meta_save_and_load_roundtrip(self):
+        """A dict passed to save_meta is returned unchanged by load_meta."""
+        import tempfile
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mem_path = os.path.join(tmpdir, "agent.json")
+            meta = {"sessions_produced": 5, "citations_seen": True, "format_version": 1}
+            ContinuityManager.save_meta(meta, mem_path)
+            loaded = ContinuityManager.load_meta(mem_path)
+            assert loaded == meta
+    def test_meta_path_follows_sidecar_pattern(self):
+        """meta_path swaps the .json suffix for .continuity.meta.json in the same directory."""
+        path = ContinuityManager.meta_path("/tmp/agent.json")
+        assert path == "/tmp/agent.continuity.meta.json"
+    def test_corrupt_meta_returns_defaults(self):
+        """A sidecar file that is not valid JSON yields defaults instead of raising."""
+        import tempfile
+        with tempfile.TemporaryDirectory() as tmpdir:
+            mem_path = os.path.join(tmpdir, "agent.json")
+            meta_path = ContinuityManager.meta_path(mem_path)
+            # Write deliberately unparseable content to the sidecar path.
+            with open(meta_path, "w") as f:
+                f.write("NOT JSON")
+            meta = ContinuityManager.load_meta(mem_path)
+            assert meta["sessions_produced"] == 0
+class TestFailSafeSunset:
+    """Tests for citation requirement enforcement after first successful citation."""
+    def test_bare_graduation_passes_before_sunset(self):
+        """Before citations_seen, bare graduations (no [evidence:]) pass through."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 2x (2026-03-30)\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", citations_seen=False
+        )
+        assert demoted == 0
+        assert "| 2x" in result_text
+        assert "needs-evidence" not in result_text
+    def test_bare_graduation_demoted_after_sunset(self):
+        """After citations_seen, bare graduations are demoted with (needs-evidence)."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 2x (2026-03-30)\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", citations_seen=True
+        )
+        assert demoted == 1
+        assert "| 1x" in result_text
+        assert "(needs-evidence)" in result_text
+    def test_bare_3x_demoted_to_2x_after_sunset(self):
+        """After citations_seen, a bare 3x graduation drops one level to 2x with (needs-evidence)."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 3x (2026-03-30)\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", citations_seen=True
+        )
+        assert demoted == 1
+        assert "| 2x" in result_text
+        assert "(needs-evidence)" in result_text
+    def test_old_date_bare_graduation_unaffected_by_sunset(self):
+        """Carried-forward bare graduations from old sessions are not demoted."""
+        # The graduation is dated 2026-03-28, two days before the `today` passed below.
+        text = (
+            "## Patterns\n"
+            "thought: old pattern | 2x (2026-03-28)\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", citations_seen=True
+        )
+        assert demoted == 0
+        assert "| 2x" in result_text
+    def test_cited_graduation_still_passes_after_sunset(self):
+        """Properly cited graduations pass regardless of sunset state."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern | 2x (2026-03-30) [evidence: abc12345]\n"
+            "## Decisions\n"
+        )
+        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
+            text, {"abc12345"}, today="2026-03-30", citations_seen=True
+        )
+        assert validated == 1
+        assert demoted == 0
+    # NOTE(review): the two tests below assert on the citation reuse maximum, not
+    # on sunset behavior; they look like they belong in TestCitationReuse — confirm.
+    def test_no_reuse(self):
+        """Two graduations citing two different IDs report a reuse maximum of 1."""
+        text = (
+            "## Patterns\n"
+            "thought: pattern A | 2x (2026-03-30) [evidence: abc12345]\n"
+            "thought: pattern B | 2x (2026-03-30) [evidence: def67890]\n"
+            "## Decisions\n"
+        )
+        _text, _v, _d, reuse_max = ContinuityManager._validate_graduations(
+            text, {"abc12345", "def67890"}, today="2026-03-30"
+        )
+        assert reuse_max == 1
+    def test_reuse_in_produce_result(self):
+        """citation_reuse_max flows through to ContinuityResult."""
+        import datetime
+        # Today's date is embedded in the canned response so the graduations
+        # are dated as current-session ones.
+        today = datetime.date.today().isoformat()
+        # Three graduations all cite the same ID (50d7c6fd).
+        response = f"""# Test — Memory (v1)
+## State
+Testing.
+## Patterns
+thought: A | 2x ({today}) [evidence: 50d7c6fd]
+thought: B | 2x ({today}) [evidence: 50d7c6fd]
+thought: C | 2x ({today}) [evidence: 50d7c6fd]
+## Decisions
+None.
+## Context
+Testing citation reuse."""
+        mgr = ContinuityManager(llm=_make_mock_llm(response))
+        mem = _make_session_memory()
+        result = mgr.produce(mem)
+        assert result.citation_reuse_max == 3

{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_mcp.py RENAMED Viewed

@@ -393,6 +393,14 @@ class TestSessionWrap:
         assert result["nodes_before"] == 1
         assert result["nodes_after"] >= 0  # may prune if dormant
+    def test_wrap_continuity_disabled(self):
+        """session_wrap without continuity manager reports disabled."""
+        # presumably _make_handler() builds a handler with no continuity manager
+        # configured — verify against the helper's definition.
+        handler, umem = _make_handler()
+        umem.memory.session_start()
+        result = handler.handle_tool("session_wrap", {})
+        # The "continuity" key is expected to be present even when nothing was produced.
+        assert result["continuity"]["produced"] is False
+        assert result["continuity"]["reason"] == "disabled"
 class TestAutoConfiguration:
     """Tests for OPENAI_API_KEY auto-detection logic."""
@@ -549,8 +557,9 @@ class TestSessionWrapWithContinuity:
             assert "error" not in result
             assert "nodes_before" in result
             assert result["saved"] is True
-            # No continuity metadata since it failed
-            assert "continuity" not in result
+            # Continuity key present but indicates failure
+            assert result["continuity"]["produced"] is False
+            assert result["continuity"]["reason"] == "error"
 class TestVersionNegotiation:
@@ -692,6 +701,16 @@ class TestThinkingModes:
         handler.handle_tool("think_breakthrough", {"problem": "Scaling architecture"})
         assert umem.memory.size == nodes_before
+    def test_thinking_tool_descriptions_say_call_add_memory(self):
+        """Regression guard: descriptions must tell agents to call add_memory."""
+        from flowscript_agents.mcp import TOOLS
+        # Select every tool whose name starts with "think_"; exactly three are expected.
+        thinking_tools = [t for t in TOOLS if t["name"].startswith("think_")]
+        assert len(thinking_tools) == 3
+        for tool in thinking_tools:
+            # Substring check only: the description must contain the text "add_memory".
+            assert "add_memory" in tool["description"], (
+                f"{tool['name']} description must mention add_memory"
+            )
 class TestDescriptionIntegrity:
     """Tests for the three-layer MCP description integrity system."""