flowscript-agents 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/PKG-INFO +1 -1
  2. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/__init__.py +1 -1
  3. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/continuity.py +272 -4
  4. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/mcp.py +25 -6
  5. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/tool-integrity.json +3 -3
  6. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/pyproject.toml +1 -1
  7. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_continuity.py +448 -0
  8. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_mcp.py +21 -2
  9. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/.github/workflows/test.yml +0 -0
  10. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/.gitignore +0 -0
  11. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/AUDIT_TRAIL_DESIGN.md +0 -0
  12. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/README.md +0 -0
  13. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/adapters.md +0 -0
  14. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/api-reference.md +0 -0
  15. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/audit-trail.md +0 -0
  16. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/brand/logo-512.png +0 -0
  17. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/brand/social-preview.png +0 -0
  18. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/flowscript-demo.png +0 -0
  19. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/lifecycle.md +0 -0
  20. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/examples/CLAUDE.md.example +0 -0
  21. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/examples/langgraph_live_test.py +0 -0
  22. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/examples/temporal_e2e_test.py +0 -0
  23. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/audit.py +0 -0
  24. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/camel_ai.py +0 -0
  25. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/client.py +0 -0
  26. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/cloud.py +0 -0
  27. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/crewai.py +0 -0
  28. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/__init__.py +0 -0
  29. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/_utils.py +0 -0
  30. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/consolidate.py +0 -0
  31. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/extract.py +0 -0
  32. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/index.py +0 -0
  33. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/providers.py +0 -0
  34. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/search.py +0 -0
  35. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/explain.py +0 -0
  36. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/fixpoint.py +0 -0
  37. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/google_adk.py +0 -0
  38. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/haystack.py +0 -0
  39. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/langgraph.py +0 -0
  40. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/llamaindex.py +0 -0
  41. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/memory.py +0 -0
  42. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/openai_agents.py +0 -0
  43. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/pydantic_ai.py +0 -0
  44. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/query.py +0 -0
  45. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/smolagents.py +0 -0
  46. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/types.py +0 -0
  47. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/unified.py +0 -0
  48. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/guides/recommended_claude_md.md +0 -0
  49. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/scripts/validate_dedup_threshold.py +0 -0
  50. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/conftest.py +0 -0
  51. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_audit.py +0 -0
  52. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_camel_ai.py +0 -0
  53. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_client.py +0 -0
  54. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_cloud.py +0 -0
  55. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_cloud_fixpoint.py +0 -0
  56. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_consolidation.py +0 -0
  57. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_crewai.py +0 -0
  58. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_embeddings.py +0 -0
  59. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_explain.py +0 -0
  60. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_fixpoint.py +0 -0
  61. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_google_adk.py +0 -0
  62. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_haystack.py +0 -0
  63. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_integration_continuity.py +0 -0
  64. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_langgraph.py +0 -0
  65. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_llamaindex.py +0 -0
  66. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_memory.py +0 -0
  67. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_openai_agents.py +0 -0
  68. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_pydantic_ai.py +0 -0
  69. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_smolagents.py +0 -0
  70. {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_temporal.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flowscript-agents
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Complete agent memory: reasoning queries + vector search + auto-extraction. Decision intelligence for LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Pydantic AI, smolagents, LlamaIndex, Haystack, and CAMEL-AI.
5
5
  Project-URL: Homepage, https://flowscript.org
6
6
  Project-URL: Repository, https://github.com/phillipclapham/flowscript-agents
@@ -47,7 +47,7 @@ from .memory import (
47
47
  from .unified import UnifiedMemory
48
48
  from .explain import explain, explain_counterfactual
49
49
 
50
- __version__ = "0.4.0"
50
+ __version__ = "0.4.1"
51
51
  __all__ = [
52
52
  "explain",
53
53
  "explain_counterfactual",
@@ -39,6 +39,16 @@ def _log(msg: str) -> None:
39
39
  sys.stderr.flush()
40
40
 
41
41
 
42
+ # Matches graduated patterns (2x or 3x) with [evidence: <id> "explanation"] citations.
43
+ # Captures: (1) level, (2) date, (3) cited IDs, (4) optional explanation in quotes.
44
+ # Used by _validate_graduations to verify citations against actual session nodes.
45
+ _GRADUATION_RE = re.compile(
46
+ r'\|\s*([23])x\s*\((\d{4}-\d{2}-\d{2})\)\s*\[evidence:\s*'
47
+ r'([a-fA-F0-9][a-fA-F0-9, ]*)' # one or more hex IDs
48
+ r'(?:\s+"([^"]*)")?\s*\]' # optional quoted explanation
49
+ )
50
+
51
+
42
52
  # =============================================================================
43
53
  # Result types
44
54
  # =============================================================================
@@ -54,6 +64,9 @@ class ContinuityResult:
54
64
  truncated: bool # whether LLM output exceeded max_chars
55
65
  session_nodes_count: int # how many nodes were in this session
56
66
  patterns_extracted: int # estimated from output (best-effort)
67
+ graduations_validated: int = 0 # citations that checked out
68
+ graduations_demoted: int = 0 # citations that failed → demoted
69
+ citation_reuse_max: int = 0 # max times any single node was cited (>2 = suspicious)
57
70
 
58
71
 
59
72
  # =============================================================================
@@ -133,8 +146,21 @@ This is where learning happens. Use these markers for density:
133
146
  **Temporal graduation (CRITICAL — this is what makes the system learn):**
134
147
  - Mark each pattern with `| Nx (date)` where N = validation count, date = last validated
135
148
  - New observation from THIS session not in existing patterns → add at `| 1x ({today})`
136
- - Observation that VALIDATES an existing 1x pattern → increment to `| 2x ({today})`
137
- - Observation that VALIDATES an existing 2x pattern graduate to `| 3x ({today})`
149
+ - Observation that VALIDATES an existing 1x pattern → increment to:
150
+ `| 2x ({today}) [evidence: <node_id> "brief explanation of how node validates pattern"]`
151
+ where `<node_id>` is the 8-char ID prefix (e.g., `abc12345`) from the session data above.
152
+ The explanation MUST reference specific content from the cited node.
153
+ - Observation that VALIDATES an existing 2x pattern → graduate to:
154
+ `| 3x ({today}) [evidence: <node_id> "explanation"]`
155
+ - Evidence citations with explanations are REQUIRED for all graduations (2x and 3x).
156
+ Cite the specific session node AND explain how it validates the pattern.
157
+ Without a valid citation, the graduation will be rejected.
158
+ - For patterns you are NOT graduating (carrying forward at the same level), drop the
159
+ `[evidence:]` tag — evidence only appears on the graduation that created it.
160
+ - Patterns marked `(ungrounded)` were demoted in a previous session due to invalid evidence.
161
+ They need FRESH validating evidence from THIS session to be re-graduated. Do not re-graduate
162
+ without new evidence — remove the `(ungrounded)` marker only when providing a valid citation.
163
+ Patterns marked `(ungrounded)` that you cannot provide fresh evidence for should be removed.
138
164
  - Patterns at 3x: extract the PRINCIPLE underneath, not the surface observations.
139
165
  Multiple related observations → single meta-pattern. This is compression-as-cognition.
140
166
  - Patterns with dates older than 7 days and no new validation → remove (they're stale)
@@ -144,7 +170,7 @@ Group related patterns in FlowScript blocks: `{{topic: ... }}`
144
170
  **Example Patterns section:**
145
171
  ```
146
172
  {{database_architecture:
147
- thought: ACID compliance outweighs raw speed for financial data | 2x (2026-03-30)
173
+ thought: ACID compliance outweighs raw speed | 2x (2026-03-30) [evidence: 4931b6a8 "PostgreSQL chosen for ACID compliance"]
148
174
  thought: connection pooling is the real performance bottleneck | 1x (2026-03-30)
149
175
  ? horizontal scaling strategy ><[single-writer vs multi-writer] | 1x (2026-03-29)
150
176
  }}
@@ -304,6 +330,7 @@ class ContinuityManager:
304
330
  self,
305
331
  memory: Any,
306
332
  existing_continuity: str | None = None,
333
+ citations_seen: bool = False,
307
334
  ) -> ContinuityResult:
308
335
  """Produce a compressed continuity file from session memory.
309
336
 
@@ -311,6 +338,7 @@ class ContinuityManager:
311
338
  memory: A Memory instance containing the session's nodes.
312
339
  existing_continuity: The current continuity file text (if any).
313
340
  Pass None for first session.
341
+ citations_seen: If True, enforces citation requirement (fail-safe sunset).
314
342
 
315
343
  Returns:
316
344
  ContinuityResult with the compressed continuity text and metadata.
@@ -325,7 +353,8 @@ class ContinuityManager:
325
353
  temporal_map = dict(memory._temporal_map)
326
354
 
327
355
  return self.produce_from_nodes(
328
- nodes, relationships, states, existing_continuity, temporal_map
356
+ nodes, relationships, states, existing_continuity, temporal_map,
357
+ citations_seen=citations_seen,
329
358
  )
330
359
 
331
360
  def produce_from_nodes(
@@ -335,12 +364,20 @@ class ContinuityManager:
335
364
  states: list[Any],
336
365
  existing_continuity: str | None = None,
337
366
  temporal_map: dict[str, Any] | None = None,
367
+ citations_seen: bool = False,
338
368
  ) -> ContinuityResult:
339
369
  """Produce continuity from raw node lists (alternative to Memory instance).
340
370
 
341
371
  Useful when you have nodes but not a full Memory object, e.g.,
342
372
  from a filtered set or from deserialized data.
373
+
374
+ Args:
375
+ citations_seen: If True, enforces citation requirement on all today's
376
+ graduations. Set from metadata after first successful citation.
343
377
  """
378
+ import datetime
379
+ today = datetime.date.today().isoformat()
380
+
344
381
  session_summary = _format_session_nodes(
345
382
  nodes, relationships, states, temporal_map
346
383
  )
@@ -350,6 +387,7 @@ class ContinuityManager:
350
387
  existing_continuity=existing_continuity,
351
388
  project_name=self._project_name,
352
389
  max_chars=self._max_chars,
390
+ today=today,
353
391
  )
354
392
 
355
393
  _log(f"Producing continuity ({len(nodes)} nodes, max {self._max_chars} chars)")
@@ -374,6 +412,25 @@ class ContinuityManager:
374
412
  # (first session, something is better than nothing)
375
413
  _log("WARNING: No existing continuity to fall back to — using LLM output as-is")
376
414
 
415
+ # Validate graduation citations against actual session nodes.
416
+ # Only checks citations from today (carried-forward patterns are trusted).
417
+ valid_ids = {n.id[:8].lower() for n in nodes}
418
+ node_content_map = {n.id[:8].lower(): n.content for n in nodes}
419
+ text, grad_validated, grad_demoted, reuse_max = self._validate_graduations(
420
+ text, valid_ids, today=today, node_content_map=node_content_map,
421
+ citations_seen=citations_seen,
422
+ )
423
+ if grad_demoted:
424
+ _log(
425
+ f"Graduation validation: {grad_validated} validated, "
426
+ f"{grad_demoted} demoted (ungrounded)"
427
+ )
428
+ if reuse_max > 2:
429
+ _log(
430
+ f"Graduation warning: single node cited {reuse_max} times "
431
+ f"(possible citation gaming)"
432
+ )
433
+
377
434
  truncated = False
378
435
  if len(text) > self._max_chars:
379
436
  truncated = True
@@ -390,6 +447,9 @@ class ContinuityManager:
390
447
  truncated=truncated,
391
448
  session_nodes_count=len(nodes),
392
449
  patterns_extracted=patterns_extracted,
450
+ graduations_validated=grad_validated,
451
+ graduations_demoted=grad_demoted,
452
+ citation_reuse_max=reuse_max,
393
453
  )
394
454
 
395
455
  # -- File I/O --
@@ -404,6 +464,54 @@ class ContinuityManager:
404
464
  p = Path(memory_path)
405
465
  return str(p.parent / f"{p.stem}.continuity.md")
406
466
 
467
+ @staticmethod
468
+ def meta_path(memory_path: str) -> str:
469
+ """Get the metadata sidecar path. ./agent.json → ./agent.continuity.meta.json"""
470
+ p = Path(memory_path)
471
+ return str(p.parent / f"{p.stem}.continuity.meta.json")
472
+
473
+ @staticmethod
474
+ def load_meta(memory_path: str) -> dict:
475
+ """Load continuity metadata from the JSON sidecar.
476
+
477
+ Returns a dict with keys: sessions_produced, citations_seen, format_version.
478
+ Returns defaults if the file doesn't exist.
479
+ """
480
+ import json
481
+ path = ContinuityManager.meta_path(memory_path)
482
+ defaults = {"sessions_produced": 0, "citations_seen": False, "format_version": 1}
483
+ if not os.path.exists(path):
484
+ return defaults
485
+ try:
486
+ with open(path, "r", encoding="utf-8") as f:
487
+ data = json.load(f)
488
+ # Merge with defaults for forward compatibility
489
+ return {**defaults, **data}
490
+ except (json.JSONDecodeError, OSError):
491
+ _log(f"WARNING: corrupt continuity meta at {path} — using defaults")
492
+ return defaults
493
+
494
+ @staticmethod
495
+ def save_meta(meta: dict, memory_path: str) -> str:
496
+ """Save continuity metadata to the JSON sidecar. Atomic write."""
497
+ import json
498
+ path = ContinuityManager.meta_path(memory_path)
499
+ tmp_path = path + ".tmp"
500
+ try:
501
+ with open(tmp_path, "w", encoding="utf-8") as f:
502
+ json.dump(meta, f, indent=2, sort_keys=True)
503
+ f.write("\n")
504
+ f.flush()
505
+ os.fsync(f.fileno())
506
+ os.replace(tmp_path, path)
507
+ except Exception:
508
+ try:
509
+ os.unlink(tmp_path)
510
+ except OSError:
511
+ pass
512
+ raise
513
+ return path
514
+
407
515
  # -- Validation --
408
516
 
409
517
  _REQUIRED_SECTIONS = {"state", "patterns", "decisions", "context"}
@@ -424,6 +532,166 @@ class ContinuityManager:
424
532
  found.add(section)
425
533
  return found == cls._REQUIRED_SECTIONS
426
534
 
535
+ # Minimum meaningful words for explanation-to-node content overlap check.
536
+ # Short/common words are excluded to avoid false positives.
537
+ _STOP_WORDS = frozenset(
538
+ "a an the is are was were be been being have has had do does did "
539
+ "will would shall should may might can could this that these those "
540
+ "it its he she they we you i me my our his her their in on at to "
541
+ "for of by with from and or but not no nor so if as".split()
542
+ )
543
+
544
+ # Matches bare graduations (2x or 3x) WITHOUT [evidence:] tags.
545
+ # Used to enforce citation requirement after fail-safe sunset.
546
+ _BARE_GRADUATION_RE = re.compile(
547
+ r"\|\s*([23])x\s*\((\d{4}-\d{2}-\d{2})\)\s*(?!\[evidence:)"
548
+ )
549
+
550
+ @staticmethod
551
+ def _validate_graduations(
552
+ text: str,
553
+ valid_ids: set[str],
554
+ today: str | None = None,
555
+ node_content_map: dict[str, str] | None = None,
556
+ citations_seen: bool = False,
557
+ ) -> tuple[str, int, int, int]:
558
+ """Validate evidence citations on graduated patterns.
559
+
560
+ Scans the ## Patterns section for 2x/3x lines with [evidence: <id> "explanation"].
561
+ Only validates citations whose date matches today (newly graduated this
562
+ session). Carried-forward patterns from previous sessions pass through
563
+ unchanged — their evidence was valid when originally graduated.
564
+
565
+ Validation checks (all must pass for a citation to be accepted):
566
+ 1. At least one cited ID exists in the current session's node set
567
+ 2. If an explanation is provided and node_content_map is available,
568
+ the explanation must reference actual content from the cited node
569
+ (word overlap check — prevents citation of irrelevant nodes)
570
+
571
+ If validation fails, demotes the graduation (3x→2x, 2x→1x).
572
+
573
+ Fail-safe sunset: when citations_seen=True, today's graduations WITHOUT
574
+ [evidence:] tags are also demoted. Before citations_seen, they pass through
575
+ (migration grace period). Once the LLM demonstrates citation ability, it
576
+ must always cite.
577
+
578
+ Returns:
579
+ (possibly_modified_text, validated_count, demoted_count, citation_reuse_max)
580
+ """
581
+ if today is None:
582
+ import datetime
583
+ today = datetime.date.today().isoformat()
584
+
585
+ lines = text.split("\n")
586
+ in_patterns = False
587
+ validated = 0
588
+ demoted = 0
589
+ citation_counts: dict[str, int] = {} # track per-node citation frequency
590
+
591
+ for i, line in enumerate(lines):
592
+ # Track section boundaries (substring match, consistent with _validate_structure)
593
+ if line.startswith("## "):
594
+ in_patterns = "pattern" in line.lower()
595
+ continue
596
+ if not in_patterns:
597
+ continue
598
+
599
+ match = _GRADUATION_RE.search(line)
600
+ if match:
601
+ level = int(match.group(1)) # 2 or 3
602
+ date_str = match.group(2) # YYYY-MM-DD
603
+ cited_raw = match.group(3)
604
+ explanation = match.group(4) # may be None if no quotes
605
+
606
+ # Only validate citations from THIS session (today's date).
607
+ # Carried-forward patterns retain their evidence unchecked.
608
+ if date_str != today:
609
+ continue
610
+
611
+ # Normalize cited IDs: lowercase, truncate to 8 chars, filter empties
612
+ cited_ids = {
613
+ cid.strip().lower()[:8]
614
+ for cid in re.split(r"[,\s]+", cited_raw)
615
+ if cid.strip()
616
+ }
617
+
618
+ # Track citation frequency
619
+ for cid in cited_ids & valid_ids:
620
+ citation_counts[cid] = citation_counts.get(cid, 0) + 1
621
+
622
+ # Check 1: at least one cited ID exists in session nodes
623
+ ids_valid = bool(cited_ids & valid_ids)
624
+
625
+ # Check 2: explanation references cited node content (if available)
626
+ explanation_valid = True
627
+ if ids_valid and explanation and node_content_map:
628
+ matched_id = next(iter(cited_ids & valid_ids))
629
+ node_content = node_content_map.get(matched_id, "")
630
+ if node_content:
631
+ explanation_valid = ContinuityManager._check_explanation_overlap(
632
+ explanation, node_content
633
+ )
634
+
635
+ if ids_valid and explanation_valid:
636
+ validated += 1
637
+ else:
638
+ demoted += 1
639
+ demoted_level = level - 1
640
+ old_marker = match.group(0)
641
+ new_marker = old_marker.replace(
642
+ f"| {level}x", f"| {demoted_level}x"
643
+ )
644
+ new_marker = re.sub(
645
+ r'\[evidence:\s*[a-fA-F0-9][a-fA-F0-9, ]*(?:\s+"[^"]*")?\s*\]',
646
+ "(ungrounded)", new_marker
647
+ )
648
+ lines[i] = line.replace(old_marker, new_marker)
649
+ continue
650
+
651
+ # Fail-safe sunset: once the LLM has demonstrated citation ability,
652
+ # today's graduations WITHOUT [evidence:] are demoted.
653
+ if not citations_seen:
654
+ continue
655
+
656
+ bare_match = ContinuityManager._BARE_GRADUATION_RE.search(line)
657
+ if not bare_match:
658
+ continue
659
+
660
+ bare_level = int(bare_match.group(1))
661
+ bare_date = bare_match.group(2)
662
+ if bare_date != today:
663
+ continue
664
+
665
+ demoted += 1
666
+ demoted_level = bare_level - 1
667
+ old_marker = bare_match.group(0)
668
+ new_marker = old_marker.replace(
669
+ f"| {bare_level}x", f"| {demoted_level}x"
670
+ )
671
+ lines[i] = line.replace(old_marker, new_marker + " (needs-evidence)")
672
+
673
+ reuse_max = max(citation_counts.values()) if citation_counts else 0
674
+ return "\n".join(lines), validated, demoted, reuse_max
675
+
676
+ @classmethod
677
+ def _check_explanation_overlap(cls, explanation: str, node_content: str) -> bool:
678
+ """Check if an explanation references actual content from the cited node.
679
+
680
+ Uses word overlap (excluding stop words). At least one meaningful word
681
+ from the explanation must appear in the node content. This prevents
682
+ generic explanations like "confirms pattern" while allowing legitimate
683
+ paraphrasing.
684
+ """
685
+ def meaningful_words(text: str) -> set[str]:
686
+ return {
687
+ w for w in re.split(r"[^a-zA-Z0-9]+", text.lower())
688
+ if len(w) > 2 and w not in cls._STOP_WORDS
689
+ }
690
+
691
+ explanation_words = meaningful_words(explanation)
692
+ node_words = meaningful_words(node_content)
693
+ return bool(explanation_words & node_words)
694
+
427
695
  # -- File I/O --
428
696
 
429
697
  def save(self, text: str, memory_path: str) -> str:
@@ -579,8 +579,8 @@ _TOOL_DEFS_RAW = [
579
579
  "returned framework to analyze your problem thoroughly — deconstruct to "
580
580
  "fundamentals, trace consequences across multiple orders, verify "
581
581
  "assumptions explicitly, and hold contradictions without premature "
582
- "resolution. Key insights from your analysis will be saved to memory "
583
- "as typed reasoning nodes."
582
+ "resolution. After analysis, call add_memory to save key insights "
583
+ "without this, your analysis is lost between sessions."
584
584
  ),
585
585
  "inputSchema": {
586
586
  "type": "object",
@@ -607,7 +607,8 @@ _TOOL_DEFS_RAW = [
607
607
  "fundamentally different angle. Returns a creative exploration framework. "
608
608
  "After calling, challenge every assumption — what constraints are real vs "
609
609
  "inherited? What would the opposite approach look like? What patterns from "
610
- "unrelated domains apply? Insights will be saved to memory."
610
+ "unrelated domains apply? After exploration, call add_memory to save "
611
+ "breakthrough insights — without this, your exploration is lost between sessions."
611
612
  ),
612
613
  "inputSchema": {
613
614
  "type": "object",
@@ -634,8 +635,9 @@ _TOOL_DEFS_RAW = [
634
635
  "with assumption-breaking for a two-pronged attack: systematic depth AND "
635
636
  "lateral thinking simultaneously. Use when the problem requires both "
636
637
  "understanding WHY current approaches fail AND imagining fundamentally "
637
- "different solutions. Returns a comprehensive framework. Key insights "
638
- "saved to memory."
638
+ "different solutions. Returns a comprehensive framework. After analysis, "
639
+ "call add_memory to save key findings — without this, your analysis is "
640
+ "lost between sessions."
639
641
  ),
640
642
  "inputSchema": {
641
643
  "type": "object",
@@ -921,12 +923,19 @@ class MCPHandler:
921
923
  continuity_result = None
922
924
  if self._continuity_mgr and self._memory_path:
923
925
  try:
926
+ meta = ContinuityManager.load_meta(self._memory_path)
924
927
  existing = ContinuityManager.load(self._memory_path)
925
928
  continuity_result = self._continuity_mgr.produce(
926
929
  self._umem.memory,
927
930
  existing_continuity=existing,
931
+ citations_seen=meta.get("citations_seen", False),
928
932
  )
929
933
  self._continuity_mgr.save(continuity_result.text, self._memory_path)
934
+ # Update metadata
935
+ meta["sessions_produced"] = meta.get("sessions_produced", 0) + 1
936
+ if continuity_result.graduations_validated > 0:
937
+ meta["citations_seen"] = True
938
+ ContinuityManager.save_meta(meta, self._memory_path)
930
939
  except Exception as e:
931
940
  _log(f"Continuity production failed: {e}")
932
941
  # Non-fatal — session_wrap still proceeds
@@ -949,7 +958,7 @@ class MCPHandler:
949
958
  "path": result.path,
950
959
  }
951
960
 
952
- # Include continuity metadata in response
961
+ # Always include continuity key so callers can distinguish disabled/error/success.
953
962
  if continuity_result:
954
963
  response["continuity"] = {
955
964
  "produced": True,
@@ -959,6 +968,10 @@ class MCPHandler:
959
968
  "truncated": continuity_result.truncated,
960
969
  "path": ContinuityManager.continuity_path(self._memory_path),
961
970
  }
971
+ elif self._continuity_mgr:
972
+ response["continuity"] = {"produced": False, "reason": "error"}
973
+ else:
974
+ response["continuity"] = {"produced": False, "reason": "disabled"}
962
975
 
963
976
  return response
964
977
 
@@ -1647,11 +1660,17 @@ def run_server(
1647
1660
  current_nodes = umem.memory.size
1648
1661
  if current_nodes > _last_node_count[0]:
1649
1662
  try:
1663
+ meta = ContinuityManager.load_meta(memory_path)
1650
1664
  existing = ContinuityManager.load(memory_path)
1651
1665
  cont_result = continuity_mgr.produce(
1652
1666
  umem.memory, existing_continuity=existing,
1667
+ citations_seen=meta.get("citations_seen", False),
1653
1668
  )
1654
1669
  continuity_mgr.save(cont_result.text, memory_path)
1670
+ meta["sessions_produced"] = meta.get("sessions_produced", 0) + 1
1671
+ if cont_result.graduations_validated > 0:
1672
+ meta["citations_seen"] = True
1673
+ ContinuityManager.save_meta(meta, memory_path)
1655
1674
  _continuity_produced[0] = True
1656
1675
  _last_node_count[0] = current_nodes
1657
1676
  _log(f"Auto-wrap: continuity produced ({cont_result.char_count} chars)")
@@ -14,8 +14,8 @@
14
14
  "remove_memory": "ee604c8f87855e32b4509162048168d0c941da79339f907d7d921a55780de830",
15
15
  "search_memory": "7e91e30bc03b5a2c990b83a33c00cf512c5c7c2a2e204c546206ffe606010064",
16
16
  "session_wrap": "ea1e2b2048ef4854de595601105375cfda91856a11851300e864c1e5358894b4",
17
- "think_breakthrough": "8e0734bd5273943395a762a5c138882441a8a345e2b7b7bd4acdf81c1a94bb52",
18
- "think_creative": "1347c8687847d6d2bf263bd7ae8d1d2bf09fa72ef213d185c049b67863e138fa",
19
- "think_deeper": "f403bf30f55530674aeb31d4c3a5d9f58b9817fa0b7bed9ad61772b37a191163",
17
+ "think_breakthrough": "2e2b86d8e4d1c10c80cd9dcc0e55a5ec5642f0633e4e42bd8bf5dfd87c59b4c3",
18
+ "think_creative": "f648d9dd59e4c1901fd532d92568b3f102dc130b5f6383bfad64c00afce2ca0a",
19
+ "think_deeper": "4e190d9c344323be20fea2243830732247ec580b34fd936a837c082f0d6d8b76",
20
20
  "verify_audit": "2e93d3118ebeed1a1113e423ec915b8dd987c5d2c4adf6fefcd93fa0c931483f"
21
21
  }
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "flowscript-agents"
7
- version = "0.4.0"
7
+ version = "0.4.1"
8
8
  description = "Complete agent memory: reasoning queries + vector search + auto-extraction. Decision intelligence for LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Pydantic AI, smolagents, LlamaIndex, Haystack, and CAMEL-AI."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -489,3 +489,451 @@ class TestTruncation:
489
489
  assert result.text.startswith("# Agent")
490
490
  # Should have at least State section
491
491
  assert "## State" in result.text
492
+
493
+
494
class TestGraduationValidation:
    """Graph-grounded graduation checks (the anti-semantic-inbreeding defense).

    Every unit test hands a small continuity document to
    ``ContinuityManager._validate_graduations`` and inspects the rewritten
    text plus the validated / demoted counters it returns.
    """

    @staticmethod
    def _check(document, known_ids, today="2026-03-30"):
        """Validate *document* against *known_ids*; drop the reuse counter."""
        outcome = ContinuityManager._validate_graduations(
            document, known_ids, today=today
        )
        rewritten, kept, dropped, _reuse = outcome
        return rewritten, kept, dropped

    def test_valid_citation_kept(self):
        """A 2x pattern citing a known node ID keeps its count."""
        document = (
            "## Patterns\n"
            "thought: caching helps | 2x (2026-03-30) [evidence: abc12345]\n"
            "## Decisions\n"
        )
        rewritten, kept, dropped = self._check(document, {"abc12345", "def67890"})
        assert "| 2x" in rewritten
        assert "ungrounded" not in rewritten
        assert kept == 1
        assert dropped == 0

    def test_invalid_citation_demoted(self):
        """A 2x pattern citing an unknown ID falls to 1x and is tagged."""
        document = (
            "## Patterns\n"
            "thought: caching helps | 2x (2026-03-30) [evidence: ffffffff]\n"
            "## Decisions\n"
        )
        rewritten, kept, dropped = self._check(document, {"abc12345"})
        assert "| 1x" in rewritten
        assert "| 2x" not in rewritten
        assert "(ungrounded)" in rewritten
        assert kept == 0
        assert dropped == 1

    def test_3x_demoted_to_2x(self):
        """A 3x pattern citing an unknown ID falls one step, to 2x."""
        document = (
            "## Patterns\n"
            "thought: principle | 3x (2026-03-30) [evidence: badbadba]\n"
            "## Decisions\n"
        )
        rewritten, _kept, dropped = self._check(document, {"abc12345"})
        assert "| 2x" in rewritten
        assert "| 3x" not in rewritten
        assert "(ungrounded)" in rewritten
        assert dropped == 1

    def test_no_citations_passthrough(self):
        """Old-format patterns without [evidence:] pass through unchanged."""
        document = (
            "## Patterns\n"
            "thought: caching helps | 2x (2026-03-30)\n"
            "thought: pooling matters | 3x (2026-03-30)\n"
            "## Decisions\n"
        )
        rewritten, kept, dropped = self._check(document, {"abc12345"})
        assert rewritten == document
        assert kept == 0
        assert dropped == 0

    def test_mixed_valid_and_invalid(self):
        """Valid and invalid citations in one section are judged per line."""
        document = (
            "## Patterns\n"
            "thought: good pattern | 2x (2026-03-30) [evidence: abc12345]\n"
            "thought: hallucinated | 2x (2026-03-30) [evidence: ffffffff]\n"
            "## Decisions\n"
        )
        rewritten, kept, dropped = self._check(document, {"abc12345"})
        assert kept == 1
        assert dropped == 1
        # First pattern kept at 2x, second demoted to 1x
        rows = rewritten.split("\n")
        good_row, bad_row = rows[1], rows[2]
        assert "| 2x" in good_row
        assert "| 1x" in bad_row
        assert "(ungrounded)" in bad_row

    def test_multiple_citations_one_valid_sufficient(self):
        """One known ID among several cited is enough to validate."""
        document = (
            "## Patterns\n"
            "thought: pattern | 2x (2026-03-30) [evidence: bad00000, abc12345]\n"
            "## Decisions\n"
        )
        rewritten, kept, dropped = self._check(document, {"abc12345"})
        assert kept == 1
        assert dropped == 0
        assert "| 2x" in rewritten

    def test_1x_not_affected(self):
        """1x patterns are new observations — never checked for citations."""
        document = (
            "## Patterns\n"
            "thought: new observation | 1x (2026-03-30)\n"
            "thought: also new | 1x (2026-03-30) [evidence: ffffffff]\n"
            "## Decisions\n"
        )
        _rewritten, kept, dropped = self._check(document, {"abc12345"})
        # 1x lines are never matched by _GRADUATION_RE (only matches 2x/3x)
        assert kept == 0
        assert dropped == 0

    def test_outside_patterns_section_ignored(self):
        """Citations in non-Patterns sections should not be validated."""
        document = (
            "## State\n"
            "some state | 2x (2026-03-30) [evidence: ffffffff]\n"
            "## Patterns\n"
            "thought: real pattern | 2x (2026-03-30) [evidence: abc12345]\n"
            "## Decisions\n"
        )
        rewritten, kept, dropped = self._check(document, {"abc12345"})
        # Only the Patterns section line is checked
        assert kept == 1
        assert dropped == 0
        # State section line unchanged (still has ffffffff)
        assert "ffffffff" in rewritten

    def test_uppercase_citation_normalized(self):
        """LLMs may uppercase hex — citations should be case-insensitive."""
        document = (
            "## Patterns\n"
            "thought: pattern | 2x (2026-03-30) [evidence: ABC12345]\n"
            "## Decisions\n"
        )
        _rewritten, kept, dropped = self._check(document, {"abc12345"})
        assert kept == 1
        assert dropped == 0

    def test_space_separated_citations(self):
        """LLMs might use spaces instead of commas between IDs."""
        document = (
            "## Patterns\n"
            "thought: pattern | 2x (2026-03-30) [evidence: bad00000 abc12345]\n"
            "## Decisions\n"
        )
        _rewritten, kept, dropped = self._check(document, {"abc12345"})
        assert kept == 1
        assert dropped == 0

    def test_long_id_truncated_to_8_chars(self):
        """LLM might cite full 64-char ID — should be truncated to 8 for matching."""
        document = (
            "## Patterns\n"
            "thought: pattern | 2x (2026-03-30) [evidence: abc12345ffffffffffffffff]\n"
            "## Decisions\n"
        )
        _rewritten, kept, dropped = self._check(document, {"abc12345"})
        assert kept == 1
        assert dropped == 0

    def test_carried_forward_evidence_not_demoted(self):
        """Patterns from previous sessions (old dates) should pass through unchanged."""
        document = (
            "## Patterns\n"
            "thought: old pattern | 2x (2026-03-28) [evidence: abc12345]\n"
            "thought: new pattern | 2x (2026-03-30) [evidence: def67890]\n"
            "## Decisions\n"
        )
        # abc12345 is NOT in the known IDs, but its date is old → should pass through
        # def67890 IS in the known IDs and its date matches today → validated
        rewritten, kept, dropped = self._check(document, {"def67890"})
        assert kept == 1
        assert dropped == 0
        # Old pattern still at 2x (not demoted despite abc12345 not in current nodes)
        assert "2026-03-28" in rewritten
        assert "ungrounded" not in rewritten

    def test_graduation_validation_through_produce(self):
        """Integration: graduation validation works through the full produce() pipeline."""
        import datetime

        today = datetime.date.today().isoformat()

        # Node ID 50d7c6fd = "Connection pooling will be the real bottleneck"
        # from _make_session_memory(). Use today's date so validation fires.
        # The reply carries one citation that resolves and one that does not.
        llm_reply = f"""# Agent — Memory (v1)

## State
Working on database selection.

## Patterns
{{database_architecture:
thought: connection pooling is critical | 2x ({today}) [evidence: 50d7c6fd]
thought: ACID compliance matters | 2x ({today}) [evidence: ffffffff]
}}

## Decisions
[decided(rationale: "ACID required", on: "{today}")] Use PostgreSQL

## Context
Selected PostgreSQL, investigating pooling."""

        manager = ContinuityManager(
            llm=_make_mock_llm(llm_reply),
        )
        outcome = manager.produce(_make_session_memory())

        # One citation valid (50d7c6fd exists), one invalid (ffffffff doesn't)
        assert outcome.graduations_validated == 1
        assert outcome.graduations_demoted == 1
        assert "(ungrounded)" in outcome.text
        # The valid graduation should still be 2x
        assert "| 2x" in outcome.text
        # The invalid one should be demoted to 1x
        assert "| 1x" in outcome.text
715
+
716
+
717
class TestExplanationValidation:
    """Explain-your-evidence tests: citation relevance checking.

    These cases pass ``node_content_map`` so a quoted explanation inside
    ``[evidence: ...]`` can be compared with the cited node's content.
    """

    def test_explanation_with_node_content_overlap_passes(self):
        """An explanation that overlaps the cited node's content validates."""
        contents = {"abc12345": "Connection pooling will be the real bottleneck"}
        document = (
            '## Patterns\n'
            'thought: pooling matters | 2x (2026-03-30) '
            '[evidence: abc12345 "connection pooling identified as bottleneck"]\n'
            '## Decisions\n'
        )
        _rewritten, kept, dropped, _reuse = ContinuityManager._validate_graduations(
            document, {"abc12345"}, today="2026-03-30", node_content_map=contents
        )
        assert kept == 1
        assert dropped == 0

    def test_explanation_without_overlap_demoted(self):
        """A generic explanation with a valid ID is still demoted."""
        contents = {"abc12345": "Connection pooling will be the real bottleneck"}
        document = (
            '## Patterns\n'
            'thought: pooling matters | 2x (2026-03-30) '
            '[evidence: abc12345 "confirms the pattern"]\n'
            '## Decisions\n'
        )
        rewritten, kept, dropped, _reuse = ContinuityManager._validate_graduations(
            document, {"abc12345"}, today="2026-03-30", node_content_map=contents
        )
        # "confirms the pattern" has no meaningful overlap with node content
        assert kept == 0
        assert dropped == 1
        assert "(ungrounded)" in rewritten

    def test_no_explanation_still_passes_id_check(self):
        """Citations without explanations pass on ID alone (backward compat)."""
        contents = {"abc12345": "Some node content"}
        document = (
            "## Patterns\n"
            "thought: pattern | 2x (2026-03-30) [evidence: abc12345]\n"
            "## Decisions\n"
        )
        _rewritten, kept, dropped, _reuse = ContinuityManager._validate_graduations(
            document, {"abc12345"}, today="2026-03-30", node_content_map=contents
        )
        # No explanation = no overlap check, just ID validation
        assert kept == 1
        assert dropped == 0

    def test_no_node_map_skips_explanation_check(self):
        """Without node_content_map, explanation check is skipped."""
        document = (
            '## Patterns\n'
            'thought: pattern | 2x (2026-03-30) '
            '[evidence: abc12345 "totally irrelevant words"]\n'
            '## Decisions\n'
        )
        _rewritten, kept, dropped, _reuse = ContinuityManager._validate_graduations(
            document, {"abc12345"}, today="2026-03-30", node_content_map=None
        )
        assert kept == 1
        assert dropped == 0
778
+
779
+
780
class TestCitationReuse:
    """Tests for citation gaming detection.

    The fourth value returned by ``_validate_graduations`` (and
    ``ContinuityResult.citation_reuse_max``) is checked against how many
    patterns cite the same node ID.
    """

    def test_reuse_count_tracked(self):
        """Three patterns citing one ID report a reuse maximum of 3."""
        text = (
            "## Patterns\n"
            "thought: pattern A | 2x (2026-03-30) [evidence: abc12345]\n"
            "thought: pattern B | 2x (2026-03-30) [evidence: abc12345]\n"
            "thought: pattern C | 2x (2026-03-30) [evidence: abc12345]\n"
            "## Decisions\n"
        )
        _text, _v, _d, reuse_max = ContinuityManager._validate_graduations(
            text, {"abc12345"}, today="2026-03-30"
        )
        assert reuse_max == 3

    def test_no_reuse(self):
        """Patterns citing distinct IDs report a reuse maximum of 1."""
        text = (
            "## Patterns\n"
            "thought: pattern A | 2x (2026-03-30) [evidence: abc12345]\n"
            "thought: pattern B | 2x (2026-03-30) [evidence: def67890]\n"
            "## Decisions\n"
        )
        _text, _v, _d, reuse_max = ContinuityManager._validate_graduations(
            text, {"abc12345", "def67890"}, today="2026-03-30"
        )
        assert reuse_max == 1

    def test_reuse_in_produce_result(self):
        """citation_reuse_max flows through to ContinuityResult."""
        import datetime

        # Patterns are stamped with today's date, as in the other produce() test.
        today = datetime.date.today().isoformat()

        response = f"""# Test — Memory (v1)

## State
Testing.

## Patterns
thought: A | 2x ({today}) [evidence: 50d7c6fd]
thought: B | 2x ({today}) [evidence: 50d7c6fd]
thought: C | 2x ({today}) [evidence: 50d7c6fd]

## Decisions
None.

## Context
Testing citation reuse."""

        mgr = ContinuityManager(llm=_make_mock_llm(response))
        mem = _make_session_memory()
        result = mgr.produce(mem)
        assert result.citation_reuse_max == 3


class TestContinuityMeta:
    """Tests for continuity metadata sidecar (session tracking, fail-safe sunset)."""

    def test_meta_defaults_when_missing(self):
        """Loading meta with no sidecar file on disk yields the defaults."""
        import tempfile

        with tempfile.TemporaryDirectory() as tmpdir:
            mem_path = os.path.join(tmpdir, "agent.json")
            meta = ContinuityManager.load_meta(mem_path)
            assert meta["sessions_produced"] == 0
            assert meta["citations_seen"] is False
            assert meta["format_version"] == 1

    def test_meta_save_and_load_roundtrip(self):
        """Whatever save_meta writes, load_meta returns unchanged."""
        import tempfile

        with tempfile.TemporaryDirectory() as tmpdir:
            mem_path = os.path.join(tmpdir, "agent.json")
            meta = {"sessions_produced": 5, "citations_seen": True, "format_version": 1}
            ContinuityManager.save_meta(meta, mem_path)
            loaded = ContinuityManager.load_meta(mem_path)
            assert loaded == meta

    def test_meta_path_follows_sidecar_pattern(self):
        """The sidecar swaps the memory file's .json for .continuity.meta.json."""
        path = ContinuityManager.meta_path("/tmp/agent.json")
        assert path == "/tmp/agent.continuity.meta.json"

    def test_corrupt_meta_returns_defaults(self):
        """An unparseable sidecar file falls back to the default counters."""
        import tempfile

        with tempfile.TemporaryDirectory() as tmpdir:
            mem_path = os.path.join(tmpdir, "agent.json")
            meta_path = ContinuityManager.meta_path(mem_path)
            # Explicit encoding keeps the fixture independent of the locale default.
            with open(meta_path, "w", encoding="utf-8") as f:
                f.write("NOT JSON")
            meta = ContinuityManager.load_meta(mem_path)
            assert meta["sessions_produced"] == 0


class TestFailSafeSunset:
    """Tests for citation requirement enforcement after first successful citation."""

    def test_bare_graduation_passes_before_sunset(self):
        """Before citations_seen, bare graduations (no [evidence:]) pass through."""
        text = (
            "## Patterns\n"
            "thought: pattern | 2x (2026-03-30)\n"
            "## Decisions\n"
        )
        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
            text, {"abc12345"}, today="2026-03-30", citations_seen=False
        )
        assert demoted == 0
        assert "| 2x" in result_text
        assert "needs-evidence" not in result_text

    def test_bare_graduation_demoted_after_sunset(self):
        """After citations_seen, bare graduations are demoted with (needs-evidence)."""
        text = (
            "## Patterns\n"
            "thought: pattern | 2x (2026-03-30)\n"
            "## Decisions\n"
        )
        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
            text, {"abc12345"}, today="2026-03-30", citations_seen=True
        )
        assert demoted == 1
        assert "| 1x" in result_text
        assert "(needs-evidence)" in result_text

    def test_bare_3x_demoted_to_2x_after_sunset(self):
        """After citations_seen, a bare 3x graduation drops one step to 2x."""
        text = (
            "## Patterns\n"
            "thought: pattern | 3x (2026-03-30)\n"
            "## Decisions\n"
        )
        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
            text, {"abc12345"}, today="2026-03-30", citations_seen=True
        )
        assert demoted == 1
        assert "| 2x" in result_text
        assert "(needs-evidence)" in result_text

    def test_old_date_bare_graduation_unaffected_by_sunset(self):
        """Carried-forward bare graduations from old sessions are not demoted."""
        text = (
            "## Patterns\n"
            "thought: old pattern | 2x (2026-03-28)\n"
            "## Decisions\n"
        )
        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
            text, {"abc12345"}, today="2026-03-30", citations_seen=True
        )
        assert demoted == 0
        assert "| 2x" in result_text

    def test_cited_graduation_still_passes_after_sunset(self):
        """Properly cited graduations pass regardless of sunset state."""
        text = (
            "## Patterns\n"
            "thought: pattern | 2x (2026-03-30) [evidence: abc12345]\n"
            "## Decisions\n"
        )
        result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
            text, {"abc12345"}, today="2026-03-30", citations_seen=True
        )
        assert validated == 1
        assert demoted == 0
@@ -393,6 +393,14 @@ class TestSessionWrap:
393
393
  assert result["nodes_before"] == 1
394
394
  assert result["nodes_after"] >= 0 # may prune if dormant
395
395
 
396
def test_wrap_continuity_disabled(self):
    """session_wrap without continuity manager reports disabled."""
    tool_handler, unified = _make_handler()
    unified.memory.session_start()
    wrap_result = tool_handler.handle_tool("session_wrap", {})
    # The continuity key is present and says why nothing was produced.
    continuity = wrap_result["continuity"]
    assert continuity["produced"] is False
    assert continuity["reason"] == "disabled"
403
+
396
404
 
397
405
  class TestAutoConfiguration:
398
406
  """Tests for OPENAI_API_KEY auto-detection logic."""
@@ -549,8 +557,9 @@ class TestSessionWrapWithContinuity:
549
557
  assert "error" not in result
550
558
  assert "nodes_before" in result
551
559
  assert result["saved"] is True
552
- # No continuity metadata since it failed
553
- assert "continuity" not in result
560
+ # Continuity key present but indicates failure
561
+ assert result["continuity"]["produced"] is False
562
+ assert result["continuity"]["reason"] == "error"
554
563
 
555
564
 
556
565
  class TestVersionNegotiation:
@@ -692,6 +701,16 @@ class TestThinkingModes:
692
701
  handler.handle_tool("think_breakthrough", {"problem": "Scaling architecture"})
693
702
  assert umem.memory.size == nodes_before
694
703
 
704
def test_thinking_tool_descriptions_say_call_add_memory(self):
    """Regression guard: descriptions must tell agents to call add_memory."""
    from flowscript_agents.mcp import TOOLS

    # Collect every registered tool whose name marks it as a thinking mode.
    thinking_tools = []
    for entry in TOOLS:
        if entry["name"].startswith("think_"):
            thinking_tools.append(entry)
    assert len(thinking_tools) == 3

    for entry in thinking_tools:
        description = entry["description"]
        assert "add_memory" in description, (
            f"{entry['name']} description must mention add_memory"
        )
713
+
695
714
 
696
715
  class TestDescriptionIntegrity:
697
716
  """Tests for the three-layer MCP description integrity system."""