flowscript-agents 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/PKG-INFO +1 -1
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/__init__.py +1 -1
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/continuity.py +272 -4
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/mcp.py +25 -6
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/tool-integrity.json +3 -3
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/pyproject.toml +1 -1
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_continuity.py +448 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_mcp.py +21 -2
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/.github/workflows/test.yml +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/.gitignore +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/AUDIT_TRAIL_DESIGN.md +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/README.md +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/adapters.md +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/api-reference.md +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/audit-trail.md +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/brand/logo-512.png +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/brand/social-preview.png +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/flowscript-demo.png +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/docs/lifecycle.md +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/examples/CLAUDE.md.example +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/examples/langgraph_live_test.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/examples/temporal_e2e_test.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/audit.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/camel_ai.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/client.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/cloud.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/crewai.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/__init__.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/_utils.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/consolidate.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/extract.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/index.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/providers.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/search.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/explain.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/fixpoint.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/google_adk.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/haystack.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/langgraph.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/llamaindex.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/memory.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/openai_agents.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/pydantic_ai.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/query.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/smolagents.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/types.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/unified.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/guides/recommended_claude_md.md +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/scripts/validate_dedup_threshold.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/conftest.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_audit.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_camel_ai.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_client.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_cloud.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_cloud_fixpoint.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_consolidation.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_crewai.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_embeddings.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_explain.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_fixpoint.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_google_adk.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_haystack.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_integration_continuity.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_langgraph.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_llamaindex.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_memory.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_openai_agents.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_pydantic_ai.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_smolagents.py +0 -0
- {flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/tests/test_temporal.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flowscript-agents
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Complete agent memory: reasoning queries + vector search + auto-extraction. Decision intelligence for LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Pydantic AI, smolagents, LlamaIndex, Haystack, and CAMEL-AI.
|
|
5
5
|
Project-URL: Homepage, https://flowscript.org
|
|
6
6
|
Project-URL: Repository, https://github.com/phillipclapham/flowscript-agents
|
|
@@ -39,6 +39,16 @@ def _log(msg: str) -> None:
|
|
|
39
39
|
sys.stderr.flush()
|
|
40
40
|
|
|
41
41
|
|
|
42
|
+
# Matches graduated patterns (2x or 3x) with [evidence: <id> "explanation"] citations.
|
|
43
|
+
# Captures: (1) level, (2) date, (3) cited IDs, (4) optional explanation in quotes.
|
|
44
|
+
# Used by _validate_graduations to verify citations against actual session nodes.
|
|
45
|
+
_GRADUATION_RE = re.compile(
|
|
46
|
+
r'\|\s*([23])x\s*\((\d{4}-\d{2}-\d{2})\)\s*\[evidence:\s*'
|
|
47
|
+
r'([a-fA-F0-9][a-fA-F0-9, ]*)' # one or more hex IDs
|
|
48
|
+
r'(?:\s+"([^"]*)")?\s*\]' # optional quoted explanation
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
|
|
42
52
|
# =============================================================================
|
|
43
53
|
# Result types
|
|
44
54
|
# =============================================================================
|
|
@@ -54,6 +64,9 @@ class ContinuityResult:
|
|
|
54
64
|
truncated: bool # whether LLM output exceeded max_chars
|
|
55
65
|
session_nodes_count: int # how many nodes were in this session
|
|
56
66
|
patterns_extracted: int # estimated from output (best-effort)
|
|
67
|
+
graduations_validated: int = 0 # citations that checked out
|
|
68
|
+
graduations_demoted: int = 0 # citations that failed → demoted
|
|
69
|
+
citation_reuse_max: int = 0 # max times any single node was cited (>2 = suspicious)
|
|
57
70
|
|
|
58
71
|
|
|
59
72
|
# =============================================================================
|
|
@@ -133,8 +146,21 @@ This is where learning happens. Use these markers for density:
|
|
|
133
146
|
**Temporal graduation (CRITICAL — this is what makes the system learn):**
|
|
134
147
|
- Mark each pattern with `| Nx (date)` where N = validation count, date = last validated
|
|
135
148
|
- New observation from THIS session not in existing patterns → add at `| 1x ({today})`
|
|
136
|
-
- Observation that VALIDATES an existing 1x pattern → increment to
|
|
137
|
-
|
|
149
|
+
- Observation that VALIDATES an existing 1x pattern → increment to:
|
|
150
|
+
`| 2x ({today}) [evidence: <node_id> "brief explanation of how node validates pattern"]`
|
|
151
|
+
where `<node_id>` is the 8-char ID prefix (e.g., `abc12345`) from the session data above.
|
|
152
|
+
The explanation MUST reference specific content from the cited node.
|
|
153
|
+
- Observation that VALIDATES an existing 2x pattern → graduate to:
|
|
154
|
+
`| 3x ({today}) [evidence: <node_id> "explanation"]`
|
|
155
|
+
- Evidence citations with explanations are REQUIRED for all graduations (2x and 3x).
|
|
156
|
+
Cite the specific session node AND explain how it validates the pattern.
|
|
157
|
+
Without a valid citation, the graduation will be rejected.
|
|
158
|
+
- For patterns you are NOT graduating (carrying forward at the same level), drop the
|
|
159
|
+
`[evidence:]` tag — evidence only appears on the graduation that created it.
|
|
160
|
+
- Patterns marked `(ungrounded)` were demoted in a previous session due to invalid evidence.
|
|
161
|
+
They need FRESH validating evidence from THIS session to be re-graduated. Do not re-graduate
|
|
162
|
+
without new evidence — remove the `(ungrounded)` marker only when providing a valid citation.
|
|
163
|
+
Patterns marked `(ungrounded)` that you cannot provide fresh evidence for should be removed.
|
|
138
164
|
- Patterns at 3x: extract the PRINCIPLE underneath, not the surface observations.
|
|
139
165
|
Multiple related observations → single meta-pattern. This is compression-as-cognition.
|
|
140
166
|
- Patterns with dates older than 7 days and no new validation → remove (they're stale)
|
|
@@ -144,7 +170,7 @@ Group related patterns in FlowScript blocks: `{{topic: ... }}`
|
|
|
144
170
|
**Example Patterns section:**
|
|
145
171
|
```
|
|
146
172
|
{{database_architecture:
|
|
147
|
-
thought: ACID compliance outweighs raw speed
|
|
173
|
+
thought: ACID compliance outweighs raw speed | 2x (2026-03-30) [evidence: 4931b6a8 "PostgreSQL chosen for ACID compliance"]
|
|
148
174
|
thought: connection pooling is the real performance bottleneck | 1x (2026-03-30)
|
|
149
175
|
? horizontal scaling strategy ><[single-writer vs multi-writer] | 1x (2026-03-29)
|
|
150
176
|
}}
|
|
@@ -304,6 +330,7 @@ class ContinuityManager:
|
|
|
304
330
|
self,
|
|
305
331
|
memory: Any,
|
|
306
332
|
existing_continuity: str | None = None,
|
|
333
|
+
citations_seen: bool = False,
|
|
307
334
|
) -> ContinuityResult:
|
|
308
335
|
"""Produce a compressed continuity file from session memory.
|
|
309
336
|
|
|
@@ -311,6 +338,7 @@ class ContinuityManager:
|
|
|
311
338
|
memory: A Memory instance containing the session's nodes.
|
|
312
339
|
existing_continuity: The current continuity file text (if any).
|
|
313
340
|
Pass None for first session.
|
|
341
|
+
citations_seen: If True, enforces citation requirement (fail-safe sunset).
|
|
314
342
|
|
|
315
343
|
Returns:
|
|
316
344
|
ContinuityResult with the compressed continuity text and metadata.
|
|
@@ -325,7 +353,8 @@ class ContinuityManager:
|
|
|
325
353
|
temporal_map = dict(memory._temporal_map)
|
|
326
354
|
|
|
327
355
|
return self.produce_from_nodes(
|
|
328
|
-
nodes, relationships, states, existing_continuity, temporal_map
|
|
356
|
+
nodes, relationships, states, existing_continuity, temporal_map,
|
|
357
|
+
citations_seen=citations_seen,
|
|
329
358
|
)
|
|
330
359
|
|
|
331
360
|
def produce_from_nodes(
|
|
@@ -335,12 +364,20 @@ class ContinuityManager:
|
|
|
335
364
|
states: list[Any],
|
|
336
365
|
existing_continuity: str | None = None,
|
|
337
366
|
temporal_map: dict[str, Any] | None = None,
|
|
367
|
+
citations_seen: bool = False,
|
|
338
368
|
) -> ContinuityResult:
|
|
339
369
|
"""Produce continuity from raw node lists (alternative to Memory instance).
|
|
340
370
|
|
|
341
371
|
Useful when you have nodes but not a full Memory object, e.g.,
|
|
342
372
|
from a filtered set or from deserialized data.
|
|
373
|
+
|
|
374
|
+
Args:
|
|
375
|
+
citations_seen: If True, enforces citation requirement on all today's
|
|
376
|
+
graduations. Set from metadata after first successful citation.
|
|
343
377
|
"""
|
|
378
|
+
import datetime
|
|
379
|
+
today = datetime.date.today().isoformat()
|
|
380
|
+
|
|
344
381
|
session_summary = _format_session_nodes(
|
|
345
382
|
nodes, relationships, states, temporal_map
|
|
346
383
|
)
|
|
@@ -350,6 +387,7 @@ class ContinuityManager:
|
|
|
350
387
|
existing_continuity=existing_continuity,
|
|
351
388
|
project_name=self._project_name,
|
|
352
389
|
max_chars=self._max_chars,
|
|
390
|
+
today=today,
|
|
353
391
|
)
|
|
354
392
|
|
|
355
393
|
_log(f"Producing continuity ({len(nodes)} nodes, max {self._max_chars} chars)")
|
|
@@ -374,6 +412,25 @@ class ContinuityManager:
|
|
|
374
412
|
# (first session, something is better than nothing)
|
|
375
413
|
_log("WARNING: No existing continuity to fall back to — using LLM output as-is")
|
|
376
414
|
|
|
415
|
+
# Validate graduation citations against actual session nodes.
|
|
416
|
+
# Only checks citations from today (carried-forward patterns are trusted).
|
|
417
|
+
valid_ids = {n.id[:8].lower() for n in nodes}
|
|
418
|
+
node_content_map = {n.id[:8].lower(): n.content for n in nodes}
|
|
419
|
+
text, grad_validated, grad_demoted, reuse_max = self._validate_graduations(
|
|
420
|
+
text, valid_ids, today=today, node_content_map=node_content_map,
|
|
421
|
+
citations_seen=citations_seen,
|
|
422
|
+
)
|
|
423
|
+
if grad_demoted:
|
|
424
|
+
_log(
|
|
425
|
+
f"Graduation validation: {grad_validated} validated, "
|
|
426
|
+
f"{grad_demoted} demoted (ungrounded)"
|
|
427
|
+
)
|
|
428
|
+
if reuse_max > 2:
|
|
429
|
+
_log(
|
|
430
|
+
f"Graduation warning: single node cited {reuse_max} times "
|
|
431
|
+
f"(possible citation gaming)"
|
|
432
|
+
)
|
|
433
|
+
|
|
377
434
|
truncated = False
|
|
378
435
|
if len(text) > self._max_chars:
|
|
379
436
|
truncated = True
|
|
@@ -390,6 +447,9 @@ class ContinuityManager:
|
|
|
390
447
|
truncated=truncated,
|
|
391
448
|
session_nodes_count=len(nodes),
|
|
392
449
|
patterns_extracted=patterns_extracted,
|
|
450
|
+
graduations_validated=grad_validated,
|
|
451
|
+
graduations_demoted=grad_demoted,
|
|
452
|
+
citation_reuse_max=reuse_max,
|
|
393
453
|
)
|
|
394
454
|
|
|
395
455
|
# -- File I/O --
|
|
@@ -404,6 +464,54 @@ class ContinuityManager:
|
|
|
404
464
|
p = Path(memory_path)
|
|
405
465
|
return str(p.parent / f"{p.stem}.continuity.md")
|
|
406
466
|
|
|
467
|
+
@staticmethod
|
|
468
|
+
def meta_path(memory_path: str) -> str:
|
|
469
|
+
"""Get the metadata sidecar path. ./agent.json → ./agent.continuity.meta.json"""
|
|
470
|
+
p = Path(memory_path)
|
|
471
|
+
return str(p.parent / f"{p.stem}.continuity.meta.json")
|
|
472
|
+
|
|
473
|
+
@staticmethod
|
|
474
|
+
def load_meta(memory_path: str) -> dict:
|
|
475
|
+
"""Load continuity metadata from the JSON sidecar.
|
|
476
|
+
|
|
477
|
+
Returns a dict with keys: sessions_produced, citations_seen, format_version.
|
|
478
|
+
Returns defaults if the file doesn't exist.
|
|
479
|
+
"""
|
|
480
|
+
import json
|
|
481
|
+
path = ContinuityManager.meta_path(memory_path)
|
|
482
|
+
defaults = {"sessions_produced": 0, "citations_seen": False, "format_version": 1}
|
|
483
|
+
if not os.path.exists(path):
|
|
484
|
+
return defaults
|
|
485
|
+
try:
|
|
486
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
487
|
+
data = json.load(f)
|
|
488
|
+
# Merge with defaults for forward compatibility
|
|
489
|
+
return {**defaults, **data}
|
|
490
|
+
except (json.JSONDecodeError, OSError):
|
|
491
|
+
_log(f"WARNING: corrupt continuity meta at {path} — using defaults")
|
|
492
|
+
return defaults
|
|
493
|
+
|
|
494
|
+
@staticmethod
|
|
495
|
+
def save_meta(meta: dict, memory_path: str) -> str:
|
|
496
|
+
"""Save continuity metadata to the JSON sidecar. Atomic write."""
|
|
497
|
+
import json
|
|
498
|
+
path = ContinuityManager.meta_path(memory_path)
|
|
499
|
+
tmp_path = path + ".tmp"
|
|
500
|
+
try:
|
|
501
|
+
with open(tmp_path, "w", encoding="utf-8") as f:
|
|
502
|
+
json.dump(meta, f, indent=2, sort_keys=True)
|
|
503
|
+
f.write("\n")
|
|
504
|
+
f.flush()
|
|
505
|
+
os.fsync(f.fileno())
|
|
506
|
+
os.replace(tmp_path, path)
|
|
507
|
+
except Exception:
|
|
508
|
+
try:
|
|
509
|
+
os.unlink(tmp_path)
|
|
510
|
+
except OSError:
|
|
511
|
+
pass
|
|
512
|
+
raise
|
|
513
|
+
return path
|
|
514
|
+
|
|
407
515
|
# -- Validation --
|
|
408
516
|
|
|
409
517
|
_REQUIRED_SECTIONS = {"state", "patterns", "decisions", "context"}
|
|
@@ -424,6 +532,166 @@ class ContinuityManager:
|
|
|
424
532
|
found.add(section)
|
|
425
533
|
return found == cls._REQUIRED_SECTIONS
|
|
426
534
|
|
|
535
|
+
# Minimum meaningful words for explanation-to-node content overlap check.
|
|
536
|
+
# Short/common words are excluded to avoid false positives.
|
|
537
|
+
_STOP_WORDS = frozenset(
|
|
538
|
+
"a an the is are was were be been being have has had do does did "
|
|
539
|
+
"will would shall should may might can could this that these those "
|
|
540
|
+
"it its he she they we you i me my our his her their in on at to "
|
|
541
|
+
"for of by with from and or but not no nor so if as".split()
|
|
542
|
+
)
|
|
543
|
+
|
|
544
|
+
# Matches bare graduations (2x or 3x) WITHOUT [evidence:] tags.
|
|
545
|
+
# Used to enforce citation requirement after fail-safe sunset.
|
|
546
|
+
_BARE_GRADUATION_RE = re.compile(
|
|
547
|
+
r"\|\s*([23])x\s*\((\d{4}-\d{2}-\d{2})\)\s*(?!\[evidence:)"
|
|
548
|
+
)
|
|
549
|
+
|
|
550
|
+
@staticmethod
|
|
551
|
+
def _validate_graduations(
|
|
552
|
+
text: str,
|
|
553
|
+
valid_ids: set[str],
|
|
554
|
+
today: str | None = None,
|
|
555
|
+
node_content_map: dict[str, str] | None = None,
|
|
556
|
+
citations_seen: bool = False,
|
|
557
|
+
) -> tuple[str, int, int, int]:
|
|
558
|
+
"""Validate evidence citations on graduated patterns.
|
|
559
|
+
|
|
560
|
+
Scans the ## Patterns section for 2x/3x lines with [evidence: <id> "explanation"].
|
|
561
|
+
Only validates citations whose date matches today (newly graduated this
|
|
562
|
+
session). Carried-forward patterns from previous sessions pass through
|
|
563
|
+
unchanged — their evidence was valid when originally graduated.
|
|
564
|
+
|
|
565
|
+
Validation checks (all must pass for a citation to be accepted):
|
|
566
|
+
1. At least one cited ID exists in the current session's node set
|
|
567
|
+
2. If an explanation is provided and node_content_map is available,
|
|
568
|
+
the explanation must reference actual content from the cited node
|
|
569
|
+
(word overlap check — prevents citation of irrelevant nodes)
|
|
570
|
+
|
|
571
|
+
If validation fails, demotes the graduation (3x→2x, 2x→1x).
|
|
572
|
+
|
|
573
|
+
Fail-safe sunset: when citations_seen=True, today's graduations WITHOUT
|
|
574
|
+
[evidence:] tags are also demoted. Before citations_seen, they pass through
|
|
575
|
+
(migration grace period). Once the LLM demonstrates citation ability, it
|
|
576
|
+
must always cite.
|
|
577
|
+
|
|
578
|
+
Returns:
|
|
579
|
+
(possibly_modified_text, validated_count, demoted_count, citation_reuse_max)
|
|
580
|
+
"""
|
|
581
|
+
if today is None:
|
|
582
|
+
import datetime
|
|
583
|
+
today = datetime.date.today().isoformat()
|
|
584
|
+
|
|
585
|
+
lines = text.split("\n")
|
|
586
|
+
in_patterns = False
|
|
587
|
+
validated = 0
|
|
588
|
+
demoted = 0
|
|
589
|
+
citation_counts: dict[str, int] = {} # track per-node citation frequency
|
|
590
|
+
|
|
591
|
+
for i, line in enumerate(lines):
|
|
592
|
+
# Track section boundaries (substring match, consistent with _validate_structure)
|
|
593
|
+
if line.startswith("## "):
|
|
594
|
+
in_patterns = "pattern" in line.lower()
|
|
595
|
+
continue
|
|
596
|
+
if not in_patterns:
|
|
597
|
+
continue
|
|
598
|
+
|
|
599
|
+
match = _GRADUATION_RE.search(line)
|
|
600
|
+
if match:
|
|
601
|
+
level = int(match.group(1)) # 2 or 3
|
|
602
|
+
date_str = match.group(2) # YYYY-MM-DD
|
|
603
|
+
cited_raw = match.group(3)
|
|
604
|
+
explanation = match.group(4) # may be None if no quotes
|
|
605
|
+
|
|
606
|
+
# Only validate citations from THIS session (today's date).
|
|
607
|
+
# Carried-forward patterns retain their evidence unchecked.
|
|
608
|
+
if date_str != today:
|
|
609
|
+
continue
|
|
610
|
+
|
|
611
|
+
# Normalize cited IDs: lowercase, truncate to 8 chars, filter empties
|
|
612
|
+
cited_ids = {
|
|
613
|
+
cid.strip().lower()[:8]
|
|
614
|
+
for cid in re.split(r"[,\s]+", cited_raw)
|
|
615
|
+
if cid.strip()
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
# Track citation frequency
|
|
619
|
+
for cid in cited_ids & valid_ids:
|
|
620
|
+
citation_counts[cid] = citation_counts.get(cid, 0) + 1
|
|
621
|
+
|
|
622
|
+
# Check 1: at least one cited ID exists in session nodes
|
|
623
|
+
ids_valid = bool(cited_ids & valid_ids)
|
|
624
|
+
|
|
625
|
+
# Check 2: explanation references cited node content (if available)
|
|
626
|
+
explanation_valid = True
|
|
627
|
+
if ids_valid and explanation and node_content_map:
|
|
628
|
+
matched_id = next(iter(cited_ids & valid_ids))
|
|
629
|
+
node_content = node_content_map.get(matched_id, "")
|
|
630
|
+
if node_content:
|
|
631
|
+
explanation_valid = ContinuityManager._check_explanation_overlap(
|
|
632
|
+
explanation, node_content
|
|
633
|
+
)
|
|
634
|
+
|
|
635
|
+
if ids_valid and explanation_valid:
|
|
636
|
+
validated += 1
|
|
637
|
+
else:
|
|
638
|
+
demoted += 1
|
|
639
|
+
demoted_level = level - 1
|
|
640
|
+
old_marker = match.group(0)
|
|
641
|
+
new_marker = old_marker.replace(
|
|
642
|
+
f"| {level}x", f"| {demoted_level}x"
|
|
643
|
+
)
|
|
644
|
+
new_marker = re.sub(
|
|
645
|
+
r'\[evidence:\s*[a-fA-F0-9][a-fA-F0-9, ]*(?:\s+"[^"]*")?\s*\]',
|
|
646
|
+
"(ungrounded)", new_marker
|
|
647
|
+
)
|
|
648
|
+
lines[i] = line.replace(old_marker, new_marker)
|
|
649
|
+
continue
|
|
650
|
+
|
|
651
|
+
# Fail-safe sunset: once the LLM has demonstrated citation ability,
|
|
652
|
+
# today's graduations WITHOUT [evidence:] are demoted.
|
|
653
|
+
if not citations_seen:
|
|
654
|
+
continue
|
|
655
|
+
|
|
656
|
+
bare_match = ContinuityManager._BARE_GRADUATION_RE.search(line)
|
|
657
|
+
if not bare_match:
|
|
658
|
+
continue
|
|
659
|
+
|
|
660
|
+
bare_level = int(bare_match.group(1))
|
|
661
|
+
bare_date = bare_match.group(2)
|
|
662
|
+
if bare_date != today:
|
|
663
|
+
continue
|
|
664
|
+
|
|
665
|
+
demoted += 1
|
|
666
|
+
demoted_level = bare_level - 1
|
|
667
|
+
old_marker = bare_match.group(0)
|
|
668
|
+
new_marker = old_marker.replace(
|
|
669
|
+
f"| {bare_level}x", f"| {demoted_level}x"
|
|
670
|
+
)
|
|
671
|
+
lines[i] = line.replace(old_marker, new_marker + " (needs-evidence)")
|
|
672
|
+
|
|
673
|
+
reuse_max = max(citation_counts.values()) if citation_counts else 0
|
|
674
|
+
return "\n".join(lines), validated, demoted, reuse_max
|
|
675
|
+
|
|
676
|
+
@classmethod
|
|
677
|
+
def _check_explanation_overlap(cls, explanation: str, node_content: str) -> bool:
|
|
678
|
+
"""Check if an explanation references actual content from the cited node.
|
|
679
|
+
|
|
680
|
+
Uses word overlap (excluding stop words). At least one meaningful word
|
|
681
|
+
from the explanation must appear in the node content. This prevents
|
|
682
|
+
generic explanations like "confirms pattern" while allowing legitimate
|
|
683
|
+
paraphrasing.
|
|
684
|
+
"""
|
|
685
|
+
def meaningful_words(text: str) -> set[str]:
|
|
686
|
+
return {
|
|
687
|
+
w for w in re.split(r"[^a-zA-Z0-9]+", text.lower())
|
|
688
|
+
if len(w) > 2 and w not in cls._STOP_WORDS
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
explanation_words = meaningful_words(explanation)
|
|
692
|
+
node_words = meaningful_words(node_content)
|
|
693
|
+
return bool(explanation_words & node_words)
|
|
694
|
+
|
|
427
695
|
# -- File I/O --
|
|
428
696
|
|
|
429
697
|
def save(self, text: str, memory_path: str) -> str:
|
|
@@ -579,8 +579,8 @@ _TOOL_DEFS_RAW = [
|
|
|
579
579
|
"returned framework to analyze your problem thoroughly — deconstruct to "
|
|
580
580
|
"fundamentals, trace consequences across multiple orders, verify "
|
|
581
581
|
"assumptions explicitly, and hold contradictions without premature "
|
|
582
|
-
"resolution.
|
|
583
|
-
"
|
|
582
|
+
"resolution. After analysis, call add_memory to save key insights — "
|
|
583
|
+
"without this, your analysis is lost between sessions."
|
|
584
584
|
),
|
|
585
585
|
"inputSchema": {
|
|
586
586
|
"type": "object",
|
|
@@ -607,7 +607,8 @@ _TOOL_DEFS_RAW = [
|
|
|
607
607
|
"fundamentally different angle. Returns a creative exploration framework. "
|
|
608
608
|
"After calling, challenge every assumption — what constraints are real vs "
|
|
609
609
|
"inherited? What would the opposite approach look like? What patterns from "
|
|
610
|
-
"unrelated domains apply?
|
|
610
|
+
"unrelated domains apply? After exploration, call add_memory to save "
|
|
611
|
+
"breakthrough insights — without this, your exploration is lost between sessions."
|
|
611
612
|
),
|
|
612
613
|
"inputSchema": {
|
|
613
614
|
"type": "object",
|
|
@@ -634,8 +635,9 @@ _TOOL_DEFS_RAW = [
|
|
|
634
635
|
"with assumption-breaking for a two-pronged attack: systematic depth AND "
|
|
635
636
|
"lateral thinking simultaneously. Use when the problem requires both "
|
|
636
637
|
"understanding WHY current approaches fail AND imagining fundamentally "
|
|
637
|
-
"different solutions. Returns a comprehensive framework.
|
|
638
|
-
"
|
|
638
|
+
"different solutions. Returns a comprehensive framework. After analysis, "
|
|
639
|
+
"call add_memory to save key findings — without this, your analysis is "
|
|
640
|
+
"lost between sessions."
|
|
639
641
|
),
|
|
640
642
|
"inputSchema": {
|
|
641
643
|
"type": "object",
|
|
@@ -921,12 +923,19 @@ class MCPHandler:
|
|
|
921
923
|
continuity_result = None
|
|
922
924
|
if self._continuity_mgr and self._memory_path:
|
|
923
925
|
try:
|
|
926
|
+
meta = ContinuityManager.load_meta(self._memory_path)
|
|
924
927
|
existing = ContinuityManager.load(self._memory_path)
|
|
925
928
|
continuity_result = self._continuity_mgr.produce(
|
|
926
929
|
self._umem.memory,
|
|
927
930
|
existing_continuity=existing,
|
|
931
|
+
citations_seen=meta.get("citations_seen", False),
|
|
928
932
|
)
|
|
929
933
|
self._continuity_mgr.save(continuity_result.text, self._memory_path)
|
|
934
|
+
# Update metadata
|
|
935
|
+
meta["sessions_produced"] = meta.get("sessions_produced", 0) + 1
|
|
936
|
+
if continuity_result.graduations_validated > 0:
|
|
937
|
+
meta["citations_seen"] = True
|
|
938
|
+
ContinuityManager.save_meta(meta, self._memory_path)
|
|
930
939
|
except Exception as e:
|
|
931
940
|
_log(f"Continuity production failed: {e}")
|
|
932
941
|
# Non-fatal — session_wrap still proceeds
|
|
@@ -949,7 +958,7 @@ class MCPHandler:
|
|
|
949
958
|
"path": result.path,
|
|
950
959
|
}
|
|
951
960
|
|
|
952
|
-
#
|
|
961
|
+
# Always include continuity key so callers can distinguish disabled/error/success.
|
|
953
962
|
if continuity_result:
|
|
954
963
|
response["continuity"] = {
|
|
955
964
|
"produced": True,
|
|
@@ -959,6 +968,10 @@ class MCPHandler:
|
|
|
959
968
|
"truncated": continuity_result.truncated,
|
|
960
969
|
"path": ContinuityManager.continuity_path(self._memory_path),
|
|
961
970
|
}
|
|
971
|
+
elif self._continuity_mgr:
|
|
972
|
+
response["continuity"] = {"produced": False, "reason": "error"}
|
|
973
|
+
else:
|
|
974
|
+
response["continuity"] = {"produced": False, "reason": "disabled"}
|
|
962
975
|
|
|
963
976
|
return response
|
|
964
977
|
|
|
@@ -1647,11 +1660,17 @@ def run_server(
|
|
|
1647
1660
|
current_nodes = umem.memory.size
|
|
1648
1661
|
if current_nodes > _last_node_count[0]:
|
|
1649
1662
|
try:
|
|
1663
|
+
meta = ContinuityManager.load_meta(memory_path)
|
|
1650
1664
|
existing = ContinuityManager.load(memory_path)
|
|
1651
1665
|
cont_result = continuity_mgr.produce(
|
|
1652
1666
|
umem.memory, existing_continuity=existing,
|
|
1667
|
+
citations_seen=meta.get("citations_seen", False),
|
|
1653
1668
|
)
|
|
1654
1669
|
continuity_mgr.save(cont_result.text, memory_path)
|
|
1670
|
+
meta["sessions_produced"] = meta.get("sessions_produced", 0) + 1
|
|
1671
|
+
if cont_result.graduations_validated > 0:
|
|
1672
|
+
meta["citations_seen"] = True
|
|
1673
|
+
ContinuityManager.save_meta(meta, memory_path)
|
|
1655
1674
|
_continuity_produced[0] = True
|
|
1656
1675
|
_last_node_count[0] = current_nodes
|
|
1657
1676
|
_log(f"Auto-wrap: continuity produced ({cont_result.char_count} chars)")
|
|
@@ -14,8 +14,8 @@
|
|
|
14
14
|
"remove_memory": "ee604c8f87855e32b4509162048168d0c941da79339f907d7d921a55780de830",
|
|
15
15
|
"search_memory": "7e91e30bc03b5a2c990b83a33c00cf512c5c7c2a2e204c546206ffe606010064",
|
|
16
16
|
"session_wrap": "ea1e2b2048ef4854de595601105375cfda91856a11851300e864c1e5358894b4",
|
|
17
|
-
"think_breakthrough": "
|
|
18
|
-
"think_creative": "
|
|
19
|
-
"think_deeper": "
|
|
17
|
+
"think_breakthrough": "2e2b86d8e4d1c10c80cd9dcc0e55a5ec5642f0633e4e42bd8bf5dfd87c59b4c3",
|
|
18
|
+
"think_creative": "f648d9dd59e4c1901fd532d92568b3f102dc130b5f6383bfad64c00afce2ca0a",
|
|
19
|
+
"think_deeper": "4e190d9c344323be20fea2243830732247ec580b34fd936a837c082f0d6d8b76",
|
|
20
20
|
"verify_audit": "2e93d3118ebeed1a1113e423ec915b8dd987c5d2c4adf6fefcd93fa0c931483f"
|
|
21
21
|
}
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "flowscript-agents"
|
|
7
|
-
version = "0.4.0"
|
|
7
|
+
version = "0.4.1"
|
|
8
8
|
description = "Complete agent memory: reasoning queries + vector search + auto-extraction. Decision intelligence for LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Pydantic AI, smolagents, LlamaIndex, Haystack, and CAMEL-AI."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -489,3 +489,451 @@ class TestTruncation:
|
|
|
489
489
|
assert result.text.startswith("# Agent")
|
|
490
490
|
# Should have at least State section
|
|
491
491
|
assert "## State" in result.text
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
class TestGraduationValidation:
|
|
495
|
+
"""Tests for graph-grounded graduation — anti-semantic-inbreeding defense."""
|
|
496
|
+
|
|
497
|
+
def test_valid_citation_kept(self):
|
|
498
|
+
text = (
|
|
499
|
+
"## Patterns\n"
|
|
500
|
+
"thought: caching helps | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
501
|
+
"## Decisions\n"
|
|
502
|
+
)
|
|
503
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
504
|
+
text, {"abc12345", "def67890"}, today="2026-03-30"
|
|
505
|
+
)
|
|
506
|
+
assert "| 2x" in result_text
|
|
507
|
+
assert "ungrounded" not in result_text
|
|
508
|
+
assert validated == 1
|
|
509
|
+
assert demoted == 0
|
|
510
|
+
|
|
511
|
+
def test_invalid_citation_demoted(self):
|
|
512
|
+
text = (
|
|
513
|
+
"## Patterns\n"
|
|
514
|
+
"thought: caching helps | 2x (2026-03-30) [evidence: ffffffff]\n"
|
|
515
|
+
"## Decisions\n"
|
|
516
|
+
)
|
|
517
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
518
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
519
|
+
)
|
|
520
|
+
assert "| 1x" in result_text
|
|
521
|
+
assert "| 2x" not in result_text
|
|
522
|
+
assert "(ungrounded)" in result_text
|
|
523
|
+
assert validated == 0
|
|
524
|
+
assert demoted == 1
|
|
525
|
+
|
|
526
|
+
def test_3x_demoted_to_2x(self):
|
|
527
|
+
text = (
|
|
528
|
+
"## Patterns\n"
|
|
529
|
+
"thought: principle | 3x (2026-03-30) [evidence: badbadba]\n"
|
|
530
|
+
"## Decisions\n"
|
|
531
|
+
)
|
|
532
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
533
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
534
|
+
)
|
|
535
|
+
assert "| 2x" in result_text
|
|
536
|
+
assert "| 3x" not in result_text
|
|
537
|
+
assert "(ungrounded)" in result_text
|
|
538
|
+
assert demoted == 1
|
|
539
|
+
|
|
540
|
+
def test_no_citations_passthrough(self):
|
|
541
|
+
"""Old-format patterns without [evidence:] pass through unchanged."""
|
|
542
|
+
text = (
|
|
543
|
+
"## Patterns\n"
|
|
544
|
+
"thought: caching helps | 2x (2026-03-30)\n"
|
|
545
|
+
"thought: pooling matters | 3x (2026-03-30)\n"
|
|
546
|
+
"## Decisions\n"
|
|
547
|
+
)
|
|
548
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
549
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
550
|
+
)
|
|
551
|
+
assert result_text == text
|
|
552
|
+
assert validated == 0
|
|
553
|
+
assert demoted == 0
|
|
554
|
+
|
|
555
|
+
def test_mixed_valid_and_invalid(self):
|
|
556
|
+
text = (
|
|
557
|
+
"## Patterns\n"
|
|
558
|
+
"thought: good pattern | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
559
|
+
"thought: hallucinated | 2x (2026-03-30) [evidence: ffffffff]\n"
|
|
560
|
+
"## Decisions\n"
|
|
561
|
+
)
|
|
562
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
563
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
564
|
+
)
|
|
565
|
+
assert validated == 1
|
|
566
|
+
assert demoted == 1
|
|
567
|
+
# First pattern kept at 2x, second demoted to 1x
|
|
568
|
+
lines = result_text.split("\n")
|
|
569
|
+
assert "| 2x" in lines[1]
|
|
570
|
+
assert "| 1x" in lines[2]
|
|
571
|
+
assert "(ungrounded)" in lines[2]
|
|
572
|
+
|
|
573
|
+
def test_multiple_citations_one_valid_sufficient(self):
|
|
574
|
+
text = (
|
|
575
|
+
"## Patterns\n"
|
|
576
|
+
"thought: pattern | 2x (2026-03-30) [evidence: bad00000, abc12345]\n"
|
|
577
|
+
"## Decisions\n"
|
|
578
|
+
)
|
|
579
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
580
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
581
|
+
)
|
|
582
|
+
assert validated == 1
|
|
583
|
+
assert demoted == 0
|
|
584
|
+
assert "| 2x" in result_text
|
|
585
|
+
|
|
586
|
+
def test_1x_not_affected(self):
|
|
587
|
+
"""1x patterns are new observations — never checked for citations."""
|
|
588
|
+
text = (
|
|
589
|
+
"## Patterns\n"
|
|
590
|
+
"thought: new observation | 1x (2026-03-30)\n"
|
|
591
|
+
"thought: also new | 1x (2026-03-30) [evidence: ffffffff]\n"
|
|
592
|
+
"## Decisions\n"
|
|
593
|
+
)
|
|
594
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
595
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
596
|
+
)
|
|
597
|
+
# 1x lines are never matched by _GRADUATION_RE (only matches 2x/3x)
|
|
598
|
+
assert validated == 0
|
|
599
|
+
assert demoted == 0
|
|
600
|
+
|
|
601
|
+
def test_outside_patterns_section_ignored(self):
|
|
602
|
+
"""Citations in non-Patterns sections should not be validated."""
|
|
603
|
+
text = (
|
|
604
|
+
"## State\n"
|
|
605
|
+
"some state | 2x (2026-03-30) [evidence: ffffffff]\n"
|
|
606
|
+
"## Patterns\n"
|
|
607
|
+
"thought: real pattern | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
608
|
+
"## Decisions\n"
|
|
609
|
+
)
|
|
610
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
611
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
612
|
+
)
|
|
613
|
+
# Only the Patterns section line is checked
|
|
614
|
+
assert validated == 1
|
|
615
|
+
assert demoted == 0
|
|
616
|
+
# State section line unchanged (still has ffffffff)
|
|
617
|
+
assert "ffffffff" in result_text
|
|
618
|
+
|
|
619
|
+
def test_uppercase_citation_normalized(self):
|
|
620
|
+
"""LLMs may uppercase hex — citations should be case-insensitive."""
|
|
621
|
+
text = (
|
|
622
|
+
"## Patterns\n"
|
|
623
|
+
"thought: pattern | 2x (2026-03-30) [evidence: ABC12345]\n"
|
|
624
|
+
"## Decisions\n"
|
|
625
|
+
)
|
|
626
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
627
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
628
|
+
)
|
|
629
|
+
assert validated == 1
|
|
630
|
+
assert demoted == 0
|
|
631
|
+
|
|
632
|
+
def test_space_separated_citations(self):
|
|
633
|
+
"""LLMs might use spaces instead of commas between IDs."""
|
|
634
|
+
text = (
|
|
635
|
+
"## Patterns\n"
|
|
636
|
+
"thought: pattern | 2x (2026-03-30) [evidence: bad00000 abc12345]\n"
|
|
637
|
+
"## Decisions\n"
|
|
638
|
+
)
|
|
639
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
640
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
641
|
+
)
|
|
642
|
+
assert validated == 1
|
|
643
|
+
assert demoted == 0
|
|
644
|
+
|
|
645
|
+
def test_long_id_truncated_to_8_chars(self):
|
|
646
|
+
"""LLM might cite full 64-char ID — should be truncated to 8 for matching."""
|
|
647
|
+
text = (
|
|
648
|
+
"## Patterns\n"
|
|
649
|
+
"thought: pattern | 2x (2026-03-30) [evidence: abc12345ffffffffffffffff]\n"
|
|
650
|
+
"## Decisions\n"
|
|
651
|
+
)
|
|
652
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
653
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
654
|
+
)
|
|
655
|
+
assert validated == 1
|
|
656
|
+
assert demoted == 0
|
|
657
|
+
|
|
658
|
+
def test_carried_forward_evidence_not_demoted(self):
|
|
659
|
+
"""Patterns from previous sessions (old dates) should pass through unchanged."""
|
|
660
|
+
text = (
|
|
661
|
+
"## Patterns\n"
|
|
662
|
+
"thought: old pattern | 2x (2026-03-28) [evidence: abc12345]\n"
|
|
663
|
+
"thought: new pattern | 2x (2026-03-30) [evidence: def67890]\n"
|
|
664
|
+
"## Decisions\n"
|
|
665
|
+
)
|
|
666
|
+
# abc12345 is NOT in valid_ids, but its date is old → should pass through
|
|
667
|
+
# def67890 IS in valid_ids and its date matches today → validated
|
|
668
|
+
result_text, validated, demoted, _reuse = ContinuityManager._validate_graduations(
|
|
669
|
+
text, {"def67890"}, today="2026-03-30"
|
|
670
|
+
)
|
|
671
|
+
assert validated == 1
|
|
672
|
+
assert demoted == 0
|
|
673
|
+
# Old pattern still at 2x (not demoted despite abc12345 not in current nodes)
|
|
674
|
+
assert "2026-03-28" in result_text
|
|
675
|
+
assert "ungrounded" not in result_text
|
|
676
|
+
|
|
677
|
+
def test_graduation_validation_through_produce(self):
|
|
678
|
+
"""Integration: graduation validation works through the full produce() pipeline."""
|
|
679
|
+
import datetime
|
|
680
|
+
today = datetime.date.today().isoformat()
|
|
681
|
+
|
|
682
|
+
# Node ID 50d7c6fd = "Connection pooling will be the real bottleneck"
|
|
683
|
+
# from _make_session_memory(). Use today's date so validation fires.
|
|
684
|
+
response_with_valid_citation = f"""# Agent — Memory (v1)
|
|
685
|
+
|
|
686
|
+
## State
|
|
687
|
+
Working on database selection.
|
|
688
|
+
|
|
689
|
+
## Patterns
|
|
690
|
+
{{database_architecture:
|
|
691
|
+
thought: connection pooling is critical | 2x ({today}) [evidence: 50d7c6fd]
|
|
692
|
+
thought: ACID compliance matters | 2x ({today}) [evidence: ffffffff]
|
|
693
|
+
}}
|
|
694
|
+
|
|
695
|
+
## Decisions
|
|
696
|
+
[decided(rationale: "ACID required", on: "{today}")] Use PostgreSQL
|
|
697
|
+
|
|
698
|
+
## Context
|
|
699
|
+
Selected PostgreSQL, investigating pooling."""
|
|
700
|
+
|
|
701
|
+
mgr = ContinuityManager(
|
|
702
|
+
llm=_make_mock_llm(response_with_valid_citation),
|
|
703
|
+
)
|
|
704
|
+
mem = _make_session_memory()
|
|
705
|
+
result = mgr.produce(mem)
|
|
706
|
+
|
|
707
|
+
# One citation valid (50d7c6fd exists), one invalid (ffffffff doesn't)
|
|
708
|
+
assert result.graduations_validated == 1
|
|
709
|
+
assert result.graduations_demoted == 1
|
|
710
|
+
assert "(ungrounded)" in result.text
|
|
711
|
+
# The valid graduation should still be 2x
|
|
712
|
+
assert "| 2x" in result.text
|
|
713
|
+
# The invalid one should be demoted to 1x
|
|
714
|
+
assert "| 1x" in result.text
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
class TestExplanationValidation:
|
|
718
|
+
"""Tests for explain-your-evidence — citation relevance checking."""
|
|
719
|
+
|
|
720
|
+
def test_explanation_with_node_content_overlap_passes(self):
|
|
721
|
+
text = (
|
|
722
|
+
'## Patterns\n'
|
|
723
|
+
'thought: pooling matters | 2x (2026-03-30) '
|
|
724
|
+
'[evidence: abc12345 "connection pooling identified as bottleneck"]\n'
|
|
725
|
+
'## Decisions\n'
|
|
726
|
+
)
|
|
727
|
+
node_map = {"abc12345": "Connection pooling will be the real bottleneck"}
|
|
728
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
729
|
+
text, {"abc12345"}, today="2026-03-30", node_content_map=node_map
|
|
730
|
+
)
|
|
731
|
+
assert validated == 1
|
|
732
|
+
assert demoted == 0
|
|
733
|
+
|
|
734
|
+
def test_explanation_without_overlap_demoted(self):
|
|
735
|
+
text = (
|
|
736
|
+
'## Patterns\n'
|
|
737
|
+
'thought: pooling matters | 2x (2026-03-30) '
|
|
738
|
+
'[evidence: abc12345 "confirms the pattern"]\n'
|
|
739
|
+
'## Decisions\n'
|
|
740
|
+
)
|
|
741
|
+
node_map = {"abc12345": "Connection pooling will be the real bottleneck"}
|
|
742
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
743
|
+
text, {"abc12345"}, today="2026-03-30", node_content_map=node_map
|
|
744
|
+
)
|
|
745
|
+
# "confirms the pattern" has no meaningful overlap with node content
|
|
746
|
+
assert validated == 0
|
|
747
|
+
assert demoted == 1
|
|
748
|
+
assert "(ungrounded)" in result_text
|
|
749
|
+
|
|
750
|
+
def test_no_explanation_still_passes_id_check(self):
|
|
751
|
+
"""Citations without explanations pass on ID alone (backward compat)."""
|
|
752
|
+
text = (
|
|
753
|
+
"## Patterns\n"
|
|
754
|
+
"thought: pattern | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
755
|
+
"## Decisions\n"
|
|
756
|
+
)
|
|
757
|
+
node_map = {"abc12345": "Some node content"}
|
|
758
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
759
|
+
text, {"abc12345"}, today="2026-03-30", node_content_map=node_map
|
|
760
|
+
)
|
|
761
|
+
# No explanation = no overlap check, just ID validation
|
|
762
|
+
assert validated == 1
|
|
763
|
+
assert demoted == 0
|
|
764
|
+
|
|
765
|
+
def test_no_node_map_skips_explanation_check(self):
|
|
766
|
+
"""Without node_content_map, explanation check is skipped."""
|
|
767
|
+
text = (
|
|
768
|
+
'## Patterns\n'
|
|
769
|
+
'thought: pattern | 2x (2026-03-30) '
|
|
770
|
+
'[evidence: abc12345 "totally irrelevant words"]\n'
|
|
771
|
+
'## Decisions\n'
|
|
772
|
+
)
|
|
773
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
774
|
+
text, {"abc12345"}, today="2026-03-30", node_content_map=None
|
|
775
|
+
)
|
|
776
|
+
assert validated == 1
|
|
777
|
+
assert demoted == 0
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
class TestCitationReuse:
|
|
781
|
+
"""Tests for citation gaming detection."""
|
|
782
|
+
|
|
783
|
+
def test_reuse_count_tracked(self):
|
|
784
|
+
text = (
|
|
785
|
+
"## Patterns\n"
|
|
786
|
+
"thought: pattern A | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
787
|
+
"thought: pattern B | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
788
|
+
"thought: pattern C | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
789
|
+
"## Decisions\n"
|
|
790
|
+
)
|
|
791
|
+
_text, _v, _d, reuse_max = ContinuityManager._validate_graduations(
|
|
792
|
+
text, {"abc12345"}, today="2026-03-30"
|
|
793
|
+
)
|
|
794
|
+
assert reuse_max == 3
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
class TestContinuityMeta:
|
|
798
|
+
"""Tests for continuity metadata sidecar (session tracking, fail-safe sunset)."""
|
|
799
|
+
|
|
800
|
+
def test_meta_defaults_when_missing(self):
|
|
801
|
+
import tempfile
|
|
802
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
803
|
+
mem_path = os.path.join(tmpdir, "agent.json")
|
|
804
|
+
meta = ContinuityManager.load_meta(mem_path)
|
|
805
|
+
assert meta["sessions_produced"] == 0
|
|
806
|
+
assert meta["citations_seen"] is False
|
|
807
|
+
assert meta["format_version"] == 1
|
|
808
|
+
|
|
809
|
+
def test_meta_save_and_load_roundtrip(self):
|
|
810
|
+
import tempfile
|
|
811
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
812
|
+
mem_path = os.path.join(tmpdir, "agent.json")
|
|
813
|
+
meta = {"sessions_produced": 5, "citations_seen": True, "format_version": 1}
|
|
814
|
+
ContinuityManager.save_meta(meta, mem_path)
|
|
815
|
+
loaded = ContinuityManager.load_meta(mem_path)
|
|
816
|
+
assert loaded == meta
|
|
817
|
+
|
|
818
|
+
def test_meta_path_follows_sidecar_pattern(self):
|
|
819
|
+
path = ContinuityManager.meta_path("/tmp/agent.json")
|
|
820
|
+
assert path == "/tmp/agent.continuity.meta.json"
|
|
821
|
+
|
|
822
|
+
def test_corrupt_meta_returns_defaults(self):
|
|
823
|
+
import tempfile
|
|
824
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
825
|
+
mem_path = os.path.join(tmpdir, "agent.json")
|
|
826
|
+
meta_path = ContinuityManager.meta_path(mem_path)
|
|
827
|
+
with open(meta_path, "w") as f:
|
|
828
|
+
f.write("NOT JSON")
|
|
829
|
+
meta = ContinuityManager.load_meta(mem_path)
|
|
830
|
+
assert meta["sessions_produced"] == 0
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
class TestFailSafeSunset:
|
|
834
|
+
"""Tests for citation requirement enforcement after first successful citation."""
|
|
835
|
+
|
|
836
|
+
def test_bare_graduation_passes_before_sunset(self):
|
|
837
|
+
"""Before citations_seen, bare graduations (no [evidence:]) pass through."""
|
|
838
|
+
text = (
|
|
839
|
+
"## Patterns\n"
|
|
840
|
+
"thought: pattern | 2x (2026-03-30)\n"
|
|
841
|
+
"## Decisions\n"
|
|
842
|
+
)
|
|
843
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
844
|
+
text, {"abc12345"}, today="2026-03-30", citations_seen=False
|
|
845
|
+
)
|
|
846
|
+
assert demoted == 0
|
|
847
|
+
assert "| 2x" in result_text
|
|
848
|
+
assert "needs-evidence" not in result_text
|
|
849
|
+
|
|
850
|
+
def test_bare_graduation_demoted_after_sunset(self):
|
|
851
|
+
"""After citations_seen, bare graduations are demoted with (needs-evidence)."""
|
|
852
|
+
text = (
|
|
853
|
+
"## Patterns\n"
|
|
854
|
+
"thought: pattern | 2x (2026-03-30)\n"
|
|
855
|
+
"## Decisions\n"
|
|
856
|
+
)
|
|
857
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
858
|
+
text, {"abc12345"}, today="2026-03-30", citations_seen=True
|
|
859
|
+
)
|
|
860
|
+
assert demoted == 1
|
|
861
|
+
assert "| 1x" in result_text
|
|
862
|
+
assert "(needs-evidence)" in result_text
|
|
863
|
+
|
|
864
|
+
def test_bare_3x_demoted_to_2x_after_sunset(self):
|
|
865
|
+
text = (
|
|
866
|
+
"## Patterns\n"
|
|
867
|
+
"thought: pattern | 3x (2026-03-30)\n"
|
|
868
|
+
"## Decisions\n"
|
|
869
|
+
)
|
|
870
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
871
|
+
text, {"abc12345"}, today="2026-03-30", citations_seen=True
|
|
872
|
+
)
|
|
873
|
+
assert demoted == 1
|
|
874
|
+
assert "| 2x" in result_text
|
|
875
|
+
assert "(needs-evidence)" in result_text
|
|
876
|
+
|
|
877
|
+
def test_old_date_bare_graduation_unaffected_by_sunset(self):
|
|
878
|
+
"""Carried-forward bare graduations from old sessions are not demoted."""
|
|
879
|
+
text = (
|
|
880
|
+
"## Patterns\n"
|
|
881
|
+
"thought: old pattern | 2x (2026-03-28)\n"
|
|
882
|
+
"## Decisions\n"
|
|
883
|
+
)
|
|
884
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
885
|
+
text, {"abc12345"}, today="2026-03-30", citations_seen=True
|
|
886
|
+
)
|
|
887
|
+
assert demoted == 0
|
|
888
|
+
assert "| 2x" in result_text
|
|
889
|
+
|
|
890
|
+
def test_cited_graduation_still_passes_after_sunset(self):
|
|
891
|
+
"""Properly cited graduations pass regardless of sunset state."""
|
|
892
|
+
text = (
|
|
893
|
+
"## Patterns\n"
|
|
894
|
+
"thought: pattern | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
895
|
+
"## Decisions\n"
|
|
896
|
+
)
|
|
897
|
+
result_text, validated, demoted, _r = ContinuityManager._validate_graduations(
|
|
898
|
+
text, {"abc12345"}, today="2026-03-30", citations_seen=True
|
|
899
|
+
)
|
|
900
|
+
assert validated == 1
|
|
901
|
+
assert demoted == 0
|
|
902
|
+
|
|
903
|
+
def test_no_reuse(self):
|
|
904
|
+
text = (
|
|
905
|
+
"## Patterns\n"
|
|
906
|
+
"thought: pattern A | 2x (2026-03-30) [evidence: abc12345]\n"
|
|
907
|
+
"thought: pattern B | 2x (2026-03-30) [evidence: def67890]\n"
|
|
908
|
+
"## Decisions\n"
|
|
909
|
+
)
|
|
910
|
+
_text, _v, _d, reuse_max = ContinuityManager._validate_graduations(
|
|
911
|
+
text, {"abc12345", "def67890"}, today="2026-03-30"
|
|
912
|
+
)
|
|
913
|
+
assert reuse_max == 1
|
|
914
|
+
|
|
915
|
+
def test_reuse_in_produce_result(self):
|
|
916
|
+
"""citation_reuse_max flows through to ContinuityResult."""
|
|
917
|
+
import datetime
|
|
918
|
+
today = datetime.date.today().isoformat()
|
|
919
|
+
|
|
920
|
+
response = f"""# Test — Memory (v1)
|
|
921
|
+
|
|
922
|
+
## State
|
|
923
|
+
Testing.
|
|
924
|
+
|
|
925
|
+
## Patterns
|
|
926
|
+
thought: A | 2x ({today}) [evidence: 50d7c6fd]
|
|
927
|
+
thought: B | 2x ({today}) [evidence: 50d7c6fd]
|
|
928
|
+
thought: C | 2x ({today}) [evidence: 50d7c6fd]
|
|
929
|
+
|
|
930
|
+
## Decisions
|
|
931
|
+
None.
|
|
932
|
+
|
|
933
|
+
## Context
|
|
934
|
+
Testing citation reuse."""
|
|
935
|
+
|
|
936
|
+
mgr = ContinuityManager(llm=_make_mock_llm(response))
|
|
937
|
+
mem = _make_session_memory()
|
|
938
|
+
result = mgr.produce(mem)
|
|
939
|
+
assert result.citation_reuse_max == 3
|
|
@@ -393,6 +393,14 @@ class TestSessionWrap:
|
|
|
393
393
|
assert result["nodes_before"] == 1
|
|
394
394
|
assert result["nodes_after"] >= 0 # may prune if dormant
|
|
395
395
|
|
|
396
|
+
def test_wrap_continuity_disabled(self):
|
|
397
|
+
"""session_wrap without continuity manager reports disabled."""
|
|
398
|
+
handler, umem = _make_handler()
|
|
399
|
+
umem.memory.session_start()
|
|
400
|
+
result = handler.handle_tool("session_wrap", {})
|
|
401
|
+
assert result["continuity"]["produced"] is False
|
|
402
|
+
assert result["continuity"]["reason"] == "disabled"
|
|
403
|
+
|
|
396
404
|
|
|
397
405
|
class TestAutoConfiguration:
|
|
398
406
|
"""Tests for OPENAI_API_KEY auto-detection logic."""
|
|
@@ -549,8 +557,9 @@ class TestSessionWrapWithContinuity:
|
|
|
549
557
|
assert "error" not in result
|
|
550
558
|
assert "nodes_before" in result
|
|
551
559
|
assert result["saved"] is True
|
|
552
|
-
#
|
|
553
|
-
assert "continuity"
|
|
560
|
+
# Continuity key present but indicates failure
|
|
561
|
+
assert result["continuity"]["produced"] is False
|
|
562
|
+
assert result["continuity"]["reason"] == "error"
|
|
554
563
|
|
|
555
564
|
|
|
556
565
|
class TestVersionNegotiation:
|
|
@@ -692,6 +701,16 @@ class TestThinkingModes:
|
|
|
692
701
|
handler.handle_tool("think_breakthrough", {"problem": "Scaling architecture"})
|
|
693
702
|
assert umem.memory.size == nodes_before
|
|
694
703
|
|
|
704
|
+
def test_thinking_tool_descriptions_say_call_add_memory(self):
|
|
705
|
+
"""Regression guard: descriptions must tell agents to call add_memory."""
|
|
706
|
+
from flowscript_agents.mcp import TOOLS
|
|
707
|
+
thinking_tools = [t for t in TOOLS if t["name"].startswith("think_")]
|
|
708
|
+
assert len(thinking_tools) == 3
|
|
709
|
+
for tool in thinking_tools:
|
|
710
|
+
assert "add_memory" in tool["description"], (
|
|
711
|
+
f"{tool['name']} description must mention add_memory"
|
|
712
|
+
)
|
|
713
|
+
|
|
695
714
|
|
|
696
715
|
class TestDescriptionIntegrity:
|
|
697
716
|
"""Tests for the three-layer MCP description integrity system."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/consolidate.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{flowscript_agents-0.4.0 → flowscript_agents-0.4.1}/flowscript_agents/embeddings/providers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|