flowscript-agents 0.2.3__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/PKG-INFO +14 -2
  2. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/README.md +13 -1
  3. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/__init__.py +1 -1
  4. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/mcp.py +286 -12
  5. flowscript_agents-0.2.5/flowscript_agents/tool-integrity.json +15 -0
  6. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/pyproject.toml +1 -1
  7. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_mcp.py +102 -6
  8. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/.github/workflows/test.yml +0 -0
  9. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/.gitignore +0 -0
  10. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/AUDIT_TRAIL_DESIGN.md +0 -0
  11. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/docs/adapters.md +0 -0
  12. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/docs/api-reference.md +0 -0
  13. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/docs/audit-trail.md +0 -0
  14. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/docs/brand/logo-512.png +0 -0
  15. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/docs/brand/social-preview.png +0 -0
  16. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/docs/flowscript-demo.png +0 -0
  17. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/docs/lifecycle.md +0 -0
  18. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/examples/CLAUDE.md.example +0 -0
  19. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/examples/langgraph_live_test.py +0 -0
  20. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/examples/temporal_e2e_test.py +0 -0
  21. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/audit.py +0 -0
  22. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/camel_ai.py +0 -0
  23. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/crewai.py +0 -0
  24. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/embeddings/__init__.py +0 -0
  25. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/embeddings/_utils.py +0 -0
  26. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/embeddings/consolidate.py +0 -0
  27. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/embeddings/extract.py +0 -0
  28. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/embeddings/index.py +0 -0
  29. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/embeddings/providers.py +0 -0
  30. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/embeddings/search.py +0 -0
  31. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/google_adk.py +0 -0
  32. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/haystack.py +0 -0
  33. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/langgraph.py +0 -0
  34. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/llamaindex.py +0 -0
  35. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/memory.py +0 -0
  36. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/openai_agents.py +0 -0
  37. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/pydantic_ai.py +0 -0
  38. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/query.py +0 -0
  39. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/smolagents.py +0 -0
  40. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/types.py +0 -0
  41. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/flowscript_agents/unified.py +0 -0
  42. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/scripts/validate_dedup_threshold.py +0 -0
  43. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/conftest.py +0 -0
  44. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_audit.py +0 -0
  45. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_camel_ai.py +0 -0
  46. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_consolidation.py +0 -0
  47. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_crewai.py +0 -0
  48. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_embeddings.py +0 -0
  49. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_google_adk.py +0 -0
  50. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_haystack.py +0 -0
  51. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_langgraph.py +0 -0
  52. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_llamaindex.py +0 -0
  53. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_memory.py +0 -0
  54. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_openai_agents.py +0 -0
  55. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_pydantic_ai.py +0 -0
  56. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_smolagents.py +0 -0
  57. {flowscript_agents-0.2.3 → flowscript_agents-0.2.5}/tests/test_temporal.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flowscript-agents
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: Complete agent memory: reasoning queries + vector search + auto-extraction. Decision intelligence for LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Pydantic AI, smolagents, LlamaIndex, Haystack, and CAMEL-AI.
5
5
  Project-URL: Homepage, https://flowscript.org
6
6
  Project-URL: Repository, https://github.com/phillipclapham/flowscript-agents
@@ -349,6 +349,18 @@ After 20 sessions, your memory is a curated knowledge base, not a pile of notes.
349
349
 
350
350
  ---
351
351
 
352
+ ## Description Integrity
353
+
354
+ MCP tool descriptions are the prompts your LLM reads. If they're mutated in-process, the LLM silently follows poisoned instructions. The FlowScript MCP server includes three-layer integrity verification — a reference implementation of [deterministic description integrity for MCP](https://github.com/modelcontextprotocol/modelcontextprotocol/discussions/2402):
355
+
356
+ 1. **`verify_integrity` tool** — LLM-callable. SHA-256 hashes of all tool definitions, deep-frozen at startup (`MappingProxyType`). Detects in-process mutation by malicious dependencies, monkey-patching, or middleware.
357
+ 2. **`flowscript://integrity/manifest` resource** — Host-verifiable. Claude Code / Cursor can verify descriptions without LLM involvement.
358
+ 3. **`tool-integrity.json`** — Build-time root of trust. Generated via `flowscript-mcp --generate-manifest`, ships in the package.
359
+
360
+ Both the Python and [TypeScript](https://www.npmjs.com/package/flowscript-core) MCP servers implement this architecture. Honest threat model: detects in-process mutation, not supply chain or transport-layer attacks. [Full discussion →](https://github.com/modelcontextprotocol/modelcontextprotocol/discussions/2402)
361
+
362
+ ---
363
+
352
364
  ## Comparison
353
365
 
354
366
  | | FlowScript | Mem0 | Vector stores |
@@ -375,7 +387,7 @@ Under the hood: a local semantic graph with typed nodes, typed relationships, an
375
387
  | [flowscript-core](https://www.npmjs.com/package/flowscript-core) | TypeScript SDK — Memory class, 15 tools, token budgeting, audit trail | `npm install flowscript-core` |
376
388
  | [flowscript.org](https://flowscript.org) | Web editor, D3 visualization, live query panel | Browser |
377
389
 
378
- **1,272 tests** across Python (581) and TypeScript (691). Same audit trail format and canonical JSON serialization across both languages.
390
+ **1,312 tests** across Python (581) and TypeScript (731). Same audit trail format and canonical JSON serialization across both languages.
379
391
 
380
392
  ### Docs
381
393
 
@@ -285,6 +285,18 @@ After 20 sessions, your memory is a curated knowledge base, not a pile of notes.
285
285
 
286
286
  ---
287
287
 
288
+ ## Description Integrity
289
+
290
+ MCP tool descriptions are the prompts your LLM reads. If they're mutated in-process, the LLM silently follows poisoned instructions. The FlowScript MCP server includes three-layer integrity verification — a reference implementation of [deterministic description integrity for MCP](https://github.com/modelcontextprotocol/modelcontextprotocol/discussions/2402):
291
+
292
+ 1. **`verify_integrity` tool** — LLM-callable. SHA-256 hashes of all tool definitions, deep-frozen at startup (`MappingProxyType`). Detects in-process mutation by malicious dependencies, monkey-patching, or middleware.
293
+ 2. **`flowscript://integrity/manifest` resource** — Host-verifiable. Claude Code / Cursor can verify descriptions without LLM involvement.
294
+ 3. **`tool-integrity.json`** — Build-time root of trust. Generated via `flowscript-mcp --generate-manifest`, ships in the package.
295
+
296
+ Both the Python and [TypeScript](https://www.npmjs.com/package/flowscript-core) MCP servers implement this architecture. Honest threat model: detects in-process mutation, not supply chain or transport-layer attacks. [Full discussion →](https://github.com/modelcontextprotocol/modelcontextprotocol/discussions/2402)
297
+
298
+ ---
299
+
288
300
  ## Comparison
289
301
 
290
302
  | | FlowScript | Mem0 | Vector stores |
@@ -311,7 +323,7 @@ Under the hood: a local semantic graph with typed nodes, typed relationships, an
311
323
  | [flowscript-core](https://www.npmjs.com/package/flowscript-core) | TypeScript SDK — Memory class, 15 tools, token budgeting, audit trail | `npm install flowscript-core` |
312
324
  | [flowscript.org](https://flowscript.org) | Web editor, D3 visualization, live query panel | Browser |
313
325
 
314
- **1,272 tests** across Python (581) and TypeScript (691). Same audit trail format and canonical JSON serialization across both languages.
326
+ **1,312 tests** across Python (581) and TypeScript (731). Same audit trail format and canonical JSON serialization across both languages.
315
327
 
316
328
  ### Docs
317
329
 
@@ -43,7 +43,7 @@ from .memory import (
43
43
  )
44
44
  from .unified import UnifiedMemory
45
45
 
46
- __version__ = "0.2.0"
46
+ __version__ = "0.2.5"
47
47
  __all__ = [
48
48
  "AuditConfig",
49
49
  "AuditQueryResult",
@@ -43,7 +43,7 @@ When OPENAI_API_KEY is set, the server auto-configures:
43
43
  - LLM extraction (gpt-4o-mini) for typed reasoning extraction
44
44
  - Consolidation (gpt-4o-mini) for memory management (UPDATE/RELATE/RESOLVE)
45
45
 
46
- Tools exposed (13):
46
+ Tools exposed (14):
47
47
  - search_memory: Unified search (vector + keyword + temporal)
48
48
  - add_memory: Auto-extract reasoning from text with consolidation
49
49
  - get_context: Get formatted memory for prompt injection
@@ -57,14 +57,18 @@ Tools exposed (13):
57
57
  - memory_stats: Get memory statistics
58
58
  - query_audit: Search the audit trail with filters
59
59
  - verify_audit: Verify hash chain integrity
60
+ - verify_integrity: Verify tool description integrity (SRI for LLM prompts)
60
61
  """
61
62
 
62
63
  from __future__ import annotations
63
64
 
64
65
  import argparse
66
+ import datetime
67
+ import hashlib
65
68
  import json
66
69
  import os
67
70
  import sys
71
+ from types import MappingProxyType
68
72
  from typing import Any, Optional
69
73
 
70
74
  from .memory import Memory
@@ -85,7 +89,7 @@ def _log(msg: str) -> None:
85
89
 
86
90
  _PROTOCOL_VERSION = "2025-03-26"
87
91
  _SERVER_NAME = "flowscript-agents"
88
- _SERVER_VERSION = "0.2.0"
92
+ _SERVER_VERSION = "0.2.5"
89
93
 
90
94
 
91
95
  def _jsonrpc_response(id: Any, result: Any) -> dict:
@@ -96,11 +100,111 @@ def _jsonrpc_error(id: Any, code: int, message: str) -> dict:
96
100
  return {"jsonrpc": "2.0", "id": id, "error": {"code": code, "message": message}}
97
101
 
98
102
 
103
+ # =============================================================================
104
+ # Description Integrity — "SRI for LLM tool descriptions"
105
+ # =============================================================================
106
+ # Reference implementation: deterministic integrity verification for MCP servers.
107
+ # See: github.com/modelcontextprotocol/modelcontextprotocol/discussions/2402
108
+ #
109
+ # THREE-LAYER ARCHITECTURE:
110
+ # 1. Tool: verify_integrity — LLM-callable, detects in-process mutation
111
+ # 2. Resource: flowscript://integrity/manifest — Host-verifiable manifest
112
+ # (enables Claude Code/Cursor to verify descriptions WITHOUT LLM involvement,
113
+ # moving the security boundary to the correct layer)
114
+ # 3. Build-time manifest: tool-integrity.json — root of trust independent of
115
+ # running process (generated via --generate-manifest)
116
+ #
117
+ # DETECTS:
118
+ # - In-process description mutation (malicious dependency, monkey-patching,
119
+ # or middleware that modifies tool dicts in the same Python process)
120
+ # - Accidental mutation (buggy wrapper that string-replaces descriptions)
121
+ #
122
+ # DOES NOT DETECT (requires ecosystem-level changes):
123
+ # - Supply chain attacks (poisoned before startup — manifest captures poisoned state)
124
+ # - Transport-layer attacks (MITM between server and client — hashes never leave process)
125
+ # - Client-side injection (host manipulates descriptions after receiving them)
126
+ # - Reflection-based bypass: gc.get_referents() can reach the underlying dict
127
+ # behind MappingProxyType. ctypes can write to arbitrary memory. Deep-freeze
128
+ # is best-effort against casual/accidental mutation. For determined in-process
129
+ # attackers, the build-time manifest is the correct verification layer.
130
+ # - Filesystem manifest replacement: if an attacker can write to the package
131
+ # directory, they can replace tool-integrity.json to match poisoned definitions.
132
+ # In high-security deployments, sign the manifest or distribute via separate
133
+ # trust channel.
134
+ #
135
+ # This is a reference implementation. Full integrity requires client-side verification
136
+ # against an out-of-band manifest (build-time hashes, package signatures, etc.).
137
+
138
+
139
+ def _canonicalize(obj: Any) -> str:
140
+ """Canonicalize a JSON-serializable value for deterministic hashing.
141
+
142
+ Sorted keys, no whitespace, deterministic primitive serialization.
143
+ Matches the TypeScript MCP server's canonicalize() for cross-language
144
+ consistency (though hash comparison is per-server, not cross-language).
145
+ """
146
+ if obj is None:
147
+ return "null"
148
+ if isinstance(obj, bool):
149
+ return "true" if obj else "false"
150
+ if isinstance(obj, (int, float)):
151
+ return json.dumps(obj)
152
+ if isinstance(obj, str):
153
+ return json.dumps(obj, ensure_ascii=True)
154
+ if isinstance(obj, (list, tuple)):
155
+ return "[" + ",".join(_canonicalize(v) for v in obj) + "]"
156
+ if isinstance(obj, (dict, MappingProxyType)):
157
+ entries = []
158
+ for k in sorted(obj.keys()):
159
+ v = obj[k]
160
+ # Include None as "null" (matches TS which keeps null but skips undefined).
161
+ # Python dicts don't have "undefined" — all present keys are serialized.
162
+ entries.append(json.dumps(k, ensure_ascii=True) + ":" + _canonicalize(v))
163
+ return "{" + ",".join(entries) + "}"
164
+ return json.dumps(str(obj), ensure_ascii=True)
165
+
166
+
167
def _hash_tool_definition(tool: dict | MappingProxyType) -> str:
    """Return the hex SHA-256 digest of a tool definition's canonical form.

    The definition is serialized with ``_canonicalize`` and the UTF-8 bytes
    of that string are hashed.
    """
    digest = hashlib.sha256()
    digest.update(_canonicalize(tool).encode("utf-8"))
    return digest.hexdigest()
171
+
172
+
173
+ def _thaw(obj: Any) -> Any:
174
+ """Recursively convert MappingProxyType back to plain dicts for JSON serialization."""
175
+ if isinstance(obj, MappingProxyType):
176
+ return {k: _thaw(v) for k, v in obj.items()}
177
+ if isinstance(obj, tuple):
178
+ return [_thaw(x) for x in obj]
179
+ if isinstance(obj, list):
180
+ return [_thaw(x) for x in obj]
181
+ return obj
182
+
183
+
184
+ def _deep_freeze(obj: dict) -> MappingProxyType:
185
+ """Recursively convert a dict tree to immutable MappingProxyType.
186
+
187
+ Any attempt to mutate a frozen dict raises TypeError.
188
+ Lists inside are converted to tuples (also immutable).
189
+ """
190
+ frozen = {}
191
+ for k, v in obj.items():
192
+ if isinstance(v, dict):
193
+ frozen[k] = _deep_freeze(v)
194
+ elif isinstance(v, list):
195
+ frozen[k] = tuple(_deep_freeze(x) if isinstance(x, dict) else x for x in v)
196
+ else:
197
+ frozen[k] = v
198
+ return MappingProxyType(frozen)
199
+
200
+
99
201
  # =============================================================================
100
202
  # Tool definitions
101
203
  # =============================================================================
102
204
 
103
- TOOLS = [
205
+ # Defined as plain dicts first, then frozen after definition.
206
+ # The verify_integrity tool is NOT in this list (it verifies, it isn't verified).
207
+ _TOOL_DEFS_RAW = [
104
208
  {
105
209
  "name": "search_memory",
106
210
  "description": (
@@ -124,6 +228,7 @@ TOOLS = [
124
228
  },
125
229
  },
126
230
  "required": ["query"],
231
+ "additionalProperties": False,
127
232
  },
128
233
  },
129
234
  {
@@ -149,6 +254,7 @@ TOOLS = [
149
254
  },
150
255
  },
151
256
  "required": ["text"],
257
+ "additionalProperties": False,
152
258
  },
153
259
  },
154
260
  {
@@ -168,6 +274,7 @@ TOOLS = [
168
274
  "default": 4000,
169
275
  },
170
276
  },
277
+ "additionalProperties": False,
171
278
  },
172
279
  },
173
280
  {
@@ -188,6 +295,7 @@ TOOLS = [
188
295
  "default": "axis",
189
296
  },
190
297
  },
298
+ "additionalProperties": False,
191
299
  },
192
300
  },
193
301
  {
@@ -198,7 +306,7 @@ TOOLS = [
198
306
  "external dependencies. Returns blockers sorted by impact score "
199
307
  "(downstream effects), with reason, duration, and transitive causes."
200
308
  ),
201
- "inputSchema": {"type": "object", "properties": {}},
309
+ "inputSchema": {"type": "object", "properties": {}, "additionalProperties": False},
202
310
  },
203
311
  {
204
312
  "name": "query_why",
@@ -215,6 +323,7 @@ TOOLS = [
215
323
  "node_id": {"type": "string", "description": "Node ID to trace"},
216
324
  "content": {"type": "string", "description": "Search for node by content (alternative to node_id)"},
217
325
  },
326
+ "additionalProperties": False,
218
327
  },
219
328
  },
220
329
  {
@@ -231,6 +340,7 @@ TOOLS = [
231
340
  "question_id": {"type": "string", "description": "Question node ID"},
232
341
  "content": {"type": "string", "description": "Search for question by content (alternative to question_id)"},
233
342
  },
343
+ "additionalProperties": False,
234
344
  },
235
345
  },
236
346
  {
@@ -247,6 +357,7 @@ TOOLS = [
247
357
  "node_id": {"type": "string", "description": "Node ID to analyze"},
248
358
  "content": {"type": "string", "description": "Search for node by content (alternative to node_id)"},
249
359
  },
360
+ "additionalProperties": False,
250
361
  },
251
362
  },
252
363
  {
@@ -263,6 +374,7 @@ TOOLS = [
263
374
  "node_id": {"type": "string", "description": "ID of the node to remove"},
264
375
  },
265
376
  "required": ["node_id"],
377
+ "additionalProperties": False,
266
378
  },
267
379
  },
268
380
  {
@@ -272,7 +384,7 @@ TOOLS = [
272
384
  "save to disk. Call this at the end of a work session to keep memory "
273
385
  "healthy. Dormant nodes (not accessed recently) are archived, not deleted."
274
386
  ),
275
- "inputSchema": {"type": "object", "properties": {}},
387
+ "inputSchema": {"type": "object", "properties": {}, "additionalProperties": False},
276
388
  },
277
389
  {
278
390
  "name": "memory_stats",
@@ -280,7 +392,7 @@ TOOLS = [
280
392
  "Get memory statistics: node count, tier distribution, garden health, "
281
393
  "embedding status. Call this to understand the current state of memory."
282
394
  ),
283
- "inputSchema": {"type": "object", "properties": {}},
395
+ "inputSchema": {"type": "object", "properties": {}, "additionalProperties": False},
284
396
  },
285
397
  {
286
398
  "name": "query_audit",
@@ -315,6 +427,7 @@ TOOLS = [
315
427
  "default": False,
316
428
  },
317
429
  },
430
+ "additionalProperties": False,
318
431
  },
319
432
  },
320
433
  {
@@ -324,10 +437,61 @@ TOOLS = [
324
437
  "confirm the audit trail has not been tampered with. Returns chain "
325
438
  "validity status, total entries verified, and location of any break."
326
439
  ),
327
- "inputSchema": {"type": "object", "properties": {}},
440
+ "inputSchema": {"type": "object", "properties": {}, "additionalProperties": False},
328
441
  },
329
442
  ]
330
443
 
444
# Deep-freeze all tool definitions — any in-process mutation raises TypeError.
TOOLS: list[MappingProxyType] = [_deep_freeze(t) for t in _TOOL_DEFS_RAW]

# Compute integrity manifest at startup — captures the "intended" state.
# Built in a single expression so the mapping is read-only from the moment it
# exists and no loop variable is left behind in the module namespace.
_EXPECTED_TOOL_COUNT = len(TOOLS)
_INTEGRITY_MANIFEST: MappingProxyType = MappingProxyType(
    {tool["name"]: _hash_tool_definition(tool) for tool in TOOLS}
)
453
+
454
# Load build-time manifest if available (generated via --generate-manifest).
# A missing file is the normal "startup-only verification" case and stays
# silent. An unreadable or malformed file is logged and ignored rather than
# crashing the import or being swallowed without a trace.
_BUILD_TIME_MANIFEST: dict[str, str] | None = None
_manifest_path = os.path.join(os.path.dirname(__file__), "tool-integrity.json")
try:
    with open(_manifest_path, encoding="utf-8") as _f:
        _loaded_manifest = json.load(_f)
except FileNotFoundError:
    pass  # No build-time manifest — startup-only verification
except (OSError, ValueError) as _exc:
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    _log(f"Integrity: could not read build-time manifest: {_exc}")
else:
    if isinstance(_loaded_manifest, dict):
        _BUILD_TIME_MANIFEST = _loaded_manifest
        _log(f"Integrity: loaded build-time manifest ({len(_BUILD_TIME_MANIFEST)} tools)")
    else:
        # The manifest must be a JSON object mapping tool name -> hash.
        _log("Integrity: ignoring build-time manifest (expected a JSON object)")
463
+
464
# Definition of the verifier tool itself. It is kept out of TOOLS so that it
# is not part of the set of definitions it checks.
_VERIFY_INTEGRITY_TOOL = _deep_freeze(dict(
    name="verify_integrity",
    description=(
        "Verify that tool descriptions have not been mutated in-process since server startup. "
        "Detects description modifications by malicious dependencies, middleware, or monkey-patching. "
        "Returns per-tool SHA-256 hashes (expected vs current) and a pass/fail verdict. "
        "NOTE: This verifies the server's own state — transport-layer integrity requires "
        "host-level verification via the flowscript://integrity/manifest resource. "
        "Reference implementation: "
        "github.com/modelcontextprotocol/modelcontextprotocol/discussions/2402"
    ),
    inputSchema={"type": "object", "properties": {}, "additionalProperties": False},
))

# What clients see from tools/list: the verified tools followed by the verifier.
ALL_TOOLS: list[MappingProxyType] = TOOLS + [_VERIFY_INTEGRITY_TOOL]
482
+
483
# Descriptor for the integrity manifest resource (frozen like the tool definitions).
_INTEGRITY_RESOURCE = _deep_freeze(dict(
    uri="flowscript://integrity/manifest",
    name="Tool Integrity Manifest",
    description=(
        "SHA-256 hashes of all tool definitions for client-side integrity verification. "
        "Compare these hashes against the tool definitions you received to detect "
        "transport-layer description mutation."
    ),
    mimeType="application/json",
))
494
+
331
495
 
332
496
  # =============================================================================
333
497
  # Tool handlers
@@ -355,6 +519,7 @@ class MCPHandler:
355
519
  "memory_stats": self._memory_stats,
356
520
  "query_audit": self._query_audit,
357
521
  "verify_audit": self._verify_audit,
522
+ "verify_integrity": self._verify_integrity,
358
523
  }
359
524
  handler = handlers.get(name)
360
525
  if handler is None:
@@ -591,6 +756,71 @@ class MCPHandler:
591
756
  "status": "no_audit_trail",
592
757
  "note": "No audit trail file found — auditing may not be configured"}
593
758
 
759
def _verify_integrity(self, args: dict) -> dict:
    """Verify in-process integrity of every tool definition in ``TOOLS``.

    Each live definition is re-hashed and compared with the hash recorded in
    ``_INTEGRITY_MANIFEST`` and, when a build-time manifest was loaded, with
    that manifest's hash as well. The verdict is FAIL when the tool count
    changed, any hash differs, a live tool has no recorded hash (added or
    renamed after startup), or a recorded tool is no longer present.

    Args:
        args: Tool-call arguments. Unused; the tool takes no parameters.

    Returns:
        A JSON-serializable report with the overall ``verdict``, count
        information, a ``build_time_manifest`` summary ("verified",
        "mismatch", or "not available"), and one entry per tool under
        ``tools``.
    """
    # Check: has the tool count changed? (detect additions/removals)
    count_match = len(TOOLS) == _EXPECTED_TOOL_COUNT
    all_passed = count_match
    build_time_failed = False
    results: list[dict[str, Any]] = []
    seen_names = set()

    # Per-tool hash verification
    for tool in TOOLS:
        tool_name = tool["name"]
        seen_names.add(tool_name)
        # .get(): a tool injected or renamed after startup has no recorded
        # hash. That is a tampering signal to report, not a KeyError.
        expected = _INTEGRITY_MANIFEST.get(tool_name)
        current = _hash_tool_definition(tool)
        passed = expected == current
        if not passed:
            all_passed = False

        entry: dict[str, Any] = {
            "tool": tool_name,
            "expected_hash": expected,
            "current_hash": current,
            "status": "pass" if passed else "fail",
        }

        # Compare against build-time manifest if available
        if _BUILD_TIME_MANIFEST:
            build_hash = _BUILD_TIME_MANIFEST.get(tool_name)
            if build_hash:
                build_match = build_hash == current
                if not build_match:
                    all_passed = False
                    build_time_failed = True
                entry["build_time_status"] = "pass" if build_match else "fail"
            else:
                entry["build_time_status"] = "no_manifest"

        results.append(entry)

    # Tools recorded at startup but no longer present in TOOLS. The count
    # check alone cannot name them, and misses a remove-one/add-one swap.
    for recorded_name, recorded_hash in _INTEGRITY_MANIFEST.items():
        if recorded_name not in seen_names:
            all_passed = False
            results.append({
                "tool": recorded_name,
                "expected_hash": recorded_hash,
                "current_hash": None,
                "status": "missing",
            })

    # Only claim "verified" when every build-time comparison actually passed.
    if not _BUILD_TIME_MANIFEST:
        build_time_summary = "not available"
    elif build_time_failed:
        build_time_summary = "mismatch"
    else:
        build_time_summary = "verified"

    verdict = "PASS" if all_passed else "FAIL"
    return {
        "success": True,
        "verdict": verdict,
        "tool_count": len(TOOLS),
        "expected_tool_count": _EXPECTED_TOOL_COUNT,
        "count_match": count_match,
        "algorithm": "SHA-256",
        "canonicalization": "deterministic sorted-keys JSON",
        "build_time_manifest": build_time_summary,
        "tools": results,
        "scope": (
            "Verifies in-process description integrity (detects mutation by "
            "dependencies, middleware, or monkey-patching). Transport-layer "
            "integrity requires host-side verification via "
            "flowscript://integrity/manifest resource."
        ),
        "description": (
            "All tool descriptions match their startup hashes. "
            "No in-process mutation detected."
            if all_passed else
            "WARNING: Tool description integrity violation detected. "
            "One or more definitions have been modified since server startup."
        ),
    }
823
+
594
824
 
595
825
  def _serialize_query_result(result: Any, _seen: set | None = None) -> dict:
596
826
  """Best-effort serialization of query result dataclasses."""
@@ -861,7 +1091,7 @@ def run_server(
861
1091
  client_version = params.get("protocolVersion", _PROTOCOL_VERSION)
862
1092
  resp = _jsonrpc_response(msg_id, {
863
1093
  "protocolVersion": client_version if client_version >= _PROTOCOL_VERSION else _PROTOCOL_VERSION,
864
- "capabilities": {"tools": {}},
1094
+ "capabilities": {"tools": {}, "resources": {}},
865
1095
  "serverInfo": {
866
1096
  "name": _SERVER_NAME,
867
1097
  "version": _SERVER_VERSION,
@@ -870,9 +1100,35 @@ def run_server(
870
1100
  elif method == "notifications/initialized":
871
1101
  continue # notification, no response
872
1102
  elif method == "tools/list":
873
- resp = _jsonrpc_response(msg_id, {"tools": TOOLS})
1103
+ resp = _jsonrpc_response(msg_id, {"tools": [json.loads(json.dumps(_thaw(t))) for t in ALL_TOOLS]})
874
1104
  elif method == "resources/list":
875
- resp = _jsonrpc_response(msg_id, {"resources": []})
1105
+ resp = _jsonrpc_response(msg_id, {"resources": [_thaw(_INTEGRITY_RESOURCE)]})
1106
+ elif method == "resources/read":
1107
+ uri = params.get("uri", "")
1108
+ if uri == "flowscript://integrity/manifest":
1109
+ manifest = {
1110
+ "version": _SERVER_VERSION,
1111
+ "algorithm": "SHA-256",
1112
+ "canonicalization": "deterministic sorted-keys JSON",
1113
+ "generated_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
1114
+ "tool_count": _EXPECTED_TOOL_COUNT,
1115
+ "tools": dict(_INTEGRITY_MANIFEST),
1116
+ "build_time_manifest": "available" if _BUILD_TIME_MANIFEST else "not generated",
1117
+ "usage": (
1118
+ "Hash each tool definition (sorted keys, no whitespace, SHA-256) "
1119
+ "and compare against the hashes in this manifest. Mismatches "
1120
+ "indicate description mutation between server and client."
1121
+ ),
1122
+ }
1123
+ resp = _jsonrpc_response(msg_id, {
1124
+ "contents": [{
1125
+ "uri": uri,
1126
+ "mimeType": "application/json",
1127
+ "text": json.dumps(manifest, indent=2),
1128
+ }],
1129
+ })
1130
+ else:
1131
+ resp = _jsonrpc_error(msg_id, -32602, f"Unknown resource: {uri}")
876
1132
  elif method == "prompts/list":
877
1133
  resp = _jsonrpc_response(msg_id, {"prompts": []})
878
1134
  elif method == "tools/call":
@@ -927,8 +1183,8 @@ def main() -> None:
927
1183
  ),
928
1184
  )
929
1185
  parser.add_argument(
930
- "--memory", required=True,
931
- help="Path to memory JSON file (created if doesn't exist)",
1186
+ "--memory",
1187
+ help="Path to memory JSON file (created if doesn't exist). Required unless --generate-manifest.",
932
1188
  )
933
1189
  parser.add_argument(
934
1190
  "--embedder", choices=["openai", "sentence-transformers", "ollama"],
@@ -948,8 +1204,26 @@ def main() -> None:
948
1204
  action="store_true",
949
1205
  help="Disable auto-configuration from OPENAI_API_KEY",
950
1206
  )
1207
+ parser.add_argument(
1208
+ "--generate-manifest",
1209
+ action="store_true",
1210
+ help="Generate tool-integrity.json and exit (build-time integrity manifest)",
1211
+ )
951
1212
  args = parser.parse_args()
952
1213
 
1214
+ # Generate build-time manifest and exit (no --memory needed)
1215
+ if args.generate_manifest:
1216
+ manifest = dict(_INTEGRITY_MANIFEST)
1217
+ out_path = os.path.join(os.path.dirname(__file__), "tool-integrity.json")
1218
+ with open(out_path, "w") as f:
1219
+ json.dump(manifest, f, indent=2, sort_keys=True)
1220
+ f.write("\n")
1221
+ print(f"Generated {out_path} ({len(manifest)} tools)")
1222
+ sys.exit(0)
1223
+
1224
+ if not args.memory:
1225
+ parser.error("--memory is required (unless using --generate-manifest)")
1226
+
953
1227
  embedder = None
954
1228
  llm = None
955
1229
  consolidation = None
@@ -0,0 +1,15 @@
1
+ {
2
+ "add_memory": "c98f233ffc441c2e672687f446a9e2ee4104c954e1a8f3d8300e6c29bf5d92af",
3
+ "get_context": "e3069a73a874311e817094e0dc1c9a4d4f2fb0761db6e266400ebb0e30843878",
4
+ "memory_stats": "38d352ee7e5396135125efa47473c74e9ac9908df7414996f55395c520128a32",
5
+ "query_alternatives": "a8b055c266a741b5006263e0372ec55d39adbcc7cb8b8e14369f148a1dbe9460",
6
+ "query_audit": "906e1aa27b0a9757cdf379dcc96e43e9b832495ea888b273bc604d41b4926e4b",
7
+ "query_blocked": "32ba5402add1f14fa6d42e3de95f8abc0eb411ef1e485e5f5640cc65adfce6a2",
8
+ "query_tensions": "39361a228e90da2fae52f44563670528505f77ff55c76e34027c139f3071434a",
9
+ "query_what_if": "583a203a17c21f73bc0abe83fbad3b1195be4051e11a9eea2376e169ef4f795b",
10
+ "query_why": "4eac8ed68ca419cbe02fad7a948951f8aae7ee86301f8bc3d80c3b3004b1860e",
11
+ "remove_memory": "ee604c8f87855e32b4509162048168d0c941da79339f907d7d921a55780de830",
12
+ "search_memory": "7e91e30bc03b5a2c990b83a33c00cf512c5c7c2a2e204c546206ffe606010064",
13
+ "session_wrap": "669c9ed43617001776a70c142d589d53b6da541bc65b2ce00613ebef04368323",
14
+ "verify_audit": "2e93d3118ebeed1a1113e423ec915b8dd987c5d2c4adf6fefcd93fa0c931483f"
15
+ }
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "flowscript-agents"
7
- version = "0.2.3"
7
+ version = "0.2.5"
8
8
  description = "Complete agent memory: reasoning queries + vector search + auto-extraction. Decision intelligence for LangGraph, CrewAI, Google ADK, OpenAI Agents SDK, Pydantic AI, smolagents, LlamaIndex, Haystack, and CAMEL-AI."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -258,13 +258,14 @@ class TestMCPStdioProtocol:
258
258
  if method == "initialize":
259
259
  return _jsonrpc_response(msg_id, {
260
260
  "protocolVersion": "2025-03-26",
261
- "capabilities": {"tools": {}},
262
- "serverInfo": {"name": "flowscript-agents", "version": "0.2.0"},
261
+ "capabilities": {"tools": {}, "resources": {}},
262
+ "serverInfo": {"name": "flowscript-agents", "version": "0.2.5"},
263
263
  })
264
264
  elif method == "notifications/initialized":
265
265
  return None # notification, no response
266
266
  elif method == "tools/list":
267
- return _jsonrpc_response(msg_id, {"tools": TOOLS})
267
+ from flowscript_agents.mcp import ALL_TOOLS, _thaw
268
+ return _jsonrpc_response(msg_id, {"tools": [_thaw(t) for t in ALL_TOOLS]})
268
269
  elif method == "tools/call":
269
270
  tool_name = params.get("name", "")
270
271
  tool_args = params.get("arguments", {})
@@ -273,7 +274,8 @@ class TestMCPStdioProtocol:
273
274
  "content": [{"type": "text", "text": json.dumps(result)}],
274
275
  })
275
276
  elif method == "resources/list":
276
- return _jsonrpc_response(msg_id, {"resources": []})
277
+ from flowscript_agents.mcp import _INTEGRITY_RESOURCE
278
+ return _jsonrpc_response(msg_id, {"resources": [_INTEGRITY_RESOURCE]})
277
279
  elif method == "prompts/list":
278
280
  return _jsonrpc_response(msg_id, {"prompts": []})
279
281
  elif method == "ping":
@@ -295,10 +297,11 @@ class TestMCPStdioProtocol:
295
297
  "jsonrpc": "2.0", "id": 2, "method": "tools/list",
296
298
  })
297
299
  tools = resp["result"]["tools"]
298
- assert len(tools) == 13
300
+ assert len(tools) == 14 # 13 verified + verify_integrity
299
301
  names = {t["name"] for t in tools}
300
302
  assert "search_memory" in names
301
303
  assert "query_what_if" in names
304
+ assert "verify_integrity" in names
302
305
 
303
306
  def test_tools_call(self):
304
307
  resp = self._simulate_message({
@@ -321,7 +324,9 @@ class TestMCPStdioProtocol:
321
324
  resp = self._simulate_message({
322
325
  "jsonrpc": "2.0", "id": 4, "method": "resources/list",
323
326
  })
324
- assert resp["result"]["resources"] == []
327
+ resources = resp["result"]["resources"]
328
+ assert len(resources) == 1
329
+ assert resources[0]["uri"] == "flowscript://integrity/manifest"
325
330
 
326
331
  def test_prompts_list(self):
327
332
  resp = self._simulate_message({
@@ -493,3 +498,94 @@ class TestVersionNegotiation:
493
498
  """Server should accept newer client versions (tools-only, compatible)."""
494
499
  from flowscript_agents.mcp import _PROTOCOL_VERSION
495
500
  assert _PROTOCOL_VERSION >= "2025-03-26"
501
+
502
+
503
+ class TestDescriptionIntegrity:
504
+ """Tests for the three-layer MCP description integrity system."""
505
+
506
+ def test_tools_are_frozen(self):
507
+ """Tool definitions should be immutable MappingProxyType."""
508
+ from types import MappingProxyType
509
+ from flowscript_agents.mcp import TOOLS
510
+ for tool in TOOLS:
511
+ assert isinstance(tool, MappingProxyType), f"{tool['name']} is not frozen"
512
+
513
+ def test_mutation_blocked(self):
514
+ """Attempting to mutate a frozen tool should raise TypeError."""
515
+ from flowscript_agents.mcp import TOOLS
516
+ import pytest
517
+ with pytest.raises(TypeError):
518
+ TOOLS[0]["name"] = "hacked"
519
+
520
+ def test_verify_integrity_returns_pass(self):
521
+ """verify_integrity should return PASS on unmodified tools."""
522
+ handler, _ = _make_handler()
523
+ result = handler.handle_tool("verify_integrity", {})
524
+ assert result["verdict"] == "PASS"
525
+ assert result["count_match"] is True
526
+ assert result["tool_count"] == 13 # verified tools (not counting verify_integrity itself)
527
+
528
+ def test_verify_integrity_per_tool_status(self):
529
+ """Each tool should have pass status with matching hashes."""
530
+ handler, _ = _make_handler()
531
+ result = handler.handle_tool("verify_integrity", {})
532
+ for tool_result in result["tools"]:
533
+ assert tool_result["status"] == "pass", f"{tool_result['tool']} failed integrity check"
534
+ assert tool_result["expected_hash"] == tool_result["current_hash"]
535
+
536
+ def test_hash_determinism(self):
537
+ """Same tool should produce same hash across calls."""
538
+ from flowscript_agents.mcp import TOOLS, _hash_tool_definition
539
+ h1 = _hash_tool_definition(TOOLS[0])
540
+ h2 = _hash_tool_definition(TOOLS[0])
541
+ assert h1 == h2
542
+ assert len(h1) == 64 # SHA-256 hex length
543
+
544
+ def test_manifest_matches_runtime(self):
545
+ """Build-time manifest should match runtime hashes."""
546
+ from flowscript_agents.mcp import TOOLS, _INTEGRITY_MANIFEST, _hash_tool_definition
547
+ for tool in TOOLS:
548
+ name = tool["name"]
549
+ assert name in _INTEGRITY_MANIFEST
550
+ assert _INTEGRITY_MANIFEST[name] == _hash_tool_definition(tool)
551
+
552
+ def test_integrity_resource_exists(self):
553
+ """The integrity resource should be listed."""
554
+ from flowscript_agents.mcp import _INTEGRITY_RESOURCE
555
+ assert _INTEGRITY_RESOURCE["uri"] == "flowscript://integrity/manifest"
556
+ assert _INTEGRITY_RESOURCE["mimeType"] == "application/json"
557
+
558
+ def test_integrity_resource_frozen(self):
559
+ """The integrity resource metadata should be frozen."""
560
+ from types import MappingProxyType
561
+ from flowscript_agents.mcp import _INTEGRITY_RESOURCE
562
+ assert isinstance(_INTEGRITY_RESOURCE, MappingProxyType)
563
+
564
+ def test_canonicalize_none_as_null(self):
565
+ """None should canonicalize as 'null', not be skipped."""
566
+ from flowscript_agents.mcp import _canonicalize
567
+ result = _canonicalize({"a": None, "b": 1})
568
+ assert '"a":null' in result
569
+ assert '"b":1' in result
570
+
571
+ def test_canonicalize_bool_not_int(self):
572
+ """Booleans should serialize as true/false, not 1/0."""
573
+ from flowscript_agents.mcp import _canonicalize
574
+ assert _canonicalize(True) == "true"
575
+ assert _canonicalize(False) == "false"
576
+ assert _canonicalize(1) == "1"
577
+
578
+ def test_canonicalize_sorted_keys(self):
579
+ """Keys should be sorted alphabetically."""
580
+ from flowscript_agents.mcp import _canonicalize
581
+ result = _canonicalize({"z": 1, "a": 2, "m": 3})
582
+ assert result == '{"a":2,"m":3,"z":1}'
583
+
584
+ def test_all_schemas_have_additional_properties(self):
585
+ """All tool inputSchemas should have additionalProperties: false."""
586
+ from flowscript_agents.mcp import TOOLS
587
+ for tool in TOOLS:
588
+ schema = tool["inputSchema"]
589
+ assert schema.get("additionalProperties") is False, (
590
+ f"{tool['name']} missing additionalProperties: false"
591
+ )