distributed-a2a 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {distributed_a2a-0.2.2/distributed_a2a.egg-info → distributed_a2a-0.2.3}/PKG-INFO +22 -1
  2. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/README.md +21 -0
  3. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/__init__.py +5 -1
  4. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/executors.py +14 -5
  5. distributed_a2a-0.2.3/distributed_a2a/file_extractors.py +142 -0
  6. distributed_a2a-0.2.3/distributed_a2a/mcp_interceptors.py +44 -0
  7. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3/distributed_a2a.egg-info}/PKG-INFO +22 -1
  8. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a.egg-info/SOURCES.txt +4 -2
  9. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/pyproject.toml +1 -1
  10. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/tests/test_executor_files.py +67 -0
  11. distributed_a2a-0.2.2/tests/test_files.py → distributed_a2a-0.2.3/tests/test_file_extractors.py +177 -1
  12. distributed_a2a-0.2.3/tests/test_mcp_interceptors.py +197 -0
  13. distributed_a2a-0.2.2/distributed_a2a/files.py +0 -68
  14. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/LICENSE +0 -0
  15. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/MANIFEST.in +0 -0
  16. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/agent.py +0 -0
  17. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/client.py +0 -0
  18. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/config.py +0 -0
  19. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/model.py +0 -0
  20. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/py.typed +0 -0
  21. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/registry.py +0 -0
  22. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/registry_server/__init__.py +0 -0
  23. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/registry_server/bootstrap.py +0 -0
  24. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/registry_server/dynamo_db.py +0 -0
  25. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/registry_server/in_memory_registry_storage.py +0 -0
  26. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/registry_server/model.py +0 -0
  27. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/registry_server/storage.py +0 -0
  28. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/router.py +0 -0
  29. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/schemas/agent-schema.json +0 -0
  30. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/schemas/router-agent-schema.json +0 -0
  31. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a/server.py +0 -0
  32. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a.egg-info/dependency_links.txt +0 -0
  33. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a.egg-info/requires.txt +0 -0
  34. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/distributed_a2a.egg-info/top_level.txt +0 -0
  35. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/requirements.txt +0 -0
  36. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/setup.cfg +0 -0
  37. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/setup.py +0 -0
  38. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/tests/test_app.py +0 -0
  39. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/tests/test_client.py +0 -0
  40. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/tests/test_rejection.py +0 -0
  41. {distributed_a2a-0.2.2 → distributed_a2a-0.2.3}/tests/test_timeout.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: distributed_a2a
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: A library for building A2A agents with routing capabilities
5
5
  Home-page: https://github.com/Barra-Technologies/distributed-a2a
6
6
  Author: Fabian Bell
@@ -211,6 +211,27 @@ if __name__ == "__main__":
211
211
  asyncio.run(main())
212
212
  ```
213
213
 
214
+ ### Binary content handling
215
+
216
+ When an agent invokes an MCP tool that returns non-text content (files, images,
217
+ `EmbeddedResource`, `ResourceLink`), the library keeps those payloads out of the
218
+ LLM's context window and delivers them out-of-band as A2A `FilePart` artifacts.
219
+
220
+ Concretely, `RoutingAgentExecutor` installs the
221
+ `hide_binary_content_from_llm` tool-call interceptor on every
222
+ `MultiServerMCPClient` it builds. The interceptor moves any non-`TextContent`
223
+ block from `CallToolResult.content` into `CallToolResult.structuredContent`
224
+ under the `non_text_content` key. The upstream adapter then carries that dict
225
+ into `ToolMessage.artifact['structured_content']`, which LangChain does **not**
226
+ surface to the model. After the graph run, the executor walks the message list,
227
+ extracts the stashed blocks, and emits one `TaskArtifactUpdateEvent` per file
228
+ before the terminating text artifact.
229
+
230
+ Client-side, `RoutingA2AClient.send_message` returns an `AgentReply` that
231
+ exposes both the LLM's text summary and any `FileRef` payloads (with either
232
+ inline `bytes_b64` or a `uri`), so downstream integrations (e.g. Slack file
233
+ uploads) can forward the bytes without ever routing them through a model.
234
+
214
235
  ### Environment Variables
215
236
  The library uses several environment variables for configuration. These can be set in your shell or via a `.env` file.
216
237
 
@@ -172,6 +172,27 @@ if __name__ == "__main__":
172
172
  asyncio.run(main())
173
173
  ```
174
174
 
175
+ ### Binary content handling
176
+
177
+ When an agent invokes an MCP tool that returns non-text content (files, images,
178
+ `EmbeddedResource`, `ResourceLink`), the library keeps those payloads out of the
179
+ LLM's context window and delivers them out-of-band as A2A `FilePart` artifacts.
180
+
181
+ Concretely, `RoutingAgentExecutor` installs the
182
+ `hide_binary_content_from_llm` tool-call interceptor on every
183
+ `MultiServerMCPClient` it builds. The interceptor moves any non-`TextContent`
184
+ block from `CallToolResult.content` into `CallToolResult.structuredContent`
185
+ under the `non_text_content` key. The upstream adapter then carries that dict
186
+ into `ToolMessage.artifact['structured_content']`, which LangChain does **not**
187
+ surface to the model. After the graph run, the executor walks the message list,
188
+ extracts the stashed blocks, and emits one `TaskArtifactUpdateEvent` per file
189
+ before the terminating text artifact.
190
+
191
+ Client-side, `RoutingA2AClient.send_message` returns an `AgentReply` that
192
+ exposes both the LLM's text summary and any `FileRef` payloads (with either
193
+ inline `bytes_b64` or a `uri`), so downstream integrations (e.g. Slack file
194
+ uploads) can forward the bytes without ever routing them through a model.
195
+
175
196
  ### Environment Variables
176
197
  The library uses several environment variables for configuration. These can be set in your shell or via a `.env` file.
177
198
 
@@ -1,4 +1,6 @@
1
1
  from .client import A2ATimeoutError, AgentReply, FileRef, RoutingA2AClient
2
+ from .mcp_interceptors import (NON_TEXT_CONTENT_KEY,
3
+ hide_binary_content_from_llm)
2
4
  from .model import (AgentConfig, AgentItem, CardConfig, LLMConfig,
3
5
  RegistryConfig, RegistryItemConfig, RouterConfig,
4
6
  RouterItem, SkillConfig)
@@ -31,5 +33,7 @@ __all__ = [
31
33
  "AgentRegistryClient",
32
34
  "McpRegistryClient",
33
35
  "InMemoryAgentRegistry",
34
- "InMemoryMcpRegistry"
36
+ "InMemoryMcpRegistry",
37
+ "hide_binary_content_from_llm",
38
+ "NON_TEXT_CONTENT_KEY",
35
39
  ]
@@ -13,7 +13,8 @@ from langgraph.checkpoint.base import BaseCheckpointSaver
13
13
 
14
14
  from .agent import RoutingResponse, StatusAgent, StringResponse
15
15
  from .config import settings
16
- from .files import extract_file_parts
16
+ from .file_extractors import extract_file_parts
17
+ from .mcp_interceptors import hide_binary_content_from_llm
17
18
  from .model import AgentConfig, RouterConfig
18
19
  from .registry import AgentRegistryLookupClient, McpRegistryLookup
19
20
 
@@ -206,10 +207,18 @@ class RoutingAgentExecutor(AgentExecutor):
206
207
  return
207
208
 
208
209
  logger.info(f"Agent {self.agent_config.agent.card.name} has access to the following tools: {mcp_server_raw}")
209
- mcp_servers = {tool["name"]: {"url": tool["url"], "transport": tool["protocol"],
210
- "headers": settings.get_mcp_auth_headers(tool["name"])} for tool in
211
- mcp_server_raw}
212
- mcp_client = MultiServerMCPClient(mcp_servers) # type: ignore[arg-type]
210
+ mcp_servers: dict[str, Any] = {
211
+ tool["name"]: {
212
+ "url": tool["url"],
213
+ "transport": tool["protocol"],
214
+ "headers": settings.get_mcp_auth_headers(tool["name"])
215
+ }
216
+ for tool in mcp_server_raw
217
+ }
218
+ mcp_client = MultiServerMCPClient(
219
+ connections=mcp_servers,
220
+ tool_interceptors=[hide_binary_content_from_llm],
221
+ )
213
222
  mcp_tools = await mcp_client.get_tools()
214
223
 
215
224
  self.agent = StatusAgent[StringResponse](
@@ -0,0 +1,142 @@
1
+ import json
2
+ import mimetypes
3
+ from typing import Any
4
+
5
+ from a2a.types import FilePart, FileWithBytes, FileWithUri
6
+ from langchain_core.messages import BaseMessage, ToolMessage
7
+ from mcp.types import (BlobResourceContents, EmbeddedResource, ImageContent,
8
+ ResourceLink)
9
+
10
+ from .mcp_interceptors import NON_TEXT_CONTENT_KEY
11
+
12
+ _LANGCHAIN_BINARY_BLOCK_TYPES: dict[str, str] = {
13
+ "file": "attachment",
14
+ "image": "image",
15
+ }
16
+
17
+
18
+ def _filename_from_text_block(block: dict[str, Any]) -> str | None:
19
+ text = block.get("text")
20
+ if not isinstance(text, str):
21
+ return None
22
+ try:
23
+ payload = json.loads(text)
24
+ except (ValueError, TypeError):
25
+ return None
26
+ if isinstance(payload, dict):
27
+ name = payload.get("filename")
28
+ if isinstance(name, str) and name:
29
+ return name
30
+ return None
31
+
32
+
33
+ def _synthetic_name(kind: str, index: int, mime_type: str) -> str:
34
+ guessed_ext = mimetypes.guess_extension(mime_type)
35
+ ext = guessed_ext if guessed_ext is not None else ""
36
+ suffix = f"-{index}" if index > 0 else ""
37
+ return f"{kind}{suffix}{ext}"
38
+
39
+
40
+ def _name_from_uri(uri: str, fallback_kind: str, index: int, mime_type: str) -> str:
41
+ tail = uri.rsplit("/", 1)[-1]
42
+ if tail:
43
+ return tail
44
+ return _synthetic_name(fallback_kind, index, mime_type)
45
+
46
+
47
+ def _extract_from_mcp_blocks(blocks: list[Any]) -> list[tuple[str, FilePart]]:
48
+ out: list[tuple[str, FilePart]] = []
49
+ counters: dict[str, int] = {"attachment": 0, "image": 0}
50
+ for block in blocks:
51
+ if isinstance(block, EmbeddedResource) and isinstance(block.resource, BlobResourceContents):
52
+ mime_type = block.resource.mimeType or "application/octet-stream"
53
+ uri = str(block.resource.uri) if block.resource.uri is not None else ""
54
+ kind = "image" if mime_type.startswith("image/") else "attachment"
55
+ if uri:
56
+ name = _name_from_uri(uri, kind, counters[kind], mime_type)
57
+ else:
58
+ name = _synthetic_name(kind, counters[kind], mime_type)
59
+ counters[kind] += 1
60
+ out.append((name, FilePart(file=FileWithBytes(
61
+ name=name, mime_type=mime_type, bytes=block.resource.blob,
62
+ ))))
63
+ elif isinstance(block, ImageContent):
64
+ mime_type = block.mimeType or "application/octet-stream"
65
+ name = _synthetic_name("image", counters["image"], mime_type)
66
+ counters["image"] += 1
67
+ out.append((name, FilePart(file=FileWithBytes(
68
+ name=name, mime_type=mime_type, bytes=block.data,
69
+ ))))
70
+ elif isinstance(block, ResourceLink):
71
+ mime_type = block.mimeType or "application/octet-stream"
72
+ uri = str(block.uri)
73
+ kind = "image" if mime_type.startswith("image/") else "attachment"
74
+ name = _name_from_uri(uri, kind, counters[kind], mime_type)
75
+ counters[kind] += 1
76
+ out.append((name, FilePart(file=FileWithUri(
77
+ name=name, mime_type=mime_type, uri=uri,
78
+ ))))
79
+ return out
80
+
81
+
82
+ def _extract_from_langchain_content_blocks(content: list[Any]) -> list[tuple[str, FilePart]]:
83
+ out: list[tuple[str, FilePart]] = []
84
+ pending_name: str | None = None
85
+ counters: dict[str, int] = {"file": 0, "image": 0}
86
+ for block in content:
87
+ if not isinstance(block, dict):
88
+ continue
89
+ block_type = block.get("type")
90
+ if not isinstance(block_type, str):
91
+ continue
92
+ if block_type == "text":
93
+ hint = _filename_from_text_block(block)
94
+ if hint:
95
+ pending_name = hint
96
+ continue
97
+ kind = _LANGCHAIN_BINARY_BLOCK_TYPES.get(block_type)
98
+ if kind is None:
99
+ continue
100
+ b64 = block.get("base64")
101
+ if not isinstance(b64, str) or not b64:
102
+ continue
103
+ mime_type = block.get("mime_type") or "application/octet-stream"
104
+ if pending_name is not None:
105
+ name = pending_name
106
+ pending_name = None
107
+ else:
108
+ index = counters[block_type]
109
+ counters[block_type] = index + 1
110
+ name = _synthetic_name(kind, index, mime_type)
111
+ out.append((name, FilePart(file=FileWithBytes(
112
+ name=name, mime_type=mime_type, bytes=b64,
113
+ ))))
114
+ return out
115
+
116
+
117
+ def _mcp_blocks_from_artifact(artifact: Any) -> list[Any] | None:
118
+ if not isinstance(artifact, dict):
119
+ return None
120
+ structured = artifact.get("structured_content")
121
+ if not isinstance(structured, dict):
122
+ return None
123
+ blocks = structured.get(NON_TEXT_CONTENT_KEY)
124
+ if not isinstance(blocks, list) or not blocks:
125
+ return None
126
+ return blocks
127
+
128
+
129
+ def extract_file_parts(messages: list[BaseMessage]) -> list[tuple[str, FilePart]]:
130
+ parts: list[tuple[str, FilePart]] = []
131
+ for message in messages:
132
+ if not isinstance(message, ToolMessage):
133
+ continue
134
+
135
+ mcp_blocks = _mcp_blocks_from_artifact(message.artifact)
136
+ if mcp_blocks is not None:
137
+ parts.extend(_extract_from_mcp_blocks(mcp_blocks))
138
+ continue
139
+
140
+ if isinstance(message.content, list):
141
+ parts.extend(_extract_from_langchain_content_blocks(message.content))
142
+ return parts
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Awaitable, Callable
4
+ from typing import Any
5
+
6
+ from langchain_mcp_adapters.interceptors import (MCPToolCallRequest,
7
+ MCPToolCallResult)
8
+ from mcp.types import CallToolResult, TextContent
9
+
10
+ """Key under ``CallToolResult.structuredContent`` where the interceptor stashes
11
+ any non-text MCP content blocks. Also the key under
12
+ ``ToolMessage.artifact['structured_content']`` where downstream extraction
13
+ code (:func:`distributed_a2a.file_extractors.extract_file_parts`) reads them back."""
14
+ NON_TEXT_CONTENT_KEY = "non_text_content"
15
+
16
+
17
+ async def hide_binary_content_from_llm(
18
+ request: MCPToolCallRequest,
19
+ handler: Callable[
20
+ [MCPToolCallRequest],
21
+ Awaitable[MCPToolCallResult], # pyright: ignore[reportInvalidTypeForm]
22
+ ],
23
+ ) -> MCPToolCallResult: # pyright: ignore[reportInvalidTypeForm]
24
+ result = await handler(request)
25
+ if not isinstance(result, CallToolResult) or result.isError:
26
+ return result
27
+
28
+ text_blocks: list[TextContent] = []
29
+ non_text_blocks: list[Any] = []
30
+ for block in result.content:
31
+ if isinstance(block, TextContent):
32
+ text_blocks.append(block)
33
+ else:
34
+ non_text_blocks.append(block)
35
+
36
+ merged_structured: dict[str, Any] = (
37
+ dict(result.structuredContent) if result.structuredContent else {}
38
+ )
39
+ merged_structured[NON_TEXT_CONTENT_KEY] = non_text_blocks
40
+
41
+ return result.model_copy(update={
42
+ "content": text_blocks,
43
+ "structuredContent": merged_structured,
44
+ })
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: distributed_a2a
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: A library for building A2A agents with routing capabilities
5
5
  Home-page: https://github.com/Barra-Technologies/distributed-a2a
6
6
  Author: Fabian Bell
@@ -211,6 +211,27 @@ if __name__ == "__main__":
211
211
  asyncio.run(main())
212
212
  ```
213
213
 
214
+ ### Binary content handling
215
+
216
+ When an agent invokes an MCP tool that returns non-text content (files, images,
217
+ `EmbeddedResource`, `ResourceLink`), the library keeps those payloads out of the
218
+ LLM's context window and delivers them out-of-band as A2A `FilePart` artifacts.
219
+
220
+ Concretely, `RoutingAgentExecutor` installs the
221
+ `hide_binary_content_from_llm` tool-call interceptor on every
222
+ `MultiServerMCPClient` it builds. The interceptor moves any non-`TextContent`
223
+ block from `CallToolResult.content` into `CallToolResult.structuredContent`
224
+ under the `non_text_content` key. The upstream adapter then carries that dict
225
+ into `ToolMessage.artifact['structured_content']`, which LangChain does **not**
226
+ surface to the model. After the graph run, the executor walks the message list,
227
+ extracts the stashed blocks, and emits one `TaskArtifactUpdateEvent` per file
228
+ before the terminating text artifact.
229
+
230
+ Client-side, `RoutingA2AClient.send_message` returns an `AgentReply` that
231
+ exposes both the LLM's text summary and any `FileRef` payloads (with either
232
+ inline `bytes_b64` or a `uri`), so downstream integrations (e.g. Slack file
233
+ uploads) can forward the bytes without ever routing them through a model.
234
+
214
235
  ### Environment Variables
215
236
  The library uses several environment variables for configuration. These can be set in your shell or via a `.env` file.
216
237
 
@@ -9,7 +9,8 @@ distributed_a2a/agent.py
9
9
  distributed_a2a/client.py
10
10
  distributed_a2a/config.py
11
11
  distributed_a2a/executors.py
12
- distributed_a2a/files.py
12
+ distributed_a2a/file_extractors.py
13
+ distributed_a2a/mcp_interceptors.py
13
14
  distributed_a2a/model.py
14
15
  distributed_a2a/py.typed
15
16
  distributed_a2a/registry.py
@@ -31,6 +32,7 @@ distributed_a2a/schemas/router-agent-schema.json
31
32
  tests/test_app.py
32
33
  tests/test_client.py
33
34
  tests/test_executor_files.py
34
- tests/test_files.py
35
+ tests/test_file_extractors.py
36
+ tests/test_mcp_interceptors.py
35
37
  tests/test_rejection.py
36
38
  tests/test_timeout.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "distributed_a2a"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "A library for building A2A agents with routing capabilities"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.14"
@@ -12,9 +12,16 @@ from a2a.types import Message as A2AMessage
12
12
  from a2a.types import (MessageSendParams, Part, Role, TaskArtifactUpdateEvent,
13
13
  TaskState, TaskStatusUpdateEvent, TextPart)
14
14
  from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage
15
+ from mcp.types import BlobResourceContents, EmbeddedResource
16
+ from pydantic import AnyUrl
15
17
 
16
18
  from distributed_a2a.agent import AgentInvocation, StringResponse
17
19
  from distributed_a2a.executors import RoutingAgentExecutor
20
+ from distributed_a2a.mcp_interceptors import NON_TEXT_CONTENT_KEY
21
+
22
+ _DOCX_MIME = (
23
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
24
+ )
18
25
 
19
26
 
20
27
  class _StubStatusAgent:
@@ -214,3 +221,63 @@ async def test_executor_emits_one_file_event_per_file_block() -> None:
214
221
  if isinstance(e, TaskStatusUpdateEvent) and e.final]
215
222
  assert len(final_status) == 1
216
223
  assert final_status[0].status.state == TaskState.completed
224
+
225
+
226
+ @pytest.mark.asyncio
227
+ async def test_executor_emits_file_part_from_interceptor_artifact_shape() -> None:
228
+ docx_b64 = base64.b64encode(b"PK\x03\x04 hidden bytes").decode("ascii")
229
+ embedded = EmbeddedResource(
230
+ type="resource",
231
+ resource=BlobResourceContents(
232
+ uri=AnyUrl("cv://cv-carol.docx"), mimeType=_DOCX_MIME, blob=docx_b64,
233
+ ),
234
+ )
235
+ summary_json = (
236
+ '{"filename": "cv-carol.docx", '
237
+ f'"mime_type": "{_DOCX_MIME}", "size_bytes": 42}}'
238
+ )
239
+ tool_msg = ToolMessage(
240
+ content=summary_json,
241
+ tool_call_id="call-cv",
242
+ artifact={"structured_content": {NON_TEXT_CONTENT_KEY: [embedded]}},
243
+ )
244
+
245
+ executor = RoutingAgentExecutor.__new__(RoutingAgentExecutor)
246
+ executor.agent_config = SimpleNamespace( # type: ignore[assignment]
247
+ agent=SimpleNamespace(card=SimpleNamespace(name="cv-agent")),
248
+ )
249
+ executor.agent = _StubStatusAgent( # type: ignore[assignment]
250
+ StringResponse(status=TaskState.completed,
251
+ response="Here is your CV."),
252
+ [HumanMessage(content="render a CV please"), tool_msg],
253
+ )
254
+
255
+ async def _noop_reinit() -> None:
256
+ return None
257
+
258
+ executor.reinitialize_agent_with_tools = _noop_reinit # type: ignore[method-assign]
259
+
260
+ ctx = _make_request_context()
261
+ queue = EventQueue()
262
+ await executor.execute(ctx, queue)
263
+ events = await _drain_queue(queue)
264
+
265
+ artifact_events = [e for e in events if isinstance(e, TaskArtifactUpdateEvent)]
266
+ assert len(artifact_events) == 2
267
+ file_event, text_event = artifact_events
268
+ assert file_event.last_chunk is False
269
+ assert file_event.artifact.name == "cv-carol.docx"
270
+ file_part = file_event.artifact.parts[0].root
271
+ assert isinstance(file_part, FilePart)
272
+ assert isinstance(file_part.file, FileWithBytes)
273
+ assert file_part.file.name == "cv-carol.docx"
274
+ assert file_part.file.mime_type == _DOCX_MIME
275
+ assert file_part.file.bytes == docx_b64
276
+
277
+ assert text_event.last_chunk is True
278
+ assert text_event.artifact.name == "current_result"
279
+
280
+ final_status = [e for e in events
281
+ if isinstance(e, TaskStatusUpdateEvent) and e.final]
282
+ assert len(final_status) == 1
283
+ assert final_status[0].status.state == TaskState.completed
@@ -9,9 +9,17 @@ from a2a.types import (AgentCapabilities, AgentCard, Artifact, FilePart,
9
9
  FileWithBytes, FileWithUri, Message, Part, Task,
10
10
  TaskState, TaskStatus, TextPart)
11
11
  from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
12
+ from mcp.types import (BlobResourceContents, EmbeddedResource, ImageContent,
13
+ ResourceLink)
14
+ from pydantic import AnyUrl
12
15
 
13
16
  from distributed_a2a.client import AgentReply, RemoteAgentConnection
14
- from distributed_a2a.files import extract_file_parts
17
+ from distributed_a2a.file_extractors import extract_file_parts
18
+ from distributed_a2a.mcp_interceptors import NON_TEXT_CONTENT_KEY
19
+
20
+ _DOCX_MIME = (
21
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
22
+ )
15
23
 
16
24
 
17
25
  def _b64(payload: bytes) -> str:
@@ -150,6 +158,174 @@ def test_extract_file_parts_matches_multiple_filenames_by_order() -> None:
150
158
  assert parts[1][1].file.bytes == b64_b # type: ignore[union-attr]
151
159
 
152
160
 
161
+ def _interceptor_artifact(blocks: list[object], **extra: object) -> dict[str, object]:
162
+ return {"structured_content": {NON_TEXT_CONTENT_KEY: blocks, **extra}}
163
+
164
+
165
+ def test_extract_file_parts_reads_interceptor_artifact_shape() -> None:
166
+ docx_b64 = _b64(b"PK\x03\x04 real docx bytes")
167
+ embedded = EmbeddedResource(
168
+ type="resource",
169
+ resource=BlobResourceContents(
170
+ uri=AnyUrl("cv://cv-alice.docx"), mimeType=_DOCX_MIME, blob=docx_b64,
171
+ ),
172
+ )
173
+ tool_msg = ToolMessage(
174
+ content='{"filename": "cv-alice.docx"}',
175
+ tool_call_id="call-cv",
176
+ artifact=_interceptor_artifact([embedded]),
177
+ )
178
+
179
+ parts = extract_file_parts([tool_msg])
180
+
181
+ assert len(parts) == 1
182
+ name, file_part = parts[0]
183
+ assert name == "cv-alice.docx"
184
+ assert isinstance(file_part.file, FileWithBytes)
185
+ assert file_part.file.name == "cv-alice.docx"
186
+ assert file_part.file.mime_type == _DOCX_MIME
187
+ assert file_part.file.bytes == docx_b64
188
+
189
+
190
+ def test_extract_file_parts_prefers_interceptor_artifact_over_content_blocks() -> None:
191
+ docx_b64 = _b64(b"PK\x03\x04 interceptor bytes")
192
+ embedded = EmbeddedResource(
193
+ type="resource",
194
+ resource=BlobResourceContents(
195
+ uri=AnyUrl("cv://cv-interceptor.docx"), mimeType=_DOCX_MIME, blob=docx_b64,
196
+ ),
197
+ )
198
+ tool_msg = ToolMessage(
199
+ content=[
200
+ {"type": "text", "text": '{"filename": "cv-legacy.docx"}',
201
+ "id": "lc_text_1"},
202
+ {"type": "file",
203
+ "base64": _b64(b"legacy fallback bytes"),
204
+ "mime_type": _DOCX_MIME,
205
+ "id": "lc_file_1"},
206
+ ],
207
+ tool_call_id="call-cv",
208
+ artifact=_interceptor_artifact([embedded]),
209
+ )
210
+
211
+ parts = extract_file_parts([tool_msg])
212
+
213
+ assert len(parts) == 1
214
+ name, file_part = parts[0]
215
+ assert name == "cv-interceptor.docx"
216
+ assert isinstance(file_part.file, FileWithBytes)
217
+ assert file_part.file.bytes == docx_b64
218
+
219
+
220
+ def test_extract_file_parts_reads_image_content_from_interceptor_artifact() -> None:
221
+ png_b64 = _b64(b"\x89PNG\r\n\x1a\n fake image bytes")
222
+ image = ImageContent(type="image", data=png_b64, mimeType="image/png")
223
+ tool_msg = ToolMessage(
224
+ content="here is a chart",
225
+ tool_call_id="call-img",
226
+ artifact=_interceptor_artifact([image]),
227
+ )
228
+
229
+ parts = extract_file_parts([tool_msg])
230
+
231
+ assert len(parts) == 1
232
+ name, file_part = parts[0]
233
+ assert name.startswith("image")
234
+ assert name.endswith(".png")
235
+ assert isinstance(file_part.file, FileWithBytes)
236
+ assert file_part.file.mime_type == "image/png"
237
+ assert file_part.file.bytes == png_b64
238
+
239
+
240
+ def test_extract_file_parts_reads_resource_link_as_file_with_uri() -> None:
241
+ link = ResourceLink(
242
+ type="resource_link",
243
+ uri=AnyUrl("https://example.com/reports/report.pdf"),
244
+ name="report.pdf",
245
+ mimeType="application/pdf",
246
+ )
247
+ tool_msg = ToolMessage(
248
+ content="see attached report",
249
+ tool_call_id="call-link",
250
+ artifact=_interceptor_artifact([link]),
251
+ )
252
+
253
+ parts = extract_file_parts([tool_msg])
254
+
255
+ assert len(parts) == 1
256
+ name, file_part = parts[0]
257
+ assert name == "report.pdf"
258
+ assert isinstance(file_part.file, FileWithUri)
259
+ assert file_part.file.mime_type == "application/pdf"
260
+ assert file_part.file.uri == "https://example.com/reports/report.pdf"
261
+
262
+
263
+ def test_extract_file_parts_reads_multiple_blocks_from_interceptor_artifact() -> None:
264
+ b64_a = _b64(b"aaa docx")
265
+ b64_b = _b64(b"bbb docx")
266
+ a = EmbeddedResource(
267
+ type="resource",
268
+ resource=BlobResourceContents(
269
+ uri=AnyUrl("cv://cv-a.docx"), mimeType=_DOCX_MIME, blob=b64_a,
270
+ ),
271
+ )
272
+ b = EmbeddedResource(
273
+ type="resource",
274
+ resource=BlobResourceContents(
275
+ uri=AnyUrl("cv://cv-b.docx"), mimeType=_DOCX_MIME, blob=b64_b,
276
+ ),
277
+ )
278
+ tool_msg = ToolMessage(
279
+ content='{"count": 2}',
280
+ tool_call_id="call-multi",
281
+ artifact=_interceptor_artifact([a, b]),
282
+ )
283
+
284
+ parts = extract_file_parts([tool_msg])
285
+
286
+ assert [name for name, _ in parts] == ["cv-a.docx", "cv-b.docx"]
287
+ assert parts[0][1].file.bytes == b64_a # type: ignore[union-attr]
288
+ assert parts[1][1].file.bytes == b64_b # type: ignore[union-attr]
289
+
290
+
291
+ def test_extract_file_parts_ignores_empty_interceptor_artifact() -> None:
292
+ """An empty ``non_text_content`` list is treated as if the interceptor
293
+ stashed nothing: the extractor ignores it and falls back to the ``content``
294
+ path, so a file block still present in ``content`` is extracted exactly once."""
295
+ tool_msg = ToolMessage(
296
+ content=[
297
+ {"type": "file", "base64": _b64(b"leaked"),
298
+ "mime_type": _DOCX_MIME, "id": "lc_file_1"},
299
+ ],
300
+ tool_call_id="call-mixed",
301
+ artifact={"structured_content": {NON_TEXT_CONTENT_KEY: []}},
302
+ )
303
+ parts = extract_file_parts([tool_msg])
304
+ assert len(parts) == 1
305
+
306
+
307
+ def test_extract_file_parts_falls_back_to_content_when_artifact_has_no_key() -> None:
308
+ docx_b64 = _b64(b"PK\x03\x04 bytes")
309
+ tool_msg = ToolMessage(
310
+ content=[
311
+ {"type": "text", "text": '{"filename": "cv-fallback.docx"}',
312
+ "id": "lc_text_1"},
313
+ {"type": "file", "base64": docx_b64,
314
+ "mime_type": _DOCX_MIME, "id": "lc_file_1"},
315
+ ],
316
+ tool_call_id="call-fallback",
317
+ artifact={"structured_content": {"unrelated": {"foo": 1}}},
318
+ )
319
+
320
+ parts = extract_file_parts([tool_msg])
321
+
322
+ assert len(parts) == 1
323
+ name, file_part = parts[0]
324
+ assert name == "cv-fallback.docx"
325
+ assert isinstance(file_part.file, FileWithBytes)
326
+ assert file_part.file.bytes == docx_b64
327
+
328
+
153
329
  class _StubAgentClient:
154
330
  def __init__(self, task: Task):
155
331
  self._task = task
@@ -0,0 +1,197 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import pytest
6
+ from langchain_core.messages import ToolMessage
7
+ from langchain_core.tools import StructuredTool
8
+ from langchain_mcp_adapters.interceptors import MCPToolCallRequest
9
+ from langchain_mcp_adapters.tools import _convert_call_tool_result
10
+ from mcp.types import (BlobResourceContents, CallToolResult, EmbeddedResource,
11
+ ImageContent, TextContent)
12
+ from pydantic import AnyUrl
13
+
14
+ from distributed_a2a.mcp_interceptors import (NON_TEXT_CONTENT_KEY,
15
+ hide_binary_content_from_llm)
16
+
17
+
18
def _request() -> MCPToolCallRequest:
    """Build the argument-less ``render_file`` request shared by these tests."""
    request = MCPToolCallRequest(
        name="render_file",
        args={},
        server_name="test-server",
    )
    return request
22
+
23
+
24
def _embedded_docx(uri: str = "file://alice.docx", blob: str = "UEsDBAA=") -> EmbeddedResource:
    """Build an embedded resource holding ``blob`` under the Word DOCX MIME type.

    Args:
        uri: Resource URI; it is passed through ``AnyUrl``.
        blob: String stored as the resource's ``blob`` field.
    """
    docx_mime = (
        "application/vnd.openxmlformats-officedocument."
        "wordprocessingml.document"
    )
    contents = BlobResourceContents(uri=AnyUrl(uri), mimeType=docx_mime, blob=blob)
    return EmbeddedResource(type="resource", resource=contents)
36
+
37
+
38
def _make_handler(returning: Any) -> Any:
    """Return an async handler stub that always resolves to ``returning``.

    The stub accepts the request it is called with and ignores it, so a test
    can dictate exactly what the interceptor receives from downstream.
    """
    async def _canned_response(_req: MCPToolCallRequest) -> Any:
        return returning

    return _canned_response
42
+
43
+
44
@pytest.mark.asyncio
async def test_text_only_result_is_passed_through() -> None:
    """Text-only content is kept and an empty list is stored under the key."""
    summary = TextContent(type="text", text="just a summary")
    original = CallToolResult(
        content=[summary],
        structuredContent=None,
        isError=False,
    )

    result = await hide_binary_content_from_llm(
        _request(), _make_handler(original),
    )

    assert isinstance(result, CallToolResult)
    assert result.content == original.content
    # The key is expected even though there was nothing to hide.
    assert result.structuredContent == {NON_TEXT_CONTENT_KEY: []}
55
+
56
+
57
@pytest.mark.asyncio
async def test_mixed_result_moves_binary_into_structured_content() -> None:
    """Text stays in ``content``; the embedded resource moves under the key."""
    summary_json = '{"filename": "alice.docx"}'
    embedded = _embedded_docx()
    original = CallToolResult(
        content=[TextContent(type="text", text=summary_json), embedded],
        structuredContent=None,
        isError=False,
    )

    result = await hide_binary_content_from_llm(
        _request(), _make_handler(original),
    )

    assert isinstance(result, CallToolResult)
    assert result is not original, "A mutated copy is expected, not the original object."

    # Only the text block may remain visible in ``content``.
    assert len(result.content) == 1
    remaining = result.content[0]
    assert isinstance(remaining, TextContent)
    assert remaining.text == summary_json

    stash = result.structuredContent
    assert stash is not None
    assert stash[NON_TEXT_CONTENT_KEY] == [embedded]
76
+
77
+
78
@pytest.mark.asyncio
async def test_binary_only_result_produces_empty_content_list() -> None:
    """A result holding only an embedded resource ends up with empty content."""
    embedded = _embedded_docx()
    original = CallToolResult(
        content=[embedded],
        structuredContent=None,
        isError=False,
    )

    result = await hide_binary_content_from_llm(
        _request(), _make_handler(original),
    )

    assert isinstance(result, CallToolResult)
    empty_content_reason = (
        "Binary-only tool output should leave content empty — the model "
        "receives no text, and the block is only reachable via artifact."
    )
    assert result.content == [], empty_content_reason
    assert result.structuredContent == {NON_TEXT_CONTENT_KEY: [embedded]}
93
+
94
+
95
@pytest.mark.asyncio
async def test_error_result_is_passed_through_unchanged() -> None:
    """An error result comes back as the very same object, still flagged."""
    original = CallToolResult(
        content=[TextContent(type="text", text="boom")],
        structuredContent=None,
        isError=True,
    )

    result = await hide_binary_content_from_llm(
        _request(), _make_handler(original),
    )

    # Identity, not equality: no copy may be made for error results.
    assert result is original
    assert isinstance(result, CallToolResult)
    assert result.isError is True
105
+
106
+
107
@pytest.mark.asyncio
async def test_error_result_with_binary_still_passes_through() -> None:
    """An error result is returned untouched even when it carries a binary block."""
    error_blocks = [TextContent(type="text", text="oops"), _embedded_docx()]
    original = CallToolResult(
        content=error_blocks,
        structuredContent=None,
        isError=True,
    )

    result = await hide_binary_content_from_llm(
        _request(), _make_handler(original),
    )

    assert result is original
117
+
118
+
119
@pytest.mark.asyncio
async def test_preserves_existing_structured_content() -> None:
    """Existing structured content is kept alongside the new key.

    The original result's structured content must also be left unmodified.
    """
    embedded = _embedded_docx()
    original = CallToolResult(
        content=[TextContent(type="text", text="summary"), embedded],
        structuredContent={"foo": 1, "nested": {"bar": 2}},
        isError=False,
    )

    result = await hide_binary_content_from_llm(
        _request(), _make_handler(original),
    )

    assert isinstance(result, CallToolResult)
    expected_merged = {
        "foo": 1,
        "nested": {"bar": 2},
        NON_TEXT_CONTENT_KEY: [embedded],
    }
    assert result.structuredContent == expected_merged
    # The upstream object must not have gained the interceptor key.
    assert original.structuredContent == {"foo": 1, "nested": {"bar": 2}}
135
+
136
+
137
@pytest.mark.asyncio
async def test_non_call_tool_result_is_passed_through() -> None:
    """A handler result that is a ``ToolMessage`` is returned as the same object."""
    upstream = ToolMessage(content="upstream", tool_call_id="tc-1")
    handler = _make_handler(upstream)

    result = await hide_binary_content_from_llm(_request(), handler)

    assert result is upstream
142
+
143
+
144
@pytest.mark.asyncio
async def test_image_content_is_hidden() -> None:
    """An image block is removed from ``content`` and stored under the key."""
    image = ImageContent(type="image", data="AA==", mimeType="image/png")
    original = CallToolResult(
        content=[TextContent(type="text", text="see below"), image],
        isError=False,
    )

    result = await hide_binary_content_from_llm(
        _request(), _make_handler(original),
    )

    assert isinstance(result, CallToolResult)
    # Compare against a freshly built text block rather than the input object.
    visible_blocks = [TextContent(type="text", text="see below")]
    assert result.content == visible_blocks
    assert result.structuredContent == {NON_TEXT_CONTENT_KEY: [image]}
155
+
156
+
157
def test_adapter_forwards_structured_content_into_tool_message_artifact() -> None:
    """The adapter's converter exposes structured content through the artifact.

    A result already holding an embedded resource under the interceptor key
    is converted; the content must stay text-only and the artifact's
    ``structured_content`` must equal what was stashed.
    """
    embedded = _embedded_docx()
    stashed = CallToolResult(
        content=[TextContent(type="text", text='{"filename": "file.docx"}')],
        structuredContent={NON_TEXT_CONTENT_KEY: [embedded]},
        isError=False,
    )

    content, artifact = _convert_call_tool_result(stashed)

    assert isinstance(content, list) and content
    # No file/image block leaked into the LLM-visible content list.
    assert all(
        not isinstance(block, dict) or block.get("type") == "text"
        for block in content
    )
    assert artifact is not None
    assert artifact["structured_content"] == {NON_TEXT_CONTENT_KEY: [embedded]}
171
+
172
+
173
def test_base_tool_invoke_sets_tool_call_id_when_content_is_not_tool_message() -> None:
    """Invoking a content-and-artifact tool with a tool call yields a ``ToolMessage``.

    The stub returns a ``(content, artifact)`` tuple; the resulting message
    must carry the id of the tool call and the artifact unchanged.
    """
    # Stands in for the MCP call: a text block plus an artifact that mimics
    # what the interceptor stores.
    def fake_call(**_kwargs: Any) -> tuple[list[dict[str, Any]], dict[str, Any]]:
        return (
            [{"type": "text", "text": "summary"}],
            {"structured_content": {NON_TEXT_CONTENT_KEY: ["placeholder"]}},
        )

    tool_call = {
        "name": "render_file",
        "args": {},
        "id": "TCID_42",
        "type": "tool_call",
    }
    render_tool = StructuredTool.from_function(
        func=fake_call,
        name="render_file",
        description="stub",
        response_format="content_and_artifact",
    )

    message = render_tool.invoke(tool_call)

    assert isinstance(message, ToolMessage)
    assert message.tool_call_id == "TCID_42"
    assert message.artifact == {
        "structured_content": {NON_TEXT_CONTENT_KEY: ["placeholder"]},
    }
@@ -1,68 +0,0 @@
1
- import json
2
- import mimetypes
3
- from typing import Any
4
-
5
- from a2a.types import FilePart, FileWithBytes
6
- from langchain_core.messages import BaseMessage, ToolMessage
7
-
8
# Content-block ``type`` values that are treated as binary payloads, mapped
# to the stem used when a file name has to be synthesised for such a block.
_LANGCHAIN_BINARY_BLOCK_TYPES: dict[str, str] = {
    "file": "attachment",
    "image": "image",
}
12
-
13
-
14
- def _filename_from_text_block(block: dict[str, Any]) -> str | None:
15
- text = block.get("text")
16
- if not isinstance(text, str):
17
- return None
18
- try:
19
- payload = json.loads(text)
20
- except (ValueError, TypeError):
21
- return None
22
- if isinstance(payload, dict):
23
- name = payload.get("filename")
24
- if isinstance(name, str) and name:
25
- return name
26
- return None
27
-
28
-
29
def extract_file_parts(messages: list[BaseMessage]) -> list[tuple[str, FilePart]]:
    """Collect binary content blocks from tool messages as named file parts.

    Only ``ToolMessage`` instances whose ``content`` is a list are inspected.
    Their blocks are scanned in order:

    * a ``"text"`` block whose text is a JSON object with a non-empty string
      ``"filename"`` supplies the name for the next binary block;
    * a ``"file"`` or ``"image"`` block with a non-empty string ``"base64"``
      value becomes a ``FilePart``; binary blocks without one are skipped.

    A binary block with no pending filename hint gets a synthesised name of
    the form ``<stem>[-<index>]<ext>``. The stem comes from
    ``_LANGCHAIN_BINARY_BLOCK_TYPES``, the index counts earlier unnamed blocks
    of the same type within the same message (omitted for the first), and the
    extension is guessed from the MIME type (empty when unknown).

    Args:
        messages: Messages to scan; anything that is not a list-content
            ``ToolMessage`` is ignored.

    Returns:
        ``(name, file_part)`` tuples in the order the blocks were encountered.
    """
    parts: list[tuple[str, FilePart]] = []
    for message in messages:
        if not (isinstance(message, ToolMessage)
                and isinstance(message.content, list)):
            continue
        pending_name: str | None = None
        # Fallback names are numbered per message and per block type.
        counters: dict[str, int] = dict.fromkeys(_LANGCHAIN_BINARY_BLOCK_TYPES, 0)
        for block in message.content:
            if not isinstance(block, dict):
                continue
            block_type = block.get("type")
            if not isinstance(block_type, str):
                continue
            if block_type == "text":
                # A text block without a hint leaves an earlier hint pending.
                hint = _filename_from_text_block(block)
                if hint:
                    pending_name = hint
                continue
            kind = _LANGCHAIN_BINARY_BLOCK_TYPES.get(block_type)
            if kind is None:
                continue
            b64 = block.get("base64")
            if not isinstance(b64, str) or not b64:
                continue
            mime_type = block.get("mime_type") or "application/octet-stream"
            if pending_name is not None:
                # A hint names exactly one binary block, then is consumed.
                name = pending_name
                pending_name = None
            else:
                index = counters[block_type]
                counters[block_type] = index + 1
                # guess_extension() already includes the leading dot, so the
                # extension is appended as-is ("attachment.docx", not
                # "attachment-.docx").
                ext = mimetypes.guess_extension(mime_type) or ""
                suffix = f"-{index}" if index > 0 else ""
                name = f"{kind}{suffix}{ext}"
            parts.append((name, FilePart(file=FileWithBytes(
                name=name, mime_type=mime_type, bytes=b64,
            ))))
    return parts
File without changes