llmsessioncontract 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llmsessioncontract-0.2.2/llmsessioncontract.egg-info → llmsessioncontract-0.3.0}/PKG-INFO +81 -2
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/README.md +76 -1
- llmsessioncontract-0.3.0/llmcontract/langchain/__init__.py +43 -0
- llmsessioncontract-0.3.0/llmcontract/langchain/exceptions.py +26 -0
- llmsessioncontract-0.3.0/llmcontract/langchain/fsm.py +184 -0
- llmsessioncontract-0.3.0/llmcontract/langchain/middleware.py +111 -0
- llmsessioncontract-0.3.0/llmcontract/langchain/monitor.py +117 -0
- llmsessioncontract-0.3.0/llmcontract/langchain/tool_ref.py +87 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0/llmsessioncontract.egg-info}/PKG-INFO +81 -2
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmsessioncontract.egg-info/SOURCES.txt +6 -0
- llmsessioncontract-0.3.0/llmsessioncontract.egg-info/requires.txt +11 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/pyproject.toml +3 -2
- llmsessioncontract-0.2.2/llmsessioncontract.egg-info/requires.txt +0 -6
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/LICENSE +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/__init__.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/dsl/__init__.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/dsl/ast.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/dsl/parser.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/__init__.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/client.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/exceptions.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/langfuse.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/middleware.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/types.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/monitor/__init__.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/monitor/automaton.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/monitor/monitor.py +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/py.typed +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmsessioncontract.egg-info/dependency_links.txt +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmsessioncontract.egg-info/top_level.txt +0 -0
- {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llmsessioncontract
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Runtime monitor for LLM agent interaction protocols based on session type theory
|
|
5
5
|
Author-email: Chris Bartolo Burlo <chris@mizziburlo.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -23,8 +23,12 @@ Description-Content-Type: text/markdown
|
|
|
23
23
|
License-File: LICENSE
|
|
24
24
|
Provides-Extra: langfuse
|
|
25
25
|
Requires-Dist: langfuse>=3.0.0; extra == "langfuse"
|
|
26
|
+
Provides-Extra: langchain
|
|
27
|
+
Requires-Dist: langchain>=1.0.0; extra == "langchain"
|
|
28
|
+
Requires-Dist: langchain-core>=0.3.0; extra == "langchain"
|
|
26
29
|
Provides-Extra: dev
|
|
27
30
|
Requires-Dist: pytest; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
28
32
|
Dynamic: license-file
|
|
29
33
|
|
|
30
34
|
# llmcontract
|
|
@@ -230,6 +234,80 @@ while True:
|
|
|
230
234
|
messages.append(tool_result_msg(tc.id, result))
|
|
231
235
|
```
|
|
232
236
|
|
|
237
|
+
## LangChain Integration (`llmcontract.langchain`, 0.3.0+)
|
|
238
|
+
|
|
239
|
+
A focused FSM-as-data API for users who want to wire protocol monitoring
|
|
240
|
+
into LangChain agents without touching the DSL parser. Tool references
|
|
241
|
+
are real Python callables, transitions are explicit objects with
|
|
242
|
+
optional guards and actions, and violation handling is fully
|
|
243
|
+
user-controlled.
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
pip install llmsessioncontract[langchain]
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from langchain_core.tools import tool
|
|
251
|
+
from langchain.agents import create_agent
|
|
252
|
+
from llmcontract.langchain import (
|
|
253
|
+
ProtocolFSM, Transition, ProtocolMonitor,
|
|
254
|
+
ProtocolEnforcerMiddleware, ViolationEvent,
|
|
255
|
+
ProtocolViolationError, ref,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
@tool
|
|
259
|
+
def search(query: str) -> str:
|
|
260
|
+
"""Search for available flights."""
|
|
261
|
+
return f"Results for: {query}"
|
|
262
|
+
|
|
263
|
+
@tool
|
|
264
|
+
def book(result: str) -> str:
|
|
265
|
+
"""Book a selected flight."""
|
|
266
|
+
return f"Booked: {result}"
|
|
267
|
+
|
|
268
|
+
search_ref = ref(search)
|
|
269
|
+
book_ref = ref(book)
|
|
270
|
+
|
|
271
|
+
fsm = (
|
|
272
|
+
ProtocolFSM(initial="idle")
|
|
273
|
+
.add_transition(Transition(source="idle", tool=search_ref, phase="send", target="searching"))
|
|
274
|
+
.add_transition(Transition(source="searching", tool=search_ref, phase="recv", target="results"))
|
|
275
|
+
.add_transition(Transition(source="results", tool=book_ref, phase="send", target="booking",
|
|
276
|
+
guard=lambda ctx: bool(ctx.metadata.get("args", {}))))
|
|
277
|
+
.add_transition(Transition(source="booking", tool=book_ref, phase="recv", target="done"))
|
|
278
|
+
.mark_terminal("done")
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
def on_violation(v: ViolationEvent) -> None:
|
|
282
|
+
raise ProtocolViolationError(f"Illegal {v.phase}:{v.tool_ref.label} from {v.current_state!r}", violation=v)
|
|
283
|
+
|
|
284
|
+
monitor = ProtocolMonitor(fsm=fsm, on_violation=on_violation)
|
|
285
|
+
middleware = ProtocolEnforcerMiddleware(monitor=monitor, tool_refs=[search_ref, book_ref]).middleware
|
|
286
|
+
|
|
287
|
+
agent = create_agent(model=..., tools=[search, book], middleware=[middleware])
|
|
288
|
+
agent.invoke({"messages": [("user", "Book me a flight to Rome.")]})
|
|
289
|
+
|
|
290
|
+
print(monitor.state) # → "done"
|
|
291
|
+
print(monitor.is_complete()) # → True
|
|
292
|
+
print(monitor.trace) # → ["send:search", "recv:search", "send:book", "recv:book"]
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
When to pick this over the DSL `Monitor`:
|
|
296
|
+
|
|
297
|
+
- You're already in a LangChain stack and want a drop-in `AgentMiddleware`
|
|
298
|
+
- You need per-transition guards and actions (e.g., audit logs, business rules)
|
|
299
|
+
- You want enforcement (block tool calls), not just observation
|
|
300
|
+
- You don't need recursion / choice / `Unrecognized` from the DSL
|
|
301
|
+
|
|
302
|
+
When to stick with the DSL `Monitor`:
|
|
303
|
+
|
|
304
|
+
- You want to write protocols as concise strings (`!Search.?Result.end`)
|
|
305
|
+
- You need recursion or compositional choice
|
|
306
|
+
- You're outside LangChain (Anthropic SDK, OpenAI SDK, custom loop)
|
|
307
|
+
- You want first-class natural-language ambiguity via `Unrecognized`
|
|
308
|
+
|
|
309
|
+
Worked example: [`examples/langchain_booking/booking_agent_submodule.py`](examples/langchain_booking/booking_agent_submodule.py).
|
|
310
|
+
|
|
233
311
|
## Langfuse Integration
|
|
234
312
|
|
|
235
313
|
Track protocol compliance in [Langfuse](https://langfuse.com) — every send/receive is recorded as a guardrail observation with a pass/fail score.
|
|
@@ -280,7 +358,8 @@ The skill validates each draft DSL against `llmcontract`'s parser, so anything i
|
|
|
280
358
|
|
|
281
359
|
## Case Studies
|
|
282
360
|
|
|
283
|
-
- **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
|
|
361
|
+
- **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — *user ↔ agent layer.* Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
|
|
362
|
+
- **[`llmcontract-playwright-mcp`](https://github.com/chrisbartoloburlo/llmcontract-playwright-mcp)** — *agent ↔ tool layer.* 90-trajectory sweep across Claude Haiku 4.5 / Sonnet 4.6 / Opus 4.7 driving [`@playwright/mcp`](https://github.com/microsoft/playwright-mcp), checked against two invariants from the server's documented usage. Headline: 9% violate `snap-before-interact`, 29% violate `stay-on-snapshot-refs` — and the failure modes scale in opposite directions with model capability (Haiku snapshots religiously but ignores the snapshot 57% of the time; Opus skips the snapshot 13% of the time but follows through cleanly when it commits).
|
|
284
363
|
|
|
285
364
|
## Research
|
|
286
365
|
|
|
@@ -201,6 +201,80 @@ while True:
|
|
|
201
201
|
messages.append(tool_result_msg(tc.id, result))
|
|
202
202
|
```
|
|
203
203
|
|
|
204
|
+
## LangChain Integration (`llmcontract.langchain`, 0.3.0+)
|
|
205
|
+
|
|
206
|
+
A focused FSM-as-data API for users who want to wire protocol monitoring
|
|
207
|
+
into LangChain agents without touching the DSL parser. Tool references
|
|
208
|
+
are real Python callables, transitions are explicit objects with
|
|
209
|
+
optional guards and actions, and violation handling is fully
|
|
210
|
+
user-controlled.
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
pip install llmsessioncontract[langchain]
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
from langchain_core.tools import tool
|
|
218
|
+
from langchain.agents import create_agent
|
|
219
|
+
from llmcontract.langchain import (
|
|
220
|
+
ProtocolFSM, Transition, ProtocolMonitor,
|
|
221
|
+
ProtocolEnforcerMiddleware, ViolationEvent,
|
|
222
|
+
ProtocolViolationError, ref,
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
@tool
|
|
226
|
+
def search(query: str) -> str:
|
|
227
|
+
"""Search for available flights."""
|
|
228
|
+
return f"Results for: {query}"
|
|
229
|
+
|
|
230
|
+
@tool
|
|
231
|
+
def book(result: str) -> str:
|
|
232
|
+
"""Book a selected flight."""
|
|
233
|
+
return f"Booked: {result}"
|
|
234
|
+
|
|
235
|
+
search_ref = ref(search)
|
|
236
|
+
book_ref = ref(book)
|
|
237
|
+
|
|
238
|
+
fsm = (
|
|
239
|
+
ProtocolFSM(initial="idle")
|
|
240
|
+
.add_transition(Transition(source="idle", tool=search_ref, phase="send", target="searching"))
|
|
241
|
+
.add_transition(Transition(source="searching", tool=search_ref, phase="recv", target="results"))
|
|
242
|
+
.add_transition(Transition(source="results", tool=book_ref, phase="send", target="booking",
|
|
243
|
+
guard=lambda ctx: bool(ctx.metadata.get("args", {}))))
|
|
244
|
+
.add_transition(Transition(source="booking", tool=book_ref, phase="recv", target="done"))
|
|
245
|
+
.mark_terminal("done")
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
def on_violation(v: ViolationEvent) -> None:
|
|
249
|
+
raise ProtocolViolationError(f"Illegal {v.phase}:{v.tool_ref.label} from {v.current_state!r}", violation=v)
|
|
250
|
+
|
|
251
|
+
monitor = ProtocolMonitor(fsm=fsm, on_violation=on_violation)
|
|
252
|
+
middleware = ProtocolEnforcerMiddleware(monitor=monitor, tool_refs=[search_ref, book_ref]).middleware
|
|
253
|
+
|
|
254
|
+
agent = create_agent(model=..., tools=[search, book], middleware=[middleware])
|
|
255
|
+
agent.invoke({"messages": [("user", "Book me a flight to Rome.")]})
|
|
256
|
+
|
|
257
|
+
print(monitor.state) # → "done"
|
|
258
|
+
print(monitor.is_complete()) # → True
|
|
259
|
+
print(monitor.trace) # → ["send:search", "recv:search", "send:book", "recv:book"]
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
When to pick this over the DSL `Monitor`:
|
|
263
|
+
|
|
264
|
+
- You're already in a LangChain stack and want a drop-in `AgentMiddleware`
|
|
265
|
+
- You need per-transition guards and actions (e.g., audit logs, business rules)
|
|
266
|
+
- You want enforcement (block tool calls), not just observation
|
|
267
|
+
- You don't need recursion / choice / `Unrecognized` from the DSL
|
|
268
|
+
|
|
269
|
+
When to stick with the DSL `Monitor`:
|
|
270
|
+
|
|
271
|
+
- You want to write protocols as concise strings (`!Search.?Result.end`)
|
|
272
|
+
- You need recursion or compositional choice
|
|
273
|
+
- You're outside LangChain (Anthropic SDK, OpenAI SDK, custom loop)
|
|
274
|
+
- You want first-class natural-language ambiguity via `Unrecognized`
|
|
275
|
+
|
|
276
|
+
Worked example: [`examples/langchain_booking/booking_agent_submodule.py`](examples/langchain_booking/booking_agent_submodule.py).
|
|
277
|
+
|
|
204
278
|
## Langfuse Integration
|
|
205
279
|
|
|
206
280
|
Track protocol compliance in [Langfuse](https://langfuse.com) — every send/receive is recorded as a guardrail observation with a pass/fail score.
|
|
@@ -251,7 +325,8 @@ The skill validates each draft DSL against `llmcontract`'s parser, so anything i
|
|
|
251
325
|
|
|
252
326
|
## Case Studies
|
|
253
327
|
|
|
254
|
-
- **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
|
|
328
|
+
- **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — *user ↔ agent layer.* Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
|
|
329
|
+
- **[`llmcontract-playwright-mcp`](https://github.com/chrisbartoloburlo/llmcontract-playwright-mcp)** — *agent ↔ tool layer.* 90-trajectory sweep across Claude Haiku 4.5 / Sonnet 4.6 / Opus 4.7 driving [`@playwright/mcp`](https://github.com/microsoft/playwright-mcp), checked against two invariants from the server's documented usage. Headline: 9% violate `snap-before-interact`, 29% violate `stay-on-snapshot-refs` — and the failure modes scale in opposite directions with model capability (Haiku snapshots religiously but ignores the snapshot 57% of the time; Opus skips the snapshot 13% of the time but follows through cleanly when it commits).
|
|
255
330
|
|
|
256
331
|
## Research
|
|
257
332
|
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""LangChain-native protocol enforcement for ``llmcontract``.
|
|
2
|
+
|
|
3
|
+
A focused, FSM-as-data API for users who want to wire protocol monitoring
|
|
4
|
+
into LangChain agents with no DSL parsing and no magic strings. Tool
|
|
5
|
+
references are real Python callables; transitions are explicit objects
|
|
6
|
+
with optional guards and actions; violation handling is fully
|
|
7
|
+
user-controlled.
|
|
8
|
+
|
|
9
|
+
The full design and rationale live at
|
|
10
|
+
https://llmcontract.dev/findings/ and in the project README.
|
|
11
|
+
|
|
12
|
+
Submodules:
|
|
13
|
+
tool_ref ToolRef, ref()
|
|
14
|
+
fsm ProtocolFSM, Transition, MonitorContext, ViolationEvent
|
|
15
|
+
monitor ProtocolMonitor
|
|
16
|
+
middleware ProtocolEnforcerMiddleware
|
|
17
|
+
exceptions ProtocolViolationError
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from llmcontract.langchain.exceptions import ProtocolViolationError
|
|
21
|
+
from llmcontract.langchain.fsm import (
|
|
22
|
+
MonitorContext,
|
|
23
|
+
ProtocolFSM,
|
|
24
|
+
Transition,
|
|
25
|
+
ViolationEvent,
|
|
26
|
+
ViolationHandler,
|
|
27
|
+
)
|
|
28
|
+
from llmcontract.langchain.middleware import ProtocolEnforcerMiddleware
|
|
29
|
+
from llmcontract.langchain.monitor import ProtocolMonitor
|
|
30
|
+
from llmcontract.langchain.tool_ref import ToolRef, ref
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"ToolRef",
|
|
34
|
+
"ref",
|
|
35
|
+
"ProtocolFSM",
|
|
36
|
+
"Transition",
|
|
37
|
+
"MonitorContext",
|
|
38
|
+
"ViolationEvent",
|
|
39
|
+
"ViolationHandler",
|
|
40
|
+
"ProtocolMonitor",
|
|
41
|
+
"ProtocolEnforcerMiddleware",
|
|
42
|
+
"ProtocolViolationError",
|
|
43
|
+
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Exception type for protocol violations.
|
|
2
|
+
|
|
3
|
+
The library never raises this itself — it is provided as a convenience for
|
|
4
|
+
``on_violation`` callbacks that want to halt execution by raising. The
|
|
5
|
+
runtime decision of *what* to do on a violation belongs to the user, not
|
|
6
|
+
to the library.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from llmcontract.langchain.fsm import ViolationEvent
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ProtocolViolationError(RuntimeError):
    """Convenience exception for use in ``on_violation`` callbacks.

    The library never raises this itself. Construct and raise it inside
    your handler if you want a violation to abort the agent invocation.

    Args:
        message: Human-readable description; becomes ``str(exc)``.
        violation: The ``ViolationEvent`` being reported. It is kept on
            the instance as ``exc.violation``.
    """

    def __init__(self, message: str, violation: "ViolationEvent") -> None:
        super().__init__(message)
        self.violation = violation

    def __reduce__(self) -> tuple:
        """Return the reconstruction recipe used by ``copy`` and ``pickle``.

        Only ``message`` is stored in ``self.args``, so the inherited
        implementation would rebuild the exception as ``cls(message)`` and
        fail with ``TypeError`` because ``violation`` is a required
        argument. Passing both constructor arguments keeps the exception
        copyable and picklable (for ``pickle`` the ``violation`` payload
        must itself be picklable).
        """
        message = self.args[0] if self.args else ""
        # The instance ``__dict__`` is forwarded as state so any extra
        # attributes attached to the exception survive the round trip.
        return type(self), (message, self.violation), self.__dict__
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Pure finite-state-machine protocol definition.
|
|
2
|
+
|
|
3
|
+
This module has zero LangChain imports. ``ProtocolFSM`` is an explicit
|
|
4
|
+
transition table you build via ``add_transition`` calls; it has no notion
|
|
5
|
+
of recursion, choice, or any other DSL primitive — those compose by
|
|
6
|
+
hand from individual ``Transition`` edges.
|
|
7
|
+
|
|
8
|
+
State is held entirely in ``ProtocolMonitor`` (a sibling module). The FSM
|
|
9
|
+
itself is immutable after the developer finishes adding transitions and
|
|
10
|
+
marking terminal states.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Any, Callable
|
|
17
|
+
|
|
18
|
+
from llmcontract.langchain.tool_ref import ToolRef
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ── Per-step contextual data ────────────────────────────────
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class MonitorContext:
    """Transient, per-attempt data handed to ``guard``/``action`` callables.

    An instance is meant to describe a single transition attempt; do not
    retain references to it after the callable returns.

    Attributes:
        current_state: FSM state *before* the transition attempt.
        event: Full event string, e.g. ``"send:search"``.
        tool_ref: The ``ToolRef`` whose call triggered this event.
        phase: Either ``"send"`` (tool call about to run) or ``"recv"``
            (result returned).
        trace: Snapshot copy of the events fired so far; mutating it is
            not supposed to affect the monitor's own history.
        metadata: Per-phase context, e.g. tool ``args`` on ``send`` events
            and the tool ``result`` on ``recv`` events. It is passed
            through to user-supplied guards and actions uninterpreted.
            Defaults to a fresh empty dict for every instance.
    """

    current_state: str
    event: str
    tool_ref: ToolRef
    phase: str
    trace: list[str]
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class ViolationEvent:
    """Payload handed to the user's ``on_violation`` callback.

    Describes a transition that could not fire, either because no rule
    matched or because a guard returned ``False``.

    Attributes:
        current_state: State the FSM was in when the event was rejected.
        event: The rejected event string.
        expected: Event strings that *would* have been valid from
            ``current_state``. May be empty if no transitions are defined
            from that state.
        trace: All events fired so far, *including* the violating one.
        tool_ref: The ``ToolRef`` associated with the rejected event.
        phase: Phase of the rejected event.
    """

    current_state: str
    event: str
    expected: list[str]
    trace: list[str]
    tool_ref: ToolRef
    phase: str
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
ViolationHandler = Callable[[ViolationEvent], None]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ── Transitions ─────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
_VALID_PHASES = ("send", "recv")


@dataclass
class Transition:
    """A single edge of the protocol graph.

    Attributes:
        source: State the edge leaves from.
        tool: ``ToolRef`` whose ``label`` forms part of the event key.
        phase: ``"send"`` or ``"recv"``; anything else is rejected at
            construction time.
        target: State the edge leads to.
        guard: Optional predicate deciding whether the edge may fire.
        action: Optional side effect run when the edge commits.

    ``event`` is derived from ``phase`` and ``tool.label`` and is exposed
    as a read-only property, so it is never supplied by the developer.

    Raises:
        ValueError: If ``phase`` is not one of the valid phases.
    """

    source: str
    tool: ToolRef
    phase: str
    target: str
    guard: Callable[[MonitorContext], bool] | None = None
    action: Callable[[MonitorContext], None] | None = None

    def __post_init__(self) -> None:
        # Guard clause: a recognised phase needs no further work.
        if self.phase in _VALID_PHASES:
            return
        problem = (
            f"Transition.phase must be one of {_VALID_PHASES!r}; "
            f"got {self.phase!r}"
        )
        raise ValueError(problem)

    @property
    def event(self) -> str:
        """Lookup key for this edge, formatted ``"<phase>:<tool.label>"``.

        Computed on every access rather than cached, so it always
        reflects the current ``phase`` and ``tool.label``.
        """
        phase, label = self.phase, self.tool.label
        return f"{phase}:{label}"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ── The FSM ─────────────────────────────────────────────────
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class ProtocolFSM:
    """Protocol definition as data: initial state, edge table, terminal set.

    The class holds no run-time position; callers pass the current state
    into ``step()`` and receive ``(next_state, ok)`` back. A missing rule
    or a rejecting guard yields ``(state, False)`` and it is up to the
    caller to decide how to react.
    """

    def __init__(self, initial: str) -> None:
        self.initial: str = initial
        self._terminal: set[str] = set()
        # Edges are keyed by (source_state, event_string) so a step is a
        # single dict lookup.
        self._transitions: dict[tuple[str, str], Transition] = {}

    # ── Building the FSM (fluent) ────────────────────────────

    def add_transition(self, t: Transition) -> "ProtocolFSM":
        """Register edge ``t`` and return ``self`` for chaining.

        Raises:
            ValueError: If an edge with the same ``(source, event)`` pair
                is already registered; a second one would make the FSM
                non-deterministic.
        """
        slot = (t.source, t.event)
        if slot in self._transitions:
            raise ValueError(
                f"duplicate transition for state={t.source!r}, "
                f"event={t.event!r}"
            )
        self._transitions[slot] = t
        return self

    def mark_terminal(self, *states: str) -> "ProtocolFSM":
        """Flag the given states as protocol-complete and return ``self``."""
        for name in states:
            self._terminal.add(name)
        return self

    # ── Querying the FSM ─────────────────────────────────────

    def valid_events(self, state: str) -> list[str]:
        """List the event strings that have an edge leaving ``state``.

        The result follows registration order and is ``[]`` when ``state``
        is unknown or has no outgoing edges.
        """
        return [key[1] for key in self._transitions if key[0] == state]

    def step(
        self,
        state: str,
        event: str,
        ctx: MonitorContext,
    ) -> tuple[str, bool]:
        """Attempt to fire the edge registered for ``(state, event)``.

        Returns ``(target, True)`` after running the edge's action (if
        any). Returns ``(state, False)`` when no edge is registered or the
        edge's guard rejects ``ctx``; in that case no action runs. Invoking
        a violation handler is left to the caller.
        """
        edge = self._transitions.get((state, event))
        # The guard is only consulted when an edge actually exists.
        rejected = edge is None or (
            edge.guard is not None and not edge.guard(ctx)
        )
        if rejected:
            return state, False
        if edge.action is not None:
            edge.action(ctx)
        return edge.target, True

    def is_terminal(self, state: str) -> bool:
        """Report whether ``state`` was flagged via ``mark_terminal``."""
        return state in self._terminal
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""LangChain integration: a thin ``AgentMiddleware`` that drives a
|
|
2
|
+
``ProtocolMonitor`` from ``wrap_tool_call`` / ``awrap_tool_call``.
|
|
3
|
+
|
|
4
|
+
This is the only module in the submodule that imports LangChain. The
|
|
5
|
+
FSM, the monitor, and ``ToolRef`` are all framework-agnostic; they can
|
|
6
|
+
be unit-tested in isolation without LangChain installed.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Any, Awaitable, Callable
|
|
12
|
+
|
|
13
|
+
from llmcontract.langchain.monitor import ProtocolMonitor
|
|
14
|
+
from llmcontract.langchain.tool_ref import ToolRef
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ProtocolEnforcerMiddleware:
    """LangChain ``AgentMiddleware`` that fires ``send`` and ``recv``
    transitions on the wrapped monitor for every registered tool call.

    Construction imports LangChain's ``AgentMiddleware`` lazily and builds
    a subclass instance, so importing this module does not itself require
    LangChain. The object exposed via ``.middleware`` is what you pass to
    ``create_agent(middleware=[...])``.

    Args:
        monitor: The ``ProtocolMonitor`` whose ``transition()`` is driven.
        tool_refs: The tools to monitor. Calls to tools whose name is not
            among these labels pass through untouched.
    """

    def __init__(
        self,
        monitor: ProtocolMonitor,
        tool_refs: list[ToolRef],
    ) -> None:
        self._monitor = monitor
        # The only place tool *name strings* surface inside the library —
        # we look up by the name LangChain hands us in the request, so
        # the developer never has to write or see a string.
        self._ref_by_label: dict[str, ToolRef] = {t.label: t for t in tool_refs}
        self._impl = self._build_impl()

    @property
    def middleware(self) -> Any:
        """The ``AgentMiddleware`` subclass instance to pass to
        ``create_agent(middleware=[...])``."""
        return self._impl

    # ── Build the real AgentMiddleware subclass instance ────

    def _build_impl(self) -> Any:
        """Create the ``AgentMiddleware`` subclass instance bound to ``self``."""
        # Imports happen here, not at module load — keeps the rest of
        # the langchain submodule usable in environments without
        # langchain installed.
        from langchain.agents.middleware import AgentMiddleware

        outer = self  # captured by closure into the methods below

        class _Impl(AgentMiddleware):
            def wrap_tool_call(self, request, handler):  # type: ignore[override]
                return outer._dispatch_sync(request, handler)

            async def awrap_tool_call(self, request, handler):  # type: ignore[override]
                return await outer._dispatch_async(request, handler)

        return _Impl()

    # ── Shared pre/post steps for sync and async dispatch ───

    def _begin_call(self, request: Any) -> ToolRef | None:
        """Fire the ``send`` transition for a registered tool.

        Returns the matching ``ToolRef``, or ``None`` when the requested
        tool is not registered (in which case the monitor is not touched).
        """
        tool_ref = self._ref_by_label.get(request.tool_call["name"])
        if tool_ref is None:
            # Tool isn't registered in this protocol — pass through
            # unmonitored. Partial protocol coverage is a valid use case
            # (e.g., monitoring only the booking subset of a larger tool
            # surface).
            return None
        # A missing or ``None`` ``args`` entry is normalised to ``{}``.
        args = request.tool_call.get("args", {}) or {}
        # The boolean returned by ``transition()`` is not inspected here:
        # the tool still runs unless this call raises.
        self._monitor.transition(tool_ref, phase="send", metadata={"args": args})
        return tool_ref

    def _finish_call(self, tool_ref: ToolRef, result: Any) -> Any:
        """Fire the ``recv`` transition and hand ``result`` back unchanged."""
        self._monitor.transition(tool_ref, phase="recv", metadata={"result": result})
        return result

    # ── Sync and async dispatch share one logical body ──────

    def _dispatch_sync(
        self,
        request: Any,
        handler: Callable[[Any], Any],
    ) -> Any:
        """Run ``handler(request)`` with ``send``/``recv`` transitions around it."""
        tool_ref = self._begin_call(request)
        if tool_ref is None:
            return handler(request)

        # Tool exception path: let it propagate. We deliberately do NOT
        # fire the recv transition — the protocol stays in the
        # post-send state, mirroring reality (the tool didn't actually
        # produce a result). A tool exception is *not* a protocol
        # violation; it's an orthogonal failure mode and the user's
        # outer error handling owns it.
        result = handler(request)
        return self._finish_call(tool_ref, result)

    async def _dispatch_async(
        self,
        request: Any,
        handler: Callable[[Any], Awaitable[Any]],
    ) -> Any:
        """Async counterpart of ``_dispatch_sync``; awaits ``handler(request)``."""
        tool_ref = self._begin_call(request)
        if tool_ref is None:
            return await handler(request)

        # As in the sync path, an exception raised by the tool propagates
        # and the recv transition is not fired.
        result = await handler(request)
        return self._finish_call(tool_ref, result)
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Stateful runner that drives a ``ProtocolFSM`` through one chain
|
|
2
|
+
execution.
|
|
3
|
+
|
|
4
|
+
One ``ProtocolMonitor`` instance per agent invocation. Holds the current
|
|
5
|
+
state and the trace of every event that has fired so far. Calls the
|
|
6
|
+
user-supplied ``on_violation`` handler whenever the FSM rejects a
|
|
7
|
+
transition.
|
|
8
|
+
|
|
9
|
+
This module does not import LangChain.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from llmcontract.langchain.fsm import (
|
|
17
|
+
MonitorContext,
|
|
18
|
+
ProtocolFSM,
|
|
19
|
+
ViolationEvent,
|
|
20
|
+
ViolationHandler,
|
|
21
|
+
)
|
|
22
|
+
from llmcontract.langchain.tool_ref import ToolRef
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ProtocolMonitor:
|
|
26
|
+
"""Owns the mutable state for one chain execution.
|
|
27
|
+
|
|
28
|
+
Construct once per ``agent.invoke`` (or call ``reset()`` between
|
|
29
|
+
invocations). The middleware calls ``transition()`` on every tool
|
|
30
|
+
call — once with ``phase="send"`` before the tool runs, once with
|
|
31
|
+
``phase="recv"`` after it returns successfully.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
fsm: ProtocolFSM,
|
|
37
|
+
on_violation: ViolationHandler,
|
|
38
|
+
initial_state: str | None = None,
|
|
39
|
+
) -> None:
|
|
40
|
+
self._fsm = fsm
|
|
41
|
+
self._on_violation = on_violation
|
|
42
|
+
self._initial = initial_state if initial_state is not None else fsm.initial
|
|
43
|
+
self._state: str = self._initial
|
|
44
|
+
self._trace: list[str] = []
|
|
45
|
+
|
|
46
|
+
# ── Read-only views ─────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
@property
|
|
49
|
+
def state(self) -> str:
|
|
50
|
+
return self._state
|
|
51
|
+
|
|
52
|
+
@property
|
|
53
|
+
def trace(self) -> list[str]:
|
|
54
|
+
# Snapshot copy — callers must not mutate the monitor's history.
|
|
55
|
+
return list(self._trace)
|
|
56
|
+
|
|
57
|
+
# ── Driving the FSM ─────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
def transition(
|
|
60
|
+
self,
|
|
61
|
+
tool_ref: ToolRef,
|
|
62
|
+
phase: str,
|
|
63
|
+
metadata: dict[str, Any] | None = None,
|
|
64
|
+
) -> bool:
|
|
65
|
+
"""Attempt one FSM step.
|
|
66
|
+
|
|
67
|
+
Returns ``True`` on success (state advanced), ``False`` on
|
|
68
|
+
violation (state unchanged, ``on_violation`` invoked).
|
|
69
|
+
"""
|
|
70
|
+
event = f"{phase}:{tool_ref.label}"
|
|
71
|
+
# The trace records the *attempted* event regardless of outcome,
|
|
72
|
+
# so violation handlers see the full history including the
|
|
73
|
+
# violating step. ``ViolationEvent.trace`` and
|
|
74
|
+
# ``MonitorContext.trace`` are both snapshot copies, never
|
|
75
|
+
# references to this list.
|
|
76
|
+
self._trace.append(event)
|
|
77
|
+
|
|
78
|
+
ctx = MonitorContext(
|
|
79
|
+
current_state=self._state,
|
|
80
|
+
event=event,
|
|
81
|
+
tool_ref=tool_ref,
|
|
82
|
+
phase=phase,
|
|
83
|
+
trace=list(self._trace),
|
|
84
|
+
metadata=dict(metadata) if metadata else {},
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
next_state, ok = self._fsm.step(self._state, event, ctx)
|
|
88
|
+
if ok:
|
|
89
|
+
self._state = next_state
|
|
90
|
+
return True
|
|
91
|
+
|
|
92
|
+
self._on_violation(
|
|
93
|
+
ViolationEvent(
|
|
94
|
+
current_state=self._state,
|
|
95
|
+
event=event,
|
|
96
|
+
expected=self._fsm.valid_events(self._state),
|
|
97
|
+
trace=list(self._trace),
|
|
98
|
+
tool_ref=tool_ref,
|
|
99
|
+
phase=phase,
|
|
100
|
+
)
|
|
101
|
+
)
|
|
102
|
+
return False
|
|
103
|
+
|
|
104
|
+
def reset(self) -> None:
|
|
105
|
+
"""Restore the monitor to its initial state and clear the trace.
|
|
106
|
+
|
|
107
|
+
Use this if you want to reuse one ``ProtocolMonitor`` instance
|
|
108
|
+
across multiple ``agent.invoke`` calls. The cleaner pattern is
|
|
109
|
+
to construct a fresh monitor per invocation, but reset is
|
|
110
|
+
provided for callers who pool resources.
|
|
111
|
+
"""
|
|
112
|
+
self._state = self._initial
|
|
113
|
+
self._trace.clear()
|
|
114
|
+
|
|
115
|
+
def is_complete(self) -> bool:
|
|
116
|
+
"""Whether the monitor's current state is terminal."""
|
|
117
|
+
return self._fsm.is_terminal(self._state)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Stable, hashable references to LangChain tools.
|
|
2
|
+
|
|
3
|
+
A ``ToolRef`` wraps a ``BaseTool`` instance, a ``@tool``-decorated callable,
|
|
4
|
+
or any plain callable, and exposes a single read-only string label derived
|
|
5
|
+
once at construction. Two ``ToolRef`` objects are equal (and share a hash)
|
|
6
|
+
iff their labels match — which lets developers refer to the same tool from
|
|
7
|
+
multiple FSM transitions without juggling identity.
|
|
8
|
+
|
|
9
|
+
Crucially, this module does **not** import LangChain. The label resolution
|
|
10
|
+
walks duck-typed attributes (``.name`` first, ``.__name__`` second) so the
|
|
11
|
+
core FSM/monitor stays importable in environments without LangChain
|
|
12
|
+
installed. The actual ``BaseTool`` import only happens inside the
|
|
13
|
+
middleware module.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Any, Callable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ToolRef:
|
|
22
|
+
"""Stable label-bearing reference to a LangChain tool or callable.
|
|
23
|
+
|
|
24
|
+
Label resolution order, applied once at construction:
|
|
25
|
+
|
|
26
|
+
1. If ``tool`` has a ``.name`` attribute that is a non-empty string,
|
|
27
|
+
use it. (Covers ``BaseTool`` instances and ``@tool``-decorated
|
|
28
|
+
callables, which expose ``.name`` on the resulting
|
|
29
|
+
``StructuredTool``.)
|
|
30
|
+
2. Otherwise, if ``tool`` is callable and has ``__name__``, use that.
|
|
31
|
+
3. Otherwise, raise ``TypeError``.
|
|
32
|
+
|
|
33
|
+
The label is read-only. Comparing or hashing two ``ToolRef`` objects
|
|
34
|
+
uses the label only — wrapping different callables that happen to
|
|
35
|
+
share a name yields equal refs.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
__slots__ = ("_label", "_tool")
|
|
39
|
+
|
|
40
|
+
def __init__(self, tool: Any) -> None:
|
|
41
|
+
# `.name` first — covers BaseTool subclasses and @tool wrappers
|
|
42
|
+
# (StructuredTool exposes .name) without importing langchain.
|
|
43
|
+
name = getattr(tool, "name", None)
|
|
44
|
+
if isinstance(name, str) and name:
|
|
45
|
+
label = name
|
|
46
|
+
elif callable(tool) and getattr(tool, "__name__", None):
|
|
47
|
+
label = tool.__name__
|
|
48
|
+
else:
|
|
49
|
+
raise TypeError(
|
|
50
|
+
f"ToolRef expects a BaseTool, @tool callable, or named "
|
|
51
|
+
f"callable; got {type(tool).__name__}"
|
|
52
|
+
)
|
|
53
|
+
# __slots__ disables __dict__; assign through object.__setattr__
|
|
54
|
+
# so future attempts to overwrite (label/tool are read-only) fail.
|
|
55
|
+
object.__setattr__(self, "_label", label)
|
|
56
|
+
object.__setattr__(self, "_tool", tool)
|
|
57
|
+
|
|
58
|
+
def __setattr__(self, name: str, value: Any) -> None:
|
|
59
|
+
raise AttributeError(f"ToolRef is immutable; cannot set {name!r}")
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def label(self) -> str:
|
|
63
|
+
return self._label
|
|
64
|
+
|
|
65
|
+
@property
|
|
66
|
+
def tool(self) -> Any:
|
|
67
|
+
return self._tool
|
|
68
|
+
|
|
69
|
+
def __repr__(self) -> str:
|
|
70
|
+
return f"ToolRef({self._label!r})"
|
|
71
|
+
|
|
72
|
+
def __eq__(self, other: object) -> bool:
|
|
73
|
+
if not isinstance(other, ToolRef):
|
|
74
|
+
return NotImplemented
|
|
75
|
+
return self._label == other._label
|
|
76
|
+
|
|
77
|
+
def __hash__(self) -> int:
|
|
78
|
+
return hash(("ToolRef", self._label))
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def ref(tool: Any) -> ToolRef:
|
|
82
|
+
"""Convenience shorthand for ``ToolRef(tool)``.
|
|
83
|
+
|
|
84
|
+
This is the primary API developers reach for — they always pass the
|
|
85
|
+
tool function, never a name string.
|
|
86
|
+
"""
|
|
87
|
+
return ToolRef(tool)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llmsessioncontract
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Runtime monitor for LLM agent interaction protocols based on session type theory
|
|
5
5
|
Author-email: Chris Bartolo Burlo <chris@mizziburlo.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -23,8 +23,12 @@ Description-Content-Type: text/markdown
|
|
|
23
23
|
License-File: LICENSE
|
|
24
24
|
Provides-Extra: langfuse
|
|
25
25
|
Requires-Dist: langfuse>=3.0.0; extra == "langfuse"
|
|
26
|
+
Provides-Extra: langchain
|
|
27
|
+
Requires-Dist: langchain>=1.0.0; extra == "langchain"
|
|
28
|
+
Requires-Dist: langchain-core>=0.3.0; extra == "langchain"
|
|
26
29
|
Provides-Extra: dev
|
|
27
30
|
Requires-Dist: pytest; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
28
32
|
Dynamic: license-file
|
|
29
33
|
|
|
30
34
|
# llmcontract
|
|
@@ -230,6 +234,80 @@ while True:
|
|
|
230
234
|
messages.append(tool_result_msg(tc.id, result))
|
|
231
235
|
```
|
|
232
236
|
|
|
237
|
+
## LangChain Integration (`llmcontract.langchain`, 0.3.0+)
|
|
238
|
+
|
|
239
|
+
A focused FSM-as-data API for users who want to wire protocol monitoring
|
|
240
|
+
into LangChain agents without touching the DSL parser. Tool references
|
|
241
|
+
are real Python callables, transitions are explicit objects with
|
|
242
|
+
optional guards and actions, and violation handling is fully
|
|
243
|
+
user-controlled.
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
pip install llmsessioncontract[langchain]
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from langchain_core.tools import tool
|
|
251
|
+
from langchain.agents import create_agent
|
|
252
|
+
from llmcontract.langchain import (
|
|
253
|
+
ProtocolFSM, Transition, ProtocolMonitor,
|
|
254
|
+
ProtocolEnforcerMiddleware, ViolationEvent,
|
|
255
|
+
ProtocolViolationError, ref,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
@tool
|
|
259
|
+
def search(query: str) -> str:
|
|
260
|
+
"""Search for available flights."""
|
|
261
|
+
return f"Results for: {query}"
|
|
262
|
+
|
|
263
|
+
@tool
|
|
264
|
+
def book(result: str) -> str:
|
|
265
|
+
"""Book a selected flight."""
|
|
266
|
+
return f"Booked: {result}"
|
|
267
|
+
|
|
268
|
+
search_ref = ref(search)
|
|
269
|
+
book_ref = ref(book)
|
|
270
|
+
|
|
271
|
+
fsm = (
|
|
272
|
+
ProtocolFSM(initial="idle")
|
|
273
|
+
.add_transition(Transition(source="idle", tool=search_ref, phase="send", target="searching"))
|
|
274
|
+
.add_transition(Transition(source="searching", tool=search_ref, phase="recv", target="results"))
|
|
275
|
+
.add_transition(Transition(source="results", tool=book_ref, phase="send", target="booking",
|
|
276
|
+
guard=lambda ctx: bool(ctx.metadata.get("args", {}))))
|
|
277
|
+
.add_transition(Transition(source="booking", tool=book_ref, phase="recv", target="done"))
|
|
278
|
+
.mark_terminal("done")
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
def on_violation(v: ViolationEvent) -> None:
|
|
282
|
+
raise ProtocolViolationError(f"Illegal {v.phase}:{v.tool_ref.label} from {v.current_state!r}", violation=v)
|
|
283
|
+
|
|
284
|
+
monitor = ProtocolMonitor(fsm=fsm, on_violation=on_violation)
|
|
285
|
+
middleware = ProtocolEnforcerMiddleware(monitor=monitor, tool_refs=[search_ref, book_ref]).middleware
|
|
286
|
+
|
|
287
|
+
agent = create_agent(model=..., tools=[search, book], middleware=[middleware])
|
|
288
|
+
agent.invoke({"messages": [("user", "Book me a flight to Rome.")]})
|
|
289
|
+
|
|
290
|
+
print(monitor.state) # → "done"
|
|
291
|
+
print(monitor.is_complete()) # → True
|
|
292
|
+
print(monitor.trace) # → ["send:search", "recv:search", "send:book", "recv:book"]
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
When to pick this over the DSL `Monitor`:
|
|
296
|
+
|
|
297
|
+
- You're already in a LangChain stack and want a drop-in `AgentMiddleware`
|
|
298
|
+
- You need per-transition guards and actions (e.g., audit logs, business rules)
|
|
299
|
+
- You want enforcement (block tool calls), not just observation
|
|
300
|
+
- You don't need recursion / choice / `Unrecognized` from the DSL
|
|
301
|
+
|
|
302
|
+
When to stick with the DSL `Monitor`:
|
|
303
|
+
|
|
304
|
+
- You want to write protocols as concise strings (`!Search.?Result.end`)
|
|
305
|
+
- You need recursion or compositional choice
|
|
306
|
+
- You're outside LangChain (Anthropic SDK, OpenAI SDK, custom loop)
|
|
307
|
+
- You want first-class natural-language ambiguity via `Unrecognized`
|
|
308
|
+
|
|
309
|
+
Worked example: [`examples/langchain_booking/booking_agent_submodule.py`](examples/langchain_booking/booking_agent_submodule.py).
|
|
310
|
+
|
|
233
311
|
## Langfuse Integration
|
|
234
312
|
|
|
235
313
|
Track protocol compliance in [Langfuse](https://langfuse.com) — every send/receive is recorded as a guardrail observation with a pass/fail score.
|
|
@@ -280,7 +358,8 @@ The skill validates each draft DSL against `llmcontract`'s parser, so anything i
|
|
|
280
358
|
|
|
281
359
|
## Case Studies
|
|
282
360
|
|
|
283
|
-
- **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
|
|
361
|
+
- **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — *user ↔ agent layer.* Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
|
|
362
|
+
- **[`llmcontract-playwright-mcp`](https://github.com/chrisbartoloburlo/llmcontract-playwright-mcp)** — *agent ↔ tool layer.* 90-trajectory sweep across Claude Haiku 4.5 / Sonnet 4.6 / Opus 4.7 driving [`@playwright/mcp`](https://github.com/microsoft/playwright-mcp), checked against two invariants from the server's documented usage. Headline: 9% violate `snap-before-interact`, 29% violate `stay-on-snapshot-refs` — and the failure modes scale in opposite directions with model capability (Haiku snapshots religiously but ignores the snapshot 57% of the time; Opus skips the snapshot 13% of the time but follows through cleanly when it commits).
|
|
284
363
|
|
|
285
364
|
## Research
|
|
286
365
|
|
{llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmsessioncontract.egg-info/SOURCES.txt
RENAMED
|
@@ -12,6 +12,12 @@ llmcontract/integration/exceptions.py
|
|
|
12
12
|
llmcontract/integration/langfuse.py
|
|
13
13
|
llmcontract/integration/middleware.py
|
|
14
14
|
llmcontract/integration/types.py
|
|
15
|
+
llmcontract/langchain/__init__.py
|
|
16
|
+
llmcontract/langchain/exceptions.py
|
|
17
|
+
llmcontract/langchain/fsm.py
|
|
18
|
+
llmcontract/langchain/middleware.py
|
|
19
|
+
llmcontract/langchain/monitor.py
|
|
20
|
+
llmcontract/langchain/tool_ref.py
|
|
15
21
|
llmcontract/monitor/__init__.py
|
|
16
22
|
llmcontract/monitor/automaton.py
|
|
17
23
|
llmcontract/monitor/monitor.py
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "llmsessioncontract"
|
|
7
|
-
version = "0.2.2"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "Runtime monitor for LLM agent interaction protocols based on session type theory"
|
|
9
9
|
requires-python = ">=3.10"
|
|
10
10
|
license = "MIT"
|
|
@@ -33,7 +33,8 @@ Issues = "https://github.com/chrisbartoloburlo/llmcontract/issues"
|
|
|
33
33
|
|
|
34
34
|
[project.optional-dependencies]
|
|
35
35
|
langfuse = ["langfuse>=3.0.0"]
|
|
36
|
-
dev = ["pytest"]
|
|
36
|
+
langchain = ["langchain>=1.0.0", "langchain-core>=0.3.0"]
|
|
37
|
+
dev = ["pytest", "pytest-asyncio"]
|
|
37
38
|
|
|
38
39
|
[tool.setuptools.packages.find]
|
|
39
40
|
include = ["llmcontract*"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmsessioncontract.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|