llmsessioncontract 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {llmsessioncontract-0.2.2/llmsessioncontract.egg-info → llmsessioncontract-0.3.0}/PKG-INFO +81 -2
  2. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/README.md +76 -1
  3. llmsessioncontract-0.3.0/llmcontract/langchain/__init__.py +43 -0
  4. llmsessioncontract-0.3.0/llmcontract/langchain/exceptions.py +26 -0
  5. llmsessioncontract-0.3.0/llmcontract/langchain/fsm.py +184 -0
  6. llmsessioncontract-0.3.0/llmcontract/langchain/middleware.py +111 -0
  7. llmsessioncontract-0.3.0/llmcontract/langchain/monitor.py +117 -0
  8. llmsessioncontract-0.3.0/llmcontract/langchain/tool_ref.py +87 -0
  9. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0/llmsessioncontract.egg-info}/PKG-INFO +81 -2
  10. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmsessioncontract.egg-info/SOURCES.txt +6 -0
  11. llmsessioncontract-0.3.0/llmsessioncontract.egg-info/requires.txt +11 -0
  12. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/pyproject.toml +3 -2
  13. llmsessioncontract-0.2.2/llmsessioncontract.egg-info/requires.txt +0 -6
  14. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/LICENSE +0 -0
  15. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/__init__.py +0 -0
  16. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/dsl/__init__.py +0 -0
  17. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/dsl/ast.py +0 -0
  18. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/dsl/parser.py +0 -0
  19. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/__init__.py +0 -0
  20. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/client.py +0 -0
  21. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/exceptions.py +0 -0
  22. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/langfuse.py +0 -0
  23. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/middleware.py +0 -0
  24. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/integration/types.py +0 -0
  25. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/monitor/__init__.py +0 -0
  26. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/monitor/automaton.py +0 -0
  27. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/monitor/monitor.py +0 -0
  28. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmcontract/py.typed +0 -0
  29. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmsessioncontract.egg-info/dependency_links.txt +0 -0
  30. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/llmsessioncontract.egg-info/top_level.txt +0 -0
  31. {llmsessioncontract-0.2.2 → llmsessioncontract-0.3.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llmsessioncontract
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: Runtime monitor for LLM agent interaction protocols based on session type theory
5
5
  Author-email: Chris Bartolo Burlo <chris@mizziburlo.com>
6
6
  License-Expression: MIT
@@ -23,8 +23,12 @@ Description-Content-Type: text/markdown
23
23
  License-File: LICENSE
24
24
  Provides-Extra: langfuse
25
25
  Requires-Dist: langfuse>=3.0.0; extra == "langfuse"
26
+ Provides-Extra: langchain
27
+ Requires-Dist: langchain>=1.0.0; extra == "langchain"
28
+ Requires-Dist: langchain-core>=0.3.0; extra == "langchain"
26
29
  Provides-Extra: dev
27
30
  Requires-Dist: pytest; extra == "dev"
31
+ Requires-Dist: pytest-asyncio; extra == "dev"
28
32
  Dynamic: license-file
29
33
 
30
34
  # llmcontract
@@ -230,6 +234,80 @@ while True:
230
234
  messages.append(tool_result_msg(tc.id, result))
231
235
  ```
232
236
 
237
+ ## LangChain Integration (`llmcontract.langchain`, 0.3.0+)
238
+
239
+ A focused FSM-as-data API for users who want to wire protocol monitoring
240
+ into LangChain agents without touching the DSL parser. Tool references
241
+ are real Python callables, transitions are explicit objects with
242
+ optional guards and actions, and violation handling is fully
243
+ user-controlled.
244
+
245
+ ```bash
246
+ pip install llmsessioncontract[langchain]
247
+ ```
248
+
249
+ ```python
250
+ from langchain_core.tools import tool
251
+ from langchain.agents import create_agent
252
+ from llmcontract.langchain import (
253
+ ProtocolFSM, Transition, ProtocolMonitor,
254
+ ProtocolEnforcerMiddleware, ViolationEvent,
255
+ ProtocolViolationError, ref,
256
+ )
257
+
258
+ @tool
259
+ def search(query: str) -> str:
260
+ """Search for available flights."""
261
+ return f"Results for: {query}"
262
+
263
+ @tool
264
+ def book(result: str) -> str:
265
+ """Book a selected flight."""
266
+ return f"Booked: {result}"
267
+
268
+ search_ref = ref(search)
269
+ book_ref = ref(book)
270
+
271
+ fsm = (
272
+ ProtocolFSM(initial="idle")
273
+ .add_transition(Transition(source="idle", tool=search_ref, phase="send", target="searching"))
274
+ .add_transition(Transition(source="searching", tool=search_ref, phase="recv", target="results"))
275
+ .add_transition(Transition(source="results", tool=book_ref, phase="send", target="booking",
276
+ guard=lambda ctx: bool(ctx.metadata.get("args", {}))))
277
+ .add_transition(Transition(source="booking", tool=book_ref, phase="recv", target="done"))
278
+ .mark_terminal("done")
279
+ )
280
+
281
+ def on_violation(v: ViolationEvent) -> None:
282
+ raise ProtocolViolationError(f"Illegal {v.phase}:{v.tool_ref.label} from {v.current_state!r}", violation=v)
283
+
284
+ monitor = ProtocolMonitor(fsm=fsm, on_violation=on_violation)
285
+ middleware = ProtocolEnforcerMiddleware(monitor=monitor, tool_refs=[search_ref, book_ref]).middleware
286
+
287
+ agent = create_agent(model=..., tools=[search, book], middleware=[middleware])
288
+ agent.invoke({"messages": [("user", "Book me a flight to Rome.")]})
289
+
290
+ print(monitor.state) # → "done"
291
+ print(monitor.is_complete()) # → True
292
+ print(monitor.trace) # → ["send:search", "recv:search", "send:book", "recv:book"]
293
+ ```
294
+
295
+ When to pick this over the DSL `Monitor`:
296
+
297
+ - You're already in a LangChain stack and want a drop-in `AgentMiddleware`
298
+ - You need per-transition guards and actions (e.g., audit logs, business rules)
299
+ - You want enforcement (block tool calls), not just observation
300
+ - You don't need recursion / choice / `Unrecognized` from the DSL
301
+
302
+ When to stick with the DSL `Monitor`:
303
+
304
+ - You want to write protocols as concise strings (`!Search.?Result.end`)
305
+ - You need recursion or compositional choice
306
+ - You're outside LangChain (Anthropic SDK, OpenAI SDK, custom loop)
307
+ - You want first-class natural-language ambiguity via `Unrecognized`
308
+
309
+ Worked example: [`examples/langchain_booking/booking_agent_submodule.py`](examples/langchain_booking/booking_agent_submodule.py).
310
+
233
311
  ## Langfuse Integration
234
312
 
235
313
  Track protocol compliance in [Langfuse](https://langfuse.com) — every send/receive is recorded as a guardrail observation with a pass/fail score.
@@ -280,7 +358,8 @@ The skill validates each draft DSL against `llmcontract`'s parser, so anything i
280
358
 
281
359
  ## Case Studies
282
360
 
283
- - **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
361
+ - **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — *user ↔ agent layer.* Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
362
+ - **[`llmcontract-playwright-mcp`](https://github.com/chrisbartoloburlo/llmcontract-playwright-mcp)** — *agent ↔ tool layer.* 90-trajectory sweep across Claude Haiku 4.5 / Sonnet 4.6 / Opus 4.7 driving [`@playwright/mcp`](https://github.com/microsoft/playwright-mcp), checked against two invariants from the server's documented usage. Headline: 9% violate `snap-before-interact`, 29% violate `stay-on-snapshot-refs` — and the failure modes scale in opposite directions with model capability (Haiku snapshots religiously but ignores the snapshot 57% of the time; Opus skips the snapshot 13% of the time but follows through cleanly when it commits).
284
363
 
285
364
  ## Research
286
365
 
@@ -201,6 +201,80 @@ while True:
201
201
  messages.append(tool_result_msg(tc.id, result))
202
202
  ```
203
203
 
204
+ ## LangChain Integration (`llmcontract.langchain`, 0.3.0+)
205
+
206
+ A focused FSM-as-data API for users who want to wire protocol monitoring
207
+ into LangChain agents without touching the DSL parser. Tool references
208
+ are real Python callables, transitions are explicit objects with
209
+ optional guards and actions, and violation handling is fully
210
+ user-controlled.
211
+
212
+ ```bash
213
+ pip install llmsessioncontract[langchain]
214
+ ```
215
+
216
+ ```python
217
+ from langchain_core.tools import tool
218
+ from langchain.agents import create_agent
219
+ from llmcontract.langchain import (
220
+ ProtocolFSM, Transition, ProtocolMonitor,
221
+ ProtocolEnforcerMiddleware, ViolationEvent,
222
+ ProtocolViolationError, ref,
223
+ )
224
+
225
+ @tool
226
+ def search(query: str) -> str:
227
+ """Search for available flights."""
228
+ return f"Results for: {query}"
229
+
230
+ @tool
231
+ def book(result: str) -> str:
232
+ """Book a selected flight."""
233
+ return f"Booked: {result}"
234
+
235
+ search_ref = ref(search)
236
+ book_ref = ref(book)
237
+
238
+ fsm = (
239
+ ProtocolFSM(initial="idle")
240
+ .add_transition(Transition(source="idle", tool=search_ref, phase="send", target="searching"))
241
+ .add_transition(Transition(source="searching", tool=search_ref, phase="recv", target="results"))
242
+ .add_transition(Transition(source="results", tool=book_ref, phase="send", target="booking",
243
+ guard=lambda ctx: bool(ctx.metadata.get("args", {}))))
244
+ .add_transition(Transition(source="booking", tool=book_ref, phase="recv", target="done"))
245
+ .mark_terminal("done")
246
+ )
247
+
248
+ def on_violation(v: ViolationEvent) -> None:
249
+ raise ProtocolViolationError(f"Illegal {v.phase}:{v.tool_ref.label} from {v.current_state!r}", violation=v)
250
+
251
+ monitor = ProtocolMonitor(fsm=fsm, on_violation=on_violation)
252
+ middleware = ProtocolEnforcerMiddleware(monitor=monitor, tool_refs=[search_ref, book_ref]).middleware
253
+
254
+ agent = create_agent(model=..., tools=[search, book], middleware=[middleware])
255
+ agent.invoke({"messages": [("user", "Book me a flight to Rome.")]})
256
+
257
+ print(monitor.state) # → "done"
258
+ print(monitor.is_complete()) # → True
259
+ print(monitor.trace) # → ["send:search", "recv:search", "send:book", "recv:book"]
260
+ ```
261
+
262
+ When to pick this over the DSL `Monitor`:
263
+
264
+ - You're already in a LangChain stack and want a drop-in `AgentMiddleware`
265
+ - You need per-transition guards and actions (e.g., audit logs, business rules)
266
+ - You want enforcement (block tool calls), not just observation
267
+ - You don't need recursion / choice / `Unrecognized` from the DSL
268
+
269
+ When to stick with the DSL `Monitor`:
270
+
271
+ - You want to write protocols as concise strings (`!Search.?Result.end`)
272
+ - You need recursion or compositional choice
273
+ - You're outside LangChain (Anthropic SDK, OpenAI SDK, custom loop)
274
+ - You want first-class natural-language ambiguity via `Unrecognized`
275
+
276
+ Worked example: [`examples/langchain_booking/booking_agent_submodule.py`](examples/langchain_booking/booking_agent_submodule.py).
277
+
204
278
  ## Langfuse Integration
205
279
 
206
280
  Track protocol compliance in [Langfuse](https://langfuse.com) — every send/receive is recorded as a guardrail observation with a pass/fail score.
@@ -251,7 +325,8 @@ The skill validates each draft DSL against `llmcontract`'s parser, so anything i
251
325
 
252
326
  ## Case Studies
253
327
 
254
- - **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
328
+ - **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — *user ↔ agent layer.* Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
329
+ - **[`llmcontract-playwright-mcp`](https://github.com/chrisbartoloburlo/llmcontract-playwright-mcp)** — *agent ↔ tool layer.* 90-trajectory sweep across Claude Haiku 4.5 / Sonnet 4.6 / Opus 4.7 driving [`@playwright/mcp`](https://github.com/microsoft/playwright-mcp), checked against two invariants from the server's documented usage. Headline: 9% violate `snap-before-interact`, 29% violate `stay-on-snapshot-refs` — and the failure modes scale in opposite directions with model capability (Haiku snapshots religiously but ignores the snapshot 57% of the time; Opus skips the snapshot 13% of the time but follows through cleanly when it commits).
255
330
 
256
331
  ## Research
257
332
 
@@ -0,0 +1,43 @@
1
+ """LangChain-native protocol enforcement for ``llmcontract``.
2
+
3
+ A focused, FSM-as-data API for users who want to wire protocol monitoring
4
+ into LangChain agents with no DSL parsing and no magic strings. Tool
5
+ references are real Python callables; transitions are explicit objects
6
+ with optional guards and actions; violation handling is fully
7
+ user-controlled.
8
+
9
+ The full design and rationale live at
10
+ https://llmcontract.dev/findings/ and in the project README.
11
+
12
+ Submodules:
13
+ tool_ref ToolRef, ref()
14
+ fsm ProtocolFSM, Transition, MonitorContext, ViolationEvent
15
+ monitor ProtocolMonitor
16
+ middleware ProtocolEnforcerMiddleware
17
+ exceptions ProtocolViolationError
18
+ """
19
+
20
+ from llmcontract.langchain.exceptions import ProtocolViolationError
21
+ from llmcontract.langchain.fsm import (
22
+ MonitorContext,
23
+ ProtocolFSM,
24
+ Transition,
25
+ ViolationEvent,
26
+ ViolationHandler,
27
+ )
28
+ from llmcontract.langchain.middleware import ProtocolEnforcerMiddleware
29
+ from llmcontract.langchain.monitor import ProtocolMonitor
30
+ from llmcontract.langchain.tool_ref import ToolRef, ref
31
+
32
+ __all__ = [
33
+ "ToolRef",
34
+ "ref",
35
+ "ProtocolFSM",
36
+ "Transition",
37
+ "MonitorContext",
38
+ "ViolationEvent",
39
+ "ViolationHandler",
40
+ "ProtocolMonitor",
41
+ "ProtocolEnforcerMiddleware",
42
+ "ProtocolViolationError",
43
+ ]
@@ -0,0 +1,26 @@
1
+ """Exception type for protocol violations.
2
+
3
+ The library never raises this itself — it is provided as a convenience for
4
+ ``on_violation`` callbacks that want to halt execution by raising. The
5
+ runtime decision of *what* to do on a violation belongs to the user, not
6
+ to the library.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from llmcontract.langchain.fsm import ViolationEvent
15
+
16
+
17
+ class ProtocolViolationError(RuntimeError):
18
+ """Convenience exception for use in ``on_violation`` callbacks.
19
+
20
+ The library never raises this itself. Construct and raise it inside
21
+ your handler if you want a violation to abort the agent invocation.
22
+ """
23
+
24
+ def __init__(self, message: str, violation: "ViolationEvent") -> None:
25
+ super().__init__(message)
26
+ self.violation = violation
@@ -0,0 +1,184 @@
1
+ """Pure finite-state-machine protocol definition.
2
+
3
+ This module has zero LangChain imports. ``ProtocolFSM`` is an explicit
4
+ transition table you build via ``add_transition`` calls; it has no notion
5
+ of recursion, choice, or any other DSL primitive — those compose by
6
+ hand from individual ``Transition`` edges.
7
+
8
+ State is held entirely in ``ProtocolMonitor`` (a sibling module). The FSM
9
+ itself is immutable after the developer finishes adding transitions and
10
+ marking terminal states.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Any, Callable
17
+
18
+ from llmcontract.langchain.tool_ref import ToolRef
19
+
20
+
21
+ # ── Per-step contextual data ────────────────────────────────
22
+
23
+
24
+ @dataclass
25
+ class MonitorContext:
26
+ """Transient data passed to ``guard``/``action`` callables and embedded
27
+ in ``ViolationEvent``. Built fresh by the monitor for each transition
28
+ attempt; do not retain references."""
29
+
30
+ current_state: str
31
+ """FSM state *before* the transition attempt."""
32
+
33
+ event: str
34
+ """Full event string, e.g. ``"send:search"``."""
35
+
36
+ tool_ref: ToolRef
37
+ """The ``ToolRef`` whose call triggered this event."""
38
+
39
+ phase: str
40
+ """Either ``"send"`` (tool call about to run) or ``"recv"`` (result returned)."""
41
+
42
+ trace: list[str]
43
+ """Snapshot copy of all events attempted so far, *including* the current one. Mutating this list does
44
+ not affect the monitor's internal trace."""
45
+
46
+ metadata: dict[str, Any] = field(default_factory=dict)
47
+ """Per-phase context — tool ``args`` on ``send`` events, tool
48
+ ``result`` on ``recv`` events. The library does not interpret it;
49
+ it's threaded through to user-supplied guards and actions."""
50
+
51
+
52
+ @dataclass
53
+ class ViolationEvent:
54
+ """Argument passed to the user's ``on_violation`` callback when a
55
+ transition cannot fire (no rule matches, or a guard returned
56
+ ``False``)."""
57
+
58
+ current_state: str
59
+ event: str
60
+ expected: list[str]
61
+ """Event strings that *would* have been valid from ``current_state``.
62
+ May be empty if no transitions are defined from this state."""
63
+ trace: list[str]
64
+ """All events fired so far, *including* the violating one."""
65
+ tool_ref: ToolRef
66
+ phase: str
67
+
68
+
69
+ ViolationHandler = Callable[[ViolationEvent], None]
70
+
71
+
72
+ # ── Transitions ─────────────────────────────────────────────
73
+
74
+
75
+ _VALID_PHASES = ("send", "recv")
76
+
77
+
78
+ @dataclass
79
+ class Transition:
80
+ """One edge in the FSM graph.
81
+
82
+ ``event`` is computed from ``phase`` and ``tool.label``; it must not
83
+ be assigned by the developer. ``guard`` (if set) decides whether the
84
+ edge fires; ``action`` (if set) runs as a side effect when the edge
85
+ commits.
86
+ """
87
+
88
+ source: str
89
+ tool: ToolRef
90
+ phase: str
91
+ target: str
92
+ guard: Callable[[MonitorContext], bool] | None = None
93
+ action: Callable[[MonitorContext], None] | None = None
94
+
95
+ def __post_init__(self) -> None:
96
+ if self.phase not in _VALID_PHASES:
97
+ raise ValueError(
98
+ f"Transition.phase must be one of {_VALID_PHASES!r}; "
99
+ f"got {self.phase!r}"
100
+ )
101
+
102
+ @property
103
+ def event(self) -> str:
104
+ """The lookup key for this transition: ``"<phase>:<tool.label>"``.
105
+ Always recomputed; never stored — keeps correctness if internals
106
+ of ``ToolRef`` ever shift.
107
+ """
108
+ return f"{self.phase}:{self.tool.label}"
109
+
110
+
111
+ # ── The FSM ─────────────────────────────────────────────────
112
+
113
+
114
+ class ProtocolFSM:
115
+ """Pure FSM definition — initial state, transition table, terminal
116
+ set. No reference to LangChain or to monitor state.
117
+
118
+ ``step()`` is the workhorse: given a current state and an incoming
119
+ event, it consults the table, runs any guard, fires any action, and
120
+ returns ``(next_state, ok)``. Failures (no rule, or guard rejected)
121
+ return ``(state, False)`` — the monitor decides what to do with that.
122
+ """
123
+
124
+ def __init__(self, initial: str) -> None:
125
+ self.initial: str = initial
126
+ # Indexed by (source_state, event_string) for O(1) lookup.
127
+ self._transitions: dict[tuple[str, str], Transition] = {}
128
+ self._terminal: set[str] = set()
129
+
130
+ # ── Building the FSM (fluent) ────────────────────────────
131
+
132
+ def add_transition(self, t: Transition) -> "ProtocolFSM":
133
+ """Register a transition. Returns ``self`` so calls chain.
134
+
135
+ Raises ``ValueError`` if a transition with the same
136
+ ``(source, event)`` already exists — duplicates would make the
137
+ FSM non-deterministic, which we forbid by construction.
138
+ """
139
+ key = (t.source, t.event)
140
+ if key in self._transitions:
141
+ raise ValueError(
142
+ f"duplicate transition for state={t.source!r}, "
143
+ f"event={t.event!r}"
144
+ )
145
+ self._transitions[key] = t
146
+ return self
147
+
148
+ def mark_terminal(self, *states: str) -> "ProtocolFSM":
149
+ """Flag one or more states as protocol-complete. Returns ``self``."""
150
+ self._terminal.update(states)
151
+ return self
152
+
153
+ # ── Querying the FSM ─────────────────────────────────────
154
+
155
+ def valid_events(self, state: str) -> list[str]:
156
+ """All event strings with a registered transition out of ``state``.
157
+ Returns ``[]`` when ``state`` is unknown or has no outgoing edges.
158
+ """
159
+ return [event for (src, event) in self._transitions if src == state]
160
+
161
+ def step(
162
+ self,
163
+ state: str,
164
+ event: str,
165
+ ctx: MonitorContext,
166
+ ) -> tuple[str, bool]:
167
+ """Try to fire transition ``(state, event)``.
168
+
169
+ On success: run the action (if any), return ``(target, True)``.
170
+ On failure: return ``(state, False)`` *without* mutating anything.
171
+ Calling ``on_violation`` is the monitor's responsibility, not the
172
+ FSM's — keeping the FSM pure makes it trivially unit-testable.
173
+ """
174
+ transition = self._transitions.get((state, event))
175
+ if transition is None:
176
+ return state, False
177
+ if transition.guard is not None and not transition.guard(ctx):
178
+ return state, False
179
+ if transition.action is not None:
180
+ transition.action(ctx)
181
+ return transition.target, True
182
+
183
+ def is_terminal(self, state: str) -> bool:
184
+ return state in self._terminal
@@ -0,0 +1,111 @@
1
+ """LangChain integration: a thin ``AgentMiddleware`` that drives a
2
+ ``ProtocolMonitor`` from ``wrap_tool_call`` / ``awrap_tool_call``.
3
+
4
+ This is the only module in the submodule that imports LangChain. The
5
+ FSM, the monitor, and ``ToolRef`` are all framework-agnostic; they can
6
+ be unit-tested in isolation without LangChain installed.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, Awaitable, Callable
12
+
13
+ from llmcontract.langchain.monitor import ProtocolMonitor
14
+ from llmcontract.langchain.tool_ref import ToolRef
15
+
16
+
17
+ class ProtocolEnforcerMiddleware:
18
+ """LangChain ``AgentMiddleware`` that fires ``send`` and ``recv``
19
+ transitions on the wrapped monitor for every registered tool call.
20
+
21
+ Construction does the import-and-subclass dance against LangChain's
22
+ ``AgentMiddleware`` lazily so the rest of the package stays
23
+ importable without LangChain. The actual middleware object exposed
24
+ via ``.middleware`` is a real ``AgentMiddleware`` subclass instance
25
+ that you pass to ``create_agent(middleware=[...])``.
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ monitor: ProtocolMonitor,
31
+ tool_refs: list[ToolRef],
32
+ ) -> None:
33
+ self._monitor = monitor
34
+ # The only place tool *name strings* surface inside the library —
35
+ # we look up by the name LangChain hands us in the request, so
36
+ # the developer never has to write or see a string.
37
+ self._ref_by_label: dict[str, ToolRef] = {t.label: t for t in tool_refs}
38
+ self._impl = self._build_impl()
39
+
40
+ @property
41
+ def middleware(self) -> Any:
42
+ """The ``AgentMiddleware`` subclass instance to pass to
43
+ ``create_agent(middleware=[...])``."""
44
+ return self._impl
45
+
46
+ # ── Build the real AgentMiddleware subclass instance ────
47
+
48
+ def _build_impl(self) -> Any:
49
+ # Imports happen here, not at module load — keeps the rest of
50
+ # the langchain submodule usable in environments without
51
+ # langchain installed.
52
+ from langchain.agents.middleware import AgentMiddleware
53
+
54
+ outer = self # captured by closure into the methods below
55
+
56
+ class _Impl(AgentMiddleware):
57
+ def wrap_tool_call(self, request, handler): # type: ignore[override]
58
+ return outer._dispatch_sync(request, handler)
59
+
60
+ async def awrap_tool_call(self, request, handler): # type: ignore[override]
61
+ return await outer._dispatch_async(request, handler)
62
+
63
+ return _Impl()
64
+
65
+ # ── Sync and async dispatch share one logical body ──────
66
+
67
+ def _dispatch_sync(
68
+ self,
69
+ request: Any,
70
+ handler: Callable[[Any], Any],
71
+ ) -> Any:
72
+ name = request.tool_call["name"]
73
+ tool_ref = self._ref_by_label.get(name)
74
+ if tool_ref is None:
75
+ # Tool isn't registered in this protocol — pass through
76
+ # unmonitored. Partial protocol coverage is a valid use case
77
+ # (e.g., monitoring only the booking subset of a larger tool
78
+ # surface).
79
+ return handler(request)
80
+
81
+ args = request.tool_call.get("args", {}) or {}
82
+ self._monitor.transition(tool_ref, phase="send", metadata={"args": args})
83
+
84
+ # Tool exception path: let it propagate. We deliberately do NOT
85
+ # fire the recv transition — the protocol stays in the
86
+ # post-send state, mirroring reality (the tool didn't actually
87
+ # produce a result). A tool exception is *not* a protocol
88
+ # violation; it's an orthogonal failure mode and the user's
89
+ # outer error handling owns it.
90
+ result = handler(request)
91
+
92
+ self._monitor.transition(tool_ref, phase="recv", metadata={"result": result})
93
+ return result
94
+
95
+ async def _dispatch_async(
96
+ self,
97
+ request: Any,
98
+ handler: Callable[[Any], Awaitable[Any]],
99
+ ) -> Any:
100
+ name = request.tool_call["name"]
101
+ tool_ref = self._ref_by_label.get(name)
102
+ if tool_ref is None:
103
+ return await handler(request)
104
+
105
+ args = request.tool_call.get("args", {}) or {}
106
+ self._monitor.transition(tool_ref, phase="send", metadata={"args": args})
107
+
108
+ result = await handler(request)
109
+
110
+ self._monitor.transition(tool_ref, phase="recv", metadata={"result": result})
111
+ return result
@@ -0,0 +1,117 @@
1
+ """Stateful runner that drives a ``ProtocolFSM`` through one chain
2
+ execution.
3
+
4
+ One ``ProtocolMonitor`` instance per agent invocation. Holds the current
5
+ state and the trace of every event that has fired so far. Calls the
6
+ user-supplied ``on_violation`` handler whenever the FSM rejects a
7
+ transition.
8
+
9
+ This module does not import LangChain.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any
15
+
16
+ from llmcontract.langchain.fsm import (
17
+ MonitorContext,
18
+ ProtocolFSM,
19
+ ViolationEvent,
20
+ ViolationHandler,
21
+ )
22
+ from llmcontract.langchain.tool_ref import ToolRef
23
+
24
+
25
+ class ProtocolMonitor:
26
+ """Owns the mutable state for one chain execution.
27
+
28
+ Construct once per ``agent.invoke`` (or call ``reset()`` between
29
+ invocations). The middleware calls ``transition()`` on every tool
30
+ call — once with ``phase="send"`` before the tool runs, once with
31
+ ``phase="recv"`` after it returns successfully.
32
+ """
33
+
34
+ def __init__(
35
+ self,
36
+ fsm: ProtocolFSM,
37
+ on_violation: ViolationHandler,
38
+ initial_state: str | None = None,
39
+ ) -> None:
40
+ self._fsm = fsm
41
+ self._on_violation = on_violation
42
+ self._initial = initial_state if initial_state is not None else fsm.initial
43
+ self._state: str = self._initial
44
+ self._trace: list[str] = []
45
+
46
+ # ── Read-only views ─────────────────────────────────────
47
+
48
+ @property
49
+ def state(self) -> str:
50
+ return self._state
51
+
52
+ @property
53
+ def trace(self) -> list[str]:
54
+ # Snapshot copy — callers must not mutate the monitor's history.
55
+ return list(self._trace)
56
+
57
+ # ── Driving the FSM ─────────────────────────────────────
58
+
59
+ def transition(
60
+ self,
61
+ tool_ref: ToolRef,
62
+ phase: str,
63
+ metadata: dict[str, Any] | None = None,
64
+ ) -> bool:
65
+ """Attempt one FSM step.
66
+
67
+ Returns ``True`` on success (state advanced), ``False`` on
68
+ violation (state unchanged, ``on_violation`` invoked).
69
+ """
70
+ event = f"{phase}:{tool_ref.label}"
71
+ # The trace records the *attempted* event regardless of outcome,
72
+ # so violation handlers see the full history including the
73
+ # violating step. ``ViolationEvent.trace`` and
74
+ # ``MonitorContext.trace`` are both snapshot copies, never
75
+ # references to this list.
76
+ self._trace.append(event)
77
+
78
+ ctx = MonitorContext(
79
+ current_state=self._state,
80
+ event=event,
81
+ tool_ref=tool_ref,
82
+ phase=phase,
83
+ trace=list(self._trace),
84
+ metadata=dict(metadata) if metadata else {},
85
+ )
86
+
87
+ next_state, ok = self._fsm.step(self._state, event, ctx)
88
+ if ok:
89
+ self._state = next_state
90
+ return True
91
+
92
+ self._on_violation(
93
+ ViolationEvent(
94
+ current_state=self._state,
95
+ event=event,
96
+ expected=self._fsm.valid_events(self._state),
97
+ trace=list(self._trace),
98
+ tool_ref=tool_ref,
99
+ phase=phase,
100
+ )
101
+ )
102
+ return False
103
+
104
+ def reset(self) -> None:
105
+ """Restore the monitor to its initial state and clear the trace.
106
+
107
+ Use this if you want to reuse one ``ProtocolMonitor`` instance
108
+ across multiple ``agent.invoke`` calls. The cleaner pattern is
109
+ to construct a fresh monitor per invocation, but reset is
110
+ provided for callers who pool resources.
111
+ """
112
+ self._state = self._initial
113
+ self._trace.clear()
114
+
115
+ def is_complete(self) -> bool:
116
+ """Whether the monitor's current state is terminal."""
117
+ return self._fsm.is_terminal(self._state)
@@ -0,0 +1,87 @@
1
+ """Stable, hashable references to LangChain tools.
2
+
3
+ A ``ToolRef`` wraps a ``BaseTool`` instance, a ``@tool``-decorated callable,
4
+ or any plain callable, and exposes a single read-only string label derived
5
+ once at construction. Two ``ToolRef`` objects are equal (and share a hash)
6
+ iff their labels match — which lets developers refer to the same tool from
7
+ multiple FSM transitions without juggling identity.
8
+
9
+ Crucially, this module does **not** import LangChain. The label resolution
10
+ walks duck-typed attributes (``.name`` first, ``.__name__`` second) so the
11
+ core FSM/monitor stays importable in environments without LangChain
12
+ installed. The actual ``BaseTool`` import only happens inside the
13
+ middleware module.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any, Callable
19
+
20
+
21
+ class ToolRef:
22
+ """Stable label-bearing reference to a LangChain tool or callable.
23
+
24
+ Label resolution order, applied once at construction:
25
+
26
+ 1. If ``tool`` has a ``.name`` attribute that is a non-empty string,
27
+ use it. (Covers ``BaseTool`` instances and ``@tool``-decorated
28
+ callables, which expose ``.name`` on the resulting
29
+ ``StructuredTool``.)
30
+ 2. Otherwise, if ``tool`` is callable and has ``__name__``, use that.
31
+ 3. Otherwise, raise ``TypeError``.
32
+
33
+ The label is read-only. Comparing or hashing two ``ToolRef`` objects
34
+ uses the label only — wrapping different callables that happen to
35
+ share a name yields equal refs.
36
+ """
37
+
38
+ __slots__ = ("_label", "_tool")
39
+
40
+ def __init__(self, tool: Any) -> None:
41
+ # `.name` first — covers BaseTool subclasses and @tool wrappers
42
+ # (StructuredTool exposes .name) without importing langchain.
43
+ name = getattr(tool, "name", None)
44
+ if isinstance(name, str) and name:
45
+ label = name
46
+ elif callable(tool) and getattr(tool, "__name__", None):
47
+ label = tool.__name__
48
+ else:
49
+ raise TypeError(
50
+ f"ToolRef expects a BaseTool, @tool callable, or named "
51
+ f"callable; got {type(tool).__name__}"
52
+ )
53
+ # __slots__ removes the instance __dict__, and the __setattr__ override
54
+ # below rejects every write, so initialization uses object.__setattr__.
55
+ object.__setattr__(self, "_label", label)
56
+ object.__setattr__(self, "_tool", tool)
57
+
58
+ def __setattr__(self, name: str, value: Any) -> None:
59
+ raise AttributeError(f"ToolRef is immutable; cannot set {name!r}")
60
+
61
+ @property
62
+ def label(self) -> str:
63
+ return self._label
64
+
65
+ @property
66
+ def tool(self) -> Any:
67
+ return self._tool
68
+
69
+ def __repr__(self) -> str:
70
+ return f"ToolRef({self._label!r})"
71
+
72
+ def __eq__(self, other: object) -> bool:
73
+ if not isinstance(other, ToolRef):
74
+ return NotImplemented
75
+ return self._label == other._label
76
+
77
+ def __hash__(self) -> int:
78
+ return hash(("ToolRef", self._label))
79
+
80
+
81
+ def ref(tool: Any) -> ToolRef:
82
+ """Convenience shorthand for ``ToolRef(tool)``.
83
+
84
+ This is the primary API developers reach for — they always pass the
85
+ tool function, never a name string.
86
+ """
87
+ return ToolRef(tool)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llmsessioncontract
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: Runtime monitor for LLM agent interaction protocols based on session type theory
5
5
  Author-email: Chris Bartolo Burlo <chris@mizziburlo.com>
6
6
  License-Expression: MIT
@@ -23,8 +23,12 @@ Description-Content-Type: text/markdown
23
23
  License-File: LICENSE
24
24
  Provides-Extra: langfuse
25
25
  Requires-Dist: langfuse>=3.0.0; extra == "langfuse"
26
+ Provides-Extra: langchain
27
+ Requires-Dist: langchain>=1.0.0; extra == "langchain"
28
+ Requires-Dist: langchain-core>=0.3.0; extra == "langchain"
26
29
  Provides-Extra: dev
27
30
  Requires-Dist: pytest; extra == "dev"
31
+ Requires-Dist: pytest-asyncio; extra == "dev"
28
32
  Dynamic: license-file
29
33
 
30
34
  # llmcontract
@@ -230,6 +234,80 @@ while True:
230
234
  messages.append(tool_result_msg(tc.id, result))
231
235
  ```
232
236
 
237
+ ## LangChain Integration (`llmcontract.langchain`, 0.3.0+)
238
+
239
+ A focused FSM-as-data API for users who want to wire protocol monitoring
240
+ into LangChain agents without touching the DSL parser. Tool references
241
+ are real Python callables, transitions are explicit objects with
242
+ optional guards and actions, and violation handling is fully
243
+ user-controlled.
244
+
245
+ ```bash
246
+ pip install llmsessioncontract[langchain]
247
+ ```
248
+
249
+ ```python
250
+ from langchain_core.tools import tool
251
+ from langchain.agents import create_agent
252
+ from llmcontract.langchain import (
253
+ ProtocolFSM, Transition, ProtocolMonitor,
254
+ ProtocolEnforcerMiddleware, ViolationEvent,
255
+ ProtocolViolationError, ref,
256
+ )
257
+
258
+ @tool
259
+ def search(query: str) -> str:
260
+ """Search for available flights."""
261
+ return f"Results for: {query}"
262
+
263
+ @tool
264
+ def book(result: str) -> str:
265
+ """Book a selected flight."""
266
+ return f"Booked: {result}"
267
+
268
+ search_ref = ref(search)
269
+ book_ref = ref(book)
270
+
271
+ fsm = (
272
+ ProtocolFSM(initial="idle")
273
+ .add_transition(Transition(source="idle", tool=search_ref, phase="send", target="searching"))
274
+ .add_transition(Transition(source="searching", tool=search_ref, phase="recv", target="results"))
275
+ .add_transition(Transition(source="results", tool=book_ref, phase="send", target="booking",
276
+ guard=lambda ctx: bool(ctx.metadata.get("args", {}))))
277
+ .add_transition(Transition(source="booking", tool=book_ref, phase="recv", target="done"))
278
+ .mark_terminal("done")
279
+ )
280
+
281
+ def on_violation(v: ViolationEvent) -> None:
282
+ raise ProtocolViolationError(f"Illegal {v.phase}:{v.tool_ref.label} from {v.current_state!r}", violation=v)
283
+
284
+ monitor = ProtocolMonitor(fsm=fsm, on_violation=on_violation)
285
+ middleware = ProtocolEnforcerMiddleware(monitor=monitor, tool_refs=[search_ref, book_ref]).middleware
286
+
287
+ agent = create_agent(model=..., tools=[search, book], middleware=[middleware])
288
+ agent.invoke({"messages": [("user", "Book me a flight to Rome.")]})
289
+
290
+ print(monitor.state) # → "done"
291
+ print(monitor.is_complete()) # → True
292
+ print(monitor.trace) # → ["send:search", "recv:search", "send:book", "recv:book"]
293
+ ```
294
+
295
+ When to pick this over the DSL `Monitor`:
296
+
297
+ - You're already in a LangChain stack and want a drop-in `AgentMiddleware`
298
+ - You need per-transition guards and actions (e.g., audit logs, business rules)
299
+ - You want enforcement (block tool calls), not just observation
300
+ - You don't need recursion / choice / `Unrecognized` from the DSL
301
+
302
+ When to stick with the DSL `Monitor`:
303
+
304
+ - You want to write protocols as concise strings (`!Search.?Result.end`)
305
+ - You need recursion or compositional choice
306
+ - You're outside LangChain (Anthropic SDK, OpenAI SDK, custom loop)
307
+ - You want first-class natural-language ambiguity via `Unrecognized`
308
+
309
+ Worked example: [`examples/langchain_booking/booking_agent_submodule.py`](examples/langchain_booking/booking_agent_submodule.py).
310
+
233
311
  ## Langfuse Integration
234
312
 
235
313
  Track protocol compliance in [Langfuse](https://langfuse.com) — every send/receive is recorded as a guardrail observation with a pass/fail score.
@@ -280,7 +358,8 @@ The skill validates each draft DSL against `llmcontract`'s parser, so anything i
280
358
 
281
359
  ## Case Studies
282
360
 
283
- - **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
361
+ - **[`llmcontract-tau2`](https://github.com/chrisbartoloburlo/llmcontract-tau2)** — *user ↔ agent layer.* Standalone replay of [tau2-bench](https://github.com/sierra-research/tau2-bench)'s shipped trajectories through `Monitor`. Headline: 11/1755 (0.6%) of trajectories that tau2 scored as passing violate the documented "obtain user confirmation before mutating the database" policy. Discussion upstream: [tau2-bench#298](https://github.com/sierra-research/tau2-bench/issues/298).
362
+ - **[`llmcontract-playwright-mcp`](https://github.com/chrisbartoloburlo/llmcontract-playwright-mcp)** — *agent ↔ tool layer.* 90-trajectory sweep across Claude Haiku 4.5 / Sonnet 4.6 / Opus 4.7 driving [`@playwright/mcp`](https://github.com/microsoft/playwright-mcp), checked against two invariants from the server's documented usage. Headline: 9% violate `snap-before-interact`, 29% violate `stay-on-snapshot-refs` — and the failure modes scale in opposite directions with model capability (Haiku snapshots religiously but ignores the snapshot 57% of the time; Opus skips the snapshot 13% of the time but follows through cleanly when it commits).
284
363
 
285
364
  ## Research
286
365
 
@@ -12,6 +12,12 @@ llmcontract/integration/exceptions.py
12
12
  llmcontract/integration/langfuse.py
13
13
  llmcontract/integration/middleware.py
14
14
  llmcontract/integration/types.py
15
+ llmcontract/langchain/__init__.py
16
+ llmcontract/langchain/exceptions.py
17
+ llmcontract/langchain/fsm.py
18
+ llmcontract/langchain/middleware.py
19
+ llmcontract/langchain/monitor.py
20
+ llmcontract/langchain/tool_ref.py
15
21
  llmcontract/monitor/__init__.py
16
22
  llmcontract/monitor/automaton.py
17
23
  llmcontract/monitor/monitor.py
@@ -0,0 +1,11 @@
1
+
2
+ [dev]
3
+ pytest
4
+ pytest-asyncio
5
+
6
+ [langchain]
7
+ langchain>=1.0.0
8
+ langchain-core>=0.3.0
9
+
10
+ [langfuse]
11
+ langfuse>=3.0.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "llmsessioncontract"
7
- version = "0.2.2"
7
+ version = "0.3.0"
8
8
  description = "Runtime monitor for LLM agent interaction protocols based on session type theory"
9
9
  requires-python = ">=3.10"
10
10
  license = "MIT"
@@ -33,7 +33,8 @@ Issues = "https://github.com/chrisbartoloburlo/llmcontract/issues"
33
33
 
34
34
  [project.optional-dependencies]
35
35
  langfuse = ["langfuse>=3.0.0"]
36
- dev = ["pytest"]
36
+ langchain = ["langchain>=1.0.0", "langchain-core>=0.3.0"]
37
+ dev = ["pytest", "pytest-asyncio"]
37
38
 
38
39
  [tool.setuptools.packages.find]
39
40
  include = ["llmcontract*"]
@@ -1,6 +0,0 @@
1
-
2
- [dev]
3
- pytest
4
-
5
- [langfuse]
6
- langfuse>=3.0.0