extodan-agentsync 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentsync/__init__.py +17 -0
- agentsync/__main__.py +55 -0
- agentsync/demo/langgraph_demo.py +232 -0
- agentsync/harness.py +252 -0
- agentsync/models.py +251 -0
- agentsync/py.typed +0 -0
- agentsync/repro.py +146 -0
- agentsync/store.py +223 -0
- agentsync/strategies/__init__.py +43 -0
- agentsync/strategies/crdt.py +275 -0
- agentsync/strategies/lww.py +115 -0
- agentsync/strategies/transactional.py +209 -0
- agentsync/table.py +54 -0
- agentsync/workloads/__init__.py +102 -0
- extodan_agentsync-0.1.0.dist-info/METADATA +197 -0
- extodan_agentsync-0.1.0.dist-info/RECORD +19 -0
- extodan_agentsync-0.1.0.dist-info/WHEEL +4 -0
- extodan_agentsync-0.1.0.dist-info/entry_points.txt +2 -0
- extodan_agentsync-0.1.0.dist-info/licenses/LICENSE +21 -0
agentsync/models.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""The three-strategy contract.
|
|
2
|
+
|
|
3
|
+
Everything in the benchmark flows through these types so that LWW,
|
|
4
|
+
LLM-transactional, and CRDT (Loro) are measured behind ONE common interface.
|
|
5
|
+
The types here are deliberately small and backend-agnostic: a strategy is a
|
|
6
|
+
class with an `apply(write) -> Outcome` method and a `finalized_state()` call.
|
|
7
|
+
|
|
8
|
+
Two value kinds matter for the thesis:
|
|
9
|
+
|
|
10
|
+
* ``FieldKind.scalar`` — a single value; two concurrent writes to the same
|
|
11
|
+
scalar with differing values are a SEMANTIC conflict (un-mergeable). LWW
|
|
12
|
+
silently picks one; CRDT must ESCALATE.
|
|
13
|
+
* ``FieldKind.grow_set`` / ``FieldKind.append_text`` — monotone, mergeable.
|
|
14
|
+
Concurrent writes union / concatenate; no write is lost. This is where the
|
|
15
|
+
CRDT wins for free and LWW still corrupts (it overwrites the whole field).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from enum import Enum
|
|
22
|
+
from typing import Any, Literal, Protocol
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class EndState(str, Enum):
    """The route by which a strategy arrived at its end state on a workload.

    Kept apart from ``verdict`` on purpose: ``verdict`` only records whether
    the end state was acceptable, while this enum records *how* it was
    reached. Two strategies may both PASS and still have taken routes that
    are not interchangeable.

    The distinction most easily misread is ``resolved`` vs ``escalated``:

    * ``resolved`` pays for the work immediately and leaves a field the next
      agent can act on.
    * ``escalated`` is cheap because nothing is decided. Both the cost and
      the correctness are deferred: the field stays divergent until the
      escalation queue is drained, so an agent that must read it on its next
      step is blocked. That is free for asynchronous knowledge-merge, but a
      synchronous reader pays the inference later plus a stall. Escalation
      is the safer primitive (a resolver can be added afterwards), not a
      free lunch on time-to-usable-state.

    Members:

    * ``auto_merged`` -- the workload had no semantic conflict; mergeable
      writes were unioned/concatenated. Every correct strategy ends here on
      a clean merge; LWW ends here only when it dropped no writes.
    * ``corrupted`` -- a conflict (or a concurrent write to a mergeable
      field) existed and was silently dropped or overwritten with no
      signal. The LWW corruption baseline.
    * ``resolved`` -- a real conflict existed and the strategy spent work
      (a model call) to decide and repair on its own. The transactional
      route: correct end state, costs inference, no human in the loop.
    * ``escalated`` -- a real conflict existed and was flagged for a
      downstream consumer rather than decided. The CRDT route.
    """

    auto_merged = "auto_merged"
    corrupted = "corrupted"
    resolved = "resolved"
    escalated = "escalated"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class FieldKind(str, Enum):
    """Merge semantics for concurrent writes to one field.

    * ``scalar`` -- a single value; this is where semantic conflicts come
      from.
    * ``grow_set`` -- mergeable by construction (union).
    * ``append_text`` -- mergeable by construction (concatenation).

    The two mergeable kinds are the ones on which the CRDT converges for
    free.
    """

    scalar = "scalar"
    grow_set = "grow_set"
    append_text = "append_text"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class Write:
    """One immutable, attributed write issued by a single agent.

    ``agent_id`` and ``op_id`` are carried through every merge, because
    attribution completeness is a measured metric in its own right.
    ``op_id`` orders the writes of one agent. The optional
    ``happens_after`` lets a workload state the partial order ("B's write
    saw A's write") so the harness can tell true concurrency apart from
    sequential causality.
    """

    # Identity of the writing agent.
    agent_id: str
    # Per-agent sequence number of this write.
    op_id: int
    # Name of the field being written.
    field: str
    # Payload; its interpretation depends on ``kind``.
    value: Any
    # Merge semantics for the field; scalar unless stated otherwise.
    kind: FieldKind = FieldKind.scalar
    # (agent_id, op_id) of a write this one observed, if any.
    happens_after: tuple[str, int] | None = None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
class Outcome:
    """Per-write report returned by a strategy's ``apply``.

    ``applied`` is True when the write is reflected in the finalized state;
    under LWW a False value means the write was overwritten, which is the
    corruption signal. ``escalation`` is set only when the strategy found a
    semantic conflict and surfaced it rather than merging silently.
    """

    applied: bool
    escalation: "Escalation | None" = None
    # Optional diagnostic; not every strategy gives it a meaningful value.
    # NOTE(review): presumably True when this write replaced an earlier
    # value -- confirm against the strategy implementations.
    overwrote_prior: bool = False
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass
class Escalation:
    """Record of a semantic conflict that was surfaced, not auto-resolved.

    Under the thesis, the CRDT strategy merges what is mergeable and
    escalates semantic / un-mergeable conflicts instead of quietly choosing
    a winner. LWW never produces one of these (it corrupts silently);
    transactional escalates via a model call.
    """

    # Name of the field the contenders disagree on.
    field: str
    # The competing writes, each carrying its own attribution.
    contenders: list["Contender"]
    # Machine-readable cause of the escalation.
    reason: str = "semantic_conflict"
|
|
126
|
+
|
|
127
|
+
@dataclass
class Contender:
    """One side of an escalated conflict: who wrote which value, and when."""

    # Agent that issued the competing write.
    agent_id: str
    # That agent's sequence number for the write.
    op_id: int
    # The value the agent tried to set.
    value: Any
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass
class Metrics:
    """Counters accumulated over every write in a single run.

    Each counter corresponds to one column of the comparison table.
    ``writes_lost`` is the convergence-correctness signal: the number of
    writes that were applied to a replica yet did NOT make it into the final
    converged state.
    """

    writes_seen: int = 0
    writes_applied: int = 0
    writes_lost: int = 0
    escalations: int = 0
    # Stays zero for every strategy except transactional.
    model_calls: int = 0
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class MergeStrategy(Protocol):
    """The single interface shared by all three strategies.

    One instance holds one replica's view of the document. The harness
    builds an instance per (strategy, replica) pair, feeds it that replica's
    writes, and then reconciles replicas through :meth:`export_state` and
    :meth:`import_state`.

    Merging by export (instead of a central ``apply_to_all``) mirrors how
    real multi-agent systems replicate: every agent edits a local copy and a
    sync layer reconciles them. LWW and transactional simulate that with a
    shared dict plus timestamp; CRDT does it natively via Loro
    frontiers/blobs.
    """

    # Short identifier of the strategy.
    name: str

    def apply(self, write: Write) -> Outcome:
        """Apply one local write and report what happened to it."""
        ...

    def export_state(self) -> bytes:
        """Serialize this replica's state so another replica can import it."""
        ...

    def import_state(self, blob: bytes) -> None:
        """Merge state exported by another replica into this one."""
        ...

    def finalized_state(self) -> dict[str, Any]:
        """Return the converged, human-readable document for assertion / display."""
        ...

    def metrics(self) -> Metrics:
        """Return the counters accumulated by this replica."""
        ...
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# ---------------------------------------------------------------------------
|
|
183
|
+
# Workload definitions
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@dataclass
class Workload:
    """A named scenario: the writes each replica performs, plus the expected result.

    ``writes_by_replica[replica_id]`` holds the writes that replica applies
    LOCALLY before any merge takes place; afterwards the harness merges the
    replicas and checks convergence. ``expectation`` describes what a
    CORRECT strategy should produce, which lets the table report pass/fail
    per metric and not only raw numbers.
    """

    name: str
    description: str
    # replica id -> ordered local writes applied before merging.
    writes_by_replica: dict[str, list[Write]]
    expectation: "Expectation"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@dataclass
class Expectation:
    """Definition of correct behavior for one workload.

    The comparison table uses it to turn raw metrics into pass/fail
    verdicts. ``semantic_conflict_on`` names the fields on which a correct
    strategy MUST escalate; when it is empty the workload is a clean merge,
    and any escalation or lost write counts as a failure.
    """

    # True when no semantic conflict is expected.
    clean_merge: bool
    # True when a correct strategy must finish with writes_lost == 0.
    all_writes_survive: bool
    # Fields that must be escalated; empty for clean-merge workloads.
    semantic_conflict_on: tuple[str, ...] = ()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@dataclass
class RunResult:
    """A single comparison-table row: one strategy run against one workload."""

    strategy: str
    workload: str
    # True when all replicas reached identical finalized_state.
    converged: bool
    writes_lost: int
    # True when every surviving write traces back to an agent.
    attribution_complete: bool
    escalations: int
    model_calls: int
    latency_ms: float
    peak_mem_kb: float
    # ``verdict`` answers "was the end state acceptable?"; ``outcome``
    # answers "by which route?". Routes are NOT interchangeable between
    # strategies even when both PASS, and this column is what stops
    # escalate / resolve / corrupt from hiding behind the same green
    # checkmark.
    outcome: EndState = EndState.auto_merged
    verdict: Literal["PASS", "FAIL"] = "PASS"
    notes: list[str] = field(default_factory=list)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# Public API of this module. ``Contender`` is exported alongside
# ``Escalation`` because it is the element type of ``Escalation.contenders``
# and consumers need it to type or construct escalation records.
__all__ = [
    "FieldKind",
    "EndState",
    "Write",
    "Outcome",
    "Escalation",
    "Contender",
    "Metrics",
    "MergeStrategy",
    "Workload",
    "Expectation",
    "RunResult",
]
|
agentsync/py.typed
ADDED
|
File without changes
|
agentsync/repro.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Independent reproduction of the langgraph silent-write-loss bug.
|
|
2
|
+
|
|
3
|
+
This module exists to prove the bug is real in VANILLA langgraph — no
|
|
4
|
+
agentsync code in the write path. A hostile reader's first instinct is "the
|
|
5
|
+
demo was rigged to make agentsync look good"; this closes that door.
|
|
6
|
+
|
|
7
|
+
Run two parallel nodes that ``store.put()`` to the SAME key against the STOCK
|
|
8
|
+
``InMemoryStore``. Show the raw final state: one write gone, no error raised.
|
|
9
|
+
Then, clearly delineated, run the SAME graph against ``SyncedStore`` to show
|
|
10
|
+
the fix.
|
|
11
|
+
|
|
12
|
+
python -m agentsync.repro # or: make repro
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import sys
|
|
18
|
+
from importlib.metadata import version as pkg_version
|
|
19
|
+
from typing import Any, Callable
|
|
20
|
+
|
|
21
|
+
from langgraph.graph import END, START, StateGraph
|
|
22
|
+
from langgraph.store.base import BaseStore
|
|
23
|
+
from langgraph.store.memory import InMemoryStore
|
|
24
|
+
from typing_extensions import TypedDict
|
|
25
|
+
|
|
26
|
+
from .store import SyncedStore
|
|
27
|
+
|
|
28
|
+
# Store coordinates used by every put/get in this module: one namespace and
# one key, so that both parallel writers target the SAME store entry.
NS = ("ctx",)
KEY = "k"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class _S(TypedDict):
|
|
33
|
+
"""Minimal graph state. The shared memory lives in the store, not here."""
|
|
34
|
+
trigger: int
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _node(agent_id: str, payload: dict[str, Any]) -> Callable:
    """Build a graph node that puts ``payload`` under the shared key as ``agent_id``.

    The node only calls the injected ``store.put``. Against a stock
    ``InMemoryStore`` that is a blind overwrite; against ``SyncedStore`` it
    merges. The node body is the same for both runs -- the store instance is
    the only variable, which is the point of the reproduction.

    Args:
        agent_id: Identity used for attribution when the store supports it.
        payload: Value written to ``(NS, KEY)``.

    Returns:
        A node callable taking ``(state, store)`` and returning an empty
        state update.
    """

    def node(_state: _S, store: BaseStore) -> dict:
        # The baseline InMemoryStore has no ``acting_as``; in that case the
        # put is issued without attribution. The put() call is the same in
        # both paths.
        if not hasattr(store, "acting_as"):
            store.put(NS, KEY, payload)
            return {}
        with store.acting_as(agent_id):
            store.put(NS, KEY, payload)
        return {}

    return node
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _build_and_run(store: BaseStore, nodes: dict[str, Callable]) -> dict | None:
    """Run every node in parallel against ``store`` and return the stored value.

    Each node gets its own ``START -> node -> END`` path, so all nodes fan
    out directly from ``START``.

    Args:
        store: Store instance handed to ``compile(store=...)``.
        nodes: Mapping of node name to node callable.

    Returns:
        The value found at ``(NS, KEY)`` after the run, or ``None`` when the
        lookup yields nothing.
    """
    graph = StateGraph(_S)
    for node_name in nodes:
        graph.add_node(node_name, nodes[node_name])
        graph.add_edge(START, node_name)
        graph.add_edge(node_name, END)

    compiled = graph.compile(store=store)
    compiled.invoke({"trigger": 0})

    stored = store.get(NS, KEY)
    if not stored:
        return None
    return stored.value
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _section(title: str) -> None:
|
|
71
|
+
print("\n" + "=" * 68)
|
|
72
|
+
print(title)
|
|
73
|
+
print("=" * 68)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def main() -> int:
    """Run the two-part reproduction and print a verdict.

    Part 1 runs two parallel nodes that put to the same key on a stock
    ``InMemoryStore``; part 2 runs the same nodes on a ``SyncedStore``.
    Write-loss in part 1 is detected by checking that BOTH agents' tags are
    present in the final value. Which of the two concurrent puts lands last
    is not fixed by this script, so checking only one agent would report
    "no loss" whenever that agent happened to be the surviving writer.

    Returns:
        Always ``0``; results are reported on stdout only.
    """
    # Best effort: the version line is informational and must not abort the run.
    try:
        lg_version = pkg_version("langgraph")
    except Exception:  # pragma: no cover
        lg_version = "unknown"

    print("agentsync bug reproduction — langgraph silent write-loss")
    print(f"detected langgraph version: {lg_version}")

    # The two concurrent writes. Both target the SAME key. ``tags`` is a list
    # (mergeable by union); ``status`` is a scalar whose two values diverge,
    # i.e. a semantic conflict.
    payload_a = {"tags": ["crdt", "agents"], "status": "draft"}
    payload_b = {"tags": ["benchmark"], "status": "published"}
    nodes = {"agent_A": _node("A", payload_a), "agent_B": _node("B", payload_b)}

    # ------------------------------------------------------------------ #
    # PART 1 — stock langgraph. NO agentsync in the write path.
    # ------------------------------------------------------------------ #
    _section("PART 1 — stock langgraph InMemoryStore (no agentsync in play)")
    print(f"agent_A puts: {payload_a}")
    print(f"agent_B puts: {payload_b}")
    print("running two PARALLEL nodes against InMemoryStore...")

    plain = InMemoryStore()
    final = _build_and_run(plain, nodes)
    print(f"\nfinal store value: {final}")
    plain_tags = set((final or {}).get("tags") or [])
    a_tags_survive = "crdt" in plain_tags
    b_tags_survive = "benchmark" in plain_tags
    # A write is lost as soon as EITHER agent's tags are missing.
    write_lost = not (a_tags_survive and b_tags_survive)
    print(
        "agent_A's tags survived? "
        + ("YES" if a_tags_survive else "NO — silently dropped")
    )
    print(
        "agent_B's tags survived? "
        + ("YES" if b_tags_survive else "NO — silently dropped")
    )
    if write_lost:
        print("-> BUG REPRODUCED: one concurrent write is gone. No exception was raised.")
    else:
        print("-> both writes survived — unexpected for a stock InMemoryStore.")

    # ------------------------------------------------------------------ #
    # PART 2 — the SAME graph, only the store instance changes.
    # ------------------------------------------------------------------ #
    _section("PART 2 — same graph, SyncedStore (agentsync in play)")
    synced = SyncedStore()
    final2 = _build_and_run(synced, nodes)
    print(f"final store value: {final2}")
    all_tags = set((final2 or {}).get("tags") or [])
    print(f"agent_A's tags survived? {'YES' if 'crdt' in all_tags else 'NO'}")
    print(f"agent_B's tags survived? {'YES' if 'benchmark' in all_tags else 'NO'}")
    escs = synced.escalations()
    if escs:
        for esc in escs:
            contenders = ", ".join(f"{c.agent_id}='{c.value}'" for c in esc.contenders)
            print(f"semantic conflict on '{esc.field}' ESCALATED (not auto-resolved): {contenders}")
    else:
        print("no escalations")

    # ------------------------------------------------------------------ #
    # Verdict
    # ------------------------------------------------------------------ #
    _section("VERDICT")
    print(
        "stock InMemoryStore: "
        + ("silent write-loss reproduced" if write_lost else "no loss observed")
    )
    print(
        f"SyncedStore:         merged tags = {sorted(all_tags)}, "
        f"{len(escs)} escalation(s)"
    )
    print("\nThe bug is in vanilla langgraph. SyncedStore fixes it. Same graph,")
    print("same writes — only the store instance differs.")
    return 0
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
agentsync/store.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""SyncedStore — the drop-in shared-state backend that fixes LangGraph's silent
|
|
2
|
+
write-loss.
|
|
3
|
+
|
|
4
|
+
The problem this exists to solve (verified against langgraph 1.2.6): when two
|
|
5
|
+
parallel nodes call ``store.put`` on the SAME key, ``InMemoryStore`` silently
|
|
6
|
+
overwrites the first write with the second. No error, no merge, no signal —
|
|
7
|
+
last-write-wins, A's contribution gone. A team wiring parallel agents to shared
|
|
8
|
+
context never learns a write was dropped.
|
|
9
|
+
|
|
10
|
+
``SyncedStore`` subclasses ``InMemoryStore`` so it's a 1-line swap, and routes
|
|
11
|
+
every ``PutOp`` through a per-key CRDT engine (the same Loro/eg-walker merge
|
|
12
|
+
from the benchmark). Concurrent writes to the SAME key now:
|
|
13
|
+
|
|
14
|
+
* **merge** when they're mergeable — list values union (set-style), nested dicts
|
|
15
|
+
deep-merge, text-convention keys concatenate. No write is lost.
|
|
16
|
+
* **escalate** when they're a semantic conflict — two agents setting the same
|
|
17
|
+
SCALAR field to different values. The conflict is recorded as an
|
|
18
|
+
:class:`Escalation` event with both contenders attributed, and NOT silently
|
|
19
|
+
resolved. The field holds a sentinel until someone drains the escalation.
|
|
20
|
+
|
|
21
|
+
Every write is attributed to an ``agent_id``. langgraph's ``store.put`` has no
|
|
22
|
+
agent parameter, so attribution is wired through a contextvar: wrap a node's
|
|
23
|
+
puts in ``with store.acting_as(agent_id):`` (one extra line) and every put
|
|
24
|
+
inside carries that id through merges.
|
|
25
|
+
|
|
26
|
+
The rest of the store — ``get``/``delete``/``search`` — is inherited unchanged
|
|
27
|
+
from ``InMemoryStore``; only the write path is intercepted.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import contextvars
|
|
33
|
+
from contextlib import contextmanager
|
|
34
|
+
from dataclasses import dataclass, field
|
|
35
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable
|
|
36
|
+
|
|
37
|
+
from langgraph.store.base import Op, PutOp
|
|
38
|
+
from langgraph.store.memory import InMemoryStore
|
|
39
|
+
|
|
40
|
+
from .models import Escalation, FieldKind, Write
|
|
41
|
+
from .strategies.crdt import CRDTStrategy
|
|
42
|
+
|
|
43
|
+
if TYPE_CHECKING:
|
|
44
|
+
from collections.abc import Iterator
|
|
45
|
+
|
|
46
|
+
# The agent_id attribution context. langgraph's batch() receives no config, so
# a contextvar is the honest channel for "which agent is writing right now". It
# defaults to "anonymous" so the store never hard-fails on an unwrapped put —
# but attribution completeness then can't be guaranteed, and metrics say so.
# Set and reset by ``SyncedStore.acting_as``; read on every merged put.
_current_agent: contextvars.ContextVar[str] = contextvars.ContextVar(
    "agentsync_agent_id", default="anonymous"
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class _KeyEngine:
    """The CRDT engine plus op counters for one logical key ((namespace, key)).

    langgraph issues ``batch`` once per node per key per superstep, so N
    parallel nodes hitting one key produce N separate writes. Each of them
    ends up in this object, which is the per-key accumulator where the merge
    happens.
    """

    engine: CRDTStrategy = field(default_factory=CRDTStrategy)
    # agent_id -> next op number; monotone so the CRDT can order one agent's writes.
    _ops: dict[str, int] = field(default_factory=dict)

    def next_op(self, agent_id: str) -> int:
        """Return the next op number for ``agent_id`` (starting at 0) and advance it."""
        issued = self._ops.setdefault(agent_id, 0)
        self._ops[agent_id] = issued + 1
        return issued

    def state(self) -> dict[str, Any]:
        """Return the engine's finalized (merged) state for this key."""
        return self.engine.finalized_state()

    def escalations(self) -> list[Escalation]:
        """Return a copy of the engine's current escalation list."""
        # finalized_state() recomputes escalations into the engine's list, so
        # it has to run before the list is read; its return value is unused.
        self.engine.finalized_state()
        return [*self.engine._escalations]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _classify_field(key: str, value: Any) -> FieldKind:
    """Infer a field kind from a raw JSON-store value and its key name.

    Benchmark workloads declare kinds explicitly, but a drop-in store only
    sees raw values and has to guess:

    * a ``list`` value is mergeable by union -> ``grow_set``;
    * a ``str`` value whose key contains ``text``, ``notes``, ``findings``
      or ``log`` (case-insensitive substring match) -> ``append_text``;
    * anything else -> ``scalar``, the conflict surface.

    The default is deliberately conservative: when unsure, ``scalar``,
    meaning "escalate on divergence" rather than "silently merge".

    Args:
        key: Field name within the stored value.
        value: The field's raw value.

    Returns:
        The inferred :class:`FieldKind`.
    """
    if isinstance(value, list):
        return FieldKind.grow_set
    if not isinstance(value, str):
        return FieldKind.scalar

    lowered = key.lower()
    for marker in ("text", "notes", "findings", "log"):
        if marker in lowered:
            return FieldKind.append_text
    return FieldKind.scalar
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class SyncedStore(InMemoryStore):
    """``InMemoryStore`` whose writes CRDT-merge instead of last-write-wins.

    Drop-in: ``graph = builder.compile(store=SyncedStore())``. Get merges +
    attribution + escalation for free; lose nothing that ``InMemoryStore``
    already does.

    NOTE(review): only the synchronous ``batch`` chokepoint is intercepted
    here. Whether ``InMemoryStore.abatch`` (used by ``aput``) routes through
    ``batch`` is not visible from this module -- confirm before relying on
    merge semantics from async nodes.
    """

    def __init__(self) -> None:
        super().__init__()
        # One CRDT engine per key. Keys are (namespace, key) tuples.
        self._engines: dict[tuple[tuple[str, ...], str], _KeyEngine] = {}
        # Optional consumer hook registered through ``on_escalation``.
        self._escalation_cb: Callable[[Escalation], None] | None = None
        # Signatures of escalations already handed to the hook, so a conflict
        # is announced once rather than on every later put to the same key.
        self._surfaced: set[tuple[Any, ...]] = set()

    # ------------------------------------------------------------------
    # Attribution context
    # ------------------------------------------------------------------
    @contextmanager
    def acting_as(self, agent_id: str) -> "Iterator[SyncedStore]":
        """Scope the agent_id attributed to puts inside this block.

        Example::

            with store.acting_as("researcher"):
                store.put(("ctx",), "notes", {...})  # attributed to researcher

        Why a contextvar and not a put() arg: langgraph's store API has no agent
        parameter, and ``batch()`` receives no config. A contextvar is the one
        channel that threads an id from a node into the store without forking
        langgraph internals.

        Args:
            agent_id: Identity recorded on every put issued inside the block.

        Yields:
            This store, so the block can be written ``with ... as store``.
        """
        token = _current_agent.set(agent_id)
        try:
            yield self
        finally:
            # Restore the previous agent even if the block raised.
            _current_agent.reset(token)

    @property
    def current_agent(self) -> str:
        """The agent_id currently in effect (``"anonymous"`` outside ``acting_as``)."""
        return _current_agent.get()

    # ------------------------------------------------------------------
    # The write path — intercept PutOps and route through the CRDT engine
    # ------------------------------------------------------------------
    def batch(self, ops: Iterable[Op]) -> list:
        """Override the chokepoint: merge same-key concurrent writes.

        ``put`` is concrete in ``BaseStore`` and calls ``self.batch([PutOp])``,
        so this is the single place every write passes through. For each
        ``PutOp`` carrying a value we (a) feed each field of the value into
        the key's CRDT engine, (b) read back the merged value, (c) hand a
        *merged* PutOp to super().batch so the parent storage reflects the
        union, not the clobber.

        A ``PutOp`` whose value is ``None`` is a delete. It is passed through
        unchanged, and the key's CRDT engine is discarded so that a later put
        to the same key starts from a clean state instead of resurrecting the
        deleted data.

        Args:
            ops: Store operations, in order.

        Returns:
            Whatever the parent ``batch`` returns for the prepared operations.
        """
        prepared: list[Op] = []
        for op in ops:
            if isinstance(op, PutOp):
                if op.value is None:
                    self._forget_key((op.namespace, op.key))
                else:
                    merged = self._merge_through_crdt(op)
                    op = PutOp(
                        op.namespace, op.key, merged, index=op.index, ttl=op.ttl
                    )
            prepared.append(op)
        return super().batch(prepared)

    def _forget_key(self, k: tuple[tuple[str, ...], str]) -> None:
        """Drop all merge state held for a deleted key."""
        self._engines.pop(k, None)
        # Op ids restart at 0 for a fresh engine, so stale signatures for this
        # key could otherwise suppress a genuinely new escalation.
        self._surfaced = {sig for sig in self._surfaced if sig[0] != k}

    def _merge_through_crdt(self, op: PutOp) -> dict[str, Any]:
        """Feed one PutOp's value fields into the key's CRDT, return merged value.

        The store's value model is a flat ``dict[str, JSON]``. Each top-level
        field becomes a CRDT field; its kind is inferred from value shape. The
        engine merges concurrent fields across the multiple batch() calls the
        runtime makes for parallel nodes.

        Args:
            op: A ``PutOp`` whose ``value`` is a dict (not ``None``).

        Returns:
            The key's merged, materialized state after applying ``op``.
        """
        k = (op.namespace, op.key)
        engine = self._engines.setdefault(k, _KeyEngine())
        agent_id = _current_agent.get()
        for field_name, field_value in op.value.items():
            kind = _classify_field(field_name, field_value)
            crdt_value = field_value
            if kind is FieldKind.grow_set:
                # Coerce list into the set semantics the CRDT grow_set expects.
                try:
                    crdt_value = set(field_value)
                except TypeError:
                    # Unhashable members (e.g. a list of dicts) cannot be
                    # unioned as a set. Fall back to the conservative kind --
                    # escalate on divergence -- rather than failing the put
                    # after earlier fields were already applied.
                    kind = FieldKind.scalar
            engine.engine.apply(
                Write(
                    agent_id=agent_id,
                    op_id=engine.next_op(agent_id),
                    field=field_name,
                    value=crdt_value,
                    kind=kind,
                )
            )
        # Read back the merged, materialized state — this is what gets stored.
        merged = engine.state()
        # Surface CRDT escalations on the store so a caller can drain them.
        # engine.escalations() returns the full current list each time, so
        # only escalations not announced before are forwarded to the hook.
        for esc in engine.escalations():
            signature = (
                k,
                esc.field,
                tuple((c.agent_id, c.op_id) for c in esc.contenders),
            )
            if signature in self._surfaced:
                continue
            self._surfaced.add(signature)
            self._on_escalation(esc)
        return merged

    # ------------------------------------------------------------------
    # Escalation surface
    # ------------------------------------------------------------------
    # Callable hook: a consumer wires ``store.on_escalation(my_callback)`` to be
    # notified instead of polling. Deliberately just a function attribute, not a
    # queue/worker — the consumer's shape (human? supervisor? retry?) is unknown
    # until validated, so we surface the event and get out of the way.
    def on_escalation(self, callback: Callable[[Escalation], None]) -> None:
        """Register ``callback`` to be called with each newly surfaced escalation.

        Registering replaces any previous callback. Escalations that are
        still open are delivered to the new callback on the next put to their
        key, as if they had not been announced yet.
        """
        self._escalation_cb = callback
        self._surfaced.clear()

    def _on_escalation(self, esc: Escalation) -> None:
        """Forward ``esc`` to the registered callback, if there is one."""
        cb = getattr(self, "_escalation_cb", None)
        if cb is not None:
            cb(esc)

    def escalations(self) -> list[Escalation]:
        """All semantic conflicts observed across all keys (drain point)."""
        out: list[Escalation] = []
        for engine in self._engines.values():
            out.extend(engine.escalations())
        return out

    def attribution(self) -> dict[str, dict[str, dict[str, Any]]]:
        """Per-key, per-write attribution: which agent wrote each surviving field.

        Keys of the returned mapping are ``f"{namespace}:{key}"`` strings,
        where ``namespace`` is formatted as the namespace tuple.
        """
        return {
            f"{ns}:{key}": eng.engine.attribution()
            for (ns, key), eng in self._engines.items()
        }
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# Only the store class is public; _KeyEngine and _classify_field stay internal.
__all__ = ["SyncedStore"]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Strategies for reconciling concurrent agent writes to shared state.
|
|
2
|
+
|
|
3
|
+
Each module implements :class:`agentsync.models.MergeStrategy` behind the ONE
|
|
4
|
+
common interface so the benchmark harness can swap them without touching
|
|
5
|
+
workload or measurement code.
|
|
6
|
+
|
|
7
|
+
* :mod:`lww` — last-write-wins. The silent-corruption baseline.
|
|
8
|
+
* :mod:`transactional` — LLM-mediated conflict resolution (CoAgent MTPO approx).
|
|
9
|
+
* :mod:`crdt` — Loro / eg-walker deterministic merge.
|
|
10
|
+
|
|
11
|
+
Exported here as factory callables (``make_strategy(name)``) so the harness and
|
|
12
|
+
workloads share a single registry.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from ..models import MergeStrategy
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def make_strategy(name: str) -> MergeStrategy:
    """Build a fresh replica of the strategy called ``name``.

    Args:
        name: One of ``"lww"``, ``"transactional"`` or ``"crdt"``.

    Returns:
        A new, independent strategy instance.

    Raises:
        ValueError: If ``name`` is not a known strategy. Failing loudly
            (instead of returning None) stops a typo from silently turning
            the benchmark into a two-strategy comparison.
    """
    # Each import is deferred to its branch so that a missing optional
    # dependency (e.g. loro) only fails when that strategy is selected, not
    # when the package is imported.
    if name == "lww":
        from .lww import LWWStrategy as strategy_cls
    elif name == "transactional":
        from .transactional import TransactionalStrategy as strategy_cls
    elif name == "crdt":
        from .crdt import CRDTStrategy as strategy_cls
    else:
        raise ValueError(f"unknown strategy: {name!r}")
    return strategy_cls()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# The factory is the only public name; strategy classes are imported lazily inside it.
__all__ = ["make_strategy"]
|