extodan-agentsync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentsync/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """Agent-State Sync Engine — benchmark-first.
2
+
3
+ The thesis under test: for multi-agent MERGEABLE shared state (notes, context,
4
+ structured docs), an event-graph CRDT (eg-walker family, here via Loro) gives
5
+ deterministic coordinator-free convergence with full write attribution at
6
+ ZERO model calls — beating naive last-write-wins (silent corruption) and
7
+ LLM-mediated transactional control (costs inference per conflict).
8
+
9
+ The benchmark is the product. See README for the thesis, build order, and
10
+ results.
11
+ """
12
+
13
# Runtime-visible package version. Keep this in sync with the distribution
# metadata: this code ships in the 0.1.0 wheel, so reporting "0.0.1" here
# would make `agentsync.__version__` disagree with the installed package.
__version__ = "0.1.0"
14
+
15
+ from .store import SyncedStore
16
+
17
+ __all__ = ["SyncedStore"]
agentsync/__main__.py ADDED
@@ -0,0 +1,55 @@
1
+ """`python -m agentsync` / `make bench` entry point.
2
+
3
+ Runs every available strategy against every available workload, prints the
4
+ comparison table, and exits nonzero if a strategy that is expected to pass
5
+ (crdt, transactional) FAILED its expectation — so the benchmark doubles as a
6
+ regression gate: if CRDT suddenly loses a write, the run fails. LWW failures
7
+ are the demonstrated baseline and do not fail the run.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import sys
13
+
14
+ from .harness import run_one
15
+ from .strategies import make_strategy
16
+ from .table import render_json, render_table
17
+ from .workloads import all_workloads
18
+
19
+
20
+ # Which strategies to run. All three (lww, transactional, crdt) are wired
21
+ # up: lww is the baseline whose failures are expected, while transactional
22
+ # and crdt are the strategies main() expects to pass.
23
+ _AVAILABLE_STRATEGIES = ["lww", "transactional", "crdt"]
24
+
25
+
26
def main() -> int:
    """Run the full strategy x workload matrix and report regressions.

    Every strategy in ``_AVAILABLE_STRATEGIES`` is run against every workload
    returned by ``all_workloads()``; the rendered comparison table is printed
    to stdout.

    Returns:
        ``1`` if any row belonging to a strategy that is expected to pass
        (``crdt`` or ``transactional``) has a ``FAIL`` verdict, else ``0``.
    """
    # Workloads form the outer loop, strategies the inner one, so rows for
    # the same workload sit next to each other in the results list.
    results = [
        run_one(strat, wl, make_strategy)
        for wl in all_workloads()
        for strat in _AVAILABLE_STRATEGIES
    ]

    print()
    print(render_table(results))
    print()

    # A FAIL verdict describes the STRATEGY, not the harness. LWW failing is
    # the corruption the benchmark exists to demonstrate, so it never fails
    # the run; only a FAIL from a strategy we expect to pass is a regression.
    must_pass = {"crdt", "transactional"}
    regressed_rows = sum(
        1 for row in results if row.strategy in must_pass and row.verdict == "FAIL"
    )
    if regressed_rows:
        print(f"⚠ {regressed_rows} expected-pass strategy regressed — see notes.")
        return 1

    print("✓ benchmark complete; LWW failures above are the demonstrated baseline.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,232 @@
1
+ """LangGraph demo — the asset to send, not plumbing to polish.
2
+
3
+ The 60-second pitch: watch LWW silently corrupt an agent's shared state, then
4
+ watch CRDT not. The SAME two-agent graph runs twice — once with an LWW store
5
+ backend, once with a CRDT store backend — and the demo prints exactly what each
6
+ agent wrote, what survived the merge, and (for CRDT) what got escalated.
7
+
8
+ Why each agent writes to its OWN replica then syncs (rather than both writing
9
+ one shared dict): that's how real multi-agent shared memory actually works.
10
+ Each subagent edits a local copy of the context and the sync layer reconciles.
11
+ langgraph 1.x raises ``InvalidUpdateError`` if two parallel nodes write the same
12
+ state key with no reducer — so we bypass state for the shared memory itself and
13
+ use the graph purely to model the *concurrency* (two agents acting at once),
14
+ which is the honest mapping.
15
+
16
+ Demo runs two scenarios back to back:
17
+
18
+ * ``clean_merge`` — both agents add distinct findings to shared notes/tags.
19
+ LWW drops one agent's findings; CRDT keeps both. Convergence-without-loss.
20
+ * ``conflict`` — both agents set the same scalar to different values.
21
+ LWW silently picks one; CRDT escalates with both contenders attributed.
22
+
23
+ The point is visual: same agents, same writes, opposite outcomes by backend.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ from dataclasses import dataclass
29
+ from typing import Callable
30
+
31
+ from langgraph.graph import END, START, StateGraph
32
+ from typing_extensions import TypedDict
33
+
34
+ from ..models import FieldKind, MergeStrategy, Write
35
+ from ..strategies import make_strategy
36
+
37
+
38
class GraphState(TypedDict):
    """Minimal per-agent scratch state threaded through the graph.

    Only an agent id travels in graph state. The shared memory under test is
    kept in per-agent Store replicas instead, so that langgraph's reducer
    guard does not intercept the concurrent writes the demo wants to show.
    """

    agent_id: str
44
+
45
+
46
@dataclass
class StoreHandle:
    """A single agent's view of shared memory: a private replica plus a counter.

    Writes go to this agent's own replica only; replicas are reconciled later
    by the caller's sync step. ``_op`` numbers the writes issued through this
    handle, starting at 0.
    """

    agent_id: str
    replica: MergeStrategy
    _op: int = 0

    def write(self, field: str, value, kind: FieldKind = FieldKind.scalar) -> None:
        """Apply one attributed write to the local replica.

        Args:
            field: Name of the shared-state field being written.
            value: Payload to store or merge into ``field``.
            kind: How the field merges; defaults to ``FieldKind.scalar``.
        """
        record = Write(
            agent_id=self.agent_id,
            op_id=self._op,
            field=field,
            value=value,
            kind=kind,
        )
        self.replica.apply(record)
        # Advance only after apply() returns, so a failed apply does not
        # consume an op id.
        self._op += 1
70
+
71
+
72
def _agent_node(agent_id: str, field: str, value, kind: FieldKind):
    """Create a graph node that performs one write on ``agent_id``'s replica.

    All four arguments are captured by the closure, so the node does not read
    anything from the graph state it is handed.

    Returns:
        A callable taking the graph state and returning ``{}`` (an empty state
        update), because the payload is stored in the replica rather than in
        graph state.
    """

    def node(state: GraphState) -> dict:
        # The handle is looked up when the node runs, not when it is built,
        # so it always resolves against the current contents of _HANDLES.
        _HANDLES[agent_id].write(field, value, kind)
        return {}

    return node
86
+
87
+
88
+ # Module-level registry so node closures can find their handle without it being
89
+ # serializable graph state. Reset per-run by _run_scenario.
90
+ _HANDLES: dict[str, StoreHandle] = {}
91
+
92
+
93
def _make_handles(strategy_name: str, agent_ids: list[str]) -> None:
    """Reset the module-level handle registry for a new run.

    Empties ``_HANDLES`` and registers one ``StoreHandle`` per agent id, each
    backed by its own replica from ``make_strategy(strategy_name)``.
    """
    _HANDLES.clear()
    _HANDLES.update(
        (aid, StoreHandle(agent_id=aid, replica=make_strategy(strategy_name)))
        for aid in agent_ids
    )
97
+
98
+
99
def _run_scenario(strategy_name: str, agent_work: dict[str, list[tuple]]) -> dict:
    """Drive one backend through the agent graph and report the merged result.

    Args:
        strategy_name: Backend name passed on to ``_make_handles``.
        agent_work: Maps agent id -> list of ``(field, value, kind)`` writes.
            Must contain at least one agent (the first id seeds the graph
            input and the first replica is the one read out).

    Returns:
        A dict with keys ``"final_state"`` (the first agent's finalized state
        after sync), ``"escalations"`` (a list of ``{"field", "contenders"}``
        dicts, empty when the replica reports none) and ``"strategy"``.
    """
    agent_ids = list(agent_work.keys())
    _make_handles(strategy_name, agent_ids)

    # Phase 1 — build and run the graph. Each agent is its own branch off
    # START; its writes chain into a linear sequence that ends at END.
    graph = StateGraph(GraphState)
    for aid in agent_ids:
        tail = START
        for index, (field, value, kind) in enumerate(agent_work[aid]):
            name = f"{aid}_{index}"
            graph.add_node(name, _agent_node(aid, field, value, kind))
            graph.add_edge(tail, name)
            tail = name
        graph.add_edge(tail, END)

    # The input state only carries the first agent's id; nodes do not rely on
    # it because each one has its agent id bound in its closure.
    graph.compile().invoke({"agent_id": agent_ids[0]})

    # Phase 2 — sync. Full mesh: every replica imports every other replica.
    handles = [_HANDLES[aid] for aid in agent_ids]
    for target in handles:
        for source in handles:
            if source is not target:
                target.replica.import_state(source.replica.export_state())

    reader = handles[0].replica
    final = reader.finalized_state()

    # Escalations are read from the replica's `_escalations` attribute when it
    # has one; backends without it (or with none recorded) yield an empty list.
    pending = getattr(reader, "_escalations", None)
    escalations = [
        {
            "field": esc.field,
            "contenders": [
                {"agent": c.agent_id, "op": c.op_id, "value": c.value}
                for c in esc.contenders
            ],
        }
        for esc in (pending or ())
    ]
    return {"final_state": final, "escalations": escalations, "strategy": strategy_name}
150
+
151
+
152
+ # ---------------------------------------------------------------------------
153
+ # The two scenarios. Mirrored from the benchmark workloads so the demo and the
154
+ # table tell the same story.
155
+ # ---------------------------------------------------------------------------
156
+
157
# Scenario 1: both agents contribute to the same mergeable fields
# (`findings` as appended text, `tags` as a grow-only set); only the
# researcher sets the scalar `project`.
_CLEAN_MERGE = dict(
    researcher=[
        ("project", "AgentSync", FieldKind.scalar),
        ("findings", "found 3 CRDT papers", FieldKind.append_text),
        ("tags", {"crdt", "agents"}, FieldKind.grow_set),
    ],
    writer=[
        ("findings", "drafted intro section", FieldKind.append_text),
        ("tags", {"benchmark"}, FieldKind.grow_set),
    ],
)

# Scenario 2: both agents set the same scalar field to different values.
_CONFLICT = dict(
    researcher=[("status", "draft", FieldKind.scalar)],
    writer=[("status", "published", FieldKind.scalar)],
)
173
+
174
+
175
def _expected_clean() -> set:
    """Return the tag set a lossless merge of the clean-merge scenario keeps.

    A fresh set is built on every call, so callers may mutate the result.
    """
    return set(("crdt", "agents", "benchmark"))
177
+
178
+
179
def main() -> int:
    """Print the demo: each scenario under both backends, then a verdict.

    For every scenario the agents' writes are listed, the scenario is run
    once per backend (``lww`` then ``crdt``) and the merged state plus any
    escalations are printed. A closing VERDICT section re-runs both scenarios
    to summarize which tags survived and whether the conflict was escalated.

    Returns:
        Always ``0``.
    """
    heavy_rule = "=" * 72
    light_rule = "-" * 72
    backends = ("lww", "crdt")

    print()
    print(heavy_rule)
    print("AGENTSYNC LANGGRAPH DEMO — same graph, two shared-memory backends")
    print(heavy_rule)

    scenarios = (
        ("SCENARIO 1 — clean merge (mergeable concurrent writes)", _CLEAN_MERGE),
        ("SCENARIO 2 — semantic conflict (same scalar, different values)", _CONFLICT),
    )
    for label, work in scenarios:
        print()
        print(light_rule)
        print(label)
        print(light_rule)
        for aid, writes in work.items():
            # Show field/value only; the merge kind is not part of the story.
            pairs = [(f, v) for f, v, _ in writes]
            print(f" {aid} writes: {pairs}")

        for strat in backends:
            res = _run_scenario(strat, work)
            print(f"\n [{strat}] merged state: {res['final_state']}")
            if not res["escalations"]:
                print(f" [{strat}] no escalations")
                continue
            print(f" [{strat}] ESCALATED (flagged, not auto-resolved):")
            for esc in res["escalations"]:
                print(f" field={esc['field']!r} contenders={esc['contenders']}")

    # Verdict section: make the corruption-vs-convergence contrast explicit.
    print()
    print(heavy_rule)
    print("VERDICT")
    print(heavy_rule)
    lww_clean = _run_scenario("lww", _CLEAN_MERGE)
    crdt_clean = _run_scenario("crdt", _CLEAN_MERGE)
    lww_tags = set(lww_clean["final_state"].get("tags", []))
    crdt_tags = set(crdt_clean["final_state"].get("tags", []))
    expected = _expected_clean()

    def tag_verdict(kept: set) -> str:
        # "OK" when nothing is missing, otherwise name the tags that were lost.
        if kept == expected:
            return "OK"
        return "LOST " + str(sorted(expected - kept))

    print(f" clean merge — expected tags: {sorted(expected)}")
    print(f" LWW kept tags: {sorted(lww_tags)} -> {tag_verdict(lww_tags)}")
    print(f" CRDT kept tags: {sorted(crdt_tags)} -> {tag_verdict(crdt_tags)}")

    crdt_conflict = _run_scenario("crdt", _CONFLICT)
    lww_conflict = _run_scenario("lww", _CONFLICT)
    lww_status = lww_conflict["final_state"].get("status")
    crdt_status = crdt_conflict["final_state"].get("status")
    print("\n semantic conflict on 'status':")
    print(f" LWW -> status={lww_status!r}, escalated: {bool(lww_conflict['escalations'])} (silently picked)")
    print(f" CRDT -> status={crdt_status!r}, escalated: {bool(crdt_conflict['escalations'])} (flagged for review)")
    print()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
agentsync/harness.py ADDED
@@ -0,0 +1,252 @@
1
+ """The three-way benchmark harness — the core deliverable.
2
+
3
+ For each workload × strategy:
4
+
5
+ 1. Create one strategy replica per agent in the workload.
6
+ 2. Apply that agent's local writes to its own replica (the "concurrent" phase —
7
+ replicas never see each other here).
8
+ 3. Merge every replica into every other (full-mesh import/export), so each
9
+ replica ends up holding the union. This is the sync phase.
10
+ 4. Measure: convergence (all replicas identical?), writes lost vs. seen,
11
+ attribution completeness, model calls, wall-clock, peak memory.
12
+ 5. Score a PASS/FAIL verdict from the workload's expectation.
13
+
14
+ Strategies are pluggable via :data:`STRATEGIES`; the harness never branches on
15
+ strategy identity, so adding a fourth is a one-line registry change.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import time
21
+ from dataclasses import asdict
22
+ from typing import Callable
23
+
24
+ try:
25
+ import psutil
26
+ _HAS_PSUTIL = True
27
+ _proc = psutil.Process()
28
+ except ImportError: # pragma: no cover - bench extra is optional
29
+ _HAS_PSUTIL = False
30
+ _proc = None # type: ignore[assignment]
31
+
32
+ from .models import EndState, FieldKind, MergeStrategy, RunResult, Workload
33
+ from .strategies import make_strategy
34
+
35
+
36
def _peak_mem_kb() -> float:
    """Return this process's resident set size in KiB, or NaN without psutil.

    Despite the name this is a point-in-time RSS reading, not a high-water
    mark; callers that want a peak must combine several readings themselves.
    """
    if _proc is not None:
        # psutil reports rss in bytes; the harness runs in a single process,
        # so this reading includes the strategy's memory footprint.
        return _proc.memory_info().rss / 1024.0
    return float("nan")
42
+
43
+
44
def _canonical(value):
    """Return an order-independent, printable stand-in for ``value``.

    Sets and dicts have no meaningful order, so two equal states can print
    differently. They are replaced by tagged, repr-sorted sequences; lists and
    tuples keep their order and type, with their elements canonicalized.
    """
    if isinstance(value, (set, frozenset)):
        return ("set", sorted((_canonical(v) for v in value), key=repr))
    if isinstance(value, dict):
        return (
            "dict",
            sorted(((k, _canonical(v)) for k, v in value.items()), key=repr),
        )
    if isinstance(value, list):
        return [_canonical(v) for v in value]
    if isinstance(value, tuple):
        return tuple(_canonical(v) for v in value)
    return value


def run_one(
    strategy_name: str,
    workload: Workload,
    strategy_factory: Callable[[str], MergeStrategy] = make_strategy,
) -> RunResult:
    """Run ``workload`` through one strategy and return a measured result row.

    Full-mesh merge: every replica imports every other replica's exported
    state, i.e. N*(N-1) imports for N replicas.

    Args:
        strategy_name: Name handed to ``strategy_factory`` and recorded in
            the result.
        workload: Supplies ``writes_by_replica``, ``expectation`` and ``name``.
            Must contain at least one replica; the first replica is used as
            the representative for metrics and the converged state.
        strategy_factory: Builds one fresh replica per call.

    Returns:
        A ``RunResult`` with convergence, lost writes, attribution,
        escalations, model calls, latency, memory, outcome, verdict and notes.

    Raises:
        IndexError: If the workload has no replicas.
    """
    t0 = time.perf_counter()
    mem_before = _peak_mem_kb()
    notes: list[str] = []

    replica_ids = list(workload.writes_by_replica.keys())
    replicas: dict[str, MergeStrategy] = {
        rid: strategy_factory(strategy_name) for rid in replica_ids
    }

    # Phase 1 — local concurrent edits (replicas are isolated).
    for rid in replica_ids:
        for write in workload.writes_by_replica[rid]:
            replicas[rid].apply(write)

    # Phase 2 — full-mesh sync: each replica pulls every other replica's state.
    for rid in replica_ids:
        for other in replica_ids:
            if other == rid:
                continue
            replicas[rid].import_state(replicas[other].export_state())

    states = {rid: r.finalized_state() for rid, r in replicas.items()}
    metrics = {rid: r.metrics() for rid, r in replicas.items()}

    latency_ms = (time.perf_counter() - t0) * 1000.0
    # Larger of the RSS readings taken before and after the run.
    peak_mem_kb = max(_peak_mem_kb(), mem_before)

    # Convergence means every replica holds the same state. States are
    # compared through a canonical form because the plain string of a set (or
    # of a dict built in a different order) can differ between equal values,
    # which would report a false divergence.
    converged = len({repr(_canonical(s)) for s in states.values()}) == 1

    # --- Correctness scoring against the workload's expectation ---
    sample_metrics = metrics[replica_ids[0]]
    # writes_lost is measured structurally, NOT from per-apply counters: an
    # apply always succeeds locally, so counting applied writes would report 0
    # even when a merge later clobbers one. Instead the converged state is
    # checked for every issued contribution to a mergeable field.
    converged_state = states[replica_ids[0]]
    writes_lost = _count_lost_writes(converged_state, workload)
    escalations = getattr(sample_metrics, "escalations", 0)
    # Attribution: every surviving write should trace back to an agent.
    attribution_complete = _check_attribution(
        workload, replicas, sample_metrics
    )

    # Outcome — HOW the strategy reached its end state. Derived, not declared,
    # because corruption is a measurement, not a self-report. In priority order:
    #   1. corrupted: a mergeable write was lost, OR the workload contains a
    #      conflict and the strategy raised no escalation for it.
    #   2. on a conflict that was signalled: the replica's declared
    #      ``conflict_mode`` (defaulting to escalated).
    #   3. otherwise (clean merge, nothing lost): auto_merged.
    had_conflict = not workload.expectation.clean_merge
    silent_on_conflict = had_conflict and escalations == 0
    if writes_lost > 0 or silent_on_conflict:
        outcome = EndState.corrupted
    elif had_conflict:
        outcome = getattr(
            replicas[replica_ids[0]], "conflict_mode", EndState.escalated
        )
    else:
        outcome = EndState.auto_merged

    verdict, fail_notes = _score(
        workload, converged, writes_lost, escalations, attribution_complete
    )
    notes.extend(fail_notes)

    return RunResult(
        strategy=strategy_name,
        workload=workload.name,
        converged=converged,
        writes_lost=writes_lost,
        attribution_complete=attribution_complete,
        escalations=escalations,
        model_calls=sample_metrics.model_calls,
        latency_ms=latency_ms,
        peak_mem_kb=peak_mem_kb,
        outcome=outcome,
        verdict=verdict,
        notes=notes,
    )
143
+
144
+
145
def _count_lost_writes(converged_state: dict, workload: Workload) -> int:
    """Count contributions missing from the merged state.

    Writes are grouped by field and the first write's ``kind`` decides how
    the field is checked:

    * ``grow_set`` — every element any agent contributed must be present;
      each missing element counts as one loss. A merged value that is not a
      list, tuple or set is treated as empty.
    * ``append_text`` — every written fragment must appear somewhere in the
      merged value (order is not checked); a missing field loses them all.
    * anything else (scalars) — not counted here; conflicts on those are
      scored through escalations instead.

    Returns:
        The total number of missing contributions across all fields.
    """
    # Group every issued write under the field it targets.
    grouped: dict[str, list] = {}
    for replica_writes in workload.writes_by_replica.values():
        for w in replica_writes:
            grouped.setdefault(w.field, []).append(w)

    missing = 0
    for name, field_writes in grouped.items():
        merged = converged_state.get(name)
        kind = field_writes[0].kind
        if kind is FieldKind.grow_set:
            wanted = set().union(*(set(w.value) for w in field_writes))
            if isinstance(merged, (list, tuple, set)):
                present = set(merged)
            else:
                present = set()
            missing += len(wanted - present)
        elif kind is FieldKind.append_text:
            if merged is None:
                missing += len(field_writes)
            else:
                # Concurrent append order is unspecified, so containment is
                # the only requirement for each fragment.
                missing += sum(w.value not in merged for w in field_writes)
    return missing
182
+
183
+
184
def _check_attribution(
    workload: Workload, replicas: dict[str, MergeStrategy], metrics
) -> bool:
    """Report whether the replicas expose at least one attribution per write.

    This is a count-based approximation: the number of distinct
    ``(agent_id, op_id, key)`` entries gathered from every replica's optional
    ``attribution()`` mapping is compared with the number of distinct
    ``(agent_id, op_id, field)`` writes the workload issued.

    Args:
        workload: Source of the issued writes (``writes_by_replica``).
        replicas: Replicas to inspect; those without ``attribution`` are skipped.
        metrics: Accepted for signature compatibility; not used.

    Returns:
        ``True`` when there are at least as many attribution entries as issued
        writes. With no attribution entries at all, ``True`` only if the
        workload issued no writes.
    """
    issued = {
        (w.agent_id, w.op_id, w.field)
        for writes in workload.writes_by_replica.values()
        for w in writes
    }

    surviving = set()
    for replica in replicas.values():
        attribution = getattr(replica, "attribution", None)
        if attribution is not None:
            surviving.update(
                (meta["agent_id"], meta["op_id"], key)
                for key, meta in attribution().items()
            )

    if surviving:
        return len(surviving) >= len(issued)
    # No attribution exposed anywhere — only vacuously complete.
    return not issued
212
+
213
+
214
def _score(
    workload: Workload,
    converged: bool,
    writes_lost: int,
    escalations: int,
    attribution_complete: bool,
) -> tuple[str, list[str]]:
    """Judge one run against ``workload.expectation``.

    Each violated expectation contributes exactly one note, in a fixed order:
    divergence, lost writes, spurious escalations, missing escalations,
    incomplete attribution.

    Returns:
        ``("PASS", [])`` when nothing is violated, otherwise ``("FAIL", notes)``.
    """
    exp = workload.expectation
    problems: list[str] = []

    if not converged:
        problems.append("replicas diverged (no convergence)")

    if exp.all_writes_survive and writes_lost > 0:
        problems.append(f"lost {writes_lost} write(s) on a mergeable workload")

    if exp.clean_merge and escalations > 0:
        problems.append(f"{escalations} spurious escalation(s) on a clean merge")

    # One escalation is expected per field listed as a semantic conflict.
    if exp.semantic_conflict_on and escalations < len(exp.semantic_conflict_on):
        problems.append(
            f"expected escalation on {exp.semantic_conflict_on}, "
            f"got {escalations}"
        )

    if exp.all_writes_survive and not attribution_complete:
        problems.append("attribution incomplete (a surviving write lost its agent)")

    verdict = "FAIL" if problems else "PASS"
    return verdict, problems
250
+
251
+
252
+ __all__ = ["run_one"]