claude_swarm 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-version +1 -1
  3. data/CHANGELOG.md +14 -0
  4. data/README.md +336 -1037
  5. data/docs/V1_TO_V2_MIGRATION_GUIDE.md +1120 -0
  6. data/docs/v1/README.md +1195 -0
  7. data/docs/v2/CHANGELOG.swarm_cli.md +22 -0
  8. data/docs/v2/CHANGELOG.swarm_memory.md +20 -0
  9. data/docs/v2/CHANGELOG.swarm_sdk.md +287 -10
  10. data/docs/v2/README.md +32 -6
  11. data/docs/v2/guides/complete-tutorial.md +133 -37
  12. data/docs/v2/guides/composable-swarms.md +1178 -0
  13. data/docs/v2/guides/getting-started.md +42 -1
  14. data/docs/v2/guides/snapshots.md +1498 -0
  15. data/docs/v2/reference/architecture-flow.md +5 -3
  16. data/docs/v2/reference/event_payload_structures.md +249 -12
  17. data/docs/v2/reference/execution-flow.md +1 -1
  18. data/docs/v2/reference/ruby-dsl.md +368 -22
  19. data/docs/v2/reference/yaml.md +314 -63
  20. data/examples/snapshot_demo.rb +119 -0
  21. data/examples/v2/dsl/01_basic.rb +0 -2
  22. data/examples/v2/dsl/02_core_parameters.rb +0 -2
  23. data/examples/v2/dsl/03_capabilities.rb +0 -2
  24. data/examples/v2/dsl/04_llm_parameters.rb +0 -2
  25. data/examples/v2/dsl/05_advanced_flags.rb +0 -3
  26. data/examples/v2/dsl/06_permissions.rb +0 -4
  27. data/examples/v2/dsl/07_mcp_server.rb +0 -2
  28. data/examples/v2/dsl/08_swarm_hooks.rb +0 -2
  29. data/examples/v2/dsl/09_agent_hooks.rb +0 -2
  30. data/examples/v2/dsl/10_all_agents_hooks.rb +0 -3
  31. data/examples/v2/dsl/11_delegation.rb +0 -2
  32. data/examples/v2/dsl/12_complete_integration.rb +2 -6
  33. data/examples/v2/node_context_demo.rb +1 -1
  34. data/examples/v2/node_workflow.rb +2 -4
  35. data/examples/v2/plan_and_execute.rb +157 -0
  36. data/lib/claude_swarm/configuration.rb +28 -4
  37. data/lib/claude_swarm/version.rb +1 -1
  38. data/lib/swarm_cli/formatters/human_formatter.rb +103 -0
  39. data/lib/swarm_cli/interactive_repl.rb +9 -3
  40. data/lib/swarm_cli/version.rb +1 -1
  41. data/lib/swarm_memory/core/storage_read_tracker.rb +51 -14
  42. data/lib/swarm_memory/integration/cli_registration.rb +3 -2
  43. data/lib/swarm_memory/integration/sdk_plugin.rb +11 -5
  44. data/lib/swarm_memory/tools/memory_edit.rb +2 -2
  45. data/lib/swarm_memory/tools/memory_multi_edit.rb +2 -2
  46. data/lib/swarm_memory/tools/memory_read.rb +3 -3
  47. data/lib/swarm_memory/version.rb +1 -1
  48. data/lib/swarm_memory.rb +5 -0
  49. data/lib/swarm_sdk/agent/builder.rb +33 -0
  50. data/lib/swarm_sdk/agent/chat/context_tracker.rb +33 -0
  51. data/lib/swarm_sdk/agent/chat/hook_integration.rb +49 -3
  52. data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +11 -27
  53. data/lib/swarm_sdk/agent/chat.rb +200 -51
  54. data/lib/swarm_sdk/agent/context.rb +6 -2
  55. data/lib/swarm_sdk/agent/context_manager.rb +6 -0
  56. data/lib/swarm_sdk/agent/definition.rb +14 -2
  57. data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +180 -0
  58. data/lib/swarm_sdk/configuration.rb +387 -94
  59. data/lib/swarm_sdk/events_to_messages.rb +181 -0
  60. data/lib/swarm_sdk/log_collector.rb +31 -5
  61. data/lib/swarm_sdk/log_stream.rb +37 -8
  62. data/lib/swarm_sdk/model_aliases.json +4 -1
  63. data/lib/swarm_sdk/node/agent_config.rb +33 -8
  64. data/lib/swarm_sdk/node/builder.rb +39 -18
  65. data/lib/swarm_sdk/node_orchestrator.rb +293 -26
  66. data/lib/swarm_sdk/proc_helpers.rb +53 -0
  67. data/lib/swarm_sdk/providers/openai_with_responses.rb +22 -15
  68. data/lib/swarm_sdk/restore_result.rb +65 -0
  69. data/lib/swarm_sdk/snapshot.rb +156 -0
  70. data/lib/swarm_sdk/snapshot_from_events.rb +386 -0
  71. data/lib/swarm_sdk/state_restorer.rb +491 -0
  72. data/lib/swarm_sdk/state_snapshot.rb +369 -0
  73. data/lib/swarm_sdk/swarm/agent_initializer.rb +360 -55
  74. data/lib/swarm_sdk/swarm/all_agents_builder.rb +28 -1
  75. data/lib/swarm_sdk/swarm/builder.rb +208 -12
  76. data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +67 -0
  77. data/lib/swarm_sdk/swarm/tool_configurator.rb +46 -11
  78. data/lib/swarm_sdk/swarm.rb +338 -42
  79. data/lib/swarm_sdk/swarm_loader.rb +145 -0
  80. data/lib/swarm_sdk/swarm_registry.rb +136 -0
  81. data/lib/swarm_sdk/tools/delegate.rb +92 -7
  82. data/lib/swarm_sdk/tools/read.rb +17 -5
  83. data/lib/swarm_sdk/tools/stores/read_tracker.rb +47 -12
  84. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +45 -0
  85. data/lib/swarm_sdk/utils.rb +18 -0
  86. data/lib/swarm_sdk/validation_result.rb +33 -0
  87. data/lib/swarm_sdk/version.rb +1 -1
  88. data/lib/swarm_sdk.rb +40 -8
  89. data/swarm_cli.gemspec +1 -1
  90. data/swarm_memory.gemspec +2 -2
  91. data/swarm_sdk.gemspec +2 -2
  92. metadata +21 -13
  93. data/examples/learning-assistant/assistant.md +0 -7
  94. data/examples/learning-assistant/example-memories/concept-example.md +0 -90
  95. data/examples/learning-assistant/example-memories/experience-example.md +0 -66
  96. data/examples/learning-assistant/example-memories/fact-example.md +0 -76
  97. data/examples/learning-assistant/example-memories/memory-index.md +0 -78
  98. data/examples/learning-assistant/example-memories/skill-example.md +0 -168
  99. data/examples/learning-assistant/learning_assistant.rb +0 -34
  100. data/examples/learning-assistant/learning_assistant.yml +0 -20
  101. data/lib/swarm_sdk/mcp.rb +0 -16
  102. data/llm.v2.txt +0 -13407
  103. /data/docs/v2/guides/{MEMORY_DEFRAG_GUIDE.md → memory-defrag-guide.md} +0 -0
  104. /data/{llms.txt → llms.claude-swarm.txt} +0 -0
@@ -0,0 +1,1498 @@
1
+ # Snapshot & Restore Guide
2
+
3
+ Enable multi-turn conversations across process restarts with SwarmSDK's snapshot/restore functionality.
4
+
5
+ ## Table of Contents
6
+
7
+ - [Overview](#overview)
8
+ - [Quick Start](#quick-start)
9
+ - [Core Concepts](#core-concepts)
10
+ - [Reconstructing Snapshots from Events](#reconstructing-snapshots-from-events)
11
+ - [API Reference](#api-reference)
12
+ - [Use Cases](#use-cases)
13
+ - [Advanced Topics](#advanced-topics)
14
+ - [Troubleshooting](#troubleshooting)
15
+
16
+ ## Overview
17
+
18
+ Snapshots capture your swarm's **conversation state** so you can pause work and resume later in a new process. This enables:
19
+
20
+ - **Multi-turn conversations** across process restarts
21
+ - **Session persistence** in web applications
22
+ - **Checkpoint/rollback** during long-running tasks
23
+ - **State inspection** and debugging
24
+
25
+ ### What Gets Snapshotted
26
+
27
+ ✅ **Included in Snapshots:**
28
+ - Agent conversation history (all messages)
29
+ - Agent context state (warnings, compression, TodoWrite tracking)
30
+ - Delegation instance conversations
31
+ - Scratchpad contents (volatile shared storage)
32
+ - Read tracking with content verification (prevents stale edits)
33
+ - Memory read tracking with content verification
34
+
35
+ ❌ **NOT Included (stays in your config):**
36
+ - Agent definitions (model, tools, prompts)
37
+ - MCP server configurations
38
+ - SwarmMemory persistent storage (stays on disk)
39
+ - Hook configurations
40
+
41
+ ## Quick Start
42
+
43
+ ### Basic Workflow
44
+
45
+ ```ruby
46
+ # 1. Create swarm and do work
47
+ swarm = SwarmSDK.build do
48
+ name "Dev Team"
49
+ lead :backend
50
+
51
+ agent :backend do
52
+ model "claude-sonnet-4"
53
+ system_prompt "You build robust APIs"
54
+ tools :Read, :Write, :Bash
55
+ end
56
+ end
57
+
58
+ result = swarm.execute("Build authentication system")
59
+
60
+ # 2. Create snapshot
61
+ snapshot = swarm.snapshot
62
+
63
+ # 3. Save to file
64
+ snapshot.write_to_file("session.json")
65
+
66
+ # === Process ends, new process starts ===
67
+
68
+ # 4. Load snapshot
69
+ snapshot = SwarmSDK::Snapshot.from_file("session.json")
70
+
71
+ # 5. Recreate swarm (SAME config as before)
72
+ swarm = SwarmSDK.build do
73
+ name "Dev Team"
74
+ lead :backend
75
+
76
+ agent :backend do
77
+ model "claude-sonnet-4"
78
+ system_prompt "You build robust APIs"
79
+ tools :Read, :Write, :Bash
80
+ end
81
+ end
82
+
83
+ # 6. Restore state
84
+ result = swarm.restore(snapshot)
85
+
86
+ if result.success?
87
+ puts "✅ All agents restored"
88
+ else
89
+ puts "⚠️ #{result.summary}"
90
+ end
91
+
92
+ # 7. Continue working with full context
93
+ swarm.execute("Add password reset functionality")
94
+ ```
95
+
96
+ ## Core Concepts
97
+
98
+ ### Snapshot Object
99
+
100
+ The `Snapshot` class encapsulates snapshot data with convenient methods:
101
+
102
+ ```ruby
103
+ snapshot = swarm.snapshot
104
+
105
+ # Convert to different formats
106
+ hash = snapshot.to_hash
107
+ json_string = snapshot.to_json
108
+ pretty_json = snapshot.to_json(pretty: true)
109
+
110
+ # Save to file
111
+ snapshot.write_to_file("session.json")
112
+ snapshot.write_to_file("session.json", pretty: false)
113
+
114
+ # Access metadata
115
+ snapshot.version # => "1.0.0"
116
+ snapshot.type # => "swarm" or "node_orchestrator"
117
+ snapshot.snapshot_at # => "2025-01-03T14:30:00Z"
118
+ snapshot.swarm_sdk_version # => "2.1.3"
119
+ snapshot.agent_names # => ["backend", "database"]
120
+ snapshot.delegation_instance_names # => ["database@backend"]
121
+
122
+ # Type checks
123
+ snapshot.swarm? # => true
124
+ snapshot.node_orchestrator? # => false
125
+ ```
126
+
127
+ ### Loading Snapshots
128
+
129
+ Three factory methods for different sources:
130
+
131
+ ```ruby
132
+ # From file (most common)
133
+ snapshot = SwarmSDK::Snapshot.from_file("session.json")
134
+
135
+ # From JSON string
136
+ json_string = redis.get("session:#{user_id}")
137
+ snapshot = SwarmSDK::Snapshot.from_json(json_string)
138
+
139
+ # From hash
140
+ hash = { version: "1.0.0", type: "swarm", ... }
141
+ snapshot = SwarmSDK::Snapshot.from_hash(hash)
142
+ ```
143
+
144
+ ### Configuration vs State
145
+
146
+ **Critical Concept**: Snapshots separate configuration from state.
147
+
148
+ ```ruby
149
+ # Configuration (YAML/DSL) = What agents can do
150
+ swarm = SwarmSDK.build do
151
+ agent :backend do
152
+ model "claude-sonnet-4" # Config
153
+ tools :Read, :Write, :Bash # Config
154
+ system_prompt "You build..." # Config
155
+ end
156
+ end
157
+
158
+ # Snapshot = What agents remember
159
+ snapshot = swarm.snapshot # State only (conversations, context state, scratchpad, read tracking) - no config
160
+
161
+ # You MUST recreate with same config when restoring
162
+ swarm2 = SwarmSDK.build do
163
+ agent :backend do
164
+ model "claude-sonnet-4" # Must match original
165
+ tools :Read, :Write, :Bash # Must match original
166
+ system_prompt "You build..." # Must match original
167
+ end
168
+ end
169
+
170
+ swarm2.restore(snapshot) # Restores conversation only
171
+ ```
172
+
173
+ ### System Prompt Handling
174
+
175
+ **By default, system prompts come from your current configuration**, not from the snapshot. This means you can update system prompts without creating new sessions.
176
+
177
+ ```ruby
178
+ # Original execution with system prompt A
179
+ swarm = SwarmSDK.build do
180
+ agent :backend do
181
+ system_prompt "You build robust APIs" # System prompt A
182
+ end
183
+ end
184
+ result = swarm.execute("Build auth")
185
+ snapshot = swarm.snapshot
186
+
187
+ # === Update system prompt in config ===
188
+
189
+ # Restoration with system prompt B (default behavior)
190
+ swarm = SwarmSDK.build do
191
+ agent :backend do
192
+ system_prompt "You build secure APIs with comprehensive logging" # System prompt B (NEW!)
193
+ end
194
+ end
195
+ swarm.restore(snapshot) # Uses NEW system prompt B with OLD conversation
196
+ swarm.execute("Add password reset") # Continues with updated prompt
197
+ ```
198
+
199
+ **Why This Design?**
200
+
201
+ System prompts define agent **behavior** and should come from configuration (your source of truth). Snapshots preserve **conversation history** (what happened), not configuration (how agents should behave).
202
+
203
+ This enables:
204
+ - ✅ Iterate on system prompts without losing conversation history
205
+ - ✅ A/B test different prompts on same conversation
206
+ - ✅ Update prompts across all sessions instantly
207
+ - ✅ Configuration stays in version control, conversations in storage
208
+
209
+ **System Prompts Include:**
210
+ - The `system_prompt` field from your YAML or Ruby DSL configuration
211
+ - SDK-injected defaults (environment, date, etc.)
212
+ - Plugin injections (SwarmMemory instructions, etc.)
213
+
214
+ When you restore, **all these injections apply**, giving you the complete current system prompt.
215
+
216
+ #### Historical System Prompts (Advanced)
217
+
218
+ For debugging, auditing, or exact reproducibility, use `preserve_system_prompts: true`:
219
+
220
+ ```ruby
221
+ # Use historical system prompts from snapshot
222
+ swarm.restore(snapshot, preserve_system_prompts: true)
223
+ ```
224
+
225
+ **Use Cases:**
226
+ - **Debugging**: "What system prompt was active when this bug occurred?"
227
+ - **Audit Trail**: "What instructions was the agent following at that time?"
228
+ - **Reproducibility**: "Run this exact scenario again with historical context"
229
+ - **Time-Travel Debugging**: "Replay with exact state from past execution"
230
+
231
+ **Example: Debugging with Historical Prompts**
232
+
233
+ ```ruby
234
+ # Load snapshot from bug report
235
+ snapshot = SwarmSDK::Snapshot.from_file("bug_session.json")
236
+
237
+ # Restore with EXACT prompts that were active during bug
238
+ swarm = SwarmSDK.build { ... }
239
+ swarm.restore(snapshot, preserve_system_prompts: true)
240
+
241
+ # Now you can reproduce the exact behavior
242
+ result = swarm.execute("Trigger the bug scenario")
243
+ ```
244
+
245
+ **Default vs Preserve Comparison:**
246
+
247
+ | Aspect | Default (`false`) | Preserve (`true`) |
248
+ |--------|------------------|-------------------|
249
+ | System Prompt Source | Current YAML/DSL config | Historical snapshot |
250
+ | SDK Injections | Current defaults | Historical defaults |
251
+ | Plugin Injections | Current plugins | Historical plugins |
252
+ | Use Case | Production, iteration | Debugging, audit |
253
+ | Config Changes | Apply immediately | Ignored |
254
+
255
+ ## Reconstructing Snapshots from Events
256
+
257
+ SwarmSDK can reconstruct complete snapshots from event logs, enabling event sourcing and session persistence without explicit snapshot storage.
258
+
259
+ ### Why Reconstruct from Events?
260
+
261
+ **Benefits:**
262
+ - ✅ **Single source of truth** - Events are the authoritative record
263
+ - ✅ **Complete audit trail** - Every state change is logged
264
+ - ✅ **Time travel** - Reconstruct state at any point in time
265
+ - ✅ **Event sourcing** - Store only events, derive snapshots on-demand
266
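+ - ✅ **Storage-friendly** - Events are append-only and compressible (note that a raw event log is typically larger than a single snapshot; see Performance Considerations)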
267
+
268
+ **Use Cases:**
269
+ - Session persistence in databases
270
+ - Multi-process session sharing
271
+ - Debugging and replay
272
+ - Compliance and audit trails
273
+
274
+ ### Basic Usage
275
+
276
+ ```ruby
277
+ # 1. Collect events during execution
278
+ events = []
279
+ swarm = SwarmSDK::Swarm.from_config("swarm.yml")
280
+
281
+ result = swarm.execute("Build feature") do |event|
282
+ events << event
283
+ end
284
+
285
+ # 2. Save events to storage (DB, file, Redis, etc.)
286
+ File.write("session_events.json", JSON.generate(events))
287
+
288
+ # === Later, even in a different process ===
289
+
290
+ # 3. Load events
291
+ events = JSON.parse(File.read("session_events.json"), symbolize_names: true)
292
+
293
+ # 4. Reconstruct snapshot from events
294
+ snapshot = SwarmSDK::SnapshotFromEvents.reconstruct(events)
295
+
296
+ # 5. Restore swarm from reconstructed snapshot
297
+ swarm = SwarmSDK::Swarm.from_config("swarm.yml")
298
+ swarm.restore(snapshot)
299
+
300
+ # 6. Continue with full context
301
+ swarm.execute("Continue feature development")
302
+ ```
303
+
304
+ ### What Gets Reconstructed
305
+
306
+ SnapshotFromEvents reconstructs **100% of swarm state**:
307
+
308
+ | Component | Source |
309
+ |-----------|--------|
310
+ | Swarm metadata | swarm_id, parent_swarm_id from all events |
311
+ | Agent conversations | user_prompt, agent_step, agent_stop, tool_result events |
312
+ | Delegation instances | Same events, agent name contains `@` |
313
+ | Context warnings | context_threshold_hit events |
314
+ | Compression state | compression_completed events |
315
+ | TodoWrite tracking | TodoWrite tool_call events |
316
+ | Active skills | LoadSkill tool_call arguments |
317
+ | Scratchpad contents | ScratchpadWrite tool_call arguments |
318
+ | Read tracking | Read tool_result metadata.read_digest |
319
+ | Memory tracking | MemoryRead tool_result metadata.read_digest |
320
+
321
+ ### Event Requirements
322
+
323
+ Events must have:
324
+ - `:timestamp` - ISO 8601 format (auto-added by SwarmSDK)
325
+ - `:agent` - Agent identifier
326
+ - `:type` - Event type (user_prompt, agent_step, tool_call, etc.)
327
+
328
+ All SwarmSDK events automatically include these fields.
329
+
330
+ ### Database Storage Pattern
331
+
332
+ ```ruby
333
+ # ActiveRecord model
334
+ class SwarmEvent < ApplicationRecord
335
+ # Schema: session_id:string, event_data:jsonb, timestamp:datetime
336
+
337
+ scope :for_session, ->(session_id) { where(session_id: session_id).order(:timestamp) }
338
+ end
339
+
340
+ class SwarmSession
341
+ def initialize(session_id)
342
+ @session_id = session_id
343
+ end
344
+
345
+ # Execute and store events
346
+ def execute(prompt, swarm_config)
347
+ swarm = SwarmSDK::Swarm.from_config(swarm_config)
348
+
349
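+ # Restore from previous events if any exist (NOTE: jsonb columns return string-keyed hashes; symbolize keys first if reconstruct expects symbol keys such as :timestamp)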
350
+ previous_events = SwarmEvent.for_session(@session_id).pluck(:event_data)
351
+ if previous_events.any?
352
+ snapshot = SwarmSDK::SnapshotFromEvents.reconstruct(previous_events)
353
+ swarm.restore(snapshot)
354
+ end
355
+
356
+ # Execute with event collection
357
+ result = swarm.execute(prompt) do |event|
358
+ SwarmEvent.create!(
359
+ session_id: @session_id,
360
+ event_data: event,
361
+ timestamp: Time.parse(event[:timestamp])
362
+ )
363
+ end
364
+
365
+ result
366
+ end
367
+ end
368
+
369
+ # Usage
370
+ session = SwarmSession.new("user_123_session_456")
371
+ result = session.execute("Build authentication", "swarm.yml")
372
+ # All events saved to database, can reconstruct later
373
+ ```
374
+
375
+ ### Redis Streaming Pattern
376
+
377
+ ```ruby
378
+ class RedisEventSession
379
+ def initialize(redis, session_id)
380
+ @redis = redis
381
+ @session_id = session_id
382
+ @events_key = "session:#{session_id}:events"
383
+ end
384
+
385
+ def execute(prompt, swarm_config)
386
+ swarm = SwarmSDK::Swarm.from_config(swarm_config)
387
+
388
+ # Restore from events
389
+ restore_from_events(swarm)
390
+
391
+ # Execute and stream events
392
+ result = swarm.execute(prompt) do |event|
393
+ @redis.rpush(@events_key, JSON.generate(event))
394
+ @redis.expire(@events_key, 3600) # 1 hour TTL
395
+ end
396
+
397
+ result
398
+ end
399
+
400
+ def restore_from_events(swarm)
401
+ events_json = @redis.lrange(@events_key, 0, -1)
402
+ return if events_json.empty?
403
+
404
+ events = events_json.map { |json| JSON.parse(json, symbolize_names: true) }
405
+ snapshot = SwarmSDK::SnapshotFromEvents.reconstruct(events)
406
+ swarm.restore(snapshot)
407
+ end
408
+ end
409
+
410
+ # Usage
411
+ session = RedisEventSession.new(redis, "session_123")
412
+ result = session.execute("Build feature", "swarm.yml")
413
+ ```
414
+
415
+ ### Time Travel Debugging
416
+
417
+ ```ruby
418
+ # Reconstruct state at any point in time
419
+ def reconstruct_at_time(events, timestamp)
420
+ # Filter events up to specific timestamp
421
+ events_until = events.select do |e|
422
+ Time.parse(e[:timestamp]) <= Time.parse(timestamp)
423
+ end
424
+
425
+ # Reconstruct snapshot
426
+ SwarmSDK::SnapshotFromEvents.reconstruct(events_until)
427
+ end
428
+
429
+ # Usage
430
+ all_events = load_all_events(session_id)
431
+
432
+ # See state at 3:00 PM
433
+ snapshot_3pm = reconstruct_at_time(all_events, "2025-11-04T15:00:00Z")
434
+ swarm = SwarmSDK::Swarm.from_config("swarm.yml")
435
+ swarm.restore(snapshot_3pm)
436
+ # Swarm state is exactly as it was at 3:00 PM
437
+
438
+ # See state at 4:00 PM
439
+ snapshot_4pm = reconstruct_at_time(all_events, "2025-11-04T16:00:00Z")
440
+
441
+ # Compare states
442
+ puts "Scratchpad changes:"
443
+ diff_scratchpad(snapshot_3pm[:scratchpad], snapshot_4pm[:scratchpad])
444
+ ```
445
+
446
+ ### Event Sourcing Architecture
447
+
448
+ ```ruby
449
+ class EventSourcedSwarmSession
450
+ def initialize(event_store, session_id)
451
+ @event_store = event_store
452
+ @session_id = session_id
453
+ end
454
+
455
+ # Execute and append events
456
+ def execute(prompt, swarm_config)
457
+ swarm = build_swarm(swarm_config)
458
+
459
+ # Restore from all previous events
460
+ restore_from_events(swarm)
461
+
462
+ # Execute and store events
463
+ result = swarm.execute(prompt) do |event|
464
+ @event_store.append(@session_id, event)
465
+ end
466
+
467
+ result
468
+ end
469
+
470
+ # Reconstruct current state from events
471
+ def current_snapshot
472
+ events = @event_store.get_all(@session_id)
473
+ SwarmSDK::SnapshotFromEvents.reconstruct(events)
474
+ end
475
+
476
+ private
477
+
478
+ def restore_from_events(swarm)
479
+ events = @event_store.get_all(@session_id)
480
+ return if events.empty?
481
+
482
+ snapshot = SwarmSDK::SnapshotFromEvents.reconstruct(events)
483
+ swarm.restore(snapshot)
484
+ end
485
+
486
+ def build_swarm(config)
487
+ SwarmSDK::Swarm.from_config(config)
488
+ end
489
+ end
490
+
491
+ # Event store implementation
492
+ class PostgresEventStore
493
+ def append(session_id, event)
494
+ DB[:swarm_events].insert(
495
+ session_id: session_id,
496
+ event_type: event[:type],
497
+ event_data: Sequel.pg_jsonb(event),
498
+ timestamp: Time.parse(event[:timestamp])
499
+ )
500
+ end
501
+
502
+ def get_all(session_id)
503
+ DB[:swarm_events]
504
+ .where(session_id: session_id)
505
+ .order(:timestamp)
506
+ .select_map(:event_data)
507
+ end
508
+ end
509
+ ```
510
+
511
+ ### Hybrid Approach: Snapshots + Delta Events
512
+
513
+ For optimal performance with large sessions:
514
+
515
+ ```ruby
516
+ class HybridSessionStorage
517
+ def initialize(storage)
518
+ @storage = storage
519
+ end
520
+
521
+ def save_session(session_id, swarm, events)
522
+ # Save periodic snapshot every 100 events
523
+ if events.size % 100 == 0
524
+ snapshot = swarm.snapshot
525
+ @storage.save_snapshot(session_id, snapshot)
526
+ @storage.clear_old_events(session_id) # Keep only delta
527
+ end
528
+
529
+ # Always save events
530
+ @storage.save_events(session_id, events)
531
+ end
532
+
533
+ def restore_session(session_id, swarm)
534
+ # Get last snapshot
535
+ snapshot = @storage.load_snapshot(session_id)
536
+
537
+ # Get delta events since snapshot
538
+ delta_events = @storage.load_events_after(session_id, snapshot&.snapshot_at)
539
+
540
+ if snapshot
541
+ # Restore from snapshot
542
+ swarm.restore(snapshot)
543
+
544
+ # Apply delta events if any
545
+ if delta_events.any?
546
+ delta_snapshot = SwarmSDK::SnapshotFromEvents.reconstruct(delta_events)
547
+ swarm.restore(delta_snapshot)
548
+ end
549
+ elsif delta_events.any?
550
+ # No snapshot, reconstruct from all events
551
+ full_snapshot = SwarmSDK::SnapshotFromEvents.reconstruct(delta_events)
552
+ swarm.restore(full_snapshot)
553
+ end
554
+ end
555
+ end
556
+ ```
557
+
558
+ ### Performance Considerations
559
+
560
+ **Reconstruction Time:**
561
+ - 1,000 events: ~10-20ms
562
+ - 10,000 events: ~100-200ms
563
+ - 100,000 events: ~1-2 seconds
564
+
565
+ **Optimization Strategies:**
566
+ 1. Periodic snapshots (every N events)
567
+ 2. Event compaction (merge old events into snapshot)
568
+ 3. Parallel processing for multiple agents
569
+ 4. Index events by agent and type in database
570
+
571
+ **Storage Size:**
572
+ - Average event: ~500 bytes
573
+ - Average snapshot: ~10-50KB
574
+ - 1000 events ≈ 500KB vs 1 snapshot ≈ 30KB
575
+
576
+ ### RestoreResult
577
+
578
+ The `restore()` method returns a `RestoreResult` with information about the restoration:
579
+
580
+ ```ruby
581
+ result = swarm.restore(snapshot)
582
+
583
+ # Check if fully successful
584
+ if result.success?
585
+ puts "All agents restored successfully"
586
+ end
587
+
588
+ # Check if partial restore (some agents skipped)
589
+ if result.partial_restore?
590
+ puts result.summary
591
+ # => "Snapshot restored with warnings. 1 agents skipped, 0 delegation instances skipped."
592
+
593
+ result.warnings.each do |warning|
594
+ puts "⚠️ #{warning[:message]}"
595
+ end
596
+
597
+ puts "Skipped agents: #{result.skipped_agents.join(', ')}"
598
+ end
599
+ ```
600
+
601
+ ## API Reference
602
+
603
+ ### Swarm Methods
604
+
605
+ ```ruby
606
+ # Create snapshot
607
+ snapshot = swarm.snapshot
608
+ # => SwarmSDK::Snapshot
609
+
610
+ # Restore from snapshot
611
+ result = swarm.restore(snapshot)
612
+ # => SwarmSDK::RestoreResult
613
+
614
+ # Also accepts hash or JSON string (backward compatible)
615
+ result = swarm.restore(hash)
616
+ result = swarm.restore(json_string)
617
+ ```
618
+
619
+ ### NodeOrchestrator Methods
620
+
621
+ Same API as Swarm:
622
+
623
+ ```ruby
624
+ snapshot = orchestrator.snapshot
625
+ result = orchestrator.restore(snapshot)
626
+ ```
627
+
628
+ ### Snapshot Class
629
+
630
+ ```ruby
631
+ # Instance methods
632
+ snapshot.to_hash # => Hash
633
+ snapshot.to_json(pretty: true) # => String (JSON)
634
+ snapshot.write_to_file(path, pretty: true) # => void
635
+
636
+ # Class methods (factory)
637
+ Snapshot.from_file(path) # => Snapshot
638
+ Snapshot.from_json(json_string) # => Snapshot
639
+ Snapshot.from_hash(hash) # => Snapshot
640
+
641
+ # Metadata accessors
642
+ snapshot.version # => "1.0.0"
643
+ snapshot.type # => "swarm" | "node_orchestrator"
644
+ snapshot.snapshot_at # => "2025-01-03T14:30:00Z"
645
+ snapshot.swarm_sdk_version # => "2.1.3"
646
+ snapshot.agent_names # => ["agent1", "agent2"]
647
+ snapshot.delegation_instance_names # => ["agent2@agent1"]
648
+
649
+ # Type checks
650
+ snapshot.swarm? # => true | false
651
+ snapshot.node_orchestrator? # => true | false
652
+ ```
653
+
654
+ ### SnapshotFromEvents Class
655
+
656
+ ```ruby
657
+ # Reconstruct snapshot from event stream
658
+ snapshot_hash = SwarmSDK::SnapshotFromEvents.reconstruct(events)
659
+ # => Hash (compatible with StateRestorer)
660
+
661
+ # Use reconstructed snapshot
662
+ swarm.restore(snapshot_hash)
663
+
664
+ # Or wrap in Snapshot object
665
+ snapshot = SwarmSDK::Snapshot.from_hash(snapshot_hash)
666
+ snapshot.write_to_file("reconstructed.json")
667
+ ```
668
+
669
+ **Parameters:**
670
+ - `events` - Array of event hashes with timestamps
671
+
672
+ **Returns:** Hash in StateSnapshot format
673
+
674
+ **Requirements:**
675
+ - Events must have `:timestamp`, `:agent`, `:type` fields
676
+ - Events are automatically sorted by timestamp
677
+
678
+ ### RestoreResult Class
679
+
680
+ ```ruby
681
+ # Status checks
682
+ result.success? # => Boolean
683
+ result.partial_restore? # => Boolean
684
+ result.summary # => String
685
+
686
+ # Details
687
+ result.warnings # => Array<Hash>
688
+ result.skipped_agents # => Array<Symbol>
689
+ result.skipped_delegations # => Array<String>
690
+ ```
691
+
692
+ ## Use Cases
693
+
694
+ ### Web Applications
695
+
696
+ ```ruby
697
+ # Rails controller
698
+ class SwarmSessionsController < ApplicationController
699
+ def create
700
+ swarm = build_swarm_from_config
701
+ result = swarm.execute(params[:prompt])
702
+
703
+ # Save snapshot to session
704
+ snapshot = swarm.snapshot
705
+ session[:swarm_snapshot] = snapshot.to_hash
706
+
707
+ render json: { result: result.content }
708
+ end
709
+
710
+ def continue
711
+ # Restore from session
712
+ swarm = build_swarm_from_config
713
+ snapshot = SwarmSDK::Snapshot.from_hash(session[:swarm_snapshot])
714
+ swarm.restore(snapshot)
715
+
716
+ # Continue conversation
717
+ result = swarm.execute(params[:prompt])
718
+
719
+ # Update snapshot
720
+ snapshot = swarm.snapshot
721
+ session[:swarm_snapshot] = snapshot.to_hash
722
+
723
+ render json: { result: result.content }
724
+ end
725
+ end
726
+ ```
727
+
728
+ ### Redis Storage
729
+
730
+ ```ruby
731
+ class SwarmSessionManager
732
+ def initialize(redis_client)
733
+ @redis = redis_client
734
+ end
735
+
736
+ def save_session(user_id, swarm)
737
+ snapshot = swarm.snapshot
738
+ @redis.set("swarm:#{user_id}", snapshot.to_json(pretty: false))
739
+ @redis.expire("swarm:#{user_id}", 3600) # 1 hour TTL
740
+ end
741
+
742
+ def load_session(user_id, swarm)
743
+ json_data = @redis.get("swarm:#{user_id}")
744
+ return nil unless json_data
745
+
746
+ snapshot = SwarmSDK::Snapshot.from_json(json_data)
747
+ swarm.restore(snapshot)
748
+ end
749
+ end
750
+ ```
751
+
752
+ ### Database Storage
753
+
754
+ ```ruby
755
+ # ActiveRecord model
756
+ class SwarmSession < ApplicationRecord
757
+ # Schema: user_id:integer, snapshot_data:jsonb, created_at:datetime
758
+
759
+ def save_snapshot(swarm)
760
+ snapshot = swarm.snapshot
761
+ update!(snapshot_data: snapshot.to_hash)
762
+ end
763
+
764
+ def restore_to(swarm)
765
+ snapshot = SwarmSDK::Snapshot.from_hash(snapshot_data)
766
+ swarm.restore(snapshot)
767
+ end
768
+ end
769
+
770
+ # Usage
771
+ session = SwarmSession.find_by(user_id: current_user.id)
772
+ swarm = build_swarm_from_config
773
+ session.restore_to(swarm)
774
+ ```
775
+
776
+ ### Checkpoint/Rollback
777
+
778
+ ```ruby
779
+ # Save checkpoints during long-running tasks
780
+ checkpoints = []
781
+
782
+ swarm = SwarmSDK.build { ... }
783
+
784
+ # Phase 1
785
+ result = swarm.execute("Design database schema")
786
+ checkpoints << swarm.snapshot
787
+ checkpoints.last.write_to_file("checkpoint_1.json")
788
+
789
+ # Phase 2
790
+ result = swarm.execute("Implement API endpoints")
791
+ checkpoints << swarm.snapshot
792
+ checkpoints.last.write_to_file("checkpoint_2.json")
793
+
794
+ # Something went wrong, rollback to checkpoint 1
795
+ snapshot = SwarmSDK::Snapshot.from_file("checkpoint_1.json")
796
+ swarm.restore(snapshot)
797
+
798
+ # Retry phase 2 with different approach
799
+ result = swarm.execute("Implement API endpoints using different pattern")
800
+ ```
801
+
802
+ ### NodeOrchestrator Workflows
803
+
804
+ ```ruby
805
+ orchestrator = SwarmSDK::NodeOrchestrator.new(
806
+ swarm_name: "Dev Workflow",
807
+ agent_definitions: { planner: planner_def, coder: coder_def },
808
+ nodes: { planning: planning_node, coding: coding_node },
809
+ start_node: :planning
810
+ )
811
+
812
+ # Execute workflow
813
+ result = orchestrator.execute("Build user registration")
814
+
815
+ # Save snapshot
816
+ snapshot = orchestrator.snapshot
817
+ snapshot.write_to_file("workflow_session.json")
818
+
819
+ # === Later, new process ===
820
+
821
+ # Restore and continue
822
+ orchestrator = SwarmSDK::NodeOrchestrator.new(...) # Same config
823
+ snapshot = SwarmSDK::Snapshot.from_file("workflow_session.json")
824
+ orchestrator.restore(snapshot)
825
+
826
+ # Continue workflow
827
+ result = orchestrator.execute("Add email verification")
828
+ ```
829
+
830
+ ## Advanced Topics
831
+
832
+ ### Handling Configuration Mismatches
833
+
834
+ When your swarm config changes (agent renamed/removed), restore handles it gracefully:
835
+
836
+ ```ruby
837
+ # Original config had agents: backend, frontend, database
838
+ # New config only has: backend, frontend
839
+
840
+ snapshot = SwarmSDK::Snapshot.from_file("old_session.json")
841
+ result = swarm.restore(snapshot)
842
+
843
+ if result.partial_restore?
844
+ puts result.summary
845
+ # => "Snapshot restored with warnings. 1 agents skipped, 1 delegation instances skipped."
846
+
847
+ result.warnings.each do |warning|
848
+ case warning[:type]
849
+ when :agent_not_found
850
+ puts "⚠️ Agent '#{warning[:agent]}' no longer exists"
851
+ puts " #{warning[:message]}"
852
+ when :delegation_instance_not_restorable
853
+ puts "⚠️ Delegation '#{warning[:instance]}' can't be restored"
854
+ puts " #{warning[:message]}"
855
+ end
856
+ end
857
+
858
+ # Decide whether to proceed
859
+ if result.skipped_agents.size > 2
860
+ puts "Too many agents missing, aborting"
861
+ exit 1
862
+ end
863
+ end
864
+
865
+ # Continue with partial state
866
+ swarm.execute("Continue work with available agents")
867
+ ```
868
+
869
+ ### Multiple Storage Backends
870
+
871
+ ```ruby
872
+ # File storage
873
+ snapshot.write_to_file("snapshots/session_123.json")
874
+
875
+ # Redis storage
876
+ redis.set("snapshot:123", snapshot.to_json(pretty: false))
877
+
878
+ # PostgreSQL storage
879
+ DB[:snapshots].insert(
880
+ session_id: 123,
881
+ data: Sequel.pg_jsonb(snapshot.to_hash)
882
+ )
883
+
884
+ # S3 storage
885
+ s3.put_object(
886
+ bucket: "my-snapshots",
887
+ key: "session_123.json",
888
+ body: snapshot.to_json
889
+ )
890
+ ```
891
+
892
+ ### Snapshot Inspection
893
+
894
+ ```ruby
895
+ snapshot = SwarmSDK::Snapshot.from_file("session.json")
896
+
897
+ # Check what's in the snapshot
898
+ puts "Type: #{snapshot.type}"
899
+ puts "Created: #{snapshot.snapshot_at}"
900
+ puts "Agents: #{snapshot.agent_names.join(', ')}"
901
+ puts "Delegations: #{snapshot.delegation_instance_names.join(', ')}"
902
+
903
+ # Inspect raw data
904
+ data = snapshot.to_hash
905
+ puts "Message count: #{data[:agents][:backend][:conversation].size}"
906
+ puts "Scratchpad entries: #{data[:scratchpad].keys.size}"
907
+
908
+ # Verify compatibility before restoring
909
+ unless snapshot.agent_names.all? { |name| swarm.agent_names.include?(name.to_sym) }
910
+ puts "⚠️ Snapshot contains agents not in current config"
911
+ end
912
+ ```
913
+
914
+ ### Atomic File Writes
915
+
916
+ Snapshots use atomic writes to prevent corruption:
917
+
918
+ ```ruby
919
+ # Write to temp file, then rename
920
+ # Pattern: session.json.tmp.PID.TIMESTAMP.RANDOM
921
+ snapshot.write_to_file("session.json")
922
+
923
+ # Even if process crashes during write:
924
+ # - session.json is never corrupted (atomic rename)
925
+ # - Temp file is cleaned up on next write
926
+ # - Multiple processes can write different files safely
927
+ ```
928
+
929
+ ### Content Digest Verification
930
+
931
+ Snapshots include SHA256 digests of all read files and memory entries:
932
+
933
+ ```ruby
934
+ # Agent reads file
935
+ swarm.execute("Read config.yml and update database settings")
936
+
937
+ # Snapshot includes digest
938
+ snapshot = swarm.snapshot
939
+ hash = snapshot.to_hash
940
+ hash[:read_tracking][:backend]["config.yml"]
941
+ # => "a1b2c3d4e5f67890abcdef..." (SHA256 digest)
942
+
943
+ # File externally modified
944
+ File.write("config.yml", "completely different content")
945
+
946
+ # Restore in new process
947
+ swarm2 = SwarmSDK.build { ... }
948
+ swarm2.restore(snapshot)
949
+
950
+ # Agent tries to edit without re-reading
951
+ swarm2.execute("Edit config.yml to change port")
952
+ # => Agent must re-read file first (digest doesn't match)
953
+ # => Prevents editing based on stale content from LLM memory
954
+ ```
955
+
956
+ ## API Reference
957
+
958
+ ### Swarm#snapshot
959
+
960
+ Create a snapshot of current conversation state.
961
+
962
+ ```ruby
963
+ snapshot = swarm.snapshot
964
+ # => SwarmSDK::Snapshot
965
+ ```
966
+
967
+ **Returns**: `Snapshot` object
968
+
969
+ **Captures**:
970
+ - All agent conversations
971
+ - Agent context state
972
+ - Delegation conversations
973
+ - Scratchpad contents
974
+ - Read tracking with digests
975
+ - Memory read tracking with digests
976
+
977
+ ### Swarm#restore
978
+
979
+ Restore conversation state from snapshot.
980
+
981
+ ```ruby
982
+ # Default: use current system prompts from config
983
+ result = swarm.restore(snapshot)
984
+ # => SwarmSDK::RestoreResult
985
+
986
+ # Advanced: use historical system prompts from snapshot
987
+ result = swarm.restore(snapshot, preserve_system_prompts: true)
988
+ # => SwarmSDK::RestoreResult
989
+ ```
990
+
991
+ **Parameters**:
992
+ - `snapshot` - `Snapshot` object, hash, or JSON string
993
+ - `preserve_system_prompts` - Boolean, default `false`
994
+ - `false` (default): Use current system prompts from agent definitions
995
+ - `true`: Use historical system prompts from snapshot
996
+
997
+ **Returns**: `RestoreResult` object
998
+
999
+ **Requirements**:
1000
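+ - Swarm should have the same agents (by name) as the snapshot; snapshot agents missing from the current config are skipped with warnings (check `RestoreResult#partial_restore?`)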
1001
+ - With `preserve_system_prompts: false` (default):
1002
+ - System prompts come from current config (YAML + SDK defaults + plugins)
1003
+ - Allows prompt iteration without creating new sessions
1004
+ - With `preserve_system_prompts: true`:
1005
+ - System prompts come from snapshot (historical)
1006
+ - Exact reproducibility for debugging/auditing
1007
+
1008
+ ### Snapshot.from_file
1009
+
1010
+ Load snapshot from JSON file.
1011
+
1012
+ ```ruby
1013
+ snapshot = SwarmSDK::Snapshot.from_file("session.json")
1014
+ # => SwarmSDK::Snapshot
1015
+ ```
1016
+
1017
+ **Parameters**:
1018
+ - `path` - File path to JSON file
1019
+
1020
+ **Returns**: `Snapshot` object
1021
+
1022
+ ### Snapshot.from_json
1023
+
1024
+ Create snapshot from JSON string.
1025
+
1026
+ ```ruby
1027
+ snapshot = SwarmSDK::Snapshot.from_json(json_string)
1028
+ # => SwarmSDK::Snapshot
1029
+ ```
1030
+
1031
+ **Parameters**:
1032
+ - `json_string` - JSON string
1033
+
1034
+ **Returns**: `Snapshot` object
1035
+
1036
+ ### Snapshot.from_hash
1037
+
1038
+ Create snapshot from hash.
1039
+
1040
+ ```ruby
1041
+ snapshot = SwarmSDK::Snapshot.from_hash(hash)
1042
+ # => SwarmSDK::Snapshot
1043
+ ```
1044
+
1045
+ **Parameters**:
1046
+ - `hash` - Hash with snapshot data
1047
+
1048
+ **Returns**: `Snapshot` object
1049
+
1050
+ ### Snapshot#to_hash
1051
+
1052
+ Convert snapshot to hash.
1053
+
1054
+ ```ruby
1055
+ hash = snapshot.to_hash
1056
+ # => Hash
1057
+ ```
1058
+
1059
+ **Returns**: Hash with all snapshot data
1060
+
1061
+ ### Snapshot#to_json
1062
+
1063
+ Convert snapshot to JSON string.
1064
+
1065
+ ```ruby
1066
+ json = snapshot.to_json
1067
+ # => String (pretty-printed JSON)
1068
+
1069
+ json = snapshot.to_json(pretty: false)
1070
+ # => String (compact JSON)
1071
+ ```
1072
+
1073
+ **Parameters**:
1074
+ - `pretty` - Boolean, default true
1075
+
1076
+ **Returns**: JSON string
1077
+
1078
+ ### Snapshot#write_to_file
1079
+
1080
+ Write snapshot to JSON file with atomic write protection.
1081
+
1082
+ ```ruby
1083
+ snapshot.write_to_file("session.json")
1084
+ snapshot.write_to_file("session.json", pretty: false)
1085
+ ```
1086
+
1087
+ **Parameters**:
1088
+ - `path` - File path
1089
+ - `pretty` - Boolean, default true (pretty-print JSON)
1090
+
1091
+ **Behavior**: Uses atomic write (temp file + rename) to prevent corruption
1092
+
1093
+ ## Use Cases
1094
+
1095
+ ### Long-Running Tasks
1096
+
1097
+ ```ruby
1098
+ def process_large_codebase(swarm, files)
1099
+ files.each_slice(10).with_index do |batch, i|
1100
+ swarm.execute("Process files: #{batch.join(', ')}")
1101
+
1102
+ # Checkpoint every 10 files
1103
+ snapshot = swarm.snapshot
1104
+ snapshot.write_to_file("checkpoint_#{i}.json")
1105
+ end
1106
+ end
1107
+
1108
+ # If process crashes, resume from last checkpoint
1109
+ snapshot = SwarmSDK::Snapshot.from_file("checkpoint_5.json")
1110
+ swarm = build_swarm
1111
+ swarm.restore(snapshot)
1112
+ process_large_codebase(swarm, remaining_files)
1113
+ ```
1114
+
1115
+ ### Multi-User Sessions
1116
+
1117
+ ```ruby
1118
+ class UserSession
1119
+ def initialize(user_id)
1120
+ @user_id = user_id
1121
+ @snapshot_path = "sessions/#{user_id}.json"
1122
+ end
1123
+
1124
+ def execute(prompt)
1125
+ swarm = build_user_swarm
1126
+
1127
+ # Restore previous session if exists
1128
+ if File.exist?(@snapshot_path)
1129
+ snapshot = SwarmSDK::Snapshot.from_file(@snapshot_path)
1130
+ swarm.restore(snapshot)
1131
+ end
1132
+
1133
+ # Execute prompt
1134
+ result = swarm.execute(prompt)
1135
+
1136
+ # Save updated snapshot
1137
+ snapshot = swarm.snapshot
1138
+ snapshot.write_to_file(@snapshot_path)
1139
+
1140
+ result
1141
+ end
1142
+ end
1143
+ ```
1144
+
1145
+ ### Background Jobs
1146
+
1147
+ ```ruby
1148
+ class SwarmJob
1149
+ def perform(job_id, prompt)
1150
+ # Load snapshot from previous job iteration
1151
+ snapshot_key = "job:#{job_id}:snapshot"
1152
+
1153
+ swarm = build_swarm
1154
+ if redis.exists?(snapshot_key)
1155
+ json = redis.get(snapshot_key)
1156
+ snapshot = SwarmSDK::Snapshot.from_json(json)
1157
+ swarm.restore(snapshot)
1158
+ end
1159
+
1160
+ # Execute work
1161
+ result = swarm.execute(prompt)
1162
+
1163
+ # Save snapshot for next iteration
1164
+ snapshot = swarm.snapshot
1165
+ redis.set(snapshot_key, snapshot.to_json(pretty: false))
1166
+ redis.expire(snapshot_key, 86400) # 24 hours
1167
+
1168
+ result
1169
+ end
1170
+ end
1171
+ ```
1172
+
1173
+ ### Testing
1174
+
1175
+ ```ruby
1176
+ class SwarmTest < Minitest::Test
1177
+ def test_snapshot_restore_preserves_context
1178
+ swarm = SwarmSDK.build { ... }
1179
+ swarm.execute("Initial task")
1180
+
1181
+ # Take snapshot
1182
+ snapshot = swarm.snapshot
1183
+
1184
+ # Create new swarm and restore
1185
+ swarm2 = SwarmSDK.build { ... } # Same config
1186
+ result = swarm2.restore(snapshot)
1187
+
1188
+ assert result.success?
1189
+ assert_equal swarm.agent(:backend).messages.size,
1190
+ swarm2.agent(:backend).messages.size
1191
+ end
1192
+ end
1193
+ ```
1194
+
1195
+ ## Advanced Patterns
1196
+
1197
+ ### Snapshot Versioning
1198
+
1199
+ Track multiple snapshots per session:
1200
+
1201
+ ```ruby
1202
+ class VersionedSnapshots
1203
+ def initialize(session_id)
1204
+ @session_id = session_id
1205
+ @version = 0
1206
+ end
1207
+
1208
+ def save(swarm)
1209
+ @version += 1
1210
+ snapshot = swarm.snapshot
1211
+ snapshot.write_to_file("sessions/#{@session_id}_v#{@version}.json")
1212
+ end
1213
+
1214
+ def load(version)
1215
+ SwarmSDK::Snapshot.from_file("sessions/#{@session_id}_v#{version}.json")
1216
+ end
1217
+
1218
+ def latest
1219
+ load(@version)
1220
+ end
1221
+ end
1222
+ ```
1223
+
1224
+ ### Snapshot Compression
1225
+
1226
+ For large conversations, compress snapshots:
1227
+
1228
+ ```ruby
1229
+ require "zlib"
1230
+
1231
+ # Save compressed
1232
+ snapshot = swarm.snapshot
1233
+ json = snapshot.to_json(pretty: false)
1234
+ compressed = Zlib.gzip(json)
1235
+ File.binwrite("session.json.gz", compressed)
1236
+
1237
+ # Load compressed
1238
+ compressed = File.binread("session.json.gz")
1239
+ json = Zlib.gunzip(compressed)
1240
+ snapshot = SwarmSDK::Snapshot.from_json(json)
1241
+ ```
1242
+
1243
+ ### Snapshot Diff
1244
+
1245
+ Compare two snapshots:
1246
+
1247
+ ```ruby
1248
+ def snapshot_diff(snapshot1, snapshot2)
1249
+ h1 = snapshot1.to_hash
1250
+ h2 = snapshot2.to_hash
1251
+
1252
+ {
1253
+ agents_added: snapshot2.agent_names - snapshot1.agent_names,
1254
+ agents_removed: snapshot1.agent_names - snapshot2.agent_names,
1255
+ message_count_changes: snapshot2.agent_names.map { |name|
1256
+ count1 = h1.dig(:agents, name.to_sym, :conversation)&.size || 0
1257
+ count2 = h2.dig(:agents, name.to_sym, :conversation)&.size || 0
1258
+ [name, count2 - count1]
1259
+ }.to_h
1260
+ }
1261
+ end
1262
+ ```
1263
+
1264
+ ### Scratchpad Persistence
1265
+
1266
+ Scratchpad is volatile by default, but snapshot preserves it:
1267
+
1268
+ ```ruby
1269
+ # Session 1
1270
+ swarm = SwarmSDK.build { ... }
1271
+ swarm.execute("Write progress to scratchpad://tasks/auth.md")
1272
+ snapshot = swarm.snapshot
1273
+
1274
+ # Scratchpad content is in snapshot
1275
+ hash = snapshot.to_hash
1276
+ hash[:scratchpad]["tasks/auth.md"]
1277
+ # => { content: "...", title: "...", updated_at: "...", size: 123 }
1278
+
1279
+ # Session 2 - scratchpad content restored
1280
+ swarm2 = SwarmSDK.build { ... }
1281
+ swarm2.restore(snapshot)
1282
+ swarm2.execute("Read scratchpad://tasks/auth.md")
1283
+ # => Agent sees content from previous session
1284
+ ```
1285
+
1286
+ **Note**: NodeOrchestrator doesn't snapshot scratchpad because each node creates its own fresh scratchpad.
1287
+
1288
+ ## Troubleshooting
1289
+
1290
+ ### Partial Restore Warnings
1291
+
1292
+ **Problem**: Getting warnings about skipped agents
1293
+
1294
+ **Solution**: Ensure swarm configuration matches snapshot
1295
+
1296
+ ```ruby
1297
+ result = swarm.restore(snapshot)
1298
+ if result.partial_restore?
1299
+ # Check which agents are missing
1300
+ puts "Skipped: #{result.skipped_agents.join(', ')}"
1301
+
1302
+ # Option 1: Update config to include missing agents
1303
+ # Option 2: Accept partial restore and continue
1304
+ # Option 3: Reject and don't proceed
1305
+ end
1306
+ ```
1307
+
1308
+ ### Agent Not Found Error
1309
+
1310
+ **Problem**: Agent in snapshot doesn't exist in current swarm
1311
+
1312
+ **Cause**: Configuration changed between snapshot and restore
1313
+
1314
+ **Solution**:
1315
+ ```ruby
1316
+ # Before restoring, check compatibility
1317
+ snapshot = SwarmSDK::Snapshot.from_file("session.json")
1318
+ current_agents = swarm.agent_names
1319
+ snapshot_agents = snapshot.agent_names.map(&:to_sym)
1320
+
1321
+ missing = snapshot_agents - current_agents
1322
+ if missing.any?
1323
+ puts "⚠️ Snapshot contains agents not in current config: #{missing.join(', ')}"
1324
+ puts "Add these agents to your config or accept partial restore"
1325
+ end
1326
+
1327
+ result = swarm.restore(snapshot)
1328
+ # Missing agents will be skipped with warnings
1329
+ ```
1330
+
1331
+ ### Version Mismatch
1332
+
1333
+ **Problem**: `Unsupported snapshot version: X.X.X`
1334
+
1335
+ **Cause**: Snapshot created with different SwarmSDK version
1336
+
1337
+ **Solution**:
1338
+ - Update SwarmSDK to compatible version
1339
+ - Or migrate the snapshot to the new format (once a migration guide is available)
1340
+
1341
+ ### Type Mismatch
1342
+
1343
+ **Problem**: `Snapshot type 'swarm' doesn't match orchestration type 'node_orchestrator'`
1344
+
1345
+ **Cause**: Trying to restore swarm snapshot into NodeOrchestrator (or vice versa)
1346
+
1347
+ **Solution**: Use correct orchestration type that matches snapshot
1348
+
1349
+ ### Stale Content After Restore
1350
+
1351
+ **Problem**: Agent edits file that was modified externally
1352
+
1353
+ **This is prevented automatically!** Digest tracking ensures agents must re-read files if content changed:
1354
+
1355
+ ```ruby
1356
+ # Before snapshot
1357
+ swarm.execute("Read and analyze config.yml")
1358
+ snapshot = swarm.snapshot
1359
+
1360
+ # File modified externally
1361
+ File.write("config.yml", "new content")
1362
+
1363
+ # After restore
1364
+ swarm2.restore(snapshot)
1365
+ swarm2.execute("Update config.yml")
1366
+ # => Agent must re-read config.yml first
1367
+ # => "Cannot edit config.yml without reading it first"
1368
+ ```
1369
+
1370
+ ### Empty Messages After Restore
1371
+
1372
+ **Problem**: Agent has fewer messages after restore
1373
+
1374
+ **Likely Causes**:
1375
+ 1. Accessing agent before calling `restore()` (triggers initialization)
1376
+ 2. JSON parsing without `symbolize_names: true`
1377
+
1378
+ **Solution**:
1379
+ ```ruby
1380
+ # ❌ Wrong - agent initialized before restore
1381
+ swarm = SwarmSDK.build { ... }
1382
+ agent = swarm.agent(:backend) # Initializes agent
1383
+ swarm.restore(snapshot) # Restores but agent already has system message
1384
+
1385
+ # ✅ Correct - restore before accessing agents
1386
+ swarm = SwarmSDK.build { ... }
1387
+ swarm.restore(snapshot) # Restores to uninitialized agents
1388
+ agent = swarm.agent(:backend) # Access after restore
1389
+
1390
+ # Also ensure proper JSON parsing
1391
+ snapshot = SwarmSDK::Snapshot.from_json(json_string) # Handles this automatically
1392
+ ```
1393
+
1394
+ ## Best Practices
1395
+
1396
+ ### 1. Always Use Same Configuration
1397
+
1398
+ ```ruby
1399
+ # Save config hash with snapshot for verification
1400
+ config_hash = Digest::SHA256.hexdigest(swarm_yaml_content)
1401
+
1402
+ snapshot_data = {
1403
+ config_hash: config_hash,
1404
+ snapshot: swarm.snapshot.to_hash
1405
+ }
1406
+
1407
+ # On restore, verify config hasn't changed
1408
+ saved_hash = snapshot_data[:config_hash]
1409
+ current_hash = Digest::SHA256.hexdigest(swarm_yaml_content)
1410
+
1411
+ unless saved_hash == current_hash
1412
+ puts "⚠️ Configuration has changed since snapshot"
1413
+ end
1414
+ ```
1415
+
1416
+ ### 2. Set Expiration on Stored Snapshots
1417
+
1418
+ ```ruby
1419
+ # Redis
1420
+ redis.set("snapshot:#{id}", snapshot.to_json)
1421
+ redis.expire("snapshot:#{id}", 7.days.to_i)
1422
+
1423
+ # Database
1424
+ SwarmSession.where("created_at < ?", 30.days.ago).delete_all
1425
+ ```
1426
+
1427
+ ### 3. Validate Restore Results
1428
+
1429
+ ```ruby
1430
+ result = swarm.restore(snapshot)
1431
+
1432
+ # Don't silently ignore partial restores
1433
+ if result.partial_restore?
1434
+ logger.warn("Partial snapshot restore: #{result.summary}")
1435
+
1436
+ # Notify user or take corrective action
1437
+ if result.skipped_agents.include?(:critical_agent)
1438
+ raise "Critical agent missing from restore"
1439
+ end
1440
+ end
1441
+ ```
1442
+
1443
+ ### 4. Use Pretty JSON for Files, Compact for Storage
1444
+
1445
+ ```ruby
1446
+ # Development/debugging - use pretty JSON
1447
+ snapshot.write_to_file("debug_session.json", pretty: true)
1448
+
1449
+ # Production Redis/DB - use compact JSON
1450
+ redis.set("snapshot:#{id}", snapshot.to_json(pretty: false))
1451
+ ```
1452
+
1453
+ ### 5. Restore Before Accessing Agents
1454
+
1455
+ ```ruby
1456
+ # ✅ Correct order
1457
+ swarm = SwarmSDK.build { ... }
1458
+ swarm.restore(snapshot)
1459
+ agent = swarm.agent(:backend) # Accesses after restore
1460
+
1461
+ # ❌ Wrong order
1462
+ swarm = SwarmSDK.build { ... }
1463
+ agent = swarm.agent(:backend) # Initializes agent first
1464
+ swarm.restore(snapshot) # Restore won't work correctly
1465
+ ```
1466
+
1467
+ ## Security Considerations
1468
+
1469
+ ### Snapshot Content
1470
+
1471
+ Snapshots contain:
1472
+ - Full conversation history (may include sensitive data)
1473
+ - File paths that were read
1474
+ - Scratchpad content
1475
+
1476
+ **Recommendations**:
1477
+ - Encrypt snapshots if storing sensitive conversations
1478
+ - Use appropriate access controls on snapshot storage
1479
+ - Implement data retention policies
1480
+ - Don't commit snapshots to version control
1481
+
1482
+ ### Digest Verification
1483
+
1484
+ Digest tracking prevents:
1485
+ - Editing files that changed externally (prevents data corruption)
1486
+ - Time-of-check to time-of-use (TOCTOU) bugs
1487
+ - Stale content edits based on LLM memory
1488
+
1489
+ ## Examples
1490
+
1491
+ See `examples/snapshot_demo.rb` for a complete working example.
1492
+
1493
+ ## Related Documentation
1494
+
1495
+ - [Getting Started](getting-started.md) - Basic SwarmSDK usage
1496
+ - [Complete Tutorial](complete-tutorial.md) - Full SwarmSDK tutorial
1497
+ - [Rails Integration](rails-integration.md) - Using snapshots in Rails apps
1498
+ - [Composable Swarms](composable-swarms.md) - Sub-swarm snapshots