agentlings 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. {agentlings-0.2.1 → agentlings-0.2.3}/.gitignore +2 -0
  2. agentlings-0.2.3/DESIGN-memory-sleep.md +505 -0
  3. {agentlings-0.2.1 → agentlings-0.2.3}/PKG-INFO +78 -38
  4. {agentlings-0.2.1 → agentlings-0.2.3}/README.md +77 -37
  5. {agentlings-0.2.1 → agentlings-0.2.3}/pyproject.toml +1 -1
  6. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/protocol/a2a.py +16 -2
  7. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/protocol/a2a_task_store.py +12 -3
  8. agentlings-0.2.3/tests/unit/test_a2a_executor.py +292 -0
  9. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_a2a_task_store.py +16 -0
  10. {agentlings-0.2.1 → agentlings-0.2.3}/.env.example +0 -0
  11. {agentlings-0.2.1 → agentlings-0.2.3}/.github/workflows/ci.yml +0 -0
  12. {agentlings-0.2.1 → agentlings-0.2.3}/.github/workflows/publish.yml +0 -0
  13. {agentlings-0.2.1 → agentlings-0.2.3}/CLAUDE.md +0 -0
  14. {agentlings-0.2.1 → agentlings-0.2.3}/Dockerfile +0 -0
  15. {agentlings-0.2.1 → agentlings-0.2.3}/LICENSE +0 -0
  16. {agentlings-0.2.1 → agentlings-0.2.3}/agent.example.yaml +0 -0
  17. {agentlings-0.2.1 → agentlings-0.2.3}/docker-compose.test.yml +0 -0
  18. {agentlings-0.2.1 → agentlings-0.2.3}/logo.png +0 -0
  19. {agentlings-0.2.1 → agentlings-0.2.3}/scripts/release.sh +0 -0
  20. {agentlings-0.2.1 → agentlings-0.2.3}/sleep.png +0 -0
  21. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/__init__.py +0 -0
  22. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/__main__.py +0 -0
  23. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/cli/__init__.py +0 -0
  24. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/cli/_migrations.py +0 -0
  25. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/cli/_templates.py +0 -0
  26. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/cli/_version.py +0 -0
  27. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/cli/init.py +0 -0
  28. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/cli/upgrade.py +0 -0
  29. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/config.py +0 -0
  30. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/__init__.py +0 -0
  31. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/completion.py +0 -0
  32. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/llm.py +0 -0
  33. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/loop.py +0 -0
  34. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/memory_models.py +0 -0
  35. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/memory_store.py +0 -0
  36. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/models.py +0 -0
  37. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/prompt.py +0 -0
  38. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/scheduler.py +0 -0
  39. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/sleep.py +0 -0
  40. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/store.py +0 -0
  41. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/task.py +0 -0
  42. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/core/telemetry.py +0 -0
  43. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/log.py +0 -0
  44. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/migrations/__init__.py +0 -0
  45. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/migrations/m0001_seed.py +0 -0
  46. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/protocol/__init__.py +0 -0
  47. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/protocol/agent_card.py +0 -0
  48. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/protocol/mcp.py +0 -0
  49. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/server.py +0 -0
  50. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/templates/__init__.py +0 -0
  51. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/templates/default/.env.example +0 -0
  52. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/templates/default/agent.yaml +0 -0
  53. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/tools/__init__.py +0 -0
  54. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/tools/builtins.py +0 -0
  55. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/tools/memory.py +0 -0
  56. {agentlings-0.2.1 → agentlings-0.2.3}/src/agentlings/tools/registry.py +0 -0
  57. {agentlings-0.2.1 → agentlings-0.2.3}/tests/Dockerfile +0 -0
  58. {agentlings-0.2.1 → agentlings-0.2.3}/tests/__init__.py +0 -0
  59. {agentlings-0.2.1 → agentlings-0.2.3}/tests/agent.test.yaml +0 -0
  60. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/__init__.py +0 -0
  61. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/a2a_client.py +0 -0
  62. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/conftest.py +0 -0
  63. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/mcp_client.py +0 -0
  64. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/test_a2a.py +0 -0
  65. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/test_agent_card.py +0 -0
  66. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/test_mcp.py +0 -0
  67. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/test_ollama.py +0 -0
  68. {agentlings-0.2.1 → agentlings-0.2.3}/tests/integration/test_task_flow.py +0 -0
  69. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/__init__.py +0 -0
  70. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/conftest.py +0 -0
  71. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_agent_card.py +0 -0
  72. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_cli_init.py +0 -0
  73. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_cli_upgrade.py +0 -0
  74. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_completion.py +0 -0
  75. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_config.py +0 -0
  76. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_live_api.py +0 -0
  77. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_llm.py +0 -0
  78. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_logging.py +0 -0
  79. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_loop.py +0 -0
  80. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_mcp_handler.py +0 -0
  81. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_memory_models.py +0 -0
  82. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_memory_store.py +0 -0
  83. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_memory_tool.py +0 -0
  84. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_models.py +0 -0
  85. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_prompt.py +0 -0
  86. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_scheduler.py +0 -0
  87. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_sleep.py +0 -0
  88. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_store.py +0 -0
  89. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_task.py +0 -0
  90. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_telemetry.py +0 -0
  91. {agentlings-0.2.1 → agentlings-0.2.3}/tests/unit/test_tools.py +0 -0
@@ -10,3 +10,5 @@ build/
10
10
  .pytest_cache/
11
11
  .venv/
12
12
  venv/
13
+ .idea/
14
+ .DS_Store
@@ -0,0 +1,505 @@
1
+ # Memory & Sleep Cycle
2
+
3
+ Design document for persistent memory and nightly sleep cycle features in Agentlings.
4
+
5
+ ## Overview
6
+
7
+ Agentlings are currently stateless between conversations. Memory and sleep transform an agentling from a tool that forgets into an agent that learns. Memory provides a small, curated working context injected into every interaction. The sleep cycle is a nightly process that journals activity, consolidates new knowledge, prunes stale information, and performs retention cleanup.
8
+
9
+ ## Memory
10
+
11
+ ### Storage
12
+
13
+ Memory is a JSON file (`memory.json`) stored in the agent's data directory alongside the JSONL conversation logs and Markdown journals:
14
+
15
+ ```
16
+ data/
17
+ conversations/
18
+ abc123.jsonl
19
+ def456.jsonl
20
+ journals/
21
+ 2026-04-01.md
22
+ 2026-04-02.md
23
+ memory.json
24
+ ```
25
+
26
+ ### Format
27
+
28
+ ```json
29
+ {
30
+ "entries": [
31
+ {
32
+ "key": "cluster-node-count",
33
+ "value": "4 nodes: node1 (control), node2-4 (workers)",
34
+ "recorded": "2026-04-01T10:00:00Z"
35
+ },
36
+ {
37
+ "key": "known-issue-coredns",
38
+ "value": "CoreDNS restarts on node3, related to memory pressure",
39
+ "recorded": "2026-03-28T14:30:00Z"
40
+ },
41
+ {
42
+ "key": "storage-path",
43
+ "value": "/mnt/lab/k3s is the source of truth for all manifests",
44
+ "recorded": "2026-03-25T09:00:00Z"
45
+ }
46
+ ]
47
+ }
48
+ ```
49
+
50
+ JSON for all runtime data, YAML for config. Conversations are JSONL, batch API responses are JSON, structured outputs return JSON. No serialization boundary to cross. The Pydantic models (see below) serialize directly to and from this file.
51
+
52
+ ### Pydantic Models
53
+
54
+ All structured data exchanged with the LLM uses Pydantic models and Anthropic's structured outputs (`output_config` with `json_schema`). The API guarantees responses conform to the schema. No prompt-based JSON formatting, no parsing retries, no validation loops.
55
+
56
+ ```python
57
+ from pydantic import BaseModel
58
+ from datetime import datetime
59
+
60
+
61
+ class MemoryEntry(BaseModel):
62
+ key: str
63
+ value: str
64
+ recorded: datetime
65
+
66
+
67
+ class MemoryStore(BaseModel):
68
+ entries: list[MemoryEntry]
69
+
70
+
71
+ class MemoryCandidate(BaseModel):
72
+ key: str
73
+ value: str
74
+
75
+
76
+ class ConversationSummary(BaseModel):
77
+ summary: str
78
+ memory_candidates: list[MemoryCandidate]
79
+
80
+
81
+ class ConsolidatedMemory(BaseModel):
82
+ entries: list[MemoryEntry]
83
+ ```
84
+
85
+ `MemoryStore` is the on-disk format (`memory.json`). `ConversationSummary` is the output of each deep sleep batch call. `ConsolidatedMemory` is the output of the REM consolidation call. The `memory_edit` tool reads and writes `MemoryStore` directly.
86
+
87
+ ### Injection
88
+
89
+ Context is injected into every LLM call in this order:
90
+
91
+ 1. System prompt (identity, personality, foundational knowledge)
92
+ 2. Memory block (what the agent has learned)
93
+ 3. Data directory context (where the agent's own files live)
94
+ 4. Conversation replay (current interaction)
95
+
96
+ If memory is empty or the file does not exist, the memory block is omitted. The system prompt is the agent's day-one knowledge. Memory is what it learns over time. No overlap, no ambiguity.
97
+
98
+ ### Data Directory Awareness
99
+
100
+ The agent is told about its own data directory so it can use existing filesystem tools to access past context beyond what is in memory. The following is appended after the memory block:
101
+
102
+ ```
103
+ Your data directory is at {data_dir}. It contains:
104
+ - memory.json: your long-term memory (also provided above)
105
+ - journals/YYYY-MM-DD.md: daily summaries of your past activity
106
+ - conversations/*.jsonl: raw conversation logs
107
+
108
+ You can read these files using your filesystem tools to recall past context
109
+ that is not in your current memory. For example:
110
+ - List journals to see which days you were active
111
+ - Read a journal to recall what happened on a specific day
112
+ - Search across journals to find when an issue first appeared
113
+ - Read conversation logs for full detail on a past interaction
114
+ ```
115
+
116
+ This requires no new tools. The agent already has `read_file`, `list_directory`, and `search_files` from the filesystem tool group. It just needs to know where to look. The journals and conversations become a searchable archive that extends memory without consuming token budget.
117
+
118
+ ### Token Budget
119
+
120
+ Memory has a configurable hard token budget (default: 2000 tokens). The agent must stay within this budget. If it approaches the limit, it must decide what to drop before adding new facts. Token counting is performed before injection.
121
+
122
+ ### Tool
123
+
124
+ The agent gets a `memory_edit` tool with three operations:
125
+
126
+ | Operation | Description |
127
+ |-----------|-------------|
128
+ | `set` | Upsert an entry by key. Sets `recorded` to current timestamp. |
129
+ | `remove` | Delete an entry by key. |
130
+ | `list` | Return all current memory entries. |
131
+
132
+ The system prompt instructs the agent on what belongs in memory: operational knowledge, patterns, decisions made, environmental facts, known issues, things that changed. Memory is not a knowledge base. It is working context.
133
+
134
+ ### Scope
135
+
136
+ Memory is per-agent, never shared. If two agents need to exchange knowledge, they do so over A2A or MCP. Shared memory creates coupling, versioning problems, and conflict resolution complexity. Clean protocol boundaries replace shared state.
137
+
138
+ ### First Boot
139
+
140
+ If `memory.json` does not exist or is empty, the agent starts with no memory and the memory block is omitted from the prompt. The system prompt carries all foundational knowledge for day one.
141
+
142
+ ## Sleep Cycle
143
+
144
+ ### Configuration
145
+
146
+ The sleep cycle is configured in the agent YAML:
147
+
148
+ ```yaml
149
+ sleep:
150
+ schedule: "0 2 * * *" # cron expression, default 2am
151
+ journal_retention_days: 30 # how long to keep journal files
152
+ conversation_retention_days: 14 # how long to keep JSONL files
153
+ memory_max_entries: 50 # hard cap on memory entries
154
+ model: null # override model for sleep (null = use agent default)
155
+ ```
156
+
157
+ ### Scheduling
158
+
159
+ An asyncio task in the main process evaluates the cron expression and fires the sleep cycle. Since agentlings are already long-running uvicorn processes, this fits naturally. No external scheduler required.
160
+
161
+ ### Phases
162
+
163
+ The sleep cycle maps to biological sleep phases. Each phase is a distinct function, independently testable, with a clear single responsibility.
164
+
165
+ #### 1. Light Sleep -- Gate Check
166
+
167
+ Quick check: were there any conversations today? If not, skip all subsequent phases. No work, no spend. Memory stays as-is until the next active day gives the model something to evaluate against.
168
+
169
+ Also performs pre-flight validation: is the LLM reachable, is the data directory writable.
170
+
171
+ #### 2. Deep Sleep -- Replay and File
172
+
173
+ During biological deep sleep, the hippocampus replays experiences and files them from short-term into long-term storage. This phase does the same.
174
+
175
+ For each conversation from today:
176
+
177
+ 1. Locate the last compaction marker in the JSONL (the "story so far")
178
+ 2. Collect messages from the compaction marker forward
179
+ 3. Submit a summary call to the LLM
180
+
181
+ All summary calls are submitted as a single **batch request** to the Anthropic Message Batches API. This runs at 50% cost and processes in parallel. The sleep cycle polls for completion with a configurable timeout (default: 30 minutes). If the batch fails or times out, continue with whatever results are available (journal and consolidate from the partial results); if no results are available, skip to deep sleep housekeeping.
182
+
183
+ Each summary call receives:
184
+
185
+ - The agent's system prompt (provides the lens for judging importance)
186
+ - Current memory (so it knows what the agent already knows)
187
+ - The conversation content (from last compaction marker forward)
188
+
189
+ The system prompt is cached across batch calls by the Anthropic API, so it is paid for once at full price and cached for the remainder of the batch.
190
+
191
+ Each call returns a `ConversationSummary` via Anthropic's structured outputs:
192
+
193
+ ```python
194
+ response = client.messages.parse(
195
+ model=sleep_model,
196
+ max_tokens=4096,
197
+ system=system_prompt,
198
+ messages=[
199
+ {"role": "user", "content": f"Current memory:\n{current_memory}\n\nConversation:\n{conversation}"}
200
+ ],
201
+ output_format=ConversationSummary,
202
+ )
203
+ summary = response.parsed_output # typed ConversationSummary
204
+ ```
205
+
206
+ The API guarantees the response conforms to the `ConversationSummary` schema. No JSON parsing, no validation, no retries.
207
+
208
+ Note: for batch requests, `output_config` is included in each individual request body within the batch. Results are parsed from the batch response JSONL using `ConversationSummary.model_validate_json()`. The `client.messages.parse()` example above illustrates the schema contract; the actual batch implementation uses the batch API with the same `output_config`.
209
+
210
+ **Prompt for per-conversation summary:**
211
+
212
+ ```
213
+ You are performing a nightly review of a conversation that took place today.
214
+
215
+ Produce a concise summary of what happened: what was asked, what actions were
216
+ taken, what the outcome was, and anything left unresolved.
217
+
218
+ Extract any facts worth adding to your long-term memory. Only extract NEW facts
219
+ not already in your current memory. Focus on operational knowledge, patterns,
220
+ decisions, things that changed. Ignore passing context.
221
+
222
+ If the conversation was trivial or contained nothing new worth remembering,
223
+ return an empty memory_candidates list.
224
+ ```
225
+
226
+ The prompt focuses purely on the cognitive task. All format concerns are handled by the `ConversationSummary` Pydantic model and `output_config`.
227
+
228
+ Once all batch results are collected, token-count the summaries. If they fit in a single context window, write the journal directly. If not, batch-reduce the summaries until they fit (this should be rare given compaction keeps individual conversations bounded).
229
+
230
+ Write the journal to `journals/YYYY-MM-DD.md`.
231
+
232
+ #### 3. REM -- Integrate and Prune
233
+
234
+ During biological REM sleep, the brain integrates newly filed memories with existing ones, forms associations, and weakens connections that are not reinforced. This phase does the same.
235
+
236
+ A single LLM call receives:
237
+
238
+ - Current memory (all existing entries)
239
+ - Today's journal (the consolidated narrative)
240
+ - All memory candidates extracted during deep sleep
241
+
242
+ The call outputs a `ConsolidatedMemory` via structured outputs:
243
+
244
+ ```python
245
+ response = client.messages.parse(
246
+ model=sleep_model,
247
+ max_tokens=4096,
248
+ system=agent_system_prompt,
249
+ messages=[
250
+ {"role": "user", "content": f"Current memory:\n{current_memory}\n\nToday's journal:\n{journal}\n\nNew candidates:\n{memory_candidates}"}
251
+ ],
252
+ output_format=ConsolidatedMemory,
253
+ )
254
+ new_memory = response.parsed_output # typed ConsolidatedMemory
255
+ ```
256
+
257
+ **Prompt for REM consolidation:**
258
+
259
+ ```
260
+ You are performing nightly memory maintenance.
261
+
262
+ Your job:
263
+ 1. Integrate new candidates that add genuine value. Deduplicate against existing entries.
264
+ 2. Review every existing entry. Is it still relevant? Has it been superseded by
265
+ something learned today? Would it help you do your job tomorrow?
266
+ 3. Drop anything that is stale, redundant, or no longer operationally useful.
267
+ 4. You have a hard limit of {memory_max_entries} entries.
268
+
269
+ Preserve the recorded timestamp for entries you keep unchanged.
270
+ Set recorded to the current date for new or modified entries.
271
+ ```
272
+
273
+ The prompt focuses on judgment. The `ConsolidatedMemory` schema enforces the output structure.
274
+
275
+ The output is written atomically to `memory.json` (write to temp file, rename).
276
+
277
+ #### 4. Deep Sleep (Housekeeping) -- Retention Cleanup
278
+
279
+ Delete JSONL conversation files older than `conversation_retention_days`. Delete journal files older than `journal_retention_days`. This is simple filesystem work, no LLM calls.
280
+
281
+ ```
282
+ data/conversations/ -- delete where file age > conversation_retention_days
283
+ data/journals/ -- delete where age of filename date > journal_retention_days
284
+ ```
285
+
286
+ ### Error Handling
287
+
288
+ Take what succeeds, skip what fails. If the batch returns partial results, journal and consolidate from what is available. If consolidation fails, memory stays unchanged. If housekeeping fails, files are retained an extra day. Telemetry and logging surface failures for iteration. No retries in v1.
289
+
290
+ ### Quiet Days
291
+
292
+ If light sleep finds no conversations, the entire cycle is skipped. No LLM calls, no cost. Memory and journals are untouched.
293
+
294
+ ### Concurrency
295
+
296
+ The sleep cycle runs at 2am by default (configurable via `sleep.schedule`). If a conversation is in progress (unlikely but possible), the sleep cycle skips that conversation. It processes only conversations that have been idle (no new messages) for a configurable grace period (default: 5 minutes). The memory file is written atomically via temp-file-and-rename.
297
+
298
+ ## Observability
299
+
300
+ ### OpenTelemetry Integration
301
+
302
+ The sleep cycle emits telemetry to an OpenTelemetry collector via HTTP or gRPC. The collector endpoint and protocol are configurable:
303
+
304
+ ```yaml
305
+ telemetry:
306
+ enabled: true
307
+ endpoint: "http://otel-collector:4318" # HTTP endpoint (default)
308
+ protocol: "http" # "http" or "grpc"
309
+ service_name: "agentling"
310
+ insecure: true # disable TLS for local collectors
311
+ ```
312
+
313
+ ### Spans
314
+
315
+ The sleep cycle produces a hierarchical span tree:
316
+
317
+ ```
318
+ agentling.sleep
319
+ agentling.sleep.light_sleep
320
+ agentling.sleep.deep_sleep
321
+ agentling.sleep.deep_sleep.batch_submit
322
+ agentling.sleep.deep_sleep.batch_poll
323
+ agentling.sleep.deep_sleep.journal_write
324
+ agentling.sleep.rem
325
+ agentling.sleep.rem.consolidate
326
+ agentling.sleep.rem.memory_write
327
+ agentling.sleep.housekeeping
328
+ agentling.sleep.housekeeping.conversations
329
+ agentling.sleep.housekeeping.journals
330
+ ```
331
+
332
+ ### Span Attributes
333
+
334
+ All sleep spans include:
335
+
336
+ | Attribute | Description |
337
+ |------------------------------------|------------------------------------------------|
338
+ | `agent.name` | Agent name from YAML config |
339
+ | `sleep.phase` | Current phase (light_sleep, deep_sleep, rem, housekeeping) |
340
+ | `sleep.date` | Date being processed (YYYY-MM-DD) |
341
+
342
+ Phase-specific attributes:
343
+
344
+ **Light Sleep:**
345
+
346
+ | Attribute | Description |
347
+ |------------------------------------|------------------------------------------------|
348
+ | `sleep.conversations_found` | Number of conversations from today |
349
+ | `sleep.skipped` | Whether the cycle was skipped (no work) |
350
+
351
+ **Deep Sleep:**
352
+
353
+ | Attribute | Description |
354
+ |------------------------------------|------------------------------------------------|
355
+ | `sleep.batch.request_count` | Number of conversations in the batch |
356
+ | `sleep.batch.id` | Anthropic batch ID |
357
+ | `sleep.batch.status` | Final batch status |
358
+ | `sleep.batch.succeeded` | Count of successful results |
359
+ | `sleep.batch.failed` | Count of failed results |
360
+ | `sleep.batch.duration_ms` | Time from submit to completion |
361
+ | `sleep.batch.model` | Model used for batch calls |
362
+ | `sleep.journal.token_count` | Token count of written journal |
363
+ | `sleep.memory_candidates.count` | Total memory candidates extracted |
364
+
365
+ **REM:**
366
+
367
+ | Attribute | Description |
368
+ |------------------------------------|------------------------------------------------|
369
+ | `sleep.rem.model` | Model used for consolidation |
370
+ | `sleep.rem.input_token_count` | Tokens in consolidation prompt |
371
+ | `sleep.rem.output_token_count` | Tokens in consolidation response |
372
+ | `sleep.rem.entries_before` | Memory entry count before consolidation |
373
+ | `sleep.rem.entries_after` | Memory entry count after consolidation |
374
+ | `sleep.rem.entries_added` | New entries added |
375
+ | `sleep.rem.entries_pruned` | Entries removed during pruning |
376
+ | `sleep.rem.entries_modified` | Existing entries that were updated |
377
+
378
+ **Housekeeping:**
379
+
380
+ | Attribute | Description |
381
+ |------------------------------------|------------------------------------------------|
382
+ | `sleep.housekeeping.conversations_deleted` | Conversation files deleted |
383
+ | `sleep.housekeeping.journals_deleted` | Journal files deleted |
384
+ | `sleep.housekeeping.bytes_reclaimed` | Disk space freed |
385
+
386
+ ### Metrics
387
+
388
+ The following metrics are emitted as OpenTelemetry metrics:
389
+
390
+ | Metric | Type | Description |
391
+ |-------------------------------------------|-----------|------------------------------------------|
392
+ | `agentling.sleep.duration_seconds` | Histogram | Total sleep cycle duration |
393
+ | `agentling.sleep.phase.duration_seconds` | Histogram | Per-phase duration (tagged by phase) |
394
+ | `agentling.sleep.batch.cost_usd` | Counter | Estimated batch API cost |
395
+ | `agentling.sleep.rem.cost_usd` | Counter | Estimated consolidation call cost |
396
+ | `agentling.sleep.memory.entry_count` | Gauge | Current memory entry count post-sleep |
397
+ | `agentling.sleep.memory.token_count` | Gauge | Current memory token count post-sleep |
398
+ | `agentling.sleep.conversations_processed` | Counter | Conversations processed per cycle |
399
+ | `agentling.sleep.errors` | Counter | Errors per cycle (tagged by phase) |
400
+ | `agentling.sleep.skipped` | Counter | Cycles skipped due to no activity |
401
+
402
+ ### Events
403
+
404
+ Sleep phase transitions are logged as OpenTelemetry log events on the root span:
405
+
406
+ ```
407
+ [SLEEP:LIGHT] Checking for today's conversations
408
+ [SLEEP:LIGHT] Found 7 conversations, proceeding
409
+ [SLEEP:DEEP] Submitting batch of 7 summary requests
410
+ [SLEEP:DEEP] Batch msgbatch_abc123 completed: 7 succeeded, 0 failed
411
+ [SLEEP:DEEP] Journal written: journals/2026-04-01.md (847 tokens)
412
+ [SLEEP:REM] Consolidating memory: 12 existing + 5 candidates
413
+ [SLEEP:REM] Memory updated: 14 entries (3 added, 1 pruned, 0 modified)
414
+ [SLEEP:HOUSEKEEPING] Deleted 3 conversations, 0 journals, reclaimed 45KB
415
+ [SLEEP] Cycle complete in 47.3s
416
+ ```
417
+
418
+ ### Memory Tool Telemetry
419
+
420
+ The `memory_edit` tool (used during normal conversations) also emits spans:
421
+
422
+ ```
423
+ agentling.memory.set -- attributes: key, value_token_count, total_entries, total_token_count
424
+ agentling.memory.remove -- attributes: key, total_entries, total_token_count
425
+ agentling.memory.list -- attributes: total_entries, total_token_count
426
+ ```
427
+
428
+ This provides visibility into memory changes happening during the day, complementing the nightly sleep cycle telemetry.
429
+
430
+ ## Agent YAML Changes
431
+
432
+ The agent YAML gains three new top-level sections (`sleep`, `telemetry`, and `memory`), plus a new `memory` entry under `tools`:
433
+
434
+ ```yaml
435
+ name: k3s-agentling
436
+ description: A k3s cluster management agent
437
+
438
+ tools:
439
+ - bash
440
+ - filesystem
441
+ - memory # new: enables memory_edit tool
442
+
443
+ skills:
444
+ - id: k8s-ops
445
+ name: Kubernetes Operations
446
+ description: Manage cluster resources, diagnose issues, apply manifests
447
+ tags: [kubernetes, k3s, devops]
448
+
449
+ system_prompt: |
450
+ You are a DevOps engineer managing a k3s Kubernetes cluster.
451
+ ...
452
+
453
+ sleep: # new: sleep cycle configuration
454
+ schedule: "0 2 * * *"
455
+ journal_retention_days: 30
456
+ conversation_retention_days: 14
457
+ memory_max_entries: 50
458
+ model: null # null = use agent's default model
459
+
460
+ telemetry: # new: OpenTelemetry configuration
461
+ enabled: false
462
+ endpoint: "http://localhost:4318"
463
+ protocol: "http"
464
+ service_name: "agentling"
465
+ insecure: true
466
+
467
+ memory: # new: memory configuration
468
+ token_budget: 2000
469
+ ```
470
+
471
+ ## Environment Variables
472
+
473
+ | Variable | Default | Description |
474
+ |------------------------|---------|--------------------------------------------|
475
+ | `AGENT_OTEL_ENDPOINT` | -- | OpenTelemetry collector endpoint |
476
+ | `AGENT_OTEL_PROTOCOL` | `http` | Collector protocol (`http` or `grpc`) |
477
+ | `AGENT_OTEL_INSECURE` | `true` | Disable TLS for collector connection |
478
+
479
+ Environment variables override YAML config for secrets and deployment-specific values.
480
+
481
+ ## Dependencies
482
+
483
+ | Package | Purpose |
484
+ |---------------------------------|-------------------------------|
485
+ | `opentelemetry-api` | OTel tracing and metrics API |
486
+ | `opentelemetry-sdk` | OTel SDK implementation |
487
+ | `opentelemetry-exporter-otlp` | OTLP HTTP/gRPC exporter |
488
+
489
+ No other new dependencies. Memory uses JSON (stdlib `json`). Pydantic is already a dependency (required by `a2a-sdk` and `pydantic-settings`). Structured outputs use `client.messages.parse()` from the existing `anthropic` SDK. Token counting uses the `anthropic` SDK's tokenizer. Scheduling uses `asyncio` (stdlib). File operations use `os`/`pathlib` (stdlib).
490
+
491
+ ## CLI
492
+
493
+ Two new commands:
494
+
495
+ ```bash
496
+ # Show current memory
497
+ agentling memory show
498
+
499
+ # Trigger sleep cycle manually (useful for testing)
500
+ agentling sleep --date 2026-04-01
501
+ ```
502
+
503
+ ## Summary
504
+
505
+ Memory gives the agent durable working context. Sleep gives it a nightly reflection cycle that journals, consolidates, and prunes. OpenTelemetry provides full visibility into both. The design adds three dependencies (all OpenTelemetry), uses the existing file-based storage paradigm, leverages Anthropic's structured outputs with Pydantic models for guaranteed schema compliance, and leans on the batch API for cost-efficient nightly processing.