livingai 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. livingai-0.4.0/.github/workflows/ci.yml +54 -0
  2. livingai-0.4.0/.github/workflows/publish.yml +29 -0
  3. livingai-0.4.0/.gitignore +17 -0
  4. livingai-0.4.0/CONTRIBUTING.md +99 -0
  5. livingai-0.4.0/LICENSE +19 -0
  6. livingai-0.4.0/PKG-INFO +251 -0
  7. livingai-0.4.0/README.md +217 -0
  8. livingai-0.4.0/benchmarks/benchmark.py +172 -0
  9. livingai-0.4.0/docker-compose.yml +63 -0
  10. livingai-0.4.0/docs/README.md +32 -0
  11. livingai-0.4.0/docs/adapters.md +76 -0
  12. livingai-0.4.0/docs/api-reference.md +125 -0
  13. livingai-0.4.0/docs/checkpointing.md +85 -0
  14. livingai-0.4.0/docs/cli.md +57 -0
  15. livingai-0.4.0/docs/concepts.md +75 -0
  16. livingai-0.4.0/docs/migration.md +88 -0
  17. livingai-0.4.0/docs/quickstart.md +76 -0
  18. livingai-0.4.0/docs/recovery.md +79 -0
  19. livingai-0.4.0/docs/replay.md +66 -0
  20. livingai-0.4.0/examples/01_basic_checkpoint.py +45 -0
  21. livingai-0.4.0/examples/02_crash_recovery.py +89 -0
  22. livingai-0.4.0/examples/03_mock_tools_debugging.py +64 -0
  23. livingai-0.4.0/examples/04_cost_tracking.py +63 -0
  24. livingai-0.4.0/examples/05_langgraph_agent.py +55 -0
  25. livingai-0.4.0/examples/README.md +30 -0
  26. livingai-0.4.0/livingai/__init__.py +76 -0
  27. livingai-0.4.0/livingai/adapters/__init__.py +19 -0
  28. livingai-0.4.0/livingai/adapters/_base.py +136 -0
  29. livingai-0.4.0/livingai/adapters/crewai.py +37 -0
  30. livingai-0.4.0/livingai/adapters/langgraph.py +51 -0
  31. livingai-0.4.0/livingai/adapters/openai_agents.py +37 -0
  32. livingai-0.4.0/livingai/checkpoint.py +194 -0
  33. livingai-0.4.0/livingai/cli.py +148 -0
  34. livingai-0.4.0/livingai/compression.py +78 -0
  35. livingai-0.4.0/livingai/graph.py +207 -0
  36. livingai-0.4.0/livingai/metrics.py +66 -0
  37. livingai-0.4.0/livingai/py.typed +1 -0
  38. livingai-0.4.0/livingai/recovery.py +173 -0
  39. livingai-0.4.0/livingai/replay.py +137 -0
  40. livingai-0.4.0/livingai/storage/__init__.py +47 -0
  41. livingai-0.4.0/livingai/storage/sqlite_store.py +138 -0
  42. livingai-0.4.0/livingai/stores/__init__.py +23 -0
  43. livingai-0.4.0/livingai/stores/postgres.py +188 -0
  44. livingai-0.4.0/livingai/stores/redis.py +149 -0
  45. livingai-0.4.0/pyproject.toml +49 -0
  46. livingai-0.4.0/tests/conftest.py +8 -0
  47. livingai-0.4.0/tests/test_adapter.py +138 -0
  48. livingai-0.4.0/tests/test_checkpoint.py +290 -0
  49. livingai-0.4.0/tests/test_graph.py +135 -0
  50. livingai-0.4.0/tests/test_more_adapters.py +96 -0
  51. livingai-0.4.0/tests/test_postgres_store.py +221 -0
  52. livingai-0.4.0/tests/test_recovery.py +202 -0
  53. livingai-0.4.0/tests/test_redis_store.py +145 -0
  54. livingai-0.4.0/tests/test_replay.py +219 -0
  55. livingai-0.4.0/tests/test_sqlite_store.py +150 -0
  56. livingai-0.4.0/tests/test_stress.py +137 -0
@@ -0,0 +1,54 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
16
+ defaults:
17
+ run:
18
+ working-directory: livingai_runtime
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: ${{ matrix.python-version }}
26
+
27
+ - name: Install
28
+ run: |
29
+ python -m pip install --upgrade pip
30
+ pip install -e ".[dev]"
31
+ pip install mypy
32
+
33
+ - name: Type check (mypy --strict)
34
+ run: mypy --strict livingai
35
+
36
+ - name: Run tests with coverage
37
+ run: |
38
+ coverage run -m pytest -q
39
+ coverage report -m --include="*/livingai/*" --fail-under=100
40
+
41
+ benchmark:
42
+ runs-on: ubuntu-latest
43
+ defaults:
44
+ run:
45
+ working-directory: livingai_runtime
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+ - uses: actions/setup-python@v5
49
+ with:
50
+ python-version: "3.12"
51
+ - name: Install
52
+ run: pip install -e .
53
+ - name: Run benchmarks
54
+ run: python benchmarks/benchmark.py --json
@@ -0,0 +1,29 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ defaults:
11
+ run:
12
+ working-directory: livingai_runtime
13
+ permissions:
14
+ id-token: write # trusted publishing (OIDC), no API token needed
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.12"
20
+
21
+ - name: Build
22
+ run: |
23
+ python -m pip install --upgrade pip build
24
+ python -m build
25
+
26
+ - name: Publish to PyPI
27
+ uses: pypa/gh-action-pypi-publish@release/v1
28
+ with:
29
+ packages-dir: livingai_runtime/dist
@@ -0,0 +1,17 @@
1
+ # Python caches / build artifacts
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ build/
6
+ dist/
7
+
8
+ # Test / coverage
9
+ .pytest_cache/
10
+ .coverage
11
+ .coverage.*
12
+ htmlcov/
13
+
14
+ # Local databases produced by SQLiteStore
15
+ *.db
16
+ *.sqlite
17
+ *.sqlite3
@@ -0,0 +1,99 @@
1
+ # Contributing to Living AI Runtime
2
+
3
+ Thanks for your interest in contributing! This is an infrastructure project, so
4
+ the bar is **correctness and reliability first**. Boring, well-tested code wins.
5
+
6
+ ## Development setup
7
+
8
+ ```bash
9
+ cd livingai_runtime
10
+ python -m venv .venv && . .venv/bin/activate # or your preferred env
11
+ pip install -e ".[dev]"
12
+ pip install mypy
13
+ ```
14
+
15
+ The core has **zero runtime dependencies** — only the standard library. Do not
16
+ add a runtime dependency to `livingai/` without discussion; it undermines the
17
+ zero-config promise.
18
+
19
+ ## Running the checks locally
20
+
21
+ Everything CI runs, you can run locally:
22
+
23
+ ```bash
24
+ # Tests
25
+ python -m pytest -q
26
+
27
+ # Coverage (must stay at 100%)
28
+ python -m coverage run -m pytest -q
29
+ python -m coverage report -m --include="*/livingai/*" --fail-under=100
30
+
31
+ # Strict type checking (must pass clean)
32
+ python -m mypy --strict livingai
33
+
34
+ # Benchmarks (sanity check performance)
35
+ python benchmarks/benchmark.py
36
+ ```
37
+
38
+ ## Standards
39
+
40
+ - **100% test coverage** on `livingai/`. New code needs new tests.
41
+ - **`mypy --strict` clean.** Full type hints; the package ships `py.typed`.
42
+ - **Async-first.** All I/O is `async`; provide sync wrappers only when needed.
43
+ - **Append-only.** Never mutate stored records; write a new node version.
44
+ - **No global state.** Engines and stores are independent instances.
45
+ - Tests use plain `asyncio.run(...)` (no `pytest-asyncio` dependency).
46
+
47
+ ## Adding a new framework adapter
48
+
49
+ Adapters are thin translation layers. Subclass
50
+ [`BaseAdapter`](livingai/adapters/_base.py) and set three attributes:
51
+
52
+ ```python
53
+ from livingai.adapters._base import BaseAdapter
54
+
55
+ class MyFrameworkAdapter(BaseAdapter):
56
+ framework = "myframework" # metadata["framework"] tag
57
+ node_key = "mf_node" # metadata key for the node name
58
+ tool_hints = ("tool", "call", ...) # names that mark side-effecting TOOL nodes
59
+ ```
60
+
61
+ Rules:
62
+
63
+ - **Do not import the framework package.** Consume events as plain data so the
64
+ adapter runs anywhere and the core stays dependency-free.
65
+ - Mark side-effecting steps as `TOOL` (auto non-idempotent) so recovery never
66
+ re-runs them. Allow explicit `idempotent=` overrides.
67
+ - Add tests mirroring [`tests/test_more_adapters.py`](tests/test_more_adapters.py).
68
+ - Export it from `livingai/adapters/__init__.py` and the top-level `livingai`.
69
+
70
+ ## Extending the storage protocol
71
+
72
+ New backends (Redis, Postgres, ...) implement the
73
+ [`CheckpointStore`](livingai/storage/__init__.py) protocol:
74
+
75
+ ```python
76
+ async def write(node) -> None # append-only
77
+ async def read(node_id) -> ExecutionNode | None
78
+ async def list_by_execution(execution_id) -> list[ExecutionNode]
79
+ async def get_latest_checkpoint(execution_id) -> ExecutionNode | None
80
+ ```
81
+
82
+ A new backend must pass the **same** test suite as `SQLiteStore`
83
+ ([`tests/test_sqlite_store.py`](tests/test_sqlite_store.py)) — parametrize the
84
+ store fixture rather than duplicating tests.
85
+
86
+ ## Pull request checklist
87
+
88
+ - [ ] Tests added/updated; `pytest` green.
89
+ - [ ] Coverage at 100%.
90
+ - [ ] `mypy --strict` clean.
91
+ - [ ] Public API changes reflected in `docs/` and the top-level `__all__`.
92
+ - [ ] No new runtime dependency in `livingai/` (unless discussed).
93
+ - [ ] Commit messages are clear and scoped.
94
+
95
+ ## Reporting bugs
96
+
97
+ Prefer a minimal reproduction using the in-memory `SQLiteStore()`. Include the
98
+ Python version and the exact steps. For anything involving data loss or
99
+ corruption, please flag it clearly — that is the highest-priority class of bug.
livingai-0.4.0/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+
17
+ Copyright 2026 Living AI
18
+
19
+ A full copy of the Apache 2.0 license text is available at the URL above.
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: livingai
3
+ Version: 0.4.0
4
+ Summary: Checkpoint, recovery, and replay infrastructure for AI agents.
5
+ Project-URL: Homepage, https://github.com/livingai/livingai
6
+ Project-URL: Repository, https://github.com/livingai/livingai
7
+ Project-URL: Documentation, https://github.com/livingai/livingai#readme
8
+ Author: Living AI
9
+ License: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: agents,checkpoint,llm,observability,recovery,replay
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.9
23
+ Provides-Extra: dev
24
+ Requires-Dist: coverage>=7; extra == 'dev'
25
+ Requires-Dist: fakeredis; extra == 'dev'
26
+ Requires-Dist: pytest-asyncio; extra == 'dev'
27
+ Requires-Dist: pytest>=7; extra == 'dev'
28
+ Requires-Dist: redis>=4.2; extra == 'dev'
29
+ Provides-Extra: postgres
30
+ Requires-Dist: asyncpg>=0.28; extra == 'postgres'
31
+ Provides-Extra: redis
32
+ Requires-Dist: redis>=4.2; extra == 'redis'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # Living AI
36
+
37
+ **Crash recovery, checkpointing, and replay for AI agents — one runtime that works across LangGraph, CrewAI, and OpenAI Agents.**
38
+
39
+ [![CI](https://img.shields.io/badge/CI-py3.9–3.12-brightgreen)](.github/workflows/ci.yml)
40
+ [![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen)](#quality)
41
+ [![Types](https://img.shields.io/badge/mypy-strict-blue)](#quality)
42
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
43
+ [![Runtime deps](https://img.shields.io/badge/runtime%20deps-0-blueviolet)](pyproject.toml)
44
+
45
+ ---
46
+
47
+ ## The problem
48
+
49
+ AI agents crash. A process dies mid-workflow — after the LLM reasoned, after the
50
+ tool charged a card, three steps into a ten-step plan — and all of that work is
51
+ gone. You restart from zero, pay for the tokens again, and hope the tool doesn't
52
+ fire its side effects twice. And when something goes wrong, you can't replay what
53
+ happened to understand *why*.
54
+
55
+ ## The solution
56
+
57
+ Living AI records every step of an agent execution to an **append-only log**, so
58
+ any run can be:
59
+
60
+ - **Recovered** — resume from the last durable checkpoint after a crash, replaying
61
+ only the *idempotent* work and never re-running side-effecting tool calls
62
+ (payments, emails, API writes).
63
+ - **Replayed** — re-run a recorded execution for debugging, with `MOCK_TOOLS` mode
64
+ returning recorded tool responses so you can iterate on reasoning without real
65
+ API calls.
66
+ - **Audited** — inspect cost, latency, and the full node graph of any run.
67
+
68
+ ## Why it's different
69
+
70
+ Most observability and checkpointing tools lock you into one framework. Living AI
71
+ ships a **framework-agnostic core** with thin adapters for all three major agent
72
+ frameworks — the same recovery guarantees whether you use LangGraph, CrewAI, or
73
+ the OpenAI Agents SDK:
74
+
75
+ ```python
76
+ from livingai.adapters import LangGraphAdapter, CrewAIAdapter, OpenAIAgentsAdapter
77
+ ```
78
+
79
+ And the core has **zero runtime dependencies** — it's pure standard library.
80
+
81
+ ## Install
82
+
83
+ ```bash
84
+ pip install livingai
85
+ ```
86
+
87
+ ## Crash recovery in 18 lines
88
+
89
+ ```python
90
+ import asyncio
91
+ from livingai import (
92
+ CheckpointEngine, ExecutionNode, NodeType, RecoveryEngine, SQLiteStore, Status,
93
+ )
94
+
95
+
96
+ async def main():
97
+ engine = CheckpointEngine(SQLiteStore("agent.db"))
98
+
99
+ # Your agent checkpoints after an expensive step.
100
+ step = ExecutionNode(execution_id="run-1", type=NodeType.PROMPT,
101
+ status=Status.SUCCESS, output="plan ready")
102
+ await engine.save(step, state=b"...serialized agent state...")
103
+
104
+ # A tool with real side effects runs (e.g. charging a card).
105
+ charge = ExecutionNode(execution_id="run-1", type=NodeType.TOOL,
106
+ status=Status.SUCCESS, output={"receipt": "R-1"})
107
+ await engine.save(charge)
108
+
109
+ # 💥 The process crashes. On restart, recover from the durable log:
110
+ recovery = RecoveryEngine(CheckpointEngine(SQLiteStore("agent.db")))
111
+ plan = await recovery.plan("run-1")
112
+ print("resume from :", plan.resume_node_id) # last durable checkpoint
113
+ print("replay safe :", len(plan.replay_nodes)) # idempotent work to redo
114
+ print("skip effects:", len(plan.skipped_nodes)) # card is NOT re-charged
115
+
116
+
117
+ asyncio.run(main())
118
+ ```
119
+
120
+ ```
121
+ resume from : d482c31e-...
122
+ replay safe : 0
123
+ skip effects: 1 # the card is never charged twice
124
+ ```
125
+
126
+ The [`examples/`](examples/README.md) directory has five runnable demos (basic checkpointing, crash
127
+ recovery, `MOCK_TOOLS` debugging, cost tracking, and the LangGraph adapter) — none
128
+ require an LLM or network.
129
+
130
+ ## Performance
131
+
132
+ Checkpointing is on the hot path of every agent step, so it has to be fast. It is.
133
+
134
+ | Metric | Result | Notes |
135
+ | --- | --- | --- |
136
+ | Checkpoint write (p50) | **~0.3 ms** | 50 KB compressed state blob |
137
+ | Checkpoint write (p95) | **~0.8 ms** | |
138
+ | Checkpoint write (p99) | **~1 ms** | ~50× under the 50 ms budget |
139
+ | Hot recovery read | **~4 µs** | vs ~190 µs cold — ~40× faster |
140
+ | Compression | **60–99%** | typical agent state (histories, docs) |
141
+
142
+ Measured on a dev laptop with the **default 50 ms overhead budget**, 50 KB blobs,
143
+ 2000 writes — the same configuration you get out of the box. Reproduce with
144
+ `python benchmarks/benchmark.py`.
145
+
146
+ The overhead budget is enforced *in code*: a checkpoint write that would exceed it
147
+ is dropped and logged as *missed* rather than ever blocking your agent thread.
148
+
149
+ ## How it works
150
+
151
+ ```
152
+ ExecutionNode ──► CheckpointStore (Tier 2: durable, append-only)
153
+ ▲ ▲
154
+ │ │
155
+ Adapters CheckpointEngine ──► HotCache (Tier 1: LRU + TTL)
156
+ (LangGraph/ │
157
+ CrewAI/ RecoveryEngine ──► RecoveryPlan (replay vs. skip)
158
+ OpenAI) ReplaySession ──► FULL / FROM_NODE / MOCK_TOOLS / COUNTERFACTUAL
159
+ ```
160
+
161
+ Every execution is a DAG of `ExecutionNode` records. The log is never mutated,
162
+ only appended to — so any point in time can be reconstructed deterministically.
163
+ `TOOL` nodes default to **non-idempotent**, which is how recovery knows never to
164
+ re-run side effects. See [docs/concepts.md](docs/concepts.md) for the full model.
165
+
166
+ ## CLI
167
+
168
+ ```bash
169
+ livingai list --db agent.db # execution ids
170
+ livingai show run-1 --db agent.db # the node graph
171
+ livingai replay run-1 --db agent.db --mode MOCK_TOOLS
172
+ ```
173
+
174
+ ## Documentation
175
+
176
+ [Quickstart](docs/quickstart.md) ·
177
+ [Concepts](docs/concepts.md) ·
178
+ [Checkpointing](docs/checkpointing.md) ·
179
+ [Recovery](docs/recovery.md) ·
180
+ [Replay](docs/replay.md) ·
181
+ [CLI](docs/cli.md) ·
182
+ [Adapters](docs/adapters.md) ·
183
+ [Migrating from other checkpointers](docs/migration.md) ·
184
+ [API Reference](docs/api-reference.md)
185
+
186
+ ## Quality
187
+
188
+ - **108 tests, 100% line coverage** — including crash-simulation and stress tests
189
+ (10k-node graphs, concurrent writers, write contention).
190
+ - **`mypy --strict` clean** across all source files; ships `py.typed`.
191
+ - **CI matrix** on Python 3.9–3.12 with a 100%-coverage gate.
192
+
193
+ ```bash
194
+ pip install -e ".[dev]"
195
+ python -m pytest -q # run the suite
196
+ mypy --strict livingai # type check
197
+ python benchmarks/benchmark.py # reproduce the numbers above
198
+ ```
199
+
200
+ ## Design principles
201
+
202
+ | Principle | How |
203
+ | --- | --- |
204
+ | Zero-dependency core | Standard library only (`sqlite3`, `asyncio`, `zlib`, `dataclasses`, `uuid`). |
205
+ | Append-only log | Every write inserts a new row; nothing is mutated or deleted. |
206
+ | Framework-agnostic | No framework imports in the core; framework data lives in `metadata`. |
207
+ | Async-first I/O | Storage is `async`; sync SQLite runs off the event loop. |
208
+ | Bounded overhead | Cold writes run under `asyncio.wait_for`; overruns are dropped, never blocking the agent. |
209
+
210
+ ## Roadmap
211
+
212
+ Shipped: core data model, checkpoint engine, recovery engine, replay engine, CLI,
213
+ LangGraph / CrewAI / OpenAI adapters, benchmarks, docs, Redis store, PostgreSQL store.
214
+
215
+ **Optional backends** — swap the default SQLite store for Redis or PostgreSQL
216
+ with a single import (no core changes required):
217
+
218
+ ```bash
219
+ pip install "livingai[redis]" # hot Redis store
220
+ pip install "livingai[postgres]" # PostgreSQL cold store
221
+ ```
222
+
223
+ ```python
224
+ from livingai.stores.redis import RedisStore
225
+ from livingai.stores.postgres import PostgresStore
226
+
227
+ # Redis
228
+ engine = CheckpointEngine(RedisStore(url="redis://localhost:6379"))
229
+
230
+ # PostgreSQL
231
+ store = PostgresStore(dsn="postgresql://user:pass@localhost/livingai")
232
+ await store.initialize() # creates tables once
233
+ engine = CheckpointEngine(store)
234
+ ```
235
+
236
+ A **Docker Compose** dev stack (Postgres + Redis) ships with the repo:
237
+
238
+ ```bash
239
+ docker compose up -d # starts postgres:5432 + redis:6379
240
+ ```
241
+
242
+ Next: FastAPI cloud backend (5 endpoints), cloud client (`CloudSync`), web replay dashboard.
243
+
244
+ ## Contributing
245
+
246
+ See [CONTRIBUTING.md](CONTRIBUTING.md) — development setup, running tests, code
247
+ style, and how to add a new framework adapter or storage backend.
248
+
249
+ ## License
250
+
251
+ Apache-2.0 — see [LICENSE](LICENSE).
@@ -0,0 +1,217 @@
1
+ # Living AI
2
+
3
+ **Crash recovery, checkpointing, and replay for AI agents — one runtime that works across LangGraph, CrewAI, and OpenAI Agents.**
4
+
5
+ [![CI](https://img.shields.io/badge/CI-py3.9–3.12-brightgreen)](.github/workflows/ci.yml)
6
+ [![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen)](#quality)
7
+ [![Types](https://img.shields.io/badge/mypy-strict-blue)](#quality)
8
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
9
+ [![Runtime deps](https://img.shields.io/badge/runtime%20deps-0-blueviolet)](pyproject.toml)
10
+
11
+ ---
12
+
13
+ ## The problem
14
+
15
+ AI agents crash. A process dies mid-workflow — after the LLM reasoned, after the
16
+ tool charged a card, three steps into a ten-step plan — and all of that work is
17
+ gone. You restart from zero, pay for the tokens again, and hope the tool doesn't
18
+ fire its side effects twice. And when something goes wrong, you can't replay what
19
+ happened to understand *why*.
20
+
21
+ ## The solution
22
+
23
+ Living AI records every step of an agent execution to an **append-only log**, so
24
+ any run can be:
25
+
26
+ - **Recovered** — resume from the last durable checkpoint after a crash, replaying
27
+ only the *idempotent* work and never re-running side-effecting tool calls
28
+ (payments, emails, API writes).
29
+ - **Replayed** — re-run a recorded execution for debugging, with `MOCK_TOOLS` mode
30
+ returning recorded tool responses so you can iterate on reasoning without real
31
+ API calls.
32
+ - **Audited** — inspect cost, latency, and the full node graph of any run.
33
+
34
+ ## Why it's different
35
+
36
+ Most observability and checkpointing tools lock you into one framework. Living AI
37
+ ships a **framework-agnostic core** with thin adapters for all three major agent
38
+ frameworks — the same recovery guarantees whether you use LangGraph, CrewAI, or
39
+ the OpenAI Agents SDK:
40
+
41
+ ```python
42
+ from livingai.adapters import LangGraphAdapter, CrewAIAdapter, OpenAIAgentsAdapter
43
+ ```
44
+
45
+ And the core has **zero runtime dependencies** — it's pure standard library.
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ pip install livingai
51
+ ```
52
+
53
+ ## Crash recovery in 18 lines
54
+
55
+ ```python
56
+ import asyncio
57
+ from livingai import (
58
+ CheckpointEngine, ExecutionNode, NodeType, RecoveryEngine, SQLiteStore, Status,
59
+ )
60
+
61
+
62
+ async def main():
63
+ engine = CheckpointEngine(SQLiteStore("agent.db"))
64
+
65
+ # Your agent checkpoints after an expensive step.
66
+ step = ExecutionNode(execution_id="run-1", type=NodeType.PROMPT,
67
+ status=Status.SUCCESS, output="plan ready")
68
+ await engine.save(step, state=b"...serialized agent state...")
69
+
70
+ # A tool with real side effects runs (e.g. charging a card).
71
+ charge = ExecutionNode(execution_id="run-1", type=NodeType.TOOL,
72
+ status=Status.SUCCESS, output={"receipt": "R-1"})
73
+ await engine.save(charge)
74
+
75
+ # 💥 The process crashes. On restart, recover from the durable log:
76
+ recovery = RecoveryEngine(CheckpointEngine(SQLiteStore("agent.db")))
77
+ plan = await recovery.plan("run-1")
78
+ print("resume from :", plan.resume_node_id) # last durable checkpoint
79
+ print("replay safe :", len(plan.replay_nodes)) # idempotent work to redo
80
+ print("skip effects:", len(plan.skipped_nodes)) # card is NOT re-charged
81
+
82
+
83
+ asyncio.run(main())
84
+ ```
85
+
86
+ ```
87
+ resume from : d482c31e-...
88
+ replay safe : 0
89
+ skip effects: 1 # the card is never charged twice
90
+ ```
91
+
92
+ The [`examples/`](examples/README.md) directory has five runnable demos (basic checkpointing, crash
93
+ recovery, `MOCK_TOOLS` debugging, cost tracking, and the LangGraph adapter) — none
94
+ require an LLM or network.
95
+
96
+ ## Performance
97
+
98
+ Checkpointing is on the hot path of every agent step, so it has to be fast. It is.
99
+
100
+ | Metric | Result | Notes |
101
+ | --- | --- | --- |
102
+ | Checkpoint write (p50) | **~0.3 ms** | 50 KB compressed state blob |
103
+ | Checkpoint write (p95) | **~0.8 ms** | |
104
+ | Checkpoint write (p99) | **~1 ms** | ~50× under the 50 ms budget |
105
+ | Hot recovery read | **~4 µs** | vs ~190 µs cold — ~40× faster |
106
+ | Compression | **60–99%** | typical agent state (histories, docs) |
107
+
108
+ Measured on a dev laptop with the **default 50 ms overhead budget**, 50 KB blobs,
109
+ 2000 writes — the same configuration you get out of the box. Reproduce with
110
+ `python benchmarks/benchmark.py`.
111
+
112
+ The overhead budget is enforced *in code*: a checkpoint write that would exceed it
113
+ is dropped and logged as *missed* rather than ever blocking your agent thread.
114
+
115
+ ## How it works
116
+
117
+ ```
118
+ ExecutionNode ──► CheckpointStore (Tier 2: durable, append-only)
119
+ ▲ ▲
120
+ │ │
121
+ Adapters CheckpointEngine ──► HotCache (Tier 1: LRU + TTL)
122
+ (LangGraph/ │
123
+ CrewAI/ RecoveryEngine ──► RecoveryPlan (replay vs. skip)
124
+ OpenAI) ReplaySession ──► FULL / FROM_NODE / MOCK_TOOLS / COUNTERFACTUAL
125
+ ```
126
+
127
+ Every execution is a DAG of `ExecutionNode` records. The log is never mutated,
128
+ only appended to — so any point in time can be reconstructed deterministically.
129
+ `TOOL` nodes default to **non-idempotent**, which is how recovery knows never to
130
+ re-run side effects. See [docs/concepts.md](docs/concepts.md) for the full model.
131
+
132
+ ## CLI
133
+
134
+ ```bash
135
+ livingai list --db agent.db # execution ids
136
+ livingai show run-1 --db agent.db # the node graph
137
+ livingai replay run-1 --db agent.db --mode MOCK_TOOLS
138
+ ```
139
+
140
+ ## Documentation
141
+
142
+ [Quickstart](docs/quickstart.md) ·
143
+ [Concepts](docs/concepts.md) ·
144
+ [Checkpointing](docs/checkpointing.md) ·
145
+ [Recovery](docs/recovery.md) ·
146
+ [Replay](docs/replay.md) ·
147
+ [CLI](docs/cli.md) ·
148
+ [Adapters](docs/adapters.md) ·
149
+ [Migrating from other checkpointers](docs/migration.md) ·
150
+ [API Reference](docs/api-reference.md)
151
+
152
+ ## Quality
153
+
154
+ - **108 tests, 100% line coverage** — including crash-simulation and stress tests
155
+ (10k-node graphs, concurrent writers, write contention).
156
+ - **`mypy --strict` clean** across all source files; ships `py.typed`.
157
+ - **CI matrix** on Python 3.9–3.12 with a 100%-coverage gate.
158
+
159
+ ```bash
160
+ pip install -e ".[dev]"
161
+ python -m pytest -q # run the suite
162
+ mypy --strict livingai # type check
163
+ python benchmarks/benchmark.py # reproduce the numbers above
164
+ ```
165
+
166
+ ## Design principles
167
+
168
+ | Principle | How |
169
+ | --- | --- |
170
+ | Zero-dependency core | Standard library only (`sqlite3`, `asyncio`, `zlib`, `dataclasses`, `uuid`). |
171
+ | Append-only log | Every write inserts a new row; nothing is mutated or deleted. |
172
+ | Framework-agnostic | No framework imports in the core; framework data lives in `metadata`. |
173
+ | Async-first I/O | Storage is `async`; sync SQLite runs off the event loop. |
174
+ | Bounded overhead | Cold writes run under `asyncio.wait_for`; overruns are dropped, never blocking the agent. |
175
+
176
+ ## Roadmap
177
+
178
+ Shipped: core data model, checkpoint engine, recovery engine, replay engine, CLI,
179
+ LangGraph / CrewAI / OpenAI adapters, benchmarks, docs, Redis store, PostgreSQL store.
180
+
181
+ **Optional backends** — swap the default SQLite store for Redis or PostgreSQL
182
+ with a single import (no core changes required):
183
+
184
+ ```bash
185
+ pip install "livingai[redis]" # hot Redis store
186
+ pip install "livingai[postgres]" # PostgreSQL cold store
187
+ ```
188
+
189
+ ```python
190
+ from livingai.stores.redis import RedisStore
191
+ from livingai.stores.postgres import PostgresStore
192
+
193
+ # Redis
194
+ engine = CheckpointEngine(RedisStore(url="redis://localhost:6379"))
195
+
196
+ # PostgreSQL
197
+ store = PostgresStore(dsn="postgresql://user:pass@localhost/livingai")
198
+ await store.initialize() # creates tables once
199
+ engine = CheckpointEngine(store)
200
+ ```
201
+
202
+ A **Docker Compose** dev stack (Postgres + Redis) ships with the repo:
203
+
204
+ ```bash
205
+ docker compose up -d # starts postgres:5432 + redis:6379
206
+ ```
207
+
208
+ Next: FastAPI cloud backend (5 endpoints), cloud client (`CloudSync`), web replay dashboard.
209
+
210
+ ## Contributing
211
+
212
+ See [CONTRIBUTING.md](CONTRIBUTING.md) — development setup, running tests, code
213
+ style, and how to add a new framework adapter or storage backend.
214
+
215
+ ## License
216
+
217
+ Apache-2.0 — see [LICENSE](LICENSE).