revive-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- revive_sdk-0.1.0/.gitignore +33 -0
- revive_sdk-0.1.0/PKG-INFO +99 -0
- revive_sdk-0.1.0/README.md +70 -0
- revive_sdk-0.1.0/benchmarks/live_graph_killer_demo.py +453 -0
- revive_sdk-0.1.0/benchmarks/live_langgraph_nango.py +269 -0
- revive_sdk-0.1.0/benchmarks/revivebench.py +374 -0
- revive_sdk-0.1.0/examples/langgraph_agent.py +128 -0
- revive_sdk-0.1.0/examples/mock_idp.py +119 -0
- revive_sdk-0.1.0/examples/nightly_briefing.py +158 -0
- revive_sdk-0.1.0/examples/parley_approval.py +70 -0
- revive_sdk-0.1.0/pyproject.toml +35 -0
- revive_sdk-0.1.0/revive/__init__.py +45 -0
- revive_sdk-0.1.0/revive/adapters/__init__.py +5 -0
- revive_sdk-0.1.0/revive/adapters/anthropic_tools.py +60 -0
- revive_sdk-0.1.0/revive/adapters/langgraph.py +89 -0
- revive_sdk-0.1.0/revive/adapters/openai_agents.py +63 -0
- revive_sdk-0.1.0/revive/adapters/temporal.py +67 -0
- revive_sdk-0.1.0/revive/checkpoint.py +199 -0
- revive_sdk-0.1.0/revive/classifier.py +198 -0
- revive_sdk-0.1.0/revive/client.py +145 -0
- revive_sdk-0.1.0/revive/engine.py +294 -0
- revive_sdk-0.1.0/revive/postgres.py +153 -0
- revive_sdk-0.1.0/revive/providers.py +89 -0
- revive_sdk-0.1.0/revive/rendezvous.py +113 -0
- revive_sdk-0.1.0/revive/reporter.py +60 -0
- revive_sdk-0.1.0/tests/test_classifier.py +61 -0
- revive_sdk-0.1.0/tests/test_recovery.py +151 -0
- revive_sdk-0.1.0/tests/test_temporal_adapter.py +43 -0
- revive_sdk-0.1.0/tests/test_webhook.py +48 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/node_modules
|
|
2
|
+
/.next
|
|
3
|
+
/.next-dev
|
|
4
|
+
/out
|
|
5
|
+
/build
|
|
6
|
+
*.tsbuildinfo
|
|
7
|
+
next-env.d.ts
|
|
8
|
+
.DS_Store
|
|
9
|
+
npm-debug.log*
|
|
10
|
+
.env*
|
|
11
|
+
!.env.example
|
|
12
|
+
|
|
13
|
+
# python sidecar
|
|
14
|
+
.venv/
|
|
15
|
+
__pycache__/
|
|
16
|
+
*.pyc
|
|
17
|
+
sidecar/**/*.db
|
|
18
|
+
revive.db
|
|
19
|
+
.revive/
|
|
20
|
+
|
|
21
|
+
# deck / video build artifacts (keep the .pptx and .mp4, ignore the rest)
|
|
22
|
+
deck/node_modules/
|
|
23
|
+
deck/*.pdf
|
|
24
|
+
deck/slide-*.jpg
|
|
25
|
+
video/frames/
|
|
26
|
+
video/check_*.jpg
|
|
27
|
+
|
|
28
|
+
video/product_frames/
|
|
29
|
+
video/prev_*.jpg
|
|
30
|
+
video/vcheck*.jpg
|
|
31
|
+
video/pconcat.txt
|
|
32
|
+
video/total.txt
|
|
33
|
+
.vercel
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: revive-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Agent recovery control plane SDK: protect agent actions so a failed credential parks the run and resumes exactly-once, without duplicating side effects. LangGraph, OpenAI Agents, and Anthropic tool-use adapters included.
|
|
5
|
+
Project-URL: Homepage, https://revivelabs.app
|
|
6
|
+
Project-URL: Repository, https://github.com/FlyingPotato437/revive
|
|
7
|
+
Project-URL: Issues, https://github.com/FlyingPotato437/revive/issues
|
|
8
|
+
Author: Revive Labs
|
|
9
|
+
License: Apache-2.0
|
|
10
|
+
Keywords: agents,ai-agents,durable-execution,langgraph,mcp,oauth,temporal,token-refresh
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: langgraph>=0.2; extra == 'dev'
|
|
18
|
+
Provides-Extra: hosted
|
|
19
|
+
Requires-Dist: psycopg[binary]>=3.2; extra == 'hosted'
|
|
20
|
+
Requires-Dist: temporalio>=1.7; extra == 'hosted'
|
|
21
|
+
Provides-Extra: langgraph
|
|
22
|
+
Requires-Dist: langgraph-checkpoint-sqlite<4,>=3.1; extra == 'langgraph'
|
|
23
|
+
Requires-Dist: langgraph<2,>=1.2; extra == 'langgraph'
|
|
24
|
+
Provides-Extra: postgres
|
|
25
|
+
Requires-Dist: psycopg[binary]>=3.2; extra == 'postgres'
|
|
26
|
+
Provides-Extra: temporal
|
|
27
|
+
Requires-Dist: temporalio>=1.7; extra == 'temporal'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# revive-sdk
|
|
31
|
+
|
|
32
|
+
Agent recovery control plane SDK for Python. Wrap an agent's real-world action
|
|
33
|
+
so that when its credential fails, the run parks, the right account owner
|
|
34
|
+
reconnects, and the run resumes — without ever duplicating a side effect that
|
|
35
|
+
already committed.
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install revive-sdk
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Protect one action
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from revive import ReviveClient, ReviveParkedError
|
|
45
|
+
|
|
46
|
+
revive = ReviveClient("https://revivelabs.app", api_key="rv_live_…")
|
|
47
|
+
|
|
48
|
+
try:
|
|
49
|
+
result = revive.protect_action(
|
|
50
|
+
run_id=run_id,
|
|
51
|
+
connection_id="conn_microsoft_ops",
|
|
52
|
+
action_key="send_followup_email",
|
|
53
|
+
execute=lambda: graph_send_mail(message), # your real side effect
|
|
54
|
+
)
|
|
55
|
+
# executed exactly once; a retry returns the stored result, never re-sends
|
|
56
|
+
except ReviveParkedError as parked:
|
|
57
|
+
# the credential is dead — send parked.parked.recovery_url to the account owner
|
|
58
|
+
notify(parked.parked.recovery_url)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
`protect_action` registers the action, gates execution on the ledger verdict
|
|
62
|
+
(`safe_to_execute` / `already_committed` / `reconcile_first`), records the
|
|
63
|
+
attempt, and opens a recovery case when the credential is rejected. Idempotency
|
|
64
|
+
keys are derived from `run_id + action_key` unless you pass `idem_key`.
|
|
65
|
+
|
|
66
|
+
## Framework adapters
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
# OpenAI Agents SDK — protect a function tool
|
|
70
|
+
from revive.adapters.openai_agents import revive_tool
|
|
71
|
+
|
|
72
|
+
@revive_tool(revive, connection_id="conn_microsoft_ops")
|
|
73
|
+
def send_followup_email(run_id: str, to: str, subject: str) -> dict:
|
|
74
|
+
...
|
|
75
|
+
|
|
76
|
+
# Anthropic tool use — guard the tool-execution side of the loop
|
|
77
|
+
from revive.adapters.anthropic_tools import ReviveToolGuard
|
|
78
|
+
guard = ReviveToolGuard(revive, connection_id="conn_microsoft_ops",
|
|
79
|
+
protected={"send_email", "create_ticket"})
|
|
80
|
+
|
|
81
|
+
# LangGraph — see revive.adapters.langgraph (install with: pip install "revive-sdk[langgraph]")
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## What Revive is not
|
|
85
|
+
|
|
86
|
+
Revive does not take custody of provider tokens — that stays with your
|
|
87
|
+
credential vault (Nango, Auth0, Entra). It coordinates the in-flight run
|
|
88
|
+
around a credential change: park, verify identity, fence stale workers,
|
|
89
|
+
reconcile, resume.
|
|
90
|
+
|
|
91
|
+
## Self-hosting (optional)
|
|
92
|
+
|
|
93
|
+
The package also ships the local park/resume engine (`Engine`,
|
|
94
|
+
`CheckpointStore`, `PostgresCheckpointStore`) for running the recovery loop
|
|
95
|
+
in-process instead of against the hosted control plane. See the repository.
|
|
96
|
+
|
|
97
|
+
Extras: `revive-sdk[langgraph]`, `revive-sdk[temporal]`, `revive-sdk[postgres]`, `revive-sdk[hosted]` (Postgres + Temporal).
|
|
98
|
+
|
|
99
|
+
License: Apache-2.0 · https://revivelabs.app
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# revive-sdk
|
|
2
|
+
|
|
3
|
+
Agent recovery control plane SDK for Python. Wrap an agent's real-world action
|
|
4
|
+
so that when its credential fails, the run parks, the right account owner
|
|
5
|
+
reconnects, and the run resumes — without ever duplicating a side effect that
|
|
6
|
+
already committed.
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install revive-sdk
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Protect one action
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
from revive import ReviveClient, ReviveParkedError
|
|
16
|
+
|
|
17
|
+
revive = ReviveClient("https://revivelabs.app", api_key="rv_live_…")
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
result = revive.protect_action(
|
|
21
|
+
run_id=run_id,
|
|
22
|
+
connection_id="conn_microsoft_ops",
|
|
23
|
+
action_key="send_followup_email",
|
|
24
|
+
execute=lambda: graph_send_mail(message), # your real side effect
|
|
25
|
+
)
|
|
26
|
+
# executed exactly once; a retry returns the stored result, never re-sends
|
|
27
|
+
except ReviveParkedError as parked:
|
|
28
|
+
# the credential is dead — send parked.parked.recovery_url to the account owner
|
|
29
|
+
notify(parked.parked.recovery_url)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
`protect_action` registers the action, gates execution on the ledger verdict
|
|
33
|
+
(`safe_to_execute` / `already_committed` / `reconcile_first`), records the
|
|
34
|
+
attempt, and opens a recovery case when the credential is rejected. Idempotency
|
|
35
|
+
keys are derived from `run_id + action_key` unless you pass `idem_key`.
|
|
36
|
+
|
|
37
|
+
## Framework adapters
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
# OpenAI Agents SDK — protect a function tool
|
|
41
|
+
from revive.adapters.openai_agents import revive_tool
|
|
42
|
+
|
|
43
|
+
@revive_tool(revive, connection_id="conn_microsoft_ops")
|
|
44
|
+
def send_followup_email(run_id: str, to: str, subject: str) -> dict:
|
|
45
|
+
...
|
|
46
|
+
|
|
47
|
+
# Anthropic tool use — guard the tool-execution side of the loop
|
|
48
|
+
from revive.adapters.anthropic_tools import ReviveToolGuard
|
|
49
|
+
guard = ReviveToolGuard(revive, connection_id="conn_microsoft_ops",
|
|
50
|
+
protected={"send_email", "create_ticket"})
|
|
51
|
+
|
|
52
|
+
# LangGraph — see revive.adapters.langgraph (install with: pip install "revive-sdk[langgraph]")
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## What Revive is not
|
|
56
|
+
|
|
57
|
+
Revive does not take custody of provider tokens — that stays with your
|
|
58
|
+
credential vault (Nango, Auth0, Entra). It coordinates the in-flight run
|
|
59
|
+
around a credential change: park, verify identity, fence stale workers,
|
|
60
|
+
reconcile, resume.
|
|
61
|
+
|
|
62
|
+
## Self-hosting (optional)
|
|
63
|
+
|
|
64
|
+
The package also ships the local park/resume engine (`Engine`,
|
|
65
|
+
`CheckpointStore`, `PostgresCheckpointStore`) for running the recovery loop
|
|
66
|
+
in-process instead of against the hosted control plane. See the repository.
|
|
67
|
+
|
|
68
|
+
Extras: `revive-sdk[langgraph]`, `revive-sdk[temporal]`, `revive-sdk[postgres]`, `revive-sdk[hosted]` (Postgres + Temporal).
|
|
69
|
+
|
|
70
|
+
License: Apache-2.0 · https://revivelabs.app
|
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Interactive Revive demo with LangGraph, Nango, and Microsoft Graph.
|
|
3
|
+
|
|
4
|
+
The demo performs real reads, parks one durable LangGraph thread, opens a real
|
|
5
|
+
Revive recovery case, waits for Nango reauthorization, then receives Revive's
|
|
6
|
+
signed runtime callback. After resume it creates and sends one message, injects
|
|
7
|
+
transport loss after Graph returns 202, reconciles Sent Items, and proves that
|
|
8
|
+
the send endpoint was called once.
|
|
9
|
+
|
|
10
|
+
This sends a temporary message to REVIVE_DEMO_RECIPIENT. Use an account you own.
|
|
11
|
+
After a passing run, the script deletes matching Sent Items, Inbox, and Drafts copies.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import hashlib
|
|
16
|
+
import hmac
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
import sqlite3
|
|
20
|
+
import sys
|
|
21
|
+
import threading
|
|
22
|
+
import time
|
|
23
|
+
import urllib.error
|
|
24
|
+
import urllib.parse
|
|
25
|
+
import urllib.request
|
|
26
|
+
import uuid
|
|
27
|
+
from datetime import datetime, timedelta, timezone
|
|
28
|
+
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import NotRequired, TypedDict
|
|
31
|
+
|
|
32
|
+
from langgraph.checkpoint.sqlite import SqliteSaver
|
|
33
|
+
from langgraph.graph import END, START, StateGraph
|
|
34
|
+
from langgraph.types import Command, interrupt
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Filesystem layout used by the demo. ``parents[2]`` is two directory levels
# above the folder that contains this script.
# NOTE(review): in the published sdist this script lives in ``benchmarks/``
# directly under the package root, so ROOT may resolve above the package --
# confirm against the intended repository layout.
ROOT = Path(__file__).resolve().parents[2]
# Local, git-ignored state directory (created by main() before use).
STATE_DIR = ROOT / ".revive"
# SQLite file backing the LangGraph checkpointer.
CHECKPOINT_DB = STATE_DIR / "langgraph-killer-demo.db"
# SQLite file backing the demo's own send-call ledger (see demo_ledger()).
LEDGER_DB = STATE_DIR / "langgraph-killer-ledger.db"
# JSON artifact written at the end of main().
RESULT_FILE = ROOT / "benchmarks" / "results" / "revive-killer-demo-live.json"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def load_env() -> None:
    """Load ``KEY=value`` pairs from ``ROOT/.env.local`` into ``os.environ``.

    Blank lines, ``#`` comment lines, and lines without ``=`` are skipped.
    Values already present in the environment win (``setdefault``), so the
    file only supplies defaults. Surrounding whitespace and quote characters
    are stripped from each value; whitespace around the key is stripped as
    well so ``KEY = value`` defines ``KEY`` rather than ``"KEY "``.

    Does nothing when the file does not exist.
    """
    path = ROOT / ".env.local"
    if not path.exists():
        return
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        key, separator, value = line.partition("=")
        key = key.strip()
        # No "=" at all, or nothing before it: not a usable assignment.
        if not separator or not key:
            continue
        os.environ.setdefault(key, value.strip().strip('"').strip("'"))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# Populate os.environ from .env.local before the constants below are read.
load_env()
# Bearer token sent to the Nango proxy (see proxy()).
NANGO_SECRET = os.environ.get("NANGO_SECRET_KEY", "")
# Nango connection id; sent as the Connection-Id header and to Revive.
CONNECTION_ID = os.environ.get("NANGO_CERT_CONNECTION_ID", "")
# Nango integration id; sent as the Provider-Config-Key header.
INTEGRATION_ID = os.environ.get("NANGO_CERT_INTEGRATION_ID", "microsoft-tenant-specific")
# Base URLs are normalised to have no trailing slash so paths can be appended.
NANGO_URL = os.environ.get("NANGO_BASE_URL", "https://api.nango.dev").rstrip("/")
REVIVE_URL = os.environ.get("REVIVE_BASE_URL", "http://localhost:3000").rstrip("/")
# Bearer token for the Revive API (see revive()).
REVIVE_API_KEY = os.environ.get("REVIVE_API_KEY", "")
# Optional: when set, main() polls Revive's job-drain endpoint with this token.
WORKER_SECRET = os.environ.get("REVIVE_WORKER_SECRET", "")
# Shared secret used to verify the signed resume callback (see verify_callback()).
CALLBACK_SECRET = os.environ.get("REVIVE_RUNTIME_RESUME_SECRET", "")
# Address and port the local callback HTTP server binds to.
CALLBACK_HOST = os.environ.get("REVIVE_RUNTIME_CALLBACK_HOST", "127.0.0.1")
CALLBACK_PORT = int(os.environ.get("REVIVE_RUNTIME_CALLBACK_PORT", "8788"))
# Address that receives the temporary demo message.
RECIPIENT = os.environ.get("REVIVE_DEMO_RECIPIENT", "")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class DemoState(TypedDict):
    """State carried through the demo graph.

    The first four keys are supplied by main() as the initial state; the
    ``NotRequired`` keys are filled in by the graph nodes as they run.
    """

    # Unique id of this demo run; main() also uses it as the LangGraph thread_id.
    run_id: str
    # Checkpoint reference forwarded to Revive as ``checkpointId``.
    checkpoint_ref: str
    # Subject of the demo message; also used to find it again for reconciliation and cleanup.
    subject: str
    # Address the demo message is sent to.
    recipient: str
    # Set by inspect_identity: mail, userPrincipalName, or id of the Graph profile.
    account_id: NotRequired[str]
    # Set by inspect_calendar: number of events returned by the calendarView read.
    calendar_events: NotRequired[int]
    # The remaining keys are returned by protected_send once the action completes.
    connection_id: NotRequired[str]
    lease_generation: NotRequired[int]
    action_id: NotRequired[str]
    remote_id: NotRequired[str]
    status: NotRequired[str]
    reconciled: NotRequired[bool]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class SimulatedTransportLoss(RuntimeError):
    """Raised by protected_send after the send was accepted, to imitate a lost response.

    main() catches exactly this type around the resumed graph invocation and
    treats any other outcome of that invocation as a failure.
    """

    pass
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def proxy(method: str, path: str, body: dict | None = None) -> tuple[int, dict | None]:
    """Call the provider through the Nango proxy and decode the JSON response.

    Args:
        method: HTTP method to use.
        path: Provider path (including any query string) appended to
            ``{NANGO_URL}/proxy``.
        body: Optional JSON-serialisable request body.

    Returns:
        ``(status, payload)`` where ``payload`` is the decoded JSON body, or
        ``None`` when the response body is empty.

    Raises:
        RuntimeError: when the proxy answers with an HTTP error status. The
            message carries the status code and the error body (decoded JSON
            when possible, otherwise the raw text).
    """
    request = urllib.request.Request(
        f"{NANGO_URL}/proxy{path}",
        method=method,
        headers={
            "Authorization": f"Bearer {NANGO_SECRET}",
            "Provider-Config-Key": INTEGRATION_ID,
            "Connection-Id": CONNECTION_ID,
            "Content-Type": "application/json",
        },
        data=json.dumps(body).encode() if body is not None else None,
    )
    try:
        with urllib.request.urlopen(request, timeout=25) as response:
            raw = response.read()
            return response.status, json.loads(raw) if raw else None
    except urllib.error.HTTPError as error:
        raw = error.read()
        try:
            payload = json.loads(raw) if raw else None
        except ValueError:
            # Gateways can answer with HTML or plain text; keep the text so the
            # real HTTP failure is reported instead of a JSON decoding error.
            payload = raw.decode("utf-8", "replace")
        raise RuntimeError(f"Nango proxy {method} {path} failed with {error.code}: {payload}") from error
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def revive(method: str, path: str, body: dict | None = None) -> tuple[int, dict]:
    """Call the Revive API and return the status code with the decoded body.

    Unlike ``proxy``, HTTP error statuses are returned to the caller rather
    than raised, so callers branch on the status code themselves.

    Args:
        method: HTTP method to use.
        path: Path appended to ``REVIVE_URL``.
        body: Optional JSON-serialisable request body.

    Returns:
        ``(status, payload)``. ``payload`` is the decoded JSON body, or ``{}``
        when the body is empty. For an error response whose body is not JSON,
        ``payload`` is ``{"error": <body text>}`` so callers can still use
        ``.get`` and include the text in their own error messages.
    """
    request = urllib.request.Request(
        f"{REVIVE_URL}{path}",
        method=method,
        headers={"Authorization": f"Bearer {REVIVE_API_KEY}", "Content-Type": "application/json"},
        data=json.dumps(body).encode() if body is not None else None,
    )
    try:
        with urllib.request.urlopen(request, timeout=25) as response:
            raw = response.read()
            return response.status, json.loads(raw) if raw else {}
    except urllib.error.HTTPError as error:
        raw = error.read()
        if not raw:
            return error.code, {}
        try:
            payload = json.loads(raw)
        except ValueError:
            # A non-JSON error page must not replace the status code with a
            # decoding exception; hand the text back in a dict instead.
            payload = {"error": raw.decode("utf-8", "replace")}
        return error.code, payload
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def demo_ledger() -> sqlite3.Connection:
    """Open the demo's SQLite ledger at ``LEDGER_DB`` and return the connection.

    The ``killer_actions`` table is created on first use. The caller owns the
    returned connection.
    """
    schema = """create table if not exists killer_actions (
            action_key text primary key,
            send_calls integer not null default 0,
            draft_id text,
            state text not null,
            updated_at real not null
        )"""
    connection = sqlite3.connect(LEDGER_DB)
    connection.execute(schema)
    return connection
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def inspect_identity(state: DemoState) -> dict:
    """Read the signed-in Graph profile and return the account identifier.

    Returns:
        ``{"account_id": ...}`` using the first non-empty value among
        ``mail``, ``userPrincipalName`` and ``id``.

    Raises:
        RuntimeError: when the read does not return status 200 with a dict
            payload that has a non-empty ``id``.
    """
    code, me = proxy("GET", "/v1.0/me?$select=id,userPrincipalName,mail")
    readable = code == 200 and isinstance(me, dict) and bool(me.get("id"))
    if not readable:
        raise RuntimeError("Graph identity read failed")
    # Prefer the human-readable identifiers; fall back to the object id.
    for field in ("mail", "userPrincipalName"):
        candidate = me.get(field)
        if candidate:
            return {"account_id": candidate}
    return {"account_id": me["id"]}
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def inspect_calendar(_: DemoState) -> dict:
    """Count calendar events in the next 24 hours (at most 10 are requested).

    Returns:
        ``{"calendar_events": <number of items in the response's "value" list>}``.

    Raises:
        RuntimeError: when the read does not return status 200 with a dict payload.
    """
    window_start = datetime.now(timezone.utc)
    window_end = window_start + timedelta(days=1)
    params = {
        "startDateTime": window_start.isoformat(),
        "endDateTime": window_end.isoformat(),
        "$select": "id",
        "$top": "10",
    }
    code, listing = proxy("GET", "/v1.0/me/calendarView?" + urllib.parse.urlencode(params))
    if code == 200 and isinstance(listing, dict):
        return {"calendar_events": len(listing.get("value", []))}
    raise RuntimeError("Graph calendar read failed")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def wait_for_sent_reconciliation(action_id: str, subject: str) -> dict:
    """Poll Revive's Graph reconciliation until the send is proven committed.

    Up to 20 attempts are made, 1.5 seconds apart.

    Args:
        action_id: Revive action id whose side effect is being reconciled.
        subject: Subject of the message to look for.

    Returns:
        The reconciliation response once its ``outcome`` is ``"committed"``.

    Raises:
        RuntimeError: when a poll returns a non-200 status or an ``"unknown"``
            outcome, or when no attempt reports ``"committed"``.
    """
    endpoint = f"/v1/actions/{action_id}/reconcile-graph"
    attempts_left = 20
    while attempts_left > 0:
        attempts_left -= 1
        code, verdict = revive("POST", endpoint, {
            "integrationId": INTEGRATION_ID,
            "subject": subject,
            "windowMinutes": 30,
        })
        if code != 200:
            raise RuntimeError(f"Graph reconciliation could not prove the send: {verdict}")
        outcome = verdict.get("outcome")
        if outcome == "committed":
            return verdict
        if outcome == "unknown":
            raise RuntimeError(f"Graph reconciliation could not prove the send: {verdict}")
        # Any other outcome: the message may not be visible yet, so retry.
        time.sleep(1.5)
    raise RuntimeError("sent message did not become visible inside the reconciliation window")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def protected_send(state: DemoState) -> dict:
    """Graph node: park for credential recovery, then send the message at most once.

    Flow:
      1. ``interrupt`` hands a recovery request to the runtime; main() expects
         the first graph invocation to stop here and later resumes it with a
         ``Command(resume=...)`` carrying the connection id and generation.
         NOTE(review): this relies on LangGraph returning the resume value
         from ``interrupt`` when the node is re-executed -- confirm.
      2. The action is registered with Revive. The returned ``replayVerdict``
         decides what happens next: ``already_committed`` returns at once,
         ``reconcile_first`` waits for Graph reconciliation, anything else
         performs the send.
      3. The send path creates a draft, sends it, records the send call in the
         local ledger and then deliberately raises ``SimulatedTransportLoss``.

    Returns:
        A state update with ``status``, ``action_id``, ``reconciled``,
        ``connection_id`` and ``lease_generation`` (plus ``remote_id`` on the
        reconciliation path).

    Raises:
        RuntimeError: on a wrong connection or generation in the resume value,
            or when a Revive or Graph call returns an unexpected status.
        SimulatedTransportLoss: always, after a send has been accepted.
    """
    from contextlib import closing

    recovery = interrupt({
        "kind": "credential_recovery",
        "provider": "microsoft",
        "reason": "credential grant rejected after profile and calendar reads",
        "run_id": state["run_id"],
        "checkpoint_id": state["checkpoint_ref"],
    })
    if not isinstance(recovery, dict) or recovery.get("connection_id") != CONNECTION_ID:
        raise RuntimeError("runtime callback supplied the wrong connection")
    generation = int(recovery.get("lease_generation") or 0)
    # The recovery case is opened with generation 1, so a recovered credential
    # must carry a strictly newer one.
    if generation < 2:
        raise RuntimeError("runtime callback did not advance the credential generation")

    status, action = revive("POST", "/v1/actions", {
        "runId": state["run_id"],
        "checkpointId": state["checkpoint_ref"],
        "connectionId": CONNECTION_ID,
        "actionKey": "sendMail",
        "idempotencyKey": f"{state['run_id']}:sendMail",
        "leaseGeneration": generation,
    })
    if status != 200:
        raise RuntimeError(f"action registration failed: {action}")
    action_id = str(action["id"])
    if action.get("replayVerdict") == "already_committed":
        return {"status": "completed", "action_id": action_id, "reconciled": True,
                "connection_id": CONNECTION_ID, "lease_generation": generation}
    if action.get("replayVerdict") == "reconcile_first":
        result = wait_for_sent_reconciliation(action_id, state["subject"])
        return {
            "status": "completed",
            "action_id": action_id,
            "remote_id": result.get("remoteId"),
            "reconciled": True,
            "connection_id": CONNECTION_ID,
            "lease_generation": generation,
        }

    started_status, started = revive("POST", f"/v1/actions/{action_id}/started", {})
    if started_status != 200:
        raise RuntimeError(f"action could not enter started state: {started}")
    draft_status, draft = proxy("POST", "/v1.0/me/messages", {
        "subject": state["subject"],
        "body": {
            "contentType": "Text",
            "content": "Revive live recovery demo. This temporary message proves one safe send after credential recovery.",
        },
        "toRecipients": [{"emailAddress": {"address": state["recipient"]}}],
        "internetMessageHeaders": [{"name": "x-revive-run-id", "value": state["run_id"]}],
    })
    if draft_status != 201 or not isinstance(draft, dict) or not draft.get("id"):
        raise RuntimeError("Graph did not create the protected draft")
    draft_id = str(draft["id"])
    send_status, _ = proxy("POST", f"/v1.0/me/messages/{urllib.parse.quote(draft_id, safe='')}/send")
    # Graph answers 202; the Nango proxy re-wraps the empty accepted response
    # as 200. Both mean the send was accepted.
    if send_status not in (200, 202):
        raise RuntimeError(f"Graph send returned {send_status}, expected 200/202")
    # A sqlite3 connection used as a context manager only commits or rolls
    # back; ``closing`` is what actually releases the connection afterwards.
    with closing(demo_ledger()) as db, db:
        db.execute(
            "insert into killer_actions (action_key, send_calls, draft_id, state, updated_at) values (?, 1, ?, 'uncertain', ?) "
            "on conflict(action_key) do update set send_calls = killer_actions.send_calls + 1, draft_id = excluded.draft_id, state = 'uncertain', updated_at = excluded.updated_at",
            (f"{state['run_id']}:sendMail", draft_id, time.time()),
        )
    # The send call is now counted; imitate losing the response so the caller
    # has to reconcile instead of trusting a return value.
    raise SimulatedTransportLoss("controlled loss after Graph accepted the send")
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def build_graph(saver: SqliteSaver):
    """Compile the linear demo graph: identity -> calendar -> protected_send.

    Args:
        saver: Checkpointer passed to ``compile`` so the thread can be resumed.

    Returns:
        The compiled graph.
    """
    pipeline = (
        ("identity", inspect_identity),
        ("calendar", inspect_calendar),
        ("protected_send", protected_send),
    )
    workflow = StateGraph(DemoState)
    for label, handler in pipeline:
        workflow.add_node(label, handler)
    # Chain START, every node in order, then END.
    route = [START, *(label for label, _ in pipeline), END]
    for source, target in zip(route, route[1:]):
        workflow.add_edge(source, target)
    return workflow.compile(checkpointer=saver)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def verify_callback(headers, body: bytes) -> bool:
    """Check the signature and freshness of a resume callback.

    The expected signature is ``"v1,"`` followed by the hex HMAC-SHA256 of
    ``"{webhook-id}.{webhook-timestamp}."`` plus the raw body, keyed with
    ``CALLBACK_SECRET``.

    Args:
        headers: Mapping-like object with a ``get(name, default)`` method.
        body: Raw request body exactly as received.

    Returns:
        ``True`` only when a secret is configured, the timestamp is an integer
        within 300 seconds of the local clock, and the signature matches.
        Every other case returns ``False``; the function does not raise for
        malformed header values.
    """
    # Fail closed: an empty key would make the signature computable by anyone.
    if not CALLBACK_SECRET:
        return False
    event_id = headers.get("webhook-id", "")
    timestamp = headers.get("webhook-timestamp", "")
    signature = headers.get("webhook-signature", "")
    try:
        age = abs(time.time() - int(timestamp))
    except ValueError:
        return False
    if age > 300:
        return False
    digest = hmac.new(CALLBACK_SECRET.encode(), f"{event_id}.{timestamp}.".encode() + body, hashlib.sha256).hexdigest()
    # compare_digest rejects non-ASCII str arguments with TypeError, and header
    # values are attacker-controlled, so compare bytes instead.
    return hmac.compare_digest(signature.encode("utf-8", "backslashreplace"), f"v1,{digest}".encode())
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def delete_matching_messages(subject: str) -> dict:
    """Best-effort removal of the demo message from Sent Items, Inbox and Drafts.

    Each folder is searched for messages whose subject equals ``subject`` (at
    most 10 per folder) and every hit is deleted. Failures never propagate:
    they are collected as strings so cleanup cannot fail the demo.

    Args:
        subject: Exact subject to match; single quotes are doubled for the
            OData filter.

    Returns:
        ``{"attempted": True, "deleted": <count>, "errors": [<message>, ...]}``.
    """
    deleted = 0
    errors: list[str] = []
    # The filter does not depend on the folder, so build it once.
    escaped = subject.replace("'", "''")
    filter_value = urllib.parse.quote(f"subject eq '{escaped}'", safe="")
    for folder in ("sentitems", "inbox", "drafts"):
        try:
            _, payload = proxy("GET", f"/v1.0/me/mailFolders/{folder}/messages?$filter={filter_value}&$select=id&$top=10")
            for message in (payload or {}).get("value", []):
                try:
                    status, _ = proxy("DELETE", f"/v1.0/me/messages/{urllib.parse.quote(str(message['id']), safe='')}")
                    # Graph answers 204 for a delete. The send path in this file
                    # notes that the Nango proxy re-wraps empty success
                    # responses as 200, so accept that as well.
                    # NOTE(review): confirm the proxy does this for DELETE.
                    if status in (200, 204):
                        deleted += 1
                except Exception as error:
                    errors.append(str(error))
        except Exception as error:
            errors.append(str(error))
    return {"attempted": True, "deleted": deleted, "errors": errors}
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def main() -> int:
    """Run the live recovery demo end to end and write the result artifact.

    Steps, in order:
      1. Check the required configuration; return 2 if anything is missing.
      2. Invoke the graph until it parks at ``protected_send`` and open a
         Revive recovery case for the run.
      3. Serve ``POST /revive/resume`` until a signed callback for *this* run
         has been processed. The callback resumes the graph, expects the
         injected ``SimulatedTransportLoss``, re-invokes the graph so the send
         is reconciled, and records the assertions.
      4. After a passing run delete the demo messages; always write
         ``RESULT_FILE`` and print a one-line summary.

    Returns:
        0 when every assertion passed, 1 when the run failed, 2 when required
        configuration is missing.

    Raises:
        RuntimeError: when the graph does not park with exactly one interrupt,
            or Revive does not return a recovery case with a ``url``.
    """
    from contextlib import closing

    required = {
        "NANGO_SECRET_KEY": NANGO_SECRET,
        "NANGO_CERT_CONNECTION_ID": CONNECTION_ID,
        "REVIVE_API_KEY": REVIVE_API_KEY,
        "REVIVE_RUNTIME_RESUME_SECRET": CALLBACK_SECRET,
        "REVIVE_DEMO_RECIPIENT": RECIPIENT,
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        print(f"Missing required environment: {', '.join(missing)}", file=sys.stderr)
        return 2

    STATE_DIR.mkdir(parents=True, exist_ok=True)
    RESULT_FILE.parent.mkdir(parents=True, exist_ok=True)
    run_id = f"killer-{uuid.uuid4().hex[:12]}"
    checkpoint_id = f"langgraph:{run_id}:protected_send"
    subject = f"Revive recovery proof {run_id}"
    initial_state: DemoState = {
        "run_id": run_id,
        "checkpoint_ref": checkpoint_id,
        "subject": subject,
        "recipient": RECIPIENT,
    }
    # Set by the callback handler once this run has reached a final outcome.
    finished = threading.Event()
    outcome: dict = {}
    started_at = time.perf_counter()

    with SqliteSaver.from_conn_string(str(CHECKPOINT_DB)) as saver:
        graph = build_graph(saver)
        config = {"configurable": {"thread_id": run_id}}
        first = graph.invoke(initial_state, config=config)
        if len(first.get("__interrupt__", [])) != 1:
            raise RuntimeError("LangGraph did not park at the send boundary")
        status, recovery_case = revive("POST", "/v1/recovery-cases", {
            "runId": run_id,
            "checkpointId": checkpoint_id,
            "connectionId": CONNECTION_ID,
            "actionKey": "sendMail",
            "idempotencyKey": f"{run_id}:sendMail",
            "provider": "microsoft",
            "policy": "interactive_reauth",
            "reason": "invalid_grant: Microsoft connection revoked during the run",
            "leaseGeneration": 1,
            "mutationDispatched": False,
        })
        if status != 200 or not recovery_case.get("url"):
            raise RuntimeError(f"Revive did not open the recovery case: {recovery_case}")

        class CallbackHandler(BaseHTTPRequestHandler):
            """Handles the signed resume callback for this demo run."""

            def _reply(self, code: int, payload: dict) -> None:
                """Send ``payload`` as a JSON response with an explicit length."""
                encoded = json.dumps(payload).encode()
                self.send_response(code)
                self.send_header("content-type", "application/json")
                self.send_header("content-length", str(len(encoded)))
                self.end_headers()
                self.wfile.write(encoded)

            def do_POST(self):  # noqa: N802
                if self.path != "/revive/resume":
                    self.send_error(404)
                    return
                body = self.rfile.read(int(self.headers.get("content-length", "0")))
                if not verify_callback(self.headers, body):
                    self.send_error(401)
                    return
                # Only a delivery that belongs to this run may end the demo.
                terminal = True
                try:
                    event = json.loads(body)
                    data = event["data"]
                    if data.get("runId") != run_id or data.get("checkpointId") != checkpoint_id:
                        # A stale queue job from an earlier demo run. Acknowledge
                        # nothing, fail the DELIVERY only: this demo must not end,
                        # so the flag below keeps ``finished`` unset.
                        terminal = False
                        self._reply(409, {"ok": False, "ignored": "foreign run"})
                        return
                    resume = {"connection_id": data["connectionId"], "lease_generation": data["generation"]}
                    try:
                        graph.invoke(Command(resume=resume), config=config)
                    except SimulatedTransportLoss:
                        # Expected: the send was accepted and the response "lost".
                        pass
                    else:
                        raise RuntimeError("transport-loss injection did not run")
                    # Second pass over the same thread: the action is expected
                    # to be reconciled rather than sent again.
                    final = graph.invoke(None, config=config)
                    # ``closing`` releases the connection; sqlite3's own context
                    # manager only commits or rolls back.
                    with closing(demo_ledger()) as db:
                        row = db.execute(
                            "select send_calls from killer_actions where action_key = ?",
                            (f"{run_id}:sendMail",),
                        ).fetchone()
                    assertions = {
                        "sameThreadResumed": final.get("status") == "completed",
                        # advanceLease yields a strictly newer generation per
                        # recovery; repeat demo runs on one connection go 2,3,4…
                        "leaseGenerationAdvanced": final.get("lease_generation") == data["generation"] and int(data["generation"]) >= 2,
                        "sideEffectReconciled": final.get("reconciled") is True,
                        "singleSendCall": bool(row and row[0] == 1),
                    }
                    outcome.update({"passed": all(assertions.values()), "assertions": assertions, "final": final})
                    self._reply(200, {"ok": True, "resumed": True, "runId": run_id, "checkpointId": checkpoint_id})
                except Exception as error:
                    if terminal:
                        outcome.update({"passed": False, "error": str(error)})
                    self.send_error(500)
                finally:
                    if terminal:
                        finished.set()

            def log_message(self, format, *args):  # noqa: A003
                # Silence the default per-request logging on stderr.
                return

        server = ThreadingHTTPServer((CALLBACK_HOST, CALLBACK_PORT), CallbackHandler)
        # Requests are handled on worker threads, so handle_request() returns as
        # soon as one is accepted. Without a timeout the next call would block
        # forever waiting for a request that never comes, even after
        # ``finished`` has been set.
        server.timeout = 1

        def drain_queue() -> None:
            """Poll Revive's job-drain endpoint once a second until the demo ends."""
            if not WORKER_SECRET:
                return
            while not finished.is_set():
                request = urllib.request.Request(
                    f"{REVIVE_URL}/api/internal/jobs/drain?limit=5",
                    method="POST",
                    headers={"Authorization": f"Bearer {WORKER_SECRET}", "x-revive-worker-id": "killer-demo"},
                )
                try:
                    with urllib.request.urlopen(request, timeout=30) as response:
                        response.read()
                except Exception:
                    # Deliberately best effort: a failed poll is retried on the
                    # next tick and must not stop the demo.
                    pass
                finished.wait(1)

        try:
            threading.Thread(target=drain_queue, daemon=True).start()
            callback_url = f"http://{CALLBACK_HOST}:{CALLBACK_PORT}/revive/resume"
            recovery_url = urllib.parse.urljoin(f"{REVIVE_URL}/", str(recovery_case["url"]).lstrip("/"))
            print(json.dumps({
                "status": "parked",
                "runId": run_id,
                "completedTools": ["Microsoft Graph profile", "Microsoft Graph calendar"],
                "recoveryUrl": recovery_url,
                "callbackUrl": callback_url,
                "next": "Open recoveryUrl and reconnect the bound Microsoft account.",
            }, indent=2))
            while not finished.is_set():
                server.handle_request()
        finally:
            # Release the listening socket even if serving is interrupted.
            server.server_close()

    cleanup = delete_matching_messages(subject) if outcome.get("passed") else {"attempted": False}
    artifact = {
        "schemaVersion": 1,
        "kind": "live-recovery-killer-demo",
        "generatedAt": datetime.now(timezone.utc).isoformat(),
        "passed": bool(outcome.get("passed")),
        "runtime": {"name": "LangGraph", "threadId": run_id, "checkpointId": checkpoint_id},
        # Only a truncated hash of the connection id is written to the artifact.
        "credentialSystem": {"name": "Nango", "integrationId": INTEGRATION_ID,
                             "connectionHash": hashlib.sha256(CONNECTION_ID.encode()).hexdigest()[:16]},
        "provider": {"name": "Microsoft Graph", "operation": "send existing draft"},
        "failureInjection": "controlled response loss after Graph returned 202 Accepted",
        "durationMs": round((time.perf_counter() - started_at) * 1000, 3),
        "assertions": outcome.get("assertions", {}),
        "error": outcome.get("error"),
        "cleanup": cleanup,
        "claimsExcluded": ["provider-wide success rate", "availability", "customer MTTR"],
    }
    RESULT_FILE.write_text(json.dumps(artifact, indent=2) + "\n")
    print(json.dumps({"passed": artifact["passed"], "result": str(RESULT_FILE), "cleanup": cleanup}))
    return 0 if artifact["passed"] else 1
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|