connectonion 0.6.4__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- connectonion/__init__.py +1 -1
- connectonion/cli/co_ai/main.py +2 -2
- connectonion/cli/co_ai/prompts/connectonion/concepts/trust.md +166 -208
- connectonion/cli/commands/copy_commands.py +21 -0
- connectonion/cli/commands/trust_commands.py +152 -0
- connectonion/cli/main.py +82 -0
- connectonion/core/llm.py +2 -2
- connectonion/docs/concepts/fast_rules.md +237 -0
- connectonion/docs/concepts/onboarding.md +465 -0
- connectonion/docs/concepts/trust.md +933 -192
- connectonion/docs/design-decisions/023-trust-policy-system-design.md +323 -0
- connectonion/docs/network/README.md +23 -1
- connectonion/docs/network/connect.md +135 -0
- connectonion/docs/network/host.md +73 -4
- connectonion/network/__init__.py +7 -6
- connectonion/network/asgi/__init__.py +3 -0
- connectonion/network/asgi/http.py +125 -19
- connectonion/network/asgi/websocket.py +276 -15
- connectonion/network/connect.py +145 -29
- connectonion/network/host/auth.py +70 -67
- connectonion/network/host/routes.py +88 -3
- connectonion/network/host/server.py +100 -17
- connectonion/network/trust/__init__.py +27 -19
- connectonion/network/trust/factory.py +51 -24
- connectonion/network/trust/fast_rules.py +100 -0
- connectonion/network/trust/tools.py +316 -32
- connectonion/network/trust/trust_agent.py +403 -0
- connectonion/transcribe.py +1 -1
- {connectonion-0.6.4.dist-info → connectonion-0.6.5.dist-info}/METADATA +1 -1
- {connectonion-0.6.4.dist-info → connectonion-0.6.5.dist-info}/RECORD +32 -27
- connectonion/network/trust/prompts.py +0 -71
- {connectonion-0.6.4.dist-info → connectonion-0.6.5.dist-info}/WHEEL +0 -0
- {connectonion-0.6.4.dist-info → connectonion-0.6.5.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# Trust Policy System Design: Two-Tier Verification
|
|
2
|
+
|
|
3
|
+
*February 2025*
|
|
4
|
+
|
|
5
|
+
After designing the high-level trust architecture (see [020-trust-system-and-network-architecture.md](020-trust-system-and-network-architecture.md)), we needed to answer a harder question: how do we verify requests without burning tokens on every single one?
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## The Token Problem
|
|
10
|
+
|
|
11
|
+
Trust verification that uses an LLM costs tokens. Every verification burns money.
|
|
12
|
+
|
|
13
|
+
A popular agent receives 1000 requests/day. If each verification costs $0.001 on average, that's $1/day just for trust decisions. $365/year spent on "should I trust this?" instead of doing actual work.
|
|
14
|
+
|
|
15
|
+
Most verifications are trivial. "Is this client on the whitelist?" doesn't need LLM reasoning. "Does this client have a valid invite code?" is a simple lookup.
|
|
16
|
+
|
|
17
|
+
90% of trust decisions are mechanical. Only 10% need judgment.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Two-Tier Verification
|
|
22
|
+
|
|
23
|
+
We split trust into two layers:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
┌─────────────────────────────────────────────────────┐
|
|
27
|
+
│ FAST RULES (no tokens, instant) │
|
|
28
|
+
│ │
|
|
29
|
+
│ - Whitelist check │
|
|
30
|
+
│ - Blocklist check │
|
|
31
|
+
│ - Invite code verification │
|
|
32
|
+
│ - Payment verification │
|
|
33
|
+
│ - Client level checks (stranger, contact, etc.) │
|
|
34
|
+
│ │
|
|
35
|
+
│ 90% of requests resolved here │
|
|
36
|
+
└─────────────────────────────────────────────────────┘
|
|
37
|
+
│
|
|
38
|
+
│ Only when fast rules can't decide
|
|
39
|
+
▼
|
|
40
|
+
┌─────────────────────────────────────────────────────┐
|
|
41
|
+
│ TRUST AGENT (LLM, burns tokens, rare) │
|
|
42
|
+
│ │
|
|
43
|
+
│ - Behavior analysis │
|
|
44
|
+
│ - Complex promotion decisions │
|
|
45
|
+
│ - Edge cases │
|
|
46
|
+
│ │
|
|
47
|
+
│ 10% of requests (at most) │
|
|
48
|
+
└─────────────────────────────────────────────────────┘
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Fast rules handle common cases. LLM handles complex ones. Most requests never reach the LLM.
|
|
52
|
+
|
|
53
|
+
### Alternatives We Rejected
|
|
54
|
+
|
|
55
|
+
**All LLM** - every request goes through trust agent:
|
|
56
|
+
- Expensive ($365+/year for active agents)
|
|
57
|
+
- Slow (LLM latency on every request)
|
|
58
|
+
- Overkill for trivial decisions
|
|
59
|
+
|
|
60
|
+
**All Code** - hardcoded rules only:
|
|
61
|
+
- Inflexible (can't handle edge cases)
|
|
62
|
+
- Brittle (every scenario needs explicit code)
|
|
63
|
+
- No judgment for nuanced decisions
|
|
64
|
+
|
|
65
|
+
Two tiers give us cost efficiency (90% free), speed (instant for common cases), flexibility (LLM for edge cases), and progressive complexity.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Client States
|
|
70
|
+
|
|
71
|
+
We needed a clear model for how trust evolves:
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
Promotion Chain (earned trust):
|
|
75
|
+
|
|
76
|
+
┌─────────────┐ verify ┌─────────────┐ earn trust ┌─────────────┐
|
|
77
|
+
│ Stranger │ ──────────► │ Contact │ ───────────► │ Whitelist │
|
|
78
|
+
└─────────────┘ └─────────────┘ └─────────────┘
|
|
79
|
+
│ │ │
|
|
80
|
+
│ block │ demote │ demote
|
|
81
|
+
▼ ▼ ▼
|
|
82
|
+
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
|
83
|
+
│ Blocklist │ │ Stranger │ │ Contact │
|
|
84
|
+
└─────────────┘ └─────────────┘ └─────────────┘
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
Admin (separate, manual only):
|
|
88
|
+
|
|
89
|
+
┌─────────────┐ set_admin() ┌─────────────┐
|
|
90
|
+
│ Any Level │ ─────────────► │ Admin │
|
|
91
|
+
└─────────────┘ └─────────────┘
|
|
92
|
+
│
|
|
93
|
+
remove_admin()
|
|
94
|
+
│
|
|
95
|
+
▼
|
|
96
|
+
┌─────────────┐
|
|
97
|
+
│ Previous │
|
|
98
|
+
└─────────────┘
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Why Admin Is Separate
|
|
102
|
+
|
|
103
|
+
Early design had `promote_to_admin()` as part of the chain. Wrong for several reasons:
|
|
104
|
+
|
|
105
|
+
1. **Different nature**: Admin is about authority, not trust level. A contact can be admin. A whitelist member might not be.
|
|
106
|
+
|
|
107
|
+
2. **Security**: Auto-promotion to admin is dangerous. Admin should be a deliberate, manual decision.
|
|
108
|
+
|
|
109
|
+
3. **Audit trail**: Admin changes need explicit `by_admin` tracking. Who granted it? Who revoked it?
|
|
110
|
+
|
|
111
|
+
4. **Simplicity**: Keeping admin separate makes the promotion chain cleaner.
|
|
112
|
+
|
|
113
|
+
### The Atomic Functions
|
|
114
|
+
|
|
115
|
+
Instead of generic `promote()` and `demote()`, we use explicit atomic functions:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
# Promotion (earned)
|
|
119
|
+
promote_to_contact(client_id) # Stranger → Contact
|
|
120
|
+
promote_to_whitelist(client_id) # Contact → Whitelist
|
|
121
|
+
|
|
122
|
+
# Demotion
|
|
123
|
+
demote_to_contact(client_id) # Whitelist → Contact
|
|
124
|
+
demote_to_stranger(client_id) # Contact → Stranger
|
|
125
|
+
|
|
126
|
+
# Blocking
|
|
127
|
+
block(client_id, reason)
|
|
128
|
+
unblock(client_id)
|
|
129
|
+
|
|
130
|
+
# Admin (manual only)
|
|
131
|
+
set_admin(client_id, by_admin)
|
|
132
|
+
remove_admin(client_id, by_admin)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
`promote_to_contact()` is clearer than `promote(client_id, level="contact")`. No ambiguity. No wrong level. Self-documenting.
|
|
136
|
+
|
|
137
|
+
These functions serve as tools for both host (direct calls for fast rule actions) and trust agent (LLM calls for complex decisions).
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## YAML + Markdown Format
|
|
142
|
+
|
|
143
|
+
Trust policies need two things:
|
|
144
|
+
1. Configuration for fast rules (structured, machine-readable)
|
|
145
|
+
2. Instructions for trust agent (natural language, LLM-readable)
|
|
146
|
+
|
|
147
|
+
We chose YAML frontmatter for config, Markdown body for instructions.
|
|
148
|
+
|
|
149
|
+
```yaml
|
|
150
|
+
# prompts/trust/careful.md
|
|
151
|
+
---
|
|
152
|
+
fast_rules:
|
|
153
|
+
- if: has_invite_code
|
|
154
|
+
action: verify_invite
|
|
155
|
+
on_success: promote_to_contact
|
|
156
|
+
|
|
157
|
+
- if: is_blocked
|
|
158
|
+
action: deny
|
|
159
|
+
|
|
160
|
+
- if: is_whitelist
|
|
161
|
+
action: allow
|
|
162
|
+
|
|
163
|
+
- if: is_stranger
|
|
164
|
+
action: deny
|
|
165
|
+
|
|
166
|
+
use_agent:
|
|
167
|
+
- when: requests > 10
|
|
168
|
+
reason: "Evaluate for promotion"
|
|
169
|
+
|
|
170
|
+
cache: 24h
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
# Trust Agent Policy
|
|
174
|
+
|
|
175
|
+
You handle complex trust decisions.
|
|
176
|
+
|
|
177
|
+
## Available Tools
|
|
178
|
+
- promote_to_contact(client_id)
|
|
179
|
+
- block(client_id, reason)
|
|
180
|
+
|
|
181
|
+
## When to Promote
|
|
182
|
+
Promote stranger to contact when:
|
|
183
|
+
- 10+ requests with good behavior
|
|
184
|
+
- No suspicious patterns
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### Why YAML Frontmatter
|
|
188
|
+
|
|
189
|
+
**Separate config file** (`careful.yaml` + `careful.md`): Two files to maintain, easy to get out of sync.
|
|
190
|
+
|
|
191
|
+
**JSON in markdown**: Awkward to edit, syntax highlighting breaks.
|
|
192
|
+
|
|
193
|
+
**Pure YAML file** with policy as multi-line string: Ugly, no markdown preview, harder to write prose.
|
|
194
|
+
|
|
195
|
+
YAML frontmatter won: single file, standard format (Jekyll, Hugo, Obsidian all use it), editors understand it, clear separation between config and prose.
|
|
196
|
+
|
|
197
|
+
### Fast Rules Structure
|
|
198
|
+
|
|
199
|
+
We iterated on the syntax. First version had separate `verify:` and `fast_rules:` sections—confusing overlap, unclear precedence.
|
|
200
|
+
|
|
201
|
+
Final version: everything under `fast_rules:`. Single place for all rules, order determines precedence.
|
|
202
|
+
|
|
203
|
+
```yaml
|
|
204
|
+
fast_rules:
|
|
205
|
+
- if: has_invite_code
|
|
206
|
+
action: verify_invite
|
|
207
|
+
on_success: promote_to_contact
|
|
208
|
+
- if: is_stranger
|
|
209
|
+
action: deny
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## TrustAgent Design
|
|
215
|
+
|
|
216
|
+
### Why Inherit from Agent
|
|
217
|
+
|
|
218
|
+
Should TrustAgent be a separate class or inherit from Agent?
|
|
219
|
+
|
|
220
|
+
**Separate interface** (`TrustVerifier` Protocol): New abstraction to learn, can't reuse Agent features (tools, LLM, events), inconsistent with framework.
|
|
221
|
+
|
|
222
|
+
**Inherit from Agent**: Consistent with framework, all Agent features available, same API users already know.
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
class TrustAgent(Agent):
|
|
226
|
+
# Inherits: system_prompt, tools, input(), etc.
|
|
227
|
+
# Adds: should_allow(), verify_invite(), etc.
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
We went with inheritance.
|
|
231
|
+
|
|
232
|
+
### No Formal Interface
|
|
233
|
+
|
|
234
|
+
No formal Protocol/ABC. TrustAgent is just an Agent with specific methods.
|
|
235
|
+
|
|
236
|
+
Python isn't Java—duck typing is idiomatic. If it has `should_allow()`, it's a trust agent. There's only one implementation, so no need to abstract. "TrustAgent inherits from Agent" is clearer than "TrustAgent implements ITrustVerifier which extends Protocol..."
|
|
237
|
+
|
|
238
|
+
YAGNI. We can add an interface later if we need it.
|
|
239
|
+
|
|
240
|
+
### Config vs system_prompt
|
|
241
|
+
|
|
242
|
+
Early design: `TrustAgent(config="prompts/trust/careful.md")`
|
|
243
|
+
|
|
244
|
+
We changed to: `TrustAgent(system_prompt="prompts/trust/careful.md")`
|
|
245
|
+
|
|
246
|
+
Every Agent has `system_prompt`. TrustAgent is an Agent. Therefore TrustAgent uses `system_prompt`. The YAML frontmatter is parsed internally and stored as `self.trust_config`, but externally it looks like a normal Agent.
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
## Fixed Routes in Host
|
|
251
|
+
|
|
252
|
+
### The Dynamic Routes Problem
|
|
253
|
+
|
|
254
|
+
Early design: trust agent dynamically registers routes based on policy. Policy enables `verify_invite` → route appears. Policy disables it → route disappears.
|
|
255
|
+
|
|
256
|
+
Problems: unpredictable (which routes exist depends on policy), documentation nightmare, client confusion (consumers don't know what endpoints to call).
|
|
257
|
+
|
|
258
|
+
### Fixed Routes Solution
|
|
259
|
+
|
|
260
|
+
Host provides fixed routes. Policy enables/disables behavior, not existence.
|
|
261
|
+
|
|
262
|
+
```text
|
|
263
|
+
# Always exist:
|
|
264
|
+
/trust/verify/invite # If disabled → returns clear error
|
|
265
|
+
/trust/verify/payment
|
|
266
|
+
/trust/promote
|
|
267
|
+
/trust/demote
|
|
268
|
+
/trust/block
|
|
269
|
+
/trust/admin/set
|
|
270
|
+
/trust/admin/remove
|
|
271
|
+
/input
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Same routes always exist. One set to document. Clients know what's available. No route registration logic.
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## Security Considerations
|
|
279
|
+
|
|
280
|
+
### Promote Injection
|
|
281
|
+
|
|
282
|
+
TrustAgent has `promote_to_contact()` as a tool. During skill testing, a malicious agent could craft prompts to trick the trust agent into calling it—promoting itself, bypassing verification.
|
|
283
|
+
|
|
284
|
+
Possible solutions (needs design decision):
|
|
285
|
+
1. Skill-test mode with no list management tools
|
|
286
|
+
2. Human confirmation for list changes
|
|
287
|
+
3. Read-only trust agent instance for skill tests
|
|
288
|
+
4. All promotions logged, easy to review/revert
|
|
289
|
+
|
|
290
|
+
Open TODO. The design acknowledges the risk.
|
|
291
|
+
|
|
292
|
+
### Why We Accept This Risk (For Now)
|
|
293
|
+
|
|
294
|
+
Giving the trust agent tools is the right design. The injection risk is an edge case. Unexpected promotions are auditable and revertable. By documenting it, we ensure it's not forgotten.
|
|
295
|
+
|
|
296
|
+
---
|
|
297
|
+
|
|
298
|
+
## Principles
|
|
299
|
+
|
|
300
|
+
**Simple things simple**: `host(agent, trust="careful")` just works. Full control when needed: `host(agent, trust=custom_trust_agent)`.
|
|
301
|
+
|
|
302
|
+
**Progressive disclosure**: Level 0 (`host(agent)`) → Level 1 (`trust="strict"`) → Level 2 (`trust="./my_policy.md"`) → Level 3 (`trust=TrustAgent(...)`). Users at level 0 don't see levels 1-3.
|
|
303
|
+
|
|
304
|
+
**Behavior over identity**: Trust levels (stranger, contact, whitelist) are earned through behavior, not granted through identity. Admin is the exception—identity matters (which existing admin granted it).
|
|
305
|
+
|
|
306
|
+
**Explicit over implicit**: `promote_to_contact()` not `promote(level="contact")`. `set_admin(by_admin="...")` not `set_admin()`. `verify_invite` in YAML, not automatic route detection.
|
|
307
|
+
|
|
308
|
+
---
|
|
309
|
+
|
|
310
|
+
## Summary
|
|
311
|
+
|
|
312
|
+
| Decision | Choice | Why |
|
|
313
|
+
|----------|--------|-----|
|
|
314
|
+
| Verification approach | Two-tier (fast rules + LLM) | 90% free, 10% smart |
|
|
315
|
+
| Policy format | YAML frontmatter + Markdown | One file, clear separation |
|
|
316
|
+
| Client states | Stranger → Contact → Whitelist (Admin separate) | Clear progression, admin is special |
|
|
317
|
+
| Functions | Atomic (promote_to_contact, not promote) | Self-documenting, no ambiguity |
|
|
318
|
+
| TrustAgent | Inherits from Agent | Consistent API, reuse features |
|
|
319
|
+
| Interface | No formal Protocol | YAGNI, duck typing is fine |
|
|
320
|
+
| Routes | Fixed in host | Predictable, documentable |
|
|
321
|
+
| Config parameter | system_prompt (not config) | Consistent with Agent |
|
|
322
|
+
|
|
323
|
+
The system balances cost (fast rules are free), flexibility (LLM for edge cases), simplicity (presets for common cases), and power (custom agents for advanced users).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Multi-Agent Networking
|
|
2
2
|
|
|
3
|
-
Connect and collaborate between agents.
|
|
3
|
+
Connect and collaborate between agents with automatic reliability and recovery.
|
|
4
4
|
|
|
5
5
|
## Core Concepts
|
|
6
6
|
|
|
@@ -8,6 +8,28 @@ Connect and collaborate between agents.
|
|
|
8
8
|
- [connect.md](connect.md) - Connect to remote agents with `connect()`
|
|
9
9
|
- [connection.md](connection.md) - Stream events and communicate with clients
|
|
10
10
|
|
|
11
|
+
## Key Features
|
|
12
|
+
|
|
13
|
+
### 🔄 Automatic Connection Recovery
|
|
14
|
+
- WebSocket failures automatically fall back to HTTP polling
|
|
15
|
+
- Page refresh doesn't lose your work
|
|
16
|
+
- Results recoverable for 24 hours
|
|
17
|
+
|
|
18
|
+
### 💓 Built-in Keep-Alive
|
|
19
|
+
- Server sends PING every 30 seconds
|
|
20
|
+
- Client automatically responds with PONG
|
|
21
|
+
- Dead connections detected within 60 seconds
|
|
22
|
+
|
|
23
|
+
### ⏱️ Extended Timeout
|
|
24
|
+
- Default 10-minute timeout for long-running tasks
|
|
25
|
+
- Configurable per request
|
|
26
|
+
- Works even if agent takes hours (via polling)
|
|
27
|
+
|
|
28
|
+
### 📦 Session Persistence
|
|
29
|
+
- Results stored server-side for 24 hours
|
|
30
|
+
- Session ID automatically managed
|
|
31
|
+
- localStorage (browser) survives page refreshes
|
|
32
|
+
|
|
11
33
|
## Protocol Specifications
|
|
12
34
|
|
|
13
35
|
- [protocol/agent-relay-protocol.md](protocol/agent-relay-protocol.md) - Agent relay protocol spec
|
|
@@ -193,6 +193,141 @@ curl https://oo.openonion.ai/api/relay/agents/0x3d4017c3...
|
|
|
193
193
|
|
|
194
194
|
---
|
|
195
195
|
|
|
196
|
+
## Connection Reliability & Recovery
|
|
197
|
+
|
|
198
|
+
The ConnectOnion client (TypeScript/Python) automatically handles connection failures and recovers results seamlessly.
|
|
199
|
+
|
|
200
|
+
### Automatic Keep-Alive
|
|
201
|
+
|
|
202
|
+
**Server sends PING every 30 seconds:**
|
|
203
|
+
- Client automatically responds with PONG
|
|
204
|
+
- Keeps connection alive through proxies and firewalls
|
|
205
|
+
- Detects dead connections within 60 seconds
|
|
206
|
+
|
|
207
|
+
No configuration needed - handled automatically by the SDK.
|
|
208
|
+
|
|
209
|
+
### Extended Timeout
|
|
210
|
+
|
|
211
|
+
**Default timeout: 10 minutes** (600 seconds)
|
|
212
|
+
|
|
213
|
+
Long-running agent tasks have plenty of time to complete:
|
|
214
|
+
|
|
215
|
+
```typescript
|
|
216
|
+
// TypeScript - default 10 minutes
|
|
217
|
+
const response = await agent.input("Analyze this large dataset");
|
|
218
|
+
|
|
219
|
+
// Override if needed (5 minutes)
|
|
220
|
+
const response = await agent.input("Quick task", 300000);
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Automatic Session Recovery
|
|
224
|
+
|
|
225
|
+
If the WebSocket connection fails (network drop, timeout, page refresh), the SDK **automatically polls** the server to retrieve your result:
|
|
226
|
+
|
|
227
|
+
```
|
|
228
|
+
1. Connection fails or times out
|
|
229
|
+
↓
|
|
230
|
+
2. SDK polls GET /sessions/{session_id} every 10s
|
|
231
|
+
↓
|
|
232
|
+
3. Server returns result when ready
|
|
233
|
+
↓
|
|
234
|
+
4. SDK returns result to your code
|
|
235
|
+
↓
|
|
236
|
+
5. You get the result as if nothing happened! ✅
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
**What this means for you:**
|
|
240
|
+
- ✅ Page refresh during long tasks? No problem.
|
|
241
|
+
- ✅ Network hiccup? Result still delivered.
|
|
242
|
+
- ✅ Connection timeout? Automatically recovered.
|
|
243
|
+
- ✅ Agent takes 15 minutes? You still get the result.
|
|
244
|
+
|
|
245
|
+
**Configuration (TypeScript):**
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
const agent = connect("0x123...", {
|
|
249
|
+
enablePolling: true, // Default: true
|
|
250
|
+
pollIntervalMs: 10000, // Poll every 10s (default)
|
|
251
|
+
maxPollAttempts: 30 // 30 attempts × 10s = 5 minutes of polling (default); raise for longer tasks
|
|
252
|
+
});
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
**Session persistence:**
|
|
256
|
+
- Results stored server-side for **24 hours**
|
|
257
|
+
- Session ID automatically generated and tracked
|
|
258
|
+
- localStorage used (browser) to survive page refreshes
|
|
259
|
+
|
|
260
|
+
### Connection Lifecycle
|
|
261
|
+
|
|
262
|
+
```
|
|
263
|
+
┌─────────────────────────────────────────────────────┐
|
|
264
|
+
│ Normal Operation (WebSocket) │
|
|
265
|
+
├─────────────────────────────────────────────────────┤
|
|
266
|
+
│ 1. Connect via WebSocket │
|
|
267
|
+
│ 2. Send INPUT with session_id │
|
|
268
|
+
│ 3. Receive PING every 30s, respond with PONG │
|
|
269
|
+
│ 4. Receive streaming events │
|
|
270
|
+
│ 5. Receive OUTPUT (result) │
|
|
271
|
+
│ 6. Close connection │
|
|
272
|
+
└─────────────────────────────────────────────────────┘
|
|
273
|
+
|
|
274
|
+
┌─────────────────────────────────────────────────────┐
|
|
275
|
+
│ Recovery Mode (HTTP Polling) │
|
|
276
|
+
├─────────────────────────────────────────────────────┤
|
|
277
|
+
│ 1. WebSocket fails/timeout │
|
|
278
|
+
│ 2. SDK polls: GET /sessions/{session_id} │
|
|
279
|
+
│ 3. Server responds: {"status": "running"} │
|
|
280
|
+
│ 4. Wait 10s, poll again │
|
|
281
|
+
│ 5. Server responds: {"status": "done", "result"} │
|
|
282
|
+
│ 6. SDK returns result to your code │
|
|
283
|
+
└─────────────────────────────────────────────────────┘
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Error Scenarios Handled
|
|
287
|
+
|
|
288
|
+
| Scenario | What Happens |
|
|
289
|
+
|----------|-------------|
|
|
290
|
+
| **Network disconnect** | Automatic polling recovers result |
|
|
291
|
+
| **Page refresh** | Session ID in localStorage, poll for result |
|
|
292
|
+
| **10-minute timeout** | Polling activates, waits for completion |
|
|
293
|
+
| **Server restart** | Polling continues; the result is available once the server is back up |
|
|
294
|
+
| **Connection drops mid-stream** | Polling recovers final result |
|
|
295
|
+
|
|
296
|
+
### Best Practices
|
|
297
|
+
|
|
298
|
+
**1. For long-running tasks:**
|
|
299
|
+
```typescript
|
|
300
|
+
// Just call input() - recovery is automatic
|
|
301
|
+
const response = await agent.input("Process 1GB of data");
|
|
302
|
+
// Works even if it takes 20 minutes
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
**2. For user feedback:**
|
|
306
|
+
```typescript
|
|
307
|
+
agent.on('reconnecting', () => {
|
|
308
|
+
showMessage('Connection lost, recovering...');
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
agent.on('polling', () => {
|
|
312
|
+
showMessage('Checking for results...');
|
|
313
|
+
});
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
**3. For critical operations:**
|
|
317
|
+
```typescript
|
|
318
|
+
try {
|
|
319
|
+
const response = await agent.input("Critical task");
|
|
320
|
+
console.log("Success:", response.text);
|
|
321
|
+
} catch (error) {
|
|
322
|
+
// Only fails if:
|
|
323
|
+
// - Server down for 24+ hours
|
|
324
|
+
// - Session expired (24h TTL)
|
|
325
|
+
console.error("Failed:", error);
|
|
326
|
+
}
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
196
331
|
## Message Protocol
|
|
197
332
|
|
|
198
333
|
### INPUT (Client → Relay → Agent)
|
|
@@ -359,7 +359,7 @@ curl http://localhost:8000/admin/sessions \
|
|
|
359
359
|
|
|
360
360
|
## WebSocket API
|
|
361
361
|
|
|
362
|
-
WebSocket provides real-time communication
|
|
362
|
+
WebSocket provides real-time communication with automatic keep-alive and session recovery.
|
|
363
363
|
|
|
364
364
|
### Connect
|
|
365
365
|
|
|
@@ -372,7 +372,10 @@ const ws = new WebSocket("ws://localhost:8000/ws");
|
|
|
372
372
|
```javascript
|
|
373
373
|
ws.send(JSON.stringify({
|
|
374
374
|
type: "INPUT",
|
|
375
|
-
prompt: "Translate hello to Spanish"
|
|
375
|
+
prompt: "Translate hello to Spanish",
|
|
376
|
+
session: {
|
|
377
|
+
session_id: "550e8400-e29b-41d4-a716-446655440000" // Optional: for session continuity
|
|
378
|
+
}
|
|
376
379
|
}));
|
|
377
380
|
```
|
|
378
381
|
|
|
@@ -382,8 +385,12 @@ ws.send(JSON.stringify({
|
|
|
382
385
|
ws.onmessage = (event) => {
|
|
383
386
|
const msg = JSON.parse(event.data);
|
|
384
387
|
|
|
385
|
-
if (msg.type === "
|
|
388
|
+
if (msg.type === "PING") {
|
|
389
|
+
// Server keep-alive check (every 30s)
|
|
390
|
+
ws.send(JSON.stringify({ type: "PONG" }));
|
|
391
|
+
} else if (msg.type === "OUTPUT") {
|
|
386
392
|
console.log("Result:", msg.result);
|
|
393
|
+
console.log("Session ID:", msg.session_id);
|
|
387
394
|
} else if (msg.type === "ERROR") {
|
|
388
395
|
console.error("Error:", msg.message);
|
|
389
396
|
} else {
|
|
@@ -397,8 +404,10 @@ ws.onmessage = (event) => {
|
|
|
397
404
|
|
|
398
405
|
| Type | Direction | Purpose |
|
|
399
406
|
|------|-----------|---------|
|
|
400
|
-
| INPUT | Client → Agent | Send prompt |
|
|
407
|
+
| INPUT | Client → Agent | Send prompt with optional session |
|
|
401
408
|
| OUTPUT | Agent → Client | Final result + session data |
|
|
409
|
+
| PING | Agent → Client | Connection keep-alive (every 30s) |
|
|
410
|
+
| PONG | Client → Agent | Acknowledge keep-alive |
|
|
402
411
|
| tool_call | Agent → Client | Tool started |
|
|
403
412
|
| tool_result | Agent → Client | Tool completed |
|
|
404
413
|
| thinking | Agent → Client | Agent is processing |
|
|
@@ -406,6 +415,66 @@ ws.onmessage = (event) => {
|
|
|
406
415
|
| approval_needed | Agent → Client | Tool approval required |
|
|
407
416
|
| ERROR | Agent → Client | Error message |
|
|
408
417
|
|
|
418
|
+
### Connection Keep-Alive
|
|
419
|
+
|
|
420
|
+
The server automatically sends **PING** messages every 30 seconds to keep the connection alive and detect dead connections:
|
|
421
|
+
|
|
422
|
+
```javascript
|
|
423
|
+
ws.onmessage = (event) => {
|
|
424
|
+
const msg = JSON.parse(event.data);
|
|
425
|
+
|
|
426
|
+
if (msg.type === "PING") {
|
|
427
|
+
// Respond immediately to keep connection alive
|
|
428
|
+
ws.send(JSON.stringify({ type: "PONG" }));
|
|
429
|
+
}
|
|
430
|
+
};
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
**Why it matters:**
|
|
434
|
+
- Keeps connection alive through proxies and firewalls
|
|
435
|
+
- Detects dead connections within 60 seconds
|
|
436
|
+
- Prevents silent connection failures
|
|
437
|
+
|
|
438
|
+
### Session Recovery
|
|
439
|
+
|
|
440
|
+
If your WebSocket disconnects (network failure, timeout, page refresh), you can recover the result using the `session_id`:
|
|
441
|
+
|
|
442
|
+
**1. Generate and save session_id before connecting:**
|
|
443
|
+
```javascript
|
|
444
|
+
const sessionId = crypto.randomUUID();
|
|
445
|
+
localStorage.setItem('active_session', sessionId);
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
**2. Include session_id in INPUT:**
|
|
449
|
+
```javascript
|
|
450
|
+
ws.send(JSON.stringify({
|
|
451
|
+
type: "INPUT",
|
|
452
|
+
prompt: "Long running task...",
|
|
453
|
+
session: { session_id: sessionId }
|
|
454
|
+
}));
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
**3. If connection drops, poll for result:**
|
|
458
|
+
```javascript
|
|
459
|
+
ws.onerror = async () => {
|
|
460
|
+
// Connection failed, try to recover result
|
|
461
|
+
const response = await fetch(`http://localhost:8000/sessions/${sessionId}`);
|
|
462
|
+
const data = await response.json();
|
|
463
|
+
|
|
464
|
+
if (data.status === "done") {
|
|
465
|
+
console.log("Recovered result:", data.result);
|
|
466
|
+
} else if (data.status === "running") {
|
|
467
|
+
// Still processing: poll again after a delay (pollForResult is your own helper that repeats this fetch)
|
|
468
|
+
setTimeout(() => pollForResult(sessionId), 10000);
|
|
469
|
+
}
|
|
470
|
+
};
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
**Session lifecycle:**
|
|
474
|
+
- Results stored for **24 hours** (configurable via `result_ttl`)
|
|
475
|
+
- Status can be `"running"` or `"done"`
|
|
476
|
+
- Supports recovery after network failures, timeouts, or page refreshes
|
|
477
|
+
|
|
409
478
|
---
|
|
410
479
|
|
|
411
480
|
## Design: Stateless Sessions
|
connectonion/network/__init__.py
CHANGED
|
@@ -4,7 +4,7 @@ LLM-Note:
|
|
|
4
4
|
Dependencies: imports from [host/, io/, connect.py, relay.py, announce.py, trust/] | imported by [__init__.py main package, user code] | tested via submodule tests
|
|
5
5
|
Data flow: pure re-export module aggregating networking functionality
|
|
6
6
|
State/Effects: no state
|
|
7
|
-
Integration: exposes host(agent, port, trust), create_app(), IO/WebSocketIO, SessionStorage/Session, connect(url), RemoteAgent, Response, relay server (relay_connect, serve_loop), announce (create_announce_message), trust (
|
|
7
|
+
Integration: exposes host(agent, port, trust), create_app(), IO/WebSocketIO, SessionStorage/Session, connect(url), RemoteAgent, Response, relay server (relay_connect, serve_loop), announce (create_announce_message), trust (TrustAgent) | unified networking API surface
|
|
8
8
|
Performance: trivial
|
|
9
9
|
Errors: none
|
|
10
10
|
Network layer for hosting and connecting agents.
|
|
@@ -16,7 +16,7 @@ This module contains:
|
|
|
16
16
|
- relay: Agent relay server for P2P discovery
|
|
17
17
|
- connect: Multi-agent networking (RemoteAgent)
|
|
18
18
|
- announce: Service announcement protocol
|
|
19
|
-
- trust: Trust verification system
|
|
19
|
+
- trust: Trust verification system (TrustAgent is the single interface)
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
from .host import host, create_app, SessionStorage, Session
|
|
@@ -24,7 +24,7 @@ from .io import IO, WebSocketIO
|
|
|
24
24
|
from .connect import connect, RemoteAgent, Response
|
|
25
25
|
from .relay import connect as relay_connect, serve_loop
|
|
26
26
|
from .announce import create_announce_message
|
|
27
|
-
from .trust import
|
|
27
|
+
from .trust import TrustAgent, Decision, get_default_trust_level, TRUST_LEVELS, parse_policy
|
|
28
28
|
from . import relay, announce
|
|
29
29
|
|
|
30
30
|
__all__ = [
|
|
@@ -40,11 +40,12 @@ __all__ = [
|
|
|
40
40
|
"relay_connect",
|
|
41
41
|
"serve_loop",
|
|
42
42
|
"create_announce_message",
|
|
43
|
-
|
|
43
|
+
# Trust (TrustAgent is the single interface)
|
|
44
|
+
"TrustAgent",
|
|
45
|
+
"Decision",
|
|
44
46
|
"get_default_trust_level",
|
|
45
47
|
"TRUST_LEVELS",
|
|
46
|
-
"
|
|
47
|
-
"TRUST_PROMPTS",
|
|
48
|
+
"parse_policy",
|
|
48
49
|
"relay",
|
|
49
50
|
"announce",
|
|
50
51
|
]
|
|
@@ -23,6 +23,7 @@ def create_app(
|
|
|
23
23
|
route_handlers: dict,
|
|
24
24
|
storage,
|
|
25
25
|
trust: str = "careful",
|
|
26
|
+
trust_config: dict | None = None,
|
|
26
27
|
blacklist: list | None = None,
|
|
27
28
|
whitelist: list | None = None,
|
|
28
29
|
):
|
|
@@ -32,6 +33,7 @@ def create_app(
|
|
|
32
33
|
route_handlers: Dict of route handler functions
|
|
33
34
|
storage: SessionStorage instance
|
|
34
35
|
trust: Trust level (open/careful/strict)
|
|
36
|
+
trust_config: Parsed YAML config from trust policy (for /info onboard)
|
|
35
37
|
blacklist: Blocked identities
|
|
36
38
|
whitelist: Allowed identities
|
|
37
39
|
|
|
@@ -49,6 +51,7 @@ def create_app(
|
|
|
49
51
|
route_handlers=route_handlers,
|
|
50
52
|
storage=storage,
|
|
51
53
|
trust=trust,
|
|
54
|
+
trust_config=trust_config,
|
|
52
55
|
start_time=start_time,
|
|
53
56
|
blacklist=blacklist,
|
|
54
57
|
whitelist=whitelist,
|