@framers/agentos 0.5.8 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +122 -362
  2. package/dist/api/agency.d.ts.map +1 -1
  3. package/dist/api/agency.js +0 -2
  4. package/dist/api/agency.js.map +1 -1
  5. package/dist/api/runtime/AgentOSOrchestrator.d.ts.map +1 -1
  6. package/dist/api/runtime/AgentOSOrchestrator.js +6 -3
  7. package/dist/api/runtime/AgentOSOrchestrator.js.map +1 -1
  8. package/dist/api/runtime/strategies/debate.d.ts.map +1 -1
  9. package/dist/api/runtime/strategies/debate.js +64 -21
  10. package/dist/api/runtime/strategies/debate.js.map +1 -1
  11. package/dist/api/runtime/strategies/graph.d.ts.map +1 -1
  12. package/dist/api/runtime/strategies/graph.js +11 -25
  13. package/dist/api/runtime/strategies/graph.js.map +1 -1
  14. package/dist/api/runtime/strategies/hierarchical.d.ts.map +1 -1
  15. package/dist/api/runtime/strategies/hierarchical.js +27 -7
  16. package/dist/api/runtime/strategies/hierarchical.js.map +1 -1
  17. package/dist/api/runtime/strategies/parallel.d.ts.map +1 -1
  18. package/dist/api/runtime/strategies/parallel.js +4 -8
  19. package/dist/api/runtime/strategies/parallel.js.map +1 -1
  20. package/dist/api/runtime/strategies/review-loop.d.ts +25 -0
  21. package/dist/api/runtime/strategies/review-loop.d.ts.map +1 -1
  22. package/dist/api/runtime/strategies/review-loop.js +5 -13
  23. package/dist/api/runtime/strategies/review-loop.js.map +1 -1
  24. package/dist/api/runtime/strategies/sequential.d.ts.map +1 -1
  25. package/dist/api/runtime/strategies/sequential.js +11 -25
  26. package/dist/api/runtime/strategies/sequential.js.map +1 -1
  27. package/dist/api/runtime/strategies/shared.d.ts +45 -8
  28. package/dist/api/runtime/strategies/shared.d.ts.map +1 -1
  29. package/dist/api/runtime/strategies/shared.js +47 -8
  30. package/dist/api/runtime/strategies/shared.js.map +1 -1
  31. package/dist/api/types.d.ts +16 -0
  32. package/dist/api/types.d.ts.map +1 -1
  33. package/dist/api/types.js.map +1 -1
  34. package/dist/emergent/SandboxedToolForge.d.ts +8 -10
  35. package/dist/emergent/SandboxedToolForge.d.ts.map +1 -1
  36. package/dist/emergent/SandboxedToolForge.js +0 -0
  37. package/dist/emergent/SandboxedToolForge.js.map +1 -1
  38. package/dist/emergent/types.d.ts +9 -5
  39. package/dist/emergent/types.d.ts.map +1 -1
  40. package/dist/emergent/types.js.map +1 -1
  41. package/dist/sandbox/executor/CodeSandbox.js +1 -1
  42. package/dist/sandbox/executor/CodeSandbox.js.map +1 -1
  43. package/package.json +1 -1
package/README.md CHANGED
@@ -1,12 +1,14 @@
1
1
  <div align="center">
2
2
 
3
3
  <a href="https://agentos.sh">
4
- <img src="https://raw.githubusercontent.com/framersai/agentos/master/assets/agentos-primary-no-tagline-transparent-2x.png" alt="AgentOS — TypeScript AI Agent Framework" height="100" />
4
+ <img src="https://raw.githubusercontent.com/framersai/agentos/master/assets/agentos-primary-no-tagline-transparent-2x.png" alt="AgentOS — TypeScript AI Agent Framework with Cognitive Memory" height="100" />
5
5
  </a>
6
6
 
7
7
  <br />
8
8
 
9
- **Open-source TypeScript runtime for autonomous AI agents with cognitive memory, HEXACO personality, and emergent tool forging.**
9
+ # **AgentOS** — Open-Source TypeScript AI Agent Runtime with Cognitive Memory, HEXACO Personality, and Runtime Tool Forging
10
+
11
+ **85.6% on LongMemEval-S** at $0.0090/correct · **70.2% on LongMemEval-M** (first open-source library above 65% on the 1.5M-token variant) · 16 LLM providers · 8 neuroscience-backed memory mechanisms · MIT-friendly Apache 2.0
10
12
 
11
13
  [![npm](https://img.shields.io/npm/v/@framers/agentos?style=flat-square&logo=npm&color=cb3837)](https://www.npmjs.com/package/@framers/agentos)
12
14
  [![CI](https://img.shields.io/github/actions/workflow/status/framersai/agentos/ci.yml?branch=master&style=flat-square&logo=github&label=CI)](https://github.com/framersai/agentos/actions/workflows/ci.yml)
@@ -16,265 +18,132 @@
16
18
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue?style=flat-square)](https://opensource.org/licenses/Apache-2.0)
17
19
  [![Discord](https://img.shields.io/badge/Discord-Join%20Us-5865F2?style=flat-square&logo=discord)](https://wilds.ai/discord)
18
20
 
19
- [Website](https://agentos.sh) · [Docs](https://docs.agentos.sh) · [npm](https://www.npmjs.com/package/@framers/agentos) · [GitHub](https://github.com/framersai/agentos) · [Discord](https://wilds.ai/discord) · [Blog](https://docs.agentos.sh/blog)
21
+ [**Benchmarks**](https://docs.agentos.sh/benchmarks) · [Website](https://agentos.sh) · [Docs](https://docs.agentos.sh) · [npm](https://www.npmjs.com/package/@framers/agentos) · [Discord](https://wilds.ai/discord) · [Blog](https://docs.agentos.sh/blog)
20
22
 
21
23
  </div>
22
24
 
23
25
  ---
24
26
 
25
- ## What is AgentOS?
26
-
27
- AgentOS is a TypeScript runtime for building AI agents that remember, adapt, and create new tools at runtime. Each agent is a **Generalized Mind Instance** (GMI) with its own personality, memory lifecycle, and behavioral adaptation loop.
28
-
29
- ### Why AgentOS over alternatives?
30
-
31
- | vs. | AgentOS differentiator |
32
- |-----|------------------------|
33
- | **LangChain / LangGraph** | Cognitive memory (8 neuroscience-backed mechanisms), HEXACO personality, runtime tool forging |
34
- | **Vercel AI SDK** | Multi-agent teams (6 strategies), full RAG pipeline (7 vector backends), guardrails, voice/telephony |
35
- | **CrewAI / Mastra** | Unified orchestration (workflow DAGs + agent graphs + goal-driven missions), personality-driven routing |
36
-
37
- > **Full comparison:** [AgentOS vs LangGraph vs CrewAI vs Mastra](https://docs.agentos.sh/blog/2026/02/20/agentos-vs-langgraph-vs-crewai)
38
-
39
- ---
40
-
41
- ## Classifier-Driven Memory Pipeline
42
-
43
- Most memory libraries retrieve on every query. AgentOS gates memory through three independent LLM-as-judge classifiers, so trivial queries skip retrieval entirely, queries that need memory get the right architecture, and the right reader handles each category.
44
-
45
- ```
46
- User query
47
-
48
-
49
- ┌──────────────────────────────────┐
50
- │ Stage 1: QueryClassifier │ gpt-5-mini few-shot, ~$0.0001 / query
51
- │ Memory needed at all? │
52
- │ T0 = none ────────────────► answer from context, skip retrieval
53
- │ T1+ = simple/moderate/complex │
54
- └──────────────────────────────────┘
55
- │ (T1+ only)
56
-
57
- ┌──────────────────────────────────┐
58
- │ Stage 2: MemoryRouter │ reuses Stage 1 classification
59
- │ Which retrieval architecture? │
60
- │ canonical-hybrid · OM-v10 · OM-v11
61
- └──────────────────────────────────┘
62
-
63
-
64
- ┌──────────────────────────────────┐
65
- │ Stage 3: ReaderRouter │ reuses Stage 1 classification
66
- │ Which reader tier? │
67
- │ gpt-4o (TR/SSU) · gpt-5-mini (SSA/SSP/KU/MS)
68
- └──────────────────────────────────┘
69
-
70
-
71
- Grounded answer
72
- ```
73
-
74
- Each stage is a small LLM-as-judge classifier (gpt-5-mini, ~$0.0001-0.0014 per call). Each stage is independent and shippable on its own. Stages 2 and 3 reuse the Stage 1 classification output, so the full pipeline costs **one classifier call per query**, not three.
75
-
76
- **Validated on LongMemEval-S Phase B at N=500, gpt-4o judge, bootstrap CI 10k resamples**: 85.6% [82.4%, 88.6%] accuracy at $0.0090 per correct, 4-second average latency. Beats Mastra OM gpt-4o (84.2% published) on accuracy. Beats EmergenceMem Simple Fast (80.6% measured apples-to-apples in our harness) by +5.0 pp on accuracy at 6.5× lower cost-per-correct.
77
-
78
- | Primitive | Source | Decision per query | Cost per call |
79
- |---|---|---|---:|
80
- | `QueryClassifier` | `@framers/agentos/query-router` | T0/none vs T1/simple vs T2/moderate vs T3/complex | ~$0.0001 |
81
- | `MemoryRouter` | `@framers/agentos/memory-router` | canonical-hybrid vs observational-memory-v10 vs observational-memory-v11 | reuses Stage 1 output |
82
- | `ReaderRouter` | `@framers/agentos/memory-router` (v0.5.5) | gpt-4o vs gpt-5-mini per category | reuses Stage 1 output |
83
-
84
- The pipeline is novel because the **T0 / no-memory gate** removes retrieval entirely for queries that don't need it (greetings, small talk, general knowledge), saving the embedding+rerank+reader cost on a substantial fraction of typical agent traffic. The per-category dispatch then routes the remaining queries to the architecture and reader best-suited for the question type, calibrated from per-category Phase B accuracy data on LongMemEval-S.
85
-
86
- **[Full benchmark suite + reproducible run JSONs →](https://github.com/framersai/agentos-bench)** · **[Cognitive Pipeline docs →](https://docs.agentos.sh/features/cognitive-pipeline)** · **[Query Router docs →](https://docs.agentos.sh/features/query-routing)** · **[Memory Router docs →](https://docs.agentos.sh/features/memory-router)**
87
-
88
- ---
89
-
90
- ## See It In Action
91
-
92
- ### 🌀 Paracosm — AI Agent Swarm Simulation
93
-
94
- Define any scenario as JSON. Run it with AI commanders that have different HEXACO personalities. Same starting conditions, different decisions, divergent civilizations. Built on AgentOS.
95
-
96
- ```bash
97
- npm install paracosm
98
- ```
99
-
100
- **[Live Demo](https://paracosm.agentos.sh/sim)** · **[GitHub](https://github.com/framersai/paracosm)** · **[npm](https://www.npmjs.com/package/paracosm)** · **[Landing Page](https://paracosm.agentos.sh)**
101
-
102
- ---
103
-
104
27
  ## Install
105
28
 
106
29
  ```bash
107
30
  npm install @framers/agentos
108
31
  ```
109
32
 
110
- ### Configure API Keys
111
-
112
- ```bash
113
- # Environment variables (recommended for production)
114
- export OPENAI_API_KEY=sk-...
115
- export ANTHROPIC_API_KEY=sk-ant-...
116
- export GEMINI_API_KEY=AIza...
33
+ ```typescript
34
+ import { agent } from '@framers/agentos';
117
35
 
118
- # Key rotation — comma-separated keys auto-rotate with quota detection
119
- export OPENAI_API_KEY=sk-key1,sk-key2,sk-key3
120
- ```
36
+ const tutor = agent({
37
+ provider: 'anthropic',
38
+ instructions: 'You are a patient CS tutor.',
39
+ personality: { openness: 0.9, conscientiousness: 0.95 },
40
+ memory: { types: ['episodic', 'semantic'], working: { enabled: true } },
41
+ });
121
42
 
122
- ```typescript
123
- // Or pass apiKey inline (multi-tenant apps, tests, dynamic config)
124
- await generateText({ provider: 'openai', apiKey: 'sk-...', prompt: '...' });
43
+ const session = tutor.session('student-1');
44
+ await session.send('Explain recursion with an analogy.');
45
+ await session.send('Can you expand on that?'); // remembers context
125
46
  ```
126
47
 
127
- All high-level functions accept `apiKey` and `baseUrl` parameters.
48
+ [Full quickstart](https://docs.agentos.sh/getting-started) · [Examples cookbook](https://docs.agentos.sh/getting-started/examples) · [API reference](https://docs.agentos.sh/api)
128
49
 
129
50
  ---
130
51
 
131
- ## Quick Start
52
+ ## Memory Benchmarks at Matched Reader
132
53
 
133
- ### Generate Text
54
+ Honest, apples-to-apples comparison: same `gpt-4o` reader, same dataset, same Phase B N=500, same `gpt-4o-2024-08-06` judge with rubric `2026-04-18.1` (judge FPR 1% [0%, 3%]). Cross-provider configurations are excluded because they cannot be reproduced from public methodology disclosures.
134
55
 
135
- ```typescript
136
- import { generateText } from '@framers/agentos';
56
+ ### LongMemEval-S Phase B (115K tokens, 50 sessions)
137
57
 
138
- // Auto-detect provider from env vars
139
- const { text } = await generateText({
140
- prompt: 'Explain TCP handshakes in 3 bullets.',
141
- });
142
-
143
- // Pin a provider
144
- const { text: claude } = await generateText({
145
- provider: 'anthropic',
146
- prompt: 'Compare TCP and UDP.',
147
- });
148
- ```
58
+ | System (gpt-4o reader) | Accuracy | 95% CI | $/correct | p50 latency | Source |
59
+ |---|---:|---|---:|---:|---|
60
+ | EmergenceMem Internal | 86.0% | not published | not published | 5,650 ms | [emergence.ai](https://www.emergence.ai/blog/sota-on-longmemeval-with-rag) |
61
+ | **🚀 AgentOS canonical-hybrid + reader-router** | **85.6%** | **[82.4%, 88.6%]** | **$0.0090** | **3,558 ms** | [post](https://docs.agentos.sh/blog/2026/04/28/reader-router-pareto-win) |
62
+ | Mastra OM gpt-4o (gemini-flash observer) | 84.23% | not published | not published | not published | [mastra.ai](https://mastra.ai/research/observational-memory) |
63
+ | Supermemory gpt-4o | 81.6% | not published | not published | not published | [supermemory.ai](https://supermemory.ai/research/) |
64
+ | EmergenceMem Simple Fast (in our harness) | 80.6% | [77.0%, 84.0%] | $0.0586 | 3,703 ms | [adapter](https://github.com/framersai/agentos-bench/blob/master/vendors/emergence-simple-fast/) |
65
+ | Zep self / independent reproduction | 71.2% / 63.8% | not published | not published | — | [self](https://blog.getzep.com/state-of-the-art-agent-memory/) / [arXiv](https://arxiv.org/abs/2512.13564) |
149
66
 
150
- 16 providers, automatic fallback. When the primary provider returns a retryable error (HTTP 402/429/5xx, network failures, auth issues), `generateText` walks the canonical fallback chain for that provider using whichever API keys are present in the environment — no extra imports, no chain construction needed:
67
+ **+1.4 pp at point estimate over Mastra OM gpt-4o at the matched reader.** Mastra publishes no CI; their 84.23% sits inside our 95% CI [82.4%, 88.6%], so the gap is at the threshold of statistical significance. EmergenceMem Internal's 86.0% (no CI) also sits inside our CI; we are statistically tied with both. AgentOS p50 latency 3,558 ms vs EmergenceMem's published median 5,650 ms (-2,092 ms at the median; the only vendor that publishes a comparable latency number).
151
68
 
152
- ```typescript
153
- import { generateText } from '@framers/agentos';
69
+ **Cost at scale**: $0.0090 per memory-grounded answer = $9 per 1,000 RAG calls. A chatbot averaging 5 RAG calls per conversation across 1,000 conversations costs ~$45.
154
70
 
155
- const { text } = await generateText({
156
- provider: 'anthropic',
157
- prompt: 'Compare TCP and UDP.',
158
- });
159
- // Anthropic primary, falls through to OpenAI / Gemini / OpenRouter / etc. on retryable errors
160
- ```
71
+ ### LongMemEval-M Phase B (1.5M tokens, 500 sessions)
161
72
 
162
- Want strict single-provider routing (e.g. for billing isolation, capability auditing, or provider-pinned tests)? Pass an empty array to opt out:
73
+ The harder variant. M's haystacks exceed every production context window. Most vendors stop at S because raw long-context fits there.
163
74
 
164
- ```typescript
165
- const { text } = await generateText({
166
- provider: 'anthropic',
167
- prompt: 'Compare TCP and UDP.',
168
- fallbackProviders: [], // strict mode — fail if Anthropic is unavailable
169
- });
170
- ```
75
+ | System | Accuracy | 95% CI | License | Source |
76
+ |---|---:|---|---|---|
77
+ | AgentBrain | 71.7% | not published | closed-source SaaS | [github.com/AgentBrainHQ](https://github.com/AgentBrainHQ) |
78
+ | **🚀 AgentOS** (sem-embed + reader-router + top-K=5) | **70.2%** | **[66.0%, 74.0%]** | **MIT** | [post](https://docs.agentos.sh/blog/2026/04/29/longmemeval-m-70-with-topk5) |
79
+ | LongMemEval paper academic baseline | 65.7% | not published | open repo | [Wu et al., ICLR 2025](https://arxiv.org/abs/2410.10813) |
80
+ | Mem0 v3, Mastra, Hindsight, Zep, EmergenceMem, Supermemory, Letta, others | not published | — | various | reports S only |
171
81
 
172
- Or supply your own chain (and import `buildFallbackChain` only if you want to derive a default chain to splice from):
82
+ **Statistically tied with AgentBrain's closed-source SaaS** (their 71.7% sits inside our CI). **+4.5 pp above the LongMemEval paper's academic ceiling.** **First open-source memory library above 65% on M with full methodology disclosure** (bootstrap CIs, per-case run JSONs, reproducible CLI).
173
83
 
174
- ```typescript
175
- import { generateText, buildFallbackChain } from '@framers/agentos';
84
+ > **[Full benchmarks page →](https://docs.agentos.sh/benchmarks)** · **[Reproducible run JSONs →](https://github.com/framersai/agentos-bench/tree/master/results/runs)** · **[Methodology audit →](https://docs.agentos.sh/blog/2026/04/24/memory-benchmark-transparency-audit)**
176
85
 
177
- const { text } = await generateText({
178
- provider: 'anthropic',
179
- prompt: 'Compare TCP and UDP.',
180
- fallbackProviders: [
181
- { provider: 'openai', model: 'gpt-4o-mini' },
182
- { provider: 'openrouter' },
183
- ],
184
- });
185
- ```
86
+ ---
186
87
 
187
- ### Streaming
88
+ ## Classifier-Driven Memory Pipeline
188
89
 
189
- ```typescript
190
- import { streamText } from '@framers/agentos';
90
+ Most memory libraries retrieve on every query. AgentOS gates memory through three LLM-as-judge classifiers in a single shared pass, so trivial queries skip retrieval entirely and the rest get the right architecture and reader per category.
191
91
 
192
- const stream = streamText({ provider: 'openai', prompt: 'Write a haiku.' });
193
- for await (const chunk of stream.textStream) process.stdout.write(chunk);
194
92
  ```
195
-
196
- ### Structured Output
197
-
198
- ```typescript
199
- import { generateObject } from '@framers/agentos';
200
- import { z } from 'zod';
201
-
202
- const { object } = await generateObject({
203
- provider: 'gemini',
204
- schema: z.object({
205
- sentiment: z.enum(['positive', 'negative', 'neutral']),
206
- topics: z.array(z.string()),
207
- }),
208
- prompt: 'Analyze: "Great camera but disappointing battery."',
209
- });
93
+ User query
94
+
95
+ ▼ Stage 1: QueryClassifier (gpt-5-mini, ~$0.0001/query)
96
+ │ T0=none ─────► answer from context, skip retrieval
97
+ │ T1+=needs memory
98
+ Stage 2: MemoryRouter → canonical-hybrid · OM-v10 · OM-v11
99
+ ▼ Stage 3: ReaderRouter → gpt-4o (TR/SSU) · gpt-5-mini (SSA/SSP/KU/MS)
100
+
101
+ Grounded answer
210
102
  ```
211
103
 
212
- ### Create an Agent
104
+ Stages 2 and 3 reuse the Stage 1 classification, so the full pipeline costs **one classifier call per query**, not three. **The T0 / no-memory gate is the novel piece**: removing retrieval entirely for greetings and small talk saves the embedding + rerank + reader cost on a substantial fraction of typical agent traffic.
213
105
 
214
- ```typescript
215
- import { agent } from '@framers/agentos';
216
-
217
- const bot = agent({ provider: 'anthropic', instructions: 'You are a helpful assistant.' });
218
- const reply = await bot.session('demo').send('What is 2+2?');
219
- console.log(reply.text);
220
- ```
106
+ | Primitive | Source | Decision |
107
+ |---|---|---|
108
+ | `QueryClassifier` | [`@framers/agentos/query-router`](https://docs.agentos.sh/features/query-routing) | T0/none vs T1/simple vs T2/moderate vs T3/complex |
109
+ | `MemoryRouter` | [`@framers/agentos/memory-router`](https://docs.agentos.sh/features/memory-router) | canonical-hybrid vs observational-memory-v10 vs v11 |
110
+ | `ReaderRouter` | [`@framers/agentos/memory-router`](https://docs.agentos.sh/features/memory-router) | gpt-4o vs gpt-5-mini per category |
221
111
 
222
- ### Agent with Personality & Memory
112
+ [Cognitive Pipeline docs →](https://docs.agentos.sh/features/cognitive-pipeline) · [Architecture deep dive →](https://docs.agentos.sh/blog/2026/04/10/cognitive-memory-architecture-deep-dive) · [Beyond RAG →](https://docs.agentos.sh/blog/2026/03/31/cognitive-memory-beyond-rag)
223
113
 
224
- ```typescript
225
- const tutor = agent({
226
- provider: 'anthropic',
227
- instructions: 'You are a patient CS tutor.',
228
- personality: {
229
- openness: 0.9,
230
- conscientiousness: 0.95,
231
- agreeableness: 0.85,
232
- },
233
- memory: {
234
- types: ['episodic', 'semantic'],
235
- working: { enabled: true, maxTokens: 1200 },
236
- },
237
- });
238
-
239
- const session = tutor.session('student-1');
240
- await session.send('Explain recursion with an analogy.');
241
- await session.send('Can you expand on that?'); // remembers context
242
- ```
243
-
244
- #### Memory on direct calls
114
+ ---
245
115
 
246
- Memory auto-wires on `agent.stream()` / `agent.generate()` as well — sessions are not required to get memory integration. As of AgentOS 0.2.0, any call path invokes `memoryProvider.getContext` before the LLM call and `memoryProvider.observe` after.
116
+ ## Why AgentOS
247
117
 
248
- ```typescript
249
- import type { AgentMemoryProvider } from '@framers/agentos';
118
+ | vs. | AgentOS differentiator |
119
+ |---|---|
120
+ | **LangChain / LangGraph** | Cognitive memory ([8 neuroscience-backed mechanisms](https://docs.agentos.sh/features/cognitive-memory)), HEXACO personality, runtime tool forging |
121
+ | **Vercel AI SDK** | Multi-agent teams (6 strategies), 7 vector backends, [guardrails](https://docs.agentos.sh/features/guardrails-architecture), voice/telephony |
122
+ | **CrewAI / Mastra** | Unified orchestration (DAGs + graphs + missions), personality-driven routing, **published reproducible numbers on LongMemEval-S (85.6%) and LongMemEval-M (70.2%) with full methodology disclosure** |
250
123
 
251
- const myProvider: AgentMemoryProvider = {
252
- async getContext(text, opts) {
253
- return { contextText: await recallRelevant(text, opts?.tokenBudget) };
254
- },
255
- async observe(role, text) {
256
- await persist(role, text);
257
- },
258
- };
124
+ [Full framework comparison →](https://docs.agentos.sh/blog/2026/02/20/agentos-vs-langgraph-vs-crewai)
259
125
 
260
- const tutor = agent({
261
- provider: 'anthropic',
262
- instructions: 'You are a patient CS tutor.',
263
- memoryProvider: myProvider,
264
- });
126
+ ---
265
127
 
266
- // Direct stream — memory context injected before the call, observations
267
- // recorded after. No session required.
268
- const stream = tutor.stream('Explain recursion.');
128
+ ## Key Features
269
129
 
270
- // Session — same memory wiring, plus per-session conversation history.
271
- const session = tutor.session('student-1');
272
- await session.send('Continue where we left off.');
273
- ```
130
+ | Category | Highlights |
131
+ |---|---|
132
+ | **LLM Providers** | 16: OpenAI, Anthropic, Gemini, Groq, Ollama, OpenRouter, Together, Mistral, xAI, Claude/Gemini CLI, + 5 image/video |
133
+ | **Cognitive Memory** | 8 mechanisms: reconsolidation, retrieval-induced forgetting, involuntary recall, FOK, gist extraction, schema encoding, source decay, emotion regulation |
134
+ | **HEXACO Personality** | 6 traits modulate memory, retrieval bias, response style |
135
+ | **RAG Pipeline** | 7 vector backends · 4 retrieval strategies · GraphRAG · HyDE · Cohere rerank-v3.5 |
136
+ | **Multi-Agent Teams** | 6 coordination strategies · shared memory · inter-agent messaging · HITL gates |
137
+ | **Orchestration** | `workflow()` DAGs · `AgentGraph` cycles · `mission()` goal-driven planning · checkpointing |
138
+ | **Guardrails** | 5 security tiers · 6 packs (PII, ML classifiers, topicality, code safety, grounding, content policy) |
139
+ | **Emergent Capabilities** | Runtime tool forging · 4 self-improvement tools · tiered promotion · skill export |
140
+ | **Voice & Telephony** | ElevenLabs, Deepgram, Whisper · Twilio, Telnyx, Plivo |
141
+ | **Channels** | 37 platform adapters (Telegram, Discord, Slack, WhatsApp, webchat, ...) |
142
+ | **Observability** | OpenTelemetry · usage ledger · cost guard · circuit breaker |
274
143
 
275
- Both `getContext` and `observe` hooks are optional; implementations may provide read-only or write-only memory behavior.
144
+ ---
276
145
 
277
- ### Multi-Agent Teams
146
+ ## Multi-Agent in 6 Lines
278
147
 
279
148
  ```typescript
280
149
  import { agency } from '@framers/agentos';
@@ -283,180 +152,81 @@ const team = agency({
283
152
  strategy: 'graph',
284
153
  agents: {
285
154
  researcher: { provider: 'anthropic', instructions: 'Find relevant facts.' },
286
- writer: { provider: 'openai', instructions: 'Write a clear summary.', dependsOn: ['researcher'] },
287
- reviewer: { provider: 'gemini', instructions: 'Check accuracy.', dependsOn: ['writer'] },
155
+ writer: { provider: 'openai', instructions: 'Summarize clearly.', dependsOn: ['researcher'] },
156
+ reviewer: { provider: 'gemini', instructions: 'Check accuracy.', dependsOn: ['writer'] },
288
157
  },
289
158
  });
290
159
 
291
160
  const result = await team.generate('Compare TCP vs UDP for game networking.');
292
161
  ```
293
162
 
294
- 6 strategies: `sequential` · `parallel` · `debate` · `review-loop` · `hierarchical` · `graph`
295
-
296
- ### Multimodal
297
-
298
- ```typescript
299
- import { generateImage, generateVideo, generateMusic, performOCR, embedText } from '@framers/agentos';
300
-
301
- const image = await generateImage({ provider: 'openai', prompt: 'Neon cityscape at sunset' });
302
- const video = await generateVideo({ prompt: 'Drone over misty forest' });
303
- const music = await generateMusic({ prompt: 'Lo-fi hip hop beat' });
304
- const ocr = await performOCR({ image: './receipt.png', strategy: 'progressive' });
305
- const embed = await embedText({ provider: 'openai', input: ['hello', 'world'] });
306
- ```
163
+ Strategies: `sequential` · `parallel` · `debate` · `review-loop` · `hierarchical` · `graph`. [Multi-agent docs →](https://docs.agentos.sh/features/multi-agent)
307
164
 
308
- ### Orchestration
309
-
310
- Three authoring APIs, one graph runtime:
165
+ ---
311
166
 
312
- ```typescript
313
- import { workflow, AgentGraph, mission } from '@framers/agentos/orchestration';
167
+ ## See It In Action
314
168
 
315
- // 1. workflow() — deterministic DAG
316
- const pipe = workflow('content').step('research', { tool: 'web_search' }).then('draft', { gmi: { instructions: '...' } }).compile();
169
+ ### 🌀 Paracosm — AI Agent Swarm Simulation
317
170
 
318
- // 2. AgentGraph cycles, subgraphs
319
- const graph = new AgentGraph('review').addNode('draft', gmiNode({...})).addNode('review', judgeNode({...})).addEdge('draft','review').compile();
171
+ Define any scenario as JSON. Run it with AI commanders that have different HEXACO personalities. Same starting conditions, different decisions, divergent civilizations. Built on AgentOS.
320
172
 
321
- // 3. mission() — goal-driven, planner decides steps
322
- const m = mission('research').goal('Research {topic}').planner({ strategy: 'adaptive' }).compile();
173
+ ```bash
174
+ npm install paracosm
323
175
  ```
324
176
 
325
- ## API Surfaces
326
-
327
- AgentOS exposes related entry points at different depths. The shared config surface does not imply identical enforcement across them.
328
-
329
- - The lightweight `agent()` facade owns prompt assembly, sessions, personality shaping, hooks, tools, and usage-ledger forwarding.
330
- - `generateText()` and `streamText()` are the low-level generation helpers for provider control, native tool calling, and text-fallback tool loops.
331
- - The full `AgentOS` runtime and `agency()` own emergent tooling, guardrails, discovery, RAG initialization, permissions/security tiers, HITL, channels/voice, and provenance-aware orchestration.
177
+ [Live Demo](https://paracosm.agentos.sh/sim) · [GitHub](https://github.com/framersai/paracosm) · [npm](https://www.npmjs.com/package/paracosm)
332
178
 
333
179
  ---
334
180
 
335
- ## Key Features
336
-
337
- | Category | Highlights |
338
- |----------|-----------|
339
- | **LLM Providers** | 16 providers: OpenAI, Anthropic, Gemini, Groq, Ollama, OpenRouter, Together, Mistral, xAI, Claude CLI, Gemini CLI, + 5 image/video |
340
- | **Cognitive Memory** | 8 neuroscience-backed mechanisms (reconsolidation, RIF, involuntary recall, FOK, gist extraction, schema encoding, source decay, emotion regulation) |
341
- | **HEXACO Personality** | 6 traits modulate memory, retrieval bias, response style — agents have consistent identity |
342
- | **RAG Pipeline** | 7 vector backends (InMemory, SQL, HNSW, Qdrant, Neo4j, pgvector, Pinecone) · 4 retrieval strategies · GraphRAG |
343
- | **Multi-Agent Teams** | 6 coordination strategies · shared memory · inter-agent messaging · HITL approval gates |
344
- | **Orchestration** | `workflow()` DAGs · `AgentGraph` cycles/subgraphs · `mission()` goal-driven planning · persistent checkpointing |
345
- | **Guardrails** | 5 security tiers · 6 packs (PII redaction, ML classifiers, topicality, code safety, grounding, content policy) |
346
- | **Emergent Capabilities** | Runtime tool forging · 4 self-improvement tools · tiered promotion (session → agent → shared) · skill export |
347
- | **Capability Discovery** | Semantic per-turn tool selection · ~90% token reduction · 3-tier context model · Neo4j graph backend |
348
- | **Skills** | 88 curated skills · 3-tier architecture (engine, content, catalog SDK) · auto-update on install |
349
- | **Voice & Telephony** | ElevenLabs, Deepgram, OpenAI Whisper · Twilio, Telnyx, Plivo |
350
- | **Channels** | 37 platform adapters (Telegram, Discord, Slack, WhatsApp, webchat, and more) |
351
- | **Structured Output** | Zod-validated JSON extraction with retry · provider-native structured output |
352
- | **Observability** | OpenTelemetry traces/metrics · usage ledger · cost guard · circuit breaker |
353
-
354
- ---
355
-
356
- ## Default Models Per Provider
357
-
358
- | Provider | Text Model | Image Model | Env Var |
359
- |---|---|---|---|
360
- | `openai` | gpt-4o | gpt-image-1 | `OPENAI_API_KEY` |
361
- | `anthropic` | claude-sonnet-4 | — | `ANTHROPIC_API_KEY` |
362
- | `gemini` | gemini-2.5-flash | — | `GEMINI_API_KEY` |
363
- | `groq` | llama-3.3-70b | — | `GROQ_API_KEY` |
364
- | `ollama` | llama3.2 | stable-diffusion | `OLLAMA_BASE_URL` |
365
- | `openrouter` | openai/gpt-4o | — | `OPENROUTER_API_KEY` |
366
- | `together` | Llama-3.1-70B | — | `TOGETHER_API_KEY` |
367
- | `mistral` | mistral-large | — | `MISTRAL_API_KEY` |
368
- | `xai` | grok-2 | — | `XAI_API_KEY` |
369
- | `stability` | — | stable-diffusion-xl | `STABILITY_API_KEY` |
370
- | `replicate` | — | flux-1.1-pro | `REPLICATE_API_TOKEN` |
371
- | `bfl` | — | flux-pro-1.1 | `BFL_API_KEY` |
372
- | `fal` | — | fal-ai/flux/dev | `FAL_API_KEY` |
373
- | `claude-code-cli` | claude-sonnet-4 | — | `claude` on PATH |
374
- | `gemini-cli` | gemini-2.5-flash | — | `gemini` on PATH |
375
-
376
- Auto-detection: OpenAI → Anthropic → OpenRouter → Gemini → Groq → Together → Mistral → xAI → CLI → Ollama
181
+ ## Configure API Keys
377
182
 
378
- ### Model String Formats
379
-
380
- Three ways to specify a model:
381
-
382
- ```ts
383
- // 1. Separate fields (recommended)
384
- generateText({ provider: 'anthropic', model: 'claude-sonnet-4-20250514', prompt: '...' });
385
-
386
- // 2. Colon format (canonical combined string)
387
- generateText({ model: 'anthropic:claude-sonnet-4-20250514', prompt: '...' });
388
-
389
- // 3. Slash format (also supported for known providers)
390
- generateText({ model: 'anthropic/claude-sonnet-4-20250514', prompt: '...' });
183
+ ```bash
184
+ export OPENAI_API_KEY=sk-...
185
+ export ANTHROPIC_API_KEY=sk-ant-...
186
+ export GEMINI_API_KEY=AIza...
391
187
 
392
- // Auto-detect (omit both provider and model)
393
- generateText({ prompt: '...' }); // uses first available provider
188
+ # Comma-separated keys auto-rotate with quota detection
189
+ export OPENAI_API_KEY=sk-key1,sk-key2,sk-key3
394
190
  ```
395
191
 
396
- The slash format only splits on known provider prefixes (`openai`, `anthropic`, `openrouter`, etc.). Unknown prefixes like `meta-llama/llama-3.1-8b` pass through as a plain model name to the auto-detected provider.
192
+ Or pass `apiKey` inline on any call. Auto-detection order: OpenAI → Anthropic → OpenRouter → Gemini → Groq → Together → Mistral → xAI → CLI → Ollama. [Default models per provider →](https://docs.agentos.sh/architecture/llm-providers)
397
193
 
398
194
  ---
399
195
 
400
- ## API Reference
401
-
402
- ### High-Level Functions
403
-
404
- | Function | Description |
405
- |----------|-------------|
406
- | `generateText()` | Text generation with multi-step tool calling |
407
- | `streamText()` | Streaming text with async iterables |
408
- | `generateObject()` | Zod-validated structured output |
409
- | `streamObject()` | Streaming structured output |
410
- | `generateImage()` | Image generation (7 providers, character consistency) |
411
- | `generateVideo()` | Video generation |
412
- | `generateMusic()` / `generateSFX()` | Audio generation |
413
- | `performOCR()` | Text extraction from images |
414
- | `embedText()` | Embedding generation |
415
- | `agent()` | Lightweight stateful agent for prompts, tools, memory, and sessions |
416
- | `agency()` | Multi-agent teams plus full runtime-owned orchestration features |
417
-
418
- ### Orchestration
196
+ ## API Surfaces
419
197
 
420
- | Builder | Description |
421
- |---------|-------------|
422
- | `workflow(name)` | Deterministic DAG with typed steps |
423
- | `AgentGraph` | Explicit graph with cycles, subgraphs |
424
- | `mission(name)` | Goal-driven, planner decides steps |
198
+ - **`agent()`**: lightweight stateful agent. Prompts, sessions, personality, hooks, tools, memory.
199
+ - **`agency()`**: multi-agent teams + full runtime. Emergent tooling, guardrails, RAG, voice, channels, HITL.
200
+ - **`generateText()` / `streamText()` / `generateObject()` / `generateImage()` / `generateVideo()` / `generateMusic()` / `performOCR()` / `embedText()`**: low-level multi-modal helpers with native tool calling.
201
+ - **`workflow()` / `AgentGraph` / `mission()`**: three orchestration authoring APIs over one graph runtime.
425
202
 
426
- Full API reference: [docs.agentos.sh/api](https://docs.agentos.sh/api)
203
+ [Full API reference →](https://docs.agentos.sh/api) · [High-Level API guide →](https://docs.agentos.sh/getting-started/high-level-api)
427
204
 
428
205
  ---
429
206
 
430
207
  ## Ecosystem
431
208
 
432
209
  | Package | Description |
433
- |---------|-------------|
434
- | [`@framers/agentos`](https://www.npmjs.com/package/@framers/agentos) | Core runtime — agents, providers, memory, RAG, orchestration, guardrails |
210
+ |---|---|
211
+ | [`@framers/agentos`](https://www.npmjs.com/package/@framers/agentos) | Core runtime |
435
212
  | [`@framers/agentos-extensions`](https://www.npmjs.com/package/@framers/agentos-extensions) | 100+ extensions and templates |
436
- | [`@framers/agentos-extensions-registry`](https://www.npmjs.com/package/@framers/agentos-extensions-registry) | Curated manifest builder |
437
213
  | [`@framers/agentos-skills`](https://www.npmjs.com/package/@framers/agentos-skills) | 88 curated SKILL.md definitions |
438
- | [`@framers/agentos-skills-registry`](https://www.npmjs.com/package/@framers/agentos-skills-registry) | Skills catalog SDK |
214
+ | [`@framers/agentos-bench`](https://github.com/framersai/agentos-bench) | Open benchmark harness with bootstrap CIs, judge-FPR probes, per-case run JSONs |
439
215
  | [`@framers/sql-storage-adapter`](https://www.npmjs.com/package/@framers/sql-storage-adapter) | SQL persistence (SQLite, Postgres, IndexedDB) |
216
+ | [paracosm](https://www.npmjs.com/package/paracosm) | AI agent swarm simulation engine |
440
217
 
441
218
  ---
442
219
 
443
- ## Documentation
444
-
445
- | Guide | Topic |
446
- |-------|-------|
447
- | [Architecture](./docs/architecture/ARCHITECTURE.md) | System design, data flow, layer breakdown |
448
- | [High-Level API](./docs/getting-started/HIGH_LEVEL_API.md) | `generateText`, `agent`, `agency` reference |
449
- | [Orchestration](./docs/orchestration/UNIFIED_ORCHESTRATION.md) | Workflows, graphs, missions |
450
- | [Cognitive Memory](./docs/memory/COGNITIVE_MECHANISMS.md) | 8 mechanisms, 30+ APA citations |
451
- | [RAG Configuration](./docs/memory/RAG_MEMORY_CONFIGURATION.md) | Vector stores, embeddings, data sources |
452
- | [Guardrails](./docs/safety/GUARDRAILS_USAGE.md) | 5 tiers, 6 packs |
453
- | [Human-in-the-Loop](./docs/safety/HUMAN_IN_THE_LOOP.md) | Approval workflows, escalation |
454
- | [Emergent Capabilities](./docs/architecture/EMERGENT_CAPABILITIES.md) | Runtime tool forging |
455
- | [Channels & Platforms](./docs/architecture/PLATFORM_SUPPORT.md) | 37 platform adapters |
456
- | [Voice Pipeline](./docs/features/VOICE_PIPELINE.md) | TTS, STT, telephony |
457
- | [Uncensored Content](./docs/features/UNCENSORED_CONTENT.md) | `policyTier`-driven routing for mature text + image generation |
220
+ ## Documentation & Community
458
221
 
459
- Full documentation: [docs.agentos.sh](https://docs.agentos.sh)
222
+ - **[Benchmarks](https://docs.agentos.sh/benchmarks)**: matched-reader SOTA tables, bootstrap CIs, methodology audit
223
+ - **[Architecture](https://docs.agentos.sh/architecture/system-architecture)**: system design, layer breakdown
224
+ - **[Cognitive Memory](https://docs.agentos.sh/features/cognitive-memory)**: 8 mechanisms with 30+ APA citations
225
+ - **[RAG Configuration](https://docs.agentos.sh/features/rag-memory-configuration)**: vector stores, embeddings, sources
226
+ - **[Guardrails](https://docs.agentos.sh/features/guardrails-architecture)**: 5 tiers, 6 packs
227
+ - **[Voice Pipeline](https://docs.agentos.sh/features/voice-pipeline)**: TTS, STT, telephony
228
+ - **[Blog](https://docs.agentos.sh/blog)**: engineering posts, benchmark publications, transparency audits
229
+ - **[Discord](https://wilds.ai/discord)** · **[GitHub Issues](https://github.com/framersai/agentos/issues)** · **[Wilds.ai](https://wilds.ai)** (AI game worlds powered by AgentOS)
460
230
 
461
231
  ---
462
232
 
@@ -467,17 +237,7 @@ git clone https://github.com/framersai/agentos.git && cd agentos
467
237
  pnpm install && pnpm build && pnpm test
468
238
  ```
469
239
 
470
- We use [Conventional Commits](https://www.conventionalcommits.org/). See the [Contributing Guide](https://github.com/framersai/agentos/blob/master/CONTRIBUTING.md).
471
-
472
- ---
473
-
474
- ## Community
475
-
476
- - **Discord:** [wilds.ai/discord](https://wilds.ai/discord)
477
- - **GitHub Issues:** [github.com/framersai/agentos/issues](https://github.com/framersai/agentos/issues)
478
- - **Blog:** [docs.agentos.sh/blog](https://docs.agentos.sh/blog)
479
- - **Paracosm:** [paracosm.agentos.sh](https://paracosm.agentos.sh) — AI agent swarm simulation engine built on AgentOS
480
- - **Wilds.ai:** [wilds.ai](https://wilds.ai) — AI game worlds powered by AgentOS
240
+ [Contributing Guide](https://github.com/framersai/agentos/blob/master/CONTRIBUTING.md) · We use [Conventional Commits](https://www.conventionalcommits.org/).
481
241
 
482
242
  ---
483
243
 
@@ -1 +1 @@
1
- {"version":3,"file":"agency.d.ts","sourceRoot":"","sources":["../../src/api/agency.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAGH,OAAO,KAAK,EACV,aAAa,EACb,KAAK,EAWN,MAAM,YAAY,CAAC;AAapB;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,MAAM,CAAC,IAAI,EAAE,aAAa,GAAG,KAAK,CAgrBjD;AA+qBD;;;;GAIG;AACH,MAAM,WAAW,2BAA2B;IAC1C,6DAA6D;IAC7D,MAAM,EAAE,OAAO,CAAC;IAChB,4EAA4E;IAC5E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,yBAAyB,CAC7C,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC7B,YAAY,EAAE,MAAM,EAAE,EACtB,SAAS,CAAC,EAAE,aAAa,CAAC,IAAI,CAAC,GAC9B,OAAO,CAAC,2BAA2B,CAAC,CA+DtC"}
1
+ {"version":3,"file":"agency.d.ts","sourceRoot":"","sources":["../../src/api/agency.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAGH,OAAO,KAAK,EACV,aAAa,EACb,KAAK,EAWN,MAAM,YAAY,CAAC;AAapB;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,MAAM,CAAC,IAAI,EAAE,aAAa,GAAG,KAAK,CA8qBjD;AA+qBD;;;;GAIG;AACH,MAAM,WAAW,2BAA2B;IAC1C,6DAA6D;IAC7D,MAAM,EAAE,OAAO,CAAC;IAChB,4EAA4E;IAC5E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,yBAAyB,CAC7C,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC7B,YAAY,EAAE,MAAM,EAAE,EACtB,SAAS,CAAC,EAAE,aAAa,CAAC,IAAI,CAAC,GAC9B,OAAO,CAAC,2BAA2B,CAAC,CA+DtC"}
@@ -161,12 +161,10 @@ export function agency(opts) {
161
161
  const maxValidationRetries = controls?.maxValidationRetries ?? 1;
162
162
  const hasValidation = !!opts.output;
163
163
  let currentPrompt = preparedPrompt;
164
- let lastResult = null;
165
164
  let lastFinalized = null;
166
165
  const maxAttempts = hasValidation ? maxValidationRetries + 1 : 1;
167
166
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
168
167
  const result = (await strategy.execute(currentPrompt, execOpts));
169
- lastResult = result;
170
168
  const finalized = await finalizeExecutionResult(result, start, sessionId);
171
169
  lastFinalized = finalized;
172
170
  // Success path: no validation required, OR validation produced a `parsed` value