agentfootprint 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. package/AGENTS.md +80 -0
  2. package/CLAUDE.md +80 -0
  3. package/README.md +51 -1
  4. package/README.proposed.md +258 -0
  5. package/ai-instructions/claude-code/SKILL.md +56 -5
  6. package/ai-instructions/clinerules +44 -5
  7. package/ai-instructions/copilot-instructions.md +44 -5
  8. package/ai-instructions/cursor/agentfootprint.md +44 -5
  9. package/ai-instructions/kiro/agentfootprint.md +44 -5
  10. package/ai-instructions/windsurfrules +44 -5
  11. package/dist/adapters/llm/AnthropicProvider.js +0 -31
  12. package/dist/adapters/llm/AnthropicProvider.js.map +1 -1
  13. package/dist/adapters/llm/BedrockProvider.js +2 -28
  14. package/dist/adapters/llm/BedrockProvider.js.map +1 -1
  15. package/dist/adapters/llm/BrowserAnthropicProvider.js +1 -31
  16. package/dist/adapters/llm/BrowserAnthropicProvider.js.map +1 -1
  17. package/dist/adapters/llm/BrowserOpenAIProvider.js +1 -11
  18. package/dist/adapters/llm/BrowserOpenAIProvider.js.map +1 -1
  19. package/dist/adapters/llm/MockProvider.js +35 -1
  20. package/dist/adapters/llm/MockProvider.js.map +1 -1
  21. package/dist/adapters/llm/OpenAIProvider.js +3 -29
  22. package/dist/adapters/llm/OpenAIProvider.js.map +1 -1
  23. package/dist/adapters/memory/agentcore.js +305 -0
  24. package/dist/adapters/memory/agentcore.js.map +1 -0
  25. package/dist/adapters/memory/redis.js +287 -0
  26. package/dist/adapters/memory/redis.js.map +1 -0
  27. package/dist/core/Agent.js +11 -0
  28. package/dist/core/Agent.js.map +1 -1
  29. package/dist/esm/adapters/llm/AnthropicProvider.js +0 -31
  30. package/dist/esm/adapters/llm/AnthropicProvider.js.map +1 -1
  31. package/dist/esm/adapters/llm/BedrockProvider.js +2 -28
  32. package/dist/esm/adapters/llm/BedrockProvider.js.map +1 -1
  33. package/dist/esm/adapters/llm/BrowserAnthropicProvider.js +1 -31
  34. package/dist/esm/adapters/llm/BrowserAnthropicProvider.js.map +1 -1
  35. package/dist/esm/adapters/llm/BrowserOpenAIProvider.js +1 -11
  36. package/dist/esm/adapters/llm/BrowserOpenAIProvider.js.map +1 -1
  37. package/dist/esm/adapters/llm/MockProvider.js +35 -1
  38. package/dist/esm/adapters/llm/MockProvider.js.map +1 -1
  39. package/dist/esm/adapters/llm/OpenAIProvider.js +3 -29
  40. package/dist/esm/adapters/llm/OpenAIProvider.js.map +1 -1
  41. package/dist/esm/adapters/memory/agentcore.js +301 -0
  42. package/dist/esm/adapters/memory/agentcore.js.map +1 -0
  43. package/dist/esm/adapters/memory/redis.js +283 -0
  44. package/dist/esm/adapters/memory/redis.js.map +1 -0
  45. package/dist/esm/core/Agent.js +11 -0
  46. package/dist/esm/core/Agent.js.map +1 -1
  47. package/dist/esm/index.js +6 -1
  48. package/dist/esm/index.js.map +1 -1
  49. package/dist/esm/lib/injection-engine/index.js +0 -54
  50. package/dist/esm/lib/injection-engine/index.js.map +1 -1
  51. package/dist/esm/lib/mcp/index.js +9 -0
  52. package/dist/esm/lib/mcp/index.js.map +1 -0
  53. package/dist/esm/lib/mcp/mcpClient.js +176 -0
  54. package/dist/esm/lib/mcp/mcpClient.js.map +1 -0
  55. package/dist/esm/lib/mcp/mockMcpClient.js +97 -0
  56. package/dist/esm/lib/mcp/mockMcpClient.js.map +1 -0
  57. package/dist/esm/lib/mcp/types.js +24 -0
  58. package/dist/esm/lib/mcp/types.js.map +1 -0
  59. package/dist/esm/lib/rag/defineRAG.js +0 -18
  60. package/dist/esm/lib/rag/defineRAG.js.map +1 -1
  61. package/dist/esm/lib/rag/indexDocuments.js +39 -4
  62. package/dist/esm/lib/rag/indexDocuments.js.map +1 -1
  63. package/dist/esm/memory/causal/loadSnapshot.js +1 -1
  64. package/dist/esm/memory/define.js +0 -14
  65. package/dist/esm/memory/define.js.map +1 -1
  66. package/dist/esm/memory/define.types.js +0 -10
  67. package/dist/esm/memory/define.types.js.map +1 -1
  68. package/dist/esm/resilience/index.js +0 -44
  69. package/dist/esm/resilience/index.js.map +1 -1
  70. package/dist/esm/stream.js +0 -29
  71. package/dist/esm/stream.js.map +1 -1
  72. package/dist/index.js +8 -1
  73. package/dist/index.js.map +1 -1
  74. package/dist/instructions.js +21 -0
  75. package/dist/instructions.js.map +1 -0
  76. package/dist/lib/injection-engine/index.js +0 -54
  77. package/dist/lib/injection-engine/index.js.map +1 -1
  78. package/dist/lib/instructions/defineInstruction.js +35 -0
  79. package/dist/lib/instructions/defineInstruction.js.map +1 -0
  80. package/dist/lib/instructions/evaluator.js +38 -0
  81. package/dist/lib/instructions/evaluator.js.map +1 -0
  82. package/dist/lib/instructions/index.js +48 -0
  83. package/dist/lib/instructions/index.js.map +1 -0
  84. package/dist/lib/instructions/types.js +22 -0
  85. package/dist/lib/instructions/types.js.map +1 -0
  86. package/dist/lib/mcp/index.js +14 -0
  87. package/dist/lib/mcp/index.js.map +1 -0
  88. package/dist/lib/mcp/mcpClient.js +180 -0
  89. package/dist/lib/mcp/mcpClient.js.map +1 -0
  90. package/dist/lib/mcp/mockMcpClient.js +101 -0
  91. package/dist/lib/mcp/mockMcpClient.js.map +1 -0
  92. package/dist/lib/mcp/types.js +25 -0
  93. package/dist/lib/mcp/types.js.map +1 -0
  94. package/dist/lib/rag/defineRAG.js +0 -18
  95. package/dist/lib/rag/defineRAG.js.map +1 -1
  96. package/dist/lib/rag/indexDocuments.js +39 -4
  97. package/dist/lib/rag/indexDocuments.js.map +1 -1
  98. package/dist/memory/causal/loadSnapshot.js +1 -1
  99. package/dist/memory/conversationHelpers.js +39 -0
  100. package/dist/memory/conversationHelpers.js.map +1 -0
  101. package/dist/memory/define.js +0 -14
  102. package/dist/memory/define.js.map +1 -1
  103. package/dist/memory/define.types.js +0 -10
  104. package/dist/memory/define.types.js.map +1 -1
  105. package/dist/resilience/index.js +0 -44
  106. package/dist/resilience/index.js.map +1 -1
  107. package/dist/stream.js +0 -29
  108. package/dist/stream.js.map +1 -1
  109. package/dist/types/adapters/llm/AnthropicProvider.d.ts +0 -31
  110. package/dist/types/adapters/llm/AnthropicProvider.d.ts.map +1 -1
  111. package/dist/types/adapters/llm/BedrockProvider.d.ts +2 -28
  112. package/dist/types/adapters/llm/BedrockProvider.d.ts.map +1 -1
  113. package/dist/types/adapters/llm/BrowserAnthropicProvider.d.ts +1 -31
  114. package/dist/types/adapters/llm/BrowserAnthropicProvider.d.ts.map +1 -1
  115. package/dist/types/adapters/llm/BrowserOpenAIProvider.d.ts +1 -11
  116. package/dist/types/adapters/llm/BrowserOpenAIProvider.d.ts.map +1 -1
  117. package/dist/types/adapters/llm/MockProvider.d.ts +45 -0
  118. package/dist/types/adapters/llm/MockProvider.d.ts.map +1 -1
  119. package/dist/types/adapters/llm/OpenAIProvider.d.ts +3 -29
  120. package/dist/types/adapters/llm/OpenAIProvider.d.ts.map +1 -1
  121. package/dist/types/adapters/memory/agentcore.d.ts +157 -0
  122. package/dist/types/adapters/memory/agentcore.d.ts.map +1 -0
  123. package/dist/types/adapters/memory/redis.d.ts +126 -0
  124. package/dist/types/adapters/memory/redis.d.ts.map +1 -0
  125. package/dist/types/core/Agent.d.ts +7 -0
  126. package/dist/types/core/Agent.d.ts.map +1 -1
  127. package/dist/types/index.d.ts +2 -1
  128. package/dist/types/index.d.ts.map +1 -1
  129. package/dist/types/instructions.d.ts +5 -0
  130. package/dist/types/instructions.d.ts.map +1 -0
  131. package/dist/types/lib/injection-engine/index.d.ts +0 -54
  132. package/dist/types/lib/injection-engine/index.d.ts.map +1 -1
  133. package/dist/types/lib/instructions/defineInstruction.d.ts +22 -0
  134. package/dist/types/lib/instructions/defineInstruction.d.ts.map +1 -0
  135. package/dist/types/lib/instructions/evaluator.d.ts +11 -0
  136. package/dist/types/lib/instructions/evaluator.d.ts.map +1 -0
  137. package/dist/types/lib/instructions/index.d.ts +44 -0
  138. package/dist/types/lib/instructions/index.d.ts.map +1 -0
  139. package/dist/types/lib/instructions/types.d.ts +100 -0
  140. package/dist/types/lib/instructions/types.d.ts.map +1 -0
  141. package/dist/types/lib/mcp/index.d.ts +10 -0
  142. package/dist/types/lib/mcp/index.d.ts.map +1 -0
  143. package/dist/types/lib/mcp/mcpClient.d.ts +47 -0
  144. package/dist/types/lib/mcp/mcpClient.d.ts.map +1 -0
  145. package/dist/types/lib/mcp/mockMcpClient.d.ts +66 -0
  146. package/dist/types/lib/mcp/mockMcpClient.d.ts.map +1 -0
  147. package/dist/types/lib/mcp/types.d.ts +134 -0
  148. package/dist/types/lib/mcp/types.d.ts.map +1 -0
  149. package/dist/types/lib/rag/defineRAG.d.ts +18 -21
  150. package/dist/types/lib/rag/defineRAG.d.ts.map +1 -1
  151. package/dist/types/lib/rag/indexDocuments.d.ts +30 -1
  152. package/dist/types/lib/rag/indexDocuments.d.ts.map +1 -1
  153. package/dist/types/memory/causal/loadSnapshot.d.ts +1 -1
  154. package/dist/types/memory/conversationHelpers.d.ts +19 -0
  155. package/dist/types/memory/conversationHelpers.d.ts.map +1 -0
  156. package/dist/types/memory/define.d.ts +0 -14
  157. package/dist/types/memory/define.d.ts.map +1 -1
  158. package/dist/types/memory/define.types.d.ts +2 -12
  159. package/dist/types/memory/define.types.d.ts.map +1 -1
  160. package/dist/types/resilience/index.d.ts +0 -44
  161. package/dist/types/resilience/index.d.ts.map +1 -1
  162. package/dist/types/stream.d.ts +0 -29
  163. package/dist/types/stream.d.ts.map +1 -1
  164. package/package.json +20 -1
package/AGENTS.md CHANGED
@@ -32,8 +32,87 @@ The flavors are how you *mark intent* — but they all reduce to one `Injection`
32
32
  | **Instruction** | Predicate (`activeWhen` / `on-tool-return`) | system-prompt or messages |
33
33
  | **Fact** | Always-on (data) | system-prompt or messages |
34
34
 
35
+ ## Mock-first development (RECOMMENDED workflow)
36
+
37
+ Build the entire app — agent, context engineering, tools, memory, RAG, MCP — against in-memory mocks first. Validate logic and patterns end-to-end with $0 API cost. Swap real infrastructure in, one boundary at a time, only after the flow is right.
38
+
39
+ ```typescript
40
+ import { Agent, mock, InMemoryStore, mockEmbedder, defineTool } from 'agentfootprint';
41
+
42
+ // Mock LLM provider — no API key, deterministic reply.
43
+ const provider = mock({ reply: 'Refunds take 3 business days.' });
44
+
45
+ // Inline-mocked tool — no real backend yet.
46
+ const lookup = defineTool({
47
+ schema: { name: 'lookup', description: '...', inputSchema: {} },
48
+ execute: async () => 'mock data',
49
+ });
50
+
51
+ const agent = Agent.create({ provider, model: 'mock' }).tool(lookup).build();
52
+ await agent.run({ message: 'How long does a refund take?' });
53
+ ```
54
+
55
+ | Boundary | Mock for development | Production swap |
56
+ |---|---|---|
57
+ | LLM provider | `mock({ reply })` · `mock({ replies })` for scripted ReAct | `anthropic()` · `openai()` · `bedrock()` · `ollama()` |
58
+ | Embedder | `mockEmbedder()` | OpenAI / Cohere / Bedrock embedder factory |
59
+ | Memory store | `InMemoryStore` | `RedisStore` (`agentfootprint/memory-redis`) · `AgentCoreStore` (`agentfootprint/memory-agentcore`) · DynamoDB / Postgres / Pinecone (planned) |
60
+ | MCP server | `mockMcpClient({ tools })` — in-memory, no SDK | `mcpClient({ transport })` to a real server |
61
+ | Tool execute | inline `async () => '...'` closure | real implementation |
62
+
63
+ When generating starter code for users, **default to the mock surface** unless they explicitly say they have a key / endpoint / store ready. Show real-provider code as the "swap" step, not the first step.
64
+
65
+ **Subpath imports** for memory adapters keep the main barrel small + tree-shaking clean:
66
+
67
+ ```typescript
68
+ import { RedisStore } from 'agentfootprint/memory-redis';
69
+ import { AgentCoreStore } from 'agentfootprint/memory-agentcore';
70
+ ```
71
+
72
+ Both lazy-require their SDK (`ioredis` / `@aws-sdk/client-bedrock-agent-runtime`) and accept `_client` for test injection.
73
+
74
+ **Multi-turn mock for tool-using ReAct:**
75
+
76
+ ```typescript
77
+ const provider = mock({
78
+ replies: [
79
+ { toolCalls: [{ id: '1', name: 'lookup', args: { topic: 'refunds' } }] },
80
+ { content: 'Refunds take 3 business days.' },
81
+ ],
82
+ });
83
+ ```
84
+
85
+ Each `complete()` consumes one reply in order. Running out of replies throws loudly — misnumbered scripts fail tests instead of silently looping.
86
+
35
87
  ## Public API
36
88
 
89
+ ### MCP — `mcpClient` (connect to MCP servers, register their tools)
90
+
91
+ ```typescript
92
+ import { Agent, mcpClient } from 'agentfootprint';
93
+
94
+ const slack = await mcpClient({
95
+ name: 'slack',
96
+ transport: { transport: 'stdio', command: 'npx', args: ['@example/slack-mcp'] },
97
+ });
98
+
99
+ const agent = Agent.create({ provider })
100
+ .tools(await slack.tools()) // pull ALL tools from the server in one call
101
+ .build();
102
+
103
+ await agent.run({ message: '...' });
104
+ await slack.close();
105
+ ```
106
+
107
+ Transports: `stdio` (local subprocess), `http` (Streamable HTTP). The
108
+ `@modelcontextprotocol/sdk` peer-dep is lazy-required — zero runtime
109
+ cost when MCP isn't used. Friendly install hint if missing.
110
+
111
+ `agent.tools(arr)` is the bulk-register companion to `agent.tool(t)`.
112
+ Pair with `await client.tools()` to register everything an MCP server
113
+ exposes in one builder call. Tool-name uniqueness is still validated
114
+ at `.build()` across MCP servers + manual `.tool()` calls.
115
+
37
116
  ### RAG — `defineRAG` (one factory, one helper)
38
117
 
39
118
  ```typescript
@@ -387,6 +466,7 @@ Recorders (auto-attached when relevant builder method is called):
387
466
  | Cross-run "why?" replay | `defineMemory({ type: CAUSAL, strategy: TOP_K })` ⭐ |
388
467
  | Long conversation overflows context | `defineMemory({ type: EPISODIC, strategy: SUMMARIZE })` |
389
468
  | Retrieve from a document corpus | `defineRAG({ store, embedder, topK, threshold })` |
469
+ | Use tools from an external MCP server | `mcpClient({ transport, ... })` + `agent.tools(await c.tools())` |
390
470
 
391
471
  ## Build & Test
392
472
 
package/CLAUDE.md CHANGED
@@ -32,8 +32,87 @@ The flavors are how you *mark intent* — but they all reduce to one `Injection`
32
32
  | **Instruction** | Predicate (`activeWhen` / `on-tool-return`) | system-prompt or messages |
33
33
  | **Fact** | Always-on (data) | system-prompt or messages |
34
34
 
35
+ ## Mock-first development (RECOMMENDED workflow)
36
+
37
+ Build the entire app — agent, context engineering, tools, memory, RAG, MCP — against in-memory mocks first. Validate logic and patterns end-to-end with $0 API cost. Swap real infrastructure in, one boundary at a time, only after the flow is right.
38
+
39
+ ```typescript
40
+ import { Agent, mock, InMemoryStore, mockEmbedder, defineTool } from 'agentfootprint';
41
+
42
+ // Mock LLM provider — no API key, deterministic reply.
43
+ const provider = mock({ reply: 'Refunds take 3 business days.' });
44
+
45
+ // Inline-mocked tool — no real backend yet.
46
+ const lookup = defineTool({
47
+ schema: { name: 'lookup', description: '...', inputSchema: {} },
48
+ execute: async () => 'mock data',
49
+ });
50
+
51
+ const agent = Agent.create({ provider, model: 'mock' }).tool(lookup).build();
52
+ await agent.run({ message: 'How long does a refund take?' });
53
+ ```
54
+
55
+ | Boundary | Mock for development | Production swap |
56
+ |---|---|---|
57
+ | LLM provider | `mock({ reply })` · `mock({ replies })` for scripted ReAct | `anthropic()` · `openai()` · `bedrock()` · `ollama()` |
58
+ | Embedder | `mockEmbedder()` | OpenAI / Cohere / Bedrock embedder factory |
59
+ | Memory store | `InMemoryStore` | `RedisStore` (`agentfootprint/memory-redis`) · `AgentCoreStore` (`agentfootprint/memory-agentcore`) · DynamoDB / Postgres / Pinecone (planned) |
60
+ | MCP server | `mockMcpClient({ tools })` — in-memory, no SDK | `mcpClient({ transport })` to a real server |
61
+ | Tool execute | inline `async () => '...'` closure | real implementation |
62
+
63
+ When generating starter code for users, **default to the mock surface** unless they explicitly say they have a key / endpoint / store ready. Show real-provider code as the "swap" step, not the first step.
64
+
65
+ **Subpath imports** for memory adapters keep the main barrel small + tree-shaking clean:
66
+
67
+ ```typescript
68
+ import { RedisStore } from 'agentfootprint/memory-redis';
69
+ import { AgentCoreStore } from 'agentfootprint/memory-agentcore';
70
+ ```
71
+
72
+ Both lazy-require their SDK (`ioredis` / `@aws-sdk/client-bedrock-agent-runtime`) and accept `_client` for test injection.
73
+
74
+ **Multi-turn mock for tool-using ReAct:**
75
+
76
+ ```typescript
77
+ const provider = mock({
78
+ replies: [
79
+ { toolCalls: [{ id: '1', name: 'lookup', args: { topic: 'refunds' } }] },
80
+ { content: 'Refunds take 3 business days.' },
81
+ ],
82
+ });
83
+ ```
84
+
85
+ Each `complete()` consumes one reply in order. Running out of replies throws loudly — misnumbered scripts fail tests instead of silently looping.
86
+
35
87
  ## Public API
36
88
 
89
+ ### MCP — `mcpClient` (connect to MCP servers, register their tools)
90
+
91
+ ```typescript
92
+ import { Agent, mcpClient } from 'agentfootprint';
93
+
94
+ const slack = await mcpClient({
95
+ name: 'slack',
96
+ transport: { transport: 'stdio', command: 'npx', args: ['@example/slack-mcp'] },
97
+ });
98
+
99
+ const agent = Agent.create({ provider })
100
+ .tools(await slack.tools()) // pull ALL tools from the server in one call
101
+ .build();
102
+
103
+ await agent.run({ message: '...' });
104
+ await slack.close();
105
+ ```
106
+
107
+ Transports: `stdio` (local subprocess), `http` (Streamable HTTP). The
108
+ `@modelcontextprotocol/sdk` peer-dep is lazy-required — zero runtime
109
+ cost when MCP isn't used. Friendly install hint if missing.
110
+
111
+ `agent.tools(arr)` is the bulk-register companion to `agent.tool(t)`.
112
+ Pair with `await client.tools()` to register everything an MCP server
113
+ exposes in one builder call. Tool-name uniqueness is still validated
114
+ at `.build()` across MCP servers + manual `.tool()` calls.
115
+
37
116
  ### RAG — `defineRAG` (one factory, one helper)
38
117
 
39
118
  ```typescript
@@ -387,6 +466,7 @@ Recorders (auto-attached when relevant builder method is called):
387
466
  | Cross-run "why?" replay | `defineMemory({ type: CAUSAL, strategy: TOP_K })` ⭐ |
388
467
  | Long conversation overflows context | `defineMemory({ type: EPISODIC, strategy: SUMMARIZE })` |
389
468
  | Retrieve from a document corpus | `defineRAG({ store, embedder, topK, threshold })` |
469
+ | Use tools from an external MCP server | `mcpClient({ transport, ... })` + `agent.tools(await c.tools())` |
390
470
 
391
471
  ## Build & Test
392
472
 
package/README.md CHANGED
@@ -233,6 +233,55 @@ Every `.steering` / `.instruction` / `.memory` / `.tool` call adds an injection
233
233
 
234
234
  ---
235
235
 
236
+ ## Build with mocks first — swap real infra later
237
+
238
+ Generative AI app development is expensive when every iteration hits a paid API. agentfootprint is designed so you can **build the entire app — agent, context engineering, tool chains, memory, RAG — against in-memory mocks**, prove the logic and patterns end-to-end with zero API cost, then swap real infrastructure in piece by piece.
239
+
240
+ ```typescript
241
+ import {
242
+ Agent, defineTool, defineSteering, defineMemory,
243
+ MEMORY_TYPES, MEMORY_STRATEGIES,
244
+ mock, InMemoryStore, // ← the mock surfaces
245
+ } from 'agentfootprint';
246
+
247
+ const agent = Agent.create({
248
+ provider: mock({ reply: 'Refunds take 3 business days.' }), // ← no API key
249
+ model: 'mock',
250
+ })
251
+ .steering(defineSteering({ id: 'tone', prompt: 'Be friendly.' }))
252
+ .tool(defineTool({
253
+ schema: { name: 'lookup', description: '...', inputSchema: {} },
254
+ execute: async () => 'mock data', // ← inline mock
255
+ }))
256
+ .memory(defineMemory({
257
+ id: 'short-term',
258
+ type: MEMORY_TYPES.EPISODIC,
259
+ strategy: { kind: MEMORY_STRATEGIES.WINDOW, size: 10 },
260
+ store: new InMemoryStore(), // ← ephemeral
261
+ }))
262
+ .build();
263
+
264
+ await agent.run({ message: 'How long does a refund take?' });
265
+ ```
266
+
267
+ The whole flow runs offline. Iterate on context engineering, narrative, control-flow patterns, error handling, multi-agent compositions — **without** spending a cent.
268
+
269
+ When the logic is right, swap one boundary at a time:
270
+
271
+ | Boundary | Mock for development | Production swap |
272
+ |---|---|---|
273
+ | **LLM provider** | `mock({ reply })` · `mock({ replies })` for scripted ReAct | `anthropic()` · `openai()` · `bedrock()` · `ollama()` |
274
+ | **Embedder** | `mockEmbedder()` | OpenAI / Cohere / Bedrock embedder (factories on roadmap) |
275
+ | **Memory store** | `InMemoryStore` | `RedisStore` (`agentfootprint/memory-redis`) · `AgentCoreStore` (`agentfootprint/memory-agentcore`) · DynamoDB / Postgres / Pinecone (planned) |
276
+ | **MCP server** | `mockMcpClient({ tools })` — in-memory, no SDK | `mcpClient({ transport })` to a real server |
277
+ | **Tool execution** | `defineTool({ execute: async () => '...' })` | Same `defineTool`, real implementation |
278
+
279
+ Each swap is one line. The flowchart, narrative, recorders, and tests don't change. Ship the patterns first; pay for tokens last.
280
+
281
+ > Why this matters: it's the difference between *learning context engineering by trying things* and *learning by burning your API budget*. The library treats $0 development as a first-class workflow, not an afterthought.
282
+
283
+ ---
284
+
236
285
  ## Memory — one factory, four types, seven strategies
237
286
 
238
287
  `defineMemory({ type, strategy, store })` — one factory dispatches `type × strategy.kind` onto the right pipeline.
@@ -291,7 +340,8 @@ The same snapshot data shape becomes RL/SFT/DPO training data in v2.1+.
291
340
 
292
341
  | Release | Focus |
293
342
  |---|---|
294
- | v2.1 | RAG flavor (`defineRAG`) · Redis memory store adapter · MCP integration · CircuitBreaker as a first-class primitive · 3-tier structured-output fallback |
343
+ | ~~v2.1~~ | RAG flavor (`defineRAG`) shipped in 2.1.0 |
344
+ | v2.2 | MCP integration (`mcpClient`) ✓ · Redis memory store adapter · CircuitBreaker primitive · 3-tier structured-output fallback |
295
345
  | v2.2 | Governance subsystem (`Policy`, `BudgetTracker`, role-based access) · DynamoDB / Postgres / Pinecone store adapters |
296
346
  | v2.3 | Causal training-data exports — `causalMemory.exportForTraining({ format: 'sft' \| 'dpo' \| 'process' })` for HuggingFace / OpenAI / Anthropic batch fine-tune |
297
347
  | v2.4+ | Deep Agents (planning-before-execution) · A2A protocol · Lens UI deep-link |
@@ -0,0 +1,258 @@
1
+ <p align="center">
2
+ <h1 align="center">agentfootprint</h1>
3
+ <p align="center">
4
+ <strong>Build agents whose every decision, tool call, and memory write is a typed event<br>you can replay and audit — same day, or six months later.</strong>
5
+ </p>
6
+ </p>
7
+
8
+ <p align="center">
9
+ <a href="https://github.com/footprintjs/agentfootprint/actions"><img src="https://github.com/footprintjs/agentfootprint/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
10
+ <a href="https://www.npmjs.com/package/agentfootprint"><img src="https://img.shields.io/npm/v/agentfootprint.svg?style=flat" alt="npm version"></a>
11
+ <a href="https://www.npmjs.com/package/agentfootprint"><img src="https://img.shields.io/npm/dm/agentfootprint.svg" alt="Downloads"></a>
12
+ <a href="https://github.com/footprintjs/agentfootprint/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="MIT"></a>
13
+ </p>
14
+
15
+ <br>
16
+
17
+ <!-- ┌────────────────────────────────────────────────────────────────┐
18
+ │ 📹 30-second demo video goes here. │
19
+ │ Embed: GitHub-hosted MP4 or Loom thumbnail. │
20
+ │ Content: paste a trace into the viewer → drag the │
21
+ │ time-travel slider → every step is there. │
22
+ │ This is the single most important asset on this page. │
23
+ └────────────────────────────────────────────────────────────────┘ -->
24
+
25
+ > **Try it without installing anything →** [Open the live trace viewer](https://footprintjs.github.io/agent-playground/#/viewer), paste the [sample trace](./examples/sample-trace.json), drag the slider. You'll see what your agent did, not what it logged.
26
+
27
+ ---
28
+
29
+ ## In 30 seconds
30
+
31
+ ```bash
32
+ npm install agentfootprint footprintjs
33
+ ```
34
+
35
+ ```typescript
36
+ import { Agent, defineTool, mock } from 'agentfootprint';
37
+
38
+ const weather = defineTool({
39
+ name: 'weather',
40
+ description: 'Get current weather for a city.',
41
+ inputSchema: {
42
+ type: 'object',
43
+ properties: { city: { type: 'string' } },
44
+ required: ['city'],
45
+ },
46
+ execute: async ({ city }: { city: string }) => `${city}: 72°F, sunny`,
47
+ });
48
+
49
+ const agent = Agent.create({
50
+ provider: mock({ reply: 'Paris is 72°F and sunny.' }), // ← no API key
51
+ model: 'mock',
52
+ })
53
+ .system('You answer weather questions using the weather tool.')
54
+ .tool(weather)
55
+ .build();
56
+
57
+ const result = await agent.run({ message: 'Weather in Paris?' });
58
+ console.log(result); // → "Paris is 72°F and sunny."
59
+ ```
60
+
61
+ That runs offline, deterministically, in <100ms, with no API key. Swap `mock(...)` for `anthropic(...)` / `openai(...)` / `bedrock(...)` / `ollama(...)` for production. Nothing else changes.
62
+
63
+ ---
64
+
65
+ ## The model in your head
66
+
67
+ Every LLM call has three slots. **Every "agent feature" — Skill, Steering doc, Instruction, Fact, Memory replay, RAG chunk — is content flowing into one of them, under one of four triggers.**
68
+
69
+ ```
70
+ ┌─────────────────────────────────────┐
71
+ │ │
72
+ │ Your LLM call has 3 slots: │
73
+ │ │
74
+ │ system messages tools │
75
+ │ ▲ ▲ ▲ │
76
+ └───────┼──────────┼──────────┼───────┘
77
+ │ │ │
78
+ │ one │ one │
79
+ │ Injection│ Injection│
80
+ │ fires… │ fires… │
81
+ │ │ │
82
+ ┌──────────────┴────┐ ┌──┴───┐ ┌──┴────┐
83
+ │ defineSkill │ │ ... │ │ ... │
84
+ │ defineSteering │ │ │ │ │
85
+ │ defineInstruction │ │ │ │ │
86
+ │ defineFact │ │ │ │ │
87
+ │ defineMemory(read) │ │ │ │ │
88
+ │ defineRAG (v2.1) │ │ │ │ │
89
+ │ …your next idea │ │ │ │ │
90
+ └────────────────────┘ └──────┘ └───────┘
91
+
92
+ …under one of:
93
+ always · rule · on-tool-return · llm-activated
94
+ ```
95
+
96
+ **There's no fourth slot.** There won't be. Every named pattern in the agent literature — Reflexion, Tree-of-Thoughts, Skills, RAG, Constitutional AI — reduces to *which slot* + *which trigger*. You learn one model; the field's growth lands as new factories on the same primitive.
97
+
98
+ ---
99
+
100
+ ## What you can build
101
+
102
+ Three example shapes, all runnable end-to-end with `npm run example examples/<file>.ts`.
103
+
104
+ ### Customer support agent (with skills, memory, and audit trail)
105
+
106
+ A support agent that activates a "billing" skill when needed, remembers the customer across sessions, and produces an audit-grade trace.
107
+
108
+ ```typescript
109
+ const agent = Agent.create({ provider: anthropic(...), model: 'claude-sonnet-4-5-20250929' })
110
+ .system('You are a friendly support assistant.')
111
+ .skill(billingSkill) // LLM activates with read_skill('billing')
112
+ .steering(toneGuidelines) // always-on
113
+ .memory(conversationMemory) // remembers across .run() calls
114
+ .build();
115
+ ```
116
+
117
+ → [`examples/context-engineering/06-mixed-flavors.ts`](examples/context-engineering/06-mixed-flavors.ts)
118
+
119
+ ### Research pipeline (multi-agent, fan-out + merge)
120
+
121
+ Three perspectives explore in parallel; an LLM merges their findings.
122
+
123
+ ```typescript
124
+ const research = Parallel.create()
125
+ .branch(optimist).branch(skeptic).branch(historian)
126
+ .merge(synthesizer)
127
+ .build();
128
+
129
+ await research.run({ message: 'Should we adopt microservices?' });
130
+ ```
131
+
132
+ → [`examples/patterns/05-tot.ts`](examples/patterns/05-tot.ts) (Tree-of-Thoughts) · [`examples/patterns/01-self-consistency.ts`](examples/patterns/01-self-consistency.ts)
133
+
134
+ ### Streaming chat agent (token-by-token to a browser)
135
+
136
+ <!-- ┌────────────────────────────────────────────────────────────────┐
137
+ │ 📹 Streaming demo clip here. │
138
+ │ A short loop: user types → token-by-token streaming → │
139
+ │ tool call appears mid-stream → final answer. │
140
+ │ Demonstrates `provider.stream()` + SSE bridge. │
141
+ └────────────────────────────────────────────────────────────────┘ -->
142
+
143
+ ```typescript
144
+ agent.on('agentfootprint.stream.token', (e) => res.write(e.payload.content));
145
+ agent.on('agentfootprint.stream.tool_start', (e) => res.write(`\n→ calling ${e.payload.toolName}...\n`));
146
+ await agent.run({ message: userInput });
147
+ ```
148
+
149
+ → [`docs-site/guides/streaming/`](docs-site/src/content/docs/guides/streaming.mdx)
150
+
151
+ ---
152
+
153
+ ## The differentiator
154
+
155
+ Agent frameworks are a crowded shelf. Two things in here are not on the rest of that shelf.
156
+
157
+ ### 1. Causal memory — replay *why*, not just *what*
158
+
159
+ Other libraries' memory remembers what was said. **agentfootprint's `defineMemory({ type: CAUSAL })` remembers the decision evidence** — every `decide()` and `select()` value the agent's flowchart captured during the run. New questions cosine-match against past queries, inject the prior decision evidence, and the LLM answers from *exact past facts* — not reconstruction.
160
+
161
+ ```typescript
162
+ const causal = defineMemory({
163
+ id: 'causal',
164
+ type: MEMORY_TYPES.CAUSAL,
165
+ strategy: { kind: MEMORY_STRATEGIES.TOP_K, topK: 1, threshold: 0.7, embedder },
166
+ store,
167
+ projection: SNAPSHOT_PROJECTIONS.DECISIONS, // inject "why" only, not "what"
168
+ });
169
+
170
+ // Monday: agent decides loan #42 should be rejected (creditScore=580, threshold=600)
171
+ // Friday: user asks "Why was my application rejected?"
172
+ // → Causal memory loads the exact decision evidence from Monday.
173
+ // → LLM answers from the SOURCE, not from memory of memory.
174
+ ```
175
+
176
+ → [`examples/memory/06-causal-snapshot.ts`](examples/memory/06-causal-snapshot.ts) — runs end-to-end with mock embedder, ~50 lines.
177
+
178
+ The same snapshot data shape becomes RL/SFT/DPO training data in v2.3+. Every successful production run becomes a labeled trajectory.
179
+
180
+ ### 2. Trace export → trace replay
181
+
182
+ Every run exports as one JSON blob. Paste it into the viewer six months later, on a different machine. Every decision, every tool call, every memory write is on a draggable time-travel slider. **No log parsing. No reconstruction. The trace IS the evidence.**
183
+
184
+ ```typescript
185
+ const trace = exportTrace(agent); // serialize
186
+ fs.writeFileSync('incident-2026-04-29.json', trace); // archive
187
+
188
+ // Later — different team, different machine
189
+ <TraceViewer trace={JSON.parse(fs.readFileSync('incident-2026-04-29.json'))} />
190
+ ```
191
+
192
+ That round-trip is the difference between "we shipped an agent and hope it's working" and "we can audit any production decision after the fact."
193
+
194
+ ---
195
+
196
+ ## Mocks first, prod second
197
+
198
+ Generative AI development is expensive when every iteration hits a paid API. agentfootprint is designed so you build the entire app — agent, context engineering, memory, RAG — against in-memory mocks, prove the logic end-to-end with **zero API cost**, then swap real infrastructure in one boundary at a time.
199
+
200
+ | Boundary | Dev (mock) | Prod (swap one line) |
201
+ |---|---|---|
202
+ | LLM provider | `mock({ reply })` | `anthropic()` · `openai()` · `bedrock()` · `ollama()` |
203
+ | Embedder | `mockEmbedder()` | OpenAI / Cohere / Bedrock embedder |
204
+ | Memory store | `InMemoryStore` | Redis · DynamoDB · Postgres · Pinecone |
205
+ | MCP server | `mcpClient({ _client })` | `mcpClient({ transport })` |
206
+ | Tool execution | inline closure | real implementation |
207
+
208
+ The flowchart, recorders, narrative, and tests don't change between dev and prod. **Ship the patterns first; pay for tokens last.**
209
+
210
+ ---
211
+
212
+ ## Pick your starting door
213
+
214
+ | If you are... | Start here |
215
+ |---|---|
216
+ | 🎓 **New to agents** — never built one before | [5-minute Quick Start](https://footprintjs.github.io/agentfootprint/getting-started/quick-start/) → first agent runs offline |
217
+ | 🛠️ **A LangChain / CrewAI / LangGraph user** | [Migration sketch](https://footprintjs.github.io/agentfootprint/getting-started/vs/) — same patterns, fewer classes |
218
+ | 🏗️ **Architecting an enterprise rollout** | [Production guide](https://footprintjs.github.io/agentfootprint/guides/deployment/) — multi-tenant identity, audit trails, redaction, OTel |
219
+ | 🔬 **Researcher / extending the framework** | [Extension guide](https://footprintjs.github.io/agentfootprint/contributing/extension-guide/) — add a new flavor in 50 lines |
220
+
221
+ Every code snippet on the docs site is a real, runnable file in [`examples/`](examples/) — every example is also an end-to-end test in CI. There is no docs-only code in this repo.
222
+
223
+ ---
224
+
225
+ ## What ships today (v2.0)
226
+
227
+ - **2 primitives** — `LLMCall`, `Agent` (the ReAct loop)
228
+ - **4 compositions** — `Sequence`, `Parallel`, `Conditional`, `Loop`
229
+ - **6 LLM providers + Mock** — Anthropic · OpenAI · Bedrock · Ollama · Browser-Anthropic · Browser-OpenAI · Mock
230
+ - **One Injection primitive** — `defineSkill` / `defineSteering` / `defineInstruction` / `defineFact` (one engine, four typed factories, all reduce to `{ trigger, slot }`)
231
+ - **One Memory factory** — `defineMemory({ type, strategy, store })` — 4 types × 7 strategies including **Causal**
232
+ - **47 typed observability events** across 13 domains — context · stream · agent · cost · skill · permission · eval · memory · …
233
+ - **Pause / resume** — JSON-serializable checkpoints; pause via `askHuman`/`pauseHere`, resume hours later on a different server
234
+ - **Resilience** — `withRetry`, `withFallback`, `resilientProvider`
235
+ - **AI-coding-tool support** — bundled instructions for Claude Code · Cursor · Windsurf · Cline · Kiro · Copilot
236
+ - **33 runnable examples** organized by DNA layer (core · core-flow · patterns · context-engineering · memory · features)
237
+
238
+ ## What's next (clearly marked roadmap)
239
+
240
+ | Release | Focus |
241
+ |---|---|
242
+ | v2.1 ✓ | RAG flavor (`defineRAG`) — *shipped* |
243
+ | v2.2 | MCP integration (`mcpClient`) ✓ · Redis adapter · CircuitBreaker · 3-tier output fallback |
244
+ | v2.3 | Governance (`Policy`, `BudgetTracker`) · DynamoDB / Postgres / Pinecone adapters |
245
+ | v2.4 | Causal training-data exports — `causalMemory.exportForTraining({ format: 'sft' \| 'dpo' \| 'process' })` |
246
+ | v2.5+ | Deep Agents (planning-before-execution) · A2A protocol · Lens UI deep-link |
247
+
248
+ Roadmap items are *not* claims about the current API. If a feature isn't in `npm install agentfootprint` today, it's listed here, not in the documentation.
249
+
250
+ ---
251
+
252
+ ## Built on
253
+
254
+ [footprintjs](https://github.com/footprintjs/footPrint) — the flowchart pattern for backend code. The decision-evidence capture, narrative recording, and time-travel checkpointing this library uses are footprintjs primitives. You don't need to learn footprintjs to use agentfootprint, but if you want to build your own primitives at this depth, [start there](https://footprintjs.github.io/footPrint/).
255
+
256
+ ## License
257
+
258
+ [MIT](./LICENSE) © [Sanjay Krishna Anbalagan](https://github.com/sanjay1909)
@@ -68,7 +68,21 @@ import {
68
68
  (`agentfootprint/instructions`, `agentfootprint/observe`,
69
69
  `agentfootprint/security`, `agentfootprint/explain` — these are v1).
70
70
 
71
- ## Hello agent
71
+ ## Mock-first development (RECOMMENDED)
72
+
73
+ Build the entire agent + context engineering + tools + memory + RAG + MCP with in-memory mocks first. Validate logic and patterns end-to-end with $0 API cost. Swap real infrastructure in one boundary at a time after the flow is right.
74
+
75
+ | Mock | Production swap |
76
+ |---|---|
77
+ | `mock({ reply })` · `mock({ replies })` for scripted multi-turn | `anthropic()` / `openai()` / `bedrock()` / `ollama()` |
78
+ | `InMemoryStore` | `RedisStore` (`agentfootprint/memory-redis`) · `AgentCoreStore` (`agentfootprint/memory-agentcore`) · Dynamo · Postgres · Pinecone (planned) |
79
+ | `mockEmbedder()` | OpenAI / Cohere / Bedrock embedder factory |
80
+ | `mockMcpClient({ tools })` — in-memory, no SDK | `mcpClient({ transport })` real server |
81
+ | inline `defineTool({ execute: async () => '...' })` | real implementation |
82
+
83
+ When generating starter code, default to the mock surface unless the user explicitly says they have a key / endpoint / store ready.
84
+
85
+ ## Hello agent — mock-first
72
86
 
73
87
  ```typescript
74
88
  const weather = defineTool({
@@ -81,12 +95,12 @@ const weather = defineTool({
81
95
  required: ['city'],
82
96
  },
83
97
  },
84
- execute: async (args) => `${(args as { city: string }).city}: 72°F`,
98
+ execute: async (args) => `${(args as { city: string }).city}: 72°F`, // mock data
85
99
  });
86
100
 
87
101
  const agent = Agent.create({
88
- provider: anthropic({ apiKey: process.env.ANTHROPIC_API_KEY! }),
89
- model: 'claude-sonnet-4-5-20250929',
102
+ provider: mock({ reply: 'San Francisco: 72°F, sunny.' }), // ← no API key
103
+ model: 'mock',
90
104
  maxIterations: 10,
91
105
  })
92
106
  .system('You are a helpful weather assistant.')
@@ -96,7 +110,12 @@ const agent = Agent.create({
96
110
  const result = await agent.run({ message: 'Weather in SF?' });
97
111
  ```
98
112
 
99
- For testing, swap `anthropic({...})` for `mock({ reply: 'sunny' })` same agent, $0.
113
+ When the logic is right, swap to a real provider — one line:
114
+
115
+ ```typescript
116
+ provider: anthropic({ apiKey: process.env.ANTHROPIC_API_KEY! }),
117
+ model: 'claude-sonnet-4-5-20250929',
118
+ ```
100
119
 
101
120
  ## Context engineering
102
121
 
@@ -210,6 +229,38 @@ The 7 **strategies**:
210
229
  - `TOP_K` (score-threshold) · `EXTRACT` (LLM distills on write)
211
230
  - `DECAY` (recency-weighted, planned) · `HYBRID` (compose multiple)
212
231
 
232
+ ## MCP — `mcpClient` (connect to external MCP servers)
233
+
234
+ ```typescript
235
+ import { Agent, mcpClient } from 'agentfootprint';
236
+
237
+ const slack = await mcpClient({
238
+ name: 'slack',
239
+ transport: { transport: 'stdio', command: 'npx', args: ['@example/slack-mcp'] },
240
+ });
241
+
242
+ const agent = Agent.create({ provider })
243
+ .tools(await slack.tools()) // pull ALL tools from server in one call
244
+ .build();
245
+
246
+ await agent.run({ message: '...' });
247
+ await slack.close();
248
+ ```
249
+
250
+ Transports:
251
+ - `{ transport: 'stdio', command, args, env?, cwd? }` — local subprocess
252
+ - `{ transport: 'http', url, headers? }` — remote Streamable HTTP
253
+
254
+ The `@modelcontextprotocol/sdk` peer-dep is **lazy-required** — zero
255
+ runtime cost when MCP isn't used. Friendly install hint if missing.
256
+
257
+ `agent.tools(arr)` is the bulk-register companion to `agent.tool(t)`.
258
+ Tool-name uniqueness is validated at `.build()` across MCP servers +
259
+ manual `.tool()` calls — duplicates throw early.
260
+
261
+ Server-side support (exposing your agent as an MCP tool to other LLMs)
262
+ is a separate concern, not yet shipped.
263
+
213
264
  ## RAG — `defineRAG` + `indexDocuments`
214
265
 
215
266
  ```typescript