groundswell 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/system_prompts/task-breakdown.md +100 -0
  3. package/PRPs/001-hierarchical-workflow-engine.md +2438 -0
  4. package/PRPs/PRDs/001-hierarchical-workflow-engine.md +543 -0
  5. package/PRPs/PRDs/002-agent-prompt.md +390 -0
  6. package/PRPs/PRDs/003-agent-prompt.md +943 -0
  7. package/PRPs/PRDs/004-agent-prompt.md +1136 -0
  8. package/PRPs/PRDs/tasks-001.json +492 -0
  9. package/PRPs/README.md +83 -0
  10. package/PRPs/templates/prp_base.md +222 -0
  11. package/README.md +218 -0
  12. package/docs/agent.md +422 -0
  13. package/docs/prompt.md +419 -0
  14. package/docs/workflow.md +600 -0
  15. package/examples/README.md +244 -0
  16. package/examples/examples/01-basic-workflow.ts +100 -0
  17. package/examples/examples/02-decorator-options.ts +217 -0
  18. package/examples/examples/03-parent-child.ts +241 -0
  19. package/examples/examples/04-observers-debugger.ts +340 -0
  20. package/examples/examples/05-error-handling.ts +387 -0
  21. package/examples/examples/06-concurrent-tasks.ts +352 -0
  22. package/examples/examples/07-agent-loops.ts +432 -0
  23. package/examples/examples/08-sdk-features.ts +667 -0
  24. package/examples/examples/09-reflection.ts +573 -0
  25. package/examples/examples/10-introspection.ts +550 -0
  26. package/examples/index.ts +143 -0
  27. package/examples/utils/helpers.ts +57 -0
  28. package/llms_full.txt +5890 -0
  29. package/package.json +63 -0
  30. package/plan/P1P2/PRP.md +527 -0
  31. package/plan/P1P2/research/LRU_CACHE_BEST_PRACTICES.md +1929 -0
  32. package/plan/P1P2/research/LRU_CACHE_CODE_PATTERNS.md +857 -0
  33. package/plan/P1P2/research/LRU_CACHE_INTEGRATION_GUIDE.md +738 -0
  34. package/plan/P1P2/research/LRU_CACHE_RESEARCH_INDEX.md +424 -0
  35. package/plan/P1P2/research/REFLECTION_INDEX.md +291 -0
  36. package/plan/P1P2/research/REFLECTION_RESEARCH_REPORT.md +1342 -0
  37. package/plan/P1P2/research/RESEARCH_SUMMARY.md +342 -0
  38. package/plan/P1P2/research/anthropic-sdk.md +174 -0
  39. package/plan/P1P2/research/async-local-storage.md +200 -0
  40. package/plan/P1P2/research/reflection-code-patterns.md +1205 -0
  41. package/plan/P1P2/research/reflection-decision-matrix.md +421 -0
  42. package/plan/P1P2/research/reflection-implementation-guide.md +1341 -0
  43. package/plan/P1P2/research/reflection-integration-guide.md +834 -0
  44. package/plan/P1P2/research/reflection-patterns.md +1468 -0
  45. package/plan/P1P2/research/reflection-quick-reference.md +558 -0
  46. package/plan/P1P2/research/zod-schema.md +152 -0
  47. package/plan/P3P4/PRP.md +1388 -0
  48. package/plan/P3P4/research/caching-lru.md +116 -0
  49. package/plan/P3P4/research/introspection-tools.md +177 -0
  50. package/plan/P3P4/research/reflection-patterns.md +117 -0
  51. package/plan/P4P5/PRP.md +1136 -0
  52. package/plan/P4P5/research/RESEARCH_SUMMARY.md +151 -0
  53. package/plan/architecture/external_deps.md +358 -0
  54. package/plan/architecture/system_context.md +242 -0
  55. package/plan/backlog.json +867 -0
  56. package/plan/research/INTROSPECTION_RESEARCH_SUMMARY.md +378 -0
  57. package/plan/research/README-INTROSPECTION.md +352 -0
  58. package/plan/research/agent-introspection-patterns.md +1085 -0
  59. package/plan/research/introspection-security-guide.md +928 -0
  60. package/plan/research/introspection-tool-examples.md +875 -0
  61. package/scripts/generate-llms-full.ts +206 -0
  62. package/src/__tests__/integration/agent-workflow.test.ts +256 -0
  63. package/src/__tests__/integration/tree-mirroring.test.ts +114 -0
  64. package/src/__tests__/unit/agent.test.ts +169 -0
  65. package/src/__tests__/unit/cache-key.test.ts +182 -0
  66. package/src/__tests__/unit/cache.test.ts +172 -0
  67. package/src/__tests__/unit/context.test.ts +138 -0
  68. package/src/__tests__/unit/decorators.test.ts +100 -0
  69. package/src/__tests__/unit/introspection-tools.test.ts +277 -0
  70. package/src/__tests__/unit/prompt.test.ts +135 -0
  71. package/src/__tests__/unit/reflection.test.ts +210 -0
  72. package/src/__tests__/unit/tree-debugger.test.ts +85 -0
  73. package/src/__tests__/unit/workflow.test.ts +81 -0
  74. package/src/cache/cache-key.ts +244 -0
  75. package/src/cache/cache.ts +236 -0
  76. package/src/cache/index.ts +8 -0
  77. package/src/core/agent.ts +573 -0
  78. package/src/core/context.ts +119 -0
  79. package/src/core/event-tree.ts +260 -0
  80. package/src/core/factory.ts +123 -0
  81. package/src/core/index.ts +17 -0
  82. package/src/core/logger.ts +87 -0
  83. package/src/core/mcp-handler.ts +184 -0
  84. package/src/core/prompt.ts +150 -0
  85. package/src/core/workflow-context.ts +349 -0
  86. package/src/core/workflow.ts +302 -0
  87. package/src/debugger/index.ts +1 -0
  88. package/src/debugger/tree-debugger.ts +210 -0
  89. package/src/decorators/index.ts +3 -0
  90. package/src/decorators/observed-state.ts +95 -0
  91. package/src/decorators/step.ts +139 -0
  92. package/src/decorators/task.ts +96 -0
  93. package/src/examples/index.ts +2 -0
  94. package/src/examples/tdd-orchestrator.ts +65 -0
  95. package/src/examples/test-cycle-workflow.ts +64 -0
  96. package/src/index.ts +140 -0
  97. package/src/reflection/index.ts +5 -0
  98. package/src/reflection/reflection.ts +407 -0
  99. package/src/tools/index.ts +36 -0
  100. package/src/tools/introspection.ts +464 -0
  101. package/src/types/agent.ts +90 -0
  102. package/src/types/decorators.ts +25 -0
  103. package/src/types/error-strategy.ts +13 -0
  104. package/src/types/error.ts +20 -0
  105. package/src/types/events.ts +74 -0
  106. package/src/types/index.ts +55 -0
  107. package/src/types/logging.ts +24 -0
  108. package/src/types/observer.ts +18 -0
  109. package/src/types/prompt.ts +40 -0
  110. package/src/types/reflection.ts +117 -0
  111. package/src/types/sdk-primitives.ts +128 -0
  112. package/src/types/snapshot.ts +14 -0
  113. package/src/types/workflow-context.ts +163 -0
  114. package/src/types/workflow.ts +37 -0
  115. package/src/utils/id.ts +11 -0
  116. package/src/utils/index.ts +3 -0
  117. package/src/utils/observable.ts +77 -0
  118. package/tasks.json +0 -0
  119. package/tsconfig.json +22 -0
  120. package/vitest.config.ts +16 -0
@@ -0,0 +1,352 @@
1
+ # Agent Introspection and Self-Awareness Research
2
+
3
+ Complete research package on implementing agent introspection capabilities in the Groundswell workflow orchestration framework.
4
+
5
+ ---
6
+
7
+ ## Quick Navigation
8
+
9
+ ### For Everyone: Start Here
10
+ - **[INTROSPECTION_RESEARCH_SUMMARY.md](./INTROSPECTION_RESEARCH_SUMMARY.md)** - Overview, key findings, and implementation roadmap (10 min read)
11
+
12
+ ### For Framework Developers
13
+ - **[agent-introspection-patterns.md](./agent-introspection-patterns.md)** - Complete patterns, architecture, integration with Groundswell (30 min read)
14
+ - **[introspection-tool-examples.md](./introspection-tool-examples.md)** - Ready-to-implement tool definitions with examples (20 min read)
15
+
16
+ ### For Security and Operations Teams
17
+ - **[introspection-security-guide.md](./introspection-security-guide.md)** - Threat models, mitigations, implementation checklist (25 min read)
18
+
19
+ ---
20
+
21
+ ## Research Scope
22
+
23
+ This research covers how agents in an AI orchestration framework can introspect and gain awareness of:
24
+
25
+ ### What Agents Can Inspect
26
+ 1. **Their Position in Workflow Hierarchy**
27
+ - Parent, ancestor, and sibling relationships
28
+ - Depth in tree
29
+ - Execution status
30
+
31
+ 2. **Ancestor Workflow Outputs**
32
+ - Results from parent and prior workflows
33
+ - Structured execution data
34
+ - Performance metrics
35
+
36
+ 3. **Cache Status**
37
+ - Which results are cached
38
+ - Cache freshness
39
+ - Cache hit rates
40
+
41
+ 4. **Execution History**
42
+ - Events that occurred
43
+ - Errors and warnings
44
+ - Tool invocations
45
+
46
+ 5. **State Snapshots**
47
+ - Internal state at decision points
48
+ - Captured via @ObservedState decorators
49
+ - State evolution over time
50
+
51
+ ### What Agents Can Do (Self-Modification)
52
+ 1. **Spawn Child Workflows**
53
+ - Create parallel or sequential children
54
+ - Via pre-approved templates only
55
+ - With resource limits and approval
56
+
57
+ 2. **Generate Dynamic Prompts**
58
+ - Create context-aware prompts for children
59
+ - Based on current analysis
60
+ - With safety validation
61
+
62
+ ### What Agents CANNOT Do
63
+ - Modify their own code or prompts
64
+ - Modify parent or sibling state
65
+ - Create arbitrary workflows (only templates)
66
+ - Access secrets in state
67
+ - Query unbounded results
68
+ - Modify or delete execution history
69
+
70
+ ---
71
+
72
+ ## Key Findings Summary
73
+
74
+ ### Seven Core Introspection Tools
75
+
76
+ All tools follow Anthropic's JSON Schema tool format:
77
+
78
+ | Tool Name | Purpose | Risk Level |
79
+ |-----------|---------|-----------|
80
+ | `workflow_inspect_hierarchy` | Discover position in tree | LOW |
81
+ | `workflow_read_ancestor_outputs` | Access parent results | MEDIUM |
82
+ | `workflow_inspect_cache` | Check cache status | LOW |
83
+ | `workflow_read_event_history` | Review what happened | LOW |
84
+ | `workflow_inspect_state_snapshot` | View internal state | MEDIUM |
85
+ | `workflow_spawn_child` | Create child workflows | HIGH |
86
+ | `workflow_generate_dynamic_prompt` | Create prompts | HIGH |
87
+
88
+ ### Security Principles
89
+
90
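+ 1. **Read-Only First**: The five inspection tools never modify state; only the two self-modification tools (`workflow_spawn_child`, `workflow_generate_dynamic_prompt`) can create new work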
91
+ 2. **Explicit Filters**: Agents explicitly request data they need
92
+ 3. **Hard Limits**: All queries have maximum bounds (depth, items, size, time)
93
+ 4. **Output Validation**: Ancestor outputs treated as untrusted input
94
+ 5. **Template-Based**: Spawning requires pre-approved templates
95
+ 6. **Audit Everything**: All introspection queries are logged
96
+ 7. **Tenant Isolation**: Agents only see their own workflow tree
97
+
98
+ ### Threat Mitigation
99
+
100
+ | Threat | Mitigation |
101
+ |--------|-----------|
102
+ | Secret Exfiltration | Redaction patterns, secret filtering |
103
+ | Prompt Injection | Output validation, injection detection |
104
+ | Privilege Escalation | Template-based spawning, depth limits |
105
+ | Denial of Service | Query limits (depth, items, size, time, rate) |
106
+
107
+ ---
108
+
109
+ ## Implementation Phases
110
+
111
+ ### Phase 1: Introspection (Weeks 1-2)
112
+ Read-only inspection tools (LOW to MEDIUM risk), without spawning
113
+ - `workflow_inspect_hierarchy`
114
+ - `workflow_read_ancestor_outputs`
115
+ - `workflow_inspect_cache`
116
+ - `workflow_read_event_history`
117
+ - `workflow_inspect_state_snapshot`
118
+
119
+ ### Phase 2: Security (Weeks 3-4)
120
+ Add protections and validation
121
+ - Input schema validation
122
+ - Output sanitization
123
+ - Secret filtering
124
+ - Audit logging
125
+
126
+ ### Phase 3: Self-Modification (Weeks 5-6)
127
+ Add controlled spawning
128
+ - `workflow_spawn_child` (template-based)
129
+ - `workflow_generate_dynamic_prompt` (validated)
130
+ - Resource enforcement
131
+ - Privilege checks
132
+
133
+ ### Phase 4: Operations (Weeks 7-8)
134
+ Production readiness
135
+ - Monitoring and alerting
136
+ - Documentation
137
+ - Testing (unit, integration, penetration)
138
+
139
+ ---
140
+
141
+ ## File Organization
142
+
143
+ ```
144
+ plan/research/
145
+ ├── README-INTROSPECTION.md ← You are here
146
+ ├── INTROSPECTION_RESEARCH_SUMMARY.md ← Executive summary
147
+ ├── agent-introspection-patterns.md ← Main technical reference
148
+ ├── introspection-tool-examples.md ← Code examples
149
+ └── introspection-security-guide.md ← Security deep-dive
150
+ ```
151
+
152
+ ---
153
+
154
+ ## Groundswell Integration Points
155
+
156
+ ### Existing Architecture
157
+
158
+ The Groundswell codebase already provides the foundations that introspection can build on:
159
+
160
+ **EventTreeHandle** (src/core/event-tree.ts)
161
+ - Already implements `getAncestors()` and `getChildren()`
162
+ - Can be extended for introspection
163
+
164
+ **WorkflowContext** (src/core/workflow-context.ts)
165
+ - Already maintains connection to root workflow
166
+ - Can provide execution context
167
+
168
+ **WorkflowNode** (src/types/workflow.ts)
169
+ - Already tracks hierarchy
170
+ - Can expose hierarchy info to agents
171
+
172
+ **Agent** (src/core/agent.ts)
173
+ - Already supports tool invocation
174
+ - Can register introspection tools
175
+
176
+ ### New Components Needed
177
+
178
+ ```typescript
179
+ // src/core/introspection-service.ts
180
+ class WorkflowIntrospectionService {
181
+ inspectHierarchy(nodeId?: string): HierarchyInfo
182
+ readAncestorOutputs(ancestorId?: string): object
183
+ inspectCache(nodeId?: string): object
184
+ readEventHistory(...): object
185
+ inspectStateSnapshot(...): object
186
+ }
187
+
188
+ // src/core/introspection-tools.ts
189
+ function createIntrospectionTools(
190
+ introspectionService: WorkflowIntrospectionService
191
+ ): Tool[]
192
+
193
+ // src/types/introspection.ts
194
+ interface HierarchyInfo { ... }
195
+ interface IntrospectionLimits { ... }
196
+ interface StateAccessPolicy { ... }
197
+ ```
198
+
199
+ ---
200
+
201
+ ## Starting Points for Different Roles
202
+
203
+ ### Framework Developer
204
+ 1. Read INTROSPECTION_RESEARCH_SUMMARY.md (10 min)
205
+ 2. Read agent-introspection-patterns.md (30 min)
206
+ 3. Review introspection-tool-examples.md (20 min)
207
+ 4. Look at "Implementation Patterns for Groundswell" section
208
+ 5. Create src/core/introspection-service.ts based on examples
209
+
210
+ ### Security Engineer
211
+ 1. Read INTROSPECTION_RESEARCH_SUMMARY.md (10 min)
212
+ 2. Review introspection-security-guide.md (25 min)
213
+ 3. Work through threat models and mitigations
214
+ 4. Create implementation checklist
215
+ 5. Plan penetration testing
216
+
217
+ ### DevOps / Operations
218
+ 1. Read INTROSPECTION_RESEARCH_SUMMARY.md (10 min)
219
+ 2. Review "Operational Recommendations" in introspection-security-guide.md
220
+ 3. Plan monitoring setup
221
+ 4. Create incident response procedures
222
+ 5. Set up audit logging
223
+
224
+ ### Agent Developer / User
225
+ 1. Read INTROSPECTION_RESEARCH_SUMMARY.md (10 min)
226
+ 2. Review introspection-tool-examples.md (20 min)
227
+ 3. Study "Integration Patterns" section
228
+ 4. Review security checklist
229
+ 5. Test with provided examples
230
+
231
+ ---
232
+
233
+ ## Key Concepts Explained
234
+
235
+ ### Hierarchy Introspection
236
+
237
+ Agents live in a tree of workflows:
238
+
239
+ ```
240
+ Root Workflow
241
+ ├── Child 1 (Agent working here)
242
+ │ ├── Grandchild 1
243
+ │ ├── Grandchild 2 (Agent working here)
244
+ │ └── Grandchild 3
245
+ └── Child 2
246
+ └── Grandchild 4
247
+ ```
248
+
249
+ Via `workflow_inspect_hierarchy`, the agent working at Grandchild 2 can learn:
250
+ - "I'm Grandchild 2"
251
+ - "My parent is Child 1"
252
+ - "My siblings are Grandchild 1 and 3"
253
+ - "My ancestors are Child 1 and Root Workflow"
254
+ - "I'm 3 levels deep"
255
+
256
+ ### Output Introspection
257
+
258
+ Each workflow produces outputs. Via `workflow_read_ancestor_outputs`:
259
+ - "What did my parent output?"
260
+ - "What did my grandparent output?"
261
+ - "Here's the full chain: Root → Child 1 → Me"
262
+
263
+ Agents use this to understand what prior workflows accomplished.
264
+
265
+ ### State Snapshots
266
+
267
+ Workflows can declare state with `@ObservedState`:
268
+ ```typescript
269
+ class MyWorkflow {
270
+ @ObservedState() errorCount = 0;
271
+ @ObservedState() successRate = 1.0;
272
+ }
273
+ ```
274
+
275
+ Via `workflow_inspect_state_snapshot`:
276
+ - "What was the error count when I started?"
277
+ - "How did the success rate change over time?"
278
+ - "What was the state at each decision point?"
279
+
280
+ ### Self-Modification
281
+
282
+ Agents can spawn children, but only:
283
+ - Using pre-approved templates
284
+ - With resource limits
285
+ - With parent approval (checked at runtime)
286
+ - With depth-based capability degradation
287
+
288
+ Example:
289
+ ```
290
+ Root (can spawn any template, max 5 children)
291
+ └── Processor (can spawn data_validation template, max 10 children)
292
+ ├── Validator 1 (cannot spawn any children)
293
+ ├── Validator 2 (cannot spawn any children)
294
+ └── Validator 3 (cannot spawn any children)
295
+ ```
296
+
297
+ ---
298
+
299
+ ## Security Assumptions
300
+
301
+ This research assumes:
302
+
303
+ 1. **Anthropic API is secure** (latest Claude models, official SDK)
304
+ 2. **Sandboxed execution** available (container-based isolation)
305
+ 3. **Audit logging** implemented (all queries logged)
306
+ 4. **Network isolation** available (tool execution restricted)
307
+ 5. **Secret management** in place (no secrets in code)
308
+
309
+ If any assumption is violated, refer to the security guide (introspection-security-guide.md) for alternatives.
310
+
311
+ ---
312
+
313
+ ## References and Sources
314
+
315
+ All research is based on:
316
+
317
+ - **Anthropic Research**: Introspection in LLMs, Tool Use, Agent SDK
318
+ - **MCP Protocol**: Model Context Protocol tool specifications
319
+ - **Security Research**: AgentArmor, design patterns from Stanford/Berkeley
320
+ - **Cloud Providers**: AWS Bedrock, Google ADK, Azure AI Agent Service
321
+ - **Frameworks**: LangGraph, CrewAI, LangChain patterns
322
+
323
+ See each document for full citations.
324
+
325
+ ---
326
+
327
+ ## Questions?
328
+
329
+ 1. **General questions?** See the FAQ section of INTROSPECTION_RESEARCH_SUMMARY.md
330
+ 2. **Technical questions?** See agent-introspection-patterns.md
331
+ 3. **Security questions?** See introspection-security-guide.md
332
+ 4. **Implementation questions?** See introspection-tool-examples.md
333
+
334
+ ---
335
+
336
+ ## Contributing to This Research
337
+
338
+ If you discover:
339
+ - Issues with recommendations
340
+ - New threat vectors
341
+ - Better security patterns
342
+ - Optimization opportunities
343
+
344
+ Please file a GitHub issue with details and we'll update the research.
345
+
346
+ ---
347
+
348
+ **Status**: COMPLETE - Ready for implementation
349
+ **Confidence**: HIGH - Based on production research and patterns
350
+ **Applicable To**: All workflow orchestration frameworks (especially Groundswell)
351
+ **Last Updated**: December 8, 2025
352
+