groundswell 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/system_prompts/task-breakdown.md +100 -0
  3. package/PRPs/001-hierarchical-workflow-engine.md +2438 -0
  4. package/PRPs/PRDs/001-hierarchical-workflow-engine.md +543 -0
  5. package/PRPs/PRDs/002-agent-prompt.md +390 -0
  6. package/PRPs/PRDs/003-agent-prompt.md +943 -0
  7. package/PRPs/PRDs/004-agent-prompt.md +1136 -0
  8. package/PRPs/PRDs/tasks-001.json +492 -0
  9. package/PRPs/README.md +83 -0
  10. package/PRPs/templates/prp_base.md +222 -0
  11. package/README.md +218 -0
  12. package/docs/agent.md +422 -0
  13. package/docs/prompt.md +419 -0
  14. package/docs/workflow.md +600 -0
  15. package/examples/README.md +244 -0
  16. package/examples/examples/01-basic-workflow.ts +100 -0
  17. package/examples/examples/02-decorator-options.ts +217 -0
  18. package/examples/examples/03-parent-child.ts +241 -0
  19. package/examples/examples/04-observers-debugger.ts +340 -0
  20. package/examples/examples/05-error-handling.ts +387 -0
  21. package/examples/examples/06-concurrent-tasks.ts +352 -0
  22. package/examples/examples/07-agent-loops.ts +432 -0
  23. package/examples/examples/08-sdk-features.ts +667 -0
  24. package/examples/examples/09-reflection.ts +573 -0
  25. package/examples/examples/10-introspection.ts +550 -0
  26. package/examples/index.ts +143 -0
  27. package/examples/utils/helpers.ts +57 -0
  28. package/llms_full.txt +5890 -0
  29. package/package.json +63 -0
  30. package/plan/P1P2/PRP.md +527 -0
  31. package/plan/P1P2/research/LRU_CACHE_BEST_PRACTICES.md +1929 -0
  32. package/plan/P1P2/research/LRU_CACHE_CODE_PATTERNS.md +857 -0
  33. package/plan/P1P2/research/LRU_CACHE_INTEGRATION_GUIDE.md +738 -0
  34. package/plan/P1P2/research/LRU_CACHE_RESEARCH_INDEX.md +424 -0
  35. package/plan/P1P2/research/REFLECTION_INDEX.md +291 -0
  36. package/plan/P1P2/research/REFLECTION_RESEARCH_REPORT.md +1342 -0
  37. package/plan/P1P2/research/RESEARCH_SUMMARY.md +342 -0
  38. package/plan/P1P2/research/anthropic-sdk.md +174 -0
  39. package/plan/P1P2/research/async-local-storage.md +200 -0
  40. package/plan/P1P2/research/reflection-code-patterns.md +1205 -0
  41. package/plan/P1P2/research/reflection-decision-matrix.md +421 -0
  42. package/plan/P1P2/research/reflection-implementation-guide.md +1341 -0
  43. package/plan/P1P2/research/reflection-integration-guide.md +834 -0
  44. package/plan/P1P2/research/reflection-patterns.md +1468 -0
  45. package/plan/P1P2/research/reflection-quick-reference.md +558 -0
  46. package/plan/P1P2/research/zod-schema.md +152 -0
  47. package/plan/P3P4/PRP.md +1388 -0
  48. package/plan/P3P4/research/caching-lru.md +116 -0
  49. package/plan/P3P4/research/introspection-tools.md +177 -0
  50. package/plan/P3P4/research/reflection-patterns.md +117 -0
  51. package/plan/P4P5/PRP.md +1136 -0
  52. package/plan/P4P5/research/RESEARCH_SUMMARY.md +151 -0
  53. package/plan/architecture/external_deps.md +358 -0
  54. package/plan/architecture/system_context.md +242 -0
  55. package/plan/backlog.json +867 -0
  56. package/plan/research/INTROSPECTION_RESEARCH_SUMMARY.md +378 -0
  57. package/plan/research/README-INTROSPECTION.md +352 -0
  58. package/plan/research/agent-introspection-patterns.md +1085 -0
  59. package/plan/research/introspection-security-guide.md +928 -0
  60. package/plan/research/introspection-tool-examples.md +875 -0
  61. package/scripts/generate-llms-full.ts +206 -0
  62. package/src/__tests__/integration/agent-workflow.test.ts +256 -0
  63. package/src/__tests__/integration/tree-mirroring.test.ts +114 -0
  64. package/src/__tests__/unit/agent.test.ts +169 -0
  65. package/src/__tests__/unit/cache-key.test.ts +182 -0
  66. package/src/__tests__/unit/cache.test.ts +172 -0
  67. package/src/__tests__/unit/context.test.ts +138 -0
  68. package/src/__tests__/unit/decorators.test.ts +100 -0
  69. package/src/__tests__/unit/introspection-tools.test.ts +277 -0
  70. package/src/__tests__/unit/prompt.test.ts +135 -0
  71. package/src/__tests__/unit/reflection.test.ts +210 -0
  72. package/src/__tests__/unit/tree-debugger.test.ts +85 -0
  73. package/src/__tests__/unit/workflow.test.ts +81 -0
  74. package/src/cache/cache-key.ts +244 -0
  75. package/src/cache/cache.ts +236 -0
  76. package/src/cache/index.ts +8 -0
  77. package/src/core/agent.ts +573 -0
  78. package/src/core/context.ts +119 -0
  79. package/src/core/event-tree.ts +260 -0
  80. package/src/core/factory.ts +123 -0
  81. package/src/core/index.ts +17 -0
  82. package/src/core/logger.ts +87 -0
  83. package/src/core/mcp-handler.ts +184 -0
  84. package/src/core/prompt.ts +150 -0
  85. package/src/core/workflow-context.ts +349 -0
  86. package/src/core/workflow.ts +302 -0
  87. package/src/debugger/index.ts +1 -0
  88. package/src/debugger/tree-debugger.ts +210 -0
  89. package/src/decorators/index.ts +3 -0
  90. package/src/decorators/observed-state.ts +95 -0
  91. package/src/decorators/step.ts +139 -0
  92. package/src/decorators/task.ts +96 -0
  93. package/src/examples/index.ts +2 -0
  94. package/src/examples/tdd-orchestrator.ts +65 -0
  95. package/src/examples/test-cycle-workflow.ts +64 -0
  96. package/src/index.ts +140 -0
  97. package/src/reflection/index.ts +5 -0
  98. package/src/reflection/reflection.ts +407 -0
  99. package/src/tools/index.ts +36 -0
  100. package/src/tools/introspection.ts +464 -0
  101. package/src/types/agent.ts +90 -0
  102. package/src/types/decorators.ts +25 -0
  103. package/src/types/error-strategy.ts +13 -0
  104. package/src/types/error.ts +20 -0
  105. package/src/types/events.ts +74 -0
  106. package/src/types/index.ts +55 -0
  107. package/src/types/logging.ts +24 -0
  108. package/src/types/observer.ts +18 -0
  109. package/src/types/prompt.ts +40 -0
  110. package/src/types/reflection.ts +117 -0
  111. package/src/types/sdk-primitives.ts +128 -0
  112. package/src/types/snapshot.ts +14 -0
  113. package/src/types/workflow-context.ts +163 -0
  114. package/src/types/workflow.ts +37 -0
  115. package/src/utils/id.ts +11 -0
  116. package/src/utils/index.ts +3 -0
  117. package/src/utils/observable.ts +77 -0
  118. package/tasks.json +0 -0
  119. package/tsconfig.json +22 -0
  120. package/vitest.config.ts +16 -0
@@ -0,0 +1,1085 @@
1
+ # Agent Introspection and Self-Awareness Patterns in AI Orchestration Frameworks
2
+
3
+ **Research Date:** December 8, 2025
4
+ **Focus:** Anthropic Tool Format, Hierarchy Inspection, Security Boundaries, Self-Modification Capabilities
5
+
6
+ ---
7
+
8
+ ## Table of Contents
9
+
10
+ 1. [Introspection Capabilities](#introspection-capabilities)
11
+ 2. [Anthropic Tool Definition Format](#anthropic-tool-definition-format)
12
+ 3. [Hierarchy Inspection Patterns](#hierarchy-inspection-patterns)
13
+ 4. [Security Considerations](#security-considerations)
14
+ 5. [Self-Modification Capabilities](#self-modification-capabilities)
15
+ 6. [Implementation Patterns for Groundswell](#implementation-patterns-for-groundswell)
16
+
17
+ ---
18
+
19
+ ## Introspection Capabilities
20
+
21
+ ### Current State of Agent Introspection
22
+
23
+ Research from Anthropic demonstrates that modern LLMs like Claude exhibit **emergent introspective awareness**, though it remains "highly unreliable and limited in scope." Key findings include:
24
+
25
+ - Claude models can examine and critique their own outputs
26
+ - Self-analysis ("introspection") capabilities are less developed than task execution
27
+ - Agents benefit from explicit introspection tools rather than relying on emergent capabilities
28
+
29
+ ### How Agents Should Inspect Position in Workflow Hierarchy
30
+
31
+ Agents require access to contextual information about their place in the execution hierarchy:
32
+
33
+ **Position Inspection Elements:**
34
+ - **Workflow Identity**: Current workflow ID, name, status
35
+ - **Parent Context**: Parent workflow ID, name, execution stage
36
+ - **Ancestor Chain**: Full path from current node to root
37
+ - **Sibling Information**: Peer workflows at same hierarchy level
38
+ - **Hierarchy Depth**: How many levels deep in the tree
39
+ - **Execution Timeline**: When workflow started, current elapsed time
40
+
41
+ **Prior Output Inspection:**
42
+ - **Ancestor Outputs**: Results from parent workflows and prior steps
43
+ - **Sibling Results**: Outputs from parallel or sequential peer workflows
44
+ - **Cache Status**: What results have been cached vs. computed fresh
45
+ - **State Snapshots**: Captured state at key decision points
46
+ - **Event History**: Log of all events in the execution tree
47
+
48
+ ### Design Pattern: Ancestor Chain Traversal
49
+
50
+ From research on tree data structures (DOM navigation, XPath axes):
51
+
52
+ ```
53
+ Current Node (ID: workflow-456)
54
+ ↑ parent
55
+ Parent Node (ID: workflow-123, status: running)
56
+ ↑ parent
57
+ Root Node (ID: root-001, status: running)
58
+ ```
59
+
60
+ **Key Methods for Tree Traversal:**
61
+ - `ancestors(nodeId)` → returns path from node to root
62
+ - `parent(nodeId)` → returns immediate parent
63
+ - `root()` → returns root workflow
64
+ - `siblings(nodeId)` → returns peers at same level
65
+ - `children(nodeId)` → returns all direct children
66
+ - `descendants(nodeId)` → returns full subtree
67
+
68
+ ---
69
+
70
+ ## Anthropic Tool Definition Format
71
+
72
+ ### Tool Structure (JSON Schema)
73
+
74
+ Based on Anthropic's official documentation and MCP Protocol specification:
75
+
76
+ ```json
77
+ {
78
+ "name": "inspect_workflow_hierarchy",
79
+ "description": "Inspect the current workflow's position in the execution hierarchy, including ancestors, siblings, and prior results.",
80
+ "input_schema": {
81
+ "type": "object",
82
+ "properties": {
83
+ "node_id": {
84
+ "type": "string",
85
+ "description": "The workflow node ID to inspect. If omitted, inspects current workflow."
86
+ },
87
+ "include_ancestors": {
88
+ "type": "boolean",
89
+ "description": "Include full ancestor chain from current node to root",
90
+ "default": true
91
+ },
92
+ "include_siblings": {
93
+ "type": "boolean",
94
+ "description": "Include sibling workflows at same hierarchy level",
95
+ "default": false
96
+ },
97
+ "include_state_snapshots": {
98
+ "type": "boolean",
99
+ "description": "Include captured state snapshots from ancestor workflows",
100
+ "default": false
101
+ },
102
+ "include_prior_outputs": {
103
+ "type": "boolean",
104
+ "description": "Include execution outputs from ancestor workflows",
105
+ "default": false
106
+ }
107
+ },
108
+ "required": []
109
+ }
110
+ }
111
+ ```
112
+
113
+ ### Core Tool Naming Conventions
114
+
115
+ **Recommended Naming Patterns for Introspection Tools:**
116
+
117
+ 1. **Prefix-Based (Resource + Action):**
118
+ - `workflow_inspect_hierarchy`
119
+ - `workflow_inspect_state`
120
+ - `workflow_inspect_cache`
121
+ - `workflow_read_ancestor_outputs`
122
+
123
+ 2. **Verb-First (Clearer Intent):**
124
+ - `inspect_workflow_hierarchy`
125
+ - `read_workflow_state`
126
+ - `query_execution_history`
127
+ - `get_ancestor_results`
128
+
129
+ 3. **Hierarchical Grouping (MCP Style):**
130
+ - `workflows/inspect_hierarchy`
131
+ - `workflows/read_state`
132
+ - `workflows/list_ancestors`
133
+ - `cache/get_status`
134
+
135
+ **Anthropic Best Practices:**
136
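+ - Use snake_case for tool names (Anthropic tool names must match `^[a-zA-Z0-9_-]{1,64}$`, so `/`-separated names such as `workflows/inspect_hierarchy` must be rewritten, e.g. `workflows_inspect_hierarchy`)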
137
+ - Group related tools with common prefixes (e.g., `workflow_*`)
138
+ - Avoid generic names like `get` or `query` - be specific
139
+ - Names should be immediately understandable to the model
140
+
141
+ ### Complete Tool Definition Set
142
+
143
+ #### 1. Workflow Hierarchy Inspector
144
+
145
+ ```json
146
+ {
147
+ "name": "workflow_inspect_hierarchy",
148
+ "description": "Get the current workflow's position in the execution hierarchy. Returns parent, ancestors, siblings, and depth information.",
149
+ "input_schema": {
150
+ "type": "object",
151
+ "properties": {
152
+ "node_id": {
153
+ "type": "string",
154
+ "description": "Workflow node ID. If omitted, uses current workflow context."
155
+ },
156
+ "depth": {
157
+ "type": "string",
158
+ "enum": ["current_only", "parent_only", "ancestors_only", "full_tree"],
159
+ "description": "How much of the hierarchy to return",
160
+ "default": "full_tree"
161
+ }
162
+ },
163
+ "required": []
164
+ }
165
+ }
166
+ ```
167
+
168
+ **Example Response:**
169
+ ```json
170
+ {
171
+ "current": {
172
+ "id": "workflow-456",
173
+ "name": "DataProcessingStep",
174
+ "status": "running",
175
+ "started_at": 1702080000000,
176
+ "elapsed_ms": 5432
177
+ },
178
+ "parent": {
179
+ "id": "workflow-123",
180
+ "name": "MainOrchestrator",
181
+ "status": "running",
182
+ "depth": 1
183
+ },
184
+ "ancestors": [
185
+ {
186
+ "id": "workflow-123",
187
+ "name": "MainOrchestrator",
188
+ "depth": 1
189
+ },
190
+ {
191
+ "id": "root-001",
192
+ "name": "RootWorkflow",
193
+ "depth": 2
194
+ }
195
+ ],
196
+ "siblings": [
197
+ {
198
+ "id": "workflow-789",
199
+ "name": "DataValidationStep",
200
+ "status": "completed"
201
+ },
202
+ {
203
+ "id": "workflow-999",
204
+ "name": "DataTransformStep",
205
+ "status": "pending"
206
+ }
207
+ ],
208
+ "hierarchy_depth": 2,
209
+ "total_siblings": 2
210
+ }
211
+ ```
212
+
213
+ #### 2. Ancestor Output Reader
214
+
215
+ ```json
216
+ {
217
+ "name": "workflow_read_ancestor_outputs",
218
+ "description": "Read execution outputs and results from ancestor workflows. Supports filtering by ancestor name or ID.",
219
+ "input_schema": {
220
+ "type": "object",
221
+ "properties": {
222
+ "ancestor_id": {
223
+ "type": "string",
224
+ "description": "Specific ancestor workflow ID. If omitted, returns outputs from all ancestors in order."
225
+ },
226
+ "ancestor_name": {
227
+ "type": "string",
228
+ "description": "Filter by ancestor workflow name (e.g., 'MainOrchestrator')"
229
+ },
230
+ "include_state_snapshots": {
231
+ "type": "boolean",
232
+ "description": "Include full state snapshots from ancestors",
233
+ "default": false
234
+ },
235
+ "limit_ancestry_depth": {
236
+ "type": "integer",
237
+ "description": "Only go this many levels up (1=parent only, 2=parent+grandparent, etc)",
238
+ "minimum": 1
239
+ }
240
+ },
241
+ "required": []
242
+ }
243
+ }
244
+ ```
245
+
246
+ **Example Response:**
247
+ ```json
248
+ {
249
+ "outputs": [
250
+ {
251
+ "workflow_id": "workflow-123",
252
+ "workflow_name": "MainOrchestrator",
253
+ "depth": 1,
254
+ "status": "running",
255
+ "result": {
256
+ "data_schema": "validated",
257
+ "record_count": 15000,
258
+ "processing_duration_ms": 2341
259
+ },
260
+ "state_snapshot": {
261
+ "timestamp": 1702080005000,
262
+ "error_count": 0,
263
+ "warnings": ["High memory usage detected"]
264
+ }
265
+ }
266
+ ]
267
+ }
268
+ ```
269
+
270
+ #### 3. Cache Status Inspector
271
+
272
+ ```json
273
+ {
274
+ "name": "workflow_inspect_cache",
275
+ "description": "Inspect caching status for current workflow and ancestors. Shows which results have been cached vs computed fresh.",
276
+ "input_schema": {
277
+ "type": "object",
278
+ "properties": {
279
+ "node_id": {
280
+ "type": "string",
281
+ "description": "Workflow node ID to check cache for"
282
+ },
283
+ "check_ancestors": {
284
+ "type": "boolean",
285
+ "description": "Also check cache status for ancestor workflows",
286
+ "default": true
287
+ },
288
+ "cache_key_filter": {
289
+ "type": "string",
290
+ "description": "Only return cache entries matching this filter (supports wildcards)"
291
+ }
292
+ },
293
+ "required": []
294
+ }
295
+ }
296
+ ```
297
+
298
+ **Example Response:**
299
+ ```json
300
+ {
301
+ "workflow_id": "workflow-456",
302
+ "cache_status": {
303
+ "enabled": true,
304
+ "entries": [
305
+ {
306
+ "key": "data_validation_result",
307
+ "cached": true,
308
+ "age_ms": 1234,
309
+ "hit_count": 3,
310
+ "source_workflow": "workflow-123"
311
+ },
312
+ {
313
+ "key": "schema_analysis",
314
+ "cached": false,
315
+ "computed_ms": 5678,
316
+ "computation_time": 5678
317
+ }
318
+ ],
319
+ "total_cache_size_bytes": 102400,
320
+ "cache_hit_rate": 0.75
321
+ }
322
+ }
323
+ ```
324
+
325
+ #### 4. Event History Reader
326
+
327
+ ```json
328
+ {
329
+ "name": "workflow_read_event_history",
330
+ "description": "Read events from workflow execution tree. Supports filtering by type, workflow, time range.",
331
+ "input_schema": {
332
+ "type": "object",
333
+ "properties": {
334
+ "workflow_id": {
335
+ "type": "string",
336
+ "description": "Filter to specific workflow (if omitted, includes all)"
337
+ },
338
+ "event_types": {
339
+ "type": "array",
340
+ "items": {
341
+ "type": "string",
342
+ "enum": ["stepStart", "stepEnd", "toolInvocation", "error", "stateSnapshot", "childAttached"]
343
+ },
344
+ "description": "Only include these event types"
345
+ },
346
+ "limit": {
347
+ "type": "integer",
348
+ "description": "Maximum number of events to return",
349
+ "default": 100,
350
+ "maximum": 1000
351
+ },
352
+ "include_full_context": {
353
+ "type": "boolean",
354
+ "description": "Include full event details and payload",
355
+ "default": false
356
+ }
357
+ },
358
+ "required": []
359
+ }
360
+ }
361
+ ```
362
+
363
+ #### 5. Workflow State Snapshot Reader
364
+
365
+ ```json
366
+ {
367
+ "name": "workflow_inspect_state_snapshot",
368
+ "description": "Read captured state snapshot from a specific workflow. Shows decorated @ObservedState values at point of capture.",
369
+ "input_schema": {
370
+ "type": "object",
371
+ "properties": {
372
+ "workflow_id": {
373
+ "type": "string",
374
+ "description": "Workflow to read state from"
375
+ },
376
+ "snapshot_timestamp": {
377
+ "type": "integer",
378
+ "description": "Specific snapshot timestamp (if omitted, returns latest)"
379
+ },
380
+ "property_filter": {
381
+ "type": "string",
382
+ "description": "Only return state properties matching this filter"
383
+ }
384
+ },
385
+ "required": ["workflow_id"]
386
+ }
387
+ }
388
+ ```
389
+
390
+ ---
391
+
392
+ ## Hierarchy Inspection Patterns
393
+
394
+ ### Tree Navigation Query Language
395
+
396
+ From research on LangGraph, XPath, and ADK workflow patterns, effective hierarchy inspection requires:
397
+
398
+ **1. Axis-Based Navigation (XPath-Inspired):**
399
+
400
+ ```
401
+ ancestor::* → All ancestors from current to root
402
+ ancestor-or-self::* → Current node plus all ancestors
403
+ parent::* → Immediate parent only
404
+ sibling::* → All siblings at same level
405
+ child::* → All direct children
406
+ descendant::* → All children recursively
407
+ descendant-or-self::* → Current plus all descendants
408
+ ```
409
+
410
+ **2. Predicate Filtering:**
411
+
412
+ ```
413
+ [name="MainOrchestrator"] → Filter by name
414
+ [status="completed"] → Filter by status
415
+ [depth=1] → Filter by hierarchy depth
416
+ [time_range="1702080000000..1702081000000"] → Filter by time
417
+ ```
418
+
419
+ ### Exposure Architecture
420
+
421
+ **What Agents Should See (READ-ONLY):**
422
+ - Workflow IDs and names
423
+ - Execution status (idle, running, completed, failed)
424
+ - Hierarchy relationships (parent, children, siblings)
425
+ - Execution timeline (start time, duration, elapsed)
426
+ - Results and outputs from ancestor workflows
427
+ - State snapshots from decision points
428
+ - Event history and logs
429
+ - Cache status and hit rates
430
+
431
+ **What Agents Should NEVER See:**
432
+ - Internal implementation details
433
+ - Memory pointers or internal node references
434
+ - Credentials or secrets in ancestor state
435
+ - Private parent workflow configurations
436
+ - Sibling workflow inputs (only outputs)
437
+ - System-level performance metrics
438
+ - Other tenant's data (multi-tenant systems)
439
+
440
+ ### Security Boundaries - Read-Only Access Pattern
441
+
442
+ Implement introspection as **read-only query tools** with these protections:
443
+
444
+ ```typescript
445
+ interface WorkflowIntrospectionRequest {
446
+ // Which node to query
447
+ node_id: string;
448
+
449
+ // What information is requested
450
+ query_type: 'hierarchy' | 'outputs' | 'state' | 'events';
451
+
452
+ // Explicit data inclusion flags (deny-by-default)
453
+ include_outputs?: boolean;
454
+ include_state_snapshots?: boolean;
455
+ include_event_history?: boolean;
456
+ include_cache_status?: boolean;
457
+
458
+ // Scope limitations
459
+ max_ancestry_depth?: number; // Limit how far up the tree we traverse
460
+ max_results?: number; // Limit result set size
461
+ time_range_start?: number; // Prevent excessive historical queries
462
+ time_range_end?: number;
463
+ }
464
+
465
+ interface WorkflowIntrospectionResponse {
466
+ // What was queried
467
+ request_id: string;
468
+
469
+ // Results (filtered by permissions)
470
+ data: {
471
+ hierarchy?: HierarchyData;
472
+ outputs?: OutputData;
473
+ state?: StateData;
474
+ events?: EventData;
475
+ };
476
+
477
+ // Metadata for debugging
478
+ execution_time_ms: number;
479
+ results_count: number;
480
+ is_complete: boolean; // Whether all requested data is included
481
+ truncation_reason?: string; // Why results might be incomplete
482
+ }
483
+ ```
484
+
485
+ ---
486
+
487
+ ## Security Considerations
488
+
489
+ ### Key Threat Vectors for Agent Introspection
490
+
491
+ **1. Prompt Injection via Introspection Data**
492
+ - Untrusted data in ancestor outputs could inject prompts
493
+ - **Mitigation**: Introspection returns structured, validated data only
494
+ - Agents cannot execute code embedded in returned state
495
+
496
+ **2. Information Leakage**
497
+ - Agents could read sensitive data from ancestor states
498
+ - **Mitigation**:
499
+ - Read-only access only
500
+ - Explicit `include_sensitive_data` flags
501
+ - Audit logging of all introspection queries
502
+ - Secrets never included in state snapshots
503
+
504
+ **3. Recursive Self-Modification**
505
+ - Agent could inspect itself and modify its own execution
506
+ - **Mitigation**:
507
+ - Agents cannot read or modify their own prompts
508
+ - Introspection tools return data only, never take actions
509
+ - Self-spawning workflows require explicit approval
510
+
511
+ **4. Denial of Service via Deep Hierarchy**
512
+ - Agents could query very deep trees or large result sets
513
+ - **Mitigation**:
514
+ - `max_ancestry_depth` limit (e.g., 10 levels)
515
+ - `max_results` limit (e.g., 1000 items)
516
+ - Pagination for large result sets
517
+ - Query timeout (e.g., 5 second max)
518
+
519
+ ### Design Pattern: Capability Tokens
520
+
521
+ From AWS Bedrock and Cerbos research, use explicit capabilities:
522
+
523
+ ```typescript
524
+ interface AgentCapabilities {
525
+ // Introspection permissions
526
+ can_inspect_hierarchy: boolean;
527
+ can_read_ancestor_outputs: boolean;
528
+ can_read_state_snapshots: boolean;
529
+ can_read_event_history: boolean;
530
+
531
+ // Modification permissions
532
+ can_spawn_child_workflows: boolean;
533
+ can_modify_sibling_state: boolean;
534
+ can_spawn_arbitrary_agents: boolean;
535
+
536
+ // Resource limits
537
+ max_ancestry_depth: number;
538
+ max_concurrent_children: number;
539
+ max_result_size_bytes: number;
540
+ }
541
+ ```
542
+
543
+ ### Sandboxing Pattern: Container Isolation
544
+
545
+ From research on multi-agent security:
546
+
547
+ ```typescript
548
+ interface AgentSandbox {
549
+ // Execution environment
550
+ container_id: string;
551
+ memory_limit_mb: number;
552
+ cpu_shares: number;
553
+ network_restricted: boolean;
554
+
555
+ // Filesystem isolation
556
+ mount_readonly: string[]; // Read-only mounts
557
+ mount_readwrite: string[]; // Writable mounts
558
+ mount_hidden: string[]; // Completely hidden
559
+
560
+ // Tool access restrictions
561
+ allowed_tools: string[]; // Whitelist of tools
562
+ forbidden_tools: string[]; // Blacklist of tools
563
+
564
+ // Data access controls
565
+ visible_workflow_ids: string[]; // Which workflows this agent can see
566
+ allowed_data_categories: string[]; // PII, secrets, etc.
567
+ }
568
+ ```
569
+
570
+ ---
571
+
572
+ ## Self-Modification Capabilities
573
+
574
+ ### Safe Patterns for Workflow Spawning
575
+
576
+ Research from LoopStacks, LangGraph, and Google ADK identifies several safe patterns:
577
+
578
+ **1. Explicit Spawning with Approval**
579
+
580
+ Agents cannot unilaterally spawn workflows. Instead:
581
+
582
+ ```json
583
+ {
584
+ "name": "workflow_spawn_child",
585
+ "description": "Request to spawn a child workflow. Requires explicit configuration and parent approval.",
586
+ "input_schema": {
587
+ "type": "object",
588
+ "properties": {
589
+ "child_workflow_name": {
590
+ "type": "string",
591
+ "description": "Name for the child workflow"
592
+ },
593
+ "workflow_template_id": {
594
+ "type": "string",
595
+ "description": "Pre-approved template to use (REQUIRED - agents cannot define arbitrary workflows)"
596
+ },
597
+ "input_data": {
598
+ "type": "object",
599
+ "description": "Data to pass to child workflow"
600
+ },
601
+ "parallel_execution": {
602
+ "type": "boolean",
603
+ "description": "Whether to run in parallel or sequentially",
604
+ "default": false
605
+ },
606
+ "timeout_seconds": {
607
+ "type": "integer",
608
+ "description": "Maximum execution time",
609
+ "minimum": 1,
610
+ "maximum": 3600
611
+ }
612
+ },
613
+ "required": ["child_workflow_name", "workflow_template_id"]
614
+ }
615
+ }
616
+ ```
617
+
618
+ **2. Prompt Generation with Validation**
619
+
620
+ ```json
621
+ {
622
+ "name": "workflow_generate_dynamic_prompt",
623
+ "description": "Generate a prompt for a child workflow based on current analysis. Generated prompt is validated before execution.",
624
+ "input_schema": {
625
+ "type": "object",
626
+ "properties": {
627
+ "target_workflow_id": {
628
+ "type": "string",
629
+ "description": "Which child workflow to generate prompt for"
630
+ },
631
+ "prompt_template": {
632
+ "type": "string",
633
+ "enum": [
634
+ "data_analysis",
635
+ "validation",
636
+ "transformation",
637
+ "summarization",
638
+ "decision_making"
639
+ ],
640
+ "description": "Template constrains prompt generation"
641
+ },
642
+ "template_variables": {
643
+ "type": "object",
644
+ "description": "Variables to substitute in template"
645
+ },
646
+ "system_context": {
647
+ "type": "string",
648
+ "description": "System-level context to include (optional)"
649
+ }
650
+ },
651
+ "required": ["target_workflow_id", "prompt_template"]
652
+ }
653
+ }
654
+ ```
655
+
656
+ **3. Safety Limits on Self-Modification**
657
+
658
+ ```typescript
659
+ interface WorkflowModificationLimits {
660
+ // Spawning limits
661
+ max_children_per_workflow: number; // e.g., 5
662
+ max_total_descendants: number; // e.g., 100
663
+ max_depth: number; // e.g., 10 levels
664
+
665
+ // Prompt generation limits
666
+ max_prompt_length_chars: number; // e.g., 5000
667
+ max_dynamic_prompts_per_session: number; // e.g., 10
668
+
669
+ // State modification limits
670
+ can_modify_own_state: boolean; // false - only read
671
+ can_modify_parent_state: boolean; // false
672
+ can_modify_sibling_state: boolean; // false
673
+ can_modify_ancestor_state: boolean; // false
674
+
675
+ // Execution limits
676
+ max_concurrent_child_workflows: number; // e.g., 3
677
+ max_total_execution_time_seconds: number; // e.g., 3600
678
+ }
679
+ ```
680
+
681
+ ### Self-Awareness Patterns from Anthropic Research
682
+
683
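+ Anthropic's research on introspection, together with the broader agent literature (e.g., the Reflexion framework and chain-of-thought prompting), suggests these metacognitive capabilities: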
684
+
685
+ **1. Reflexion Pattern** (Self-Evaluation and Memory)
686
+ - After each reasoning-action cycle, agent critiques output
687
+ - Stores insights for future reference
688
+ - Enables "thinking about thinking"
689
+
690
+ **2. Chain-of-Thought with Introspection**
691
+ - Agent reasons through multiple steps
692
+ - At each step, checks own reasoning against hierarchy context
693
+ - Can revise based on ancestor examples
694
+
695
+ **3. Self-Correcting Workflows**
696
+ - Agent detects errors via introspection
697
+ - Can spawn correction workflows
698
+ - Learns which patterns work within the organization
699
+
700
+ ---
701
+
702
+ ## Implementation Patterns for Groundswell
703
+
704
+ ### Integration with Existing Groundswell Architecture
705
+
706
+ Based on analysis of `/src/core/workflow.ts`, `/src/core/agent.ts`, and `/src/core/event-tree.ts`:
707
+
708
+ **Current Capabilities:**
709
+ - `EventTreeHandle` already provides `getAncestors()` and `getChildren()`
710
+ - `WorkflowContext` maintains connection to root workflow
711
+ - `WorkflowNode` tracks hierarchy relationships
712
+ - Event emission is already implemented
713
+
714
+ **Proposed Introspection Tool Implementation:**
715
+
716
+ #### 1. WorkflowIntrospectionService
717
+
718
+ ```typescript
719
+ // src/core/introspection-service.ts
720
+
721
+ import type { WorkflowNode, WorkflowEvent } from '../types/index.js';
722
+ import type { EventTreeHandle } from '../types/workflow-context.js';
723
+
724
+ export interface HierarchyInfo {
725
+ current: {
726
+ id: string;
727
+ name: string;
728
+ status: WorkflowStatus;
729
+ started_at: number;
730
+ elapsed_ms: number;
731
+ };
732
+ parent?: {
733
+ id: string;
734
+ name: string;
735
+ status: WorkflowStatus;
736
+ depth: number;
737
+ };
738
+ ancestors: Array<{
739
+ id: string;
740
+ name: string;
741
+ status: WorkflowStatus;
742
+ depth: number;
743
+ }>;
744
+ siblings: Array<{
745
+ id: string;
746
+ name: string;
747
+ status: WorkflowStatus;
748
+ }>;
749
+ hierarchy_depth: number;
750
+ total_siblings: number;
751
+ }
752
+
753
+ export class WorkflowIntrospectionService {
754
+ constructor(
755
+ private eventTree: EventTreeHandle,
756
+ private workflowNode: WorkflowNode
757
+ ) {}
758
+
759
+ /**
760
+ * Get full hierarchy information for a node
761
+ */
762
+ inspectHierarchy(nodeId?: string): HierarchyInfo {
763
+ const targetId = nodeId || this.workflowNode.id;
764
+ const node = this.eventTree.getNode(targetId);
765
+
766
+ if (!node) {
767
+ throw new Error(`Node not found: ${targetId}`);
768
+ }
769
+
770
+ const ancestors = this.eventTree.getAncestors(targetId);
771
+ const parent = ancestors[0];
772
+ const children = this.eventTree.getChildren(parent?.id || '');
773
+
774
+ return {
775
+ current: this.extractNodeInfo(node),
776
+ parent: parent ? this.extractNodeInfo(parent) : undefined,
777
+ ancestors: ancestors.map(a => this.extractNodeInfo(a)),
778
+ siblings: children.filter(c => c.id !== targetId),
779
+ hierarchy_depth: ancestors.length,
780
+ total_siblings: children.length - 1
781
+ };
782
+ }
783
+
784
+ /**
785
+ * Read outputs from ancestor workflows
786
+ */
787
+ readAncestorOutputs(ancestorId?: string, maxDepth?: number): object {
788
+ const ancestors = this.eventTree.getAncestors(this.workflowNode.id);
789
+ const filtered = maxDepth
790
+ ? ancestors.slice(0, maxDepth)
791
+ : ancestors;
792
+
793
+ if (ancestorId) {
794
+ const ancestor = filtered.find(a => a.id === ancestorId);
795
+ if (!ancestor) {
796
+ throw new Error(`Ancestor not found: ${ancestorId}`);
797
+ }
798
+ return this.extractNodeOutput(ancestor);
799
+ }
800
+
801
+ return {
802
+ outputs: filtered.map(a => ({
803
+ workflow_id: a.id,
804
+ result: this.extractNodeOutput(a)
805
+ }))
806
+ };
807
+ }
808
+
809
+ /**
810
+ * Inspect cache status (placeholder: every cache_status value below is hard-coded)
811
+ */
812
+ inspectCache(nodeId?: string): object {
813
+ // Stub: a real implementation would query the WorkflowContext cache for this node
814
+ // The returned shape is meant to match the cache tool specification; only workflow_id is derived from input
815
+ return {
816
+ workflow_id: nodeId || this.workflowNode.id,
817
+ cache_status: {
818
+ enabled: true,
819
+ entries: [],
820
+ total_cache_size_bytes: 0,
821
+ cache_hit_rate: 0
822
+ }
823
+ };
824
+ }
825
+
826
+ /**
827
+ * Read event history for a node; returns at most `limit` events, while total_count reports all matching events before truncation
828
+ */
829
+ readEventHistory(workflowId?: string, eventTypes?: string[], limit = 100): object {
830
+ const targetId = workflowId || this.workflowNode.id;
831
+ const node = this.eventTree.getNode(targetId);
832
+
833
+ if (!node) {
834
+ throw new Error(`Node not found: ${targetId}`);
835
+ }
836
+
837
+ const matched = this.extractEvents(node, eventTypes); // count before slicing so callers can tell when more events exist
838
+ return { events: matched.slice(0, limit), total_count: matched.length };
839
+ }
840
+
841
+ // Helper methods
842
+ private extractNodeInfo(node: any): object {
843
+ return {
844
+ id: node.id,
845
+ name: node.name,
846
+ status: node.status || 'unknown',
847
+ depth: this.calculateDepth(node)
848
+ };
849
+ }
850
+
851
+ private extractNodeOutput(node: any): object {
852
+ // Extract result data from node
853
+ return node.payload || node.metrics || {};
854
+ }
855
+
856
+ private extractEvents(node: any, types?: string[]): WorkflowEvent[] {
857
+ if (!node.events) return [];
858
+
859
+ if (types && types.length > 0) {
860
+ return node.events.filter(e => types.includes(e.type));
861
+ }
862
+
863
+ return node.events;
864
+ }
865
+
866
+ private calculateDepth(node: any): number {
867
+ if (!node.parentId) return 0;
868
+ const parent = this.eventTree.getNode(node.parentId);
869
+ return parent ? 1 + this.calculateDepth(parent) : 1;
870
+ }
871
+ }
872
+ ```
873
+
874
+ #### 2. Introspection Tools for Agent
875
+
876
+ ```typescript
877
+ // src/core/introspection-tools.ts
878
+
879
+ import type { Tool } from '../types/index.js';
880
+ import { WorkflowIntrospectionService } from './introspection-service.js';
881
+
882
+ export function createIntrospectionTools(
883
+ introspectionService: WorkflowIntrospectionService
884
+ ): Tool[] {
885
+ return [
886
+ {
887
+ name: 'workflow_inspect_hierarchy',
888
+ description: 'Get the current workflow\'s position in the execution hierarchy, including ancestors, siblings, and depth.',
889
+ input_schema: {
890
+ type: 'object' as const,
891
+ properties: {
892
+ node_id: {
893
+ type: 'string',
894
+ description: 'Workflow node ID. If omitted, uses current workflow context.'
895
+ },
896
+ depth: {
897
+ type: 'string',
898
+ enum: ['current_only', 'parent_only', 'ancestors_only', 'full_tree'],
899
+ description: 'How much of the hierarchy to return',
900
+ default: 'full_tree'
901
+ }
902
+ },
903
+ required: []
904
+ },
905
+ handler: async (input: any) => {
906
+ const hierarchy = introspectionService.inspectHierarchy(input.node_id);
907
+
908
+ // Apply depth filter
909
+ if (input.depth === 'current_only') {
910
+ return { current: hierarchy.current };
911
+ } else if (input.depth === 'parent_only') {
912
+ return { current: hierarchy.current, parent: hierarchy.parent };
913
+ } else if (input.depth === 'ancestors_only') {
914
+ return { ancestors: hierarchy.ancestors };
915
+ }
916
+
917
+ return hierarchy;
918
+ }
919
+ },
920
+
921
+ {
922
+ name: 'workflow_read_ancestor_outputs',
923
+ description: 'Read execution outputs and results from ancestor workflows.',
924
+ input_schema: {
925
+ type: 'object' as const,
926
+ properties: {
927
+ ancestor_id: {
928
+ type: 'string',
929
+ description: 'Specific ancestor workflow ID'
930
+ },
931
+ max_depth: {
932
+ type: 'integer',
933
+ description: 'Only go this many levels up'
934
+ }
935
+ },
936
+ required: []
937
+ },
938
+ handler: async (input: any) => {
939
+ return introspectionService.readAncestorOutputs(input.ancestor_id, input.max_depth);
940
+ }
941
+ },
942
+
943
+ {
944
+ name: 'workflow_inspect_cache',
945
+ description: 'Inspect caching status for current or specified workflow.',
946
+ input_schema: {
947
+ type: 'object' as const,
948
+ properties: {
949
+ node_id: {
950
+ type: 'string',
951
+ description: 'Workflow node ID'
952
+ }
953
+ },
954
+ required: []
955
+ },
956
+ handler: async (input: any) => {
957
+ return introspectionService.inspectCache(input.node_id);
958
+ }
959
+ },
960
+
961
+ {
962
+ name: 'workflow_read_event_history',
963
+ description: 'Read events from workflow execution tree.',
964
+ input_schema: {
965
+ type: 'object' as const,
966
+ properties: {
967
+ workflow_id: {
968
+ type: 'string',
969
+ description: 'Filter to specific workflow'
970
+ },
971
+ event_types: {
972
+ type: 'array',
973
+ items: {
974
+ type: 'string',
975
+ enum: ['stepStart', 'stepEnd', 'toolInvocation', 'error', 'stateSnapshot']
976
+ },
977
+ description: 'Only include these event types'
978
+ },
979
+ limit: {
980
+ type: 'integer',
981
+ description: 'Maximum number of events',
982
+ default: 100
983
+ }
984
+ },
985
+ required: []
986
+ },
987
+ handler: async (input: any) => {
988
+ return introspectionService.readEventHistory(
989
+ input.workflow_id,
990
+ input.event_types,
991
+ input.limit
992
+ );
993
+ }
994
+ }
995
+ ];
996
+ }
997
+ ```
998
+
999
+ #### 3. Wiring Into WorkflowContext
1000
+
1001
+ ```typescript
1002
+ // In createWorkflowContext() function
1003
+ // src/core/workflow-context.ts
1004
+
1005
+ import { WorkflowIntrospectionService } from './introspection-service.js';
1006
+ import { createIntrospectionTools } from './introspection-tools.js';
1007
+
1008
+ export function createWorkflowContext(workflow, parentId?, enableReflection?) {
1009
+ // ... existing code ...
1010
+
1011
+ // Create introspection service
1012
+ const introspectionService = new WorkflowIntrospectionService(
1013
+ eventTree,
1014
+ workflow.getNode()
1015
+ );
1016
+
1017
+ // Create introspection tools
1018
+ const introspectionTools = createIntrospectionTools(introspectionService);
1019
+
1020
+ // Add to agent config
1021
+ const agentConfig = {
1022
+ ...existingConfig,
1023
+ tools: [...(existingConfig.tools || []), ...introspectionTools]
1024
+ };
1025
+
1026
+ return {
1027
+ // ... existing context properties ...
1028
+ introspectionService,
1029
+ getIntrospectionTools: () => introspectionTools
1030
+ };
1031
+ }
1032
+ ```
1033
+
1034
+ ---
1035
+
1036
+ ## Best Practices Summary
1037
+
1038
+ ### For Tool Design
1039
+
1040
+ 1. **Explicit Over Implicit**: Require agents to explicitly request sensitive data
1041
+ 2. **Structured Schemas**: Use strict JSON schemas to prevent injection
1042
+ 3. **Clear Names**: Tool names should be immediately understandable
1043
+ 4. **Comprehensive Descriptions**: Include examples in descriptions
1044
+ 5. **Error Messages**: Return detailed, actionable error messages
1045
+
1046
+ ### For Hierarchy Inspection
1047
+
1048
+ 1. **Read-Only Access**: Introspection tools never modify state
1049
+ 2. **Depth Limits**: Prevent unbounded tree traversal
1050
+ 3. **Result Limits**: Cap result set sizes
1051
+ 4. **Time Limits**: Implement query timeouts
1052
+ 5. **Audit Logging**: Log all introspection queries
1053
+
1054
+ ### For Self-Modification
1055
+
1056
+ 1. **Approval Required**: No unilateral workflow spawning
1057
+ 2. **Template-Based**: Agents use pre-approved templates
1058
+ 3. **Prompt Validation**: Generated prompts are validated before execution
1059
+ 4. **Resource Limits**: Cap concurrent children, depth, total descendants
1060
+ 5. **Capability Tokens**: Explicitly grant permissions
1061
+
1062
+ ### For Security
1063
+
1064
+ 1. **Sandboxing**: Run agents in isolated containers
1065
+ 2. **Least Privilege**: Start read-only, grant permissions as needed
1066
+ 3. **Secrets Protection**: Never include credentials in introspection data
1067
+ 4. **Tenant Isolation**: Agents only see their own workflow tree
1068
+ 5. **Network Restrictions**: Isolate tool execution from host network
1069
+
1070
+ ---
1071
+
1072
+ ## References
1073
+
1074
+ ### Key Research Sources
1075
+
1076
+ - **Anthropic Introspection Research**: [Emergent introspective awareness in large language models](https://www.anthropic.com/research/introspection)
1077
+ - **Anthropic Tool Use Docs**: [Tool use with Claude](https://platform.claude.com/docs/en/agents-and-tools/tool-use/overview)
1078
+ - **Claude Agent SDK**: [Building agents with the Claude Agent SDK](https://www.anthropic.com/engineering/building-agents-with-the-claude-agent-sdk)
1079
+ - **Model Context Protocol**: [Tools - MCP Specification](https://modelcontextprotocol.io/docs/concepts/tools)
1080
+ - **Multi-Agent Security**: [Securing Agentic AI: authorization patterns](https://dev.to/siddhantkcode/securing-agentic-ai-authorization-patterns-for-autonomous-systems-3ajo)
1081
+ - **Prompt Injection Defense**: [Design Patterns for Securing LLM Agents against Prompt Injection](https://arxiv.org/abs/2506.08837)
1082
+ - **Hierarchical Multi-Agent Systems**: [A Taxonomy of Hierarchical Multi-Agent Systems](https://arxiv.org/html/2508.12683)
1083
+ - **LangGraph Workflows**: [Workflows and agents - LangChain Documentation](https://docs.langchain.com/oss/python/langgraph/workflows-agents)
1084
+ - **Google ADK Safety**: [Safety and Security for AI Agents - Agent Development Kit](https://google.github.io/adk-docs/safety/)
1085
+