groundswell 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +188 -0
- package/README.md +99 -5
- package/dist/__tests__/adversarial/attachChild-performance.test.d.ts +16 -0
- package/dist/__tests__/adversarial/attachChild-performance.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/attachChild-performance.test.js +187 -0
- package/dist/__tests__/adversarial/attachChild-performance.test.js.map +1 -0
- package/dist/__tests__/adversarial/circular-reference.test.d.ts +13 -0
- package/dist/__tests__/adversarial/circular-reference.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/circular-reference.test.js +92 -0
- package/dist/__tests__/adversarial/circular-reference.test.js.map +1 -0
- package/dist/__tests__/adversarial/complex-circular-reference.test.d.ts +16 -0
- package/dist/__tests__/adversarial/complex-circular-reference.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/complex-circular-reference.test.js +127 -0
- package/dist/__tests__/adversarial/complex-circular-reference.test.js.map +1 -0
- package/dist/__tests__/adversarial/concurrent-task-failures.test.d.ts +21 -0
- package/dist/__tests__/adversarial/concurrent-task-failures.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/concurrent-task-failures.test.js +667 -0
- package/dist/__tests__/adversarial/concurrent-task-failures.test.js.map +1 -0
- package/dist/__tests__/adversarial/deep-analysis.test.d.ts +6 -0
- package/dist/__tests__/adversarial/deep-analysis.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/deep-analysis.test.js +877 -0
- package/dist/__tests__/adversarial/deep-analysis.test.js.map +1 -0
- package/dist/__tests__/adversarial/deep-hierarchy-stress.test.d.ts +13 -0
- package/dist/__tests__/adversarial/deep-hierarchy-stress.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/deep-hierarchy-stress.test.js +186 -0
- package/dist/__tests__/adversarial/deep-hierarchy-stress.test.js.map +1 -0
- package/dist/__tests__/adversarial/e2e-prd-validation.test.d.ts +6 -0
- package/dist/__tests__/adversarial/e2e-prd-validation.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/e2e-prd-validation.test.js +626 -0
- package/dist/__tests__/adversarial/e2e-prd-validation.test.js.map +1 -0
- package/dist/__tests__/adversarial/edge-case.test.d.ts +6 -0
- package/dist/__tests__/adversarial/edge-case.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/edge-case.test.js +857 -0
- package/dist/__tests__/adversarial/edge-case.test.js.map +1 -0
- package/dist/__tests__/adversarial/error-merge-strategy.test.d.ts +20 -0
- package/dist/__tests__/adversarial/error-merge-strategy.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/error-merge-strategy.test.js +907 -0
- package/dist/__tests__/adversarial/error-merge-strategy.test.js.map +1 -0
- package/dist/__tests__/adversarial/incremental-performance.test.d.ts +2 -0
- package/dist/__tests__/adversarial/incremental-performance.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/incremental-performance.test.js +113 -0
- package/dist/__tests__/adversarial/incremental-performance.test.js.map +1 -0
- package/dist/__tests__/adversarial/node-map-update-benchmarks.test.d.ts +22 -0
- package/dist/__tests__/adversarial/node-map-update-benchmarks.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/node-map-update-benchmarks.test.js +383 -0
- package/dist/__tests__/adversarial/node-map-update-benchmarks.test.js.map +1 -0
- package/dist/__tests__/adversarial/observer-propagation.test.d.ts +21 -0
- package/dist/__tests__/adversarial/observer-propagation.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/observer-propagation.test.js +404 -0
- package/dist/__tests__/adversarial/observer-propagation.test.js.map +1 -0
- package/dist/__tests__/adversarial/parent-validation.test.d.ts +13 -0
- package/dist/__tests__/adversarial/parent-validation.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/parent-validation.test.js +128 -0
- package/dist/__tests__/adversarial/parent-validation.test.js.map +1 -0
- package/dist/__tests__/adversarial/prd-12-2-compliance.test.d.ts +20 -0
- package/dist/__tests__/adversarial/prd-12-2-compliance.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/prd-12-2-compliance.test.js +482 -0
- package/dist/__tests__/adversarial/prd-12-2-compliance.test.js.map +1 -0
- package/dist/__tests__/adversarial/prd-compliance.test.d.ts +6 -0
- package/dist/__tests__/adversarial/prd-compliance.test.d.ts.map +1 -0
- package/dist/__tests__/adversarial/prd-compliance.test.js +886 -0
- package/dist/__tests__/adversarial/prd-compliance.test.js.map +1 -0
- package/dist/__tests__/compatibility/backward-compatibility.test.d.ts +22 -0
- package/dist/__tests__/compatibility/backward-compatibility.test.d.ts.map +1 -0
- package/dist/__tests__/compatibility/backward-compatibility.test.js +1843 -0
- package/dist/__tests__/compatibility/backward-compatibility.test.js.map +1 -0
- package/dist/__tests__/helpers/index.d.ts +10 -0
- package/dist/__tests__/helpers/index.d.ts.map +1 -0
- package/dist/__tests__/helpers/index.js +10 -0
- package/dist/__tests__/helpers/index.js.map +1 -0
- package/dist/__tests__/helpers/tree-verification.d.ts +90 -0
- package/dist/__tests__/helpers/tree-verification.d.ts.map +1 -0
- package/dist/__tests__/helpers/tree-verification.js +202 -0
- package/dist/__tests__/helpers/tree-verification.js.map +1 -0
- package/dist/__tests__/integration/agent-workflow.test.d.ts +2 -0
- package/dist/__tests__/integration/agent-workflow.test.d.ts.map +1 -0
- package/dist/__tests__/integration/agent-workflow.test.js +256 -0
- package/dist/__tests__/integration/agent-workflow.test.js.map +1 -0
- package/dist/__tests__/integration/bidirectional-consistency.test.d.ts +14 -0
- package/dist/__tests__/integration/bidirectional-consistency.test.d.ts.map +1 -0
- package/dist/__tests__/integration/bidirectional-consistency.test.js +668 -0
- package/dist/__tests__/integration/bidirectional-consistency.test.js.map +1 -0
- package/dist/__tests__/integration/observer-logging.test.d.ts +2 -0
- package/dist/__tests__/integration/observer-logging.test.d.ts.map +1 -0
- package/dist/__tests__/integration/observer-logging.test.js +517 -0
- package/dist/__tests__/integration/observer-logging.test.js.map +1 -0
- package/dist/__tests__/integration/tree-mirroring.test.d.ts +2 -0
- package/dist/__tests__/integration/tree-mirroring.test.d.ts.map +1 -0
- package/dist/__tests__/integration/tree-mirroring.test.js +117 -0
- package/dist/__tests__/integration/tree-mirroring.test.js.map +1 -0
- package/dist/__tests__/integration/workflow-reparenting.test.d.ts +12 -0
- package/dist/__tests__/integration/workflow-reparenting.test.d.ts.map +1 -0
- package/dist/__tests__/integration/workflow-reparenting.test.js +239 -0
- package/dist/__tests__/integration/workflow-reparenting.test.js.map +1 -0
- package/dist/__tests__/unit/agent.test.d.ts +2 -0
- package/dist/__tests__/unit/agent.test.d.ts.map +1 -0
- package/dist/__tests__/unit/agent.test.js +143 -0
- package/dist/__tests__/unit/agent.test.js.map +1 -0
- package/dist/__tests__/unit/cache-key.test.d.ts +5 -0
- package/dist/__tests__/unit/cache-key.test.d.ts.map +1 -0
- package/dist/__tests__/unit/cache-key.test.js +145 -0
- package/dist/__tests__/unit/cache-key.test.js.map +1 -0
- package/dist/__tests__/unit/cache.test.d.ts +5 -0
- package/dist/__tests__/unit/cache.test.d.ts.map +1 -0
- package/dist/__tests__/unit/cache.test.js +132 -0
- package/dist/__tests__/unit/cache.test.js.map +1 -0
- package/dist/__tests__/unit/context.test.d.ts +2 -0
- package/dist/__tests__/unit/context.test.d.ts.map +1 -0
- package/dist/__tests__/unit/context.test.js +220 -0
- package/dist/__tests__/unit/context.test.js.map +1 -0
- package/dist/__tests__/unit/decorators.test.d.ts +2 -0
- package/dist/__tests__/unit/decorators.test.d.ts.map +1 -0
- package/dist/__tests__/unit/decorators.test.js +162 -0
- package/dist/__tests__/unit/decorators.test.js.map +1 -0
- package/dist/__tests__/unit/introspection-tools.test.d.ts +5 -0
- package/dist/__tests__/unit/introspection-tools.test.d.ts.map +1 -0
- package/dist/__tests__/unit/introspection-tools.test.js +191 -0
- package/dist/__tests__/unit/introspection-tools.test.js.map +1 -0
- package/dist/__tests__/unit/logger.test.d.ts +2 -0
- package/dist/__tests__/unit/logger.test.d.ts.map +1 -0
- package/dist/__tests__/unit/logger.test.js +241 -0
- package/dist/__tests__/unit/logger.test.js.map +1 -0
- package/dist/__tests__/unit/observable.test.d.ts +2 -0
- package/dist/__tests__/unit/observable.test.d.ts.map +1 -0
- package/dist/__tests__/unit/observable.test.js +251 -0
- package/dist/__tests__/unit/observable.test.js.map +1 -0
- package/dist/__tests__/unit/prompt.test.d.ts +2 -0
- package/dist/__tests__/unit/prompt.test.d.ts.map +1 -0
- package/dist/__tests__/unit/prompt.test.js +113 -0
- package/dist/__tests__/unit/prompt.test.js.map +1 -0
- package/dist/__tests__/unit/reflection.test.d.ts +5 -0
- package/dist/__tests__/unit/reflection.test.d.ts.map +1 -0
- package/dist/__tests__/unit/reflection.test.js +160 -0
- package/dist/__tests__/unit/reflection.test.js.map +1 -0
- package/dist/__tests__/unit/tree-debugger-incremental.test.d.ts +2 -0
- package/dist/__tests__/unit/tree-debugger-incremental.test.d.ts.map +1 -0
- package/dist/__tests__/unit/tree-debugger-incremental.test.js +136 -0
- package/dist/__tests__/unit/tree-debugger-incremental.test.js.map +1 -0
- package/dist/__tests__/unit/tree-debugger.test.d.ts +2 -0
- package/dist/__tests__/unit/tree-debugger.test.d.ts.map +1 -0
- package/dist/__tests__/unit/tree-debugger.test.js +69 -0
- package/dist/__tests__/unit/tree-debugger.test.js.map +1 -0
- package/dist/__tests__/unit/utils/workflow-error-utils.test.d.ts +2 -0
- package/dist/__tests__/unit/utils/workflow-error-utils.test.d.ts.map +1 -0
- package/dist/__tests__/unit/utils/workflow-error-utils.test.js +154 -0
- package/dist/__tests__/unit/utils/workflow-error-utils.test.js.map +1 -0
- package/dist/__tests__/unit/workflow-detachChild.test.d.ts +2 -0
- package/dist/__tests__/unit/workflow-detachChild.test.d.ts.map +1 -0
- package/dist/__tests__/unit/workflow-detachChild.test.js +76 -0
- package/dist/__tests__/unit/workflow-detachChild.test.js.map +1 -0
- package/dist/__tests__/unit/workflow-emitEvent-childDetached.test.d.ts +2 -0
- package/dist/__tests__/unit/workflow-emitEvent-childDetached.test.d.ts.map +1 -0
- package/dist/__tests__/unit/workflow-emitEvent-childDetached.test.js +122 -0
- package/dist/__tests__/unit/workflow-emitEvent-childDetached.test.js.map +1 -0
- package/dist/__tests__/unit/workflow-isDescendantOf.test.d.ts +2 -0
- package/dist/__tests__/unit/workflow-isDescendantOf.test.d.ts.map +1 -0
- package/dist/__tests__/unit/workflow-isDescendantOf.test.js +140 -0
- package/dist/__tests__/unit/workflow-isDescendantOf.test.js.map +1 -0
- package/dist/__tests__/unit/workflow.test.d.ts +2 -0
- package/dist/__tests__/unit/workflow.test.d.ts.map +1 -0
- package/dist/__tests__/unit/workflow.test.js +330 -0
- package/dist/__tests__/unit/workflow.test.js.map +1 -0
- package/dist/cache/cache-key.d.ts +66 -0
- package/dist/cache/cache-key.d.ts.map +1 -0
- package/dist/cache/cache-key.js +195 -0
- package/dist/cache/cache-key.js.map +1 -0
- package/dist/cache/cache.d.ts +104 -0
- package/dist/cache/cache.d.ts.map +1 -0
- package/dist/cache/cache.js +179 -0
- package/dist/cache/cache.js.map +1 -0
- package/{src/cache/index.ts → dist/cache/index.d.ts} +1 -1
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/cache/index.js +6 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/core/agent.d.ts +112 -0
- package/dist/core/agent.d.ts.map +1 -0
- package/dist/core/agent.js +426 -0
- package/dist/core/agent.js.map +1 -0
- package/{src/core/context.ts → dist/core/context.d.ts} +16 -67
- package/dist/core/context.d.ts.map +1 -0
- package/dist/core/context.js +80 -0
- package/dist/core/context.js.map +1 -0
- package/dist/core/event-tree.d.ts +72 -0
- package/dist/core/event-tree.d.ts.map +1 -0
- package/dist/core/event-tree.js +211 -0
- package/dist/core/event-tree.js.map +1 -0
- package/{src/core/factory.ts → dist/core/factory.d.ts} +6 -27
- package/dist/core/factory.d.ts.map +1 -0
- package/dist/core/factory.js +110 -0
- package/dist/core/factory.js.map +1 -0
- package/{src/core/index.ts → dist/core/index.d.ts} +2 -10
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +9 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/logger.d.ts +50 -0
- package/dist/core/logger.d.ts.map +1 -0
- package/dist/core/logger.js +91 -0
- package/dist/core/logger.js.map +1 -0
- package/dist/core/mcp-handler.d.ts +69 -0
- package/dist/core/mcp-handler.d.ts.map +1 -0
- package/dist/core/mcp-handler.js +143 -0
- package/dist/core/mcp-handler.js.map +1 -0
- package/dist/core/prompt.d.ts +80 -0
- package/dist/core/prompt.d.ts.map +1 -0
- package/dist/core/prompt.js +120 -0
- package/dist/core/prompt.js.map +1 -0
- package/dist/core/workflow-context.d.ts +57 -0
- package/dist/core/workflow-context.d.ts.map +1 -0
- package/dist/core/workflow-context.js +263 -0
- package/dist/core/workflow-context.js.map +1 -0
- package/dist/core/workflow.d.ts +241 -0
- package/dist/core/workflow.d.ts.map +1 -0
- package/dist/core/workflow.js +464 -0
- package/dist/core/workflow.js.map +1 -0
- package/dist/debugger/index.d.ts +2 -0
- package/dist/debugger/index.d.ts.map +1 -0
- package/{src/debugger/index.ts → dist/debugger/index.js} +1 -0
- package/dist/debugger/index.js.map +1 -0
- package/dist/debugger/tree-debugger.d.ts +71 -0
- package/dist/debugger/tree-debugger.d.ts.map +1 -0
- package/dist/debugger/tree-debugger.js +198 -0
- package/dist/debugger/tree-debugger.js.map +1 -0
- package/dist/decorators/index.d.ts +4 -0
- package/dist/decorators/index.d.ts.map +1 -0
- package/{src/decorators/index.ts → dist/decorators/index.js} +1 -0
- package/dist/decorators/index.js.map +1 -0
- package/dist/decorators/observed-state.d.ts +32 -0
- package/dist/decorators/observed-state.d.ts.map +1 -0
- package/dist/decorators/observed-state.js +79 -0
- package/dist/decorators/observed-state.js.map +1 -0
- package/dist/decorators/step.d.ts +15 -0
- package/dist/decorators/step.d.ts.map +1 -0
- package/dist/decorators/step.js +110 -0
- package/dist/decorators/step.js.map +1 -0
- package/dist/decorators/task.d.ts +50 -0
- package/dist/decorators/task.d.ts.map +1 -0
- package/dist/decorators/task.js +118 -0
- package/dist/decorators/task.js.map +1 -0
- package/dist/examples/index.d.ts +3 -0
- package/dist/examples/index.d.ts.map +1 -0
- package/{src/examples/index.ts → dist/examples/index.js} +1 -0
- package/dist/examples/index.js.map +1 -0
- package/dist/examples/tdd-orchestrator.d.ts +15 -0
- package/dist/examples/tdd-orchestrator.d.ts.map +1 -0
- package/dist/examples/tdd-orchestrator.js +121 -0
- package/dist/examples/tdd-orchestrator.js.map +1 -0
- package/dist/examples/test-cycle-workflow.d.ts +14 -0
- package/dist/examples/test-cycle-workflow.d.ts.map +1 -0
- package/dist/examples/test-cycle-workflow.js +116 -0
- package/dist/examples/test-cycle-workflow.js.map +1 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/dist/reflection/index.d.ts +5 -0
- package/dist/reflection/index.d.ts.map +1 -0
- package/{src/reflection/index.ts → dist/reflection/index.js} +1 -1
- package/dist/reflection/index.js.map +1 -0
- package/dist/reflection/reflection.d.ts +84 -0
- package/dist/reflection/reflection.d.ts.map +1 -0
- package/dist/reflection/reflection.js +329 -0
- package/dist/reflection/reflection.js.map +1 -0
- package/dist/tools/index.d.ts +6 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +11 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/introspection.d.ts +165 -0
- package/dist/tools/introspection.d.ts.map +1 -0
- package/dist/tools/introspection.js +324 -0
- package/dist/tools/introspection.js.map +1 -0
- package/dist/types/agent.d.ts +66 -0
- package/dist/types/agent.d.ts.map +1 -0
- package/dist/types/agent.js +6 -0
- package/dist/types/agent.js.map +1 -0
- package/dist/types/decorators.d.ts +31 -0
- package/dist/types/decorators.d.ts.map +1 -0
- package/dist/types/decorators.js +2 -0
- package/dist/types/decorators.js.map +1 -0
- package/dist/types/error-strategy.d.ts +13 -0
- package/dist/types/error-strategy.d.ts.map +1 -0
- package/dist/types/error-strategy.js +2 -0
- package/dist/types/error-strategy.js.map +1 -0
- package/dist/types/error.d.ts +20 -0
- package/dist/types/error.d.ts.map +1 -0
- package/dist/types/error.js +2 -0
- package/dist/types/error.js.map +1 -0
- package/dist/types/events.d.ts +87 -0
- package/dist/types/events.d.ts.map +1 -0
- package/dist/types/events.js +2 -0
- package/dist/types/events.js.map +1 -0
- package/dist/types/index.d.ts +15 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/logging.d.ts +24 -0
- package/dist/types/logging.d.ts.map +1 -0
- package/dist/types/logging.js +2 -0
- package/dist/types/logging.js.map +1 -0
- package/dist/types/observer.d.ts +18 -0
- package/dist/types/observer.d.ts.map +1 -0
- package/dist/types/observer.js +2 -0
- package/dist/types/observer.js.map +1 -0
- package/dist/types/prompt.d.ts +31 -0
- package/dist/types/prompt.d.ts.map +1 -0
- package/dist/types/prompt.js +6 -0
- package/dist/types/prompt.js.map +1 -0
- package/dist/types/reflection.d.ts +96 -0
- package/dist/types/reflection.d.ts.map +1 -0
- package/dist/types/reflection.js +24 -0
- package/dist/types/reflection.js.map +1 -0
- package/dist/types/sdk-primitives.d.ts +118 -0
- package/dist/types/sdk-primitives.d.ts.map +1 -0
- package/dist/types/sdk-primitives.js +6 -0
- package/dist/types/sdk-primitives.js.map +1 -0
- package/{src/types/snapshot.ts → dist/types/snapshot.d.ts} +5 -5
- package/dist/types/snapshot.d.ts.map +1 -0
- package/dist/types/snapshot.js +2 -0
- package/dist/types/snapshot.js.map +1 -0
- package/dist/types/workflow-context.d.ts +139 -0
- package/dist/types/workflow-context.d.ts.map +1 -0
- package/dist/types/workflow-context.js +8 -0
- package/dist/types/workflow-context.js.map +1 -0
- package/dist/types/workflow.d.ts +30 -0
- package/dist/types/workflow.d.ts.map +1 -0
- package/dist/types/workflow.js +2 -0
- package/dist/types/workflow.js.map +1 -0
- package/dist/utils/id.d.ts +6 -0
- package/dist/utils/id.d.ts.map +1 -0
- package/dist/utils/id.js +12 -0
- package/dist/utils/id.js.map +1 -0
- package/{src/utils/index.ts → dist/utils/index.d.ts} +2 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/observable.d.ts +54 -0
- package/dist/utils/observable.d.ts.map +1 -0
- package/dist/utils/observable.js +82 -0
- package/dist/utils/observable.js.map +1 -0
- package/dist/utils/workflow-error-utils.d.ts +22 -0
- package/dist/utils/workflow-error-utils.d.ts.map +1 -0
- package/dist/utils/workflow-error-utils.js +45 -0
- package/dist/utils/workflow-error-utils.js.map +1 -0
- package/package.json +7 -2
- package/.claude/settings.local.json +0 -9
- package/.claude/system_prompts/task-breakdown.md +0 -100
- package/PRPs/001-hierarchical-workflow-engine.md +0 -2438
- package/PRPs/PRDs/001-hierarchical-workflow-engine.md +0 -543
- package/PRPs/PRDs/002-agent-prompt.md +0 -390
- package/PRPs/PRDs/003-agent-prompt.md +0 -943
- package/PRPs/PRDs/004-agent-prompt.md +0 -1136
- package/PRPs/PRDs/tasks-001.json +0 -492
- package/PRPs/README.md +0 -83
- package/PRPs/templates/prp_base.md +0 -222
- package/docs/agent.md +0 -422
- package/docs/prompt.md +0 -419
- package/docs/workflow.md +0 -600
- package/examples/README.md +0 -244
- package/examples/examples/01-basic-workflow.ts +0 -100
- package/examples/examples/02-decorator-options.ts +0 -217
- package/examples/examples/03-parent-child.ts +0 -241
- package/examples/examples/04-observers-debugger.ts +0 -340
- package/examples/examples/05-error-handling.ts +0 -387
- package/examples/examples/06-concurrent-tasks.ts +0 -352
- package/examples/examples/07-agent-loops.ts +0 -432
- package/examples/examples/08-sdk-features.ts +0 -667
- package/examples/examples/09-reflection.ts +0 -573
- package/examples/examples/10-introspection.ts +0 -550
- package/examples/index.ts +0 -143
- package/examples/utils/helpers.ts +0 -57
- package/llms_full.txt +0 -5890
- package/plan/P1P2/PRP.md +0 -527
- package/plan/P1P2/research/LRU_CACHE_BEST_PRACTICES.md +0 -1929
- package/plan/P1P2/research/LRU_CACHE_CODE_PATTERNS.md +0 -857
- package/plan/P1P2/research/LRU_CACHE_INTEGRATION_GUIDE.md +0 -738
- package/plan/P1P2/research/LRU_CACHE_RESEARCH_INDEX.md +0 -424
- package/plan/P1P2/research/REFLECTION_INDEX.md +0 -291
- package/plan/P1P2/research/REFLECTION_RESEARCH_REPORT.md +0 -1342
- package/plan/P1P2/research/RESEARCH_SUMMARY.md +0 -342
- package/plan/P1P2/research/anthropic-sdk.md +0 -174
- package/plan/P1P2/research/async-local-storage.md +0 -200
- package/plan/P1P2/research/reflection-code-patterns.md +0 -1205
- package/plan/P1P2/research/reflection-decision-matrix.md +0 -421
- package/plan/P1P2/research/reflection-implementation-guide.md +0 -1341
- package/plan/P1P2/research/reflection-integration-guide.md +0 -834
- package/plan/P1P2/research/reflection-patterns.md +0 -1468
- package/plan/P1P2/research/reflection-quick-reference.md +0 -558
- package/plan/P1P2/research/zod-schema.md +0 -152
- package/plan/P3P4/PRP.md +0 -1388
- package/plan/P3P4/research/caching-lru.md +0 -116
- package/plan/P3P4/research/introspection-tools.md +0 -177
- package/plan/P3P4/research/reflection-patterns.md +0 -117
- package/plan/P4P5/PRP.md +0 -1136
- package/plan/P4P5/research/RESEARCH_SUMMARY.md +0 -151
- package/plan/architecture/external_deps.md +0 -358
- package/plan/architecture/system_context.md +0 -242
- package/plan/backlog.json +0 -867
- package/plan/research/INTROSPECTION_RESEARCH_SUMMARY.md +0 -378
- package/plan/research/README-INTROSPECTION.md +0 -352
- package/plan/research/agent-introspection-patterns.md +0 -1085
- package/plan/research/introspection-security-guide.md +0 -928
- package/plan/research/introspection-tool-examples.md +0 -875
- package/scripts/generate-llms-full.ts +0 -206
- package/src/__tests__/integration/agent-workflow.test.ts +0 -256
- package/src/__tests__/integration/tree-mirroring.test.ts +0 -114
- package/src/__tests__/unit/agent.test.ts +0 -169
- package/src/__tests__/unit/cache-key.test.ts +0 -182
- package/src/__tests__/unit/cache.test.ts +0 -172
- package/src/__tests__/unit/context.test.ts +0 -138
- package/src/__tests__/unit/decorators.test.ts +0 -100
- package/src/__tests__/unit/introspection-tools.test.ts +0 -277
- package/src/__tests__/unit/prompt.test.ts +0 -135
- package/src/__tests__/unit/reflection.test.ts +0 -210
- package/src/__tests__/unit/tree-debugger.test.ts +0 -85
- package/src/__tests__/unit/workflow.test.ts +0 -81
- package/src/cache/cache-key.ts +0 -244
- package/src/cache/cache.ts +0 -236
- package/src/core/agent.ts +0 -573
- package/src/core/event-tree.ts +0 -260
- package/src/core/logger.ts +0 -87
- package/src/core/mcp-handler.ts +0 -184
- package/src/core/prompt.ts +0 -150
- package/src/core/workflow-context.ts +0 -349
- package/src/core/workflow.ts +0 -302
- package/src/debugger/tree-debugger.ts +0 -210
- package/src/decorators/observed-state.ts +0 -95
- package/src/decorators/step.ts +0 -139
- package/src/decorators/task.ts +0 -96
- package/src/examples/tdd-orchestrator.ts +0 -65
- package/src/examples/test-cycle-workflow.ts +0 -64
- package/src/index.ts +0 -140
- package/src/reflection/reflection.ts +0 -407
- package/src/tools/index.ts +0 -36
- package/src/tools/introspection.ts +0 -464
- package/src/types/agent.ts +0 -90
- package/src/types/decorators.ts +0 -25
- package/src/types/error-strategy.ts +0 -13
- package/src/types/error.ts +0 -20
- package/src/types/events.ts +0 -74
- package/src/types/index.ts +0 -55
- package/src/types/logging.ts +0 -24
- package/src/types/observer.ts +0 -18
- package/src/types/prompt.ts +0 -40
- package/src/types/reflection.ts +0 -117
- package/src/types/sdk-primitives.ts +0 -128
- package/src/types/workflow-context.ts +0 -163
- package/src/types/workflow.ts +0 -37
- package/src/utils/id.ts +0 -11
- package/src/utils/observable.ts +0 -77
- package/tasks.json +0 -0
- package/tsconfig.json +0 -22
- package/vitest.config.ts +0 -16
|
@@ -1,1341 +0,0 @@
|
|
|
1
|
-
# Reflection and Self-Correction: Implementation Guide
|
|
2
|
-
|
|
3
|
-
This guide provides practical TypeScript/Python code examples for implementing reflection patterns in agent orchestration systems.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## 1. Basic Reflection Loop Implementation
|
|
8
|
-
|
|
9
|
-
### TypeScript: Simple Reflection Pattern
|
|
10
|
-
|
|
11
|
-
```typescript
|
|
12
|
-
interface ReflectionState {
|
|
13
|
-
task: string;
|
|
14
|
-
attempt: number;
|
|
15
|
-
output: string;
|
|
16
|
-
feedback: string;
|
|
17
|
-
quality: number;
|
|
18
|
-
shouldRetry: boolean;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
class ReflectionAgent {
|
|
22
|
-
private maxAttempts = 3;
|
|
23
|
-
private qualityThreshold = 0.8;
|
|
24
|
-
|
|
25
|
-
async executeWithReflection(task: string): Promise<string> {
|
|
26
|
-
let state: ReflectionState = {
|
|
27
|
-
task,
|
|
28
|
-
attempt: 0,
|
|
29
|
-
output: "",
|
|
30
|
-
feedback: "",
|
|
31
|
-
quality: 0,
|
|
32
|
-
shouldRetry: true,
|
|
33
|
-
};
|
|
34
|
-
|
|
35
|
-
while (state.attempt < this.maxAttempts && state.shouldRetry) {
|
|
36
|
-
state.attempt++;
|
|
37
|
-
|
|
38
|
-
// Step 1: Generate output
|
|
39
|
-
state.output = await this.generateOutput(state.task);
|
|
40
|
-
|
|
41
|
-
// Step 2: Evaluate quality
|
|
42
|
-
state.quality = await this.evaluateQuality(state.output, state.task);
|
|
43
|
-
|
|
44
|
-
// Step 3: Check if good enough
|
|
45
|
-
if (state.quality >= this.qualityThreshold) {
|
|
46
|
-
return state.output;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
// Step 4: Generate feedback for reflection
|
|
50
|
-
state.feedback = await this.generateFeedback(
|
|
51
|
-
state.task,
|
|
52
|
-
state.output,
|
|
53
|
-
state.quality
|
|
54
|
-
);
|
|
55
|
-
|
|
56
|
-
// Step 5: Use feedback to improve
|
|
57
|
-
state.output = await this.improveBasedOnFeedback(
|
|
58
|
-
state.output,
|
|
59
|
-
state.feedback
|
|
60
|
-
);
|
|
61
|
-
|
|
62
|
-
// Step 6: Decide whether to continue
|
|
63
|
-
state.shouldRetry = this.shouldContinueReflecting(
|
|
64
|
-
state.attempt,
|
|
65
|
-
state.quality
|
|
66
|
-
);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
return state.output;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
private async generateOutput(task: string): Promise<string> {
|
|
73
|
-
// Call LLM to generate initial output
|
|
74
|
-
const response = await this.llm.generate({
|
|
75
|
-
prompt: task,
|
|
76
|
-
maxTokens: 1000,
|
|
77
|
-
});
|
|
78
|
-
return response.text;
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
private async evaluateQuality(output: string, task: string): Promise<number> {
|
|
82
|
-
// Use evaluator to score output (0-1)
|
|
83
|
-
const evaluation = await this.llm.generate({
|
|
84
|
-
prompt: `Rate the quality of this response to "${task}": ${output}
|
|
85
|
-
Respond with only a number between 0 and 1.`,
|
|
86
|
-
maxTokens: 10,
|
|
87
|
-
});
|
|
88
|
-
|
|
89
|
-
const score = parseFloat(evaluation.text.trim());
|
|
90
|
-
return Math.max(0, Math.min(1, score)); // Clamp to 0-1
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
private async generateFeedback(
|
|
94
|
-
task: string,
|
|
95
|
-
output: string,
|
|
96
|
-
quality: number
|
|
97
|
-
): Promise<string> {
|
|
98
|
-
if (quality > 0.7) {
|
|
99
|
-
// High quality - minor improvements
|
|
100
|
-
return await this.llm.generate({
|
|
101
|
-
prompt: `The response to "${task}" is good but could be better.
|
|
102
|
-
Current response: ${output}
|
|
103
|
-
|
|
104
|
-
What specific improvements would make this response better?
|
|
105
|
-
Focus on small, actionable improvements.`,
|
|
106
|
-
maxTokens: 500,
|
|
107
|
-
});
|
|
108
|
-
} else {
|
|
109
|
-
// Low quality - major rework
|
|
110
|
-
return await this.llm.generate({
|
|
111
|
-
prompt: `The response to "${task}" needs significant improvement.
|
|
112
|
-
Current response: ${output}
|
|
113
|
-
|
|
114
|
-
What are the main problems with this response?
|
|
115
|
-
What should be completely rewritten?`,
|
|
116
|
-
maxTokens: 500,
|
|
117
|
-
});
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
private async improveBasedOnFeedback(
|
|
122
|
-
output: string,
|
|
123
|
-
feedback: string
|
|
124
|
-
): Promise<string> {
|
|
125
|
-
return await this.llm.generate({
|
|
126
|
-
prompt: `Original output: ${output}
|
|
127
|
-
|
|
128
|
-
Feedback: ${feedback}
|
|
129
|
-
|
|
130
|
-
Please improve the output based on the feedback provided.
|
|
131
|
-
Generate an improved version that addresses all feedback points.`,
|
|
132
|
-
maxTokens: 1500,
|
|
133
|
-
});
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
private shouldContinueReflecting(attempt: number, quality: number): boolean {
|
|
137
|
-
// Never exceed max attempts
|
|
138
|
-
if (attempt >= this.maxAttempts) return false;
|
|
139
|
-
|
|
140
|
-
// Stop if quality is good
|
|
141
|
-
if (quality >= this.qualityThreshold) return false;
|
|
142
|
-
|
|
143
|
-
// Continue if quality is low and we have attempts left
|
|
144
|
-
return true;
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
### Python: Evidence-Grounded Reflection (Reflexion Pattern)
|
|
150
|
-
|
|
151
|
-
```python
|
|
152
|
-
from typing import Dict, List, Optional
|
|
153
|
-
from dataclasses import dataclass
|
|
154
|
-
from datetime import datetime
|
|
155
|
-
|
|
156
|
-
@dataclass
|
|
157
|
-
class ReflectionCycle:
|
|
158
|
-
attempt: int
|
|
159
|
-
output: str
|
|
160
|
-
feedback: str
|
|
161
|
-
external_evidence: Dict[str, any]
|
|
162
|
-
quality_score: float
|
|
163
|
-
timestamp: datetime
|
|
164
|
-
|
|
165
|
-
class ReflexionAgent:
|
|
166
|
-
"""Implementation of Reflexion pattern with evidence grounding"""
|
|
167
|
-
|
|
168
|
-
def __init__(self, llm_client, max_attempts: int = 3):
|
|
169
|
-
self.llm = llm_client
|
|
170
|
-
self.max_attempts = max_attempts
|
|
171
|
-
self.memory: List[ReflectionCycle] = []
|
|
172
|
-
|
|
173
|
-
def execute_with_reflexion(self, task: str, tools: Dict) -> str:
|
|
174
|
-
"""Execute task with Reflexion feedback loop"""
|
|
175
|
-
|
|
176
|
-
for attempt in range(1, self.max_attempts + 1):
|
|
177
|
-
# Generate output
|
|
178
|
-
output = self._generate_output(task, attempt)
|
|
179
|
-
|
|
180
|
-
# Get external evidence (tool results, retrieval, etc.)
|
|
181
|
-
evidence = self._gather_evidence(task, output, tools)
|
|
182
|
-
|
|
183
|
-
# Evaluate with external grounding
|
|
184
|
-
feedback = self._generate_grounded_feedback(
|
|
185
|
-
task=task,
|
|
186
|
-
output=output,
|
|
187
|
-
evidence=evidence
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
# Score quality
|
|
191
|
-
quality_score = self._score_with_evidence(output, evidence)
|
|
192
|
-
|
|
193
|
-
# Store in memory
|
|
194
|
-
cycle = ReflectionCycle(
|
|
195
|
-
attempt=attempt,
|
|
196
|
-
output=output,
|
|
197
|
-
feedback=feedback,
|
|
198
|
-
external_evidence=evidence,
|
|
199
|
-
quality_score=quality_score,
|
|
200
|
-
timestamp=datetime.now()
|
|
201
|
-
)
|
|
202
|
-
self.memory.append(cycle)
|
|
203
|
-
|
|
204
|
-
# Check stopping conditions
|
|
205
|
-
if quality_score >= 0.85:
|
|
206
|
-
return output
|
|
207
|
-
|
|
208
|
-
if attempt < self.max_attempts:
|
|
209
|
-
# Use feedback to improve
|
|
210
|
-
output = self._revise_with_feedback(output, feedback, self.memory)
|
|
211
|
-
|
|
212
|
-
return self.memory[-1].output
|
|
213
|
-
|
|
214
|
-
def _generate_output(self, task: str, attempt: int) -> str:
|
|
215
|
-
"""Generate output, incorporating prior attempts if available"""
|
|
216
|
-
|
|
217
|
-
memory_context = ""
|
|
218
|
-
if attempt > 1:
|
|
219
|
-
# Add context from prior attempts
|
|
220
|
-
memory_context = self._format_memory_for_context(self.memory)
|
|
221
|
-
|
|
222
|
-
prompt = f"""Task: {task}
|
|
223
|
-
|
|
224
|
-
{memory_context}
|
|
225
|
-
|
|
226
|
-
Generate your response to this task."""
|
|
227
|
-
|
|
228
|
-
response = self.llm.generate(prompt)
|
|
229
|
-
return response
|
|
230
|
-
|
|
231
|
-
def _gather_evidence(self, task: str, output: str, tools: Dict) -> Dict:
|
|
232
|
-
"""Gather external evidence to ground reflection"""
|
|
233
|
-
|
|
234
|
-
evidence = {}
|
|
235
|
-
|
|
236
|
-
# Run tools to get evidence
|
|
237
|
-
for tool_name, tool_func in tools.items():
|
|
238
|
-
try:
|
|
239
|
-
evidence[tool_name] = tool_func(output)
|
|
240
|
-
except Exception as e:
|
|
241
|
-
evidence[tool_name] = {"error": str(e)}
|
|
242
|
-
|
|
243
|
-
return evidence
|
|
244
|
-
|
|
245
|
-
def _generate_grounded_feedback(
|
|
246
|
-
self,
|
|
247
|
-
task: str,
|
|
248
|
-
output: str,
|
|
249
|
-
evidence: Dict
|
|
250
|
-
) -> str:
|
|
251
|
-
"""Generate feedback grounded in external evidence"""
|
|
252
|
-
|
|
253
|
-
evidence_summary = self._format_evidence(evidence)
|
|
254
|
-
|
|
255
|
-
prompt = f"""Task: {task}
|
|
256
|
-
|
|
257
|
-
Response: {output}
|
|
258
|
-
|
|
259
|
-
External Evidence:
|
|
260
|
-
{evidence_summary}
|
|
261
|
-
|
|
262
|
-
Based on the external evidence, please provide constructive feedback:
|
|
263
|
-
|
|
264
|
-
1. What does the evidence tell us about the quality of this response?
|
|
265
|
-
2. Are there claims in the response that contradict the evidence?
|
|
266
|
-
3. What specific improvements are needed?
|
|
267
|
-
4. Rate your confidence in this feedback (1-10)
|
|
268
|
-
|
|
269
|
-
Format as actionable feedback."""
|
|
270
|
-
|
|
271
|
-
feedback = self.llm.generate(prompt)
|
|
272
|
-
return feedback
|
|
273
|
-
|
|
274
|
-
def _score_with_evidence(self, output: str, evidence: Dict) -> float:
|
|
275
|
-
"""Score output quality using evidence"""
|
|
276
|
-
|
|
277
|
-
# Simple heuristic: check if output aligns with evidence
|
|
278
|
-
alignment_scores = []
|
|
279
|
-
|
|
280
|
-
for tool_name, tool_result in evidence.items():
|
|
281
|
-
if isinstance(tool_result, dict) and "score" in tool_result:
|
|
282
|
-
alignment_scores.append(tool_result["score"])
|
|
283
|
-
|
|
284
|
-
if alignment_scores:
|
|
285
|
-
return sum(alignment_scores) / len(alignment_scores)
|
|
286
|
-
|
|
287
|
-
# Fallback to LLM evaluation
|
|
288
|
-
eval_prompt = f"""Rate this response quality (0-1):
|
|
289
|
-
Response: {output}
|
|
290
|
-
Evidence: {evidence}
|
|
291
|
-
|
|
292
|
-
Respond with only a decimal between 0 and 1."""
|
|
293
|
-
|
|
294
|
-
score_text = self.llm.generate(eval_prompt)
|
|
295
|
-
return float(score_text.strip())
|
|
296
|
-
|
|
297
|
-
def _revise_with_feedback(
|
|
298
|
-
self,
|
|
299
|
-
output: str,
|
|
300
|
-
feedback: str,
|
|
301
|
-
memory: List[ReflectionCycle]
|
|
302
|
-
) -> str:
|
|
303
|
-
"""Revise output using feedback and prior learnings"""
|
|
304
|
-
|
|
305
|
-
lessons_learned = self._extract_lessons(memory)
|
|
306
|
-
|
|
307
|
-
prompt = f"""Original response: {output}
|
|
308
|
-
|
|
309
|
-
Feedback: {feedback}
|
|
310
|
-
|
|
311
|
-
Lessons from prior attempts:
|
|
312
|
-
{lessons_learned}
|
|
313
|
-
|
|
314
|
-
Please revise the response to:
|
|
315
|
-
1. Address all feedback points
|
|
316
|
-
2. Incorporate lessons learned
|
|
317
|
-
3. Maintain strengths from original response
|
|
318
|
-
4. Fix specific issues identified
|
|
319
|
-
|
|
320
|
-
Provide revised response."""
|
|
321
|
-
|
|
322
|
-
revised = self.llm.generate(prompt)
|
|
323
|
-
return revised
|
|
324
|
-
|
|
325
|
-
def _format_memory_for_context(self, memory: List[ReflectionCycle]) -> str:
|
|
326
|
-
"""Format memory for context window inclusion"""
|
|
327
|
-
|
|
328
|
-
if not memory:
|
|
329
|
-
return ""
|
|
330
|
-
|
|
331
|
-
formatted = "Prior attempts:\n"
|
|
332
|
-
for cycle in memory[-2:]: # Include last 2 attempts
|
|
333
|
-
formatted += f"""
|
|
334
|
-
Attempt {cycle.attempt}:
|
|
335
|
-
- Output: {cycle.output[:200]}...
|
|
336
|
-
- Quality: {cycle.quality_score}
|
|
337
|
-
- Key feedback: {cycle.feedback[:200]}...
|
|
338
|
-
"""
|
|
339
|
-
return formatted
|
|
340
|
-
|
|
341
|
-
def _format_evidence(self, evidence: Dict) -> str:
|
|
342
|
-
"""Format evidence for readability"""
|
|
343
|
-
|
|
344
|
-
formatted = ""
|
|
345
|
-
for tool_name, result in evidence.items():
|
|
346
|
-
formatted += f"\n{tool_name}:\n{result}\n"
|
|
347
|
-
return formatted
|
|
348
|
-
|
|
349
|
-
def _extract_lessons(self, memory: List[ReflectionCycle]) -> str:
|
|
350
|
-
"""Extract patterns/lessons from memory"""
|
|
351
|
-
|
|
352
|
-
if len(memory) < 2:
|
|
353
|
-
return "No prior attempts yet."
|
|
354
|
-
|
|
355
|
-
lessons = []
|
|
356
|
-
prev_cycle = memory[-2] if len(memory) > 1 else None
|
|
357
|
-
|
|
358
|
-
if prev_cycle:
|
|
359
|
-
lessons.append(f"Previous attempt had quality score: {prev_cycle.quality_score}")
|
|
360
|
-
lessons.append(f"That feedback was: {prev_cycle.feedback[:100]}...")
|
|
361
|
-
|
|
362
|
-
return "\n".join(lessons)
|
|
363
|
-
```
|
|
364
|
-
|
|
365
|
-
---
|
|
366
|
-
|
|
367
|
-
## 2. Loop Detection and Prevention
|
|
368
|
-
|
|
369
|
-
### TypeScript: Loop Detection System
|
|
370
|
-
|
|
371
|
-
```typescript
|
|
372
|
-
interface OutputHistoryEntry {
|
|
373
|
-
timestamp: number;
|
|
374
|
-
output: string;
|
|
375
|
-
error: string;
|
|
376
|
-
tokenHash: string;
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
class LoopDetectionSystem {
|
|
380
|
-
private outputHistory: OutputHistoryEntry[] = [];
|
|
381
|
-
private readonly similarityThreshold = 0.95;
|
|
382
|
-
private readonly maxIdenticalConsecutive = 2;
|
|
383
|
-
|
|
384
|
-
/**
|
|
385
|
-
* Check if current attempt indicates an infinite loop
|
|
386
|
-
*/
|
|
387
|
-
isInfiniteLoop(
|
|
388
|
-
currentOutput: string,
|
|
389
|
-
currentError: string,
|
|
390
|
-
attemptNumber: number
|
|
391
|
-
): boolean {
|
|
392
|
-
const now = Date.now();
|
|
393
|
-
|
|
394
|
-
// Check 1: Identical outputs
|
|
395
|
-
if (this.detectIdenticalOutputLoop(currentOutput)) {
|
|
396
|
-
return true;
|
|
397
|
-
}
|
|
398
|
-
|
|
399
|
-
// Check 2: Repeated errors
|
|
400
|
-
if (this.detectRepeatedErrors(currentError)) {
|
|
401
|
-
return true;
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
// Check 3: Low variance in outputs
|
|
405
|
-
if (this.detectLowVarianceLoop(currentOutput)) {
|
|
406
|
-
return true;
|
|
407
|
-
}
|
|
408
|
-
|
|
409
|
-
// Check 4: Too many attempts in short time
|
|
410
|
-
if (this.detectRapidRepeatedAttempts(now)) {
|
|
411
|
-
return true;
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
return false;
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
private detectIdenticalOutputLoop(output: string): boolean {
|
|
418
|
-
const hash = this.hashOutput(output);
|
|
419
|
-
|
|
420
|
-
// Check if last 2 outputs are identical
|
|
421
|
-
const recent = this.outputHistory.slice(-2);
|
|
422
|
-
if (recent.length >= 2) {
|
|
423
|
-
const allIdentical = recent.every((e) => e.tokenHash === hash);
|
|
424
|
-
if (allIdentical) {
|
|
425
|
-
console.warn("Loop detected: Identical outputs");
|
|
426
|
-
return true;
|
|
427
|
-
}
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
return false;
|
|
431
|
-
}
|
|
432
|
-
|
|
433
|
-
private detectRepeatedErrors(error: string): boolean {
|
|
434
|
-
// Check if same error appeared 2+ times
|
|
435
|
-
const recentErrors = this.outputHistory
|
|
436
|
-
.slice(-3)
|
|
437
|
-
.map((e) => e.error)
|
|
438
|
-
.filter((e) => e.length > 0);
|
|
439
|
-
|
|
440
|
-
const errorCounts = new Map<string, number>();
|
|
441
|
-
for (const err of recentErrors) {
|
|
442
|
-
errorCounts.set(err, (errorCounts.get(err) || 0) + 1);
|
|
443
|
-
}
|
|
444
|
-
|
|
445
|
-
const isRepeated = recentErrors.some((e) => errorCounts.get(e)! >= 2);
|
|
446
|
-
if (isRepeated) {
|
|
447
|
-
console.warn("Loop detected: Repeated errors");
|
|
448
|
-
}
|
|
449
|
-
|
|
450
|
-
return isRepeated;
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
private detectLowVarianceLoop(output: string): boolean {
|
|
454
|
-
if (this.outputHistory.length < 3) {
|
|
455
|
-
return false;
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
const recent = this.outputHistory.slice(-3).map((e) => e.output);
|
|
459
|
-
|
|
460
|
-
// Compute pairwise similarities
|
|
461
|
-
const similarities: number[] = [];
|
|
462
|
-
for (let i = 0; i < recent.length - 1; i++) {
|
|
463
|
-
const sim = this.computeSimilarity(recent[i], recent[i + 1]);
|
|
464
|
-
similarities.push(sim);
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
const avgSimilarity = similarities.reduce((a, b) => a + b) / similarities.length;
|
|
468
|
-
|
|
469
|
-
if (avgSimilarity > this.similarityThreshold) {
|
|
470
|
-
console.warn(`Loop detected: Low variance (similarity: ${avgSimilarity})`);
|
|
471
|
-
return true;
|
|
472
|
-
}
|
|
473
|
-
|
|
474
|
-
return false;
|
|
475
|
-
}
|
|
476
|
-
|
|
477
|
-
private detectRapidRepeatedAttempts(now: number): boolean {
|
|
478
|
-
// Check if more than 3 attempts in last 10 seconds
|
|
479
|
-
const recentWindow = 10_000; // 10 seconds
|
|
480
|
-
const recentAttempts = this.outputHistory.filter(
|
|
481
|
-
(e) => now - e.timestamp < recentWindow
|
|
482
|
-
);
|
|
483
|
-
|
|
484
|
-
if (recentAttempts.length > 5) {
|
|
485
|
-
console.warn(
|
|
486
|
-
`Loop detected: Too many rapid attempts (${recentAttempts.length} in 10s)`
|
|
487
|
-
);
|
|
488
|
-
return true;
|
|
489
|
-
}
|
|
490
|
-
|
|
491
|
-
return false;
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
/**
|
|
495
|
-
* Record an output for loop detection tracking
|
|
496
|
-
*/
|
|
497
|
-
recordOutput(output: string, error: string = ""): void {
|
|
498
|
-
this.outputHistory.push({
|
|
499
|
-
timestamp: Date.now(),
|
|
500
|
-
output,
|
|
501
|
-
error,
|
|
502
|
-
tokenHash: this.hashOutput(output),
|
|
503
|
-
});
|
|
504
|
-
|
|
505
|
-
// Keep only last 10 outputs
|
|
506
|
-
if (this.outputHistory.length > 10) {
|
|
507
|
-
this.outputHistory.shift();
|
|
508
|
-
}
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
/**
|
|
512
|
-
* Get recovery suggestion when loop is detected
|
|
513
|
-
*/
|
|
514
|
-
getRecoverySuggestion(attemptNumber: number): string {
|
|
515
|
-
if (this.detectIdenticalOutputLoop(this.outputHistory[-1]?.output || "")) {
|
|
516
|
-
return "Identical outputs detected. Try a different approach or tool.";
|
|
517
|
-
}
|
|
518
|
-
|
|
519
|
-
if (attemptNumber >= 3) {
|
|
520
|
-
return "Maximum attempts reached. Escalate to supervisor or human.";
|
|
521
|
-
}
|
|
522
|
-
|
|
523
|
-
return `Try using a different reflection strategy. Current strategy is looping.`;
|
|
524
|
-
}
|
|
525
|
-
|
|
526
|
-
private computeSimilarity(text1: string, text2: string): number {
|
|
527
|
-
if (!text1 || !text2) return 0;
|
|
528
|
-
|
|
529
|
-
const tokens1 = new Set(text1.split(/\s+/));
|
|
530
|
-
const tokens2 = new Set(text2.split(/\s+/));
|
|
531
|
-
|
|
532
|
-
if (tokens1.size === 0 || tokens2.size === 0) return 0;
|
|
533
|
-
|
|
534
|
-
const intersection = new Set([...tokens1].filter((x) => tokens2.has(x)));
|
|
535
|
-
const union = new Set([...tokens1, ...tokens2]);
|
|
536
|
-
|
|
537
|
-
return intersection.size / union.size;
|
|
538
|
-
}
|
|
539
|
-
|
|
540
|
-
private hashOutput(output: string): string {
|
|
541
|
-
// Simple hash for comparison
|
|
542
|
-
let hash = 0;
|
|
543
|
-
for (let i = 0; i < output.length; i++) {
|
|
544
|
-
const char = output.charCodeAt(i);
|
|
545
|
-
hash = (hash << 5) - hash + char;
|
|
546
|
-
}
|
|
547
|
-
return hash.toString(36);
|
|
548
|
-
}
|
|
549
|
-
}
|
|
550
|
-
```
|
|
551
|
-
|
|
552
|
-
---
|
|
553
|
-
|
|
554
|
-
## 3. Context Window Management
|
|
555
|
-
|
|
556
|
-
### Python: Token Budget Allocation
|
|
557
|
-
|
|
558
|
-
```python
|
|
559
|
-
from typing import Dict, Optional
|
|
560
|
-
from dataclasses import dataclass
|
|
561
|
-
|
|
562
|
-
@dataclass
|
|
563
|
-
class TokenBudget:
|
|
564
|
-
"""Track and allocate token budget"""
|
|
565
|
-
total_budget: int
|
|
566
|
-
used: int = 0
|
|
567
|
-
reserved_for_final: int = 5000
|
|
568
|
-
|
|
569
|
-
@property
|
|
570
|
-
def remaining(self) -> int:
|
|
571
|
-
return self.total_budget - self.used - self.reserved_for_final
|
|
572
|
-
|
|
573
|
-
@property
|
|
574
|
-
def percentage_used(self) -> float:
|
|
575
|
-
return (self.used / self.total_budget) * 100
|
|
576
|
-
|
|
577
|
-
def allocate_for_reflection_cycle(
|
|
578
|
-
self,
|
|
579
|
-
cycle_number: int,
|
|
580
|
-
max_cycles: int
|
|
581
|
-
) -> Dict[str, int]:
|
|
582
|
-
"""Allocate tokens dynamically based on progress"""
|
|
583
|
-
|
|
584
|
-
available = self.remaining
|
|
585
|
-
|
|
586
|
-
# Earlier cycles get more budget, later cycles get less
|
|
587
|
-
progress_ratio = cycle_number / max_cycles
|
|
588
|
-
budget_multiplier = 1.0 - (progress_ratio * 0.5)
|
|
589
|
-
|
|
590
|
-
action_budget = int(available * budget_multiplier * 0.6)
|
|
591
|
-
reflection_budget = int(available * budget_multiplier * 0.4)
|
|
592
|
-
|
|
593
|
-
return {
|
|
594
|
-
"action": action_budget,
|
|
595
|
-
"reflection": reflection_budget,
|
|
596
|
-
"total": action_budget + reflection_budget,
|
|
597
|
-
}
|
|
598
|
-
|
|
599
|
-
def record_usage(self, tokens: int) -> None:
|
|
600
|
-
"""Record token usage"""
|
|
601
|
-
self.used += tokens
|
|
602
|
-
|
|
603
|
-
def compress_context(self, context: str, target_tokens: int) -> str:
|
|
604
|
-
"""Compress context to fit within token budget"""
|
|
605
|
-
|
|
606
|
-
current_tokens = self._estimate_tokens(context)
|
|
607
|
-
|
|
608
|
-
if current_tokens <= target_tokens:
|
|
609
|
-
return context
|
|
610
|
-
|
|
611
|
-
# Strategy 1: Remove oldest entries
|
|
612
|
-
lines = context.split("\n")
|
|
613
|
-
compressed_lines = lines[-int(len(lines) * target_tokens / current_tokens) :]
|
|
614
|
-
compressed = "\n".join(compressed_lines)
|
|
615
|
-
|
|
616
|
-
if self._estimate_tokens(compressed) <= target_tokens:
|
|
617
|
-
return compressed
|
|
618
|
-
|
|
619
|
-
# Strategy 2: Summarize
|
|
620
|
-
summary_prompt = f"""Please summarize this context in 50 tokens or less:
|
|
621
|
-
|
|
622
|
-
{context}
|
|
623
|
-
|
|
624
|
-
Summary:"""
|
|
625
|
-
|
|
626
|
-
# Call LLM to summarize
|
|
627
|
-
# return self.llm.generate(summary_prompt)
|
|
628
|
-
return context[:target_tokens * 4] # Rough estimate
|
|
629
|
-
|
|
630
|
-
def will_exceed_budget(self, additional_tokens: int) -> bool:
|
|
631
|
-
"""Check if additional tokens would exceed budget"""
|
|
632
|
-
return self.used + additional_tokens > self.total_budget
|
|
633
|
-
|
|
634
|
-
def _estimate_tokens(self, text: str) -> int:
|
|
635
|
-
"""Rough estimation: 1 token ≈ 4 characters"""
|
|
636
|
-
return len(text) // 4
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
class ReflectionWithTokenTracking:
|
|
640
|
-
"""Reflection that respects token budgets"""
|
|
641
|
-
|
|
642
|
-
def __init__(self, llm_client, total_token_budget: int = 100000):
|
|
643
|
-
self.llm = llm_client
|
|
644
|
-
self.budget = TokenBudget(total_token_budget)
|
|
645
|
-
|
|
646
|
-
def execute_with_budget(self, task: str, max_cycles: int = 3) -> str:
|
|
647
|
-
"""Execute task while respecting token budget"""
|
|
648
|
-
|
|
649
|
-
for cycle in range(1, max_cycles + 1):
|
|
650
|
-
# Check if we have budget
|
|
651
|
-
if self.budget.remaining < 5000:
|
|
652
|
-
print(f"Token budget exhausted ({self.budget.percentage_used}% used)")
|
|
653
|
-
break
|
|
654
|
-
|
|
655
|
-
# Allocate budget for this cycle
|
|
656
|
-
cycle_budget = self.budget.allocate_for_reflection_cycle(
|
|
657
|
-
cycle, max_cycles
|
|
658
|
-
)
|
|
659
|
-
|
|
660
|
-
print(f"Cycle {cycle}: {cycle_budget['total']} tokens available")
|
|
661
|
-
|
|
662
|
-
# Generate with budget constraint
|
|
663
|
-
output = self._generate_with_budget(
|
|
664
|
-
task,
|
|
665
|
-
max_tokens=cycle_budget["action"]
|
|
666
|
-
)
|
|
667
|
-
|
|
668
|
-
# Record token usage
|
|
669
|
-
estimated_tokens = len(output) // 4
|
|
670
|
-
self.budget.record_usage(estimated_tokens)
|
|
671
|
-
|
|
672
|
-
# Reflect with budget constraint
|
|
673
|
-
if cycle < max_cycles:
|
|
674
|
-
feedback = self._reflect_with_budget(
|
|
675
|
-
output,
|
|
676
|
-
max_tokens=cycle_budget["reflection"]
|
|
677
|
-
)
|
|
678
|
-
|
|
679
|
-
estimated_feedback_tokens = len(feedback) // 4
|
|
680
|
-
self.budget.record_usage(estimated_feedback_tokens)
|
|
681
|
-
|
|
682
|
-
return output
|
|
683
|
-
|
|
684
|
-
def _generate_with_budget(self, task: str, max_tokens: int) -> str:
|
|
685
|
-
"""Generate with token limit"""
|
|
686
|
-
return self.llm.generate(
|
|
687
|
-
prompt=task,
|
|
688
|
-
max_tokens=max_tokens
|
|
689
|
-
)
|
|
690
|
-
|
|
691
|
-
def _reflect_with_budget(self, output: str, max_tokens: int) -> str:
|
|
692
|
-
"""Reflect with token limit"""
|
|
693
|
-
return self.llm.generate(
|
|
694
|
-
prompt=f"Reflect on: {output}",
|
|
695
|
-
max_tokens=max_tokens
|
|
696
|
-
)
|
|
697
|
-
```
|
|
698
|
-
|
|
699
|
-
---
|
|
700
|
-
|
|
701
|
-
## 4. Security: Input Sanitization
|
|
702
|
-
|
|
703
|
-
### TypeScript: Secure Reflection Execution
|
|
704
|
-
|
|
705
|
-
```typescript
|
|
706
|
-
interface ReflectionSecurityPolicy {
|
|
707
|
-
allowedTools: Set<string>;
|
|
708
|
-
maxReflectionLength: number;
|
|
709
|
-
forbiddenKeywords: string[];
|
|
710
|
-
enableCommandExecution: boolean;
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
class SecureReflectionExecutor {
|
|
714
|
-
private policy: ReflectionSecurityPolicy;
|
|
715
|
-
|
|
716
|
-
constructor(policy: ReflectionSecurityPolicy) {
|
|
717
|
-
this.policy = policy;
|
|
718
|
-
}
|
|
719
|
-
|
|
720
|
-
/**
|
|
721
|
-
* Execute reflection with security checks
|
|
722
|
-
*/
|
|
723
|
-
async executeSecureReflection(
|
|
724
|
-
feedback: string,
|
|
725
|
-
currentOutput: string
|
|
726
|
-
): Promise<string> {
|
|
727
|
-
// Validate feedback
|
|
728
|
-
this.validateReflectionFeedback(feedback);
|
|
729
|
-
|
|
730
|
-
// Sanitize before sending to LLM
|
|
731
|
-
const sanitizedFeedback = this.sanitizeReflectionFeedback(feedback);
|
|
732
|
-
|
|
733
|
-
// Execute safely
|
|
734
|
-
return await this.llm.generate({
|
|
735
|
-
prompt: this.buildSecureReflectionPrompt(
|
|
736
|
-
sanitizedFeedback,
|
|
737
|
-
currentOutput
|
|
738
|
-
),
|
|
739
|
-
maxTokens: 2000,
|
|
740
|
-
});
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
/**
|
|
744
|
-
* Validate reflection feedback doesn't contain attacks
|
|
745
|
-
*/
|
|
746
|
-
private validateReflectionFeedback(feedback: string): void {
|
|
747
|
-
// Check 1: Length limit prevents prompt injection
|
|
748
|
-
if (feedback.length > this.policy.maxReflectionLength) {
|
|
749
|
-
throw new Error(
|
|
750
|
-
`Feedback too long (${feedback.length} > ${this.policy.maxReflectionLength})`
|
|
751
|
-
);
|
|
752
|
-
}
|
|
753
|
-
|
|
754
|
-
// Check 2: Forbidden keywords
|
|
755
|
-
const lowerFeedback = feedback.toLowerCase();
|
|
756
|
-
for (const keyword of this.policy.forbiddenKeywords) {
|
|
757
|
-
if (lowerFeedback.includes(keyword.toLowerCase())) {
|
|
758
|
-
throw new Error(
|
|
759
|
-
`Feedback contains forbidden keyword: "${keyword}"`
|
|
760
|
-
);
|
|
761
|
-
}
|
|
762
|
-
}
|
|
763
|
-
|
|
764
|
-
// Check 3: Tool mentions must be whitelisted
|
|
765
|
-
const toolMentions = this.extractToolNames(feedback);
|
|
766
|
-
for (const tool of toolMentions) {
|
|
767
|
-
if (!this.policy.allowedTools.has(tool)) {
|
|
768
|
-
throw new Error(
|
|
769
|
-
`Feedback references unauthorized tool: "${tool}"`
|
|
770
|
-
);
|
|
771
|
-
}
|
|
772
|
-
}
|
|
773
|
-
|
|
774
|
-
// Check 4: No command execution attempts
|
|
775
|
-
if (!this.policy.enableCommandExecution) {
|
|
776
|
-
const commandPatterns = [
|
|
777
|
-
/execute.*command/i,
|
|
778
|
-
/run.*shell/i,
|
|
779
|
-
/system.*call/i,
|
|
780
|
-
/subprocess/i,
|
|
781
|
-
];
|
|
782
|
-
|
|
783
|
-
for (const pattern of commandPatterns) {
|
|
784
|
-
if (pattern.test(feedback)) {
|
|
785
|
-
throw new Error("Feedback attempts to trigger command execution");
|
|
786
|
-
}
|
|
787
|
-
}
|
|
788
|
-
}
|
|
789
|
-
}
|
|
790
|
-
|
|
791
|
-
private sanitizeReflectionFeedback(feedback: string): string {
|
|
792
|
-
// Truncate to max length
|
|
793
|
-
let sanitized = feedback.substring(0, this.policy.maxReflectionLength);
|
|
794
|
-
|
|
795
|
-
// Remove suspicious patterns
|
|
796
|
-
const suspiciousPatterns = [
|
|
797
|
-
/<script[^>]*>.*?<\/script>/gi, // Scripts
|
|
798
|
-
/javascript:/gi, // JS protocol
|
|
799
|
-
/on\w+\s*=/gi, // Event handlers
|
|
800
|
-
];
|
|
801
|
-
|
|
802
|
-
for (const pattern of suspiciousPatterns) {
|
|
803
|
-
sanitized = sanitized.replace(pattern, "[REMOVED]");
|
|
804
|
-
}
|
|
805
|
-
|
|
806
|
-
return sanitized;
|
|
807
|
-
}
|
|
808
|
-
|
|
809
|
-
private buildSecureReflectionPrompt(
|
|
810
|
-
feedback: string,
|
|
811
|
-
currentOutput: string
|
|
812
|
-
): string {
|
|
813
|
-
return `You are helping to improve a response.
|
|
814
|
-
|
|
815
|
-
Current response:
|
|
816
|
-
${currentOutput}
|
|
817
|
-
|
|
818
|
-
Feedback (trusted source):
|
|
819
|
-
${feedback}
|
|
820
|
-
|
|
821
|
-
Based on this feedback, improve the response. Only use allowed tools: ${Array.from(this.policy.allowedTools).join(", ")}
|
|
822
|
-
|
|
823
|
-
Do not execute any commands or access credentials.
|
|
824
|
-
|
|
825
|
-
Improved response:`;
|
|
826
|
-
}
|
|
827
|
-
|
|
828
|
-
private extractToolNames(text: string): Set<string> {
|
|
829
|
-
const toolPattern = /\b(tool|use)[:_](\w+)\b/gi;
|
|
830
|
-
const tools = new Set<string>();
|
|
831
|
-
|
|
832
|
-
let match;
|
|
833
|
-
while ((match = toolPattern.exec(text)) !== null) {
|
|
834
|
-
tools.add(match[2].toLowerCase());
|
|
835
|
-
}
|
|
836
|
-
|
|
837
|
-
return tools;
|
|
838
|
-
}
|
|
839
|
-
}
|
|
840
|
-
|
|
841
|
-
// Usage:
|
|
842
|
-
const policy: ReflectionSecurityPolicy = {
|
|
843
|
-
allowedTools: new Set(["search", "compute", "validate"]),
|
|
844
|
-
maxReflectionLength: 1000,
|
|
845
|
-
forbiddenKeywords: [
|
|
846
|
-
"api_key",
|
|
847
|
-
"password",
|
|
848
|
-
"token",
|
|
849
|
-
"secret",
|
|
850
|
-
"execute",
|
|
851
|
-
"system",
|
|
852
|
-
],
|
|
853
|
-
enableCommandExecution: false,
|
|
854
|
-
};
|
|
855
|
-
|
|
856
|
-
const executor = new SecureReflectionExecutor(policy);
|
|
857
|
-
|
|
858
|
-
try {
|
|
859
|
-
const improved = await executor.executeSecureReflection(
|
|
860
|
-
userProvidedFeedback,
|
|
861
|
-
currentOutput
|
|
862
|
-
);
|
|
863
|
-
} catch (e) {
|
|
864
|
-
console.error("Security validation failed:", e.message);
|
|
865
|
-
}
|
|
866
|
-
```
|
|
867
|
-
|
|
868
|
-
---
|
|
869
|
-
|
|
870
|
-
## 5. Multi-Level Reflection Framework
|
|
871
|
-
|
|
872
|
-
### Python: Hierarchical Reflection
|
|
873
|
-
|
|
874
|
-
```python
|
|
875
|
-
from typing import List, Dict, Callable, Any
|
|
876
|
-
from enum import Enum
|
|
877
|
-
from dataclasses import dataclass
|
|
878
|
-
|
|
879
|
-
class ReflectionLevel(Enum):
|
|
880
|
-
"""Levels of reflection in hierarchical system"""
|
|
881
|
-
PROMPT_LEVEL = "prompt" # Individual LLM refinement
|
|
882
|
-
AGENT_LEVEL = "agent" # Agent evaluates its actions
|
|
883
|
-
ORCHESTRATION_LEVEL = "orch" # Parent reviews agent work
|
|
884
|
-
WORKFLOW_LEVEL = "workflow" # Overall workflow assessment
|
|
885
|
-
|
|
886
|
-
@dataclass
|
|
887
|
-
class ReflectionRequest:
|
|
888
|
-
level: ReflectionLevel
|
|
889
|
-
subject: str
|
|
890
|
-
context: Dict[str, Any]
|
|
891
|
-
evaluator: Callable
|
|
892
|
-
max_cycles: int = 3
|
|
893
|
-
|
|
894
|
-
class HierarchicalReflectionFramework:
|
|
895
|
-
"""Multi-level reflection system"""
|
|
896
|
-
|
|
897
|
-
def __init__(self, llm_client):
|
|
898
|
-
self.llm = llm_client
|
|
899
|
-
self.reflection_handlers = {
|
|
900
|
-
ReflectionLevel.PROMPT_LEVEL: self._handle_prompt_reflection,
|
|
901
|
-
ReflectionLevel.AGENT_LEVEL: self._handle_agent_reflection,
|
|
902
|
-
ReflectionLevel.ORCHESTRATION_LEVEL: self._handle_orchestration_reflection,
|
|
903
|
-
ReflectionLevel.WORKFLOW_LEVEL: self._handle_workflow_reflection,
|
|
904
|
-
}
|
|
905
|
-
|
|
906
|
-
async def reflect(self, request: ReflectionRequest) -> Dict[str, Any]:
|
|
907
|
-
"""Execute reflection at appropriate level"""
|
|
908
|
-
|
|
909
|
-
handler = self.reflection_handlers.get(request.level)
|
|
910
|
-
if not handler:
|
|
911
|
-
raise ValueError(f"Unknown reflection level: {request.level}")
|
|
912
|
-
|
|
913
|
-
return await handler(request)
|
|
914
|
-
|
|
915
|
-
async def _handle_prompt_reflection(
|
|
916
|
-
self, request: ReflectionRequest
|
|
917
|
-
) -> Dict[str, Any]:
|
|
918
|
-
"""Reflection at prompt level: Improve individual response"""
|
|
919
|
-
|
|
920
|
-
output = request.subject
|
|
921
|
-
max_cycles = request.max_cycles
|
|
922
|
-
|
|
923
|
-
for cycle in range(max_cycles):
|
|
924
|
-
# Evaluate current output
|
|
925
|
-
quality, issues = await request.evaluator(output)
|
|
926
|
-
|
|
927
|
-
if quality > 0.8:
|
|
928
|
-
return {
|
|
929
|
-
"level": "prompt",
|
|
930
|
-
"final_output": output,
|
|
931
|
-
"quality": quality,
|
|
932
|
-
"cycles": cycle + 1,
|
|
933
|
-
}
|
|
934
|
-
|
|
935
|
-
# Generate improvement suggestions
|
|
936
|
-
prompt = f"""Please improve this response:
|
|
937
|
-
|
|
938
|
-
Current response:
|
|
939
|
-
{output}
|
|
940
|
-
|
|
941
|
-
Issues identified:
|
|
942
|
-
{issues}
|
|
943
|
-
|
|
944
|
-
Provide an improved version that addresses all identified issues."""
|
|
945
|
-
|
|
946
|
-
output = await self.llm.generate(prompt)
|
|
947
|
-
|
|
948
|
-
return {
|
|
949
|
-
"level": "prompt",
|
|
950
|
-
"final_output": output,
|
|
951
|
-
"quality": quality,
|
|
952
|
-
"cycles": max_cycles,
|
|
953
|
-
}
|
|
954
|
-
|
|
955
|
-
async def _handle_agent_reflection(
|
|
956
|
-
self, request: ReflectionRequest
|
|
957
|
-
) -> Dict[str, Any]:
|
|
958
|
-
"""Reflection at agent level: Evaluate actions and decisions"""
|
|
959
|
-
|
|
960
|
-
actions = request.subject
|
|
961
|
-
context = request.context
|
|
962
|
-
|
|
963
|
-
# Evaluate action sequence
|
|
964
|
-
evaluation = await request.evaluator(actions)
|
|
965
|
-
|
|
966
|
-
prompt = f"""Review this action sequence:
|
|
967
|
-
|
|
968
|
-
Actions: {actions}
|
|
969
|
-
Context: {context}
|
|
970
|
-
Evaluation: {evaluation}
|
|
971
|
-
|
|
972
|
-
1. Did these actions achieve the goal?
|
|
973
|
-
2. What alternative actions would have been better?
|
|
974
|
-
3. What did you learn from this attempt?
|
|
975
|
-
4. What should the next attempt focus on?"""
|
|
976
|
-
|
|
977
|
-
reflection = await self.llm.generate(prompt)
|
|
978
|
-
|
|
979
|
-
return {
|
|
980
|
-
"level": "agent",
|
|
981
|
-
"actions": actions,
|
|
982
|
-
"evaluation": evaluation,
|
|
983
|
-
"reflection": reflection,
|
|
984
|
-
"recommendation": self._extract_recommendation(reflection),
|
|
985
|
-
}
|
|
986
|
-
|
|
987
|
-
async def _handle_orchestration_reflection(
|
|
988
|
-
self, request: ReflectionRequest
|
|
989
|
-
) -> Dict[str, Any]:
|
|
990
|
-
"""Reflection at orchestration level: Review agent work"""
|
|
991
|
-
|
|
992
|
-
agent_output = request.subject
|
|
993
|
-
parent_context = request.context
|
|
994
|
-
|
|
995
|
-
# Manager reviews
|
|
996
|
-
prompt = f"""As a manager, review this agent's work:
|
|
997
|
-
|
|
998
|
-
Output: {agent_output}
|
|
999
|
-
|
|
1000
|
-
Parent goals: {parent_context.get('goals')}
|
|
1001
|
-
Quality requirements: {parent_context.get('requirements')}
|
|
1002
|
-
|
|
1003
|
-
1. Did the agent meet requirements?
|
|
1004
|
-
2. Is the quality acceptable?
|
|
1005
|
-
3. Should we request revision?
|
|
1006
|
-
4. Should we reassign to different agent?
|
|
1007
|
-
5. What feedback should we give?"""
|
|
1008
|
-
|
|
1009
|
-
review = await self.llm.generate(prompt)
|
|
1010
|
-
|
|
1011
|
-
decision = self._parse_manager_decision(review)
|
|
1012
|
-
|
|
1013
|
-
return {
|
|
1014
|
-
"level": "orchestration",
|
|
1015
|
-
"agent_output": agent_output,
|
|
1016
|
-
"manager_review": review,
|
|
1017
|
-
"decision": decision, # "accept", "revise", "reassign"
|
|
1018
|
-
"feedback": self._extract_feedback(review),
|
|
1019
|
-
}
|
|
1020
|
-
|
|
1021
|
-
async def _handle_workflow_reflection(
|
|
1022
|
-
self, request: ReflectionRequest
|
|
1023
|
-
) -> Dict[str, Any]:
|
|
1024
|
-
"""Reflection at workflow level: Assess overall progress"""
|
|
1025
|
-
|
|
1026
|
-
workflow_state = request.subject
|
|
1027
|
-
workflow_context = request.context
|
|
1028
|
-
|
|
1029
|
-
prompt = f"""Review overall workflow progress:
|
|
1030
|
-
|
|
1031
|
-
Current state:
|
|
1032
|
-
{workflow_state}
|
|
1033
|
-
|
|
1034
|
-
Context:
|
|
1035
|
-
{workflow_context}
|
|
1036
|
-
|
|
1037
|
-
Assessment:
|
|
1038
|
-
1. Are we on track to complete the workflow?
|
|
1039
|
-
2. What stages are complete?
|
|
1040
|
-
3. What stages are blocked?
|
|
1041
|
-
4. Are there any critical issues?
|
|
1042
|
-
5. Should we pivot or continue current approach?"""
|
|
1043
|
-
|
|
1044
|
-
assessment = await self.llm.generate(prompt)
|
|
1045
|
-
|
|
1046
|
-
return {
|
|
1047
|
-
"level": "workflow",
|
|
1048
|
-
"state": workflow_state,
|
|
1049
|
-
"assessment": assessment,
|
|
1050
|
-
"status": self._extract_workflow_status(assessment),
|
|
1051
|
-
"recommendations": self._extract_recommendations(assessment),
|
|
1052
|
-
}
|
|
1053
|
-
|
|
1054
|
-
def _extract_recommendation(self, reflection: str) -> str:
|
|
1055
|
-
"""Extract action recommendation from reflection"""
|
|
1056
|
-
# Simple heuristic - in production, use more sophisticated parsing
|
|
1057
|
-
if "try" in reflection.lower():
|
|
1058
|
-
return "retry_with_changes"
|
|
1059
|
-
if "different" in reflection.lower():
|
|
1060
|
-
return "try_different_approach"
|
|
1061
|
-
return "continue_current"
|
|
1062
|
-
|
|
1063
|
-
def _parse_manager_decision(self, review: str) -> str:
|
|
1064
|
-
"""Parse manager's decision on agent work"""
|
|
1065
|
-
review_lower = review.lower()
|
|
1066
|
-
if "revise" in review_lower or "redo" in review_lower:
|
|
1067
|
-
return "revise"
|
|
1068
|
-
if "reassign" in review_lower or "different agent" in review_lower:
|
|
1069
|
-
return "reassign"
|
|
1070
|
-
if "accept" in review_lower or "good" in review_lower:
|
|
1071
|
-
return "accept"
|
|
1072
|
-
return "undecided"
|
|
1073
|
-
|
|
1074
|
-
def _extract_feedback(self, text: str) -> str:
|
|
1075
|
-
"""Extract feedback from manager review"""
|
|
1076
|
-
lines = text.split("\n")
|
|
1077
|
-
feedback_lines = [l for l in lines if l.strip()]
|
|
1078
|
-
return "\n".join(feedback_lines[:3])
|
|
1079
|
-
|
|
1080
|
-
def _extract_workflow_status(self, assessment: str) -> str:
|
|
1081
|
-
"""Extract overall workflow status"""
|
|
1082
|
-
if "blocked" in assessment.lower():
|
|
1083
|
-
return "blocked"
|
|
1084
|
-
if "on track" in assessment.lower():
|
|
1085
|
-
return "on_track"
|
|
1086
|
-
if "issue" in assessment.lower():
|
|
1087
|
-
return "issues"
|
|
1088
|
-
return "unknown"
|
|
1089
|
-
|
|
1090
|
-
def _extract_recommendations(self, assessment: str) -> List[str]:
|
|
1091
|
-
"""Extract action recommendations from assessment"""
|
|
1092
|
-
# In production, use more sophisticated extraction
|
|
1093
|
-
recommendations = []
|
|
1094
|
-
if "pivot" in assessment.lower():
|
|
1095
|
-
recommendations.append("Consider pivoting strategy")
|
|
1096
|
-
if "urgent" in assessment.lower():
|
|
1097
|
-
recommendations.append("Address urgent issues immediately")
|
|
1098
|
-
return recommendations
|
|
1099
|
-
```
|
|
1100
|
-
|
|
1101
|
-
---
|
|
1102
|
-
|
|
1103
|
-
## 6. Monitoring and Metrics
|
|
1104
|
-
|
|
1105
|
-
### TypeScript: Reflection Metrics Collection
|
|
1106
|
-
|
|
1107
|
-
```typescript
|
|
1108
|
-
interface ReflectionMetrics {
|
|
1109
|
-
taskId: string;
|
|
1110
|
-
attemptNumber: number;
|
|
1111
|
-
reflectionApproach: string;
|
|
1112
|
-
qualityBefore: number;
|
|
1113
|
-
qualityAfter: number;
|
|
1114
|
-
tokensUsed: number;
|
|
1115
|
-
wallClockMs: number;
|
|
1116
|
-
errorsIdentified: string[];
|
|
1117
|
-
success: boolean;
|
|
1118
|
-
timestamp: Date;
|
|
1119
|
-
}
|
|
1120
|
-
|
|
1121
|
-
class ReflectionMetricsCollector {
|
|
1122
|
-
private metrics: ReflectionMetrics[] = [];
|
|
1123
|
-
private readonly storageInterval = 10; // Store after every 10 metrics
|
|
1124
|
-
|
|
1125
|
-
recordReflectionCycle(
|
|
1126
|
-
taskId: string,
|
|
1127
|
-
attempt: number,
|
|
1128
|
-
approach: string,
|
|
1129
|
-
qualityBefore: number,
|
|
1130
|
-
qualityAfter: number,
|
|
1131
|
-
tokensUsed: number,
|
|
1132
|
-
wallClockMs: number,
|
|
1133
|
-
errors: string[],
|
|
1134
|
-
success: boolean
|
|
1135
|
-
): void {
|
|
1136
|
-
const metric: ReflectionMetrics = {
|
|
1137
|
-
taskId,
|
|
1138
|
-
attemptNumber: attempt,
|
|
1139
|
-
reflectionApproach: approach,
|
|
1140
|
-
qualityBefore,
|
|
1141
|
-
qualityAfter,
|
|
1142
|
-
tokensUsed,
|
|
1143
|
-
wallClockMs,
|
|
1144
|
-
errorsIdentified: errors,
|
|
1145
|
-
success,
|
|
1146
|
-
timestamp: new Date(),
|
|
1147
|
-
};
|
|
1148
|
-
|
|
1149
|
-
this.metrics.push(metric);
|
|
1150
|
-
|
|
1151
|
-
if (this.metrics.length % this.storageInterval === 0) {
|
|
1152
|
-
this.persistMetrics();
|
|
1153
|
-
}
|
|
1154
|
-
}
|
|
1155
|
-
|
|
1156
|
-
getMetricsSummary(): {
|
|
1157
|
-
totalTasks: number;
|
|
1158
|
-
successRate: number;
|
|
1159
|
-
avgQualityImprovement: number;
|
|
1160
|
-
avgTokensPerTask: number;
|
|
1161
|
-
avgAttemptsPerTask: number;
|
|
1162
|
-
} {
|
|
1163
|
-
if (this.metrics.length === 0) {
|
|
1164
|
-
return {
|
|
1165
|
-
totalTasks: 0,
|
|
1166
|
-
successRate: 0,
|
|
1167
|
-
avgQualityImprovement: 0,
|
|
1168
|
-
avgTokensPerTask: 0,
|
|
1169
|
-
avgAttemptsPerTask: 0,
|
|
1170
|
-
};
|
|
1171
|
-
}
|
|
1172
|
-
|
|
1173
|
-
const uniqueTasks = new Set(this.metrics.map((m) => m.taskId)).size;
|
|
1174
|
-
const successCount = this.metrics.filter((m) => m.success).length;
|
|
1175
|
-
const totalImprovement = this.metrics.reduce(
|
|
1176
|
-
(sum, m) => sum + (m.qualityAfter - m.qualityBefore),
|
|
1177
|
-
0
|
|
1178
|
-
);
|
|
1179
|
-
const totalTokens = this.metrics.reduce((sum, m) => sum + m.tokensUsed, 0);
|
|
1180
|
-
const totalAttempts = this.metrics.length;
|
|
1181
|
-
|
|
1182
|
-
return {
|
|
1183
|
-
totalTasks: uniqueTasks,
|
|
1184
|
-
successRate: successCount / this.metrics.length,
|
|
1185
|
-
avgQualityImprovement: totalImprovement / this.metrics.length,
|
|
1186
|
-
avgTokensPerTask: totalTokens / uniqueTasks,
|
|
1187
|
-
avgAttemptsPerTask: totalAttempts / uniqueTasks,
|
|
1188
|
-
};
|
|
1189
|
-
}
|
|
1190
|
-
|
|
1191
|
-
getApproachComparison(): Map<string, any> {
|
|
1192
|
-
const approaches = new Map<
|
|
1193
|
-
string,
|
|
1194
|
-
{ count: number; avgImprovement: number; successRate: number }
|
|
1195
|
-
>();
|
|
1196
|
-
|
|
1197
|
-
for (const metric of this.metrics) {
|
|
1198
|
-
if (!approaches.has(metric.reflectionApproach)) {
|
|
1199
|
-
approaches.set(metric.reflectionApproach, {
|
|
1200
|
-
count: 0,
|
|
1201
|
-
totalImprovement: 0,
|
|
1202
|
-
successCount: 0,
|
|
1203
|
-
});
|
|
1204
|
-
}
|
|
1205
|
-
|
|
1206
|
-
const stats = approaches.get(metric.reflectionApproach)!;
|
|
1207
|
-
stats.count += 1;
|
|
1208
|
-
stats.totalImprovement += metric.qualityAfter - metric.qualityBefore;
|
|
1209
|
-
if (metric.success) stats.successCount += 1;
|
|
1210
|
-
}
|
|
1211
|
-
|
|
1212
|
-
// Convert to comparison format
|
|
1213
|
-
const comparison = new Map<string, any>();
|
|
1214
|
-
for (const [approach, stats] of approaches) {
|
|
1215
|
-
comparison.set(approach, {
|
|
1216
|
-
count: stats.count,
|
|
1217
|
-
avgImprovement: stats.totalImprovement / stats.count,
|
|
1218
|
-
successRate: stats.successCount / stats.count,
|
|
1219
|
-
});
|
|
1220
|
-
}
|
|
1221
|
-
|
|
1222
|
-
return comparison;
|
|
1223
|
-
}
|
|
1224
|
-
|
|
1225
|
-
private persistMetrics(): void {
|
|
1226
|
-
// Save metrics to storage (database, file, analytics service)
|
|
1227
|
-
const metricsToSave = this.metrics.splice(0, this.storageInterval);
|
|
1228
|
-
console.log(
|
|
1229
|
-
`Persisting ${metricsToSave.length} metrics to storage...`
|
|
1230
|
-
);
|
|
1231
|
-
// this.storage.saveMetrics(metricsToSave);
|
|
1232
|
-
}
|
|
1233
|
-
}
|
|
1234
|
-
```
|
|
1235
|
-
|
|
1236
|
-
---
|
|
1237
|
-
|
|
1238
|
-
## 7. Orchestration Integration Example
|
|
1239
|
-
|
|
1240
|
-
### TypeScript: Reflection in Agent Orchestration
|
|
1241
|
-
|
|
1242
|
-
```typescript
|
|
1243
|
-
interface AgentTask {
|
|
1244
|
-
id: string;
|
|
1245
|
-
description: string;
|
|
1246
|
-
requiredQuality: number;
|
|
1247
|
-
maxAttempts: number;
|
|
1248
|
-
}
|
|
1249
|
-
|
|
1250
|
-
class OrchestratedAgentWithReflection {
|
|
1251
|
-
private reflectionLoop: ReflectionAgent;
|
|
1252
|
-
private loopDetection: LoopDetectionSystem;
|
|
1253
|
-
private metrics: ReflectionMetricsCollector;
|
|
1254
|
-
|
|
1255
|
-
async executeTask(task: AgentTask): Promise<{
|
|
1256
|
-
output: string;
|
|
1257
|
-
quality: number;
|
|
1258
|
-
attempts: number;
|
|
1259
|
-
}> {
|
|
1260
|
-
console.log(`[Task ${task.id}] Starting with reflection loop`);
|
|
1261
|
-
|
|
1262
|
-
let output = "";
|
|
1263
|
-
let quality = 0;
|
|
1264
|
-
let attempt = 0;
|
|
1265
|
-
|
|
1266
|
-
for (attempt = 1; attempt <= task.maxAttempts; attempt++) {
|
|
1267
|
-
// Step 1: Generate output
|
|
1268
|
-
const startTime = Date.now();
|
|
1269
|
-
output = await this.generateOutput(task);
|
|
1270
|
-
|
|
1271
|
-
// Step 2: Detect loops
|
|
1272
|
-
if (this.loopDetection.isInfiniteLoop(output, "", attempt)) {
|
|
1273
|
-
console.error(`[Task ${task.id}] Loop detected at attempt ${attempt}`);
|
|
1274
|
-
console.log(this.loopDetection.getRecoverySuggestion(attempt));
|
|
1275
|
-
break;
|
|
1276
|
-
}
|
|
1277
|
-
|
|
1278
|
-
// Step 3: Evaluate quality
|
|
1279
|
-
quality = await this.evaluateQuality(output, task);
|
|
1280
|
-
|
|
1281
|
-
const wallClockMs = Date.now() - startTime;
|
|
1282
|
-
|
|
1283
|
-
// Step 4: Record metrics
|
|
1284
|
-
this.metrics.recordReflectionCycle(
|
|
1285
|
-
task.id,
|
|
1286
|
-
attempt,
|
|
1287
|
-
"evidence_grounded",
|
|
1288
|
-
quality,
|
|
1289
|
-
quality,
|
|
1290
|
-
Math.floor(output.length / 4), // Rough token count
|
|
1291
|
-
wallClockMs,
|
|
1292
|
-
[],
|
|
1293
|
-
quality >= task.requiredQuality
|
|
1294
|
-
);
|
|
1295
|
-
|
|
1296
|
-
// Step 5: Check if done
|
|
1297
|
-
if (quality >= task.requiredQuality) {
|
|
1298
|
-
console.log(`[Task ${task.id}] Met quality threshold on attempt ${attempt}`);
|
|
1299
|
-
break;
|
|
1300
|
-
}
|
|
1301
|
-
|
|
1302
|
-
// Step 6: Reflect and improve
|
|
1303
|
-
if (attempt < task.maxAttempts) {
|
|
1304
|
-
console.log(`[Task ${task.id}] Reflecting to improve...`);
|
|
1305
|
-
output = await this.reflectionLoop.executeWithReflection(
|
|
1306
|
-
task.description
|
|
1307
|
-
);
|
|
1308
|
-
}
|
|
1309
|
-
|
|
1310
|
-
this.loopDetection.recordOutput(output, "");
|
|
1311
|
-
}
|
|
1312
|
-
|
|
1313
|
-
console.log(
|
|
1314
|
-
`[Task ${task.id}] Complete: Quality=${quality}, Attempts=${attempt}`
|
|
1315
|
-
);
|
|
1316
|
-
|
|
1317
|
-
return { output, quality, attempts: attempt };
|
|
1318
|
-
}
|
|
1319
|
-
|
|
1320
|
-
// ... implementation details ...
|
|
1321
|
-
}
|
|
1322
|
-
```
|
|
1323
|
-
|
|
1324
|
-
---
|
|
1325
|
-
|
|
1326
|
-
## Conclusion
|
|
1327
|
-
|
|
1328
|
-
These implementation patterns provide:
|
|
1329
|
-
|
|
1330
|
-
1. **Basic Reflection**: Simple generate→reflect→improve cycles
|
|
1331
|
-
2. **Advanced Reflection**: Evidence-grounded feedback with external tools
|
|
1332
|
-
3. **Safety**: Loop detection, token budgeting, security validation
|
|
1333
|
-
4. **Observability**: Metrics collection and performance monitoring
|
|
1334
|
-
5. **Orchestration**: Multi-level reflection in hierarchical systems
|
|
1335
|
-
|
|
1336
|
-
All patterns are designed to be:
|
|
1337
|
-
- **Composable**: Mix and match as needed
|
|
1338
|
-
- **Observable**: Integrate with monitoring systems
|
|
1339
|
-
- **Secure**: Include security checks and validation
|
|
1340
|
-
- **Efficient**: Respect token budgets and time constraints
|
|
1341
|
-
|