@stackmemoryai/stackmemory 0.3.16 → 0.3.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/README.md +48 -2
  2. package/dist/cli/commands/skills.js +15 -2
  3. package/dist/cli/commands/skills.js.map +2 -2
  4. package/dist/cli/index.js +113 -834
  5. package/dist/cli/index.js.map +3 -3
  6. package/dist/core/context/dual-stack-manager.js +1 -1
  7. package/dist/core/context/dual-stack-manager.js.map +1 -1
  8. package/dist/core/context/frame-manager.js +3 -0
  9. package/dist/core/context/frame-manager.js.map +2 -2
  10. package/dist/integrations/claude-code/subagent-client.js +106 -3
  11. package/dist/integrations/claude-code/subagent-client.js.map +2 -2
  12. package/dist/servers/railway/config.js +51 -0
  13. package/dist/servers/railway/config.js.map +7 -0
  14. package/dist/servers/railway/index-enhanced.js +156 -0
  15. package/dist/servers/railway/index-enhanced.js.map +7 -0
  16. package/dist/servers/railway/minimal.js +48 -3
  17. package/dist/servers/railway/minimal.js.map +2 -2
  18. package/dist/servers/railway/storage-test.js +455 -0
  19. package/dist/servers/railway/storage-test.js.map +7 -0
  20. package/dist/skills/claude-skills.js +13 -12
  21. package/dist/skills/claude-skills.js.map +2 -2
  22. package/dist/skills/recursive-agent-orchestrator.js +27 -18
  23. package/dist/skills/recursive-agent-orchestrator.js.map +2 -2
  24. package/dist/skills/unified-rlm-orchestrator.js.map +2 -2
  25. package/package.json +6 -18
  26. package/scripts/README-TESTING.md +186 -0
  27. package/scripts/analyze-cli-security.js +288 -0
  28. package/scripts/archive/add-phase-tasks-to-linear.js +163 -0
  29. package/scripts/archive/analyze-linear-duplicates.js +214 -0
  30. package/scripts/archive/analyze-remaining-duplicates.js +230 -0
  31. package/scripts/archive/analyze-sta-duplicates.js +292 -0
  32. package/scripts/archive/analyze-sta-graphql.js +399 -0
  33. package/scripts/archive/cancel-duplicate-tasks.ts +246 -0
  34. package/scripts/archive/check-all-duplicates.ts +419 -0
  35. package/scripts/archive/clean-duplicate-tasks.js +114 -0
  36. package/scripts/archive/cleanup-duplicate-tasks.ts +286 -0
  37. package/scripts/archive/create-phase-tasks.js +387 -0
  38. package/scripts/archive/delete-linear-duplicates.js +182 -0
  39. package/scripts/archive/delete-remaining-duplicates.js +158 -0
  40. package/scripts/archive/delete-sta-duplicates.js +201 -0
  41. package/scripts/archive/delete-sta-oauth.js +201 -0
  42. package/scripts/archive/export-sta-tasks.js +62 -0
  43. package/scripts/archive/install-auto-sync.js +266 -0
  44. package/scripts/archive/install-chromadb-hooks.sh +133 -0
  45. package/scripts/archive/install-enhanced-clear-hooks.sh +431 -0
  46. package/scripts/archive/install-post-task-hooks.sh +289 -0
  47. package/scripts/archive/install-stackmemory-hooks.sh +420 -0
  48. package/scripts/archive/merge-linear-duplicates-safe.ts +362 -0
  49. package/scripts/archive/merge-linear-duplicates.ts +180 -0
  50. package/scripts/archive/remove-sta-tasks.js +70 -0
  51. package/scripts/archive/setup-background-sync.sh +168 -0
  52. package/scripts/archive/setup-claude-auto-triggers.sh +181 -0
  53. package/scripts/archive/setup-claude-autostart.sh +305 -0
  54. package/scripts/archive/setup-git-hooks.sh +25 -0
  55. package/scripts/archive/setup-linear-oauth.sh +46 -0
  56. package/scripts/archive/setup-mcp.sh +113 -0
  57. package/scripts/archive/setup-railway-deployment.sh +81 -0
  58. package/scripts/auto-handoff.sh +262 -0
  59. package/scripts/background-sync-manager.js +416 -0
  60. package/scripts/benchmark-performance.ts +57 -0
  61. package/scripts/check-redis.ts +48 -0
  62. package/scripts/chromadb-auto-loader.sh +128 -0
  63. package/scripts/chromadb-context-loader.js +479 -0
  64. package/scripts/claude-chromadb-hook.js +460 -0
  65. package/scripts/claude-code-wrapper.sh +66 -0
  66. package/scripts/claude-linear-skill.js +455 -0
  67. package/scripts/claude-pre-commit.sh +302 -0
  68. package/scripts/claude-sm-autostart.js +532 -0
  69. package/scripts/claude-sm-setup.sh +367 -0
  70. package/scripts/claude-with-chromadb.sh +69 -0
  71. package/scripts/claude-worktree-manager.sh +323 -0
  72. package/scripts/claude-worktree-monitor.sh +371 -0
  73. package/scripts/claude-worktree-setup.sh +327 -0
  74. package/scripts/clean-linear-backlog.js +273 -0
  75. package/scripts/cleanup-old-sessions.sh +57 -0
  76. package/scripts/codex-wrapper.sh +88 -0
  77. package/scripts/create-sandbox.sh +269 -0
  78. package/scripts/debug-linear-update.js +174 -0
  79. package/scripts/delete-linear-tasks.js +167 -0
  80. package/scripts/deploy.sh +89 -0
  81. package/scripts/deployment/railway.sh +352 -0
  82. package/scripts/deployment/test-deployment.js +194 -0
  83. package/scripts/detect-and-rehydrate.js +162 -0
  84. package/scripts/detect-and-rehydrate.mjs +165 -0
  85. package/scripts/development/create-demo-tasks.js +143 -0
  86. package/scripts/development/debug-frame-test.js +16 -0
  87. package/scripts/development/demo-auto-sync.js +128 -0
  88. package/scripts/development/fix-all-imports.js +213 -0
  89. package/scripts/development/fix-imports.js +229 -0
  90. package/scripts/development/fix-lint-loop.cjs +103 -0
  91. package/scripts/development/fix-project-id.ts +161 -0
  92. package/scripts/development/fix-strict-mode-issues.ts +291 -0
  93. package/scripts/development/reorganize-structure.sh +228 -0
  94. package/scripts/development/test-persistence-direct.js +148 -0
  95. package/scripts/development/test-persistence.js +114 -0
  96. package/scripts/development/test-tasks.js +93 -0
  97. package/scripts/development/update-imports.js +212 -0
  98. package/scripts/fetch-linear-status.js +125 -0
  99. package/scripts/git-hooks/README.md +310 -0
  100. package/scripts/git-hooks/branch-context-manager.sh +342 -0
  101. package/scripts/git-hooks/post-checkout-stackmemory.sh +63 -0
  102. package/scripts/git-hooks/post-commit-stackmemory.sh +305 -0
  103. package/scripts/git-hooks/pre-commit-stackmemory.sh +275 -0
  104. package/scripts/hooks/cleanup-shell.sh +130 -0
  105. package/scripts/hooks/task-complete.sh +114 -0
  106. package/scripts/initialize.ts +129 -0
  107. package/scripts/install-claude-hooks-auto.js +104 -0
  108. package/scripts/install-claude-hooks.sh +133 -0
  109. package/scripts/install-global.sh +296 -0
  110. package/scripts/install.sh +235 -0
  111. package/scripts/linear-auto-sync.js +262 -0
  112. package/scripts/linear-auto-sync.sh +161 -0
  113. package/scripts/linear-sync-daemon.js +150 -0
  114. package/scripts/linear-task-review.js +237 -0
  115. package/scripts/list-linear-tasks.ts +178 -0
  116. package/scripts/mcp-proxy.js +66 -0
  117. package/scripts/opencode-wrapper.sh +85 -0
  118. package/scripts/publish-local.js +74 -0
  119. package/scripts/query-chromadb.ts +201 -0
  120. package/scripts/railway-env-setup.sh +39 -0
  121. package/scripts/reconcile-local-tasks.js +170 -0
  122. package/scripts/recreate-frames-db.js +89 -0
  123. package/scripts/setup/claude-integration.js +138 -0
  124. package/scripts/setup/configure-alias.js +125 -0
  125. package/scripts/setup/configure-codex-alias.js +161 -0
  126. package/scripts/setup/configure-opencode-alias.js +175 -0
  127. package/scripts/setup-claude-integration.js +204 -0
  128. package/scripts/setup-claude-integration.sh +183 -0
  129. package/scripts/setup.sh +31 -0
  130. package/scripts/show-linear-summary.ts +172 -0
  131. package/scripts/stackmemory-auto-handoff.sh +231 -0
  132. package/scripts/stackmemory-daemon.sh +40 -0
  133. package/scripts/start-linear-sync-daemon.sh +141 -0
  134. package/scripts/start-temporal-paradox.sh +214 -0
  135. package/scripts/status.ts +159 -0
  136. package/scripts/sync-and-clean-tasks.js +258 -0
  137. package/scripts/sync-frames-from-railway.js +228 -0
  138. package/scripts/sync-linear-graphql.js +303 -0
  139. package/scripts/sync-linear-tasks.js +186 -0
  140. package/scripts/test-auto-triggers.sh +57 -0
  141. package/scripts/test-browser-mcp.js +74 -0
  142. package/scripts/test-chromadb-full.js +115 -0
  143. package/scripts/test-chromadb-hooks.sh +28 -0
  144. package/scripts/test-chromadb-sync.ts +245 -0
  145. package/scripts/test-cli-security.js +293 -0
  146. package/scripts/test-hooks-persistence.sh +220 -0
  147. package/scripts/test-installation-scenarios.sh +359 -0
  148. package/scripts/test-installation.sh +224 -0
  149. package/scripts/test-mcp.js +163 -0
  150. package/scripts/test-pre-publish-quick.sh +75 -0
  151. package/scripts/test-quality-gates.sh +263 -0
  152. package/scripts/test-railway-db.js +222 -0
  153. package/scripts/test-redis-storage.ts +490 -0
  154. package/scripts/test-rlm-basic.sh +122 -0
  155. package/scripts/test-rlm-comprehensive.sh +260 -0
  156. package/scripts/test-rlm-e2e.sh +268 -0
  157. package/scripts/test-rlm-simple.js +90 -0
  158. package/scripts/test-rlm.js +110 -0
  159. package/scripts/test-session-handoff.sh +165 -0
  160. package/scripts/test-shell-integration.sh +275 -0
  161. package/scripts/testing/ab-test-runner.ts +508 -0
  162. package/scripts/testing/collect-metrics.ts +457 -0
  163. package/scripts/testing/quick-effectiveness-demo.js +187 -0
  164. package/scripts/testing/real-performance-test.js +422 -0
  165. package/scripts/testing/run-effectiveness-tests.sh +176 -0
  166. package/scripts/testing/scripts/testing/ab-test-runner.js +363 -0
  167. package/scripts/testing/scripts/testing/collect-metrics.js +292 -0
  168. package/scripts/testing/simple-effectiveness-test.js +310 -0
  169. package/scripts/testing/src/core/context/context-bridge.js +253 -0
  170. package/scripts/testing/src/core/context/frame-manager.js +746 -0
  171. package/scripts/testing/src/core/context/shared-context-layer.js +437 -0
  172. package/scripts/testing/src/core/database/database-adapter.js +54 -0
  173. package/scripts/testing/src/core/errors/index.js +291 -0
  174. package/scripts/testing/src/core/errors/recovery.js +268 -0
  175. package/scripts/testing/src/core/monitoring/logger.js +145 -0
  176. package/scripts/testing/src/core/retrieval/context-retriever.js +516 -0
  177. package/scripts/testing/src/core/session/index.js +1 -0
  178. package/scripts/testing/src/core/session/session-manager.js +323 -0
  179. package/scripts/testing/src/core/trace/cli-trace-wrapper.js +140 -0
  180. package/scripts/testing/src/core/trace/db-trace-wrapper.js +251 -0
  181. package/scripts/testing/src/core/trace/debug-trace.js +398 -0
  182. package/scripts/testing/src/core/trace/index.js +120 -0
  183. package/scripts/testing/src/core/trace/linear-api-wrapper.js +204 -0
  184. package/scripts/update-linear-status.js +268 -0
  185. package/scripts/update-linear-tasks-fixed.js +284 -0
  186. package/templates/claude-hooks/hooks.json +5 -0
  187. package/templates/claude-hooks/on-clear.js +56 -0
  188. package/templates/claude-hooks/on-startup.js +56 -0
  189. package/templates/claude-hooks/tool-use-trace.js +67 -0
  190. package/dist/features/tui/components/analytics-panel.js +0 -157
  191. package/dist/features/tui/components/analytics-panel.js.map +0 -7
  192. package/dist/features/tui/components/frame-visualizer.js +0 -377
  193. package/dist/features/tui/components/frame-visualizer.js.map +0 -7
  194. package/dist/features/tui/components/pr-tracker.js +0 -135
  195. package/dist/features/tui/components/pr-tracker.js.map +0 -7
  196. package/dist/features/tui/components/session-monitor.js +0 -299
  197. package/dist/features/tui/components/session-monitor.js.map +0 -7
  198. package/dist/features/tui/components/subagent-fleet.js +0 -395
  199. package/dist/features/tui/components/subagent-fleet.js.map +0 -7
  200. package/dist/features/tui/components/task-board.js +0 -1139
  201. package/dist/features/tui/components/task-board.js.map +0 -7
  202. package/dist/features/tui/index.js +0 -408
  203. package/dist/features/tui/index.js.map +0 -7
  204. package/dist/features/tui/services/data-service.js +0 -641
  205. package/dist/features/tui/services/data-service.js.map +0 -7
  206. package/dist/features/tui/services/linear-task-reader.js +0 -102
  207. package/dist/features/tui/services/linear-task-reader.js.map +0 -7
  208. package/dist/features/tui/services/websocket-client.js +0 -162
  209. package/dist/features/tui/services/websocket-client.js.map +0 -7
  210. package/dist/features/tui/terminal-compat.js +0 -220
  211. package/dist/features/tui/terminal-compat.js.map +0 -7
  212. package/dist/features/tui/types.js +0 -1
  213. package/dist/features/tui/types.js.map +0 -7
@@ -0,0 +1,508 @@
1
+ #!/usr/bin/env node
2
import { spawn } from 'child_process';
import * as fs from 'fs/promises';
import * as path from 'path';
import { pathToFileURL } from 'url';

import { MetricsCollector } from './collect-metrics.js';
6
+
7
/**
 * A scripted developer workflow that the runner executes once per variant.
 */
export interface TestScenario {
  /** Key under which the scenario is stored and looked up. */
  id: string;
  /** Human-readable title printed in run logs and result summaries. */
  name: string;
  /** Free-form explanation of the work being simulated. */
  description: string;
  /** Category of work the scenario represents. */
  type: 'feature_dev' | 'bug_fix' | 'refactor' | 'complex_debug';
  /** Coarse difficulty rating. */
  complexity: 'low' | 'medium' | 'high' | 'very_high';
  /** Anticipated time to finish the scenario, in minutes. */
  expectedDuration: number;
  /** Actions to perform, in execution order. */
  steps: Array<WorkflowStep>;
  /** Pauses injected after particular steps. */
  contextBreaks: Array<ContextBreak>;
}
17
+
18
/**
 * One unit of work inside a scenario.
 */
export interface WorkflowStep {
  /** Short description of the step; printed while the step runs. */
  action: string;
  /** Shell command to execute for this step; when absent the step is simulated. */
  command?: string;
  /** Output the command is expected to produce. */
  expectedOutput?: string;
  /** Whether the step relies on context carried over from earlier steps. */
  requiresContext?: boolean;
}
24
+
25
/**
 * A pause in the workflow that forces context to be re-established.
 */
export interface ContextBreak {
  /** Number of completed steps (1-based) after which the break occurs. */
  afterStep: number;
  /** What caused the break. */
  type: 'session_end' | 'interruption' | 'team_handoff';
  /** Length of the break, in minutes. */
  duration: number;
}
30
+
31
/**
 * Outcome of executing one scenario under one variant.
 */
export interface TestRun {
  /** Identifier of the run; also used as the basename of the saved JSON file. */
  id: string;
  /** Which side of the A/B comparison this run belongs to. */
  variant: 'with_stackmemory' | 'without_stackmemory';
  /** The scenario that was executed. */
  scenario: TestScenario;
  /** Moment the run began. */
  startTime: Date;
  /** Moment the run finished; unset while the run is still in progress. */
  endTime?: Date;
  /** True when every step completed without throwing. */
  success: boolean;
  /** Messages of the errors that aborted the run. */
  errors: string[];
  /** Metrics reported by the collector when the session ended. */
  metrics: Record<string, unknown>;
  /** Commands executed during the run, in order. */
  recordings: Array<ToolCallRecording>;
}
42
+
43
/**
 * Record of a single tool invocation captured during a run.
 */
export interface ToolCallRecording {
  /** Name of the tool that was invoked. */
  tool: string;
  /** Arguments the tool was invoked with. */
  parameters: Record<string, unknown>;
  /** Whatever the tool produced. */
  result: unknown;
  /** When the recording was made. */
  timestamp: Date;
  /** Elapsed time in milliseconds. */
  duration: number;
}
50
+
51
/**
 * Runs simulated developer workflows twice -- once with StackMemory enabled
 * and once without -- and compares the metrics gathered for each variant.
 *
 * Steps that carry no `command` are simulated with a short randomized delay;
 * context breaks are simulated with a fixed one-second delay rather than the
 * nominal break duration.
 */
export class ABTestRunner {
  private collector: MetricsCollector;
  private scenarios: Map<string, TestScenario> = new Map();
  private runs: TestRun[] = [];
  private stackMemoryEnabled = false;

  constructor() {
    this.collector = new MetricsCollector();
    this.loadScenarios();
  }

  /** Registers the built-in scenarios, keyed by their id. */
  private loadScenarios(): void {
    const scenarios: TestScenario[] = [
      {
        id: 'multi_session_feature',
        name: 'E-commerce checkout flow',
        type: 'feature_dev',
        description:
          'Implement a complete checkout flow with payment integration',
        complexity: 'high',
        expectedDuration: 180,
        steps: [
          {
            action: 'Design checkout flow architecture',
            requiresContext: false,
          },
          { action: 'Implement cart validation', requiresContext: true },
          { action: 'Add payment gateway integration', requiresContext: true },
          { action: 'Create checkout UI components', requiresContext: true },
          { action: 'Add order confirmation', requiresContext: true },
          { action: 'Write integration tests', requiresContext: true },
        ],
        contextBreaks: [
          { afterStep: 2, duration: 480, type: 'session_end' }, // Overnight
          { afterStep: 4, duration: 60, type: 'interruption' }, // Lunch break
        ],
      },
      {
        id: 'complex_debugging',
        name: 'Performance issue in production',
        type: 'complex_debug',
        description:
          'Debug and fix a memory leak causing performance degradation',
        complexity: 'high',
        expectedDuration: 120,
        steps: [
          { action: 'Analyze performance metrics', requiresContext: false },
          { action: 'Profile memory usage', requiresContext: true },
          { action: 'Identify memory leak source', requiresContext: true },
          { action: 'Implement fix', requiresContext: true },
          { action: 'Verify fix with tests', requiresContext: true },
        ],
        contextBreaks: [{ afterStep: 3, duration: 30, type: 'team_handoff' }],
      },
      {
        id: 'large_refactoring',
        name: 'Migrate authentication system',
        type: 'refactor',
        description: 'Refactor from session-based to JWT authentication',
        complexity: 'very_high',
        expectedDuration: 360,
        steps: [
          {
            action: 'Analyze current auth implementation',
            requiresContext: false,
          },
          { action: 'Design JWT architecture', requiresContext: true },
          { action: 'Implement JWT service', requiresContext: true },
          { action: 'Migrate user sessions', requiresContext: true },
          { action: 'Update API endpoints', requiresContext: true },
          { action: 'Migrate frontend auth', requiresContext: true },
          { action: 'Add refresh token logic', requiresContext: true },
          { action: 'Update tests', requiresContext: true },
          { action: 'Performance testing', requiresContext: true },
        ],
        contextBreaks: [
          { afterStep: 2, duration: 480, type: 'session_end' },
          { afterStep: 4, duration: 480, type: 'session_end' },
          { afterStep: 6, duration: 60, type: 'interruption' },
          { afterStep: 7, duration: 480, type: 'session_end' },
        ],
      },
      {
        id: 'rapid_bug_fixes',
        name: 'Fix 5 related bugs',
        type: 'bug_fix',
        description: 'Fix multiple related bugs in the user registration flow',
        complexity: 'medium',
        expectedDuration: 90,
        steps: [
          { action: 'Fix email validation bug', requiresContext: false },
          { action: 'Fix password strength checker', requiresContext: true },
          { action: 'Fix duplicate user check', requiresContext: true },
          { action: 'Fix confirmation email sending', requiresContext: true },
          { action: 'Fix redirect after registration', requiresContext: true },
        ],
        contextBreaks: [
          { afterStep: 1, duration: 15, type: 'interruption' },
          { afterStep: 2, duration: 15, type: 'interruption' },
          { afterStep: 3, duration: 15, type: 'interruption' },
          { afterStep: 4, duration: 15, type: 'interruption' },
        ],
      },
    ];

    for (const scenario of scenarios) {
      this.scenarios.set(scenario.id, scenario);
    }
  }

  /** Prepares the metrics collector. Call once before running scenarios. */
  async initialize(): Promise<void> {
    await this.collector.initialize();
  }

  /**
   * Marks StackMemory as enabled and starts the daemon when the status
   * command reports failure.
   *
   * @throws Error when the daemon start command fails.
   */
  async enableStackMemory(): Promise<void> {
    console.log('Enabling StackMemory...');
    this.stackMemoryEnabled = true;

    // Start StackMemory daemon if not running
    try {
      await this.executeCommand('stackmemory-daemon status');
    } catch {
      await this.executeCommand('stackmemory-daemon start');
    }
  }

  /** Marks StackMemory as disabled and stops the daemon on a best-effort basis. */
  async disableStackMemory(): Promise<void> {
    console.log('Disabling StackMemory...');
    this.stackMemoryEnabled = false;

    // Stop StackMemory daemon
    try {
      await this.executeCommand('stackmemory-daemon stop');
    } catch {
      // Ignore if already stopped
    }
  }

  /**
   * Runs `command` through the system shell and resolves with its stdout.
   *
   * The command string is handed to a shell unmodified, so it must come from
   * trusted input only.
   *
   * @param command Shell command line to execute.
   * @returns Everything the command wrote to stdout.
   * @throws Error when the process cannot be spawned or exits with a
   *   non-zero code; the message is the captured stderr when there is any.
   */
  private executeCommand(command: string): Promise<string> {
    return new Promise((resolve, reject) => {
      const child = spawn(command, { shell: true });
      let output = '';
      let error = '';

      child.stdout.on('data', (data) => {
        output += data.toString();
      });

      child.stderr.on('data', (data) => {
        error += data.toString();
      });

      // 'error' is emitted when the process cannot be spawned. Without a
      // listener it surfaces as an uncaught exception and the promise would
      // never settle.
      child.on('error', reject);

      child.on('close', (code) => {
        if (code === 0) {
          resolve(output);
        } else {
          reject(new Error(error || `Command failed with code ${code}`));
        }
      });
    });
  }

  /**
   * Executes every step of a scenario under the given variant, records the
   * outcome, and persists it as JSON.
   *
   * A failing step does not reject: the failure is recorded in `errors`,
   * `success` stays false, and the remaining steps are skipped.
   *
   * @param scenarioId Id of a registered scenario.
   * @param variant Which side of the comparison to run.
   * @returns The completed run, including the collector's final metrics.
   * @throws Error when `scenarioId` is not registered.
   */
  async runScenario(
    scenarioId: string,
    variant: 'with_stackmemory' | 'without_stackmemory'
  ): Promise<TestRun> {
    const scenario = this.scenarios.get(scenarioId);
    if (!scenario) {
      throw new Error(`Scenario ${scenarioId} not found`);
    }

    console.log(`\nRunning scenario: ${scenario.name} (${variant})`);
    console.log(`Expected duration: ${scenario.expectedDuration} minutes`);
    console.log(`Complexity: ${scenario.complexity}`);
    console.log(`Context breaks: ${scenario.contextBreaks.length}`);

    // Enable/disable StackMemory based on variant
    if (variant === 'with_stackmemory') {
      await this.enableStackMemory();
    } else {
      await this.disableStackMemory();
    }

    const runId = `${scenarioId}-${variant}-${Date.now()}`;
    const sessionId = await this.collector.startSession(variant);

    const run: TestRun = {
      id: runId,
      scenario,
      variant,
      startTime: new Date(),
      metrics: {},
      recordings: [],
      success: false,
      errors: [],
    };

    try {
      // Execute scenario steps
      for (let i = 0; i < scenario.steps.length; i++) {
        const step = scenario.steps[i];
        console.log(`\nStep ${i + 1}/${scenario.steps.length}: ${step.action}`);

        await this.executeStep(step, sessionId, run);

        // afterStep counts completed steps, hence the 1-based comparison.
        const contextBreak = scenario.contextBreaks.find(
          (cb) => cb.afterStep === i + 1
        );
        if (contextBreak) {
          console.log(
            `\nContext break: ${contextBreak.type} for ${contextBreak.duration} minutes`
          );
          await this.simulateContextBreak(contextBreak, sessionId);
        }
      }

      run.success = true;
    } catch (error: unknown) {
      // Normalize so the collector always receives an Error instance.
      const failure = error instanceof Error ? error : new Error(String(error));
      console.error(`Scenario failed: ${failure.message}`);
      run.errors.push(failure.message);
      // This is the only place a step failure is reported to the collector.
      this.collector.trackError(sessionId, failure);
    }

    // Collect final metrics
    run.endTime = new Date();
    run.metrics = await this.collector.endSession(sessionId);

    // Save run results
    this.runs.push(run);
    await this.saveRun(run);

    return run;
  }

  /**
   * Performs a single step: runs its command when it has one, otherwise
   * waits for a randomized 2-5 second delay.
   *
   * @param step The step to perform.
   * @param sessionId Collector session the step belongs to.
   * @param run Run that receives the command recording.
   * @throws Error when the step's command fails. The caller reports the
   *   failure to the collector, so it is not tracked here as well.
   */
  private async executeStep(
    step: WorkflowStep,
    sessionId: string,
    run: TestRun
  ): Promise<void> {
    const startTime = Date.now();

    // Track tool call
    this.collector.trackToolCall(sessionId, 'execute_step');

    // If step requires context and we're testing with StackMemory
    if (step.requiresContext && this.stackMemoryEnabled) {
      const contextTime =
        await this.collector.measureContextReestablishment(sessionId);
      console.log(` Context retrieved in ${(contextTime / 1000).toFixed(2)}s`);
    }

    if (step.command) {
      const output = await this.executeCommand(step.command);

      // Record tool call
      run.recordings.push({
        timestamp: new Date(),
        tool: 'command',
        parameters: { command: step.command },
        result: output,
        duration: Date.now() - startTime,
      });
    } else {
      // Simulate work being done
      await this.simulateWork(2000 + Math.random() * 3000);
    }

    // Randomly simulate decisions and frame creation
    if (Math.random() > 0.5) {
      this.collector.trackFrameCreation(sessionId, `frame-${Date.now()}`);
    }

    if (Math.random() > 0.7) {
      this.collector.trackDecision(sessionId, `Decision for ${step.action}`);
    }

    console.log(
      ` Step completed in ${((Date.now() - startTime) / 1000).toFixed(2)}s`
    );
  }

  /**
   * Simulates a break in the workflow and the cost of picking the work back
   * up afterwards.
   *
   * @param contextBreak The break to simulate.
   * @param sessionId Collector session the break belongs to.
   */
  private async simulateContextBreak(
    contextBreak: ContextBreak,
    sessionId: string
  ): Promise<void> {
    console.log(` Simulating ${contextBreak.duration} minute break...`);

    if (contextBreak.type === 'session_end' && this.stackMemoryEnabled) {
      // Simulate session end with StackMemory
      this.collector.trackFrameClosure(sessionId, 'session-frame', true);
    }

    // The nominal break duration is not waited out; a fixed one-second
    // pause stands in for it.
    await this.simulateWork(1000);

    if (this.stackMemoryEnabled) {
      const reestablishTime =
        await this.collector.measureContextReestablishment(sessionId);
      console.log(
        ` Context reestablished in ${(reestablishTime / 1000).toFixed(2)}s`
      );
    } else {
      // Without StackMemory, simulate manual context reestablishment
      console.log(` Manual context reestablishment required (est. 5 minutes)`);
      this.collector.trackRework(sessionId);
    }
  }

  /** Resolves after `ms` milliseconds. */
  private simulateWork(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Runs every registered scenario under both variants (baseline first),
   * then prints the comparison.
   */
  async runAllScenarios(): Promise<void> {
    console.log('='.repeat(60));
    console.log('Starting A/B Test Suite');
    console.log('='.repeat(60));

    for (const scenario of this.scenarios.values()) {
      // Run without StackMemory
      await this.runScenario(scenario.id, 'without_stackmemory');

      // Run with StackMemory
      await this.runScenario(scenario.id, 'with_stackmemory');
    }

    await this.generateComparison();
  }

  /**
   * Reads `metrics.completionTime` from a run as milliseconds, treating a
   * missing or non-numeric value as 0.
   */
  private static completionTimeMs(run: TestRun): number {
    const value = run.metrics.completionTime;
    return typeof value === 'number' && Number.isFinite(value) ? value : 0;
  }

  /**
   * Prints a per-scenario completion-time comparison for every scenario that
   * has a run under both variants, then asks the collector to write the
   * detailed report.
   *
   * The improvement percentage is reported as "n/a" when the baseline run
   * has no positive completion time, since the ratio is undefined then.
   */
  async generateComparison(): Promise<void> {
    const withStackMemory = this.runs.filter(
      (r) => r.variant === 'with_stackmemory'
    );
    const withoutStackMemory = this.runs.filter(
      (r) => r.variant === 'without_stackmemory'
    );

    console.log('\n' + '='.repeat(60));
    console.log('A/B Test Results Summary');
    console.log('='.repeat(60));

    for (const scenario of this.scenarios.values()) {
      const withRun = withStackMemory.find(
        (r) => r.scenario.id === scenario.id
      );
      const withoutRun = withoutStackMemory.find(
        (r) => r.scenario.id === scenario.id
      );

      if (withRun && withoutRun) {
        const baselineMs = ABTestRunner.completionTimeMs(withoutRun);
        const treatmentMs = ABTestRunner.completionTimeMs(withRun);

        console.log(`\n${scenario.name}:`);
        console.log(
          ` Without StackMemory: ${(baselineMs / 1000 / 60).toFixed(2)} min`
        );
        console.log(
          ` With StackMemory: ${(treatmentMs / 1000 / 60).toFixed(2)} min`
        );

        if (baselineMs > 0) {
          const improvement = ((baselineMs - treatmentMs) / baselineMs) * 100;
          console.log(` Improvement: ${improvement.toFixed(1)}%`);
        } else {
          // Dividing by a zero/missing baseline would print NaN or Infinity.
          console.log(
            ' Improvement: n/a (no baseline completion time recorded)'
          );
        }
      }
    }

    // Generate detailed report
    await this.collector.generateReport('./test-results/ab-test-report.md');
  }

  /** Writes the run as pretty-printed JSON under `./test-results/runs`. */
  private async saveRun(run: TestRun): Promise<void> {
    const outputDir = './test-results/runs';
    await fs.mkdir(outputDir, { recursive: true });

    const filename = path.join(outputDir, `${run.id}.json`);
    await fs.writeFile(filename, JSON.stringify(run, null, 2));

    console.log(`Run saved to: ${filename}`);
  }

  /**
   * Runs one scenario under both variants and prints the comparison. When
   * the id is unknown, lists the available scenarios and returns without
   * running anything.
   *
   * @param scenarioId Id of the scenario to run.
   */
  async runSpecificScenario(scenarioId: string): Promise<void> {
    if (!this.scenarios.has(scenarioId)) {
      console.error(`Scenario '${scenarioId}' not found`);
      console.log('Available scenarios:');
      for (const [id, scenario] of this.scenarios) {
        console.log(` - ${id}: ${scenario.name}`);
      }
      return;
    }

    // Run both variants
    await this.runScenario(scenarioId, 'without_stackmemory');
    await this.runScenario(scenarioId, 'with_stackmemory');

    await this.generateComparison();
  }
}
461
+
462
// CLI interface
//
// Runs only when this module is the script Node was started with. The script
// path is converted with pathToFileURL so the comparison also holds for
// Windows paths and for paths containing characters that are percent-encoded
// in import.meta.url (spaces, '#', ...).
if (
  process.argv[1] !== undefined &&
  import.meta.url === pathToFileURL(process.argv[1]).href
) {
  const runner = new ABTestRunner();

  /**
   * Dispatches on the first CLI argument: `all`, `scenario <id>` or `list`.
   * Anything else prints the usage text.
   */
  const main = async (): Promise<void> => {
    await runner.initialize();

    const command = process.argv[2];
    const scenarioId = process.argv[3];

    switch (command) {
      case 'all':
        await runner.runAllScenarios();
        break;

      case 'scenario':
        if (!scenarioId) {
          console.error('Please specify a scenario ID');
          process.exit(1);
        }
        await runner.runSpecificScenario(scenarioId);
        break;

      case 'list':
        console.log('Available scenarios:');
        console.log(' - multi_session_feature: E-commerce checkout flow');
        console.log(' - complex_debugging: Performance issue in production');
        console.log(' - large_refactoring: Migrate authentication system');
        console.log(' - rapid_bug_fixes: Fix 5 related bugs');
        break;

      default:
        console.log(
          'Usage: ab-test-runner.ts [all|scenario|list] [scenario-id]'
        );
        console.log('');
        console.log('Commands:');
        console.log(' all - Run all test scenarios');
        console.log(' scenario - Run a specific scenario');
        console.log(' list - List available scenarios');
    }

    process.exit(0);
  };

  main().catch((error: unknown) => {
    // Report the failure through the exit status too, so shells and CI
    // do not treat a crashed run as a success.
    console.error(error);
    process.exit(1);
  });
}