testchimp-runner-core 0.0.35 → 0.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +7 -4
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +73 -15
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-som-handler.d.ts +1 -2
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
- package/dist/orchestrator/page-som-handler.js +51 -25
- package/dist/orchestrator/page-som-handler.js.map +1 -1
- package/package.json +6 -1
- package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
- package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
- package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
- package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
- package/plandocs/INTEGRATION_COMPLETE.md +0 -322
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
- package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
- package/plandocs/PHASE_1_COMPLETE.md +0 -165
- package/plandocs/PHASE_1_SUMMARY.md +0 -184
- package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
- package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
- package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
- package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
- package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
- package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
- package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
- package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
- package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
- package/plandocs/exploratory-mode-support-v2.plan.md +0 -953
- package/plandocs/exploratory-mode-support.plan.md +0 -928
- package/plandocs/journey-id-tracking-addendum.md +0 -227
- package/releasenotes/RELEASE_0.0.26.md +0 -165
- package/releasenotes/RELEASE_0.0.27.md +0 -236
- package/releasenotes/RELEASE_0.0.28.md +0 -286
- package/src/auth-config.ts +0 -84
- package/src/credit-usage-service.ts +0 -188
- package/src/env-loader.ts +0 -103
- package/src/execution-service.ts +0 -996
- package/src/file-handler.ts +0 -104
- package/src/index.ts +0 -432
- package/src/llm-facade.ts +0 -821
- package/src/llm-provider.ts +0 -53
- package/src/model-constants.ts +0 -35
- package/src/orchestrator/decision-parser.ts +0 -139
- package/src/orchestrator/index.ts +0 -58
- package/src/orchestrator/orchestrator-agent.ts +0 -1282
- package/src/orchestrator/orchestrator-prompts.ts +0 -786
- package/src/orchestrator/page-som-handler.ts +0 -1565
- package/src/orchestrator/som-types.ts +0 -188
- package/src/orchestrator/tool-registry.ts +0 -184
- package/src/orchestrator/tools/check-page-ready.ts +0 -75
- package/src/orchestrator/tools/extract-data.ts +0 -92
- package/src/orchestrator/tools/index.ts +0 -15
- package/src/orchestrator/tools/inspect-page.ts +0 -42
- package/src/orchestrator/tools/recall-history.ts +0 -72
- package/src/orchestrator/tools/refresh-som-markers.ts +0 -69
- package/src/orchestrator/tools/take-screenshot.ts +0 -128
- package/src/orchestrator/tools/verify-action-result.ts +0 -159
- package/src/orchestrator/tools/view-previous-screenshot.ts +0 -103
- package/src/orchestrator/types.ts +0 -291
- package/src/playwright-mcp-service.ts +0 -224
- package/src/progress-reporter.ts +0 -144
- package/src/prompts.ts +0 -842
- package/src/providers/backend-proxy-llm-provider.ts +0 -91
- package/src/providers/local-llm-provider.ts +0 -38
- package/src/scenario-service.ts +0 -252
- package/src/scenario-worker-class.ts +0 -1110
- package/src/script-utils.ts +0 -203
- package/src/types.ts +0 -239
- package/src/utils/browser-utils.ts +0 -348
- package/src/utils/coordinate-converter.ts +0 -162
- package/src/utils/page-info-retry.ts +0 -65
- package/src/utils/page-info-utils.ts +0 -285
- package/testchimp-runner-core-0.0.35.tgz +0 -0
- package/tsconfig.json +0 -19
|
@@ -1,291 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Orchestrator Agent Types
|
|
3
|
-
* Core types for the tool-using, memory-maintaining orchestrator agent
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
/**
 * Structured self-assessment the agent emits to guide its next iteration.
 */
export interface SelfReflection {
  /** Free-form guidance, e.g. "Try data-testid selectors instead of text". */
  guidanceForNext: string;
  /** Raised by the agent when it notices it is repeating the same approach. */
  detectingLoop: boolean;
  /** Optional explanation, e.g. "Tried text-based selectors 3 times, switching to IDs". */
  loopReasoning?: string;
}
|
|
14
|
-
|
|
15
|
-
/**
 * Iteration-scoped memo the agent leaves for itself to keep tactical continuity.
 * Intentionally looser than the structured SelfReflection: the agent writes freely.
 */
export interface NoteToFutureSelf {
  /** Iteration in which the note was written. */
  fromIteration: number;
  /**
   * FREE-FORM tactical text for the next iteration. Examples:
   *  - "Tried #menu-btn, failed. Will try SVG child next."
   *  - "Plan: Hover over menu to reveal dropdown, then click Profile option."
   *  - "Element loads async. Wait 2s after page load before clicking."
   */
  content: string;
}
|
|
27
|
-
|
|
28
|
-
/**
 * Pointer action addressed by position instead of selector (fallback when selectors fail).
 * Positions are percentages so the action is independent of screen resolution.
 */
export interface CoordinateAction {
  type: 'coordinate';
  action: 'click' | 'doubleClick' | 'rightClick' | 'hover' | 'drag' | 'fill' | 'scroll';

  /**
   * Primary X position as a percentage (0.0 to 100.0, 3 decimal precision for ~1px accuracy).
   * 0 = left edge, 100 = right edge.
   */
  xPercent: number;
  /** Primary Y position as a percentage: 0 = top edge, 100 = bottom edge. */
  yPercent: number;

  /** Drag actions: destination X percentage. */
  toXPercent?: number;
  /** Drag actions: destination Y percentage. */
  toYPercent?: number;

  /** Fill actions (click then type): the text to enter. */
  value?: string;

  /** Scroll actions: positive = scroll down, negative = scroll up. */
  scrollAmount?: number;
}
|
|
50
|
-
|
|
51
|
-
/**
 * Memory the agent carries along its journey through the scenario.
 */
export interface JourneyMemory {
  /** Step-by-step history (TEXT only, no screenshots). */
  history: MemoryStep[];

  /** Learnings accumulated throughout the journey. */
  experiences: string[];

  /** Data extracted and saved for later steps. */
  extractedData: Record<string, string>;

  /** Latest note from the agent (persists across steps for continuity). */
  latestNote?: NoteToFutureSelf;
}
|
|
67
|
-
|
|
68
|
-
/**
 * One entry of the journey history.
 */
export interface MemoryStep {
  stepNumber: number;
  iteration?: number;
  /** Human-readable summary, e.g. "Filled login form". */
  action: string;
  /** Executed code, e.g. "await page.fill('#email', ...)". */
  code: string;
  result: 'success' | 'failure';
  /** What was observed, e.g. "Form submitted, redirected to dashboard". */
  observation: string;
  /** Error details if the step failed. */
  error?: string;
  /** URL after the action. */
  url: string;
  timestamp: number;
}
|
|
82
|
-
|
|
83
|
-
/**
 * A request from the agent to invoke a named tool with the given parameters.
 */
export interface ToolCall {
  /** Name of the tool to invoke. */
  name: string;
  /** Parameters passed to the tool, keyed by parameter name. */
  params: Record<string, any>;
}
|
|
90
|
-
|
|
91
|
-
/**
 * Outcome of executing a tool.
 */
export interface ToolResult {
  success: boolean;
  data?: any;
  error?: string;
  /** For exploratory actions: text-based learning extracted from the screenshot. */
  learning?: string;
}
|
|
100
|
-
|
|
101
|
-
/**
 * Everything the agent outputs for a single iteration.
 */
export interface AgentDecision {
  // --- Tool requests (the agent can call multiple tools) ---
  toolCalls?: ToolCall[];
  toolReasoning?: string;
  /** Wait for tool results before proceeding with commands. */
  needsToolResults?: boolean;

  // --- Command batch (executed sequentially) ---
  /** Plain Playwright commands. */
  commands?: string[];
  commandReasoning?: string;

  /** Self-reflection for the next iteration. */
  selfReflection?: SelfReflection;

  /** Note to future self: free-form tactical note for the next iteration. */
  noteToFutureSelf?: string;

  /** Coordinate-based action (fallback when selectors fail). */
  coordinateAction?: CoordinateAction;

  /** Memory updates. */
  memoryUpdate?: {
    action: string;
    observation: string;
    extractedData?: Record<string, any>;
  };

  /** Learnings to add to experiences. */
  experiences?: string[];

  // --- Termination decision ---
  status: 'complete' | 'stuck' | 'infeasible' | 'continue';
  statusReasoning: string;
  /** Overall reasoning for this iteration. */
  reasoning: string;

  /** Blocker detection (e.g., unexpected modals, tours, cookie consents). */
  blockerDetected?: {
    /** What is blocking (e.g., "Cookie consent modal"). */
    description: string;
    /** Commands to dismiss/clear it. */
    clearingCommands: string[];
  };

  /** Step re-evaluation (last resort after repeated failures). */
  stepReEvaluation?: {
    /** True if the agent is questioning the current step. */
    detected: boolean;
    issue: 'prior_incomplete' | 'already_done' | 'wrong_order' | null;
    /** Why the agent thinks the step order is off. */
    explanation: string;
  };

  /** Meta-learning: suggested prompt improvements based on journey learnings. */
  debugInfo?: {
    /** Confident suggestions for improving system/user prompts. */
    suggestedPromptUpdates?: string;
    /** Why these updates would help. */
    reasoning?: string;
  };
}
|
|
157
|
-
|
|
158
|
-
/**
 * Input handed to the agent at the start of every iteration.
 */
export interface AgentContext {
  // --- Goals & progress ---
  overallGoal: string;
  currentStepGoal: string;
  stepNumber: number;
  totalSteps: number;
  completedSteps: string[];
  remainingSteps: string[];

  // --- Current state (fresh) ---
  /** PageInfo from getEnhancedPageInfo. */
  currentPageInfo: any;
  currentURL: string;

  /** Recent memory (6-7 steps). */
  recentSteps: MemoryStep[];

  // --- Learnings & data ---
  experiences: string[];
  extractedData: Record<string, string>;

  /** Note from the previous iteration (tactical continuity). */
  noteFromPreviousIteration?: NoteToFutureSelf;

  /** Test data / credentials for exploration. */
  testDataPrompt?: string;

  // --- SoM (Set-of-Marks) screenshot with visual markers ---
  /** Data URL of the screenshot with SoM markers. */
  somScreenshot?: string;
  /** Text map of SoM IDs to element details for disambiguation. */
  somElementMap?: string;

  // --- Repair mode context (undefined for script gen/exploration) ---
  /** Steps completed before the current one (e.g., ["1. Navigate", "2. Login"]). */
  priorSteps?: string[];
  /** Steps after the current one (e.g., ["5. Submit", "6. Verify"]). */
  nextSteps?: string[];

  /** Tool results from this iteration (if any). */
  toolResults?: Record<string, ToolResult>;
}
|
|
198
|
-
|
|
199
|
-
/**
 * Settings for exploration mode.
 */
export interface ExplorationMode {
  /** Whether exploration mode is active. */
  enabled: boolean;
  /** Journey-specific focus, e.g. "Explore Dashboard and test all widgets". */
  explorationPrompt: string;
  /** Test data, credentials context. */
  testDataPrompt?: string;
  /** Budget limit (default: 50) - the agent can stop earlier. */
  maxExplorationSteps?: number;
}
|
|
208
|
-
|
|
209
|
-
/**
 * Configurable guardrails for the orchestrator agent. Every field is optional.
 */
export interface AgentConfig {
  // --- Per-step limits ---
  /** Default: 8 */
  maxIterationsPerStep?: number;
  /** Default: 5 */
  maxToolCallsPerIteration?: number;
  /** Default: 5 */
  maxCommandsPerIteration?: number;
  /** Default: 3 (Phase 2) */
  maxExploratoryActionsPerIteration?: number;
  /** Default: 10 (Phase 2) */
  maxExploratoryActionsPerStep?: number;

  // --- Scenario-wide limits ---
  /** Default: 3 (increased from 2 to allow for UI changes) */
  maxConsecutiveStepFailures?: number;
  /** Default: 50 (across all steps) */
  maxTotalIterations?: number;
  /**
   * Default: true (try subsequent steps even if commands fail).
   * NOTE: ALWAYS stops on agent_stuck/infeasible (explicit agent decision).
   */
  continueOnStepFailure?: boolean;

  // --- Memory limits ---
  /** Default: 20 */
  maxExperiences?: number;
  /** Default: 100 */
  maxHistorySize?: number;
  /** Default: 7 */
  recentStepsCount?: number;

  // --- Timeouts ---
  /** Default: 30000 (30s) */
  commandTimeout?: number;
  /** Default: 2000 (2s) (Phase 2) */
  explorationTimeout?: number;

  // --- Allowed actions ---
  /** Default: ['hover', 'click_info', 'click_menu', 'focus'] (Phase 2) */
  allowedExplorationActions?: string[];
  /** For navigate_to_url validation. */
  allowedDomains?: string[];

  // --- Feature flags ---
  /** Default: false (experimental - disable until stable) */
  enableCoordinateMode?: boolean;
  /** Default: true (Set-of-Marks visual mode) */
  useSoM?: boolean;
  /** Default: false (use semantic selectors first) */
  somUseSomIdBasedCommands?: boolean;
  /** Default: false (if true, strongly discourage coord commands except as absolute last resort) */
  somRestrictCoordinates?: boolean;

  /** Exploration mode settings. */
  explorationMode?: ExplorationMode;
}
|
|
248
|
-
|
|
249
|
-
/**
 * What the orchestrator returns after executing one step.
 */
export interface OrchestratorStepResult {
  success: boolean;
  commands: string[];
  iterations: number;
  terminationReason?: 'complete' | 'stuck' | 'infeasible' | 'system_limit' | 'agent_stuck';
  memory: JourneyMemory;
  error?: string;
}
|
|
260
|
-
|
|
261
|
-
/**
 * Default value for every AgentConfig field.
 */
export const DEFAULT_AGENT_CONFIG: Required<AgentConfig> = {
  // Per-step limits
  maxIterationsPerStep: 8,
  maxToolCallsPerIteration: 5,
  maxCommandsPerIteration: 5,
  maxExploratoryActionsPerIteration: 3,
  maxExploratoryActionsPerStep: 10,

  // Scenario-wide limits
  maxConsecutiveStepFailures: 3, // was 2 previously
  maxTotalIterations: 50,
  continueOnStepFailure: true, // keep going with later steps when one fails

  // Memory limits
  maxExperiences: 20,
  maxHistorySize: 100,
  recentStepsCount: 7,

  // Timeouts
  commandTimeout: 30000,
  explorationTimeout: 2000,

  // Allowed actions
  allowedExplorationActions: ['hover', 'click_info', 'click_menu', 'focus'],
  allowedDomains: [],

  // Feature flags
  enableCoordinateMode: false, // experimental, off by default
  useSoM: true, // Set-of-Marks visual mode, on by default
  somUseSomIdBasedCommands: false, // semantic selectors come first
  somRestrictCoordinates: false, // coordinates remain a valid fallback (for exploration)

  // Exploration mode
  explorationMode: {
    enabled: false,
    explorationPrompt: '',
    testDataPrompt: undefined,
    maxExplorationSteps: 50
  }
};
|
|
291
|
-
|
|
@@ -1,224 +0,0 @@
|
|
|
1
|
-
import { ScriptResult, PlaywrightConfig } from './types';
|
|
2
|
-
import { initializeBrowser } from './utils/browser-utils';
|
|
3
|
-
|
|
4
|
-
/**
 * Service that executes Playwright scripts in-process, against a browser obtained
 * from `initializeBrowser`.
 *
 * A job is an optional prescript, a main script and an optional postscript. Each one
 * is compiled with `new Function` and invoked with `page`, `browser` and `context`.
 *
 * SECURITY: script text is evaluated with the privileges of the host process. Only
 * pass scripts that come from a trusted source.
 */
export class PlaywrightMCPService {
  private isConnected = false;
  private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;

  constructor() {
    // No initialization needed for direct Playwright execution
  }

  /**
   * Set a logger callback for capturing execution logs.
   *
   * @param logger Receives every message this service logs, together with its level.
   */
  setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
    this.logger = logger;
  }

  /**
   * Forward a message to the configured logger. When no logger is set the message
   * is dropped: there is deliberately no console fallback.
   */
  private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
    if (this.logger) {
      this.logger(message, level);
    }
  }

  /**
   * Turn any thrown value into a printable message. Scripts may throw non-Error
   * values, for which `.message` would be `undefined`.
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Mark the service as ready. `executeJob` refuses to run until this has been called.
   *
   * @throws Error if the logger callback throws while reporting progress.
   */
  async initialize(): Promise<void> {
    try {
      this.log('Initializing Playwright service...');

      // No specific initialization needed for direct Playwright execution
      this.isConnected = true;
      this.log('Playwright service initialized successfully');
    } catch (error) {
      throw new Error(`Failed to initialize Playwright service: ${error}`);
    }
  }

  /**
   * Execute a complete job (prescript + script + postscript).
   *
   * @param prescript  Optional code run before the main script.
   * @param script     Main script; its outcome decides the job's `success`.
   * @param postscript Optional code run after the main script, even when it failed.
   * @param config     Passed through to `initializeBrowser`.
   * @returns Per-phase results plus the overall wall-clock time in milliseconds.
   * @throws Error('Service not initialized') when called before `initialize()`.
   */
  async executeJob(prescript: string | undefined, script: string, postscript: string | undefined, config?: PlaywrightConfig): Promise<{
    success: boolean;
    results: {
      prescript?: ScriptResult;
      script: ScriptResult;
      postscript?: ScriptResult;
    };
    executionTime: number;
    error?: string;
  }> {
    if (!this.isConnected) {
      throw new Error('Service not initialized');
    }

    try {
      // Execute the job directly using Playwright
      return await this.executeScriptDirectly(prescript, script, postscript, config);
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error occurred';
      return {
        success: false,
        results: {
          // Mirror the job-level error so the script result is never a failure without a reason.
          script: { success: false, output: '', error: message, executionTime: 0 }
        },
        executionTime: 0,
        error: message
      };
    }
  }

  /**
   * Prepare the script content for execution: text that already looks like a test
   * file is returned unchanged, anything else is wrapped in a single `test(...)`.
   *
   * NOTE(review): nothing in this class calls this method and `config` is unused;
   * confirm whether it can be removed.
   */
  private prepareScript(script: string, config?: PlaywrightConfig): string {
    // If the script looks like a test file, return as-is
    if (script.includes('test(') || script.includes('describe(')) {
      return script;
    }

    // If it's a list of Playwright commands, wrap in a test
    return `
test('executed script', async ({ page }) => {
  ${script}
});
`;
  }

  /**
   * Compile and run one phase of a job, capturing the outcome instead of throwing.
   *
   * @param source         Code to run; evaluated as the body of an async function.
   * @param successMessage Text stored in `output` when the phase succeeds.
   * @returns The phase result, including how long the phase took in milliseconds.
   */
  private async runPhase(
    source: string,
    successMessage: string,
    page: any,
    browser: any,
    context: any
  ): Promise<ScriptResult> {
    const phaseStart = Date.now();
    try {
      // Compilation happens inside the try so a syntax error is reported as a phase failure.
      const phaseFunction = new Function('page', 'browser', 'context', `
        return (async () => {
          ${source}
        })();
      `);
      await phaseFunction(page, browser, context);
      return { success: true, output: successMessage, error: '', executionTime: Date.now() - phaseStart };
    } catch (error: unknown) {
      return {
        success: false,
        output: '',
        error: PlaywrightMCPService.describeError(error),
        executionTime: Date.now() - phaseStart
      };
    }
  }

  /**
   * Execute script directly using Playwright.
   *
   * Opens a browser through `initializeBrowser`, runs the phases in order and always
   * closes the browser afterwards. A failing prescript or postscript is recorded in
   * `results` but does not change `success`, which follows the main script only.
   */
  private async executeScriptDirectly(
    prescript: string | undefined,
    script: string,
    postscript: string | undefined,
    config?: PlaywrightConfig
  ): Promise<{
    success: boolean;
    results: {
      prescript?: ScriptResult;
      script: ScriptResult;
      postscript?: ScriptResult;
    };
    executionTime: number;
    error?: string;
  }> {
    const startTime = Date.now();

    let browser: any | undefined;

    try {
      // Use the centralized browser initialization utility
      const browserInstance = await initializeBrowser(config, undefined, undefined, this.logger);
      browser = browserInstance.browser;
      const context: any = browserInstance.context;
      const page: any = browserInstance.page;

      const results: {
        prescript?: ScriptResult;
        script: ScriptResult;
        postscript?: ScriptResult;
      } = {
        script: { success: false, output: '', error: '', executionTime: 0 }
      };

      if (prescript) {
        results.prescript = await this.runPhase(prescript, 'Prescript executed successfully', page, browser, context);
      }

      results.script = await this.runPhase(script, 'Script executed successfully', page, browser, context);

      if (postscript) {
        results.postscript = await this.runPhase(postscript, 'Postscript executed successfully', page, browser, context);
      }

      return {
        success: results.script.success,
        results,
        executionTime: Date.now() - startTime
      };
    } catch (error: unknown) {
      const message = PlaywrightMCPService.describeError(error);
      return {
        success: false,
        results: {
          script: { success: false, output: '', error: message, executionTime: 0 }
        },
        executionTime: Date.now() - startTime,
        error: message
      };
    } finally {
      if (browser) {
        try {
          await browser.close();
        } catch (closeError: unknown) {
          // A failed close must not replace the job result computed above.
          this.log(`Failed to close browser: ${PlaywrightMCPService.describeError(closeError)}`, 'warn');
        }
      }
    }
  }

  /**
   * Close the service. After this call `isReady()` returns false and `executeJob`
   * rejects until `initialize()` is called again.
   */
  async close(): Promise<void> {
    try {
      // No cleanup needed for direct Playwright execution
      this.isConnected = false;
      this.log('Playwright service closed');
    } catch (error) {
      this.log(`Error during shutdown: ${error}`, 'error');
    }
  }

  /**
   * Check if the service is ready, i.e. initialized and not yet closed.
   */
  isReady(): boolean {
    return this.isConnected;
  }
}
|
package/src/progress-reporter.ts
DELETED
|
@@ -1,144 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Progress Reporter Interface
|
|
3
|
-
* Allows consumers to track execution progress (logs, DB writes, etc.)
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
/**
 * Status of a single step's execution.
 * The string values match the scriptservice StepExecutionStatus enum.
 */
export enum StepExecutionStatus {
  /** The step finished successfully. */
  SUCCESS = 'SUCCESS_STEP_EXECUTION',
  /** The step failed. */
  FAILURE = 'FAILURE_STEP_EXECUTION',
  /** The step is still running. */
  IN_PROGRESS = 'IN_PROGRESS_STEP_EXECUTION'
}
|
|
15
|
-
|
|
16
|
-
/**
 * Progress report for one step (camelCase for TypeScript).
 * The structure matches scriptservice's SmartTestExecutionStep / ScriptGenStep.
 */
export interface StepProgress {
  /** Job ID for DB keying. */
  jobId: string;
  /** Unique step identifier. */
  stepId?: string;
  /** Step index (1-based). */
  stepNumber: number;
  /** Step description. */
  description: string;
  /** Playwright command executed. */
  code?: string;
  /** Screenshot as data URL (data:image/png;base64,...). */
  screenshotDataUrl?: string;
  /** Execution status. */
  status: StepExecutionStatus;
  /** Error message if failed. */
  error?: string;
  /** For repair mode. */
  wasRepaired?: boolean;
  /** Internal tracking. */
  subActionCount?: number;
  /** Internal tracking. */
  attempt?: number;

  // --- Agent transparency (for orchestrator mode) ---
  /** Which iteration of the agent loop. */
  agentIteration?: number;
  /** Agent's reasoning for this iteration. */
  agentReasoning?: string;
  /** SelfReflection guidance for the next iteration. */
  agentSelfReflection?: any;
  /** Learnings from this iteration. */
  agentExperiences?: string[];
  /** Tools the agent used. */
  agentToolsUsed?: string[];
  /** Agent's status decision. */
  agentStatus?: string;
}
|
|
41
|
-
|
|
42
|
-
/**
 * Progress report for a whole job (camelCase for TypeScript).
 */
export interface JobProgress {
  /** Job identifier. */
  jobId: string;
  status: 'started' | 'in_progress' | 'completed' | 'failed';
  /** Current step being executed. */
  currentStep?: number;
  /** Total steps in the scenario. */
  totalSteps?: number;
  /** Test name. */
  testName?: string;
  /** Generated/repaired script. */
  script?: string;
  /** Error message if failed. */
  error?: string;
}
|
|
54
|
-
|
|
55
|
-
/**
 * Record of the tokens consumed by one model call.
 */
export interface TokenUsage {
  jobId: string;
  stepNumber?: number;
  iteration?: number;
  inputTokens: number;
  outputTokens: number;
  includesImage: boolean;
  model: string;
  timestamp: number;
}
|
|
68
|
-
|
|
69
|
-
/**
 * Step details passed to the lifecycle callbacks.
 */
export interface StepInfo {
  stepId?: string;
  stepNumber: number;
  description: string;
  code?: string;
}
|
|
78
|
-
|
|
79
|
-
/**
 * Callbacks through which external consumers observe execution progress.
 * Every member is optional; implement only what the consumer needs.
 */
export interface ProgressReporter {
  /**
   * Invoked when a step starts, updates, or completes.
   * - VS Extension/GitHub: Log to console
   * - Script Service: Write to DB, upload screenshot to GCS
   */
  onStepProgress?(progress: StepProgress): Promise<void>;

  /**
   * Invoked when the overall job status changes.
   * - VS Extension/GitHub: Log status
   * - Script Service: Update job in DB
   */
  onJobProgress?(progress: JobProgress): Promise<void>;

  /**
   * Invoked when a script is generated/updated.
   */
  onScriptGenerated?(jobId: string, script: string, testName: string): Promise<void>;

  /**
   * Invoked when a script is repaired/updated.
   */
  onScriptRepaired?(jobId: string, originalScript: string, repairedScript: string, confidence: number): Promise<void>;

  /**
   * Invoked when tokens are used (for cost tracking).
   * - VS Extension/GitHub: Log token usage
   * - Script Service: Store in DB for analytics
   */
  onTokensUsed?(usage: TokenUsage): Promise<void>;

  /**
   * Generic logging (for environments that don't need structured progress).
   */
  log?(message: string, level?: 'log' | 'error' | 'warn'): void;

  // ---------------------------------------------------------------------------
  // LIFECYCLE CALLBACKS (optional - used by scriptservice, ignored by local clients)
  // ---------------------------------------------------------------------------

  /**
   * Invoked before test execution starts.
   * - Script Service: Initialize browser context, set up DB records
   * - VS Extension/GitHub: Not used (ignore)
   */
  beforeStartTest?(page: any, browser: any, context: any): Promise<void>;

  /**
   * Invoked before each step execution.
   * - Script Service: Update step status to IN_PROGRESS in DB
   * - VS Extension/GitHub: Not used (ignore)
   */
  beforeStepStart?(step: StepInfo, page: any): Promise<void>;

  /**
   * Invoked after test execution completes (success or failure).
   * - Script Service: Write final status to DB, cleanup resources
   * - VS Extension/GitHub: Not used (return value is sufficient)
   */
  afterEndTest?(status: 'passed' | 'failed', error?: string, page?: any): Promise<void>;
}
|
|
144
|
-
|