testchimp-runner-core 0.0.35 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  2. package/dist/orchestrator/orchestrator-agent.js +7 -4
  3. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  4. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  5. package/dist/orchestrator/orchestrator-prompts.js +73 -15
  6. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  7. package/dist/orchestrator/page-som-handler.d.ts +1 -2
  8. package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
  9. package/dist/orchestrator/page-som-handler.js +51 -25
  10. package/dist/orchestrator/page-som-handler.js.map +1 -1
  11. package/package.json +6 -1
  12. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  13. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  14. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  15. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  16. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  17. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  18. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  19. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  20. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  21. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  22. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  23. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  24. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  25. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  26. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  27. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  28. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  29. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  30. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  31. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  32. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  33. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  34. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  35. package/plandocs/exploratory-mode-support-v2.plan.md +0 -953
  36. package/plandocs/exploratory-mode-support.plan.md +0 -928
  37. package/plandocs/journey-id-tracking-addendum.md +0 -227
  38. package/releasenotes/RELEASE_0.0.26.md +0 -165
  39. package/releasenotes/RELEASE_0.0.27.md +0 -236
  40. package/releasenotes/RELEASE_0.0.28.md +0 -286
  41. package/src/auth-config.ts +0 -84
  42. package/src/credit-usage-service.ts +0 -188
  43. package/src/env-loader.ts +0 -103
  44. package/src/execution-service.ts +0 -996
  45. package/src/file-handler.ts +0 -104
  46. package/src/index.ts +0 -432
  47. package/src/llm-facade.ts +0 -821
  48. package/src/llm-provider.ts +0 -53
  49. package/src/model-constants.ts +0 -35
  50. package/src/orchestrator/decision-parser.ts +0 -139
  51. package/src/orchestrator/index.ts +0 -58
  52. package/src/orchestrator/orchestrator-agent.ts +0 -1282
  53. package/src/orchestrator/orchestrator-prompts.ts +0 -786
  54. package/src/orchestrator/page-som-handler.ts +0 -1565
  55. package/src/orchestrator/som-types.ts +0 -188
  56. package/src/orchestrator/tool-registry.ts +0 -184
  57. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  58. package/src/orchestrator/tools/extract-data.ts +0 -92
  59. package/src/orchestrator/tools/index.ts +0 -15
  60. package/src/orchestrator/tools/inspect-page.ts +0 -42
  61. package/src/orchestrator/tools/recall-history.ts +0 -72
  62. package/src/orchestrator/tools/refresh-som-markers.ts +0 -69
  63. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  64. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  65. package/src/orchestrator/tools/view-previous-screenshot.ts +0 -103
  66. package/src/orchestrator/types.ts +0 -291
  67. package/src/playwright-mcp-service.ts +0 -224
  68. package/src/progress-reporter.ts +0 -144
  69. package/src/prompts.ts +0 -842
  70. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  71. package/src/providers/local-llm-provider.ts +0 -38
  72. package/src/scenario-service.ts +0 -252
  73. package/src/scenario-worker-class.ts +0 -1110
  74. package/src/script-utils.ts +0 -203
  75. package/src/types.ts +0 -239
  76. package/src/utils/browser-utils.ts +0 -348
  77. package/src/utils/coordinate-converter.ts +0 -162
  78. package/src/utils/page-info-retry.ts +0 -65
  79. package/src/utils/page-info-utils.ts +0 -285
  80. package/testchimp-runner-core-0.0.35.tgz +0 -0
  81. package/tsconfig.json +0 -19
@@ -1,53 +0,0 @@
1
- /**
2
- * LLM Provider Interface
3
- * Abstraction for LLM calls - allows different implementations (backend proxy, local, etc.)
4
- */
5
-
6
- /**
7
- * Labeled image for multi-image LLM requests
8
- * Used for before/after comparisons or multi-screenshot analysis
9
- */
10
- export interface LabeledImage {
11
- label: string; // e.g., "Before", "After", "Screenshot 1"
12
- dataUrl: string; // Base64 data URL (data:image/png;base64,...)
13
- }
14
-
15
- /**
16
- * LLM request - camelCase for TypeScript
17
- */
18
- export interface LLMRequest {
19
- model?: string;
20
- systemPrompt?: string;
21
- userPrompt?: string;
22
- imageUrl?: string; // For vision models (data URL) - backward compatible, single image
23
- images?: LabeledImage[]; // For multi-image requests (e.g., before/after comparison)
24
- }
25
-
26
- /**
27
- * LLM response
28
- */
29
- export interface LLMResponse {
30
- answer: string;
31
- usage?: {
32
- inputTokens: number;
33
- outputTokens: number;
34
- };
35
- }
36
-
37
- /**
38
- * Abstract LLM provider interface
39
- * Implementations can call backend proxy, local LLM, or any other service
40
- */
41
- export interface LLMProvider {
42
- /**
43
- * Call LLM with given request
44
- * Implementations handle authentication, API calls, retries, etc.
45
- */
46
- callLLM(request: LLMRequest): Promise<LLMResponse>;
47
-
48
- /**
49
- * Optional: Set logger for LLM provider to log its activities
50
- */
51
- setLogger?(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void;
52
- }
53
-
@@ -1,35 +0,0 @@
1
- /**
2
- * Model constants for LLM interactions
3
- *
4
- * Centralized model configuration to make it easy to update models across the codebase.
5
- */
6
-
7
- /**
8
- * Default model for complex reasoning tasks:
9
- * - Command generation (Playwright selectors)
10
- * - Goal completion checks (critical decisions)
11
- * - Repair suggestions (complex logic)
12
- * - Agent orchestration (decision-making)
13
- */
14
- export const DEFAULT_MODEL = 'gpt-5-mini';
15
-
16
- /**
17
- * Simpler model for structured/guided tasks:
18
- * - Scenario breakdown (structured splitting)
19
- * - Screenshot need assessment (yes/no decision)
20
- * - Repair confidence assessment (comparison task)
21
- * - Test name generation (text formatting)
22
- * - Hashtag generation (semantic tags)
23
- * - Script parsing (extract steps)
24
- * - Final script merging (text concatenation)
25
- */
26
- export const DEFAULT_SIMPLER_MODEL = 'gpt-4o-mini';
27
-
28
- /**
29
- * Vision-capable model for visual diagnostics:
30
- * - Screenshot analysis
31
- * - Visual element identification
32
- * - UI blocker detection
33
- */
34
- export const VISION_MODEL = 'gpt-5-mini';
35
-
@@ -1,139 +0,0 @@
1
- /**
2
- * Decision Parser Utility
3
- * Parses and validates agent decisions from LLM responses
4
- */
5
-
6
- import { AgentDecision } from './types';
7
- import { isSomVerification, isSomCommand } from './som-types';
8
-
9
- export class DecisionParser {
10
- private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
11
-
12
- constructor(logger?: (message: string, level?: 'log' | 'error' | 'warn') => void) {
13
- this.logger = logger;
14
- }
15
-
16
- /**
17
- * Parse agent decision from LLM response
18
- */
19
- parse(response: string): AgentDecision {
20
- try {
21
- // Extract JSON from response
22
- const jsonMatch = response.match(/\{[\s\S]*\}/);
23
- if (!jsonMatch) {
24
- this.logger?.(`[DecisionParser] ✗ No JSON found in LLM response`, 'error');
25
- this.logger?.(`[DecisionParser] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
26
- throw new Error('No JSON found in response');
27
- }
28
-
29
- const parsed = JSON.parse(jsonMatch[0]);
30
-
31
- // Validate required fields
32
- // Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
33
- if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
34
- this.logger?.(`[DecisionParser] ✗ Missing required fields in parsed JSON`, 'error');
35
- this.logger?.(`[DecisionParser] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
36
- this.logger?.(`[DecisionParser] 📄 PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
37
- this.logger?.(`[DecisionParser] ❌ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
38
- throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
39
- }
40
-
41
- // Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
42
- if (!parsed.reasoning && parsed.statusReasoning) {
43
- parsed.reasoning = parsed.statusReasoning;
44
- }
45
-
46
- return parsed as AgentDecision;
47
-
48
- } catch (error: any) {
49
- this.logger?.(`[DecisionParser] ✗ Failed to parse agent decision: ${error.message}`, 'error');
50
-
51
- // Only log full response if not already logged above
52
- if (!error.message.includes('Missing required fields') && !error.message.includes('No JSON found')) {
53
- this.logger?.(`[DecisionParser] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
54
- }
55
-
56
- // Return fallback
57
- return {
58
- status: 'stuck',
59
- statusReasoning: 'Failed to parse agent response',
60
- reasoning: `Parse error: ${error.message}`
61
- };
62
- }
63
- }
64
-
65
- /**
66
- * Log agent decision for debugging
67
- */
68
- log(decision: AgentDecision, iteration: number): void {
69
- this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);
70
-
71
- if (decision.selfReflection) {
72
- this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
73
- this.logger?.(`[Orchestrator] Next: ${decision.selfReflection.guidanceForNext}`);
74
- if (decision.selfReflection.detectingLoop) {
75
- this.logger?.(`[Orchestrator] 🔄 LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
76
- }
77
- }
78
-
79
- if (decision.toolCalls && decision.toolCalls.length > 0) {
80
- this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
81
- if (decision.toolReasoning) {
82
- this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
83
- }
84
- }
85
-
86
- if (decision.blockerDetected) {
87
- this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
88
- this.logger?.(`[Orchestrator] 🧹 Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
89
- }
90
-
91
- if (decision.stepReEvaluation?.detected) {
92
- this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
93
- this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
94
- }
95
-
96
- if (decision.commands && decision.commands.length > 0) {
97
- this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
98
- decision.commands.slice(0, 3).forEach((cmd: any, i) => {
99
- // Handle string commands (standard mode), SomCommand, and SomVerification objects
100
- if (typeof cmd === 'string') {
101
- this.logger?.(`[Orchestrator] ${i + 1}. ${cmd.substring(0, 80)}...`);
102
- } else if (isSomVerification(cmd)) {
103
- const ref = cmd.elementRef || cmd.selector || 'unknown';
104
- this.logger?.(`[Orchestrator] ${i + 1}. [VERIFY] ${cmd.verificationType} on ${ref}: ${cmd.expected}`);
105
- } else if (isSomCommand(cmd)) {
106
- this.logger?.(`[Orchestrator] ${i + 1}. [ACTION] ${cmd.action} on ${cmd.elementRef || 'coord'}`);
107
- } else {
108
- this.logger?.(`[Orchestrator] ${i + 1}. ${JSON.stringify(cmd).substring(0, 80)}...`);
109
- }
110
- });
111
- if (decision.commands.length > 3) {
112
- this.logger?.(`[Orchestrator] ... and ${decision.commands.length - 3} more`);
113
- }
114
- if (decision.commandReasoning) {
115
- this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
116
- }
117
- }
118
-
119
- // Log meta-learning suggestions with clear demarcations
120
- if (decision.debugInfo?.suggestedPromptUpdates) {
121
- this.logger?.(``, 'warn');
122
- this.logger?.(`โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•`, 'warn');
123
- this.logger?.(`[Orchestrator] 🧠 META-LEARNING SUGGESTION FROM AGENT`, 'warn');
124
- this.logger?.(`โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•`, 'warn');
125
- this.logger?.(``, 'warn');
126
- this.logger?.(`SUGGESTED PROMPT UPDATE:`, 'warn');
127
- this.logger?.(` ${decision.debugInfo.suggestedPromptUpdates}`, 'warn');
128
- this.logger?.(``, 'warn');
129
- if (decision.debugInfo.reasoning) {
130
- this.logger?.(`REASONING:`, 'warn');
131
- this.logger?.(` ${decision.debugInfo.reasoning}`, 'warn');
132
- this.logger?.(``, 'warn');
133
- }
134
- this.logger?.(`โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•`, 'warn');
135
- this.logger?.(``, 'warn');
136
- }
137
- }
138
- }
139
-
@@ -1,58 +0,0 @@
1
- /**
2
- * Orchestrator Module Exports
3
- */
4
-
5
- // Core orchestrator
6
- export { OrchestratorAgent } from './orchestrator-agent';
7
-
8
- // SoM (Set-of-Marks) handler
9
- export { PageSoMHandler } from './page-som-handler';
10
-
11
- // Tool system
12
- export { ToolRegistry, Tool, ToolParameter, ToolExecutionContext } from './tool-registry';
13
-
14
- // 8 information-gathering tools (state changes via Playwright commands)
15
- export {
16
- TakeScreenshotTool,
17
- ViewPreviousScreenshotTool,
18
- RefreshSomMarkersTool,
19
- RecallHistoryTool,
20
- InspectPageTool,
21
- CheckPageReadyTool,
22
- ExtractDataTool,
23
- VerifyActionResultTool
24
- } from './tools';
25
-
26
- // Types
27
- export {
28
- AgentConfig,
29
- AgentContext,
30
- AgentDecision,
31
- JourneyMemory,
32
- MemoryStep,
33
- OrchestratorStepResult,
34
- SelfReflection,
35
- ToolCall,
36
- ToolResult,
37
- ExplorationMode,
38
- DEFAULT_AGENT_CONFIG
39
- } from './types';
40
-
41
- // SoM Types
42
- export {
43
- SomCommand,
44
- SomElement,
45
- SemanticCommandResult,
46
- CommandRunStatus,
47
- CommandAttempt,
48
- DomMutation,
49
- InteractionAction,
50
- Coordinate,
51
- TypedSelector,
52
- SomVerification,
53
- VerificationType,
54
- SomCommandOrVerification,
55
- isSomVerification,
56
- isSomCommand
57
- } from './som-types';
58
-