testchimp-runner-core 0.0.34 → 0.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +6 -1
  89. package/RELEASE_0.0.26.md +0 -165
  90. package/RELEASE_0.0.27.md +0 -236
  91. package/RELEASE_0.0.28.md +0 -286
  92. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  93. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  94. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  95. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  96. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  97. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  98. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  99. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  100. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  101. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  102. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  103. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  104. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  105. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  106. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  107. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  108. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  109. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  110. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  111. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  112. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  113. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  114. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  115. package/src/auth-config.ts +0 -84
  116. package/src/credit-usage-service.ts +0 -188
  117. package/src/env-loader.ts +0 -103
  118. package/src/execution-service.ts +0 -1413
  119. package/src/file-handler.ts +0 -104
  120. package/src/index.ts +0 -422
  121. package/src/llm-facade.ts +0 -821
  122. package/src/llm-provider.ts +0 -53
  123. package/src/model-constants.ts +0 -35
  124. package/src/orchestrator/index.ts +0 -34
  125. package/src/orchestrator/orchestrator-agent.ts +0 -862
  126. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  127. package/src/orchestrator/orchestrator-prompts.ts +0 -474
  128. package/src/orchestrator/tool-registry.ts +0 -182
  129. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  130. package/src/orchestrator/tools/extract-data.ts +0 -92
  131. package/src/orchestrator/tools/index.ts +0 -12
  132. package/src/orchestrator/tools/inspect-page.ts +0 -42
  133. package/src/orchestrator/tools/recall-history.ts +0 -72
  134. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  135. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  136. package/src/orchestrator/types.ts +0 -248
  137. package/src/playwright-mcp-service.ts +0 -224
  138. package/src/progress-reporter.ts +0 -144
  139. package/src/prompts.ts +0 -842
  140. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  141. package/src/providers/local-llm-provider.ts +0 -38
  142. package/src/scenario-service.ts +0 -232
  143. package/src/scenario-worker-class.ts +0 -1089
  144. package/src/script-utils.ts +0 -203
  145. package/src/types.ts +0 -239
  146. package/src/utils/browser-utils.ts +0 -348
  147. package/src/utils/coordinate-converter.ts +0 -162
  148. package/src/utils/page-info-utils.ts +0 -250
  149. package/testchimp-runner-core-0.0.33.tgz +0 -0
  150. package/tsconfig.json +0 -19
@@ -1,144 +0,0 @@
1
- # Coordinate Mode Diagnosis - Live Test Results
2
-
3
- ## Test Scenario: PeopleHR Employee Information Flow
4
-
5
- ### ✅ What Worked:
6
-
7
- 1. **Coordinate fallback DID activate** (after fix from >= 3 to >= 5)
8
- 2. **Agent successfully used coordinates** at (87.5%, 23.438%)
9
- 3. **Physical clicks succeeded** - page.mouse.click(1120, 169)
10
- 4. **Agent learned** to stick with coordinates after selectors failed
11
-
12
- ### ❌ What Didn't Work:
13
-
14
- **Agent hit max iterations (8) without marking "complete"**
15
-
16
- ## Detailed Step 6 Flow:
17
-
18
- ```
19
- Iteration 1: Selector attempt → Timeout ❌
20
- Iteration 2: Selector attempt → Timeout ❌
21
- Iteration 3: Selector attempt → Timeout ❌
22
- Iteration 4: 🎯 COORDINATE MODE → Click (87.5%, 23.438%) → ✅ Success
23
- Iteration 5: Repeat coordinate → ✅ Success
24
- Iteration 6: Repeat coordinate → ✅ Success (?)
25
- Iteration 7: Repeat coordinate → ✅ Success
26
- Iteration 8: Repeat coordinate → ✅ Success
27
- Result: ⚠️ Max iterations → system_limit
28
- ```
29
-
30
- ## Root Cause Analysis:
31
-
32
- ### Problem: **No Goal Verification After Coordinate Success**
33
-
34
- **With selectors:**
35
- ```typescript
36
- await page.getByRole('button').click();
37
- // Can verify: await expect(button).toHaveAttribute('aria-pressed', 'true')
38
- // Can check: New elements appeared, URL changed, etc.
39
- ```
40
-
41
- **With coordinates:**
42
- ```typescript
43
- await page.mouse.click(1120, 169);
44
- // ❓ Did it work? No element reference!
45
- // ❓ How to verify? Can't check button state
46
- // ❓ What changed? Need to inspect DOM/screenshot
47
- ```
48
-
49
- ### Why Agent Kept Retrying:
50
-
51
- **Agent's reasoning (iterations 5-8):**
52
- - "Coordinate click succeeded (executed without error)"
53
- - "But I don't know if goal was achieved"
54
- - "Step says 'Click on New' - did the New form open?"
55
- - "I should try again to be sure..."
56
- - → **Loops until max iterations**
57
-
58
- ## Solutions to Consider:
59
-
60
- ### Option 1: **Trust Coordinate Success** (Simple)
61
- After coordinate click succeeds:
62
- - Wait 500ms for UI response
63
- - Mark status="complete" automatically
64
- - Assume click worked (trust the coordinates)
65
-
66
- ```typescript
67
- if (coordinateAction && coordResult.allSucceeded) {
68
- await page.waitForTimeout(500); // Let UI respond
69
- return { status: 'complete', reasoning: 'Coordinate click succeeded' };
70
- }
71
- ```
72
-
73
- **Pros**: Simple, fast
74
- **Cons**: No verification of actual goal achievement
75
-
76
- ### Option 2: **Visual Verification** (Better)
77
- After coordinate click:
78
- - Wait 500ms
79
- - Take screenshot
80
- - Compare before/after
81
- - If changed → complete, else → retry with different coords
82
-
83
- ```typescript
84
- const beforeScreenshot = await page.screenshot();
85
- await page.mouse.click(x, y);
86
- await page.waitForTimeout(500);
87
- const afterScreenshot = await page.screenshot();
88
- if (screenshotsAreDifferent(beforeScreenshot, afterScreenshot)) {
89
- return { status: 'complete' };
90
- }
91
- ```
92
-
93
- **Pros**: Validates something changed
94
- **Cons**: Slower, more LLM calls
95
-
96
- ### Option 3: **DOM Change Detection** (Balanced)
97
- After coordinate click:
98
- - Capture DOM snapshot before
99
- - Click coordinates
100
- - Capture DOM snapshot after
101
- - If new elements/navigation → complete
102
-
103
- ```typescript
104
- const beforeUrl = page.url();
105
- const beforeElements = await getEnhancedPageInfo(page);
106
- await page.mouse.click(x, y);
107
- await page.waitForTimeout(500);
108
- const afterUrl = page.url();
109
- const afterElements = await getEnhancedPageInfo(page);
110
-
111
- if (afterUrl !== beforeUrl || afterElements.count !== beforeElements.count) {
112
- return { status: 'complete', reasoning: 'Page state changed after coordinate click' };
113
- }
114
- ```
115
-
116
- **Pros**: Fast, objective verification
117
- **Cons**: Might miss subtle changes (modal opens without URL/element count change)
118
-
119
- ### Option 4: **Prompt Guidance** (Immediate)
120
- Update prompt to tell agent:
121
- "After coordinate click succeeds, mark status='complete' unless you can clearly verify it failed"
122
-
123
- **Pros**: No code changes
124
- **Cons**: Relies on LLM judgment
125
-
126
- ## Recommendation:
127
-
128
- **Hybrid approach:**
129
- 1. **Immediate** (Prompt): Tell agent to trust coordinate success
130
- 2. **Phase 2** (Code): Add DOM change detection for validation
131
-
132
- ## Current Status:
133
-
134
- - ✅ Coordinate fallback works technically
135
- - ✅ Physical clicks succeed
136
- - ❌ Agent doesn't know when to stop
137
- - 🔧 Need completion detection logic
138
-
139
- ## Test Results Summary:
140
-
141
- **Steps 1-5**: ✅ All completed successfully
142
- **Step 6**: ⚠️ Coordinates worked but hit max iterations (no completion detection)
143
- **Overall**: Coordinate mode is functional but needs completion logic
144
-
@@ -1,253 +0,0 @@
1
- # Credit Usage Callback Architecture
2
-
3
- ## Summary
4
- Added callback-based credit usage reporting so that server-side integrations can update the DB directly without axios calls, while client-side consumers continue to report usage through axios calls to the backend API.
5
-
6
- ## Architecture
7
-
8
- ### Callback-First Approach
9
-
10
- ```typescript
11
- export interface CreditUsage {
12
- credits: number;
13
- usageReason: CreditUsageReason;
14
- jobId?: string;
15
- timestamp: number;
16
- }
17
-
18
- export type CreditUsageCallback = (usage: CreditUsage) => void | Promise<void>;
19
- ```
20
-
21
- ### Behavior
22
-
23
- ```typescript
24
- async reportCreditUsage(credits, usageReason, jobId) {
25
- const creditUsage = { credits, usageReason, jobId, timestamp: Date.now() };
26
-
27
- // 1. If callback provided: Use callback (SERVER-SIDE)
28
- if (this.creditUsageCallback) {
29
- await this.creditUsageCallback(creditUsage);
30
- return {}; // No axios call needed
31
- }
32
-
33
- // 2. No callback but auth configured: Use axios (CLIENT-SIDE)
34
- if (this.authConfig) {
35
- const response = await axios.post(`${backend}/localagent/insert_credit_usage`, ...);
36
- return response.data;
37
- }
38
-
39
- // 3. No callback and no auth: Development mode (logs a "Credit usage not tracked" warning and continues)
40
- return {};
41
- }
42
- ```
43
-
44
- ## Usage Scenarios
45
-
46
- ### Server-Side (scriptservice)
47
-
48
- ```typescript
49
- import { TestChimpService, CreditUsage, CreditUsageReason } from 'testchimp-runner-core';
50
-
51
- const service = new TestChimpService(
52
- fileHandler,
53
- undefined, // NO auth config
54
- backendUrl,
55
- maxWorkers,
56
- llmProvider,
57
- progressReporter,
58
- orchestratorOptions,
59
- async (creditUsage: CreditUsage) => {
60
- // Update DB directly - NO axios calls
61
- await db.insertCreditUsage({
62
- credits: creditUsage.credits,
63
- usageReason: creditUsage.usageReason,
64
- jobId: creditUsage.jobId,
65
- timestamp: creditUsage.timestamp
66
- });
67
- }
68
- );
69
- ```
70
-
71
- **Result:**
72
- - ✅ Callback called → DB updated directly
73
- ❌ No axios calls (the callback takes precedence, so no auth config is needed)
74
- - ✅ Full control over DB updates
75
-
76
- ### Client-Side (vs-extension, github-action)
77
-
78
- ```typescript
79
- import { TestChimpService } from 'testchimp-runner-core';
80
-
81
- const service = new TestChimpService(
82
- fileHandler,
83
- authConfig, // Auth configured
84
- backendUrl,
85
- maxWorkers,
86
- llmProvider,
87
- progressReporter,
88
- orchestratorOptions
89
- // NO callback - uses axios
90
- );
91
- ```
92
-
93
- **Result:**
94
- - ❌ No callback provided
95
- - ✅ Axios call made to backend API (because auth configured)
96
- - ✅ Backend handles DB update
97
-
98
- ### Development Mode (local testing)
99
-
100
- ```typescript
101
- const service = new TestChimpService(
102
- fileHandler
103
- // No auth, no callback
104
- );
105
- ```
106
-
107
- **Result:**
108
- - ❌ No callback
109
- - ❌ No auth
110
- - ⚠️ Warning logged: "Credit usage not tracked"
111
- - ✅ Continues without error
112
-
113
- ## API
114
-
115
- ### Constructor
116
- ```typescript
117
- new TestChimpService(
118
- fileHandler?,
119
- authConfig?,
120
- backendUrl?,
121
- maxWorkers?,
122
- llmProvider?,
123
- progressReporter?,
124
- orchestratorOptions?,
125
- creditUsageCallback? // NEW
126
- )
127
- ```
128
-
129
- ### Method
130
- ```typescript
131
- service.setCreditUsageCallback((creditUsage) => {
132
- // Handle credit usage in your system
133
- console.log(`Used ${creditUsage.credits} credits for ${creditUsage.usageReason}`);
134
- });
135
- ```
136
-
137
- ## Exported Types
138
-
139
- ```typescript
140
- export {
141
- CreditUsageCallback, // Type for callback function
142
- CreditUsage, // Interface for usage data
143
- CreditUsageReason // Enum: SCRIPT_GENERATE, TEST_REPAIR, etc.
144
- };
145
- ```
146
-
147
- ## Benefits
148
-
149
- ### For Server-Side
150
- 1. **No Network Calls** - Direct DB updates via callback
151
- 2. **Full Control** - Custom logic for credit tracking
152
- 3. **Performance** - No HTTP round-trip overhead
153
- 4. **Reliability** - No network failures
154
-
155
- ### For Client-Side
156
- 1. **Backward Compatible** - Existing code works unchanged
157
- 2. **Centralized** - Backend API handles all credit logic
158
- 3. **Simple** - Just configure auth, no callback needed
159
-
160
- ### For Both
161
- 1. **Flexible** - Each consumer decides how to handle credits
162
- 2. **Testable** - Can mock callbacks easily
163
- 3. **Observable** - Callback provides visibility into credit usage
164
-
165
- ## Implementation Notes
166
-
167
- ### Preservation Across Service Recreations
168
-
169
- Credit callback is stored and reapplied when services are recreated:
170
-
171
- ```typescript
172
- // Store in TestChimpService
173
- private creditUsageCallback?: CreditUsageCallback;
174
-
175
- // Pass to CreditUsageService on every recreation
176
- this.creditUsageService = new CreditUsageService(
177
- this.authConfig,
178
- this.backendUrl,
179
- this.creditUsageCallback // Always preserved
180
- );
181
- ```
182
-
183
- ### Error Handling
184
-
185
- **Callback-based (server-side):**
186
- - Callback error → Throws (critical for DB updates)
187
-
188
- **Axios-based (client-side):**
189
- - Axios error → Throws (critical for credit tracking)
190
-
191
- **Development mode:**
192
- - No tracking → Logs warning, continues
193
-
194
- ## Example: Server-Side Integration
195
-
196
- ```typescript
197
- // In scriptservice
198
- import { TestChimpService, CreditUsage, CreditUsageReason } from 'testchimp-runner-core';
199
-
200
- class ScriptService {
201
- private testChimpService: TestChimpService;
202
-
203
- constructor() {
204
- this.testChimpService = new TestChimpService(
205
- new CustomFileHandler(),
206
- undefined, // No auth - server-side doesn't need it
207
- 'http://localhost:3000', // Internal backend URL
208
- 5,
209
- new CustomLLMProvider(), // Server has its own LLM provider
210
- customProgressReporter,
211
- { useOrchestrator: true },
212
- async (creditUsage: CreditUsage) => {
213
- // Direct DB update - no HTTP calls
214
- await this.creditRepository.insert({
215
- userId: this.getCurrentUserId(),
216
- credits: creditUsage.credits,
217
- reason: creditUsage.usageReason,
218
- jobId: creditUsage.jobId,
219
- timestamp: new Date(creditUsage.timestamp)
220
- });
221
- }
222
- );
223
- }
224
- }
225
- ```
226
-
227
- ## Files Modified
228
-
229
- 1. `/src/credit-usage-service.ts` - Added callback support, callback-first logic
230
- 2. `/src/index.ts` - Accept credit callback in constructor, expose `setCreditUsageCallback()`, preserve across recreations
231
- 3. Exported types: `CreditUsage`, `CreditUsageCallback`, `CreditUsageReason`
232
-
233
- ## Testing
234
-
235
- ### Server-Side
236
- 1. Set credit callback
237
- 2. Generate script
238
- 3. Verify callback called with correct credit data
239
- 4. Verify NO axios calls made
240
-
241
- ### Client-Side
242
- 1. Configure auth (no callback)
243
- 2. Generate script
244
- 3. Verify axios call made to backend
245
- 4. Verify backend receives credit data
246
-
247
- ## Backward Compatibility
248
-
249
- ✅ Fully backward compatible
250
- - Existing consumers work unchanged
251
- - Optional callback parameter
252
- - Existing axios behavior preserved for client-side
253
-