@superblocksteam/vite-plugin-file-sync 2.0.67 → 2.0.68-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/dist/ai-service/agent/tools/apis/analysis.d.ts.map +1 -1
  2. package/dist/ai-service/agent/tools/apis/analysis.js +4 -0
  3. package/dist/ai-service/agent/tools/apis/analysis.js.map +1 -1
  4. package/dist/ai-service/agent/tools/apis/api-executor.d.ts +9 -1
  5. package/dist/ai-service/agent/tools/apis/api-executor.d.ts.map +1 -1
  6. package/dist/ai-service/agent/tools/apis/api-executor.js +4 -1
  7. package/dist/ai-service/agent/tools/apis/api-executor.js.map +1 -1
  8. package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.d.ts +1 -0
  9. package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.d.ts.map +1 -1
  10. package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.js +1 -1
  11. package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.js.map +1 -1
  12. package/dist/ai-service/agent/tools/apis/test-api.d.ts +5 -0
  13. package/dist/ai-service/agent/tools/apis/test-api.d.ts.map +1 -1
  14. package/dist/ai-service/agent/tools/apis/test-api.js +37 -18
  15. package/dist/ai-service/agent/tools/apis/test-api.js.map +1 -1
  16. package/dist/ai-service/agent/tools/build-delete-file.d.ts.map +1 -1
  17. package/dist/ai-service/agent/tools/build-delete-file.js +29 -0
  18. package/dist/ai-service/agent/tools/build-delete-file.js.map +1 -1
  19. package/dist/ai-service/agent/tools/integrations/execute-request.d.ts.map +1 -1
  20. package/dist/ai-service/agent/tools/integrations/execute-request.js +5 -4
  21. package/dist/ai-service/agent/tools/integrations/execute-request.js.map +1 -1
  22. package/dist/ai-service/agent/tools.d.ts.map +1 -1
  23. package/dist/ai-service/agent/tools.js +17 -6
  24. package/dist/ai-service/agent/tools.js.map +1 -1
  25. package/dist/ai-service/agent/tools2/entity-permissions.d.ts +23 -20
  26. package/dist/ai-service/agent/tools2/entity-permissions.d.ts.map +1 -1
  27. package/dist/ai-service/agent/tools2/entity-permissions.js +15 -11
  28. package/dist/ai-service/agent/tools2/entity-permissions.js.map +1 -1
  29. package/dist/ai-service/agent/tools2/example.js +2 -2
  30. package/dist/ai-service/agent/tools2/example.js.map +1 -1
  31. package/dist/ai-service/agent/tools2/index.d.ts +1 -1
  32. package/dist/ai-service/agent/tools2/index.d.ts.map +1 -1
  33. package/dist/ai-service/agent/tools2/index.js +1 -1
  34. package/dist/ai-service/agent/tools2/index.js.map +1 -1
  35. package/dist/ai-service/agent/tools2/registry.d.ts +4 -4
  36. package/dist/ai-service/agent/tools2/registry.d.ts.map +1 -1
  37. package/dist/ai-service/agent/tools2/registry.js +42 -29
  38. package/dist/ai-service/agent/tools2/registry.js.map +1 -1
  39. package/dist/ai-service/agent/tools2/tools/read.d.ts +1 -1
  40. package/dist/ai-service/agent/tools2/types.d.ts +36 -15
  41. package/dist/ai-service/agent/tools2/types.d.ts.map +1 -1
  42. package/dist/ai-service/agent/tools2/types.js.map +1 -1
  43. package/dist/ai-service/chat/chat-session-store.d.ts +5 -7
  44. package/dist/ai-service/chat/chat-session-store.d.ts.map +1 -1
  45. package/dist/ai-service/chat/chat-session-store.js +36 -17
  46. package/dist/ai-service/chat/chat-session-store.js.map +1 -1
  47. package/dist/ai-service/chat/extract-history.d.ts +0 -85
  48. package/dist/ai-service/chat/extract-history.d.ts.map +1 -1
  49. package/dist/ai-service/chat/extract-history.js +3 -239
  50. package/dist/ai-service/chat/extract-history.js.map +1 -1
  51. package/dist/ai-service/index.d.ts +26 -4
  52. package/dist/ai-service/index.d.ts.map +1 -1
  53. package/dist/ai-service/index.js +125 -4
  54. package/dist/ai-service/index.js.map +1 -1
  55. package/dist/ai-service/judge/debug-browser.d.ts +8 -0
  56. package/dist/ai-service/judge/debug-browser.d.ts.map +1 -0
  57. package/dist/ai-service/judge/debug-browser.js +79 -0
  58. package/dist/ai-service/judge/debug-browser.js.map +1 -0
  59. package/dist/ai-service/judge/index.d.ts +12 -0
  60. package/dist/ai-service/judge/index.d.ts.map +1 -0
  61. package/dist/ai-service/judge/index.js +11 -0
  62. package/dist/ai-service/judge/index.js.map +1 -0
  63. package/dist/ai-service/judge/integration/mcp-client.d.ts +82 -0
  64. package/dist/ai-service/judge/integration/mcp-client.d.ts.map +1 -0
  65. package/dist/ai-service/judge/integration/mcp-client.js +276 -0
  66. package/dist/ai-service/judge/integration/mcp-client.js.map +1 -0
  67. package/dist/ai-service/judge/integration/playwright-bridge.d.ts +142 -0
  68. package/dist/ai-service/judge/integration/playwright-bridge.d.ts.map +1 -0
  69. package/dist/ai-service/judge/integration/playwright-bridge.js +217 -0
  70. package/dist/ai-service/judge/integration/playwright-bridge.js.map +1 -0
  71. package/dist/ai-service/judge/judge-eval-http.d.ts +3 -0
  72. package/dist/ai-service/judge/judge-eval-http.d.ts.map +1 -0
  73. package/dist/ai-service/judge/judge-eval-http.js +541 -0
  74. package/dist/ai-service/judge/judge-eval-http.js.map +1 -0
  75. package/dist/ai-service/judge/judge-eval-service-runner.d.ts +35 -0
  76. package/dist/ai-service/judge/judge-eval-service-runner.d.ts.map +1 -0
  77. package/dist/ai-service/judge/judge-eval-service-runner.js +124 -0
  78. package/dist/ai-service/judge/judge-eval-service-runner.js.map +1 -0
  79. package/dist/ai-service/judge/judge-executor.d.ts +65 -0
  80. package/dist/ai-service/judge/judge-executor.d.ts.map +1 -0
  81. package/dist/ai-service/judge/judge-executor.js +334 -0
  82. package/dist/ai-service/judge/judge-executor.js.map +1 -0
  83. package/dist/ai-service/judge/judge-service.d.ts +161 -0
  84. package/dist/ai-service/judge/judge-service.d.ts.map +1 -0
  85. package/dist/ai-service/judge/judge-service.js +241 -0
  86. package/dist/ai-service/judge/judge-service.js.map +1 -0
  87. package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts +37 -0
  88. package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts.map +1 -0
  89. package/dist/ai-service/judge/prompts/evaluation-criteria.js +283 -0
  90. package/dist/ai-service/judge/prompts/evaluation-criteria.js.map +1 -0
  91. package/dist/ai-service/judge/prompts/system-prompt.d.ts +30 -0
  92. package/dist/ai-service/judge/prompts/system-prompt.d.ts.map +1 -0
  93. package/dist/ai-service/judge/prompts/system-prompt.js +212 -0
  94. package/dist/ai-service/judge/prompts/system-prompt.js.map +1 -0
  95. package/dist/ai-service/judge/storage/csv-storage.d.ts +99 -0
  96. package/dist/ai-service/judge/storage/csv-storage.d.ts.map +1 -0
  97. package/dist/ai-service/judge/storage/csv-storage.js +274 -0
  98. package/dist/ai-service/judge/storage/csv-storage.js.map +1 -0
  99. package/dist/ai-service/judge/storage/index.d.ts +9 -0
  100. package/dist/ai-service/judge/storage/index.d.ts.map +1 -0
  101. package/dist/ai-service/judge/storage/index.js +7 -0
  102. package/dist/ai-service/judge/storage/index.js.map +1 -0
  103. package/dist/ai-service/judge/storage/interface.d.ts +51 -0
  104. package/dist/ai-service/judge/storage/interface.d.ts.map +1 -0
  105. package/dist/ai-service/judge/storage/interface.js +8 -0
  106. package/dist/ai-service/judge/storage/interface.js.map +1 -0
  107. package/dist/ai-service/judge/storage/types.d.ts +54 -0
  108. package/dist/ai-service/judge/storage/types.d.ts.map +1 -0
  109. package/dist/ai-service/judge/storage/types.js +7 -0
  110. package/dist/ai-service/judge/storage/types.js.map +1 -0
  111. package/dist/ai-service/judge/tools/index.d.ts +22 -0
  112. package/dist/ai-service/judge/tools/index.d.ts.map +1 -0
  113. package/dist/ai-service/judge/tools/index.js +29 -0
  114. package/dist/ai-service/judge/tools/index.js.map +1 -0
  115. package/dist/ai-service/judge/tools/playwright-action.d.ts +18 -0
  116. package/dist/ai-service/judge/tools/playwright-action.d.ts.map +1 -0
  117. package/dist/ai-service/judge/tools/playwright-action.js +171 -0
  118. package/dist/ai-service/judge/tools/playwright-action.js.map +1 -0
  119. package/dist/ai-service/judge/tools/submit-feedback.d.ts +41 -0
  120. package/dist/ai-service/judge/tools/submit-feedback.d.ts.map +1 -0
  121. package/dist/ai-service/judge/tools/submit-feedback.js +150 -0
  122. package/dist/ai-service/judge/tools/submit-feedback.js.map +1 -0
  123. package/dist/ai-service/judge/types.d.ts +169 -0
  124. package/dist/ai-service/judge/types.d.ts.map +1 -0
  125. package/dist/ai-service/judge/types.js +8 -0
  126. package/dist/ai-service/judge/types.js.map +1 -0
  127. package/dist/ai-service/llm/context/constants.d.ts +7 -0
  128. package/dist/ai-service/llm/context/constants.d.ts.map +1 -1
  129. package/dist/ai-service/llm/context/constants.js +7 -0
  130. package/dist/ai-service/llm/context/constants.js.map +1 -1
  131. package/dist/ai-service/llm/context/context.d.ts +8 -1
  132. package/dist/ai-service/llm/context/context.d.ts.map +1 -1
  133. package/dist/ai-service/llm/context/context.js +47 -12
  134. package/dist/ai-service/llm/context/context.js.map +1 -1
  135. package/dist/ai-service/llm/context/internal-types.d.ts +1 -0
  136. package/dist/ai-service/llm/context/internal-types.d.ts.map +1 -1
  137. package/dist/ai-service/llm/context/internal-types.js.map +1 -1
  138. package/dist/ai-service/llm/context/manager.d.ts +2 -1
  139. package/dist/ai-service/llm/context/manager.d.ts.map +1 -1
  140. package/dist/ai-service/llm/context/manager.js +2 -1
  141. package/dist/ai-service/llm/context/manager.js.map +1 -1
  142. package/dist/ai-service/llm/context/utils/message-utils.d.ts +10 -0
  143. package/dist/ai-service/llm/context/utils/message-utils.d.ts.map +1 -1
  144. package/dist/ai-service/llm/context/utils/message-utils.js +74 -0
  145. package/dist/ai-service/llm/context/utils/message-utils.js.map +1 -1
  146. package/dist/ai-service/llm/error.d.ts +1 -1
  147. package/dist/ai-service/llm/interaction/adapters/vercel.d.ts.map +1 -1
  148. package/dist/ai-service/llm/interaction/adapters/vercel.js.map +1 -1
  149. package/dist/ai-service/llm/interaction/provider.d.ts +10 -9
  150. package/dist/ai-service/llm/interaction/provider.d.ts.map +1 -1
  151. package/dist/ai-service/llmobs/middleware/stream-text.d.ts +8 -8
  152. package/dist/ai-service/llmobs/middleware/stream-text.d.ts.map +1 -1
  153. package/dist/ai-service/llmobs/middleware/stream-text.js.map +1 -1
  154. package/dist/ai-service/llmobs/tracer.d.ts.map +1 -1
  155. package/dist/ai-service/llmobs/tracer.js +2 -1
  156. package/dist/ai-service/llmobs/tracer.js.map +1 -1
  157. package/dist/ai-service/mcp/embedded-playwright-mcp-server.d.ts +53 -0
  158. package/dist/ai-service/mcp/embedded-playwright-mcp-server.d.ts.map +1 -0
  159. package/dist/ai-service/mcp/embedded-playwright-mcp-server.js +541 -0
  160. package/dist/ai-service/mcp/embedded-playwright-mcp-server.js.map +1 -0
  161. package/dist/ai-service/mcp/playwright-server.d.ts +114 -0
  162. package/dist/ai-service/mcp/playwright-server.d.ts.map +1 -0
  163. package/dist/ai-service/mcp/playwright-server.js +109 -0
  164. package/dist/ai-service/mcp/playwright-server.js.map +1 -0
  165. package/dist/ai-service/state-machine/clark-fsm.d.ts +4 -1
  166. package/dist/ai-service/state-machine/clark-fsm.d.ts.map +1 -1
  167. package/dist/ai-service/state-machine/clark-fsm.js +3 -1
  168. package/dist/ai-service/state-machine/clark-fsm.js.map +1 -1
  169. package/dist/ai-service/state-machine/handlers/idle.d.ts.map +1 -1
  170. package/dist/ai-service/state-machine/handlers/idle.js +3 -1
  171. package/dist/ai-service/state-machine/handlers/idle.js.map +1 -1
  172. package/dist/ai-service/state-machine/handlers/runtime-reviewing.d.ts.map +1 -1
  173. package/dist/ai-service/state-machine/handlers/runtime-reviewing.js +4 -1
  174. package/dist/ai-service/state-machine/handlers/runtime-reviewing.js.map +1 -1
  175. package/dist/ai-service/state-machine/helpers/context-id.d.ts +1 -1
  176. package/dist/ai-service/state-machine/helpers/context-id.d.ts.map +1 -1
  177. package/dist/ai-service/state-machine/helpers/context-id.js +6 -7
  178. package/dist/ai-service/state-machine/helpers/context-id.js.map +1 -1
  179. package/dist/ai-service/state-machine/mocks.d.ts +1 -0
  180. package/dist/ai-service/state-machine/mocks.d.ts.map +1 -1
  181. package/dist/ai-service/state-machine/mocks.js +5 -1
  182. package/dist/ai-service/state-machine/mocks.js.map +1 -1
  183. package/dist/server-rpc/client.js +1 -1
  184. package/dist/server-rpc/client.js.map +1 -1
  185. package/dist/socket-manager.d.ts.map +1 -1
  186. package/dist/socket-manager.js +26 -6
  187. package/dist/socket-manager.js.map +1 -1
  188. package/dist/sync-service/index.d.ts +5 -0
  189. package/dist/sync-service/index.d.ts.map +1 -1
  190. package/dist/sync-service/index.js +13 -1
  191. package/dist/sync-service/index.js.map +1 -1
  192. package/package.json +7 -6
@@ -0,0 +1,283 @@
1
+ /**
2
+ * Evaluation criteria builder for judge assessments.
3
+ *
4
+ * Provides utilities for creating structured evaluation criteria
5
+ * based on simulation prompts and complexity levels.
6
+ */
7
/**
 * Derives the full set of judge evaluation criteria for one simulation prompt.
 *
 * The prompt's name, description and every individual prompt string are
 * joined with spaces and lower-cased; that single string is what the keyword
 * based extractors scan.
 *
 * @param prompt - Simulation prompt object; `name`, `description`, `prompts`,
 *   `complexity` and (via the custom-criteria extractor) `id` are read
 * @returns Object with `functionalRequirements`, `uiRequirements`,
 *   `dataRequirements`, `performanceRequirements` and `customCriteria` arrays
 */
export function buildCriteriaFromPrompt(prompt) {
    // One lower-cased haystack shared by all text-based extractors.
    const searchableText = [prompt.name, prompt.description, ...prompt.prompts]
        .join(" ")
        .toLowerCase();
    const level = prompt.complexity;
    // Performance criteria are only generated for medium and high complexity;
    // everything else keeps the empty default.
    const needsPerformanceCriteria = level === "high" || level === "medium";
    return {
        functionalRequirements: extractFunctionalRequirements(searchableText, level),
        uiRequirements: extractUIRequirements(searchableText, level),
        dataRequirements: extractDataRequirements(searchableText, level),
        performanceRequirements: needsPerformanceCriteria
            ? extractPerformanceRequirements(level)
            : [],
        customCriteria: extractCustomCriteria(prompt),
    };
}
42
/**
 * Maps keywords found in the prompt text to functional requirements.
 *
 * Matching is a plain substring test (`String.prototype.includes`), so the
 * caller is expected to pass lower-cased text.
 *
 * @param text - Combined prompt text
 * @param complexity - Task complexity; "high" adds two extra requirements
 * @returns Array of functional requirements, never empty
 */
function extractFunctionalRequirements(text, complexity) {
    // Each rule: if ANY keyword occurs in the text, ALL outcomes are appended.
    // Rule order determines output order.
    const keywordRules = [
        // CRUD operations
        {
            keywords: ["create", "add", "new"],
            outcomes: ["Ability to create new records with proper validation"],
        },
        {
            keywords: ["read", "view", "display"],
            outcomes: ["Ability to view and read existing data"],
        },
        {
            keywords: ["update", "edit", "modify"],
            outcomes: ["Ability to update existing records"],
        },
        {
            keywords: ["delete", "remove"],
            outcomes: ["Ability to delete records with confirmation"],
        },
        // Search and filter
        {
            keywords: ["search", "find"],
            outcomes: ["Search functionality works correctly"],
        },
        {
            keywords: ["filter", "sort"],
            outcomes: ["Filter and sort capabilities function properly"],
        },
        // Forms and validation
        {
            keywords: ["form", "input"],
            outcomes: [
                "Forms include appropriate validation",
                "Error messages are clear and helpful",
            ],
        },
        // Authentication
        {
            keywords: ["auth", "login", "user"],
            outcomes: [
                "Authentication flow works correctly",
                "User sessions are properly managed",
            ],
        },
        // Workflow
        {
            keywords: ["workflow", "process", "step"],
            outcomes: [
                "Multi-step workflows progress correctly",
                "State is maintained between steps",
            ],
        },
        // Data operations
        {
            keywords: ["import", "export"],
            outcomes: ["Data import/export functionality works"],
        },
        {
            keywords: ["report", "analytics"],
            outcomes: [
                "Reports display accurate data",
                "Analytics calculations are correct",
            ],
        },
        // Notifications
        {
            keywords: ["notify", "alert", "email"],
            outcomes: ["Notifications are triggered appropriately"],
        },
    ];
    const collected = [];
    for (const { keywords, outcomes } of keywordRules) {
        if (keywords.some((keyword) => text.includes(keyword))) {
            collected.push(...outcomes);
        }
    }
    // High-complexity tasks always get the business-logic checks.
    if (complexity === "high") {
        collected.push("Complex business logic is correctly implemented", "Edge cases are properly handled");
    }
    // Nothing matched at all: fall back to generic requirements.
    if (collected.length === 0) {
        collected.push("Core functionality works as described", "Application completes primary use case");
    }
    return collected;
}
120
/**
 * Extracts UI/UX requirements from prompt text.
 *
 * Four baseline requirements (clean interface, intuitive navigation, action
 * feedback, error states) are always present; the rest are added when a
 * matching keyword occurs in the text. The caller is expected to pass
 * lower-cased text because matching is case-sensitive.
 *
 * @param text - Combined prompt text
 * @param complexity - Task complexity; "high" adds an accessibility requirement
 * @returns Array of UI requirements
 */
function extractUIRequirements(text, complexity) {
    // Substring test: true when any of the given keywords occurs in the text.
    const mentions = (...keywords) => keywords.some((keyword) => text.includes(keyword));
    // "tab" must be matched as a whole word (tab / tabs / tabbed). A plain
    // substring test also fires on "table", "database" or "stable", which
    // would attach a tab-navigation requirement to every prompt that merely
    // asks for a table.
    const mentionsTabs = /\btab(s|bed)?\b/.test(text);
    const requirements = [];
    // Basic UI requirements
    requirements.push("Interface is clean and organized");
    requirements.push("Navigation is intuitive");
    // Specific UI elements
    if (mentions("table", "grid")) {
        requirements.push("Tables/grids display data clearly");
        requirements.push("Column headers are descriptive");
    }
    if (mentions("chart", "graph", "visual")) {
        requirements.push("Data visualizations are clear and accurate");
    }
    if (mentions("modal", "dialog")) {
        requirements.push("Modals/dialogs work correctly");
    }
    if (mentionsTabs || mentions("accordion")) {
        requirements.push("Tab/accordion navigation works properly");
    }
    // Responsive design
    if (mentions("responsive", "mobile")) {
        requirements.push("UI is responsive to different screen sizes");
    }
    // Accessibility
    if (complexity === "high") {
        requirements.push("UI elements have proper labels for accessibility");
    }
    // Feedback
    requirements.push("User actions provide appropriate feedback");
    if (mentions("loading", "spinner")) {
        requirements.push("Loading states are shown during operations");
    }
    // Error handling
    requirements.push("Error states are clearly communicated");
    return requirements;
}
165
/**
 * Extracts data integration requirements from prompt text.
 *
 * Keyword matching is a case-sensitive substring test, so the caller is
 * expected to pass lower-cased text. The data-validation requirement is
 * always included.
 *
 * @param text - Combined prompt text
 * @param complexity - Task complexity; "medium" and "high" add two
 *   data-consistency requirements
 * @returns Array of data requirements (at least two entries)
 */
function extractDataRequirements(text, complexity) {
    // [keywords, requirements]: any keyword hit appends all requirements.
    const integrationRules = [
        // API integrations
        [
            ["api", "rest", "graphql"],
            [
                "API integrations function correctly",
                "API errors are handled gracefully",
            ],
        ],
        // Database
        [
            ["database", "sql", "query"],
            [
                "Database operations complete successfully",
                "Data persistence works correctly",
            ],
        ],
        // Specific integrations
        [
            ["salesforce"],
            ["Salesforce integration retrieves and updates data correctly"],
        ],
        [["slack"], ["Slack messages are sent successfully"]],
        [["jira"], ["Jira tickets are created/updated correctly"]],
        [["servicenow"], ["ServiceNow integration functions properly"]],
        [["stripe", "payment"], ["Payment processing works correctly"]],
    ];
    const found = [];
    for (const [keywords, additions] of integrationRules) {
        const hit = keywords.some((keyword) => text.includes(keyword));
        if (hit) {
            found.push(...additions);
        }
    }
    // Data validation is required unconditionally.
    found.push("Data validation prevents invalid inputs");
    if (complexity === "high" || complexity === "medium") {
        found.push("Data relationships are properly maintained");
        found.push("Concurrent data operations are handled safely");
    }
    // Exactly one entry means only the unconditional validation requirement
    // is present, so add the generic default alongside it.
    if (found.length === 1) {
        found.push("Data is displayed and stored correctly");
    }
    return found;
}
216
/**
 * Returns the performance requirements that apply to a complexity level.
 *
 * @param complexity - Task complexity
 * @returns Four requirements for "high", two for "medium", and an empty
 *   array for "low" or any other value
 */
function extractPerformanceRequirements(complexity) {
    if (complexity === "high") {
        return [
            "Application loads within 3 seconds",
            "UI interactions respond within 200ms",
            "Large datasets are handled efficiently",
            "No memory leaks during extended use",
        ];
    }
    if (complexity === "medium") {
        return [
            "Application loads within 5 seconds",
            "Common operations complete quickly",
        ];
    }
    // "low" (and anything unrecognised) carries no performance requirements.
    return [];
}
241
/**
 * Derives extra criteria from the prompt's id and from how many prompts it has.
 *
 * @param prompt - Simulation prompt; `id` (string) and `prompts` (array) are read
 * @returns Array of custom criteria, possibly empty
 */
function extractCustomCriteria(prompt) {
    // Id fragment -> criterion added when the prompt id contains that fragment.
    const idPatterns = [
        ["multi-step", "All workflow steps complete in correct order"],
        ["realtime", "Real-time updates work correctly"],
        ["batch", "Batch operations process all items correctly"],
    ];
    const matched = [];
    for (const [fragment, criterion] of idPatterns) {
        if (prompt.id.includes(fragment)) {
            matched.push(criterion);
        }
    }
    // More than two prompts: also check that the sequence is honoured.
    if (prompt.prompts.length > 2) {
        matched.push("Multi-prompt instructions are fully implemented", "Later prompts build on earlier functionality");
    }
    return matched;
}
266
/**
 * Produces a small, generic set of evaluation criteria.
 *
 * Intended as a fallback when specific criteria cannot be extracted.
 * A fresh object with fresh arrays is returned on every call.
 *
 * @returns Criteria with `functionalRequirements`, `uiRequirements` and
 *   `dataRequirements` only
 */
export function createMinimalCriteria() {
    const functionalRequirements = [
        "Application loads without errors",
        "Primary functionality works as intended",
    ];
    const uiRequirements = [
        "Interface is usable",
        "User can complete basic tasks",
    ];
    const dataRequirements = ["Data operations complete successfully"];
    return { functionalRequirements, uiRequirements, dataRequirements };
}
283
+ //# sourceMappingURL=evaluation-criteria.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluation-criteria.js","sourceRoot":"","sources":["../../../../src/ai-service/judge/prompts/evaluation-criteria.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAeH;;;;;;;;GAQG;AACH,MAAM,UAAU,uBAAuB,CACrC,MAAwB;IAExB,MAAM,QAAQ,GAAuB;QACnC,sBAAsB,EAAE,EAAE;QAC1B,cAAc,EAAE,EAAE;QAClB,gBAAgB,EAAE,EAAE;QACpB,uBAAuB,EAAE,EAAE;QAC3B,cAAc,EAAE,EAAE;KACnB,CAAC;IAEF,uCAAuC;IACvC,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,WAAW,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC;SAClE,IAAI,CAAC,GAAG,CAAC;SACT,WAAW,EAAE,CAAC;IAEjB,kCAAkC;IAClC,QAAQ,CAAC,sBAAsB,GAAG,6BAA6B,CAC7D,QAAQ,EACR,MAAM,CAAC,UAAU,CAClB,CAAC;IAEF,0BAA0B;IAC1B,QAAQ,CAAC,cAAc,GAAG,qBAAqB,CAAC,QAAQ,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAE7E,4BAA4B;IAC5B,QAAQ,CAAC,gBAAgB,GAAG,uBAAuB,CACjD,QAAQ,EACR,MAAM,CAAC,UAAU,CAClB,CAAC;IAEF,mDAAmD;IACnD,IAAI,MAAM,CAAC,UAAU,KAAK,MAAM,IAAI,MAAM,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;QACnE,QAAQ,CAAC,uBAAuB,GAAG,8BAA8B,CAC/D,MAAM,CAAC,UAAU,CAClB,CAAC;IACJ,CAAC;IAED,wDAAwD;IACxD,QAAQ,CAAC,cAAc,GAAG,qBAAqB,CAAC,MAAM,CAAC,CAAC;IAExD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,SAAS,6BAA6B,CACpC,IAAY,EACZ,UAAqC;IAErC,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,kBAAkB;IAClB,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5E,YAAY,CAAC,IAAI,CAAC,sDAAsD,CAAC,CAAC;IAC5E,CAAC;IAED,IACE,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EACxB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IAC9D,CAAC;IAED,IACE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACvB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EACvB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvD,YAAY,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC;IACnE,CAAC;IAED,oBAAoB;IACpB,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACrD,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;IAC5D,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,
CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACrD,YAAY,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;IACtE,CAAC;IAED,uBAAuB;IACvB,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACpD,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;QAC1D,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;IAC5D,CAAC;IAED,iBAAiB;IACjB,IACE,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EACrB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;QACzD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,WAAW;IACX,IACE,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC;QACxB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EACrB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QAC7D,YAAY,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACzD,CAAC;IAED,kBAAkB;IAClB,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvD,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IAC9D,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QAC1D,YAAY,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QACnD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,gBAAgB;IAChB,IACE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACvB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EACtB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IACjE,CAAC;IAED,oCAAoC;IACpC,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,YAAY,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACrE,YAAY,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IACvD,CAAC;IAED,8BAA8B;IAC9B,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,YAAY,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;QAC3D,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;;GAMG;AACH,SAAS,qBAAqB,CAC5B,IAAY,EACZ,UAAqC;IAErC,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,wBAAwB;IACxB,YAAY,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACtD,YAAY,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;IAE7C,uBAAuB;IACvB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACpD,YAAY,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;QACvD,YAAY,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;IACtD,CAAC;IAED,I
ACE,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EACvB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAClE,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACtD,YAAY,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QACvD,YAAY,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAC/D,CAAC;IAED,oBAAoB;IACpB,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3D,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAClE,CAAC;IAED,gBAAgB;IAChB,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,YAAY,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;IACxE,CAAC;IAED,WAAW;IACX,YAAY,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IAE/D,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACzD,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAClE,CAAC;IAED,iBAAiB;IACjB,YAAY,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;IAE3D,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;;GAMG;AACH,SAAS,uBAAuB,CAC9B,IAAY,EACZ,UAAqC;IAErC,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,mBAAmB;IACnB,IACE,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QACpB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EACxB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;QACzD,YAAY,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACzD,CAAC;IAED,WAAW;IACX,IACE,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QACpB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EACtB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;QAC/D,YAAY,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACxD,CAAC;IAED,wBAAwB;IACxB,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QAChC,YAAY,CAAC,IAAI,CACf,6DAA6D,CAC9D,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3B,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;IAC5D,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1B,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAClE,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QAChC,YAAY,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAA
I,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,kBAAkB;IAClB,YAAY,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAE7D,IAAI,UAAU,KAAK,MAAM,IAAI,UAAU,KAAK,QAAQ,EAAE,CAAC;QACrD,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAChE,YAAY,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;IACrE,CAAC;IAED,sBAAsB;IACtB,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;GAKG;AACH,SAAS,8BAA8B,CACrC,UAAqC;IAErC,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,MAAM;YACT,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;YACxD,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;YAC1D,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;YAC5D,YAAY,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;YACzD,MAAM;QAER,KAAK,QAAQ;YACX,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;YACxD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;YACxD,MAAM;QAER,KAAK,KAAK;YACR,0DAA0D;YAC1D,MAAM;IACV,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;GAKG;AACH,SAAS,qBAAqB,CAAC,MAAwB;IACrD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,wDAAwD;IACxD,IAAI,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACrC,QAAQ,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QACnC,QAAQ,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,QAAQ,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAChE,CAAC;IAED,qCAAqC;IACrC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,QAAQ,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACjE,QAAQ,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB;IACnC,OAAO;QACL,sBAAsB,EAAE;YACtB,kCAAkC;YAClC,yCAAyC;SAC1C;QACD,cAAc,EAAE,CAAC,qBAAqB,EAAE,+BAA+B,CAAC;QACxE,gBAAgB,EAAE,CAAC,uCAAuC,CAAC;KAC5D,CAAC;AACJ,CAAC"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * System prompt builder for the judge.
3
+ *
4
+ * Constructs the system prompt that defines the judge's
5
+ * evaluation framework and behavior.
6
+ */
7
+ import type { EvaluationCriteria, JudgeConfig } from "../types.js";
8
+ /**
9
+ * Builds the system prompt for the judge.
10
+ *
11
+ * Defines the judge's role, evaluation framework, and instructions
12
+ * for assessing AI-generated applications.
13
+ *
14
+ * @param _criteria - Evaluation criteria (not read by the current implementation)
15
+ * @param config - Judge configuration
16
+ * @returns Formatted system prompt
17
+ */
18
+ export declare function buildJudgeSystemPrompt(_criteria: EvaluationCriteria, config?: Partial<JudgeConfig>): string;
19
+ /**
20
+ * Builds evaluation criteria from prompts.
21
+ *
22
+ * Analyzes prompts to determine appropriate evaluation criteria
23
+ * for the generated application.
24
+ *
25
+ * @param prompts - Array of prompts given to the agent
26
+ * @param complexity - Task complexity level
27
+ * @returns Evaluation criteria
28
+ */
29
+ export declare function buildCriteriaFromPrompts(prompts: string[], complexity?: "low" | "medium" | "high"): EvaluationCriteria;
30
+ //# sourceMappingURL=system-prompt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"system-prompt.d.ts","sourceRoot":"","sources":["../../../../src/ai-service/judge/prompts/system-prompt.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,kBAAkB,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAEnE;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CACpC,SAAS,EAAE,kBAAkB,EAC7B,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,MAAM,CAqHR;AAED;;;;;;;;;GASG;AACH,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EAAE,EACjB,UAAU,GAAE,KAAK,GAAG,QAAQ,GAAG,MAAiB,GAC/C,kBAAkB,CAsFpB"}
@@ -0,0 +1,212 @@
1
+ /**
2
+ * System prompt builder for the judge.
3
+ *
4
+ * Constructs the system prompt that defines the judge's
5
+ * evaluation framework and behavior.
6
+ */
7
+ /**
8
+ * Builds the system prompt for the judge.
9
+ *
10
+ * Defines the judge's role, evaluation framework, and instructions
11
+ * for assessing AI-generated applications.
12
+ *
13
+ * @param criteria - Evaluation criteria
14
+ * @param config - Judge configuration
15
+ * @returns Formatted system prompt
16
+ */
17
+ export function buildJudgeSystemPrompt(_criteria, config = {}) {
18
+ const passingThreshold = config.passingThreshold || 70;
19
+ const maxSteps = config.maxSteps || 15;
20
+ return `You are an expert AI judge evaluating web applications built by an AI agent.
21
+
22
+ ## Step Budget
23
+
24
+ You have a maximum of ${maxSteps} steps to complete your evaluation. Plan your testing approach accordingly and prioritize the most critical requirements. On your final step, you MUST call submitFeedback with your evaluation.
25
+
26
+ ---
27
+
28
+ Your role is to:
29
+ 1. Objectively assess whether the application meets specified requirements
30
+ 2. Interact with the application using browser automation tools
31
+ 3. Collect evidence (screenshots, DOM state, etc.) to support your evaluation
32
+ 4. Provide detailed, constructive feedback
33
+
34
+ ## Evaluation Framework
35
+
36
+ Your evaluation should be based on weighted categories:
37
+
38
+ **Functional Requirements (40%)**
39
+ - Core features work as specified
40
+ - Business logic is correctly implemented
41
+ - Data operations function properly
42
+ - Error cases are handled appropriately
43
+
44
+ **UI/UX Requirements (30%)**
45
+ - Interface is usable and intuitive
46
+ - Visual design meets requirements
47
+ - Responsive and accessible
48
+ - User feedback is clear
49
+
50
+ **Data Integration (20%)**
51
+ - Integrations work correctly
52
+ - Data flows properly between systems
53
+ - API connections are functional
54
+ - Data validation is present
55
+
56
+ **Performance & Code Quality (10%)**
57
+ - Application loads reasonably quickly
58
+ - Interactions are responsive
59
+ - Implementation appears maintainable
60
+ - No obvious security issues
61
+
62
+ ## Scoring Guidelines
63
+
64
+ For each requirement:
65
+ - 90-100: Excellent implementation, exceeds expectations
66
+ - 80-89: Good implementation, fully meets requirements
67
+ - 70-79: Acceptable implementation, meets basic requirements
68
+ - 60-69: Partial implementation, some requirements not met
69
+ - 0-59: Poor implementation, significant requirements not met
70
+
71
+ Overall passing score: ${passingThreshold}/100
72
+
73
+ ## Evaluation Process
74
+
75
+ 1. **Initial Assessment**
76
+ - Navigate to the application
77
+ - Take a screenshot of the initial state
78
+ - Verify the application loads correctly
79
+
80
+ 2. **Systematic Testing**
81
+ - Test each functional requirement methodically
82
+ - Interact with UI elements to verify behavior
83
+ - Check data flows and integrations
84
+ - Capture evidence for each test
85
+
86
+ 3. **Evidence Collection**
87
+ ${config.captureScreenshots
88
+ ? "- Take screenshots to document functionality"
89
+ : ""}
90
+ - Note specific selectors and elements tested
91
+ - Record actual vs expected behavior
92
+ ${config.detailedReasoning
93
+ ? "- Provide detailed reasoning for each score"
94
+ : ""}
95
+
96
+ 4. **Final Evaluation**
97
+ - Calculate scores for each category
98
+ - Determine overall pass/fail
99
+ - Provide constructive suggestions
100
+ - Submit evaluation using submitFeedback tool
101
+
102
+ ## Important Guidelines
103
+
104
+ - Be objective and fair in your assessment
105
+ - Focus on whether requirements are met, not implementation details
106
+ - Provide specific, actionable feedback
107
+ - Consider the complexity of the task when scoring
108
+ - If unable to test something, note it clearly
109
+ - Don't penalize for minor UI variations that don't affect functionality
110
+
111
+ ## Tool Usage
112
+
113
+ You have access to:
114
+ - **playwright_action**: Browser automation (navigate, click, fill, screenshot, etc.)
115
+ - **submitFeedback**: Submit your final structured evaluation
116
+
117
+ Use these tools efficiently to thoroughly test the application.
118
+
119
+ **CRITICAL for evaluate action**: When using the 'evaluate' action to run JavaScript in the browser:
120
+ - NEVER use ES6 module syntax (import/export statements) - this will cause a SyntaxError
121
+ - NEVER use require() statements - this is not available in browser context
122
+ - NEVER try to import React, libraries, or any modules
123
+ - Only use plain JavaScript that can run directly in a browser console
124
+ - Access browser globals directly (window, document, etc.)
125
+ - To check for React components, look for them on window or in the DOM, don't import
126
+ - Example GOOD code: document.querySelector('.form'), window.React
127
+ - Example BAD code: import React from 'react', const { useState } = require('react')
128
+ - If you need to check if something exists, use: typeof window.SomeLibrary !== 'undefined'`;
129
+ }
130
/**
 * Derives evaluation criteria from the prompts given to the agent.
 *
 * All prompts are joined with spaces and lower-cased, then scanned for
 * keywords using plain substring matching. Each keyword group that matches
 * contributes one requirement, in the order the groups are listed below.
 * Two UI requirements are always present; the functional and data lists
 * receive generic fallbacks when no keyword matched.
 *
 * @param prompts - Array of prompts given to the agent
 * @param complexity - Task complexity level; only "high" adds performance requirements
 * @returns Evaluation criteria
 */
export function buildCriteriaFromPrompts(prompts, complexity = "medium") {
    const promptText = prompts.join(" ").toLowerCase();
    // Keeps the requirement of every rule that has at least one keyword in the prompt text.
    const requirementsFor = (rules) => rules
        .filter((rule) => rule.keywords.some((keyword) => promptText.includes(keyword)))
        .map((rule) => rule.requirement);
    // Functional requirements based on keywords
    const functionalRequirements = requirementsFor([
        { keywords: ["create", "add"], requirement: "User can create new records" },
        { keywords: ["edit", "update"], requirement: "User can edit existing records" },
        { keywords: ["delete", "remove"], requirement: "User can delete records" },
        { keywords: ["list", "display"], requirement: "Application displays data correctly" },
        { keywords: ["search", "filter"], requirement: "Search/filter functionality works" },
        { keywords: ["form"], requirement: "Forms have proper validation" },
        { keywords: ["authenticate", "login"], requirement: "Authentication works correctly" },
    ]);
    // Ensure there is always something functional to evaluate
    if (functionalRequirements.length === 0) {
        functionalRequirements.push("Application loads without errors", "Core functionality works as described");
    }
    // UI requirements: two baseline entries, then keyword-driven ones
    const uiRequirements = [
        "Application has a clear layout",
        "UI elements are properly labeled",
        ...requirementsFor([
            { keywords: ["table"], requirement: "Tables display data clearly" },
            { keywords: ["button"], requirement: "Buttons are clearly labeled and functional" },
            { keywords: ["responsive"], requirement: "UI is responsive to different screen sizes" },
        ]),
    ];
    // Data requirements based on integrations mentioned
    const dataRequirements = requirementsFor([
        { keywords: ["api", "integration"], requirement: "API integrations function correctly" },
        { keywords: ["database", "sql"], requirement: "Database operations work properly" },
        { keywords: ["salesforce"], requirement: "Salesforce integration works" },
        { keywords: ["slack"], requirement: "Slack integration functions" },
    ]);
    if (dataRequirements.length === 0) {
        dataRequirements.push("Data is displayed correctly");
    }
    // Performance requirements based on complexity
    const performanceRequirements = complexity === "high"
        ? ["Application loads within 3 seconds", "No blocking operations in UI"]
        : [];
    return {
        functionalRequirements,
        uiRequirements,
        dataRequirements,
        performanceRequirements,
    };
}
212
+ //# sourceMappingURL=system-prompt.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"system-prompt.js","sourceRoot":"","sources":["../../../../src/ai-service/judge/prompts/system-prompt.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH;;;;;;;;;GASG;AACH,MAAM,UAAU,sBAAsB,CACpC,SAA6B,EAC7B,SAA+B,EAAE;IAEjC,MAAM,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC;IACvD,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC;IAEvC,OAAO;;;;wBAIe,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yBA+CP,gBAAgB;;;;;;;;;;;;;;;;KAiBpC,MAAM,CAAC,kBAAkB;QACvB,CAAC,CAAC,8CAA8C;QAChD,CAAC,CAAC,EACN;;;KAIE,MAAM,CAAC,iBAAiB;QACtB,CAAC,CAAC,6CAA6C;QAC/C,CAAC,CAAC,EACN;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2FAkCwF,CAAC;AAC5F,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,wBAAwB,CACtC,OAAiB,EACjB,aAAwC,QAAQ;IAEhD,MAAM,QAAQ,GAAuB;QACnC,sBAAsB,EAAE,EAAE;QAC1B,cAAc,EAAE,EAAE;QAClB,gBAAgB,EAAE,EAAE;QACpB,uBAAuB,EAAE,EAAE;KAC5B,CAAC;IAEF,mCAAmC;IACnC,MAAM,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;IAEtD,4CAA4C;IAC5C,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACtE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;IACzE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;IAClE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACxE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IAC9E,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IAC5E,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACnC,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IACvE,CAAC;IACD,IACE,aAAa,CAAC,QAAQ,CAAC,cAAc,CAAC;QACtC,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,EAC/B,CAAC;QACD,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;IACzE,CAAC;IAED,kBAAkB;IAClB,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,gCAAgC
,CAAC,CAAC;IAC/D,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IAEjE,IAAI,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACpC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC9D,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACrC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAC7E,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACzC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAC7E,CAAC;IAED,oDAAoD;IACpD,IAAI,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAC3E,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IACxE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACxE,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACzC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IACjE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACpC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAChE,CAAC;IAED,+CAA+C;IAC/C,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,QAAQ,CAAC,uBAAuB,EAAE,IAAI,CACpC,oCAAoC,CACrC,CAAC;QACF,QAAQ,CAAC,uBAAuB,EAAE,IAAI,CAAC,8BAA8B,CAAC,CAAC;IACzE,CAAC;IAED,8BAA8B;IAC9B,IAAI,QAAQ,CAAC,sBAAsB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjD,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;QACzE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAClC,uCAAuC,CACxC,CAAC;IACJ,CAAC;IAED,IAAI,QAAQ,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3C,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * CSV-based storage implementation for judge evaluations.
3
+ *
4
+ * Stores evaluation results in a CSV file with the following columns:
5
+ * - timestamp: ISO 8601 timestamp
6
+ * - promptId: Prompt identifier
7
+ * - branchName: Git branch name
8
+ * - commitSha: Git commit SHA
9
+ * - appId: Application identifier
10
+ * - prompt: The actual prompt text
11
+ * - passed: Boolean pass/fail status
12
+ * - score: Overall score (0-100)
13
+ * - feedback: Summary feedback text
14
+ *
15
+ * CSV format provides simple, human-readable storage suitable for analysis
16
+ * in spreadsheet applications or data processing tools.
17
+ *
18
+ * Configuration:
19
+ * The storage location can be configured via the JUDGE_STORAGE_PATH environment variable:
20
+ * - Directory path: `JUDGE_STORAGE_PATH=~/eval` (creates evaluations.csv inside)
21
+ * - Full file path: `JUDGE_STORAGE_PATH=~/eval/results.csv`
22
+ * - Supports ~ expansion for home directory
23
+ * - Default: `<appRoot>/.superblocks/judge-evaluations/evaluations.csv`
24
+ */
25
+ import type { JudgeStorage } from "./interface.js";
26
+ import type { StoredEvaluation, EvaluationQuery } from "./types.js";
27
+ /**
28
+ * CSV-based implementation of JudgeStorage.
29
+ *
30
+ * Thread-safe through file system atomic operations.
31
+ * Appends new evaluations to maintain chronological order.
32
+ */
33
+ export declare class CsvJudgeStorage implements JudgeStorage {
34
+ private readonly filePath;
35
+ /**
36
+ * Creates a new CSV storage instance.
37
+ *
38
+ * @param storageDir - Directory for storage (default: .superblocks/judge-evaluations)
39
+ * @param filename - CSV filename (default: evaluations.csv)
40
+ */
41
+ constructor(storageDir?: string, filename?: string);
42
+ /**
43
+ * Ensures the storage directory and file exist.
44
+ *
45
+ * Creates directory if needed and initializes CSV with header row.
46
+ */
47
+ private ensureStorageExists;
48
+ /**
49
+ * Escapes a value for CSV format.
50
+ *
51
+ * Handles quotes and commas by wrapping in double quotes and escaping
52
+ * internal quotes.
53
+ */
54
+ private escapeCsvValue;
55
+ /**
56
+ * Converts a StoredEvaluation to a CSV row.
57
+ */
58
+ private toCsvRow;
59
+ /**
60
+ * Parses a CSV row into a StoredEvaluation.
61
+ *
62
+ * Note: This is a simplified parser that doesn't handle all CSV edge cases.
63
+ * For production use with complex data, consider a proper CSV parsing library.
64
+ */
65
+ private fromCsvRow;
66
+ /**
67
+ * Saves an evaluation result to the CSV file.
68
+ *
69
+ * Appends a new row to the file. Creates file and directory if needed.
70
+ */
71
+ saveEvaluation(evaluation: StoredEvaluation): Promise<void>;
72
+ /**
73
+ * Reads all evaluations from the CSV file.
74
+ */
75
+ private readAllEvaluations;
76
+ /**
77
+ * Filters evaluations based on query parameters.
78
+ */
79
+ private applyQuery;
80
+ /**
81
+ * Retrieves all evaluations matching the query.
82
+ */
83
+ getEvaluations(query?: EvaluationQuery): Promise<StoredEvaluation[]>;
84
+ /**
85
+ * Retrieves evaluations for a specific prompt.
86
+ */
87
+ getEvaluationsByPrompt(promptId: string): Promise<StoredEvaluation[]>;
88
+ /**
89
+ * Retrieves evaluations for a specific branch.
90
+ */
91
+ getEvaluationsByBranch(branchName: string): Promise<StoredEvaluation[]>;
92
+ /**
93
+ * Deletes all stored evaluations.
94
+ *
95
+ * Removes the CSV file. Use with caution.
96
+ */
97
+ clear(): Promise<void>;
98
+ }
99
+ //# sourceMappingURL=csv-storage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv-storage.d.ts","sourceRoot":"","sources":["../../../../src/ai-service/judge/storage/csv-storage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,KAAK,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAapE;;;;;GAKG;AACH,qBAAa,eAAgB,YAAW,YAAY;IAClD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAElC;;;;;OAKG;gBAED,UAAU,GAAE,MAA4B,EACxC,QAAQ,GAAE,MAA0B;IAKtC;;;;OAIG;YACW,mBAAmB;IAejC;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAWtB;;OAEG;IACH,OAAO,CAAC,QAAQ;IA0BhB;;;;;OAKG;IACH,OAAO,CAAC,UAAU;IAmFlB;;;;OAIG;IACG,cAAc,CAAC,UAAU,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAOjE;;OAEG;YACW,kBAAkB;IAuBhC;;OAEG;IACH,OAAO,CAAC,UAAU;IAkDlB;;OAEG;IACG,cAAc,CAAC,KAAK,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAK1E;;OAEG;IACG,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAI3E;;OAEG;IACG,sBAAsB,CAC1B,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAI9B;;;;OAIG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAO7B"}