@superblocksteam/vite-plugin-file-sync 2.0.67 → 2.0.68-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-service/agent/tools/apis/analysis.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/apis/analysis.js +4 -0
- package/dist/ai-service/agent/tools/apis/analysis.js.map +1 -1
- package/dist/ai-service/agent/tools/apis/api-executor.d.ts +9 -1
- package/dist/ai-service/agent/tools/apis/api-executor.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/apis/api-executor.js +4 -1
- package/dist/ai-service/agent/tools/apis/api-executor.js.map +1 -1
- package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.d.ts +1 -0
- package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.js +1 -1
- package/dist/ai-service/agent/tools/apis/api-validation-orchestrator.js.map +1 -1
- package/dist/ai-service/agent/tools/apis/test-api.d.ts +5 -0
- package/dist/ai-service/agent/tools/apis/test-api.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/apis/test-api.js +37 -18
- package/dist/ai-service/agent/tools/apis/test-api.js.map +1 -1
- package/dist/ai-service/agent/tools/build-delete-file.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/build-delete-file.js +29 -0
- package/dist/ai-service/agent/tools/build-delete-file.js.map +1 -1
- package/dist/ai-service/agent/tools/integrations/execute-request.d.ts.map +1 -1
- package/dist/ai-service/agent/tools/integrations/execute-request.js +5 -4
- package/dist/ai-service/agent/tools/integrations/execute-request.js.map +1 -1
- package/dist/ai-service/agent/tools.d.ts.map +1 -1
- package/dist/ai-service/agent/tools.js +17 -6
- package/dist/ai-service/agent/tools.js.map +1 -1
- package/dist/ai-service/agent/tools2/entity-permissions.d.ts +23 -20
- package/dist/ai-service/agent/tools2/entity-permissions.d.ts.map +1 -1
- package/dist/ai-service/agent/tools2/entity-permissions.js +15 -11
- package/dist/ai-service/agent/tools2/entity-permissions.js.map +1 -1
- package/dist/ai-service/agent/tools2/example.js +2 -2
- package/dist/ai-service/agent/tools2/example.js.map +1 -1
- package/dist/ai-service/agent/tools2/index.d.ts +1 -1
- package/dist/ai-service/agent/tools2/index.d.ts.map +1 -1
- package/dist/ai-service/agent/tools2/index.js +1 -1
- package/dist/ai-service/agent/tools2/index.js.map +1 -1
- package/dist/ai-service/agent/tools2/registry.d.ts +4 -4
- package/dist/ai-service/agent/tools2/registry.d.ts.map +1 -1
- package/dist/ai-service/agent/tools2/registry.js +42 -29
- package/dist/ai-service/agent/tools2/registry.js.map +1 -1
- package/dist/ai-service/agent/tools2/tools/read.d.ts +1 -1
- package/dist/ai-service/agent/tools2/types.d.ts +36 -15
- package/dist/ai-service/agent/tools2/types.d.ts.map +1 -1
- package/dist/ai-service/agent/tools2/types.js.map +1 -1
- package/dist/ai-service/chat/chat-session-store.d.ts +5 -7
- package/dist/ai-service/chat/chat-session-store.d.ts.map +1 -1
- package/dist/ai-service/chat/chat-session-store.js +36 -17
- package/dist/ai-service/chat/chat-session-store.js.map +1 -1
- package/dist/ai-service/chat/extract-history.d.ts +0 -85
- package/dist/ai-service/chat/extract-history.d.ts.map +1 -1
- package/dist/ai-service/chat/extract-history.js +3 -239
- package/dist/ai-service/chat/extract-history.js.map +1 -1
- package/dist/ai-service/index.d.ts +26 -4
- package/dist/ai-service/index.d.ts.map +1 -1
- package/dist/ai-service/index.js +125 -4
- package/dist/ai-service/index.js.map +1 -1
- package/dist/ai-service/judge/debug-browser.d.ts +8 -0
- package/dist/ai-service/judge/debug-browser.d.ts.map +1 -0
- package/dist/ai-service/judge/debug-browser.js +79 -0
- package/dist/ai-service/judge/debug-browser.js.map +1 -0
- package/dist/ai-service/judge/index.d.ts +12 -0
- package/dist/ai-service/judge/index.d.ts.map +1 -0
- package/dist/ai-service/judge/index.js +11 -0
- package/dist/ai-service/judge/index.js.map +1 -0
- package/dist/ai-service/judge/integration/mcp-client.d.ts +82 -0
- package/dist/ai-service/judge/integration/mcp-client.d.ts.map +1 -0
- package/dist/ai-service/judge/integration/mcp-client.js +276 -0
- package/dist/ai-service/judge/integration/mcp-client.js.map +1 -0
- package/dist/ai-service/judge/integration/playwright-bridge.d.ts +142 -0
- package/dist/ai-service/judge/integration/playwright-bridge.d.ts.map +1 -0
- package/dist/ai-service/judge/integration/playwright-bridge.js +217 -0
- package/dist/ai-service/judge/integration/playwright-bridge.js.map +1 -0
- package/dist/ai-service/judge/judge-eval-http.d.ts +3 -0
- package/dist/ai-service/judge/judge-eval-http.d.ts.map +1 -0
- package/dist/ai-service/judge/judge-eval-http.js +541 -0
- package/dist/ai-service/judge/judge-eval-http.js.map +1 -0
- package/dist/ai-service/judge/judge-eval-service-runner.d.ts +35 -0
- package/dist/ai-service/judge/judge-eval-service-runner.d.ts.map +1 -0
- package/dist/ai-service/judge/judge-eval-service-runner.js +124 -0
- package/dist/ai-service/judge/judge-eval-service-runner.js.map +1 -0
- package/dist/ai-service/judge/judge-executor.d.ts +65 -0
- package/dist/ai-service/judge/judge-executor.d.ts.map +1 -0
- package/dist/ai-service/judge/judge-executor.js +334 -0
- package/dist/ai-service/judge/judge-executor.js.map +1 -0
- package/dist/ai-service/judge/judge-service.d.ts +161 -0
- package/dist/ai-service/judge/judge-service.d.ts.map +1 -0
- package/dist/ai-service/judge/judge-service.js +241 -0
- package/dist/ai-service/judge/judge-service.js.map +1 -0
- package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts +37 -0
- package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts.map +1 -0
- package/dist/ai-service/judge/prompts/evaluation-criteria.js +283 -0
- package/dist/ai-service/judge/prompts/evaluation-criteria.js.map +1 -0
- package/dist/ai-service/judge/prompts/system-prompt.d.ts +30 -0
- package/dist/ai-service/judge/prompts/system-prompt.d.ts.map +1 -0
- package/dist/ai-service/judge/prompts/system-prompt.js +212 -0
- package/dist/ai-service/judge/prompts/system-prompt.js.map +1 -0
- package/dist/ai-service/judge/storage/csv-storage.d.ts +99 -0
- package/dist/ai-service/judge/storage/csv-storage.d.ts.map +1 -0
- package/dist/ai-service/judge/storage/csv-storage.js +274 -0
- package/dist/ai-service/judge/storage/csv-storage.js.map +1 -0
- package/dist/ai-service/judge/storage/index.d.ts +9 -0
- package/dist/ai-service/judge/storage/index.d.ts.map +1 -0
- package/dist/ai-service/judge/storage/index.js +7 -0
- package/dist/ai-service/judge/storage/index.js.map +1 -0
- package/dist/ai-service/judge/storage/interface.d.ts +51 -0
- package/dist/ai-service/judge/storage/interface.d.ts.map +1 -0
- package/dist/ai-service/judge/storage/interface.js +8 -0
- package/dist/ai-service/judge/storage/interface.js.map +1 -0
- package/dist/ai-service/judge/storage/types.d.ts +54 -0
- package/dist/ai-service/judge/storage/types.d.ts.map +1 -0
- package/dist/ai-service/judge/storage/types.js +7 -0
- package/dist/ai-service/judge/storage/types.js.map +1 -0
- package/dist/ai-service/judge/tools/index.d.ts +22 -0
- package/dist/ai-service/judge/tools/index.d.ts.map +1 -0
- package/dist/ai-service/judge/tools/index.js +29 -0
- package/dist/ai-service/judge/tools/index.js.map +1 -0
- package/dist/ai-service/judge/tools/playwright-action.d.ts +18 -0
- package/dist/ai-service/judge/tools/playwright-action.d.ts.map +1 -0
- package/dist/ai-service/judge/tools/playwright-action.js +171 -0
- package/dist/ai-service/judge/tools/playwright-action.js.map +1 -0
- package/dist/ai-service/judge/tools/submit-feedback.d.ts +41 -0
- package/dist/ai-service/judge/tools/submit-feedback.d.ts.map +1 -0
- package/dist/ai-service/judge/tools/submit-feedback.js +150 -0
- package/dist/ai-service/judge/tools/submit-feedback.js.map +1 -0
- package/dist/ai-service/judge/types.d.ts +169 -0
- package/dist/ai-service/judge/types.d.ts.map +1 -0
- package/dist/ai-service/judge/types.js +8 -0
- package/dist/ai-service/judge/types.js.map +1 -0
- package/dist/ai-service/llm/context/constants.d.ts +7 -0
- package/dist/ai-service/llm/context/constants.d.ts.map +1 -1
- package/dist/ai-service/llm/context/constants.js +7 -0
- package/dist/ai-service/llm/context/constants.js.map +1 -1
- package/dist/ai-service/llm/context/context.d.ts +8 -1
- package/dist/ai-service/llm/context/context.d.ts.map +1 -1
- package/dist/ai-service/llm/context/context.js +47 -12
- package/dist/ai-service/llm/context/context.js.map +1 -1
- package/dist/ai-service/llm/context/internal-types.d.ts +1 -0
- package/dist/ai-service/llm/context/internal-types.d.ts.map +1 -1
- package/dist/ai-service/llm/context/internal-types.js.map +1 -1
- package/dist/ai-service/llm/context/manager.d.ts +2 -1
- package/dist/ai-service/llm/context/manager.d.ts.map +1 -1
- package/dist/ai-service/llm/context/manager.js +2 -1
- package/dist/ai-service/llm/context/manager.js.map +1 -1
- package/dist/ai-service/llm/context/utils/message-utils.d.ts +10 -0
- package/dist/ai-service/llm/context/utils/message-utils.d.ts.map +1 -1
- package/dist/ai-service/llm/context/utils/message-utils.js +74 -0
- package/dist/ai-service/llm/context/utils/message-utils.js.map +1 -1
- package/dist/ai-service/llm/error.d.ts +1 -1
- package/dist/ai-service/llm/interaction/adapters/vercel.d.ts.map +1 -1
- package/dist/ai-service/llm/interaction/adapters/vercel.js.map +1 -1
- package/dist/ai-service/llm/interaction/provider.d.ts +10 -9
- package/dist/ai-service/llm/interaction/provider.d.ts.map +1 -1
- package/dist/ai-service/llmobs/middleware/stream-text.d.ts +8 -8
- package/dist/ai-service/llmobs/middleware/stream-text.d.ts.map +1 -1
- package/dist/ai-service/llmobs/middleware/stream-text.js.map +1 -1
- package/dist/ai-service/llmobs/tracer.d.ts.map +1 -1
- package/dist/ai-service/llmobs/tracer.js +2 -1
- package/dist/ai-service/llmobs/tracer.js.map +1 -1
- package/dist/ai-service/mcp/embedded-playwright-mcp-server.d.ts +53 -0
- package/dist/ai-service/mcp/embedded-playwright-mcp-server.d.ts.map +1 -0
- package/dist/ai-service/mcp/embedded-playwright-mcp-server.js +541 -0
- package/dist/ai-service/mcp/embedded-playwright-mcp-server.js.map +1 -0
- package/dist/ai-service/mcp/playwright-server.d.ts +114 -0
- package/dist/ai-service/mcp/playwright-server.d.ts.map +1 -0
- package/dist/ai-service/mcp/playwright-server.js +109 -0
- package/dist/ai-service/mcp/playwright-server.js.map +1 -0
- package/dist/ai-service/state-machine/clark-fsm.d.ts +4 -1
- package/dist/ai-service/state-machine/clark-fsm.d.ts.map +1 -1
- package/dist/ai-service/state-machine/clark-fsm.js +3 -1
- package/dist/ai-service/state-machine/clark-fsm.js.map +1 -1
- package/dist/ai-service/state-machine/handlers/idle.d.ts.map +1 -1
- package/dist/ai-service/state-machine/handlers/idle.js +3 -1
- package/dist/ai-service/state-machine/handlers/idle.js.map +1 -1
- package/dist/ai-service/state-machine/handlers/runtime-reviewing.d.ts.map +1 -1
- package/dist/ai-service/state-machine/handlers/runtime-reviewing.js +4 -1
- package/dist/ai-service/state-machine/handlers/runtime-reviewing.js.map +1 -1
- package/dist/ai-service/state-machine/helpers/context-id.d.ts +1 -1
- package/dist/ai-service/state-machine/helpers/context-id.d.ts.map +1 -1
- package/dist/ai-service/state-machine/helpers/context-id.js +6 -7
- package/dist/ai-service/state-machine/helpers/context-id.js.map +1 -1
- package/dist/ai-service/state-machine/mocks.d.ts +1 -0
- package/dist/ai-service/state-machine/mocks.d.ts.map +1 -1
- package/dist/ai-service/state-machine/mocks.js +5 -1
- package/dist/ai-service/state-machine/mocks.js.map +1 -1
- package/dist/server-rpc/client.js +1 -1
- package/dist/server-rpc/client.js.map +1 -1
- package/dist/socket-manager.d.ts.map +1 -1
- package/dist/socket-manager.js +26 -6
- package/dist/socket-manager.js.map +1 -1
- package/dist/sync-service/index.d.ts +5 -0
- package/dist/sync-service/index.d.ts.map +1 -1
- package/dist/sync-service/index.js +13 -1
- package/dist/sync-service/index.js.map +1 -1
- package/package.json +7 -6
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evaluation criteria builder for judge assessments.
|
|
3
|
+
*
|
|
4
|
+
* Provides utilities for creating structured evaluation criteria
|
|
5
|
+
* based on simulation prompts and complexity levels.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Derives the judge's evaluation criteria from a simulation prompt.
 *
 * The prompt's name, description and individual prompts are merged into one
 * lower-cased string, which the keyword-based extractors scan to decide
 * which requirements apply.
 *
 * @param prompt - Simulation prompt object
 * @returns Structured evaluation criteria
 */
export function buildCriteriaFromPrompt(prompt) {
    // Lower-cased haystack shared by every keyword-based extractor
    const searchableText = [prompt.name, prompt.description, ...prompt.prompts]
        .join(" ")
        .toLowerCase();
    const level = prompt.complexity;
    // Performance expectations are only generated for medium/high complexity
    const wantsPerformance = level === "high" || level === "medium";
    return {
        functionalRequirements: extractFunctionalRequirements(searchableText, level),
        uiRequirements: extractUIRequirements(searchableText, level),
        dataRequirements: extractDataRequirements(searchableText, level),
        performanceRequirements: wantsPerformance
            ? extractPerformanceRequirements(level)
            : [],
        // Custom criteria look at the prompt id and prompt count, not the text
        customCriteria: extractCustomCriteria(prompt),
    };
}
|
|
42
|
+
/**
 * Extracts functional requirements from prompt text.
 *
 * Each rule pairs a set of keywords with the requirements they imply. A
 * keyword only counts when it begins a word, so inflected forms still match
 * ("forms", "authentication", "users") while accidental mid-word hits do not
 * ("information" is not a form, "already" is not a read). Known limitation:
 * words that merely share a prefix with a keyword (e.g. "address" for "add")
 * still match.
 *
 * @param text - Combined prompt text; must already be lower-cased because
 *   keywords are compared case-sensitively
 * @param complexity - Task complexity; "high" adds business-logic and
 *   edge-case requirements
 * @returns Array of functional requirements (never empty)
 */
function extractFunctionalRequirements(text, complexity) {
    // Each entry: [keywords, requirements implied when any keyword is mentioned]
    const rules = [
        // CRUD operations
        [["create", "add", "new"], ["Ability to create new records with proper validation"]],
        [["read", "view", "display"], ["Ability to view and read existing data"]],
        [["update", "edit", "modify"], ["Ability to update existing records"]],
        [["delete", "remove"], ["Ability to delete records with confirmation"]],
        // Search and filter
        [["search", "find"], ["Search functionality works correctly"]],
        [["filter", "sort"], ["Filter and sort capabilities function properly"]],
        // Forms and validation
        [
            ["form", "input"],
            ["Forms include appropriate validation", "Error messages are clear and helpful"],
        ],
        // Authentication ("oauth" is listed explicitly because "auth" no longer
        // matches in the middle of a word)
        [
            ["auth", "oauth", "login", "user"],
            ["Authentication flow works correctly", "User sessions are properly managed"],
        ],
        // Workflow
        [
            ["workflow", "process", "step"],
            ["Multi-step workflows progress correctly", "State is maintained between steps"],
        ],
        // Data operations
        [["import", "export"], ["Data import/export functionality works"]],
        [
            ["report", "analytics"],
            ["Reports display accurate data", "Analytics calculations are correct"],
        ],
        // Notifications
        [["notify", "alert", "email"], ["Notifications are triggered appropriately"]],
    ];
    const requirements = [];
    for (const [keywords, implied] of rules) {
        // (?:^|[^a-z]) anchors the keyword to the start of a word
        const startsAWord = new RegExp(`(?:^|[^a-z])(?:${keywords.join("|")})`);
        if (startsAWord.test(text)) {
            requirements.push(...implied);
        }
    }
    // Add complexity-based requirements
    if (complexity === "high") {
        requirements.push("Complex business logic is correctly implemented");
        requirements.push("Edge cases are properly handled");
    }
    // Ensure minimum requirements when nothing specific was detected
    if (requirements.length === 0) {
        requirements.push("Core functionality works as described");
        requirements.push("Application completes primary use case");
    }
    return requirements;
}
|
|
120
|
+
/**
 * Extracts UI/UX requirements from prompt text.
 *
 * Four baseline requirements (clean interface, intuitive navigation, action
 * feedback, error states) are always present; the rest are added when the
 * text mentions a matching UI element. Keywords must begin a word, and two
 * keywords are tightened further because plain substring matching misfired:
 * "tab" must be the whole word tab/tabs/tabbed (so "table" and "database"
 * do not imply tabs) and "graph" must not be the start of "graphql".
 *
 * @param text - Combined prompt text; must already be lower-cased because
 *   keywords are compared case-sensitively
 * @param complexity - Task complexity; "high" adds an accessibility requirement
 * @returns Array of UI requirements
 */
function extractUIRequirements(text, complexity) {
    // True when any alternative in `pattern` occurs at the start of a word
    const mentions = (pattern) => new RegExp(`(?:^|[^a-z])(?:${pattern})`).test(text);
    // Basic UI requirements
    const requirements = [
        "Interface is clean and organized",
        "Navigation is intuitive",
    ];
    // Specific UI elements ("datatable"/"datagrid" keep compound spellings matching)
    if (mentions("table|grid|datatable|datagrid")) {
        requirements.push("Tables/grids display data clearly");
        requirements.push("Column headers are descriptive");
    }
    if (mentions("chart|graph(?!ql)|visual")) {
        requirements.push("Data visualizations are clear and accurate");
    }
    if (mentions("modal|dialog")) {
        requirements.push("Modals/dialogs work correctly");
    }
    if (mentions("tab(?:s|bed)?(?![a-z])|accordion")) {
        requirements.push("Tab/accordion navigation works properly");
    }
    // Responsive design
    if (mentions("responsive|mobile")) {
        requirements.push("UI is responsive to different screen sizes");
    }
    // Accessibility
    if (complexity === "high") {
        requirements.push("UI elements have proper labels for accessibility");
    }
    // Feedback
    requirements.push("User actions provide appropriate feedback");
    if (mentions("loading|spinner")) {
        requirements.push("Loading states are shown during operations");
    }
    // Error handling
    requirements.push("Error states are clearly communicated");
    return requirements;
}
|
|
165
|
+
/**
 * Extracts data integration requirements from prompt text.
 *
 * Short keywords that commonly occur inside unrelated words are matched as
 * whole words only: "api"/"apis" (not "rapid", "capital"), "rest"/"restful"
 * (not "interest", "restore") and "stripe" (not "striped"). The remaining
 * keywords keep substring matching on purpose, so "postgresql" and "mysql"
 * still count as SQL. Known limitation: the English word "rest" ("the rest
 * of the page") is indistinguishable from REST and still matches.
 *
 * @param text - Combined prompt text; must already be lower-cased because
 *   keywords are compared case-sensitively
 * @param complexity - Task complexity; "medium" and "high" add relationship
 *   and concurrency requirements
 * @returns Array of data requirements (always contains the validation entry)
 */
function extractDataRequirements(text, complexity) {
    // True when any alternative in `pattern` appears as a standalone word
    const hasWord = (pattern) => new RegExp(`(?:^|[^a-z])(?:${pattern})(?![a-z])`).test(text);
    const requirements = [];
    // API integrations
    if (hasWord("apis?|rest(?:ful)?") || text.includes("graphql")) {
        requirements.push("API integrations function correctly");
        requirements.push("API errors are handled gracefully");
    }
    // Database
    if (text.includes("database") ||
        text.includes("sql") ||
        text.includes("query")) {
        requirements.push("Database operations complete successfully");
        requirements.push("Data persistence works correctly");
    }
    // Specific integrations
    if (text.includes("salesforce")) {
        requirements.push("Salesforce integration retrieves and updates data correctly");
    }
    if (text.includes("slack")) {
        requirements.push("Slack messages are sent successfully");
    }
    if (text.includes("jira")) {
        requirements.push("Jira tickets are created/updated correctly");
    }
    if (text.includes("servicenow")) {
        requirements.push("ServiceNow integration functions properly");
    }
    if (hasWord("stripe") || text.includes("payment")) {
        requirements.push("Payment processing works correctly");
    }
    // Data validation (always required)
    requirements.push("Data validation prevents invalid inputs");
    if (complexity === "high" || complexity === "medium") {
        requirements.push("Data relationships are properly maintained");
        requirements.push("Concurrent data operations are handled safely");
    }
    // Default requirement: a length of 1 means only the unconditional
    // validation entry above was added
    if (requirements.length === 1) {
        requirements.push("Data is displayed and stored correctly");
    }
    return requirements;
}
|
|
216
|
+
/**
 * Produces the performance requirements for a complexity level.
 *
 * @param complexity - Task complexity
 * @returns Four requirements for "high", two for "medium", and an empty
 *   array for "low" or any other value
 */
function extractPerformanceRequirements(complexity) {
    if (complexity === "high") {
        return [
            "Application loads within 3 seconds",
            "UI interactions respond within 200ms",
            "Large datasets are handled efficiently",
            "No memory leaks during extended use",
        ];
    }
    if (complexity === "medium") {
        return [
            "Application loads within 5 seconds",
            "Common operations complete quickly",
        ];
    }
    // No specific performance requirements for low complexity
    return [];
}
|
|
241
|
+
/**
 * Derives custom criteria from the prompt's id and its number of prompts.
 *
 * @param prompt - Simulation prompt (reads `id` and `prompts`)
 * @returns Array of custom criteria; empty when no pattern applies
 */
function extractCustomCriteria(prompt) {
    // Marker found in the prompt id -> criterion it implies
    const idMarkers = [
        ["multi-step", "All workflow steps complete in correct order"],
        ["realtime", "Real-time updates work correctly"],
        ["batch", "Batch operations process all items correctly"],
    ];
    const fromId = idMarkers
        .filter(([marker]) => prompt.id.includes(marker))
        .map(([, criterion]) => criterion);
    // More than two prompts means the scenario is built up incrementally
    const fromPromptCount = prompt.prompts.length > 2
        ? [
            "Multi-prompt instructions are fully implemented",
            "Later prompts build on earlier functionality",
        ]
        : [];
    return [...fromId, ...fromPromptCount];
}
|
|
266
|
+
/**
 * Creates a minimal, prompt-independent set of criteria for basic testing.
 *
 * Used when specific criteria cannot be extracted. Only the functional, UI
 * and data sections are populated; the returned object has no performance
 * or custom sections.
 *
 * @returns Minimal evaluation criteria (a fresh object on every call)
 */
export function createMinimalCriteria() {
    const functionalRequirements = [
        "Application loads without errors",
        "Primary functionality works as intended",
    ];
    const uiRequirements = ["Interface is usable", "User can complete basic tasks"];
    const dataRequirements = ["Data operations complete successfully"];
    return { functionalRequirements, uiRequirements, dataRequirements };
}
|
|
283
|
+
//# sourceMappingURL=evaluation-criteria.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluation-criteria.js","sourceRoot":"","sources":["../../../../src/ai-service/judge/prompts/evaluation-criteria.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAeH;;;;;;;;GAQG;AACH,MAAM,UAAU,uBAAuB,CACrC,MAAwB;IAExB,MAAM,QAAQ,GAAuB;QACnC,sBAAsB,EAAE,EAAE;QAC1B,cAAc,EAAE,EAAE;QAClB,gBAAgB,EAAE,EAAE;QACpB,uBAAuB,EAAE,EAAE;QAC3B,cAAc,EAAE,EAAE;KACnB,CAAC;IAEF,uCAAuC;IACvC,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,WAAW,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC;SAClE,IAAI,CAAC,GAAG,CAAC;SACT,WAAW,EAAE,CAAC;IAEjB,kCAAkC;IAClC,QAAQ,CAAC,sBAAsB,GAAG,6BAA6B,CAC7D,QAAQ,EACR,MAAM,CAAC,UAAU,CAClB,CAAC;IAEF,0BAA0B;IAC1B,QAAQ,CAAC,cAAc,GAAG,qBAAqB,CAAC,QAAQ,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAE7E,4BAA4B;IAC5B,QAAQ,CAAC,gBAAgB,GAAG,uBAAuB,CACjD,QAAQ,EACR,MAAM,CAAC,UAAU,CAClB,CAAC;IAEF,mDAAmD;IACnD,IAAI,MAAM,CAAC,UAAU,KAAK,MAAM,IAAI,MAAM,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;QACnE,QAAQ,CAAC,uBAAuB,GAAG,8BAA8B,CAC/D,MAAM,CAAC,UAAU,CAClB,CAAC;IACJ,CAAC;IAED,wDAAwD;IACxD,QAAQ,CAAC,cAAc,GAAG,qBAAqB,CAAC,MAAM,CAAC,CAAC;IAExD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,SAAS,6BAA6B,CACpC,IAAY,EACZ,UAAqC;IAErC,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,kBAAkB;IAClB,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5E,YAAY,CAAC,IAAI,CAAC,sDAAsD,CAAC,CAAC;IAC5E,CAAC;IAED,IACE,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EACxB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IAC9D,CAAC;IAED,IACE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACvB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EACvB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvD,YAAY,CAAC,IAAI,CAAC,6CAA6C,CAAC,CAAC;IACnE,CAAC;IAED,oBAAoB;IACpB,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACrD,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;IAC5D,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CA
AC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACrD,YAAY,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;IACtE,CAAC;IAED,uBAAuB;IACvB,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACpD,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;QAC1D,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;IAC5D,CAAC;IAED,iBAAiB;IACjB,IACE,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EACrB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;QACzD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,WAAW;IACX,IACE,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC;QACxB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EACrB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QAC7D,YAAY,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACzD,CAAC;IAED,kBAAkB;IAClB,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvD,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IAC9D,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QAC1D,YAAY,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QACnD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,gBAAgB;IAChB,IACE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACvB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EACtB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IACjE,CAAC;IAED,oCAAoC;IACpC,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,YAAY,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACrE,YAAY,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IACvD,CAAC;IAED,8BAA8B;IAC9B,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,YAAY,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;QAC3D,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;;GAMG;AACH,SAAS,qBAAqB,CAC5B,IAAY,EACZ,UAAqC;IAErC,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,wBAAwB;IACxB,YAAY,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACtD,YAAY,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;IAE7C,uBAAuB;IACvB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACpD,YAAY,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;QACvD,YAAY,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;IACtD,CAAC;IAED,IAC
E,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QACtB,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EACvB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAClE,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACtD,YAAY,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QACvD,YAAY,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAC/D,CAAC;IAED,oBAAoB;IACpB,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3D,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAClE,CAAC;IAED,gBAAgB;IAChB,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,YAAY,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;IACxE,CAAC;IAED,WAAW;IACX,YAAY,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IAE/D,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACzD,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAClE,CAAC;IAED,iBAAiB;IACjB,YAAY,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;IAE3D,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;;GAMG;AACH,SAAS,uBAAuB,CAC9B,IAAY,EACZ,UAAqC;IAErC,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,mBAAmB;IACnB,IACE,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QACpB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EACxB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;QACzD,YAAY,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACzD,CAAC;IAED,WAAW;IACX,IACE,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QACpB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EACtB,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;QAC/D,YAAY,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACxD,CAAC;IAED,wBAAwB;IACxB,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QAChC,YAAY,CAAC,IAAI,CACf,6DAA6D,CAC9D,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3B,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;IAC5D,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1B,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAClE,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QAChC,YAAY,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,
IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACxD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAC1D,CAAC;IAED,kBAAkB;IAClB,YAAY,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAE7D,IAAI,UAAU,KAAK,MAAM,IAAI,UAAU,KAAK,QAAQ,EAAE,CAAC;QACrD,YAAY,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAChE,YAAY,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;IACrE,CAAC;IAED,sBAAsB;IACtB,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;GAKG;AACH,SAAS,8BAA8B,CACrC,UAAqC;IAErC,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,MAAM;YACT,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;YACxD,YAAY,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;YAC1D,YAAY,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;YAC5D,YAAY,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;YACzD,MAAM;QAER,KAAK,QAAQ;YACX,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;YACxD,YAAY,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;YACxD,MAAM;QAER,KAAK,KAAK;YACR,0DAA0D;YAC1D,MAAM;IACV,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;;GAKG;AACH,SAAS,qBAAqB,CAAC,MAAwB;IACrD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,wDAAwD;IACxD,IAAI,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACrC,QAAQ,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QACnC,QAAQ,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,MAAM,CAAC,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAChC,QAAQ,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAChE,CAAC;IAED,qCAAqC;IACrC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,QAAQ,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACjE,QAAQ,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,qBAAqB;IACnC,OAAO;QACL,sBAAsB,EAAE;YACtB,kCAAkC;YAClC,yCAAyC;SAC1C;QACD,cAAc,EAAE,CAAC,qBAAqB,EAAE,+BAA+B,CAAC;QACxE,gBAAgB,EAAE,CAAC,uCAAuC,CAAC;KAC5D,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System prompt builder for the judge.
|
|
3
|
+
*
|
|
4
|
+
* Constructs the system prompt that defines the judge's
|
|
5
|
+
* evaluation framework and behavior.
|
|
6
|
+
*/
|
|
7
|
+
import type { EvaluationCriteria, JudgeConfig } from "../types.js";
|
|
8
|
+
/**
 * Builds the system prompt for the judge.
 *
 * Defines the judge's role, step budget, weighted evaluation framework,
 * scoring guidelines and tool-usage rules for assessing AI-generated
 * applications.
 *
 * @param _criteria - Evaluation criteria. Accepted for interface
 *   compatibility; the compiled implementation shipped next to this
 *   declaration never reads it (hence the underscore prefix).
 * @param config - Optional judge configuration. The implementation reads
 *   `passingThreshold` (falls back to 70), `maxSteps` (falls back to 15),
 *   `captureScreenshots` and `detailedReasoning` (each adds one bullet to the
 *   evidence-collection step when truthy).
 * @returns Formatted system prompt
 */
|
|
18
|
+
export declare function buildJudgeSystemPrompt(_criteria: EvaluationCriteria, config?: Partial<JudgeConfig>): string;
|
|
19
|
+
/**
 * Builds evaluation criteria from prompts.
 *
 * The implementation joins all prompts with a space, lower-cases the result
 * and looks for keywords by plain substring match (for example "create",
 * "table", "api") to decide which requirements to emit. Two UI requirements
 * are always present, and fallback functional and data requirements are
 * added when no keyword matched for those categories.
 *
 * @param prompts - Array of prompts given to the agent
 * @param complexity - Task complexity level; defaults to "medium" in the
 *   implementation. Only "high" adds performance requirements.
 * @returns Evaluation criteria
 */
|
|
29
|
+
export declare function buildCriteriaFromPrompts(prompts: string[], complexity?: "low" | "medium" | "high"): EvaluationCriteria;
|
|
30
|
+
//# sourceMappingURL=system-prompt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"system-prompt.d.ts","sourceRoot":"","sources":["../../../../src/ai-service/judge/prompts/system-prompt.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,kBAAkB,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAEnE;;;;;;;;;GASG;AACH,wBAAgB,sBAAsB,CACpC,SAAS,EAAE,kBAAkB,EAC7B,MAAM,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,MAAM,CAqHR;AAED;;;;;;;;;GASG;AACH,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EAAE,EACjB,UAAU,GAAE,KAAK,GAAG,QAAQ,GAAG,MAAiB,GAC/C,kBAAkB,CAsFpB"}
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System prompt builder for the judge.
|
|
3
|
+
*
|
|
4
|
+
* Constructs the system prompt that defines the judge's
|
|
5
|
+
* evaluation framework and behavior.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Builds the system prompt for the judge.
|
|
9
|
+
*
|
|
10
|
+
* Defines the judge's role, evaluation framework, and instructions
|
|
11
|
+
* for assessing AI-generated applications.
|
|
12
|
+
*
|
|
13
|
+
* @param criteria - Evaluation criteria
|
|
14
|
+
* @param config - Judge configuration
|
|
15
|
+
* @returns Formatted system prompt
|
|
16
|
+
*/
|
|
17
|
+
export function buildJudgeSystemPrompt(_criteria, config = {}) {
|
|
18
|
+
const passingThreshold = config.passingThreshold || 70;
|
|
19
|
+
const maxSteps = config.maxSteps || 15;
|
|
20
|
+
return `You are an expert AI judge evaluating web applications built by an AI agent.
|
|
21
|
+
|
|
22
|
+
## Step Budget
|
|
23
|
+
|
|
24
|
+
You have a maximum of ${maxSteps} steps to complete your evaluation. Plan your testing approach accordingly and prioritize the most critical requirements. On your final step, you MUST call submitFeedback with your evaluation.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
Your role is to:
|
|
29
|
+
1. Objectively assess whether the application meets specified requirements
|
|
30
|
+
2. Interact with the application using browser automation tools
|
|
31
|
+
3. Collect evidence (screenshots, DOM state, etc.) to support your evaluation
|
|
32
|
+
4. Provide detailed, constructive feedback
|
|
33
|
+
|
|
34
|
+
## Evaluation Framework
|
|
35
|
+
|
|
36
|
+
Your evaluation should be based on weighted categories:
|
|
37
|
+
|
|
38
|
+
**Functional Requirements (40%)**
|
|
39
|
+
- Core features work as specified
|
|
40
|
+
- Business logic is correctly implemented
|
|
41
|
+
- Data operations function properly
|
|
42
|
+
- Error cases are handled appropriately
|
|
43
|
+
|
|
44
|
+
**UI/UX Requirements (30%)**
|
|
45
|
+
- Interface is usable and intuitive
|
|
46
|
+
- Visual design meets requirements
|
|
47
|
+
- Responsive and accessible
|
|
48
|
+
- User feedback is clear
|
|
49
|
+
|
|
50
|
+
**Data Integration (20%)**
|
|
51
|
+
- Integrations work correctly
|
|
52
|
+
- Data flows properly between systems
|
|
53
|
+
- API connections are functional
|
|
54
|
+
- Data validation is present
|
|
55
|
+
|
|
56
|
+
**Performance & Code Quality (10%)**
|
|
57
|
+
- Application loads reasonably quickly
|
|
58
|
+
- Interactions are responsive
|
|
59
|
+
- Implementation appears maintainable
|
|
60
|
+
- No obvious security issues
|
|
61
|
+
|
|
62
|
+
## Scoring Guidelines
|
|
63
|
+
|
|
64
|
+
For each requirement:
|
|
65
|
+
- 90-100: Excellent implementation, exceeds expectations
|
|
66
|
+
- 80-89: Good implementation, fully meets requirements
|
|
67
|
+
- 70-79: Acceptable implementation, meets basic requirements
|
|
68
|
+
- 60-69: Partial implementation, some requirements not met
|
|
69
|
+
- 0-59: Poor implementation, significant requirements not met
|
|
70
|
+
|
|
71
|
+
Overall passing score: ${passingThreshold}/100
|
|
72
|
+
|
|
73
|
+
## Evaluation Process
|
|
74
|
+
|
|
75
|
+
1. **Initial Assessment**
|
|
76
|
+
- Navigate to the application
|
|
77
|
+
- Take a screenshot of the initial state
|
|
78
|
+
- Verify the application loads correctly
|
|
79
|
+
|
|
80
|
+
2. **Systematic Testing**
|
|
81
|
+
- Test each functional requirement methodically
|
|
82
|
+
- Interact with UI elements to verify behavior
|
|
83
|
+
- Check data flows and integrations
|
|
84
|
+
- Capture evidence for each test
|
|
85
|
+
|
|
86
|
+
3. **Evidence Collection**
|
|
87
|
+
${config.captureScreenshots
|
|
88
|
+
? "- Take screenshots to document functionality"
|
|
89
|
+
: ""}
|
|
90
|
+
- Note specific selectors and elements tested
|
|
91
|
+
- Record actual vs expected behavior
|
|
92
|
+
${config.detailedReasoning
|
|
93
|
+
? "- Provide detailed reasoning for each score"
|
|
94
|
+
: ""}
|
|
95
|
+
|
|
96
|
+
4. **Final Evaluation**
|
|
97
|
+
- Calculate scores for each category
|
|
98
|
+
- Determine overall pass/fail
|
|
99
|
+
- Provide constructive suggestions
|
|
100
|
+
- Submit evaluation using submitFeedback tool
|
|
101
|
+
|
|
102
|
+
## Important Guidelines
|
|
103
|
+
|
|
104
|
+
- Be objective and fair in your assessment
|
|
105
|
+
- Focus on whether requirements are met, not implementation details
|
|
106
|
+
- Provide specific, actionable feedback
|
|
107
|
+
- Consider the complexity of the task when scoring
|
|
108
|
+
- If unable to test something, note it clearly
|
|
109
|
+
- Don't penalize for minor UI variations that don't affect functionality
|
|
110
|
+
|
|
111
|
+
## Tool Usage
|
|
112
|
+
|
|
113
|
+
You have access to:
|
|
114
|
+
- **playwright_action**: Browser automation (navigate, click, fill, screenshot, etc.)
|
|
115
|
+
- **submitFeedback**: Submit your final structured evaluation
|
|
116
|
+
|
|
117
|
+
Use these tools efficiently to thoroughly test the application.
|
|
118
|
+
|
|
119
|
+
**CRITICAL for evaluate action**: When using the 'evaluate' action to run JavaScript in the browser:
|
|
120
|
+
- NEVER use ES6 module syntax (import/export statements) - this will cause a SyntaxError
|
|
121
|
+
- NEVER use require() statements - this is not available in browser context
|
|
122
|
+
- NEVER try to import React, libraries, or any modules
|
|
123
|
+
- Only use plain JavaScript that can run directly in a browser console
|
|
124
|
+
- Access browser globals directly (window, document, etc.)
|
|
125
|
+
- To check for React components, look for them on window or in the DOM, don't import
|
|
126
|
+
- Example GOOD code: document.querySelector('.form'), window.React
|
|
127
|
+
- Example BAD code: import React from 'react', const { useState } = require('react')
|
|
128
|
+
- If you need to check if something exists, use: typeof window.SomeLibrary !== 'undefined'`;
|
|
129
|
+
}
|
|
130
|
+
/**
 * Derives evaluation criteria from the prompts given to the agent.
 *
 * All prompts are joined with a single space and lower-cased; each rule below
 * fires when at least one of its keywords occurs anywhere in that text
 * (plain substring match). Requirements keep the order in which the rules are
 * listed.
 *
 * @param prompts - Array of prompts given to the agent
 * @param complexity - Task complexity level; only "high" adds performance
 *   requirements
 * @returns Evaluation criteria
 */
export function buildCriteriaFromPrompts(prompts, complexity = "medium") {
    const haystack = prompts.join(" ").toLowerCase();
    // Requirements of every rule that has at least one keyword in the text.
    const matchRules = (rules) => rules
        .filter((rule) => rule.keywords.some((keyword) => haystack.includes(keyword)))
        .map((rule) => rule.requirement);
    const functionalRequirements = matchRules([
        { keywords: ["create", "add"], requirement: "User can create new records" },
        { keywords: ["edit", "update"], requirement: "User can edit existing records" },
        { keywords: ["delete", "remove"], requirement: "User can delete records" },
        { keywords: ["list", "display"], requirement: "Application displays data correctly" },
        { keywords: ["search", "filter"], requirement: "Search/filter functionality works" },
        { keywords: ["form"], requirement: "Forms have proper validation" },
        { keywords: ["authenticate", "login"], requirement: "Authentication works correctly" },
    ]);
    // The first two UI requirements apply to every application.
    const uiRequirements = [
        "Application has a clear layout",
        "UI elements are properly labeled",
        ...matchRules([
            { keywords: ["table"], requirement: "Tables display data clearly" },
            { keywords: ["button"], requirement: "Buttons are clearly labeled and functional" },
            { keywords: ["responsive"], requirement: "UI is responsive to different screen sizes" },
        ]),
    ];
    const dataRequirements = matchRules([
        { keywords: ["api", "integration"], requirement: "API integrations function correctly" },
        { keywords: ["database", "sql"], requirement: "Database operations work properly" },
        { keywords: ["salesforce"], requirement: "Salesforce integration works" },
        { keywords: ["slack"], requirement: "Slack integration functions" },
    ]);
    const performanceRequirements = complexity === "high"
        ? ["Application loads within 3 seconds", "No blocking operations in UI"]
        : [];
    // Fallbacks so the functional and data categories are never empty.
    if (functionalRequirements.length === 0) {
        functionalRequirements.push("Application loads without errors", "Core functionality works as described");
    }
    if (dataRequirements.length === 0) {
        dataRequirements.push("Data is displayed correctly");
    }
    return {
        functionalRequirements,
        uiRequirements,
        dataRequirements,
        performanceRequirements,
    };
}
|
|
212
|
+
//# sourceMappingURL=system-prompt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"system-prompt.js","sourceRoot":"","sources":["../../../../src/ai-service/judge/prompts/system-prompt.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH;;;;;;;;;GASG;AACH,MAAM,UAAU,sBAAsB,CACpC,SAA6B,EAC7B,SAA+B,EAAE;IAEjC,MAAM,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,IAAI,EAAE,CAAC;IACvD,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC;IAEvC,OAAO;;;;wBAIe,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;yBA+CP,gBAAgB;;;;;;;;;;;;;;;;KAiBpC,MAAM,CAAC,kBAAkB;QACvB,CAAC,CAAC,8CAA8C;QAChD,CAAC,CAAC,EACN;;;KAIE,MAAM,CAAC,iBAAiB;QACtB,CAAC,CAAC,6CAA6C;QAC/C,CAAC,CAAC,EACN;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;2FAkCwF,CAAC;AAC5F,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,wBAAwB,CACtC,OAAiB,EACjB,aAAwC,QAAQ;IAEhD,MAAM,QAAQ,GAAuB;QACnC,sBAAsB,EAAE,EAAE;QAC1B,cAAc,EAAE,EAAE;QAClB,gBAAgB,EAAE,EAAE;QACpB,uBAAuB,EAAE,EAAE;KAC5B,CAAC;IAEF,mCAAmC;IACnC,MAAM,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;IAEtD,4CAA4C;IAC5C,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACtE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;IACzE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;IAClE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACxE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IAC9E,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IAC5E,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACnC,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IACvE,CAAC;IACD,IACE,aAAa,CAAC,QAAQ,CAAC,cAAc,CAAC;QACtC,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,EAC/B,CAAC;QACD,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;IACzE,CAAC;IAED,kBAAkB;IAClB,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,gCAAgC,C
AAC,CAAC;IAC/D,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IAEjE,IAAI,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACpC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC9D,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACrC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAC7E,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACzC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;IAC7E,CAAC;IAED,oDAAoD;IACpD,IAAI,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAC3E,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;IACxE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACxE,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;QACzC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IACjE,CAAC;IACD,IAAI,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACpC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAChE,CAAC;IAED,+CAA+C;IAC/C,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,QAAQ,CAAC,uBAAuB,EAAE,IAAI,CACpC,oCAAoC,CACrC,CAAC;QACF,QAAQ,CAAC,uBAAuB,EAAE,IAAI,CAAC,8BAA8B,CAAC,CAAC;IACzE,CAAC;IAED,8BAA8B;IAC9B,IAAI,QAAQ,CAAC,sBAAsB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjD,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;QACzE,QAAQ,CAAC,sBAAsB,CAAC,IAAI,CAClC,uCAAuC,CACxC,CAAC;IACJ,CAAC;IAED,IAAI,QAAQ,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3C,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV-based storage implementation for judge evaluations.
|
|
3
|
+
*
|
|
4
|
+
* Stores evaluation results in a CSV file with the following columns:
|
|
5
|
+
* - timestamp: ISO 8601 timestamp
|
|
6
|
+
* - promptId: Prompt identifier
|
|
7
|
+
* - branchName: Git branch name
|
|
8
|
+
* - commitSha: Git commit SHA
|
|
9
|
+
* - appId: Application identifier
|
|
10
|
+
* - prompt: The actual prompt text
|
|
11
|
+
* - passed: Boolean pass/fail status
|
|
12
|
+
* - score: Overall score (0-100)
|
|
13
|
+
* - feedback: Summary feedback text
|
|
14
|
+
*
|
|
15
|
+
* CSV format provides simple, human-readable storage suitable for analysis
|
|
16
|
+
* in spreadsheet applications or data processing tools.
|
|
17
|
+
*
|
|
18
|
+
* Configuration:
|
|
19
|
+
* The storage location can be configured via the JUDGE_STORAGE_PATH environment variable:
|
|
20
|
+
* - Directory path: `JUDGE_STORAGE_PATH=~/eval` (creates evaluations.csv inside)
|
|
21
|
+
* - Full file path: `JUDGE_STORAGE_PATH=~/eval/results.csv`
|
|
22
|
+
* - Supports ~ expansion for home directory
|
|
23
|
+
* - Default: `<appRoot>/.superblocks/judge-evaluations/evaluations.csv`
|
|
24
|
+
*/
|
|
25
|
+
import type { JudgeStorage } from "./interface.js";
|
|
26
|
+
import type { StoredEvaluation, EvaluationQuery } from "./types.js";
|
|
27
|
+
/**
 * CSV-based implementation of JudgeStorage.
 *
 * Public surface: saveEvaluation, getEvaluations, getEvaluationsByPrompt,
 * getEvaluationsByBranch and clear, all of which return a Promise. The
 * remaining members are private helpers whose signatures are not exposed by
 * this declaration.
 *
 * Thread-safe through file system atomic operations.
 * NOTE(review): this guarantee cannot be verified from the declaration
 * alone - confirm against the implementation before relying on it for
 * concurrent writers.
 * Appends new evaluations to maintain chronological order.
 */
|
|
33
|
+
export declare class CsvJudgeStorage implements JudgeStorage {
|
|
34
|
+
private readonly filePath;
|
|
35
|
+
/**
 * Creates a new CSV storage instance.
 *
 * Both arguments are optional.
 *
 * @param storageDir - Directory for storage (default: .superblocks/judge-evaluations)
 * @param filename - CSV filename (default: evaluations.csv)
 */
|
|
41
|
+
constructor(storageDir?: string, filename?: string);
|
|
42
|
+
/**
 * Ensures the storage directory and file exist.
 *
 * Creates directory if needed and initializes CSV with header row.
 */
|
|
47
|
+
private ensureStorageExists;
|
|
48
|
+
/**
 * Escapes a value for CSV format.
 *
 * Handles quotes and commas by wrapping in double quotes and escaping
 * internal quotes.
 */
|
|
54
|
+
private escapeCsvValue;
|
|
55
|
+
/**
 * Converts a StoredEvaluation to a CSV row.
 */
|
|
58
|
+
private toCsvRow;
|
|
59
|
+
/**
 * Parses a CSV row into a StoredEvaluation.
 *
 * Note: This is a simplified parser that doesn't handle all CSV edge cases.
 * For production use with complex data, consider a proper CSV parsing library.
 */
|
|
65
|
+
private fromCsvRow;
|
|
66
|
+
/**
 * Saves an evaluation result to the CSV file.
 *
 * Appends a new row to the file. Creates file and directory if needed.
 *
 * @param evaluation - Evaluation record to persist
 * @returns Promise that resolves with no value
 */
|
|
71
|
+
saveEvaluation(evaluation: StoredEvaluation): Promise<void>;
|
|
72
|
+
/**
 * Reads all evaluations from the CSV file.
 */
|
|
75
|
+
private readAllEvaluations;
|
|
76
|
+
/**
 * Filters evaluations based on query parameters.
 */
|
|
79
|
+
private applyQuery;
|
|
80
|
+
/**
 * Retrieves all evaluations matching the query.
 *
 * @param query - Optional filter
 * @returns Promise resolving to the matching evaluations
 */
|
|
83
|
+
getEvaluations(query?: EvaluationQuery): Promise<StoredEvaluation[]>;
|
|
84
|
+
/**
 * Retrieves evaluations for a specific prompt.
 *
 * @param promptId - Prompt identifier to match
 * @returns Promise resolving to the matching evaluations
 */
|
|
87
|
+
getEvaluationsByPrompt(promptId: string): Promise<StoredEvaluation[]>;
|
|
88
|
+
/**
 * Retrieves evaluations for a specific branch.
 *
 * @param branchName - Git branch name to match
 * @returns Promise resolving to the matching evaluations
 */
|
|
91
|
+
getEvaluationsByBranch(branchName: string): Promise<StoredEvaluation[]>;
|
|
92
|
+
/**
 * Deletes all stored evaluations.
 *
 * Removes the CSV file. Use with caution.
 *
 * @returns Promise that resolves with no value
 */
|
|
97
|
+
clear(): Promise<void>;
|
|
98
|
+
}
|
|
99
|
+
//# sourceMappingURL=csv-storage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv-storage.d.ts","sourceRoot":"","sources":["../../../../src/ai-service/judge/storage/csv-storage.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,KAAK,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAapE;;;;;GAKG;AACH,qBAAa,eAAgB,YAAW,YAAY;IAClD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAElC;;;;;OAKG;gBAED,UAAU,GAAE,MAA4B,EACxC,QAAQ,GAAE,MAA0B;IAKtC;;;;OAIG;YACW,mBAAmB;IAejC;;;;;OAKG;IACH,OAAO,CAAC,cAAc;IAWtB;;OAEG;IACH,OAAO,CAAC,QAAQ;IA0BhB;;;;;OAKG;IACH,OAAO,CAAC,UAAU;IAmFlB;;;;OAIG;IACG,cAAc,CAAC,UAAU,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAOjE;;OAEG;YACW,kBAAkB;IAuBhC;;OAEG;IACH,OAAO,CAAC,UAAU;IAkDlB;;OAEG;IACG,cAAc,CAAC,KAAK,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAK1E;;OAEG;IACG,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAI3E;;OAEG;IACG,sBAAsB,CAC1B,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAI9B;;;;OAIG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAO7B"}
|