@evalstudio/core 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/dist/connector.d.ts +101 -0
  2. package/dist/connector.d.ts.map +1 -0
  3. package/dist/connector.js +477 -0
  4. package/dist/connector.js.map +1 -0
  5. package/dist/eval.d.ts +66 -0
  6. package/dist/eval.d.ts.map +1 -0
  7. package/dist/eval.js +188 -0
  8. package/dist/eval.js.map +1 -0
  9. package/dist/evaluator.d.ts +37 -0
  10. package/dist/evaluator.d.ts.map +1 -0
  11. package/dist/evaluator.js +121 -0
  12. package/dist/evaluator.js.map +1 -0
  13. package/dist/execution.d.ts +29 -0
  14. package/dist/execution.d.ts.map +1 -0
  15. package/dist/execution.js +94 -0
  16. package/dist/execution.js.map +1 -0
  17. package/dist/index.d.ts +17 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +16 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/llm-client.d.ts +31 -0
  22. package/dist/llm-client.d.ts.map +1 -0
  23. package/dist/llm-client.js +121 -0
  24. package/dist/llm-client.js.map +1 -0
  25. package/dist/llm-provider.d.ts +46 -0
  26. package/dist/llm-provider.d.ts.map +1 -0
  27. package/dist/llm-provider.js +199 -0
  28. package/dist/llm-provider.js.map +1 -0
  29. package/dist/persona-generator.d.ts +34 -0
  30. package/dist/persona-generator.d.ts.map +1 -0
  31. package/dist/persona-generator.js +99 -0
  32. package/dist/persona-generator.js.map +1 -0
  33. package/dist/persona.d.ts +28 -0
  34. package/dist/persona.d.ts.map +1 -0
  35. package/dist/persona.js +100 -0
  36. package/dist/persona.js.map +1 -0
  37. package/dist/project.d.ts +43 -0
  38. package/dist/project.d.ts.map +1 -0
  39. package/dist/project.js +114 -0
  40. package/dist/project.js.map +1 -0
  41. package/dist/prompt.d.ts +31 -0
  42. package/dist/prompt.d.ts.map +1 -0
  43. package/dist/prompt.js +73 -0
  44. package/dist/prompt.js.map +1 -0
  45. package/dist/run-processor.d.ts +127 -0
  46. package/dist/run-processor.d.ts.map +1 -0
  47. package/dist/run-processor.js +495 -0
  48. package/dist/run-processor.js.map +1 -0
  49. package/dist/run.d.ts +101 -0
  50. package/dist/run.d.ts.map +1 -0
  51. package/dist/run.js +279 -0
  52. package/dist/run.js.map +1 -0
  53. package/dist/scenario.d.ts +66 -0
  54. package/dist/scenario.d.ts.map +1 -0
  55. package/dist/scenario.js +110 -0
  56. package/dist/scenario.js.map +1 -0
  57. package/dist/status.d.ts +10 -0
  58. package/dist/status.d.ts.map +1 -0
  59. package/dist/status.js +15 -0
  60. package/dist/status.js.map +1 -0
  61. package/dist/storage.d.ts +11 -0
  62. package/dist/storage.d.ts.map +1 -0
  63. package/dist/storage.js +57 -0
  64. package/dist/storage.js.map +1 -0
  65. package/dist/types.d.ts +46 -0
  66. package/dist/types.d.ts.map +1 -0
  67. package/dist/types.js +26 -0
  68. package/dist/types.js.map +1 -0
  69. package/package.json +51 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"persona.js","sourceRoot":"","sources":["../src/persona.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAyB7C,SAAS,cAAc;IACrB,OAAO,IAAI,CAAC,aAAa,EAAE,EAAE,eAAe,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,YAAY;IACnB,MAAM,IAAI,GAAG,cAAc,EAAE,CAAC;IAC9B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAc,CAAC;AACvC,CAAC;AAED,SAAS,YAAY,CAAC,QAAmB;IACvC,MAAM,IAAI,GAAG,cAAc,EAAE,CAAC;IAC9B,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAyB;IACrD,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAC5C,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,oBAAoB,KAAK,CAAC,SAAS,aAAa,CAAC,CAAC;IACpE,CAAC;IAED,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAEhC,IACE,QAAQ,CAAC,IAAI,CACX,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,KAAK,CAAC,SAAS,IAAI,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,CAChE,EACD,CAAC;QACD,MAAM,IAAI,KAAK,CACb,sBAAsB,KAAK,CAAC,IAAI,kCAAkC,CACnE,CAAC;IACJ,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,OAAO,GAAY;QACvB,EAAE,EAAE,UAAU,EAAE;QAChB,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,YAAY,EAAE,KAAK,CAAC,YAAY;QAChC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,GAAG;KACf,CAAC;IAEF,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACvB,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEvB,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,EAAU;IACnC,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,SAAiB,EACjB,IAAY;IAEZ,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,IAAI,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,SAAkB;IAC7C,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,EAAU,EACV,KAAyB;IAEzB,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAErD,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;QACjB,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAEhC,IACE,KAAK,CAAC,IAAI;QACV,QAAQ,CAAC,IAAI,CACX,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,SAAS,KAAK,OAAO,CAAC,SAAS,IAAI,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC,EAAE,KAAK,EAAE,CAC5E,EACD,CAAC;QACD,MAAM,IAAI,KAAK,CACb,sBAAsB,KAAK,CAAC,IAAI,kCAAkC,CACnE,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAY;QACvB,GAAG,OAAO;QACV,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI;QAChC,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW;QACrD,YAAY,EAAE,KAAK,CAAC,YAAY,IAAI,OAAO,CAAC,YAAY;QACxD,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;IAEF,QAAQ,CAAC,KAAK,CAAC,GAAG,OAAO,CAAC;IAC1B,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEvB,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,EAAU;IACtC,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAErD,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;QACjB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC1B,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEvB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,SAAiB;IACvD,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;IACnE,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAEvD,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;QACrB,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC"}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * LLM settings for a specific use-case (evaluation or persona generation)
3
+ */
4
+ export interface LLMUseCaseSettings {
5
+ providerId: string;
6
+ model?: string;
7
+ }
8
+ /**
9
+ * Project-level LLM configuration for different use-cases
10
+ */
11
+ export interface ProjectLLMSettings {
12
+ /** LLM settings for evaluation/judging conversations */
13
+ evaluation?: LLMUseCaseSettings;
14
+ /** LLM settings for persona response generation */
15
+ persona?: LLMUseCaseSettings;
16
+ }
17
+ export interface Project {
18
+ id: string;
19
+ name: string;
20
+ description?: string;
21
+ /** Default LLM settings for the project */
22
+ llmSettings?: ProjectLLMSettings;
23
+ createdAt: string;
24
+ updatedAt: string;
25
+ }
26
+ export interface CreateProjectInput {
27
+ name: string;
28
+ description?: string;
29
+ llmSettings?: ProjectLLMSettings;
30
+ }
31
+ export interface UpdateProjectInput {
32
+ name?: string;
33
+ description?: string;
34
+ /** Set to null to clear LLM settings */
35
+ llmSettings?: ProjectLLMSettings | null;
36
+ }
37
+ export declare function createProject(input: CreateProjectInput): Project;
38
+ export declare function getProject(id: string): Project | undefined;
39
+ export declare function getProjectByName(name: string): Project | undefined;
40
+ export declare function listProjects(): Project[];
41
+ export declare function updateProject(id: string, input: UpdateProjectInput): Project | undefined;
42
+ export declare function deleteProject(id: string): boolean;
43
+ //# sourceMappingURL=project.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"project.d.ts","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAMA;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,wDAAwD;IACxD,UAAU,CAAC,EAAE,kBAAkB,CAAC;IAChC,mDAAmD;IACnD,OAAO,CAAC,EAAE,kBAAkB,CAAC;CAC9B;AAED,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,2CAA2C;IAC3C,WAAW,CAAC,EAAE,kBAAkB,CAAC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,kBAAkB,CAAC;CAClC;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wCAAwC;IACxC,WAAW,CAAC,EAAE,kBAAkB,GAAG,IAAI,CAAC;CACzC;AAoBD,wBAAgB,aAAa,CAAC,KAAK,EAAE,kBAAkB,GAAG,OAAO,CAqBhE;AAED,wBAAgB,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,GAAG,SAAS,CAG1D;AAED,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,SAAS,CAGlE;AAED,wBAAgB,YAAY,IAAI,OAAO,EAAE,CAExC;AAED,wBAAgB,aAAa,CAC3B,EAAE,EAAE,MAAM,EACV,KAAK,EAAE,kBAAkB,GACxB,OAAO,GAAG,SAAS,CA+DrB;AAED,wBAAgB,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAYjD"}
@@ -0,0 +1,114 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import { existsSync, readFileSync, writeFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { getLLMProvider } from "./llm-provider.js";
5
+ import { getStorageDir } from "./storage.js";
6
+ function getStoragePath() {
7
+ return join(getStorageDir(), "projects.json");
8
+ }
9
+ function loadProjects() {
10
+ const path = getStoragePath();
11
+ if (!existsSync(path)) {
12
+ return [];
13
+ }
14
+ const data = readFileSync(path, "utf-8");
15
+ return JSON.parse(data);
16
+ }
17
+ function saveProjects(projects) {
18
+ const path = getStoragePath();
19
+ writeFileSync(path, JSON.stringify(projects, null, 2));
20
+ }
21
+ export function createProject(input) {
22
+ const projects = loadProjects();
23
+ if (projects.some((p) => p.name === input.name)) {
24
+ throw new Error(`Project with name "${input.name}" already exists`);
25
+ }
26
+ const now = new Date().toISOString();
27
+ const project = {
28
+ id: randomUUID(),
29
+ name: input.name,
30
+ description: input.description,
31
+ llmSettings: input.llmSettings,
32
+ createdAt: now,
33
+ updatedAt: now,
34
+ };
35
+ projects.push(project);
36
+ saveProjects(projects);
37
+ return project;
38
+ }
39
+ export function getProject(id) {
40
+ const projects = loadProjects();
41
+ return projects.find((p) => p.id === id);
42
+ }
43
+ export function getProjectByName(name) {
44
+ const projects = loadProjects();
45
+ return projects.find((p) => p.name === name);
46
+ }
47
+ export function listProjects() {
48
+ return loadProjects();
49
+ }
50
+ export function updateProject(id, input) {
51
+ const projects = loadProjects();
52
+ const index = projects.findIndex((p) => p.id === id);
53
+ if (index === -1) {
54
+ return undefined;
55
+ }
56
+ if (input.name && projects.some((p) => p.name === input.name && p.id !== id)) {
57
+ throw new Error(`Project with name "${input.name}" already exists`);
58
+ }
59
+ // Validate LLM settings if provided
60
+ if (input.llmSettings) {
61
+ const { evaluation, persona } = input.llmSettings;
62
+ if (evaluation?.providerId) {
63
+ const provider = getLLMProvider(evaluation.providerId);
64
+ if (!provider) {
65
+ throw new Error(`LLM Provider with id "${evaluation.providerId}" not found`);
66
+ }
67
+ if (provider.projectId !== id) {
68
+ throw new Error("Evaluation LLM Provider does not belong to this project");
69
+ }
70
+ }
71
+ if (persona?.providerId) {
72
+ const provider = getLLMProvider(persona.providerId);
73
+ if (!provider) {
74
+ throw new Error(`LLM Provider with id "${persona.providerId}" not found`);
75
+ }
76
+ if (provider.projectId !== id) {
77
+ throw new Error("Persona LLM Provider does not belong to this project");
78
+ }
79
+ }
80
+ }
81
+ const project = projects[index];
82
+ // Handle llmSettings: null clears, undefined keeps existing, object updates
83
+ let newLLMSettings;
84
+ if (input.llmSettings === null) {
85
+ newLLMSettings = undefined;
86
+ }
87
+ else if (input.llmSettings !== undefined) {
88
+ newLLMSettings = input.llmSettings;
89
+ }
90
+ else {
91
+ newLLMSettings = project.llmSettings;
92
+ }
93
+ const updated = {
94
+ ...project,
95
+ name: input.name ?? project.name,
96
+ description: input.description ?? project.description,
97
+ llmSettings: newLLMSettings,
98
+ updatedAt: new Date().toISOString(),
99
+ };
100
+ projects[index] = updated;
101
+ saveProjects(projects);
102
+ return updated;
103
+ }
104
+ export function deleteProject(id) {
105
+ const projects = loadProjects();
106
+ const index = projects.findIndex((p) => p.id === id);
107
+ if (index === -1) {
108
+ return false;
109
+ }
110
+ projects.splice(index, 1);
111
+ saveProjects(projects);
112
+ return true;
113
+ }
114
+ //# sourceMappingURL=project.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"project.js","sourceRoot":"","sources":["../src/project.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AA2C7C,SAAS,cAAc;IACrB,OAAO,IAAI,CAAC,aAAa,EAAE,EAAE,eAAe,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,YAAY;IACnB,MAAM,IAAI,GAAG,cAAc,EAAE,CAAC;IAC9B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAc,CAAC;AACvC,CAAC;AAED,SAAS,YAAY,CAAC,QAAmB;IACvC,MAAM,IAAI,GAAG,cAAc,EAAE,CAAC;IAC9B,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAyB;IACrD,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAEhC,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAChD,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,CAAC,IAAI,kBAAkB,CAAC,CAAC;IACtE,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,OAAO,GAAY;QACvB,EAAE,EAAE,UAAU,EAAE;QAChB,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,GAAG;KACf,CAAC;IAEF,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACvB,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEvB,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,EAAU;IACnC,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AAC/C,CAAC;AAED,MAAM,UAAU,YAAY;IAC1B,OAAO,YAAY,EAAE,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,EAAU,EACV,KAAyB;IAEzB,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAErD,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;QACjB,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,KAAK,CAAC,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QAC7E,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,CAAC,IAAI,kBAAkB,CAAC,CAAC;IACtE,CAAC;IAED,oCAAoC;IACpC,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC;QACtB,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,KAAK,CAAC,WAAW,CAAC;QAClD,IAAI,UAAU,EAAE,UAAU,EAAE,CAAC;YAC3B,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;YACvD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,MAAM,IAAI,KAAK,CACb,yBAAyB,UAAU,CAAC,UAAU,aAAa,CAC5D,CAAC;YACJ,CAAC;YACD,IAAI,QAAQ,CAAC,SAAS,KAAK,EAAE,EAAE,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;YAC7E,CAAC;QACH,CAAC;QACD,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;YACxB,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YACpD,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,MAAM,IAAI,KAAK,CACb,yBAAyB,OAAO,CAAC,UAAU,aAAa,CACzD,CAAC;YACJ,CAAC;YACD,IAAI,QAAQ,CAAC,SAAS,KAAK,EAAE,EAAE,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAEhC,4EAA4E;IAC5E,IAAI,cAA8C,CAAC;IACnD,IAAI,KAAK,CAAC,WAAW,KAAK,IAAI,EAAE,CAAC;QAC/B,cAAc,GAAG,SAAS,CAAC;IAC7B,CAAC;SAAM,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;QAC3C,cAAc,GAAG,KAAK,CAAC,WAAW,CAAC;IACrC,CAAC;SAAM,CAAC;QACN,cAAc,GAAG,OAAO,CAAC,WAAW,CAAC;IACvC,CAAC;IAED,MAAM,OAAO,GAAY;QACvB,GAAG,OAAO;QACV,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI;QAChC,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW;QACrD,WAAW,EAAE,cAAc;QAC3B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;IAEF,QAAQ,CAAC,KAAK,CAAC,GAAG,OAAO,CAAC;IAC1B,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEvB,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,EAAU;IACtC,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;IAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAErD,IAAI,KAAK,KAAK,CAAC,CAAC,EAAE,CAAC;QACjB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC1B,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEvB,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1,31 @@
1
+ import type { Persona } from "./persona.js";
2
+ import type { Scenario } from "./scenario.js";
3
+ /**
4
+ * Input for building the test agent system prompt.
5
+ * Accepts partial persona/scenario objects so it can work with
6
+ * both full entities and the embedded relations from EvalWithRelations.
7
+ */
8
+ export interface BuildTestAgentPromptInput {
9
+ persona?: Pick<Persona, "name" | "description" | "systemPrompt"> | null;
10
+ scenario?: Pick<Scenario, "name" | "instructions" | "messages"> | null;
11
+ }
12
+ /**
13
+ * Builds the system prompt for the test agent that will impersonate
14
+ * a user persona and simulate a scenario when interacting with the
15
+ * chatbot being tested.
16
+ *
17
+ * The test agent's role is to act as a realistic user, following the
18
+ * persona's characteristics and the scenario's context to evaluate
19
+ * how well the chatbot handles the interaction.
20
+ */
21
+ export declare function buildTestAgentSystemPrompt(input: BuildTestAgentPromptInput): string;
22
+ /**
23
+ * Builds a messages array in OpenAI format for the test agent,
24
+ * starting with the system prompt and including any initial seed
25
+ * messages from the scenario.
26
+ */
27
+ export declare function buildTestAgentMessages(input: BuildTestAgentPromptInput): Array<{
28
+ role: "system" | "user" | "assistant";
29
+ content: string;
30
+ }>;
31
+ //# sourceMappingURL=prompt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAG9C;;;;GAIG;AACH,MAAM,WAAW,yBAAyB;IACxC,OAAO,CAAC,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,GAAG,aAAa,GAAG,cAAc,CAAC,GAAG,IAAI,CAAC;IACxE,QAAQ,CAAC,EAAE,IAAI,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC;CACxE;AAED;;;;;;;;GAQG;AACH,wBAAgB,0BAA0B,CAAC,KAAK,EAAE,yBAAyB,GAAG,MAAM,CA4CnF;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,yBAAyB,GAC/B,KAAK,CAAC;IAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAiBnE"}
package/dist/prompt.js ADDED
@@ -0,0 +1,73 @@
1
+ import { getMessageContentAsString } from "./types.js";
2
+ /**
3
+ * Builds the system prompt for the test agent that will impersonate
4
+ * a user persona and simulate a scenario when interacting with the
5
+ * chatbot being tested.
6
+ *
7
+ * The test agent's role is to act as a realistic user, following the
8
+ * persona's characteristics and the scenario's context to evaluate
9
+ * how well the chatbot handles the interaction.
10
+ */
11
+ export function buildTestAgentSystemPrompt(input) {
12
+ const { persona, scenario } = input;
13
+ let personaContent = "";
14
+ if (persona) {
15
+ personaContent = `
16
+ ## User Persona
17
+
18
+ Name:
19
+ ${persona.name || "N/A"}
20
+
21
+ Character Instructions:
22
+ ${persona.systemPrompt || "N/A"}`;
23
+ }
24
+ let scenarioContent = "";
25
+ if (scenario?.instructions) {
26
+ scenarioContent = `
27
+ ## Scenario
28
+
29
+ ${scenario.instructions}
30
+ `;
31
+ }
32
+ const systemPrompt = `
33
+ You are a test agent simulating a user interaction with a chatbot.
34
+ Your role is to impersonate a specific user persona and simulate a realistic conversation
35
+ based on the given scenario. Behave naturally as this user would, staying in character
36
+ throughout the conversation.
37
+
38
+ ${personaContent}
39
+
40
+ ${scenarioContent}
41
+
42
+ ## Guidelines
43
+
44
+ - Stay in character as the user persona throughout the conversation
45
+ - Follow the scenario context to guide your messages and goals
46
+ - Respond naturally as a real user would, with realistic questions, concerns, or requests
47
+ - Do not break character or reveal that you are a test agent
48
+ - If the chatbot asks clarifying questions, answer them based on the persona and scenario
49
+ - Express appropriate emotions based on the scenario (frustration, curiosity, urgency, etc.)`;
50
+ return systemPrompt;
51
+ }
52
+ /**
53
+ * Builds a messages array in OpenAI format for the test agent,
54
+ * starting with the system prompt and including any initial seed
55
+ * messages from the scenario.
56
+ */
57
+ export function buildTestAgentMessages(input) {
58
+ const systemPrompt = buildTestAgentSystemPrompt(input);
59
+ const messages = [
60
+ { role: "system", content: systemPrompt },
61
+ ];
62
+ // Include scenario's initial seed messages if present
63
+ if (input.scenario?.messages && input.scenario.messages.length > 0) {
64
+ for (const msg of input.scenario.messages) {
65
+ messages.push({
66
+ role: msg.role,
67
+ content: getMessageContentAsString(msg.content),
68
+ });
69
+ }
70
+ }
71
+ return messages;
72
+ }
73
+ //# sourceMappingURL=prompt.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,yBAAyB,EAAE,MAAM,YAAY,CAAC;AAYvD;;;;;;;;GAQG;AACH,MAAM,UAAU,0BAA0B,CAAC,KAAgC;IACzE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;IAEpC,IAAI,cAAc,GAAG,EAAE,CAAC;IACxB,IAAI,OAAO,EAAE,CAAC;QACZ,cAAc,GAAG;;;;EAInB,OAAO,CAAC,IAAI,IAAI,KAAK;;;EAGrB,OAAO,CAAC,YAAY,IAAI,KAAK,EAAE,CAAC;IAChC,CAAC;IAED,IAAI,eAAe,GAAG,EAAE,CAAC;IACzB,IAAI,QAAQ,EAAE,YAAY,EAAE,CAAC;QAC3B,eAAe,GAAG;;;EAGpB,QAAQ,CAAC,YAAY;CACtB,CAAC;IACA,CAAC;IAED,MAAM,YAAY,GAAW;;;;;;EAM7B,cAAc;;EAEd,eAAe;;;;;;;;;6FAS4E,CAAC;IAE5F,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CACpC,KAAgC;IAEhC,MAAM,YAAY,GAAG,0BAA0B,CAAC,KAAK,CAAC,CAAC;IACvD,MAAM,QAAQ,GAAsE;QAClF,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;KAC1C,CAAC;IAEF,sDAAsD;IACtD,IAAI,KAAK,CAAC,QAAQ,EAAE,QAAQ,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnE,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC;YAC1C,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,GAAG,CAAC,IAAuC;gBACjD,OAAO,EAAE,yBAAyB,CAAC,GAAG,CAAC,OAAO,CAAC;aAChD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,127 @@
1
+ import { type ConnectorInvokeResult } from "./connector.js";
2
+ import { type Run, type RunStatus } from "./run.js";
3
+ export type { RunStatus };
4
+ export interface RunProcessorOptions {
5
+ /** Polling interval in milliseconds (default: 5000) */
6
+ pollIntervalMs?: number;
7
+ /** Maximum concurrent run executions (default: 3) */
8
+ maxConcurrent?: number;
9
+ /** Filter runs by project ID (optional) */
10
+ projectId?: string;
11
+ /** Callback for status changes */
12
+ onStatusChange?: (runId: string, status: RunStatus, run: Run) => void;
13
+ /** Callback when a run starts */
14
+ onRunStart?: (run: Run) => void;
15
+ /** Callback when a run completes */
16
+ onRunComplete?: (run: Run, result: ConnectorInvokeResult) => void;
17
+ /** Callback when a run fails */
18
+ onRunError?: (run: Run, error: Error) => void;
19
+ }
20
+ /**
21
+ * Background processor for executing queued evaluation runs.
22
+ *
23
+ * The RunProcessor polls for runs with status "queued" and executes them
24
+ * via the configured connector. It supports concurrent execution and provides
25
+ * callbacks for monitoring status changes.
26
+ *
27
+ * Works from both CLI and API contexts - the same processor logic can be used
28
+ * with different status update mechanisms (terminal output vs WebSocket).
29
+ *
30
+ * @example
31
+ * ```typescript
32
+ * const processor = new RunProcessor({
33
+ * pollIntervalMs: 5000,
34
+ * maxConcurrent: 3,
35
+ * onStatusChange: (runId, status, run) => {
36
+ * console.log(`Run ${runId} is now ${status}`);
37
+ * },
38
+ * });
39
+ *
40
+ * processor.start();
41
+ *
42
+ * // Later: graceful shutdown
43
+ * await processor.stop();
44
+ * ```
45
+ */
46
+ export declare class RunProcessor {
47
+ private running;
48
+ private intervalId;
49
+ private activeRuns;
50
+ private options;
51
+ constructor(options?: RunProcessorOptions);
52
+ /**
53
+ * Starts the processor loop.
54
+ * Call this on server/CLI startup.
55
+ */
56
+ start(): void;
57
+ /**
58
+ * Stops the processor gracefully.
59
+ * Waits for active runs to complete.
60
+ */
61
+ stop(): Promise<void>;
62
+ /**
63
+ * Process a single tick (useful for testing or one-shot processing).
64
+ * Returns the number of runs started and waits for them to complete.
65
+ */
66
+ processOnce(): Promise<number>;
67
+ /**
68
+ * Returns true if the processor is currently running.
69
+ */
70
+ isRunning(): boolean;
71
+ /**
72
+ * Returns the number of currently active runs.
73
+ */
74
+ getActiveRunCount(): number;
75
+ /**
76
+ * Main processing tick - picks up queued runs and executes them.
77
+ * Returns the number of runs started.
78
+ * @param oneShot If true, waits for runs to complete before returning
79
+ */
80
+ private tick;
81
+ /**
82
+ * Atomically claims a run for processing.
83
+ * Returns true if successful, false if already claimed.
84
+ */
85
+ private claimRun;
86
+ /**
87
+ * Executes a single run with the evaluation loop.
88
+ *
89
+ * The loop:
90
+ * 1. Sends conversation to tested agent
91
+ * 2. Evaluates agent response against success/failure criteria
92
+ * 3. If success criteria met or failure criteria met, finish the run
93
+ * 4. If max messages reached, finish the run
94
+ * 5. Otherwise, generate a new persona message and continue
95
+ */
96
+ private executeRun;
97
+ /**
98
+ * Gets the thread ID for LangGraph.
99
+ * Uses the stored threadId if available (set on retry), otherwise uses run.id.
100
+ */
101
+ private getThreadId;
102
+ /**
103
+ * Gets the role of the last non-system message.
104
+ * Used to determine if we need to generate a persona message before invoking the connector.
105
+ */
106
+ private getLastMessageRole;
107
+ /**
108
+ * Executes the main evaluation loop.
109
+ * Both success and failure criteria are evaluated at every turn.
110
+ * The loop stops when:
111
+ * - Success criteria is met (run succeeds)
112
+ * - Failure criteria is met AND failureCriteriaMode is "every_turn" (run fails early)
113
+ * - Max messages reached without success (run fails; default mode "on_max_messages")
114
+ */
115
+ private executeEvaluationLoop;
116
+ /**
117
+ * Builds all messages including system prompt for a run.
118
+ * These messages are stored in the run for visibility in the UI.
119
+ */
120
+ private buildAllMessages;
121
+ /**
122
+ * Recovers runs that were interrupted by server crash.
123
+ * Only recovers runs for this processor's project filter.
124
+ */
125
+ private recoverStuckRuns;
126
+ }
127
+ //# sourceMappingURL=run-processor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run-processor.d.ts","sourceRoot":"","sources":["../src/run-processor.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,qBAAqB,EAC3B,MAAM,gBAAgB,CAAC;AAKxB,OAAO,EAIL,KAAK,GAAG,EACR,KAAK,SAAS,EAEf,MAAM,UAAU,CAAC;AAgBlB,YAAY,EAAE,SAAS,EAAE,CAAC;AAE1B,MAAM,WAAW,mBAAmB;IAClC,uDAAuD;IACvD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,qDAAqD;IACrD,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,2CAA2C;IAC3C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,cAAc,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,KAAK,IAAI,CAAC;IACtE,iCAAiC;IACjC,UAAU,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,KAAK,IAAI,CAAC;IAChC,oCAAoC;IACpC,aAAa,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,qBAAqB,KAAK,IAAI,CAAC;IAClE,gCAAgC;IAChC,UAAU,CAAC,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,EAAE,KAAK,KAAK,IAAI,CAAC;CAC/C;AAYD;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,UAAU,CAA+B;IACjD,OAAO,CAAC,UAAU,CAAoC;IACtD,OAAO,CAAC,OAAO,CAAkB;gBAErB,OAAO,GAAE,mBAAwB;IAY7C;;;OAGG;IACH,KAAK,IAAI,IAAI;IAeb;;;OAGG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAY3B;;;OAGG;IACG,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC;IAIpC;;OAEG;IACH,SAAS,IAAI,OAAO;IAIpB;;OAEG;IACH,iBAAiB,IAAI,MAAM;IAI3B;;;;OAIG;YACW,IAAI;IA+ClB;;;OAGG;IACH,OAAO,CAAC,QAAQ;IAehB;;;;;;;;;OASG;YACW,UAAU;IAmIxB;;;OAGG;IACH,OAAO,CAAC,WAAW;IAInB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAM1B;;;;;;;OAOG;YACW,qBAAqB;IAsLnC;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAuCxB;;;OAGG;IACH,OAAO,CAAC,gBAAgB;CAUzB"}