@orq-ai/evaluatorq 1.0.0-6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +226 -0
  2. package/dist/index.d.ts +5 -0
  3. package/dist/index.d.ts.map +1 -0
  4. package/dist/index.js +4 -0
  5. package/dist/lib/effects.d.ts +12 -0
  6. package/dist/lib/effects.d.ts.map +1 -0
  7. package/dist/lib/effects.js +89 -0
  8. package/dist/lib/evaluatorq.d.ts +11 -0
  9. package/dist/lib/evaluatorq.d.ts.map +1 -0
  10. package/dist/lib/evaluatorq.js +135 -0
  11. package/dist/lib/progress.d.ts +18 -0
  12. package/dist/lib/progress.d.ts.map +1 -0
  13. package/dist/lib/progress.js +114 -0
  14. package/dist/lib/table-display.d.ts +4 -0
  15. package/dist/lib/table-display.d.ts.map +1 -0
  16. package/dist/lib/table-display.js +261 -0
  17. package/dist/lib/types.d.ts +63 -0
  18. package/dist/lib/types.d.ts.map +1 -0
  19. package/dist/lib/types.js +1 -0
  20. package/dist/lib/visualizer/html-generator.d.ts +4 -0
  21. package/dist/lib/visualizer/html-generator.d.ts.map +1 -0
  22. package/dist/lib/visualizer/html-generator.js +339 -0
  23. package/dist/lib/visualizer/index.d.ts +13 -0
  24. package/dist/lib/visualizer/index.d.ts.map +1 -0
  25. package/dist/lib/visualizer/index.js +49 -0
  26. package/dist/lib/visualizer/types.d.ts +17 -0
  27. package/dist/lib/visualizer/types.d.ts.map +1 -0
  28. package/dist/lib/visualizer/types.js +1 -0
  29. package/dist/tsconfig.lib.tsbuildinfo +1 -0
  30. package/package.json +32 -0
  31. package/src/index.ts +4 -0
  32. package/src/lib/effects.ts +174 -0
  33. package/src/lib/evaluatorq.ts +260 -0
  34. package/src/lib/progress.ts +170 -0
  35. package/src/lib/table-display.ts +352 -0
  36. package/src/lib/types.ts +79 -0
  37. package/src/lib/visualizer/html-generator.ts +364 -0
  38. package/src/lib/visualizer/index.ts +70 -0
  39. package/src/lib/visualizer/types.ts +17 -0
  40. package/tsconfig.json +10 -0
  41. package/tsconfig.lib.json +14 -0
@@ -0,0 +1,174 @@
1
+ import { Effect, pipe } from "effect";
2
+
3
+ import { ProgressService } from "./progress.js";
4
+ import type {
5
+ DataPoint,
6
+ DataPointResult,
7
+ Job,
8
+ JobResult,
9
+ Scorer,
10
+ } from "./types.js";
11
+
12
/**
 * Resolves one data point and runs every job (with its evaluators) against it.
 *
 * The returned effect does not fail in practice: any failure raised while
 * resolving `dataPromise` or running the jobs is converted into a single
 * placeholder result that carries the error.
 *
 * @param dataPromise - Promise producing the data point to evaluate.
 * @param rowIndex - Zero-based position of the data point; reported to the
 *   progress service as `rowIndex + 1` and forwarded to every job.
 * @param jobs - Jobs to run against the resolved data point.
 * @param evaluators - Named scorers forwarded to `processJobEffect`.
 * @param parallelism - Maximum number of jobs run concurrently for this data point.
 * @returns An effect yielding a one-element array describing this data point.
 */
export function processDataPointEffect(
  dataPromise: Promise<DataPoint>,
  rowIndex: number,
  jobs: Job[],
  evaluators: { name: string; scorer: Scorer }[],
  parallelism: number,
): Effect.Effect<DataPointResult[], Error, ProgressService> {
  return pipe(
    Effect.tryPromise({
      try: () => dataPromise,
      // A promise may reject with any value; wrap non-Error rejections so the
      // `error` field reported below is always a real Error instance.
      catch: (error) =>
        error instanceof Error ? error : new Error(String(error)),
    }),
    Effect.flatMap((dataPoint) =>
      Effect.gen(function* (_) {
        const progress = yield* _(ProgressService);

        // Progress is reported one-based.
        yield* _(
          progress.updateProgress({
            currentDataPoint: rowIndex + 1,
            phase: "processing",
          }),
        );

        const jobResults = yield* _(
          Effect.forEach(
            jobs,
            (job) => processJobEffect(job, dataPoint, rowIndex, evaluators),
            { concurrency: parallelism },
          ),
        );

        return [
          {
            dataPoint,
            jobResults,
          },
        ];
      }),
    ),
    Effect.catchAll((error) =>
      Effect.succeed([
        {
          dataPoint: { inputs: {} }, // Placeholder: the real data point could not be obtained
          error,
        },
      ]),
    ),
  );
}
63
+
64
/**
 * Runs a single job against a data point and scores its output with every
 * evaluator.
 *
 * Failures are reported as data rather than as effect failures:
 * - a failing scorer yields a score entry with an empty-string score and the error;
 * - a failing job yields a result named "Unknown" with `output: null` and the error.
 *
 * @param job - Job invoked as `job(dataPoint, rowIndex)`.
 * @param dataPoint - Data point passed to the job and to every scorer.
 * @param rowIndex - Index of the data point, forwarded to the job.
 * @param evaluators - Named scorers; all of them run concurrently.
 * @returns An effect yielding the job result, including evaluator scores.
 */
export function processJobEffect(
  job: Job,
  dataPoint: DataPoint,
  rowIndex: number,
  evaluators: { name: string; scorer: Scorer }[],
): Effect.Effect<JobResult, Error, ProgressService> {
  // Promises may reject with arbitrary values; wrap anything that is not an
  // Error so reported `error` fields are always real Error instances.
  const toError = (error: unknown): Error =>
    error instanceof Error ? error : new Error(String(error));

  return Effect.gen(function* (_) {
    const progress = yield* _(ProgressService);

    // The job's real name is only known once it has resolved, so a generic
    // label is shown while it runs.
    yield* _(
      progress.updateProgress({
        currentJob: "job",
        phase: "processing",
      }),
    );

    const jobResult = yield* _(
      Effect.tryPromise({
        try: () => job(dataPoint, rowIndex),
        catch: toError,
      }),
    );

    yield* _(progress.updateProgress({ currentJob: jobResult.name }));

    if (evaluators.length === 0) {
      return {
        jobName: jobResult.name,
        output: jobResult.output,
        evaluatorScores: [],
      };
    }

    yield* _(progress.updateProgress({ phase: "evaluating" }));

    const evaluatorScores = yield* _(
      Effect.forEach(
        evaluators,
        (evaluator) =>
          pipe(
            progress.updateProgress({ currentEvaluator: evaluator.name }),
            Effect.flatMap(() =>
              Effect.tryPromise({
                try: async () =>
                  evaluator.scorer({
                    data: dataPoint,
                    output: jobResult.output,
                  }),
                catch: toError,
              }),
            ),
            Effect.map((score) => ({
              evaluatorName: evaluator.name,
              score: score as string | number | boolean,
            })),
            // One failing scorer must not discard the other scores.
            Effect.catchAll((error) =>
              Effect.succeed({
                evaluatorName: evaluator.name,
                score: "" as string,
                error,
              }),
            ),
          ),
        { concurrency: "unbounded" },
      ),
    );

    return {
      jobName: jobResult.name,
      output: jobResult.output,
      evaluatorScores,
    };
  }).pipe(
    Effect.catchAll((error) =>
      Effect.succeed({
        jobName: "Unknown", // The job failed before returning, so its name is not available
        output: null,
        error,
      }),
    ),
  );
}
@@ -0,0 +1,260 @@
1
+ import { Effect, pipe } from "effect";
2
+
3
+ import type { Orq } from "@orq-ai/node";
4
+
5
+ import { processDataPointEffect } from "./effects.js";
6
+ import {
7
+ ProgressService,
8
+ ProgressServiceLive,
9
+ withProgress,
10
+ } from "./progress.js";
11
+ import { displayResultsTableEffect } from "./table-display.js";
12
+ import type {
13
+ DataPoint,
14
+ EvaluatorParams,
15
+ EvaluatorqResult,
16
+ Job,
17
+ } from "./types.js";
18
+
19
/**
 * Lazily loads `@orq-ai/node` and creates an Orq client.
 *
 * @param apiKey - API key passed to the Orq client.
 * @param serverURL - Base URL of the Orq API. Defaults to the `ORQ_BASE_URL`
 *   environment variable when it is set to a non-empty value, otherwise to the
 *   previously hard-coded host.
 *   NOTE(review): the fallback is a staging host — confirm the intended
 *   production URL before release.
 * @returns A new Orq client instance.
 * @throws Error with installation instructions when `@orq-ai/node` cannot be
 *   resolved, or a generic setup error for any other failure.
 */
async function setupOrqClient(
  apiKey: string,
  // `||` (not `??`) so that an empty ORQ_BASE_URL also falls back to the default.
  serverURL: string = process.env.ORQ_BASE_URL || "https://my.staging.orq.ai",
) {
  try {
    const client = await import("@orq-ai/node");

    return new client.Orq({ apiKey, serverURL });
  } catch (error: unknown) {
    // Anything can be thrown, so read `code` / `message` defensively instead
    // of asserting that the value is an Error.
    const details = error as
      | { code?: unknown; message?: unknown }
      | null
      | undefined;
    const code = details?.code;
    const message =
      typeof details?.message === "string" ? details.message : "";

    if (
      code === "MODULE_NOT_FOUND" ||
      code === "ERR_MODULE_NOT_FOUND" ||
      message.includes("Cannot find module")
    ) {
      throw new Error(
        "The @orq-ai/node package is not installed. To use dataset features, please install it:\n" +
          "  npm install @orq-ai/node\n" +
          "  # or\n" +
          "  yarn add @orq-ai/node\n" +
          "  # or\n" +
          "  bun add @orq-ai/node",
      );
    }
    throw new Error(`Failed to setup ORQ client: ${message || String(error)}`);
  }
}
43
+
44
/**
 * Loads the datapoints of an Orq dataset and exposes each one as an
 * already-resolved promise.
 *
 * NOTE(review): only a single `listDatapoints` call is made; if the endpoint
 * is paginated, later pages are not fetched — confirm against the SDK.
 *
 * @param orqClient - Client used to query the dataset.
 * @param datasetId - Identifier of the dataset to read.
 * @returns One resolved promise per datapoint returned by the client.
 * @throws Error prefixed with "Failed to fetch dataset <id>" on any failure.
 */
async function fetchDatasetAsDataPoints(
  orqClient: Orq,
  datasetId: string,
): Promise<Promise<DataPoint>[]> {
  try {
    const listing = await orqClient.datasets.listDatapoints({ datasetId });

    const pending = listing.data.map((row) => {
      const point = {
        inputs: row.inputs || {},
        expectedOutput: row.expectedOutput,
      } as DataPoint;
      return Promise.resolve(point);
    });

    return pending;
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    throw new Error(`Failed to fetch dataset ${datasetId}: ${reason}`);
  }
}
63
+
64
/**
 * Runs an evaluation: resolves the data points, executes every job against
 * them, scores the outputs and (optionally) prints progress and a results table.
 *
 * @param _name - The name of the evaluation run (currently unused).
 * @param params - The parameters for the evaluation run. `data` is either a
 *   list of data point promises or an object with a `datasetId` to fetch from
 *   the Orq platform. `evaluators` defaults to `[]`, `parallelism` to `1` and
 *   `print` to `true`.
 * @returns The results of the evaluation run.
 * @throws Error when a dataset is requested but `ORQ_API_KEY` is not set, when
 *   the Orq client cannot be created, or when the dataset cannot be fetched.
 */
export async function evaluatorq(
  _name: string,
  params: EvaluatorParams,
): Promise<EvaluatorqResult> {
  const { data, evaluators = [], jobs, parallelism = 1, print = true } = params;

  let dataPromises: Promise<DataPoint>[];

  if ("datasetId" in data) {
    // The Orq client is only needed to download a dataset, so it is created
    // here rather than whenever ORQ_API_KEY happens to be set. Runs on inline
    // data therefore do not require the optional @orq-ai/node package.
    const orqApiKey = process.env.ORQ_API_KEY;
    if (!orqApiKey) {
      throw new Error(
        "ORQ_API_KEY environment variable must be set to fetch datapoints from Orq platform.",
      );
    }
    const orqClient = await setupOrqClient(orqApiKey);
    dataPromises = await fetchDatasetAsDataPoints(orqClient, data.datasetId);
  } else {
    dataPromises = data as Promise<DataPoint>[];
  }

  // Reuse the shared pipeline (progress tracking, processing, optional table)
  // and convert the resulting Effect back into a Promise.
  return Effect.runPromise(
    runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print),
  );
}
141
+
142
/**
 * Effect-based variant of the evaluation run.
 *
 * When `params.data` contains a `datasetId`, the dataset is fetched from the
 * Orq platform first (requires `ORQ_API_KEY`); otherwise `data` is used as the
 * list of data point promises. The evaluation itself is delegated to
 * `runEvaluationEffect`.
 *
 * @param _name - The name of the evaluation run (currently unused).
 * @param params - The parameters for the evaluation run.
 * @returns An effect yielding the evaluation results, failing with an `Error`
 *   when the API key is missing, the client cannot be created, or the dataset
 *   cannot be fetched.
 */
export const evaluatorqEffect = (
  _name: string,
  params: EvaluatorParams,
): Effect.Effect<EvaluatorqResult, Error, never> => {
  const { data, evaluators = [], jobs, parallelism = 1, print = true } = params;

  // Inline data: nothing to fetch.
  if (!("datasetId" in data)) {
    return runEvaluationEffect(
      data as Promise<DataPoint>[],
      evaluators,
      jobs,
      parallelism,
      print,
    );
  }

  const datasetId = data.datasetId;

  return Effect.gen(function* (_) {
    const apiKey = process.env.ORQ_API_KEY;
    if (!apiKey) {
      return yield* _(
        Effect.fail(
          new Error(
            "ORQ_API_KEY environment variable must be set to fetch datasets from Orq platform.",
          ),
        ),
      );
    }

    // `setupOrqClient` either returns a client or throws an Error that already
    // carries a descriptive message, so Errors are passed through unchanged
    // (no second "Failed to setup" prefix) and no null check is needed.
    const orqClient = yield* _(
      Effect.tryPromise({
        try: () => setupOrqClient(apiKey),
        catch: (error) =>
          error instanceof Error
            ? error
            : new Error(`Failed to setup Orq client: ${String(error)}`),
      }),
    );

    const dataPromises = yield* _(
      Effect.tryPromise({
        try: () => fetchDatasetAsDataPoints(orqClient, datasetId),
        catch: (error) =>
          error instanceof Error
            ? error
            : new Error(`Failed to fetch dataset: ${String(error)}`),
      }),
    );

    return yield* _(
      runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print),
    );
  });
};
202
+
203
/**
 * Shared evaluation pipeline: initialises progress, processes every data
 * point, optionally renders the results table, and wraps the run with the
 * progress spinner.
 *
 * @param dataPromises - Promises for the data points to evaluate.
 * @param evaluators - Scorers applied to each job output (defaults to `[]`).
 * @param jobs - Jobs run against each data point.
 * @param parallelism - Concurrency limit, used both across data points and,
 *   via `processDataPointEffect`, across jobs within a data point.
 * @param print - When true, shows progress and prints the results table.
 * @returns An effect yielding the flattened per-data-point results.
 */
const runEvaluationEffect = (
  dataPromises: Promise<DataPoint>[],
  evaluators: EvaluatorParams["evaluators"] = [],
  jobs: Job[],
  parallelism: number,
  print: boolean,
): Effect.Effect<EvaluatorqResult, Error, never> => {
  const evaluateAll = Effect.gen(function* ($) {
    const tracker = yield* $(ProgressService);

    // Reset the counters before any data point is touched.
    yield* $(
      tracker.updateProgress({
        totalDataPoints: dataPromises.length,
        currentDataPoint: 0,
        phase: "initializing",
      }),
    );

    // `Effect.forEach` hands the element index to the callback directly.
    const perDataPoint = yield* $(
      Effect.forEach(
        dataPromises,
        (pending, position) =>
          processDataPointEffect(
            pending,
            position,
            jobs,
            evaluators,
            parallelism,
          ),
        { concurrency: parallelism },
      ),
    );

    return perDataPoint.flat();
  });

  return pipe(
    evaluateAll,
    // Render the table only when printing is enabled
    print
      ? Effect.tap((results) => displayResultsTableEffect(results))
      : Effect.tap(() => Effect.void),
    // Satisfy the ProgressService requirement
    Effect.provide(ProgressServiceLive),
    // Add the spinner around the whole run
    (inner) => withProgress(inner, print),
  );
};
251
+
252
/**
 * Composable evaluation effect that always ends with the results table being
 * displayed exactly once.
 *
 * `evaluatorqEffect` already renders the table when `params.print` is enabled
 * (the default), so the extra display step is added only when printing was
 * disabled for the inner run; otherwise the table would be printed twice.
 *
 * @param name - The name of the evaluation run.
 * @param params - The parameters for the evaluation run.
 * @returns An effect yielding the evaluation results.
 */
export const evaluatorqWithTableEffect = (
  name: string,
  params: EvaluatorParams,
): Effect.Effect<EvaluatorqResult, Error, never> => {
  // Same defaulting rule as `evaluatorqEffect` (only `undefined` means "on").
  const { print = true } = params;
  const evaluation = evaluatorqEffect(name, params);

  return print
    ? evaluation
    : pipe(
        evaluation,
        Effect.tap((results) => displayResultsTableEffect(results)),
      );
};
@@ -0,0 +1,170 @@
1
+ import chalk from "chalk";
2
+ import { Context, Effect, Layer } from "effect";
3
+ import ora, { type Ora } from "ora";
4
+
5
/** Snapshot of how far an evaluation run has progressed. */
export interface ProgressState {
  /** Number of data points in the run. */
  totalDataPoints: number;
  /** Data point currently being handled (0 before processing starts). */
  currentDataPoint: number;
  /** Label of the job shown in the progress text, if any. */
  currentJob?: string;
  /** Name of the evaluator shown in the progress text, if any. */
  currentEvaluator?: string;
  /** Stage of the run; selects which progress message is rendered. */
  phase: "initializing" | "processing" | "evaluating" | "completed";
}
13
+
14
/** Operations for reporting evaluation progress, each exposed as an Effect. */
export interface ProgressService {
  /** Merges the given fields into the current progress state. */
  readonly updateProgress: (
    update: Partial<ProgressState>,
  ) => Effect.Effect<void>;
  /** Starts the progress spinner. */
  readonly startSpinner: () => Effect.Effect<void>;
  /** Stops the progress spinner. */
  readonly stopSpinner: () => Effect.Effect<void>;
  /** Shows a one-off message. */
  readonly showMessage: (message: string) => Effect.Effect<void>;
}

/** Context tag under which the progress service is provided and looked up. */
export const ProgressService =
  Context.GenericTag<ProgressService>("ProgressService");
27
+
28
// Module-level ora spinner shared by every service instance; null while no
// spinner is running.
let spinner: Ora | null = null;

/**
 * Builds a progress service that keeps its state in a closure and renders it
 * through the module-level spinner.
 */
const makeProgressService = (): ProgressService => {
  let state: ProgressState = {
    totalDataPoints: 0,
    currentDataPoint: 0,
    phase: "initializing",
  };

  // Rounded completion percentage; 0 while the total is still unknown.
  const percentComplete = (): number =>
    state.totalDataPoints > 0
      ? Math.round((state.currentDataPoint / state.totalDataPoints) * 100)
      : 0;

  // Renders the spinner text for the current state.
  const formatProgressText = (): string => {
    const { phase, currentJob, currentEvaluator } = state;

    if (phase === "initializing") {
      return chalk.cyan("Initializing evaluation...");
    }
    if (phase === "completed") {
      return chalk.green("✓ Evaluation completed");
    }

    const counter = `${state.currentDataPoint}/${state.totalDataPoints} (${percentComplete()}%)`;

    if (phase === "processing") {
      const headline = chalk.cyan(`Processing data point ${counter}`);
      return currentJob
        ? headline + chalk.gray(` - Running job: ${chalk.white(currentJob)}`)
        : headline;
    }
    if (phase === "evaluating") {
      const headline = chalk.cyan(`Evaluating results ${counter}`);
      return currentEvaluator
        ? headline +
            chalk.gray(` - Running evaluator: ${chalk.white(currentEvaluator)}`)
        : headline;
    }

    // Unrecognised phase: no text.
    return "";
  };

  return {
    updateProgress: (update) =>
      Effect.sync(() => {
        state = { ...state, ...update };
        if (spinner) {
          spinner.text = formatProgressText();
        }
      }),

    startSpinner: () =>
      Effect.sync(() => {
        if (spinner) {
          return; // already running
        }

        // Print three blank lines to reserve space...
        process.stdout.write("\n\n\n");
        // ...then move the cursor back up three lines to where the spinner goes.
        process.stdout.write("\x1b[3A");

        spinner = ora({
          text: formatProgressText(),
          spinner: "dots",
          color: "cyan",
        });
        spinner.start();
      }),

    stopSpinner: () =>
      Effect.sync(() => {
        if (!spinner) {
          return;
        }

        if (state.phase === "completed") {
          spinner.succeed(chalk.green("✓ Evaluation completed successfully"));
        } else {
          spinner.stop();
        }

        // Just one newline since table display adds its own
        process.stdout.write("\n");
        spinner = null;
      }),

    showMessage: (message) =>
      Effect.sync(() => {
        if (spinner) {
          spinner.info(message);
          return;
        }
        console.log(message);
      }),
  };
};
131
+
132
/**
 * Layer that provides `ProgressService`. The service instance is created once,
 * when this module is evaluated, and reused wherever the layer is provided.
 */
export const ProgressServiceLive = Layer.succeed(ProgressService, makeProgressService());
137
+
138
/**
 * Runs an effect with a progress spinner around it.
 *
 * The spinner is started before the effect runs. On success the phase is set
 * to "completed" and the spinner is stopped; if the effect fails or is
 * interrupted, the spinner is stopped and the original failure is propagated
 * unchanged.
 *
 * @param effect - The effect to run.
 * @param showProgress - When false, `effect` is returned untouched.
 * @returns The wrapped effect, with `ProgressServiceLive` provided to it.
 */
export const withProgress = <R, E, A>(
  effect: Effect.Effect<A, E, R>,
  showProgress: boolean = true,
): Effect.Effect<A, E, R> => {
  if (!showProgress) {
    return effect;
  }

  return Effect.gen(function* (_) {
    const progress = yield* _(ProgressService);

    yield* _(progress.startSpinner());

    // A failing effect short-circuits the generator instead of throwing into
    // it, so a try/catch here would never run. `Effect.onError` attaches the
    // cleanup to the effect itself so the spinner is always stopped.
    const result = yield* _(
      effect.pipe(Effect.onError(() => progress.stopSpinner())),
    );

    // Mark completion first so stopSpinner renders the success message.
    yield* _(progress.updateProgress({ phase: "completed" }));
    yield* _(progress.stopSpinner());

    return result;
  }).pipe(Effect.provide(ProgressServiceLive));
};