@orq-ai/evaluatorq 1.0.0-10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +239 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4 -0
- package/dist/lib/effects.d.ts +12 -0
- package/dist/lib/effects.d.ts.map +1 -0
- package/dist/lib/effects.js +89 -0
- package/dist/lib/evaluatorq.d.ts +11 -0
- package/dist/lib/evaluatorq.d.ts.map +1 -0
- package/dist/lib/evaluatorq.js +139 -0
- package/dist/lib/progress.d.ts +18 -0
- package/dist/lib/progress.d.ts.map +1 -0
- package/dist/lib/progress.js +114 -0
- package/dist/lib/table-display.d.ts +4 -0
- package/dist/lib/table-display.d.ts.map +1 -0
- package/dist/lib/table-display.js +261 -0
- package/dist/lib/types.d.ts +64 -0
- package/dist/lib/types.d.ts.map +1 -0
- package/dist/lib/types.js +1 -0
- package/dist/lib/visualizer/html-generator.d.ts +4 -0
- package/dist/lib/visualizer/html-generator.d.ts.map +1 -0
- package/dist/lib/visualizer/html-generator.js +339 -0
- package/dist/lib/visualizer/index.d.ts +13 -0
- package/dist/lib/visualizer/index.d.ts.map +1 -0
- package/dist/lib/visualizer/index.js +49 -0
- package/dist/lib/visualizer/types.d.ts +17 -0
- package/dist/lib/visualizer/types.d.ts.map +1 -0
- package/dist/lib/visualizer/types.js +1 -0
- package/dist/tsconfig.lib.tsbuildinfo +1 -0
- package/package.json +61 -0
package/README.md
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# @orq-ai/evaluatorq
|
|
2
|
+
|
|
3
|
+
An evaluation framework library that provides a flexible way to run parallel evaluations and optionally integrate with the Orq AI platform.
|
|
4
|
+
|
|
5
|
+
## 🎯 Features
|
|
6
|
+
|
|
7
|
+
- **Parallel Execution**: Run multiple evaluation jobs concurrently with progress tracking
|
|
8
|
+
- **Flexible Data Sources**: Support for inline data, promises, and Orq platform datasets
|
|
9
|
+
- **Type-safe**: Fully written in TypeScript
|
|
10
|
+
- **Orq Platform Integration**: Seamlessly fetch and evaluate datasets from Orq AI (optional)
|
|
11
|
+
|
|
12
|
+
## 📥 Installation
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
npm install @orq-ai/evaluatorq
|
|
16
|
+
# or
|
|
17
|
+
yarn add @orq-ai/evaluatorq
|
|
18
|
+
# or
|
|
19
|
+
bun add @orq-ai/evaluatorq
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### Peer Dependencies
|
|
23
|
+
|
|
24
|
+
If you want to use the Orq platform integration:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npm install @orq-ai/node
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## 🚀 Quick Start
|
|
31
|
+
|
|
32
|
+
### Basic Usage
|
|
33
|
+
|
|
34
|
+
```typescript
|
|
35
|
+
import { evaluatorq } from "@orq-ai/evaluatorq";
|
|
36
|
+
|
|
37
|
+
await evaluatorq("text-analysis", {
|
|
38
|
+
data: [
|
|
39
|
+
{ inputs: { text: "Hello world" } },
|
|
40
|
+
{ inputs: { text: "Testing evaluation" } },
|
|
41
|
+
],
|
|
42
|
+
jobs: [
|
|
43
|
+
async (data) => {
|
|
44
|
+
const text = data.inputs.text;
|
|
45
|
+
const analysis = {
|
|
46
|
+
length: text.length,
|
|
47
|
+
wordCount: text.split(" ").length,
|
|
48
|
+
uppercase: text.toUpperCase(),
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
return {
|
|
52
|
+
name: "text-analyzer",
|
|
53
|
+
output: analysis,
|
|
54
|
+
};
|
|
55
|
+
},
|
|
56
|
+
],
|
|
57
|
+
evaluators: [
|
|
58
|
+
{
|
|
59
|
+
name: "length-check",
|
|
60
|
+
scorer: async ({ output }) => {
|
|
61
|
+
return output.length > 10 ? 1 : 0;
|
|
62
|
+
},
|
|
63
|
+
},
|
|
64
|
+
],
|
|
65
|
+
});
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Using Orq Platform Datasets
|
|
69
|
+
|
|
70
|
+
```typescript
|
|
71
|
+
import { evaluatorq } from "@orq-ai/evaluatorq";
|
|
72
|
+
|
|
73
|
+
// Requires ORQ_API_KEY environment variable
|
|
74
|
+
await evaluatorq("dataset-evaluation", {
|
|
75
|
+
data: {
|
|
76
|
+
datasetId: "your-dataset-id", // From Orq platform
|
|
77
|
+
},
|
|
78
|
+
jobs: [
|
|
79
|
+
async (data) => {
|
|
80
|
+
// Process each data point from the dataset
|
|
81
|
+
return {
|
|
82
|
+
name: "processor",
|
|
83
|
+
output: processData(data),
|
|
84
|
+
};
|
|
85
|
+
},
|
|
86
|
+
],
|
|
87
|
+
evaluators: [
|
|
88
|
+
{
|
|
89
|
+
name: "accuracy",
|
|
90
|
+
scorer: async ({ data, output }) => {
|
|
91
|
+
// Compare output with expected results
|
|
92
|
+
return calculateScore(output, data.expectedOutput);
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
],
|
|
96
|
+
});
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Advanced Features
|
|
100
|
+
|
|
101
|
+
#### Multiple Jobs
|
|
102
|
+
|
|
103
|
+
Run multiple jobs in parallel for each data point:
|
|
104
|
+
|
|
105
|
+
```typescript
|
|
106
|
+
await evaluatorq("multi-job-eval", {
|
|
107
|
+
data: [...],
|
|
108
|
+
jobs: [
|
|
109
|
+
async (data) => ({
|
|
110
|
+
name: "preprocessor",
|
|
111
|
+
output: preprocess(data),
|
|
112
|
+
}),
|
|
113
|
+
async (data) => ({
|
|
114
|
+
name: "analyzer",
|
|
115
|
+
output: analyze(data),
|
|
116
|
+
}),
|
|
117
|
+
async (data) => ({
|
|
118
|
+
name: "transformer",
|
|
119
|
+
output: transform(data),
|
|
120
|
+
}),
|
|
121
|
+
],
|
|
122
|
+
evaluators: [...],
|
|
123
|
+
});
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
#### Custom Error Handling
|
|
127
|
+
|
|
128
|
+
```typescript
|
|
129
|
+
await evaluatorq("error-handling", {
|
|
130
|
+
data: [...],
|
|
131
|
+
jobs: [
|
|
132
|
+
async (data) => {
|
|
133
|
+
try {
|
|
134
|
+
const result = await riskyOperation(data);
|
|
135
|
+
return { name: "risky-job", output: result };
|
|
136
|
+
} catch (error) {
|
|
137
|
+
// Errors are captured and included in the evaluation results
|
|
138
|
+
throw new Error(`Failed to process: ${error.message}`);
|
|
139
|
+
}
|
|
140
|
+
},
|
|
141
|
+
],
|
|
142
|
+
evaluators: [...],
|
|
143
|
+
});
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
#### Async Data Sources
|
|
147
|
+
|
|
148
|
+
```typescript
|
|
149
|
+
// Create an array of promises for async data
|
|
150
|
+
const dataPromises = Array.from({ length: 1000 }, (_, i) =>
|
|
151
|
+
Promise.resolve({ inputs: { value: i } })
|
|
152
|
+
);
|
|
153
|
+
|
|
154
|
+
await evaluatorq("async-eval", {
|
|
155
|
+
data: dataPromises,
|
|
156
|
+
jobs: [...],
|
|
157
|
+
evaluators: [...],
|
|
158
|
+
});
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## 🔧 Configuration
|
|
162
|
+
|
|
163
|
+
### Environment Variables
|
|
164
|
+
|
|
165
|
+
- `ORQ_API_KEY`: API key for Orq platform integration (required for dataset access)
|
|
166
|
+
|
|
167
|
+
## 📚 API Reference
|
|
168
|
+
|
|
169
|
+
### `evaluatorq(name, options)`
|
|
170
|
+
|
|
171
|
+
Main function to run evaluations.
|
|
172
|
+
|
|
173
|
+
#### Parameters:
|
|
174
|
+
|
|
175
|
+
- `name`: String identifier for the evaluation run
|
|
176
|
+
- `options`: Configuration object with:
|
|
177
|
+
- `data`: Array of data points (or of promises resolving to data points), or an Orq dataset config (`{ datasetId }`)
|
|
178
|
+
- `jobs`: Array of job functions to run on each data point
|
|
179
|
+
- `evaluators`: Array of evaluator configurations, each with a `name` and a `scorer` (optional, defaults to `[]`)
|
|
180
|
+
|
|
181
|
+
#### Returns:
|
|
182
|
+
|
|
183
|
+
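Promise that resolves to the evaluation results (one entry per data point, containing the data point and its job results) once the evaluation is complete.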
|
|
184
|
+
|
|
185
|
+
### Types
|
|
186
|
+
|
|
187
|
+
```typescript
|
|
188
|
+
type Output = string | number | boolean | Record<string, unknown> | null;
|
|
189
|
+
|
|
190
|
+
interface DataPoint {
|
|
191
|
+
inputs: Record<string, unknown>;
|
|
192
|
+
expectedOutput?: Output;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
interface JobResult {
|
|
196
|
+
jobName: string;
|
|
197
|
+
output: Output;
|
|
198
|
+
error?: Error;
|
|
199
|
+
evaluatorScores?: EvaluatorScore[];
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
interface EvaluatorScore {
|
|
203
|
+
evaluatorName: string;
|
|
204
|
+
score: number | boolean | string;
|
|
205
|
+
error?: Error;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
type Job = (
|
|
209
|
+
data: DataPoint,
|
|
210
|
+
row: number,
|
|
211
|
+
) => Promise<{
|
|
212
|
+
name: string;
|
|
213
|
+
output: Output;
|
|
214
|
+
}>;
|
|
215
|
+
|
|
216
|
+
type ScorerParameter = {
|
|
217
|
+
data: DataPoint;
|
|
218
|
+
output: Output;
|
|
219
|
+
};
|
|
220
|
+
|
|
221
|
+
type Scorer =
|
|
222
|
+
| ((params: ScorerParameter) => Promise<string>)
|
|
223
|
+
| ((params: ScorerParameter) => Promise<number>)
|
|
224
|
+
| ((params: ScorerParameter) => Promise<boolean>);
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## 🛠️ Development
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
# Build the package
|
|
231
|
+
bunx nx build evaluatorq
|
|
232
|
+
|
|
233
|
+
# Run type checking
|
|
234
|
+
bunx nx typecheck evaluatorq
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## 📄 License
|
|
238
|
+
|
|
239
|
+
This is free and unencumbered software released into the public domain. See [UNLICENSE](https://unlicense.org) for details.
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AACnE,cAAc,gBAAgB,CAAC;AAC/B,cAAc,2BAA2B,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { Effect } from "effect";
|
|
2
|
+
import { ProgressService } from "./progress.js";
|
|
3
|
+
import type { DataPoint, DataPointResult, Job, JobResult, Scorer } from "./types.js";
|
|
4
|
+
export declare function processDataPointEffect(dataPromise: Promise<DataPoint>, rowIndex: number, jobs: Job[], evaluators: {
|
|
5
|
+
name: string;
|
|
6
|
+
scorer: Scorer;
|
|
7
|
+
}[], parallelism: number): Effect.Effect<DataPointResult[], Error, ProgressService>;
|
|
8
|
+
export declare function processJobEffect(job: Job, dataPoint: DataPoint, rowIndex: number, evaluators: {
|
|
9
|
+
name: string;
|
|
10
|
+
scorer: Scorer;
|
|
11
|
+
}[]): Effect.Effect<JobResult, Error, ProgressService>;
|
|
12
|
+
//# sourceMappingURL=effects.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"effects.d.ts","sourceRoot":"","sources":["../../src/lib/effects.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAEtC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,KAAK,EACV,SAAS,EACT,eAAe,EACf,GAAG,EACH,SAAS,EACT,MAAM,EACP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,sBAAsB,CACpC,WAAW,EAAE,OAAO,CAAC,SAAS,CAAC,EAC/B,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,GAAG,EAAE,EACX,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,EAC9C,WAAW,EAAE,MAAM,GAClB,MAAM,CAAC,MAAM,CAAC,eAAe,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CA4C1D;AAED,wBAAgB,gBAAgB,CAC9B,GAAG,EAAE,GAAG,EACR,SAAS,EAAE,SAAS,EACpB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,eAAe,CAAC,CAyGlD"}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { Effect, pipe } from "effect";
|
|
2
|
+
import { ProgressService } from "./progress.js";
|
|
3
|
+
/**
 * Builds an Effect that resolves one data point and runs every job against it.
 *
 * @param dataPromise - Promise that yields the data point to process.
 * @param rowIndex - Zero-based position of the data point; reported to the
 * progress service as `rowIndex + 1` and forwarded to each job.
 * @param jobs - Job functions to run for the data point.
 * @param evaluators - `{ name, scorer }` entries forwarded to `processJobEffect`.
 * @param parallelism - Concurrency passed to `Effect.forEach` when running the jobs.
 * @returns An Effect producing a single-element array: `{ dataPoint, jobResults }`
 * on success, or `{ dataPoint: { inputs: {} }, error }` if the pipeline fails
 * (for example when the data promise rejects).
 */
export function processDataPointEffect(dataPromise, rowIndex, jobs, evaluators, parallelism) {
    // Runs all jobs for a data point that has already been resolved.
    const runJobsFor = (dataPoint) => Effect.gen(function* (_) {
        const progressService = yield* _(ProgressService);
        yield* _(progressService.updateProgress({
            currentDataPoint: rowIndex + 1,
            phase: "processing",
        }));
        const runJob = (job) => processJobEffect(job, dataPoint, rowIndex, evaluators);
        const jobResults = yield* _(Effect.forEach(jobs, runJob, { concurrency: parallelism }));
        return [{ dataPoint, jobResults }];
    });
    const resolvedDataPoint = Effect.tryPromise({
        try: () => dataPromise,
        catch: (cause) => cause,
    });
    // A placeholder data point is reported because the real one is unavailable on failure.
    const toFailureResult = (error) => Effect.succeed([
        { dataPoint: { inputs: {} }, error },
    ]);
    return pipe(resolvedDataPoint, Effect.flatMap(runJobsFor), Effect.catchAll(toFailureResult));
}
|
|
29
|
+
/**
 * Builds an Effect that runs a single job for a data point and then scores its
 * output with every evaluator.
 *
 * @param job - Job function, called as `job(dataPoint, rowIndex)`.
 * @param dataPoint - Data point handed to the job and to each scorer.
 * @param rowIndex - Zero-based row index forwarded to the job.
 * @param evaluators - `{ name, scorer }` entries; scorers run with unbounded concurrency.
 * @returns An Effect producing `{ jobName, output, evaluatorScores }`. If the job
 * fails, the result is `{ jobName: "Unknown", output: null, error }` instead. A
 * failing scorer yields `{ evaluatorName, score: "", error }` for that evaluator only.
 */
export function processJobEffect(job, dataPoint, rowIndex, evaluators) {
    // Scores `output` with one evaluator; a scorer failure is recorded on the
    // score entry instead of failing the whole job.
    const scoreWith = (progress, output) => (evaluator) => Effect.gen(function* (_) {
        yield* _(progress.updateProgress({
            currentEvaluator: evaluator.name,
        }));
        const attempt = Effect.tryPromise({
            try: async () => evaluator.scorer({ data: dataPoint, output }),
            catch: (cause) => cause,
        });
        const onScore = Effect.map((score) => ({
            evaluatorName: evaluator.name,
            score,
        }));
        const onScorerFailure = Effect.catchAll((error) => Effect.succeed({
            evaluatorName: evaluator.name,
            score: "",
            error,
        }));
        return yield* _(pipe(attempt, onScore, onScorerFailure));
    });
    const runJob = Effect.gen(function* (_) {
        const progress = yield* _(ProgressService);
        // The job's real name is only known once it has returned, so a
        // placeholder is shown while it runs.
        yield* _(progress.updateProgress({
            currentJob: "job",
            phase: "processing",
        }));
        const jobResult = yield* _(Effect.tryPromise({
            try: () => job(dataPoint, rowIndex),
            catch: (cause) => cause,
        }));
        yield* _(progress.updateProgress({
            currentJob: jobResult.name,
        }));
        let evaluatorScores = [];
        if (evaluators.length > 0) {
            yield* _(progress.updateProgress({ phase: "evaluating" }));
            evaluatorScores = yield* _(Effect.forEach(evaluators, scoreWith(progress, jobResult.output), { concurrency: "unbounded" }));
        }
        return {
            jobName: jobResult.name,
            output: jobResult.output,
            evaluatorScores,
        };
    });
    // The job name is unknown when the job failed before returning a result.
    const toFailedJobResult = (error) => Effect.succeed({
        jobName: "Unknown",
        output: null,
        error,
    });
    return runJob.pipe(Effect.catchAll(toFailedJobResult));
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { Effect } from "effect";
|
|
2
|
+
import type { EvaluatorParams, EvaluatorqResult } from "./types.js";
|
|
3
|
+
/**
|
|
4
|
+
* @param _name - The name of the evaluation run.
|
|
5
|
+
* @param params - The parameters for the evaluation run.
|
|
6
|
+
* @returns The results of the evaluation run.
|
|
7
|
+
*/
|
|
8
|
+
export declare function evaluatorq(_name: string, params: EvaluatorParams): Promise<EvaluatorqResult>;
|
|
9
|
+
export declare const evaluatorqEffect: (_name: string, params: EvaluatorParams) => Effect.Effect<EvaluatorqResult, Error, never>;
|
|
10
|
+
export declare const evaluatorqWithTableEffect: (name: string, params: EvaluatorParams) => Effect.Effect<EvaluatorqResult, Error, never>;
|
|
11
|
+
//# sourceMappingURL=evaluatorq.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluatorq.d.ts","sourceRoot":"","sources":["../../src/lib/evaluatorq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAWtC,OAAO,KAAK,EAEV,eAAe,EACf,gBAAgB,EAEjB,MAAM,YAAY,CAAC;AA+CpB;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,gBAAgB,CAAC,CAsE3B;AAGD,eAAO,MAAM,gBAAgB,GAC3B,OAAO,MAAM,EACb,QAAQ,eAAe,KACtB,MAAM,CAAC,MAAM,CAAC,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAuD9C,CAAC;AAsDF,eAAO,MAAM,yBAAyB,GACpC,MAAM,MAAM,EACZ,QAAQ,eAAe,KACtB,MAAM,CAAC,MAAM,CAAC,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAI5C,CAAC"}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { Effect, pipe } from "effect";
|
|
2
|
+
import { processDataPointEffect } from "./effects.js";
|
|
3
|
+
import { ProgressService, ProgressServiceLive, withProgress, } from "./progress.js";
|
|
4
|
+
import { displayResultsTableEffect } from "./table-display.js";
|
|
5
|
+
/**
 * Lazily loads the optional `@orq-ai/node` package and creates an Orq client.
 *
 * @param apiKey - API key passed to the `Orq` constructor.
 * @returns The constructed Orq client.
 * @throws {Error} With installation instructions when the package cannot be
 * resolved, or a "Failed to setup ORQ client" error for any other failure. In
 * both cases the original error is attached as `cause`.
 */
async function setupOrqClient(apiKey) {
    try {
        const client = await import("@orq-ai/node");
        return new client.Orq({ apiKey });
    }
    catch (error) {
        // Anything can be thrown, so `code` and `message` are read defensively
        // instead of assuming an Error instance.
        const code = error?.code;
        const message = error?.message;
        const isMissingModule = code === "MODULE_NOT_FOUND" ||
            code === "ERR_MODULE_NOT_FOUND" ||
            (typeof message === "string" && message.includes("Cannot find module"));
        if (isMissingModule) {
            throw new Error("The @orq-ai/node package is not installed. To use dataset features, please install it:\n" +
                " npm install @orq-ai/node\n" +
                " # or\n" +
                " yarn add @orq-ai/node\n" +
                " # or\n" +
                " bun add @orq-ai/node", { cause: error });
        }
        throw new Error(`Failed to setup ORQ client: ${message || error}`, { cause: error });
    }
}
|
|
25
|
+
/**
 * Loads the datapoints of an Orq dataset and adapts them to the evaluator's
 * data point shape.
 *
 * @param orqClient - Client exposing `datasets.listDatapoints`.
 * @param datasetId - Identifier of the dataset to read.
 * @returns An array of already-resolved promises, one per datapoint, each
 * yielding `{ inputs, expectedOutput }` (`inputs` falls back to `{}` when falsy).
 * @throws {Error} "Failed to fetch dataset <id>: <reason>" when listing or
 * mapping the datapoints fails.
 */
async function fetchDatasetAsDataPoints(orqClient, datasetId) {
    const asResolvedDataPoint = (entry) => Promise.resolve({
        inputs: entry.inputs || {},
        expectedOutput: entry.expectedOutput,
    });
    try {
        const listing = await orqClient.datasets.listDatapoints({ datasetId });
        return listing.data.map((entry) => asResolvedDataPoint(entry));
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : String(failure);
        throw new Error(`Failed to fetch dataset ${datasetId}: ${reason}`);
    }
}
|
|
37
|
+
/**
 * Runs an evaluation: resolves the data points, executes every job on each of
 * them and scores the job outputs with the configured evaluators.
 *
 * @param _name - The name of the evaluation run (currently unused).
 * @param params - The parameters for the evaluation run: `data`, `jobs`, and the
 * optional `evaluators` (default `[]`), `parallelism` (default `1`) and `print`
 * (default `true`).
 * @returns The results of the evaluation run.
 * @throws {Error} When `data` is a `{ datasetId }` config and `ORQ_API_KEY` is
 * not set, or when the Orq client or the dataset cannot be loaded.
 */
export async function evaluatorq(_name, params) {
    const { data, evaluators = [], jobs, parallelism = 1, print = true } = params;
    let dataPromises;
    // Handle datasetId case
    if ("datasetId" in data) {
        const orqApiKey = process.env.ORQ_API_KEY;
        if (!orqApiKey) {
            throw new Error("ORQ_API_KEY environment variable must be set to fetch datapoints from Orq platform.");
        }
        // The Orq client is created only on this path so that inline data keeps
        // working when ORQ_API_KEY is set but the optional @orq-ai/node package
        // is not installed.
        const orqClient = await setupOrqClient(orqApiKey);
        dataPromises = await fetchDatasetAsDataPoints(orqClient, data.datasetId);
    }
    else {
        dataPromises = data;
    }
    // Reuse the shared pipeline (progress tracking, processing, optional table
    // output) and convert the Effect back to a Promise.
    return Effect.runPromise(runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print));
}
|
|
86
|
+
/**
 * Effect-based counterpart of `evaluatorq`: describes an evaluation run without
 * executing it.
 *
 * @param _name - The name of the evaluation run (currently unused).
 * @param params - `data`, `jobs`, and the optional `evaluators` (default `[]`),
 * `parallelism` (default `1`) and `print` (default `true`).
 * @returns An Effect producing the evaluation results. When `data` is a
 * `{ datasetId }` config the Effect fails if `ORQ_API_KEY` is unset or if the
 * Orq client or the dataset cannot be loaded.
 */
export const evaluatorqEffect = (_name, params) => {
    const { data, evaluators = [], jobs, parallelism = 1, print = true } = params;
    // Inline data needs no Orq access and goes straight to the shared pipeline.
    if (!("datasetId" in data)) {
        return runEvaluationEffect(data, evaluators, jobs, parallelism, print);
    }
    const describeSetupFailure = (error) => {
        const detail = error instanceof Error ? error.message : String(error);
        return new Error(`Failed to setup Orq client: ${detail}`);
    };
    const asFetchError = (error) => {
        if (error instanceof Error) {
            return error;
        }
        return new Error(`Failed to fetch dataset: ${String(error)}`);
    };
    return Effect.gen(function* (_) {
        const apiKey = process.env.ORQ_API_KEY;
        if (!apiKey) {
            return yield* _(Effect.fail(new Error("ORQ_API_KEY environment variable must be set to fetch datasets from Orq platform.")));
        }
        const orqClient = yield* _(Effect.tryPromise({
            try: () => setupOrqClient(apiKey),
            catch: describeSetupFailure,
        }));
        if (!orqClient) {
            return yield* _(Effect.fail(new Error("Failed to setup Orq client")));
        }
        const dataPromises = yield* _(Effect.tryPromise({
            try: () => fetchDatasetAsDataPoints(orqClient, data.datasetId),
            catch: asFetchError,
        }));
        return yield* _(runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print));
    });
};
|
|
115
|
+
/**
 * Shared evaluation pipeline used once the data points are available.
 *
 * @param dataPromises - Array of data points or of promises resolving to data points.
 * @param evaluators - `{ name, scorer }` entries (default `[]`).
 * @param jobs - Job functions to run for every data point.
 * @param parallelism - Concurrency used across data points and, per data point, across jobs.
 * @param print - When truthy, the results table is rendered and progress tracking is enabled.
 * @returns An Effect producing the flattened per-data-point results.
 */
const runEvaluationEffect = (dataPromises, evaluators = [], jobs, parallelism, print) => {
    const processAllDataPoints = Effect.gen(function* (_) {
        const progress = yield* _(ProgressService);
        // Initialize progress
        yield* _(progress.updateProgress({
            totalDataPoints: dataPromises.length,
            currentDataPoint: 0,
            phase: "initializing",
        }));
        // Pair every entry with its row index before processing.
        const indexedEntries = dataPromises.map((dataPromise, index) => ({ dataPromise, index }));
        const processEntry = ({ dataPromise, index }) => {
            // Plain values are wrapped so downstream code always receives a promise.
            const pending = dataPromise instanceof Promise
                ? dataPromise
                : Promise.resolve(dataPromise);
            return processDataPointEffect(pending, index, jobs, evaluators, parallelism);
        };
        const perDataPoint = yield* _(Effect.forEach(indexedEntries, processEntry, { concurrency: parallelism }));
        return perDataPoint.flat();
    });
    // Conditionally add table display
    const maybeShowTable = print
        ? Effect.tap((results) => displayResultsTableEffect(results))
        : Effect.tap(() => Effect.void);
    return pipe(processAllDataPoints, maybeShowTable,
    // Provide the progress service
    Effect.provide(ProgressServiceLive),
    // Wrap with progress tracking
    (effect) => withProgress(effect, print));
};
|
|
138
|
+
/**
 * Composable variant of `evaluatorqEffect` that renders the results table after
 * the evaluation has produced its results.
 *
 * NOTE(review): `evaluatorqEffect` appears to render the table itself unless
 * `params.print` is `false`, so the table may be printed twice here — confirm
 * this is intended.
 *
 * @param name - The name of the evaluation run.
 * @param params - The parameters for the evaluation run.
 * @returns An Effect producing the evaluation results.
 */
export const evaluatorqWithTableEffect = (name, params) => {
    const evaluation = evaluatorqEffect(name, params);
    const renderTable = Effect.tap((results) => displayResultsTableEffect(results));
    return pipe(evaluation, renderTable);
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { Context, Effect, Layer } from "effect";
|
|
2
|
+
export interface ProgressState {
|
|
3
|
+
totalDataPoints: number;
|
|
4
|
+
currentDataPoint: number;
|
|
5
|
+
currentJob?: string;
|
|
6
|
+
currentEvaluator?: string;
|
|
7
|
+
phase: "initializing" | "processing" | "evaluating" | "completed";
|
|
8
|
+
}
|
|
9
|
+
export interface ProgressService {
|
|
10
|
+
readonly updateProgress: (update: Partial<ProgressState>) => Effect.Effect<void>;
|
|
11
|
+
readonly startSpinner: () => Effect.Effect<void>;
|
|
12
|
+
readonly stopSpinner: () => Effect.Effect<void>;
|
|
13
|
+
readonly showMessage: (message: string) => Effect.Effect<void>;
|
|
14
|
+
}
|
|
15
|
+
export declare const ProgressService: Context.Tag<ProgressService, ProgressService>;
|
|
16
|
+
export declare const ProgressServiceLive: Layer.Layer<ProgressService, never, never>;
|
|
17
|
+
export declare const withProgress: <R, E, A>(effect: Effect.Effect<A, E, R>, showProgress?: boolean) => Effect.Effect<A, E, R>;
|
|
18
|
+
//# sourceMappingURL=progress.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"progress.d.ts","sourceRoot":"","sources":["../../src/lib/progress.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,QAAQ,CAAC;AAIhD,MAAM,WAAW,aAAa;IAC5B,eAAe,EAAE,MAAM,CAAC;IACxB,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,KAAK,EAAE,cAAc,GAAG,YAAY,GAAG,YAAY,GAAG,WAAW,CAAC;CACnE;AAGD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,cAAc,EAAE,CACvB,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,KAC3B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,YAAY,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACjD,QAAQ,CAAC,WAAW,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAChD,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;CAChE;AAGD,eAAO,MAAM,eAAe,+CAC4B,CAAC;AA2GzD,eAAO,MAAM,mBAAmB,4CAG/B,CAAC;AAGF,eAAO,MAAM,YAAY,GAAI,CAAC,EAAE,CAAC,EAAE,CAAC,EAClC,QAAQ,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,EAC9B,eAAc,OAAc,KAC3B,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CA4BvB,CAAC"}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
import { Context, Effect, Layer } from "effect";
|
|
3
|
+
import ora from "ora";
|
|
4
|
+
// Context tag for the progress service
|
|
5
|
+
// Effects obtain the progress service by yielding this tag (e.g. `yield* _(ProgressService)`).
export const ProgressService = Context.GenericTag("ProgressService");
|
|
6
|
+
// Spinner instance
|
|
7
|
+
let spinner = null;
|
|
8
|
+
// Builds the progress service implementation: it keeps the evaluation state in a
// closure and mirrors it into the module-level ora spinner while one is active.
const makeProgressService = () => {
    let state = {
        totalDataPoints: 0,
        currentDataPoint: 0,
        phase: "initializing",
    };
    // Renders the spinner text for the current phase; unknown phases render as "".
    const formatProgressText = () => {
        const percentage = state.totalDataPoints > 0
            ? Math.round((state.currentDataPoint / state.totalDataPoints) * 100)
            : 0;
        if (state.phase === "initializing") {
            return chalk.cyan("Initializing evaluation...");
        }
        if (state.phase === "processing") {
            const base = chalk.cyan(`Processing data point ${state.currentDataPoint}/${state.totalDataPoints} (${percentage}%)`);
            if (!state.currentJob) {
                return base;
            }
            return base + chalk.gray(` - Running job: ${chalk.white(state.currentJob)}`);
        }
        if (state.phase === "evaluating") {
            const base = chalk.cyan(`Evaluating results ${state.currentDataPoint}/${state.totalDataPoints} (${percentage}%)`);
            if (!state.currentEvaluator) {
                return base;
            }
            return base + chalk.gray(` - Running evaluator: ${chalk.white(state.currentEvaluator)}`);
        }
        if (state.phase === "completed") {
            return chalk.green("✓ Evaluation completed");
        }
        return "";
    };
    // Merges a partial update into the state and refreshes the spinner text.
    const updateProgress = (update) => Effect.sync(() => {
        state = Object.assign({}, state, update);
        if (spinner) {
            spinner.text = formatProgressText();
        }
    });
    // Starts the spinner unless one is already running.
    const startSpinner = () => Effect.sync(() => {
        if (spinner) {
            return;
        }
        // Reserve space first by printing empty lines
        process.stdout.write("\n\n\n");
        // Move cursor back up to where we want the spinner
        process.stdout.write("\x1b[3A");
        spinner = ora({
            text: formatProgressText(),
            spinner: "dots",
            color: "cyan",
        });
        spinner.start();
    });
    // Stops the active spinner, marking it as succeeded when the run completed.
    const stopSpinner = () => Effect.sync(() => {
        if (!spinner) {
            return;
        }
        if (state.phase === "completed") {
            spinner.succeed(chalk.green("✓ Evaluation completed successfully"));
        }
        else {
            spinner.stop();
        }
        // Just one newline since table display adds its own
        process.stdout.write("\n");
        spinner = null;
    });
    // Shows a message through the spinner when active, otherwise on the console.
    const showMessage = (message) => Effect.sync(() => {
        if (!spinner) {
            console.log(message);
            return;
        }
        spinner.info(message);
    });
    return { updateProgress, startSpinner, stopSpinner, showMessage };
};
|
|
88
|
+
// Create a layer for the progress service
|
|
89
|
+
// makeProgressService() is invoked once, when this module is evaluated, so every
// effect provided with this layer shares the same service instance and progress state.
export const ProgressServiceLive = Layer.succeed(ProgressService, makeProgressService());
|
|
90
|
+
/**
 * Wraps an effect with spinner-based progress reporting.
 *
 * @param effect - The effect to run.
 * @param showProgress - When `false`, `effect` is returned unchanged (default `true`).
 * @returns An effect that starts the spinner, runs `effect`, marks the progress
 * as completed and stops the spinner. If `effect` does not succeed, the spinner
 * is stopped as well and the original failure is propagated unchanged.
 */
export const withProgress = (effect, showProgress = true) => {
    if (!showProgress) {
        return effect;
    }
    return Effect.gen(function* (_) {
        const progress = yield* _(ProgressService);
        // Start spinner
        yield* _(progress.startSpinner());
        // A failing effect short-circuits the generator instead of throwing into
        // it, so a try/catch around the yield would never run. onError attaches
        // the cleanup to the effect itself so the spinner is always stopped.
        const result = yield* _(Effect.onError(effect, () => progress.stopSpinner()));
        // Update to completed state
        yield* _(progress.updateProgress({ phase: "completed" }));
        // Stop spinner with success
        yield* _(progress.stopSpinner());
        return result;
    }).pipe(Effect.provide(ProgressServiceLive));
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"table-display.d.ts","sourceRoot":"","sources":["../../src/lib/table-display.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAGhC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AA0TnD,eAAO,MAAM,yBAAyB,GACpC,SAAS,gBAAgB,KACxB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CA+B/B,CAAC"}
|