@orq-ai/evaluatorq 1.0.0-9 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +141 -73
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/lib/effects.d.ts.map +1 -1
- package/dist/lib/effects.js +12 -7
- package/dist/lib/evaluatorq.d.ts.map +1 -1
- package/dist/lib/evaluatorq.js +50 -26
- package/dist/lib/job-helper.d.ts +17 -0
- package/dist/lib/job-helper.d.ts.map +1 -0
- package/dist/lib/job-helper.js +33 -0
- package/dist/lib/send-results.d.ts +32 -0
- package/dist/lib/send-results.d.ts.map +1 -0
- package/dist/lib/send-results.js +70 -0
- package/dist/lib/table-display.d.ts.map +1 -1
- package/dist/lib/table-display.js +34 -3
- package/dist/lib/types.d.ts +25 -7
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/tsconfig.lib.tsbuildinfo +1 -1
- package/package.json +18 -2
- package/src/index.ts +0 -4
- package/src/lib/effects.ts +0 -174
- package/src/lib/evaluatorq.ts +0 -260
- package/src/lib/progress.ts +0 -170
- package/src/lib/table-display.ts +0 -352
- package/src/lib/types.ts +0 -79
- package/src/lib/visualizer/html-generator.ts +0 -364
- package/src/lib/visualizer/index.ts +0 -70
- package/src/lib/visualizer/types.ts +0 -17
- package/tsconfig.json +0 -10
- package/tsconfig.lib.json +0 -14
package/README.md
CHANGED
|
@@ -32,33 +32,36 @@ npm install @orq-ai/node
|
|
|
32
32
|
### Basic Usage
|
|
33
33
|
|
|
34
34
|
```typescript
|
|
35
|
-
import { evaluatorq } from "@orq-ai/evaluatorq";
|
|
35
|
+
import { evaluatorq, job } from "@orq-ai/evaluatorq";
|
|
36
|
+
|
|
37
|
+
const textAnalyzer = job("text-analyzer", async (data) => {
|
|
38
|
+
const text = data.inputs.text;
|
|
39
|
+
const analysis = {
|
|
40
|
+
length: text.length,
|
|
41
|
+
wordCount: text.split(" ").length,
|
|
42
|
+
uppercase: text.toUpperCase(),
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
return analysis;
|
|
46
|
+
});
|
|
36
47
|
|
|
37
48
|
await evaluatorq("text-analysis", {
|
|
38
49
|
data: [
|
|
39
50
|
{ inputs: { text: "Hello world" } },
|
|
40
51
|
{ inputs: { text: "Testing evaluation" } },
|
|
41
52
|
],
|
|
42
|
-
jobs: [
|
|
43
|
-
async (data) => {
|
|
44
|
-
const text = data.inputs.text;
|
|
45
|
-
const analysis = {
|
|
46
|
-
length: text.length,
|
|
47
|
-
wordCount: text.split(" ").length,
|
|
48
|
-
uppercase: text.toUpperCase(),
|
|
49
|
-
};
|
|
50
|
-
|
|
51
|
-
return {
|
|
52
|
-
name: "text-analyzer",
|
|
53
|
-
output: analysis,
|
|
54
|
-
};
|
|
55
|
-
},
|
|
56
|
-
],
|
|
53
|
+
jobs: [textAnalyzer],
|
|
57
54
|
evaluators: [
|
|
58
55
|
{
|
|
59
56
|
name: "length-check",
|
|
60
57
|
scorer: async ({ output }) => {
|
|
61
|
-
|
|
58
|
+
const passesCheck = output.length > 10;
|
|
59
|
+
return {
|
|
60
|
+
value: passesCheck ? 1 : 0,
|
|
61
|
+
explanation: passesCheck
|
|
62
|
+
? "Output length is sufficient"
|
|
63
|
+
: `Output too short (${output.length} chars, need >10)`,
|
|
64
|
+
};
|
|
62
65
|
},
|
|
63
66
|
},
|
|
64
67
|
],
|
|
@@ -68,28 +71,33 @@ await evaluatorq("text-analysis", {
|
|
|
68
71
|
### Using Orq Platform Datasets
|
|
69
72
|
|
|
70
73
|
```typescript
|
|
71
|
-
import { evaluatorq } from "@orq-ai/evaluatorq";
|
|
74
|
+
import { evaluatorq, job } from "@orq-ai/evaluatorq";
|
|
75
|
+
|
|
76
|
+
const processor = job("processor", async (data) => {
|
|
77
|
+
// Process each data point from the dataset
|
|
78
|
+
return processData(data);
|
|
79
|
+
});
|
|
72
80
|
|
|
73
81
|
// Requires ORQ_API_KEY environment variable
|
|
74
82
|
await evaluatorq("dataset-evaluation", {
|
|
75
83
|
data: {
|
|
76
84
|
datasetId: "your-dataset-id", // From Orq platform
|
|
77
85
|
},
|
|
78
|
-
jobs: [
|
|
79
|
-
async (data) => {
|
|
80
|
-
// Process each data point from the dataset
|
|
81
|
-
return {
|
|
82
|
-
name: "processor",
|
|
83
|
-
output: processData(data),
|
|
84
|
-
};
|
|
85
|
-
},
|
|
86
|
-
],
|
|
86
|
+
jobs: [processor],
|
|
87
87
|
evaluators: [
|
|
88
88
|
{
|
|
89
89
|
name: "accuracy",
|
|
90
90
|
scorer: async ({ data, output }) => {
|
|
91
91
|
// Compare output with expected results
|
|
92
|
-
|
|
92
|
+
const score = calculateScore(output, data.expectedOutput);
|
|
93
|
+
return {
|
|
94
|
+
value: score,
|
|
95
|
+
explanation: score > 0.8
|
|
96
|
+
? "High accuracy match"
|
|
97
|
+
: score > 0.5
|
|
98
|
+
? "Partial match"
|
|
99
|
+
: "Low accuracy match",
|
|
100
|
+
};
|
|
93
101
|
},
|
|
94
102
|
},
|
|
95
103
|
],
|
|
@@ -103,22 +111,15 @@ await evaluatorq("dataset-evaluation", {
|
|
|
103
111
|
Run multiple jobs in parallel for each data point:
|
|
104
112
|
|
|
105
113
|
```typescript
|
|
114
|
+
import { job } from "@orq-ai/evaluatorq";
|
|
115
|
+
|
|
116
|
+
const preprocessor = job("preprocessor", async (data) => preprocess(data));
|
|
117
|
+
const analyzer = job("analyzer", async (data) => analyze(data));
|
|
118
|
+
const transformer = job("transformer", async (data) => transform(data));
|
|
119
|
+
|
|
106
120
|
await evaluatorq("multi-job-eval", {
|
|
107
121
|
data: [...],
|
|
108
|
-
jobs: [
|
|
109
|
-
async (data) => ({
|
|
110
|
-
name: "preprocessor",
|
|
111
|
-
output: preprocess(data),
|
|
112
|
-
}),
|
|
113
|
-
async (data) => ({
|
|
114
|
-
name: "analyzer",
|
|
115
|
-
output: analyze(data),
|
|
116
|
-
}),
|
|
117
|
-
async (data) => ({
|
|
118
|
-
name: "transformer",
|
|
119
|
-
output: transform(data),
|
|
120
|
-
}),
|
|
121
|
-
],
|
|
122
|
+
jobs: [preprocessor, analyzer, transformer],
|
|
122
123
|
evaluators: [...],
|
|
123
124
|
});
|
|
124
125
|
```
|
|
@@ -126,19 +127,18 @@ await evaluatorq("multi-job-eval", {
|
|
|
126
127
|
#### Custom Error Handling
|
|
127
128
|
|
|
128
129
|
```typescript
|
|
130
|
+
import { job } from "@orq-ai/evaluatorq";
|
|
131
|
+
|
|
132
|
+
const riskyJob = job("risky-job", async (data) => {
|
|
133
|
+
// Errors are captured and included in the evaluation results
|
|
134
|
+
// The job name is preserved even when errors occur
|
|
135
|
+
const result = await riskyOperation(data);
|
|
136
|
+
return result;
|
|
137
|
+
});
|
|
138
|
+
|
|
129
139
|
await evaluatorq("error-handling", {
|
|
130
140
|
data: [...],
|
|
131
|
-
jobs: [
|
|
132
|
-
async (data) => {
|
|
133
|
-
try {
|
|
134
|
-
const result = await riskyOperation(data);
|
|
135
|
-
return { name: "risky-job", output: result };
|
|
136
|
-
} catch (error) {
|
|
137
|
-
// Errors are captured and included in the evaluation results
|
|
138
|
-
throw new Error(`Failed to process: ${error.message}`);
|
|
139
|
-
}
|
|
140
|
-
},
|
|
141
|
-
],
|
|
141
|
+
jobs: [riskyJob],
|
|
142
142
|
evaluators: [...],
|
|
143
143
|
});
|
|
144
144
|
```
|
|
@@ -162,7 +162,55 @@ await evaluatorq("async-eval", {
|
|
|
162
162
|
|
|
163
163
|
### Environment Variables
|
|
164
164
|
|
|
165
|
-
- `ORQ_API_KEY`: API key for Orq platform integration (required for dataset access)
|
|
165
|
+
- `ORQ_API_KEY`: API key for Orq platform integration (required for dataset access and sending results)
|
|
166
|
+
|
|
167
|
+
## 📊 Orq Platform Integration
|
|
168
|
+
|
|
169
|
+
### Automatic Result Sending
|
|
170
|
+
|
|
171
|
+
When the `ORQ_API_KEY` environment variable is set, evaluatorq automatically sends evaluation results to the Orq platform for visualization and analysis.
|
|
172
|
+
|
|
173
|
+
```typescript
|
|
174
|
+
import { evaluatorq, job } from "@orq-ai/evaluatorq";
|
|
175
|
+
|
|
176
|
+
// Results are automatically sent when ORQ_API_KEY environment variable is present
|
|
177
|
+
await evaluatorq("my-evaluation", {
|
|
178
|
+
data: [...],
|
|
179
|
+
jobs: [...],
|
|
180
|
+
evaluators: [...],
|
|
181
|
+
sendResults: true, // Enabled by default when ORQ_API_KEY environment variable is set
|
|
182
|
+
});
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
#### Configuration Options
|
|
186
|
+
|
|
187
|
+
- `sendResults`: Boolean flag to control result sending (defaults to `true` when `ORQ_API_KEY` is set)
|
|
188
|
+
|
|
189
|
+
#### What Gets Sent
|
|
190
|
+
|
|
191
|
+
When enabled, the following information is sent to Orq:
|
|
192
|
+
- Evaluation name
|
|
193
|
+
- Dataset ID (when using Orq datasets)
|
|
194
|
+
- Job results with outputs and errors
|
|
195
|
+
- Evaluator scores with values and explanations
|
|
196
|
+
- Execution timing information
|
|
197
|
+
|
|
198
|
+
Note: Evaluator explanations are included in the data sent to Orq but are not displayed in the terminal output to keep the console clean.
|
|
199
|
+
|
|
200
|
+
#### Result Visualization
|
|
201
|
+
|
|
202
|
+
After successful submission, you'll see a console message with a link to view your results:
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
📊 View your evaluation results at: <url to the evaluation>
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
The Orq platform provides:
|
|
209
|
+
- Interactive result tables
|
|
210
|
+
- Score statistics
|
|
211
|
+
- Performance metrics
|
|
212
|
+
- Historical comparisons
|
|
213
|
+
|
|
166
214
|
|
|
167
215
|
## 📚 API Reference
|
|
168
216
|
|
|
@@ -185,27 +233,54 @@ Promise that resolves when evaluation is complete.
|
|
|
185
233
|
### Types
|
|
186
234
|
|
|
187
235
|
```typescript
|
|
236
|
+
type Output = string | number | boolean | Record<string, unknown> | null;
|
|
237
|
+
|
|
188
238
|
interface DataPoint {
|
|
189
|
-
inputs: Record<string,
|
|
190
|
-
expectedOutput?:
|
|
191
|
-
metadata?: Record<string, any>;
|
|
239
|
+
inputs: Record<string, unknown>;
|
|
240
|
+
expectedOutput?: Output;
|
|
192
241
|
}
|
|
193
242
|
|
|
194
243
|
interface JobResult {
|
|
195
|
-
|
|
196
|
-
output:
|
|
244
|
+
jobName: string;
|
|
245
|
+
output: Output;
|
|
246
|
+
error?: Error;
|
|
247
|
+
evaluatorScores?: EvaluatorScore[];
|
|
197
248
|
}
|
|
198
249
|
|
|
199
|
-
interface
|
|
200
|
-
|
|
201
|
-
|
|
250
|
+
interface EvaluatorScore {
|
|
251
|
+
evaluatorName: string;
|
|
252
|
+
score: EvaluationResult<number | boolean | string>;
|
|
253
|
+
error?: Error;
|
|
202
254
|
}
|
|
203
255
|
|
|
204
|
-
|
|
256
|
+
type Job = (
|
|
257
|
+
data: DataPoint,
|
|
258
|
+
row: number,
|
|
259
|
+
) => Promise<{
|
|
260
|
+
name: string;
|
|
261
|
+
output: Output;
|
|
262
|
+
}>;
|
|
263
|
+
|
|
264
|
+
// Helper function for creating jobs with preserved names on errors
|
|
265
|
+
function job(
|
|
266
|
+
name: string,
|
|
267
|
+
fn: (data: DataPoint, row: number) => Promise<Output> | Output,
|
|
268
|
+
): Job;
|
|
269
|
+
|
|
270
|
+
type ScorerParameter = {
|
|
205
271
|
data: DataPoint;
|
|
206
|
-
output:
|
|
207
|
-
|
|
208
|
-
|
|
272
|
+
output: Output;
|
|
273
|
+
};
|
|
274
|
+
|
|
275
|
+
type EvaluationResult<T> = {
|
|
276
|
+
value: T;
|
|
277
|
+
explanation?: string;
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
type Scorer =
|
|
281
|
+
| ((params: ScorerParameter) => Promise<EvaluationResult<string>>)
|
|
282
|
+
| ((params: ScorerParameter) => Promise<EvaluationResult<number>>)
|
|
283
|
+
| ((params: ScorerParameter) => Promise<EvaluationResult<boolean>>);
|
|
209
284
|
```
|
|
210
285
|
|
|
211
286
|
## 🛠️ Development
|
|
@@ -216,11 +291,4 @@ bunx nx build evaluatorq
|
|
|
216
291
|
|
|
217
292
|
# Run type checking
|
|
218
293
|
bunx nx typecheck evaluatorq
|
|
219
|
-
|
|
220
|
-
# Run tests
|
|
221
|
-
bunx nx test evaluatorq
|
|
222
294
|
```
|
|
223
|
-
|
|
224
|
-
## 📄 License
|
|
225
|
-
|
|
226
|
-
This is free and unencumbered software released into the public domain. See [UNLICENSE](https://unlicense.org) for details.
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export * from "./lib/evaluatorq.js";
|
|
2
|
+
export { job } from "./lib/job-helper.js";
|
|
3
|
+
export { sendResultsToOrqEffect } from "./lib/send-results.js";
|
|
2
4
|
export { displayResultsTableEffect } from "./lib/table-display.js";
|
|
3
5
|
export * from "./lib/types.js";
|
|
4
|
-
export * from "./lib/visualizer/index.js";
|
|
5
6
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,GAAG,EAAE,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AACnE,cAAc,gBAAgB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export * from "./lib/evaluatorq.js";
|
|
2
|
+
export { job } from "./lib/job-helper.js";
|
|
3
|
+
export { sendResultsToOrqEffect } from "./lib/send-results.js";
|
|
2
4
|
export { displayResultsTableEffect } from "./lib/table-display.js";
|
|
3
5
|
export * from "./lib/types.js";
|
|
4
|
-
export * from "./lib/visualizer/index.js";
|
package/dist/lib/effects.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"effects.d.ts","sourceRoot":"","sources":["../../src/lib/effects.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAEtC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,KAAK,EACV,SAAS,EACT,eAAe,EACf,GAAG,EACH,SAAS,EACT,MAAM,EACP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,sBAAsB,CACpC,WAAW,EAAE,OAAO,CAAC,SAAS,CAAC,EAC/B,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,GAAG,EAAE,EACX,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,EAC9C,WAAW,EAAE,MAAM,GAClB,MAAM,CAAC,MAAM,CAAC,eAAe,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CA4C1D;AAED,wBAAgB,gBAAgB,CAC9B,GAAG,EAAE,GAAG,EACR,SAAS,EAAE,SAAS,EACpB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,eAAe,CAAC,
|
|
1
|
+
{"version":3,"file":"effects.d.ts","sourceRoot":"","sources":["../../src/lib/effects.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAEtC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,KAAK,EACV,SAAS,EACT,eAAe,EACf,GAAG,EACH,SAAS,EACT,MAAM,EACP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,sBAAsB,CACpC,WAAW,EAAE,OAAO,CAAC,SAAS,CAAC,EAC/B,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,GAAG,EAAE,EACX,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,EAC9C,WAAW,EAAE,MAAM,GAClB,MAAM,CAAC,MAAM,CAAC,eAAe,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CA4C1D;AAED,wBAAgB,gBAAgB,CAC9B,GAAG,EAAE,GAAG,EACR,SAAS,EAAE,SAAS,EACpB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,eAAe,CAAC,CA4GlD"}
|
package/dist/lib/effects.js
CHANGED
|
@@ -62,10 +62,10 @@ export function processJobEffect(job, dataPoint, rowIndex, evaluators) {
|
|
|
62
62
|
catch: (error) => error,
|
|
63
63
|
}), Effect.map((score) => ({
|
|
64
64
|
evaluatorName: evaluator.name,
|
|
65
|
-
score
|
|
65
|
+
score,
|
|
66
66
|
})), Effect.catchAll((error) => Effect.succeed({
|
|
67
67
|
evaluatorName: evaluator.name,
|
|
68
|
-
score: "",
|
|
68
|
+
score: { value: "" },
|
|
69
69
|
error: error,
|
|
70
70
|
}))));
|
|
71
71
|
return score;
|
|
@@ -81,9 +81,14 @@ export function processJobEffect(job, dataPoint, rowIndex, evaluators) {
|
|
|
81
81
|
output: jobResult.output,
|
|
82
82
|
evaluatorScores: [],
|
|
83
83
|
};
|
|
84
|
-
}).pipe(Effect.catchAll((error) =>
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
84
|
+
}).pipe(Effect.catchAll((error) => {
|
|
85
|
+
// Check if the error has a jobName property (set by our job helper)
|
|
86
|
+
const errorWithJobName = error;
|
|
87
|
+
const jobName = errorWithJobName.jobName || "Unknown";
|
|
88
|
+
return Effect.succeed({
|
|
89
|
+
jobName,
|
|
90
|
+
output: null,
|
|
91
|
+
error,
|
|
92
|
+
});
|
|
93
|
+
}));
|
|
89
94
|
}
|
package/dist/lib/evaluatorq.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluatorq.d.ts","sourceRoot":"","sources":["../../src/lib/evaluatorq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"evaluatorq.d.ts","sourceRoot":"","sources":["../../src/lib/evaluatorq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAYtC,OAAO,KAAK,EAEV,eAAe,EACf,gBAAgB,EAEjB,MAAM,YAAY,CAAC;AA+CpB;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,gBAAgB,CAAC,CAmG3B;AAGD,eAAO,MAAM,gBAAgB,GAC3B,OAAO,MAAM,EACb,QAAQ,eAAe,KACtB,MAAM,CAAC,MAAM,CAAC,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAmF9C,CAAC;AAgFF,eAAO,MAAM,yBAAyB,GACpC,MAAM,MAAM,EACZ,QAAQ,eAAe,KACtB,MAAM,CAAC,MAAM,CAAC,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAI5C,CAAC"}
|
package/dist/lib/evaluatorq.js
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import { Effect, pipe } from "effect";
|
|
2
2
|
import { processDataPointEffect } from "./effects.js";
|
|
3
3
|
import { ProgressService, ProgressServiceLive, withProgress, } from "./progress.js";
|
|
4
|
+
import { sendResultsToOrqEffect } from "./send-results.js";
|
|
4
5
|
import { displayResultsTableEffect } from "./table-display.js";
|
|
5
6
|
async function setupOrqClient(apiKey) {
|
|
6
7
|
try {
|
|
7
8
|
const client = await import("@orq-ai/node");
|
|
8
|
-
return new client.Orq({ apiKey
|
|
9
|
+
return new client.Orq({ apiKey });
|
|
9
10
|
}
|
|
10
11
|
catch (error) {
|
|
11
12
|
const err = error;
|
|
@@ -40,18 +41,23 @@ async function fetchDatasetAsDataPoints(orqClient, datasetId) {
|
|
|
40
41
|
* @returns The results of the evaluation run.
|
|
41
42
|
*/
|
|
42
43
|
export async function evaluatorq(_name, params) {
|
|
43
|
-
const { data, evaluators = [], jobs, parallelism = 1, print = true } = params;
|
|
44
|
+
const { data, evaluators = [], jobs, parallelism = 1, print = true, sendResults, description, } = params;
|
|
44
45
|
let orqClient;
|
|
45
46
|
const orqApiKey = process.env.ORQ_API_KEY;
|
|
46
47
|
if (orqApiKey) {
|
|
47
48
|
orqClient = await setupOrqClient(orqApiKey);
|
|
48
49
|
}
|
|
50
|
+
// Default sendResults to true when API key is available
|
|
51
|
+
const shouldSendResults = sendResults !== undefined ? sendResults : Boolean(orqApiKey);
|
|
52
|
+
const startTime = new Date();
|
|
49
53
|
let dataPromises;
|
|
54
|
+
let datasetId;
|
|
50
55
|
// Handle datasetId case
|
|
51
56
|
if ("datasetId" in data) {
|
|
52
57
|
if (!orqApiKey || !orqClient) {
|
|
53
58
|
throw new Error("ORQ_API_KEY environment variable must be set to fetch datapoints from Orq platform.");
|
|
54
59
|
}
|
|
60
|
+
datasetId = data.datasetId;
|
|
55
61
|
dataPromises = await fetchDatasetAsDataPoints(orqClient, data.datasetId);
|
|
56
62
|
}
|
|
57
63
|
else {
|
|
@@ -67,13 +73,19 @@ export async function evaluatorq(_name, params) {
|
|
|
67
73
|
phase: "initializing",
|
|
68
74
|
}));
|
|
69
75
|
// Process data points
|
|
70
|
-
const results = yield* _(Effect.forEach(dataPromises.map((dataPromise, index) => ({ dataPromise, index })), ({ dataPromise, index }) => processDataPointEffect(dataPromise
|
|
76
|
+
const results = yield* _(Effect.forEach(dataPromises.map((dataPromise, index) => ({ dataPromise, index })), ({ dataPromise, index }) => processDataPointEffect(dataPromise instanceof Promise
|
|
77
|
+
? dataPromise
|
|
78
|
+
: Promise.resolve(dataPromise), index, jobs, evaluators, parallelism), { concurrency: parallelism }));
|
|
71
79
|
return results.flat();
|
|
72
80
|
}),
|
|
73
81
|
// Conditionally add table display
|
|
74
82
|
print
|
|
75
83
|
? Effect.tap((results) => displayResultsTableEffect(results))
|
|
76
84
|
: Effect.tap(() => Effect.void),
|
|
85
|
+
// Conditionally send results to Orq
|
|
86
|
+
shouldSendResults && orqApiKey
|
|
87
|
+
? Effect.tap((results) => sendResultsToOrqEffect(orqApiKey, _name, description, datasetId, results, startTime, new Date()))
|
|
88
|
+
: Effect.tap(() => Effect.void),
|
|
77
89
|
// Provide the progress service
|
|
78
90
|
Effect.provide(ProgressServiceLive),
|
|
79
91
|
// Wrap with progress tracking
|
|
@@ -83,7 +95,8 @@ export async function evaluatorq(_name, params) {
|
|
|
83
95
|
}
|
|
84
96
|
// Create an Effect that runs evaluation and optionally displays results
|
|
85
97
|
export const evaluatorqEffect = (_name, params) => {
|
|
86
|
-
const { data, evaluators = [], jobs, parallelism = 1, print = true } = params;
|
|
98
|
+
const { data, evaluators = [], jobs, parallelism = 1, print = true, sendResults, description, } = params;
|
|
99
|
+
const startTime = new Date();
|
|
87
100
|
// Handle datasetId case
|
|
88
101
|
if ("datasetId" in data) {
|
|
89
102
|
return Effect.gen(function* (_) {
|
|
@@ -104,32 +117,43 @@ export const evaluatorqEffect = (_name, params) => {
|
|
|
104
117
|
? error
|
|
105
118
|
: new Error(`Failed to fetch dataset: ${String(error)}`),
|
|
106
119
|
}));
|
|
107
|
-
return yield* _(runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print));
|
|
120
|
+
return yield* _(runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print, sendResults, description, _name, data.datasetId, apiKey, startTime));
|
|
108
121
|
});
|
|
109
122
|
}
|
|
110
123
|
const dataPromises = data;
|
|
111
|
-
return runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print);
|
|
124
|
+
return runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print, sendResults, description, _name, undefined, undefined, startTime);
|
|
112
125
|
};
|
|
113
126
|
// Extract common evaluation logic
|
|
114
|
-
const runEvaluationEffect = (dataPromises, evaluators = [], jobs, parallelism, print
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
})
|
|
126
|
-
//
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
//
|
|
133
|
-
|
|
127
|
+
const runEvaluationEffect = (dataPromises, evaluators = [], jobs, parallelism, print, sendResults, description, evaluationName, datasetId, apiKey, startTime) => {
|
|
128
|
+
// Default sendResults to true when API key is available
|
|
129
|
+
const orqApiKey = apiKey || process.env.ORQ_API_KEY;
|
|
130
|
+
const shouldSendResults = sendResults !== undefined ? sendResults : Boolean(orqApiKey);
|
|
131
|
+
return pipe(Effect.gen(function* (_) {
|
|
132
|
+
const progress = yield* _(ProgressService);
|
|
133
|
+
// Initialize progress
|
|
134
|
+
yield* _(progress.updateProgress({
|
|
135
|
+
totalDataPoints: dataPromises.length,
|
|
136
|
+
currentDataPoint: 0,
|
|
137
|
+
phase: "initializing",
|
|
138
|
+
}));
|
|
139
|
+
// Process data points
|
|
140
|
+
const results = yield* _(Effect.forEach(dataPromises.map((dataPromise, index) => ({ dataPromise, index })), ({ dataPromise, index }) => processDataPointEffect(dataPromise instanceof Promise
|
|
141
|
+
? dataPromise
|
|
142
|
+
: Promise.resolve(dataPromise), index, jobs, evaluators, parallelism), { concurrency: parallelism }));
|
|
143
|
+
return results.flat();
|
|
144
|
+
}),
|
|
145
|
+
// Conditionally add table display
|
|
146
|
+
print
|
|
147
|
+
? Effect.tap((results) => displayResultsTableEffect(results))
|
|
148
|
+
: Effect.tap(() => Effect.void),
|
|
149
|
+
// Conditionally send results to Orq
|
|
150
|
+
shouldSendResults && orqApiKey
|
|
151
|
+
? Effect.tap((results) => sendResultsToOrqEffect(orqApiKey, evaluationName, description, datasetId, results, startTime, new Date()))
|
|
152
|
+
: Effect.tap(() => Effect.void),
|
|
153
|
+
// Provide the progress service
|
|
154
|
+
Effect.provide(ProgressServiceLive),
|
|
155
|
+
// Wrap with progress tracking
|
|
156
|
+
(effect) => withProgress(effect, print));
|
|
157
|
+
};
|
|
134
158
|
// Composable evaluatorq with display
|
|
135
159
|
export const evaluatorqWithTableEffect = (name, params) => pipe(evaluatorqEffect(name, params), Effect.tap((results) => displayResultsTableEffect(results)));
|
package/dist/lib/job-helper.d.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { DataPoint, Job, Output } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Helper function to create a named job that ensures the job name is preserved
|
|
4
|
+
* even when errors occur during execution.
|
|
5
|
+
*
|
|
6
|
+
* @param name - The name of the job
|
|
7
|
+
* @param fn - The job function that returns the output
|
|
8
|
+
* @returns A Job function that always includes the job name
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* const myJob = job("myJobName", async (data) => {
|
|
12
|
+
* // Your job logic here
|
|
13
|
+
* return "output";
|
|
14
|
+
* });
|
|
15
|
+
*/
|
|
16
|
+
export declare function job(name: string, fn: (data: DataPoint, row: number) => Promise<Output> | Output): Job;
|
|
17
|
+
//# sourceMappingURL=job-helper.d.ts.map
|
package/dist/lib/job-helper.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"job-helper.d.ts","sourceRoot":"","sources":["../../src/lib/job-helper.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAEzD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,GAAG,CACjB,IAAI,EAAE,MAAM,EACZ,EAAE,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,GAC7D,GAAG,CAoBL"}
|
package/dist/lib/job-helper.js
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Helper function to create a named job that ensures the job name is preserved
|
|
3
|
+
* even when errors occur during execution.
|
|
4
|
+
*
|
|
5
|
+
* @param name - The name of the job
|
|
6
|
+
* @param fn - The job function that returns the output
|
|
7
|
+
* @returns A Job function that always includes the job name
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* const myJob = job("myJobName", async (data) => {
|
|
11
|
+
* // Your job logic here
|
|
12
|
+
* return "output";
|
|
13
|
+
* });
|
|
14
|
+
*/
|
|
15
|
+
export function job(name, fn) {
|
|
16
|
+
return async (data, row) => {
|
|
17
|
+
try {
|
|
18
|
+
const output = await fn(data, row);
|
|
19
|
+
return {
|
|
20
|
+
name,
|
|
21
|
+
output,
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
catch (error) {
|
|
25
|
+
// Re-throw the error with the job name attached
|
|
26
|
+
// The error will be caught by the evaluatorq framework
|
|
27
|
+
// but the name will be preserved
|
|
28
|
+
throw Object.assign(error instanceof Error ? error : new Error(String(error)), {
|
|
29
|
+
jobName: name,
|
|
30
|
+
});
|
|
31
|
+
}
|
|
32
|
+
};
|
|
33
|
+
}
|
package/dist/lib/send-results.d.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { Effect } from "effect";
|
|
2
|
+
import type { DataPoint, EvaluatorqResult, Output } from "./types.js";
|
|
3
|
+
export interface SerializedEvaluatorScore {
|
|
4
|
+
evaluatorName: string;
|
|
5
|
+
score: {
|
|
6
|
+
value: number | boolean | string;
|
|
7
|
+
explanation?: string;
|
|
8
|
+
};
|
|
9
|
+
error?: string;
|
|
10
|
+
}
|
|
11
|
+
export interface SerializedJobResult {
|
|
12
|
+
jobName: string;
|
|
13
|
+
output: Output;
|
|
14
|
+
error?: string;
|
|
15
|
+
evaluatorScores?: SerializedEvaluatorScore[];
|
|
16
|
+
}
|
|
17
|
+
export interface SerializedDataPointResult {
|
|
18
|
+
dataPoint: DataPoint;
|
|
19
|
+
error?: string;
|
|
20
|
+
jobResults?: SerializedJobResult[];
|
|
21
|
+
}
|
|
22
|
+
export interface SendResultsPayload {
|
|
23
|
+
_name: string;
|
|
24
|
+
_description?: string;
|
|
25
|
+
_createdAt: string;
|
|
26
|
+
_endedAt: string;
|
|
27
|
+
_evaluationDuration: number;
|
|
28
|
+
datasetId?: string;
|
|
29
|
+
results: SerializedDataPointResult[];
|
|
30
|
+
}
|
|
31
|
+
export declare const sendResultsToOrqEffect: (apiKey: string, evaluationName: string, evaluationDescription: string | undefined, datasetId: string | undefined, results: EvaluatorqResult, startTime: Date, endTime: Date) => Effect.Effect<void, never, never>;
|
|
32
|
+
//# sourceMappingURL=send-results.d.ts.map
|
package/dist/lib/send-results.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"send-results.d.ts","sourceRoot":"","sources":["../../src/lib/send-results.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEhC,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAGtE,MAAM,WAAW,wBAAwB;IACvC,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE;QACL,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;QACjC,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,wBAAwB,EAAE,CAAC;CAC9C;AAED,MAAM,WAAW,yBAAyB;IACxC,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAGD,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,yBAAyB,EAAE,CAAC;CACtC;AAWD,eAAO,MAAM,sBAAsB,GACjC,QAAQ,MAAM,EACd,gBAAgB,MAAM,EACtB,uBAAuB,MAAM,GAAG,SAAS,EACzC,WAAW,MAAM,GAAG,SAAS,EAC7B,SAAS,gBAAgB,EACzB,WAAW,IAAI,EACf,SAAS,IAAI,KACZ,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAmG/B,CAAC"}
|
package/dist/lib/send-results.js
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { Effect } from "effect";
|
|
2
|
+
export const sendResultsToOrqEffect = (apiKey, evaluationName, evaluationDescription, datasetId, results, startTime, endTime) => Effect.gen(function* (_) {
|
|
3
|
+
// Convert Error objects to strings for JSON serialization
|
|
4
|
+
const serializedResults = results.map((result) => ({
|
|
5
|
+
dataPoint: result.dataPoint,
|
|
6
|
+
error: result.error ? String(result.error) : undefined,
|
|
7
|
+
jobResults: result.jobResults?.map((jobResult) => ({
|
|
8
|
+
jobName: jobResult.jobName,
|
|
9
|
+
output: jobResult.output,
|
|
10
|
+
error: jobResult.error ? String(jobResult.error) : undefined,
|
|
11
|
+
evaluatorScores: jobResult.evaluatorScores?.map((score) => ({
|
|
12
|
+
evaluatorName: score.evaluatorName,
|
|
13
|
+
score: score.score,
|
|
14
|
+
error: score.error ? String(score.error) : undefined,
|
|
15
|
+
})),
|
|
16
|
+
})),
|
|
17
|
+
}));
|
|
18
|
+
const payload = {
|
|
19
|
+
_name: evaluationName,
|
|
20
|
+
_description: evaluationDescription,
|
|
21
|
+
_createdAt: startTime.toISOString(),
|
|
22
|
+
_endedAt: endTime.toISOString(),
|
|
23
|
+
_evaluationDuration: endTime.getTime() - startTime.getTime(),
|
|
24
|
+
...(datasetId && { datasetId }),
|
|
25
|
+
results: serializedResults,
|
|
26
|
+
};
|
|
27
|
+
// Use tryPromise but catch and log errors instead of propagating them
|
|
28
|
+
yield* _(Effect.tryPromise({
|
|
29
|
+
try: async () => {
|
|
30
|
+
const baseUrl = process.env.ORQ_BASE_URL || "https://api.orq.ai";
|
|
31
|
+
const response = await fetch(`${baseUrl}/v2/spreadsheets/evaluations/receive`, {
|
|
32
|
+
method: "POST",
|
|
33
|
+
headers: {
|
|
34
|
+
"Content-Type": "application/json",
|
|
35
|
+
Authorization: `Bearer ${apiKey}`,
|
|
36
|
+
},
|
|
37
|
+
body: JSON.stringify(payload),
|
|
38
|
+
});
|
|
39
|
+
if (!response.ok) {
|
|
40
|
+
const errorText = await response
|
|
41
|
+
.text()
|
|
42
|
+
.catch(() => "Unknown error");
|
|
43
|
+
// Log warning instead of throwing
|
|
44
|
+
console.warn(`\n⚠️ Warning: Could not send results to Orq platform (${response.status} ${response.statusText})`);
|
|
45
|
+
// Only show detailed error in verbose mode or specific error cases
|
|
46
|
+
if (process.env.ORQ_DEBUG === "true" || response.status >= 500) {
|
|
47
|
+
console.warn(` Details: ${errorText}`);
|
|
48
|
+
}
|
|
49
|
+
return; // Return early but don't throw
|
|
50
|
+
}
|
|
51
|
+
const result = (await response.json());
|
|
52
|
+
console.log(`\n✅ Results sent to Orq: ${result.experiment_name} (${result.rows_created} rows created)`);
|
|
53
|
+
// Display the experiment URL if available
|
|
54
|
+
if (result.experiment_url) {
|
|
55
|
+
console.log(` 📊 View your evaluation at: ${result.experiment_url}`);
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
catch: (error) => {
|
|
59
|
+
// Log warning for network or other errors
|
|
60
|
+
console.warn(`\n⚠️ Warning: Could not send results to Orq platform`);
|
|
61
|
+
if (process.env.ORQ_DEBUG === "true") {
|
|
62
|
+
console.warn(` Details: ${error instanceof Error ? error.message : String(error)}`);
|
|
63
|
+
}
|
|
64
|
+
// Return undefined to indicate handled error
|
|
65
|
+
return undefined;
|
|
66
|
+
},
|
|
67
|
+
}),
|
|
68
|
+
// Catch any Effect errors and convert to success
|
|
69
|
+
Effect.catchAll(() => Effect.succeed(undefined)));
|
|
70
|
+
});
|
package/dist/lib/table-display.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"table-display.d.ts","sourceRoot":"","sources":["../../src/lib/table-display.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAGhC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"table-display.d.ts","sourceRoot":"","sources":["../../src/lib/table-display.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAGhC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAuVnD,eAAO,MAAM,yBAAyB,GACpC,SAAS,gBAAgB,KACxB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAqC/B,CAAC"}
|