@orq-ai/evaluatorq 1.0.0-9 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,33 +32,36 @@ npm install @orq-ai/node
32
32
  ### Basic Usage
33
33
 
34
34
  ```typescript
35
- import { evaluatorq } from "@orq-ai/evaluatorq";
35
+ import { evaluatorq, job } from "@orq-ai/evaluatorq";
36
+
37
+ const textAnalyzer = job("text-analyzer", async (data) => {
38
+ const text = data.inputs.text;
39
+ const analysis = {
40
+ length: text.length,
41
+ wordCount: text.split(" ").length,
42
+ uppercase: text.toUpperCase(),
43
+ };
44
+
45
+ return analysis;
46
+ });
36
47
 
37
48
  await evaluatorq("text-analysis", {
38
49
  data: [
39
50
  { inputs: { text: "Hello world" } },
40
51
  { inputs: { text: "Testing evaluation" } },
41
52
  ],
42
- jobs: [
43
- async (data) => {
44
- const text = data.inputs.text;
45
- const analysis = {
46
- length: text.length,
47
- wordCount: text.split(" ").length,
48
- uppercase: text.toUpperCase(),
49
- };
50
-
51
- return {
52
- name: "text-analyzer",
53
- output: analysis,
54
- };
55
- },
56
- ],
53
+ jobs: [textAnalyzer],
57
54
  evaluators: [
58
55
  {
59
56
  name: "length-check",
60
57
  scorer: async ({ output }) => {
61
- return output.length > 10 ? 1 : 0;
58
+ const passesCheck = output.length > 10;
59
+ return {
60
+ value: passesCheck ? 1 : 0,
61
+ explanation: passesCheck
62
+ ? "Output length is sufficient"
63
+ : `Output too short (${output.length} chars, need >10)`,
64
+ };
62
65
  },
63
66
  },
64
67
  ],
@@ -68,28 +71,33 @@ await evaluatorq("text-analysis", {
68
71
  ### Using Orq Platform Datasets
69
72
 
70
73
  ```typescript
71
- import { evaluatorq } from "@orq-ai/evaluatorq";
74
+ import { evaluatorq, job } from "@orq-ai/evaluatorq";
75
+
76
+ const processor = job("processor", async (data) => {
77
+ // Process each data point from the dataset
78
+ return processData(data);
79
+ });
72
80
 
73
81
  // Requires ORQ_API_KEY environment variable
74
82
  await evaluatorq("dataset-evaluation", {
75
83
  data: {
76
84
  datasetId: "your-dataset-id", // From Orq platform
77
85
  },
78
- jobs: [
79
- async (data) => {
80
- // Process each data point from the dataset
81
- return {
82
- name: "processor",
83
- output: processData(data),
84
- };
85
- },
86
- ],
86
+ jobs: [processor],
87
87
  evaluators: [
88
88
  {
89
89
  name: "accuracy",
90
90
  scorer: async ({ data, output }) => {
91
91
  // Compare output with expected results
92
- return calculateScore(output, data.expectedOutput);
92
+ const score = calculateScore(output, data.expectedOutput);
93
+ return {
94
+ value: score,
95
+ explanation: score > 0.8
96
+ ? "High accuracy match"
97
+ : score > 0.5
98
+ ? "Partial match"
99
+ : "Low accuracy match",
100
+ };
93
101
  },
94
102
  },
95
103
  ],
@@ -103,22 +111,15 @@ await evaluatorq("dataset-evaluation", {
103
111
  Run multiple jobs in parallel for each data point:
104
112
 
105
113
  ```typescript
114
+ import { job } from "@orq-ai/evaluatorq";
115
+
116
+ const preprocessor = job("preprocessor", async (data) => preprocess(data));
117
+ const analyzer = job("analyzer", async (data) => analyze(data));
118
+ const transformer = job("transformer", async (data) => transform(data));
119
+
106
120
  await evaluatorq("multi-job-eval", {
107
121
  data: [...],
108
- jobs: [
109
- async (data) => ({
110
- name: "preprocessor",
111
- output: preprocess(data),
112
- }),
113
- async (data) => ({
114
- name: "analyzer",
115
- output: analyze(data),
116
- }),
117
- async (data) => ({
118
- name: "transformer",
119
- output: transform(data),
120
- }),
121
- ],
122
+ jobs: [preprocessor, analyzer, transformer],
122
123
  evaluators: [...],
123
124
  });
124
125
  ```
@@ -126,19 +127,18 @@ await evaluatorq("multi-job-eval", {
126
127
  #### Custom Error Handling
127
128
 
128
129
  ```typescript
130
+ import { job } from "@orq-ai/evaluatorq";
131
+
132
+ const riskyJob = job("risky-job", async (data) => {
133
+ // Errors are captured and included in the evaluation results
134
+ // The job name is preserved even when errors occur
135
+ const result = await riskyOperation(data);
136
+ return result;
137
+ });
138
+
129
139
  await evaluatorq("error-handling", {
130
140
  data: [...],
131
- jobs: [
132
- async (data) => {
133
- try {
134
- const result = await riskyOperation(data);
135
- return { name: "risky-job", output: result };
136
- } catch (error) {
137
- // Errors are captured and included in the evaluation results
138
- throw new Error(`Failed to process: ${error.message}`);
139
- }
140
- },
141
- ],
141
+ jobs: [riskyJob],
142
142
  evaluators: [...],
143
143
  });
144
144
  ```
@@ -162,7 +162,55 @@ await evaluatorq("async-eval", {
162
162
 
163
163
  ### Environment Variables
164
164
 
165
- - `ORQ_API_KEY`: API key for Orq platform integration (required for dataset access)
165
+ - `ORQ_API_KEY`: API key for Orq platform integration (required for dataset access and sending results)
166
+
167
+ ## 📊 Orq Platform Integration
168
+
169
+ ### Automatic Result Sending
170
+
171
+ When the `ORQ_API_KEY` environment variable is set, evaluatorq automatically sends evaluation results to the Orq platform for visualization and analysis.
172
+
173
+ ```typescript
174
+ import { evaluatorq, job } from "@orq-ai/evaluatorq";
175
+
176
+ // Results are automatically sent when ORQ_API_KEY environment variable is present
177
+ await evaluatorq("my-evaluation", {
178
+ data: [...],
179
+ jobs: [...],
180
+ evaluators: [...],
181
+ sendResults: true, // Enabled by default when ORQ_API_KEY environment variable is set
182
+ });
183
+ ```
184
+
185
+ #### Configuration Options
186
+
187
+ - `sendResults`: Boolean flag to control result sending (defaults to `true` when `ORQ_API_KEY` is set)
188
+
189
+ #### What Gets Sent
190
+
191
+ When enabled, the following information is sent to Orq:
192
+ - Evaluation name
193
+ - Dataset ID (when using Orq datasets)
194
+ - Job results with outputs and errors
195
+ - Evaluator scores with values and explanations
196
+ - Execution timing information
197
+
198
+ Note: Evaluator explanations are included in the data sent to Orq but are not displayed in the terminal output to keep the console clean.
199
+
200
+ #### Result Visualization
201
+
202
+ After successful submission, a confirmation is logged to the console, followed by a link to view your results when the platform returns one:
203
+
204
+ ```
205
+ 📊 View your evaluation at: <url to the evaluation>
206
+ ```
207
+
208
+ The Orq platform provides:
209
+ - Interactive result tables
210
+ - Score statistics
211
+ - Performance metrics
212
+ - Historical comparisons
213
+
166
214
 
167
215
  ## 📚 API Reference
168
216
 
@@ -185,27 +233,54 @@ Promise that resolves when evaluation is complete.
185
233
  ### Types
186
234
 
187
235
  ```typescript
236
+ type Output = string | number | boolean | Record<string, unknown> | null;
237
+
188
238
  interface DataPoint {
189
- inputs: Record<string, any>;
190
- expectedOutput?: any;
191
- metadata?: Record<string, any>;
239
+ inputs: Record<string, unknown>;
240
+ expectedOutput?: Output;
192
241
  }
193
242
 
194
243
  interface JobResult {
195
- name: string;
196
- output: any;
244
+ jobName: string;
245
+ output: Output;
246
+ error?: Error;
247
+ evaluatorScores?: EvaluatorScore[];
197
248
  }
198
249
 
199
- interface Evaluator {
200
- name: string;
201
- scorer: (context: EvaluatorContext) => Promise<number>;
250
+ interface EvaluatorScore {
251
+ evaluatorName: string;
252
+ score: EvaluationResult<number | boolean | string>;
253
+ error?: Error;
202
254
  }
203
255
 
204
- interface EvaluatorContext {
256
+ type Job = (
257
+ data: DataPoint,
258
+ row: number,
259
+ ) => Promise<{
260
+ name: string;
261
+ output: Output;
262
+ }>;
263
+
264
+ // Helper function for creating jobs with preserved names on errors
265
+ function job(
266
+ name: string,
267
+ fn: (data: DataPoint, row: number) => Promise<Output> | Output,
268
+ ): Job;
269
+
270
+ type ScorerParameter = {
205
271
  data: DataPoint;
206
- output: any;
207
- row: number;
208
- }
272
+ output: Output;
273
+ };
274
+
275
+ type EvaluationResult<T> = {
276
+ value: T;
277
+ explanation?: string;
278
+ };
279
+
280
+ type Scorer =
281
+ | ((params: ScorerParameter) => Promise<EvaluationResult<string>>)
282
+ | ((params: ScorerParameter) => Promise<EvaluationResult<number>>)
283
+ | ((params: ScorerParameter) => Promise<EvaluationResult<boolean>>);
209
284
  ```
210
285
 
211
286
  ## 🛠️ Development
@@ -216,11 +291,4 @@ bunx nx build evaluatorq
216
291
 
217
292
  # Run type checking
218
293
  bunx nx typecheck evaluatorq
219
-
220
- # Run tests
221
- bunx nx test evaluatorq
222
294
  ```
223
-
224
- ## 📄 License
225
-
226
- This is free and unencumbered software released into the public domain. See [UNLICENSE](https://unlicense.org) for details.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  export * from "./lib/evaluatorq.js";
2
+ export { job } from "./lib/job-helper.js";
3
+ export { sendResultsToOrqEffect } from "./lib/send-results.js";
2
4
  export { displayResultsTableEffect } from "./lib/table-display.js";
3
5
  export * from "./lib/types.js";
4
- export * from "./lib/visualizer/index.js";
5
6
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AACnE,cAAc,gBAAgB,CAAC;AAC/B,cAAc,2BAA2B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,GAAG,EAAE,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AACnE,cAAc,gBAAgB,CAAC"}
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./lib/evaluatorq.js";
2
+ export { job } from "./lib/job-helper.js";
3
+ export { sendResultsToOrqEffect } from "./lib/send-results.js";
2
4
  export { displayResultsTableEffect } from "./lib/table-display.js";
3
5
  export * from "./lib/types.js";
4
- export * from "./lib/visualizer/index.js";
@@ -1 +1 @@
1
- {"version":3,"file":"effects.d.ts","sourceRoot":"","sources":["../../src/lib/effects.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAEtC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,KAAK,EACV,SAAS,EACT,eAAe,EACf,GAAG,EACH,SAAS,EACT,MAAM,EACP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,sBAAsB,CACpC,WAAW,EAAE,OAAO,CAAC,SAAS,CAAC,EAC/B,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,GAAG,EAAE,EACX,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,EAC9C,WAAW,EAAE,MAAM,GAClB,MAAM,CAAC,MAAM,CAAC,eAAe,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CA4C1D;AAED,wBAAgB,gBAAgB,CAC9B,GAAG,EAAE,GAAG,EACR,SAAS,EAAE,SAAS,EACpB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,eAAe,CAAC,CAyGlD"}
1
+ {"version":3,"file":"effects.d.ts","sourceRoot":"","sources":["../../src/lib/effects.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAEtC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,KAAK,EACV,SAAS,EACT,eAAe,EACf,GAAG,EACH,SAAS,EACT,MAAM,EACP,MAAM,YAAY,CAAC;AAEpB,wBAAgB,sBAAsB,CACpC,WAAW,EAAE,OAAO,CAAC,SAAS,CAAC,EAC/B,QAAQ,EAAE,MAAM,EAChB,IAAI,EAAE,GAAG,EAAE,EACX,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,EAC9C,WAAW,EAAE,MAAM,GAClB,MAAM,CAAC,MAAM,CAAC,eAAe,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CA4C1D;AAED,wBAAgB,gBAAgB,CAC9B,GAAG,EAAE,GAAG,EACR,SAAS,EAAE,SAAS,EACpB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,eAAe,CAAC,CA4GlD"}
@@ -62,10 +62,10 @@ export function processJobEffect(job, dataPoint, rowIndex, evaluators) {
62
62
  catch: (error) => error,
63
63
  }), Effect.map((score) => ({
64
64
  evaluatorName: evaluator.name,
65
- score: score,
65
+ score,
66
66
  })), Effect.catchAll((error) => Effect.succeed({
67
67
  evaluatorName: evaluator.name,
68
- score: "",
68
+ score: { value: "" },
69
69
  error: error,
70
70
  }))));
71
71
  return score;
@@ -81,9 +81,14 @@ export function processJobEffect(job, dataPoint, rowIndex, evaluators) {
81
81
  output: jobResult.output,
82
82
  evaluatorScores: [],
83
83
  };
84
- }).pipe(Effect.catchAll((error) => Effect.succeed({
85
- jobName: "Unknown", // We don't know the job name if it threw before returning
86
- output: null,
87
- error,
88
- })));
84
+ }).pipe(Effect.catchAll((error) => {
85
+ // Check if the error has a jobName property (set by our job helper)
86
+ const errorWithJobName = error;
87
+ const jobName = errorWithJobName.jobName || "Unknown";
88
+ return Effect.succeed({
89
+ jobName,
90
+ output: null,
91
+ error,
92
+ });
93
+ }));
89
94
  }
@@ -1 +1 @@
1
- {"version":3,"file":"evaluatorq.d.ts","sourceRoot":"","sources":["../../src/lib/evaluatorq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAWtC,OAAO,KAAK,EAEV,eAAe,EACf,gBAAgB,EAEjB,MAAM,YAAY,CAAC;AA+CpB;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,gBAAgB,CAAC,CAoE3B;AAGD,eAAO,MAAM,gBAAgB,GAC3B,OAAO,MAAM,EACb,QAAQ,eAAe,KACtB,MAAM,CAAC,MAAM,CAAC,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAuD9C,CAAC;AAoDF,eAAO,MAAM,yBAAyB,GACpC,MAAM,MAAM,EACZ,QAAQ,eAAe,KACtB,MAAM,CAAC,MAAM,CAAC,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAI5C,CAAC"}
1
+ {"version":3,"file":"evaluatorq.d.ts","sourceRoot":"","sources":["../../src/lib/evaluatorq.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAQ,MAAM,QAAQ,CAAC;AAYtC,OAAO,KAAK,EAEV,eAAe,EACf,gBAAgB,EAEjB,MAAM,YAAY,CAAC;AA+CpB;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,gBAAgB,CAAC,CAmG3B;AAGD,eAAO,MAAM,gBAAgB,GAC3B,OAAO,MAAM,EACb,QAAQ,eAAe,KACtB,MAAM,CAAC,MAAM,CAAC,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAmF9C,CAAC;AAgFF,eAAO,MAAM,yBAAyB,GACpC,MAAM,MAAM,EACZ,QAAQ,eAAe,KACtB,MAAM,CAAC,MAAM,CAAC,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAI5C,CAAC"}
@@ -1,11 +1,12 @@
1
1
  import { Effect, pipe } from "effect";
2
2
  import { processDataPointEffect } from "./effects.js";
3
3
  import { ProgressService, ProgressServiceLive, withProgress, } from "./progress.js";
4
+ import { sendResultsToOrqEffect } from "./send-results.js";
4
5
  import { displayResultsTableEffect } from "./table-display.js";
5
6
  async function setupOrqClient(apiKey) {
6
7
  try {
7
8
  const client = await import("@orq-ai/node");
8
- return new client.Orq({ apiKey, serverURL: "https://my.staging.orq.ai" });
9
+ return new client.Orq({ apiKey });
9
10
  }
10
11
  catch (error) {
11
12
  const err = error;
@@ -40,18 +41,23 @@ async function fetchDatasetAsDataPoints(orqClient, datasetId) {
40
41
  * @returns The results of the evaluation run.
41
42
  */
42
43
  export async function evaluatorq(_name, params) {
43
- const { data, evaluators = [], jobs, parallelism = 1, print = true } = params;
44
+ const { data, evaluators = [], jobs, parallelism = 1, print = true, sendResults, description, } = params;
44
45
  let orqClient;
45
46
  const orqApiKey = process.env.ORQ_API_KEY;
46
47
  if (orqApiKey) {
47
48
  orqClient = await setupOrqClient(orqApiKey);
48
49
  }
50
+ // Default sendResults to true when API key is available
51
+ const shouldSendResults = sendResults !== undefined ? sendResults : Boolean(orqApiKey);
52
+ const startTime = new Date();
49
53
  let dataPromises;
54
+ let datasetId;
50
55
  // Handle datasetId case
51
56
  if ("datasetId" in data) {
52
57
  if (!orqApiKey || !orqClient) {
53
58
  throw new Error("ORQ_API_KEY environment variable must be set to fetch datapoints from Orq platform.");
54
59
  }
60
+ datasetId = data.datasetId;
55
61
  dataPromises = await fetchDatasetAsDataPoints(orqClient, data.datasetId);
56
62
  }
57
63
  else {
@@ -67,13 +73,19 @@ export async function evaluatorq(_name, params) {
67
73
  phase: "initializing",
68
74
  }));
69
75
  // Process data points
70
- const results = yield* _(Effect.forEach(dataPromises.map((dataPromise, index) => ({ dataPromise, index })), ({ dataPromise, index }) => processDataPointEffect(dataPromise, index, jobs, evaluators, parallelism), { concurrency: parallelism }));
76
+ const results = yield* _(Effect.forEach(dataPromises.map((dataPromise, index) => ({ dataPromise, index })), ({ dataPromise, index }) => processDataPointEffect(dataPromise instanceof Promise
77
+ ? dataPromise
78
+ : Promise.resolve(dataPromise), index, jobs, evaluators, parallelism), { concurrency: parallelism }));
71
79
  return results.flat();
72
80
  }),
73
81
  // Conditionally add table display
74
82
  print
75
83
  ? Effect.tap((results) => displayResultsTableEffect(results))
76
84
  : Effect.tap(() => Effect.void),
85
+ // Conditionally send results to Orq
86
+ shouldSendResults && orqApiKey
87
+ ? Effect.tap((results) => sendResultsToOrqEffect(orqApiKey, _name, description, datasetId, results, startTime, new Date()))
88
+ : Effect.tap(() => Effect.void),
77
89
  // Provide the progress service
78
90
  Effect.provide(ProgressServiceLive),
79
91
  // Wrap with progress tracking
@@ -83,7 +95,8 @@ export async function evaluatorq(_name, params) {
83
95
  }
84
96
  // Create an Effect that runs evaluation and optionally displays results
85
97
  export const evaluatorqEffect = (_name, params) => {
86
- const { data, evaluators = [], jobs, parallelism = 1, print = true } = params;
98
+ const { data, evaluators = [], jobs, parallelism = 1, print = true, sendResults, description, } = params;
99
+ const startTime = new Date();
87
100
  // Handle datasetId case
88
101
  if ("datasetId" in data) {
89
102
  return Effect.gen(function* (_) {
@@ -104,32 +117,43 @@ export const evaluatorqEffect = (_name, params) => {
104
117
  ? error
105
118
  : new Error(`Failed to fetch dataset: ${String(error)}`),
106
119
  }));
107
- return yield* _(runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print));
120
+ return yield* _(runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print, sendResults, description, _name, data.datasetId, apiKey, startTime));
108
121
  });
109
122
  }
110
123
  const dataPromises = data;
111
- return runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print);
124
+ return runEvaluationEffect(dataPromises, evaluators, jobs, parallelism, print, sendResults, description, _name, undefined, undefined, startTime);
112
125
  };
113
126
  // Extract common evaluation logic
114
- const runEvaluationEffect = (dataPromises, evaluators = [], jobs, parallelism, print) => pipe(Effect.gen(function* (_) {
115
- const progress = yield* _(ProgressService);
116
- // Initialize progress
117
- yield* _(progress.updateProgress({
118
- totalDataPoints: dataPromises.length,
119
- currentDataPoint: 0,
120
- phase: "initializing",
121
- }));
122
- // Process data points
123
- const results = yield* _(Effect.forEach(dataPromises.map((dataPromise, index) => ({ dataPromise, index })), ({ dataPromise, index }) => processDataPointEffect(dataPromise, index, jobs, evaluators, parallelism), { concurrency: parallelism }));
124
- return results.flat();
125
- }),
126
- // Conditionally add table display
127
- print
128
- ? Effect.tap((results) => displayResultsTableEffect(results))
129
- : Effect.tap(() => Effect.void),
130
- // Provide the progress service
131
- Effect.provide(ProgressServiceLive),
132
- // Wrap with progress tracking
133
- (effect) => withProgress(effect, print));
127
+ const runEvaluationEffect = (dataPromises, evaluators = [], jobs, parallelism, print, sendResults, description, evaluationName, datasetId, apiKey, startTime) => {
128
+ // Default sendResults to true when API key is available
129
+ const orqApiKey = apiKey || process.env.ORQ_API_KEY;
130
+ const shouldSendResults = sendResults !== undefined ? sendResults : Boolean(orqApiKey);
131
+ return pipe(Effect.gen(function* (_) {
132
+ const progress = yield* _(ProgressService);
133
+ // Initialize progress
134
+ yield* _(progress.updateProgress({
135
+ totalDataPoints: dataPromises.length,
136
+ currentDataPoint: 0,
137
+ phase: "initializing",
138
+ }));
139
+ // Process data points
140
+ const results = yield* _(Effect.forEach(dataPromises.map((dataPromise, index) => ({ dataPromise, index })), ({ dataPromise, index }) => processDataPointEffect(dataPromise instanceof Promise
141
+ ? dataPromise
142
+ : Promise.resolve(dataPromise), index, jobs, evaluators, parallelism), { concurrency: parallelism }));
143
+ return results.flat();
144
+ }),
145
+ // Conditionally add table display
146
+ print
147
+ ? Effect.tap((results) => displayResultsTableEffect(results))
148
+ : Effect.tap(() => Effect.void),
149
+ // Conditionally send results to Orq
150
+ shouldSendResults && orqApiKey
151
+ ? Effect.tap((results) => sendResultsToOrqEffect(orqApiKey, evaluationName, description, datasetId, results, startTime, new Date()))
152
+ : Effect.tap(() => Effect.void),
153
+ // Provide the progress service
154
+ Effect.provide(ProgressServiceLive),
155
+ // Wrap with progress tracking
156
+ (effect) => withProgress(effect, print));
157
+ };
134
158
  // Composable evaluatorq with display
135
159
  export const evaluatorqWithTableEffect = (name, params) => pipe(evaluatorqEffect(name, params), Effect.tap((results) => displayResultsTableEffect(results)));
@@ -0,0 +1,17 @@
1
+ import type { DataPoint, Job, Output } from "./types.js";
2
+ /**
3
+ * Helper function to create a named job that ensures the job name is preserved
4
+ * even when errors occur during execution.
5
+ *
6
+ * @param name - The name of the job
7
+ * @param fn - The job function that returns the output
8
+ * @returns A Job function that always includes the job name
9
+ *
10
+ * @example
11
+ * const myJob = job("myJobName", async (data) => {
12
+ * // Your job logic here
13
+ * return "output";
14
+ * });
15
+ */
16
+ export declare function job(name: string, fn: (data: DataPoint, row: number) => Promise<Output> | Output): Job;
17
+ //# sourceMappingURL=job-helper.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"job-helper.d.ts","sourceRoot":"","sources":["../../src/lib/job-helper.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAEzD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,GAAG,CACjB,IAAI,EAAE,MAAM,EACZ,EAAE,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,GAAG,MAAM,GAC7D,GAAG,CAoBL"}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Helper function to create a named job that ensures the job name is preserved
3
+ * even when errors occur during execution.
4
+ *
5
+ * @param name - The name of the job
6
+ * @param fn - The job function that returns the output
7
+ * @returns A Job function that always includes the job name
8
+ *
9
+ * @example
10
+ * const myJob = job("myJobName", async (data) => {
11
+ * // Your job logic here
12
+ * return "output";
13
+ * });
14
+ */
15
+ export function job(name, fn) {
16
+ return async (data, row) => {
17
+ try {
18
+ const output = await fn(data, row);
19
+ return {
20
+ name,
21
+ output,
22
+ };
23
+ }
24
+ catch (error) {
25
+ // Re-throw the error with the job name attached
26
+ // The error will be caught by the evaluatorq framework
27
+ // but the name will be preserved
28
+ throw Object.assign(error instanceof Error ? error : new Error(String(error)), {
29
+ jobName: name,
30
+ });
31
+ }
32
+ };
33
+ }
@@ -0,0 +1,32 @@
1
+ import { Effect } from "effect";
2
+ import type { DataPoint, EvaluatorqResult, Output } from "./types.js";
3
+ export interface SerializedEvaluatorScore {
4
+ evaluatorName: string;
5
+ score: {
6
+ value: number | boolean | string;
7
+ explanation?: string;
8
+ };
9
+ error?: string;
10
+ }
11
+ export interface SerializedJobResult {
12
+ jobName: string;
13
+ output: Output;
14
+ error?: string;
15
+ evaluatorScores?: SerializedEvaluatorScore[];
16
+ }
17
+ export interface SerializedDataPointResult {
18
+ dataPoint: DataPoint;
19
+ error?: string;
20
+ jobResults?: SerializedJobResult[];
21
+ }
22
+ export interface SendResultsPayload {
23
+ _name: string;
24
+ _description?: string;
25
+ _createdAt: string;
26
+ _endedAt: string;
27
+ _evaluationDuration: number;
28
+ datasetId?: string;
29
+ results: SerializedDataPointResult[];
30
+ }
31
+ export declare const sendResultsToOrqEffect: (apiKey: string, evaluationName: string, evaluationDescription: string | undefined, datasetId: string | undefined, results: EvaluatorqResult, startTime: Date, endTime: Date) => Effect.Effect<void, never, never>;
32
+ //# sourceMappingURL=send-results.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"send-results.d.ts","sourceRoot":"","sources":["../../src/lib/send-results.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEhC,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAGtE,MAAM,WAAW,wBAAwB;IACvC,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE;QACL,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;QACjC,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,wBAAwB,EAAE,CAAC;CAC9C;AAED,MAAM,WAAW,yBAAyB;IACxC,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAGD,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,yBAAyB,EAAE,CAAC;CACtC;AAWD,eAAO,MAAM,sBAAsB,GACjC,QAAQ,MAAM,EACd,gBAAgB,MAAM,EACtB,uBAAuB,MAAM,GAAG,SAAS,EACzC,WAAW,MAAM,GAAG,SAAS,EAC7B,SAAS,gBAAgB,EACzB,WAAW,IAAI,EACf,SAAS,IAAI,KACZ,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAmG/B,CAAC"}
@@ -0,0 +1,70 @@
1
+ import { Effect } from "effect";
2
+ export const sendResultsToOrqEffect = (apiKey, evaluationName, evaluationDescription, datasetId, results, startTime, endTime) => Effect.gen(function* (_) {
3
+ // Convert Error objects to strings for JSON serialization
4
+ const serializedResults = results.map((result) => ({
5
+ dataPoint: result.dataPoint,
6
+ error: result.error ? String(result.error) : undefined,
7
+ jobResults: result.jobResults?.map((jobResult) => ({
8
+ jobName: jobResult.jobName,
9
+ output: jobResult.output,
10
+ error: jobResult.error ? String(jobResult.error) : undefined,
11
+ evaluatorScores: jobResult.evaluatorScores?.map((score) => ({
12
+ evaluatorName: score.evaluatorName,
13
+ score: score.score,
14
+ error: score.error ? String(score.error) : undefined,
15
+ })),
16
+ })),
17
+ }));
18
+ const payload = {
19
+ _name: evaluationName,
20
+ _description: evaluationDescription,
21
+ _createdAt: startTime.toISOString(),
22
+ _endedAt: endTime.toISOString(),
23
+ _evaluationDuration: endTime.getTime() - startTime.getTime(),
24
+ ...(datasetId && { datasetId }),
25
+ results: serializedResults,
26
+ };
27
+ // Use tryPromise but catch and log errors instead of propagating them
28
+ yield* _(Effect.tryPromise({
29
+ try: async () => {
30
+ const baseUrl = process.env.ORQ_BASE_URL || "https://api.orq.ai";
31
+ const response = await fetch(`${baseUrl}/v2/spreadsheets/evaluations/receive`, {
32
+ method: "POST",
33
+ headers: {
34
+ "Content-Type": "application/json",
35
+ Authorization: `Bearer ${apiKey}`,
36
+ },
37
+ body: JSON.stringify(payload),
38
+ });
39
+ if (!response.ok) {
40
+ const errorText = await response
41
+ .text()
42
+ .catch(() => "Unknown error");
43
+ // Log warning instead of throwing
44
+ console.warn(`\n⚠️ Warning: Could not send results to Orq platform (${response.status} ${response.statusText})`);
45
+ // Only show detailed error in verbose mode or specific error cases
46
+ if (process.env.ORQ_DEBUG === "true" || response.status >= 500) {
47
+ console.warn(` Details: ${errorText}`);
48
+ }
49
+ return; // Return early but don't throw
50
+ }
51
+ const result = (await response.json());
52
+ console.log(`\n✅ Results sent to Orq: ${result.experiment_name} (${result.rows_created} rows created)`);
53
+ // Display the experiment URL if available
54
+ if (result.experiment_url) {
55
+ console.log(` 📊 View your evaluation at: ${result.experiment_url}`);
56
+ }
57
+ },
58
+ catch: (error) => {
59
+ // Log warning for network or other errors
60
+ console.warn(`\n⚠️ Warning: Could not send results to Orq platform`);
61
+ if (process.env.ORQ_DEBUG === "true") {
62
+ console.warn(` Details: ${error instanceof Error ? error.message : String(error)}`);
63
+ }
64
+ // Return undefined to indicate handled error
65
+ return undefined;
66
+ },
67
+ }),
68
+ // Catch any Effect errors and convert to success
69
+ Effect.catchAll(() => Effect.succeed(undefined)));
70
+ });
@@ -1 +1 @@
1
- {"version":3,"file":"table-display.d.ts","sourceRoot":"","sources":["../../src/lib/table-display.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAGhC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AA0TnD,eAAO,MAAM,yBAAyB,GACpC,SAAS,gBAAgB,KACxB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CA+B/B,CAAC"}
1
+ {"version":3,"file":"table-display.d.ts","sourceRoot":"","sources":["../../src/lib/table-display.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAGhC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAuVnD,eAAO,MAAM,yBAAyB,GACpC,SAAS,gBAAgB,KACxB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAqC/B,CAAC"}