judgeval 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Judgeval.d.ts +4 -3
- package/dist/Judgeval.d.ts.map +1 -1
- package/dist/agent-judges/AgentJudge.d.ts +19 -0
- package/dist/agent-judges/AgentJudge.d.ts.map +1 -0
- package/dist/agent-judges/AgentJudgeFactory.d.ts +100 -0
- package/dist/agent-judges/AgentJudgeFactory.d.ts.map +1 -0
- package/dist/agent-judges/index.d.ts +3 -0
- package/dist/agent-judges/index.d.ts.map +1 -0
- package/dist/datasets/DatasetFactory.d.ts.map +1 -1
- package/dist/evaluation/Evaluation.d.ts.map +1 -1
- package/dist/evaluation/EvaluatorRunner.d.ts.map +1 -1
- package/dist/index.cjs +5 -5
- package/dist/index.cjs.map +19 -18
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.mjs +5 -5
- package/dist/index.mjs.map +19 -18
- package/dist/internal/api/client.d.ts +4 -1
- package/dist/internal/api/client.d.ts.map +1 -1
- package/dist/internal/api/models/ExperimentScorer.d.ts +6 -0
- package/dist/internal/api/models/ExperimentScorer.d.ts.map +1 -1
- package/dist/internal/api/models/SDKCreateAgentJudgeRequest.d.ts +15 -0
- package/dist/internal/api/models/SDKCreateAgentJudgeRequest.d.ts.map +1 -0
- package/dist/internal/api/models/SDKCreateAgentJudgeResponse.d.ts +4 -0
- package/dist/internal/api/models/SDKCreateAgentJudgeResponse.d.ts.map +1 -0
- package/dist/internal/api/models/SDKUpdateAgentJudgeRequest.d.ts +18 -0
- package/dist/internal/api/models/SDKUpdateAgentJudgeRequest.d.ts.map +1 -0
- package/dist/internal/api/models/SDKUpdateAgentJudgeResponse.d.ts +72 -0
- package/dist/internal/api/models/SDKUpdateAgentJudgeResponse.d.ts.map +1 -0
- package/dist/internal/api/models/index.d.ts +4 -0
- package/dist/internal/api/models/index.d.ts.map +1 -1
- package/dist/trace/OfflineTracer.d.ts +4 -8
- package/dist/trace/OfflineTracer.d.ts.map +1 -1
- package/dist/trace/baggage/utils.d.ts +1 -3
- package/dist/trace/baggage/utils.d.ts.map +1 -1
- package/dist/trace/exporters/JudgmentSpanExporter.d.ts.map +1 -1
- package/dist/trace/exporters/NoOpSpanExporter.d.ts.map +1 -1
- package/dist/utils/logger.d.ts +1 -0
- package/dist/utils/logger.d.ts.map +1 -1
- package/dist/utils/retry.d.ts.map +1 -1
- package/dist/utils/serializer.d.ts.map +1 -1
- package/package.json +5 -5
package/dist/Judgeval.d.ts
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import { EvaluationFactory } from "./evaluation/EvaluationFactory";
|
|
2
2
|
import { DatasetFactory } from "./datasets/DatasetFactory";
|
|
3
|
+
import { AgentJudgeFactory } from "./agent-judges/AgentJudgeFactory";
|
|
3
4
|
import type { OfflineTracer, OfflineTracerConfig } from "./trace/OfflineTracer";
|
|
4
5
|
/**
|
|
5
6
|
* Options for {@link Judgeval.offlineTracer}.
|
|
6
|
-
*
|
|
7
|
-
* Mirrors `OfflineTracerConfig` minus credentials and `projectName`,
|
|
8
|
-
* which are reused from the parent `Judgeval` instance.
|
|
7
|
+
* Credentials and `projectName` are taken from the parent `Judgeval` instance.
|
|
9
8
|
*/
|
|
10
9
|
export type JudgevalOfflineTracerOptions = Omit<OfflineTracerConfig, "projectName" | "apiKey" | "organizationId" | "apiUrl">;
|
|
11
10
|
/**
|
|
@@ -85,5 +84,7 @@ export declare class Judgeval {
|
|
|
85
84
|
get datasets(): DatasetFactory;
|
|
86
85
|
/** Access evaluation (create evaluation runs). */
|
|
87
86
|
get evaluation(): EvaluationFactory;
|
|
87
|
+
/** Manage Agent Judges (prompt-based scorers) on the platform. */
|
|
88
|
+
get agentJudges(): AgentJudgeFactory;
|
|
88
89
|
}
|
|
89
90
|
//# sourceMappingURL=Judgeval.d.ts.map
|
package/dist/Judgeval.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Judgeval.d.ts","sourceRoot":"","sources":["../src/Judgeval.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"Judgeval.d.ts","sourceRoot":"","sources":["../src/Judgeval.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,kCAAkC,CAAC;AACrE,OAAO,KAAK,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAEhF;;;GAGG;AACH,MAAM,MAAM,4BAA4B,GAAG,IAAI,CAC7C,mBAAmB,EACnB,aAAa,GAAG,QAAQ,GAAG,gBAAgB,GAAG,QAAQ,CACvD,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,WAAW,cAAc;IAC7B,iDAAiD;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,gEAAgE;IAChE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gEAAgE;IAChE,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;IACtC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAgB;IAE3C,OAAO;IAUP;;;;;;;;;;;;;;;;OAgBG;WACU,MAAM,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC;IAgC9D;;;;;;;;;;;;;;;;OAgBG;IACG,aAAa,CACjB,OAAO,EAAE,4BAA4B,GACpC,OAAO,CAAC,aAAa,CAAC;IAWzB,qDAAqD;IACrD,IAAI,QAAQ,IAAI,cAAc,CAE7B;IAED,kDAAkD;IAClD,IAAI,UAAU,IAAI,iBAAiB,CAMlC;IAED,kEAAkE;IAClE,IAAI,WAAW,IAAI,iBAAiB,CAMnC;CACF"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export type ScoreType = "binary" | "numeric" | "categorical";
|
|
2
|
+
export interface AgentJudge {
|
|
3
|
+
judgeId: string;
|
|
4
|
+
name: string;
|
|
5
|
+
prompt: string;
|
|
6
|
+
model: string;
|
|
7
|
+
scoreType: ScoreType;
|
|
8
|
+
description: string | null;
|
|
9
|
+
judgeDescription: string | null;
|
|
10
|
+
categories: {
|
|
11
|
+
name: string;
|
|
12
|
+
description: string;
|
|
13
|
+
}[] | null;
|
|
14
|
+
minScore: number | null;
|
|
15
|
+
maxScore: number | null;
|
|
16
|
+
majorVersion: number | null;
|
|
17
|
+
minorVersion: number | null;
|
|
18
|
+
}
|
|
19
|
+
//# sourceMappingURL=AgentJudge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AgentJudge.d.ts","sourceRoot":"","sources":["../../src/agent-judges/AgentJudge.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,SAAS,GAAG,QAAQ,GAAG,SAAS,GAAG,aAAa,CAAC;AAE7D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,SAAS,CAAC;IACrB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,EAAE,GAAG,IAAI,CAAC;IAC3D,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import type { JudgmentApiClient } from "../internal/api/client";
|
|
2
|
+
import type { AgentJudge, ScoreType } from "./AgentJudge";
|
|
3
|
+
/**
|
|
4
|
+
* Create and update prompt-based Agent Judges on the Judgment platform.
|
|
5
|
+
*
|
|
6
|
+
* Access via `client.agentJudges`.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* const judge = await client.agentJudges.create({
|
|
11
|
+
* name: "helpfulness",
|
|
12
|
+
* prompt: "Rate the assistant's helpfulness from 0 to 1.",
|
|
13
|
+
* model: "gpt-5.2",
|
|
14
|
+
* scoreType: "numeric",
|
|
15
|
+
* });
|
|
16
|
+
*
|
|
17
|
+
* await client.agentJudges.update({
|
|
18
|
+
* judgeId: judge.judgeId,
|
|
19
|
+
* prompt: "Updated rubric prompt.",
|
|
20
|
+
* });
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
export declare class AgentJudgeFactory {
|
|
24
|
+
private readonly _client;
|
|
25
|
+
private readonly _projectId;
|
|
26
|
+
private readonly _projectName;
|
|
27
|
+
constructor(client: JudgmentApiClient, projectId: string | null, projectName: string);
|
|
28
|
+
/**
|
|
29
|
+
* Create a new Agent Judge (prompt-based scorer).
|
|
30
|
+
*
|
|
31
|
+
* @param options.name - Unique judge name within the project.
|
|
32
|
+
* @param options.prompt - Rubric prompt template used by the agent judge harness.
|
|
33
|
+
* @param options.model - LiteLLM model id (e.g. `"gpt-5.2"`).
|
|
34
|
+
* @param options.scoreType - One of `"numeric"`, `"binary"`, or `"categorical"`.
|
|
35
|
+
* @param options.description - Description stored on the underlying scorer version.
|
|
36
|
+
* @param options.judgeDescription - Description shown in the UI.
|
|
37
|
+
* @param options.categories - Choice list for `categorical` judges.
|
|
38
|
+
* @param options.minScore - Lower bound for `numeric` judges (defaults to `0` server-side).
|
|
39
|
+
* @param options.maxScore - Upper bound for `numeric` judges (defaults to `1` server-side).
|
|
40
|
+
* @returns The newly created `AgentJudge`, or `null` if the project is unresolved.
|
|
41
|
+
*/
|
|
42
|
+
create(options: {
|
|
43
|
+
name: string;
|
|
44
|
+
prompt: string;
|
|
45
|
+
model: string;
|
|
46
|
+
scoreType: ScoreType;
|
|
47
|
+
description?: string;
|
|
48
|
+
judgeDescription?: string;
|
|
49
|
+
categories?: {
|
|
50
|
+
name: string;
|
|
51
|
+
description: string;
|
|
52
|
+
}[];
|
|
53
|
+
minScore?: number;
|
|
54
|
+
maxScore?: number;
|
|
55
|
+
}): Promise<AgentJudge | null>;
|
|
56
|
+
/**
|
|
57
|
+
* Update an existing Agent Judge.
|
|
58
|
+
*
|
|
59
|
+
* Passing any of `prompt`, `model`, `categories`, `minScore`, or
|
|
60
|
+
* `maxScore` writes a new version of the underlying prompt scorer.
|
|
61
|
+
* When `targetMajorVersion` / `targetMinorVersion` are omitted, the
|
|
62
|
+
* server auto-bumps the latest version's minor by 1 — matching the
|
|
63
|
+
* UI's default "save" behaviour.
|
|
64
|
+
*
|
|
65
|
+
* @param options.judgeId - ID of the judge to update.
|
|
66
|
+
* @param options.prompt - New rubric prompt template.
|
|
67
|
+
* @param options.model - New LiteLLM model id.
|
|
68
|
+
* @param options.scoreType - New score type.
|
|
69
|
+
* @param options.description - New scorer-version description.
|
|
70
|
+
* @param options.judgeDescription - New UI-facing description.
|
|
71
|
+
* @param options.categories - New choices for `categorical` judges.
|
|
72
|
+
* @param options.minScore - New lower bound for `numeric` judges.
|
|
73
|
+
* @param options.maxScore - New upper bound for `numeric` judges.
|
|
74
|
+
* @param options.sourceMajorVersion - Major version to copy unspecified fields from.
|
|
75
|
+
* @param options.sourceMinorVersion - Minor version to copy unspecified fields from.
|
|
76
|
+
* @param options.targetMajorVersion - Major version to write to.
|
|
77
|
+
* @param options.targetMinorVersion - Minor version to write to.
|
|
78
|
+
* @returns The updated `AgentJudge`, or `null` if the project is unresolved.
|
|
79
|
+
*/
|
|
80
|
+
update(options: {
|
|
81
|
+
judgeId: string;
|
|
82
|
+
prompt?: string;
|
|
83
|
+
model?: string;
|
|
84
|
+
scoreType?: ScoreType;
|
|
85
|
+
description?: string;
|
|
86
|
+
judgeDescription?: string;
|
|
87
|
+
categories?: {
|
|
88
|
+
name: string;
|
|
89
|
+
description: string;
|
|
90
|
+
}[];
|
|
91
|
+
minScore?: number;
|
|
92
|
+
maxScore?: number;
|
|
93
|
+
sourceMajorVersion?: number;
|
|
94
|
+
sourceMinorVersion?: number;
|
|
95
|
+
targetMajorVersion?: number;
|
|
96
|
+
targetMinorVersion?: number;
|
|
97
|
+
}): Promise<AgentJudge | null>;
|
|
98
|
+
private _expectProjectId;
|
|
99
|
+
}
|
|
100
|
+
//# sourceMappingURL=AgentJudgeFactory.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AgentJudgeFactory.d.ts","sourceRoot":"","sources":["../../src/agent-judges/AgentJudgeFactory.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAKhE,OAAO,KAAK,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAE1D;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAgB;IAC3C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;gBAGpC,MAAM,EAAE,iBAAiB,EACzB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,WAAW,EAAE,MAAM;IAOrB;;;;;;;;;;;;;OAaG;IACG,MAAM,CAAC,OAAO,EAAE;QACpB,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,SAAS,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,UAAU,CAAC,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,WAAW,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QACrD,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAwC9B;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACG,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,SAAS,CAAC,EAAE,SAAS,CAAC;QACtB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,UAAU,CAAC,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,WAAW,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QACrD,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IAkC9B,OAAO,CAAC,gBAAgB;CASzB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agent-judges/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,YAAY,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"DatasetFactory.d.ts","sourceRoot":"","sources":["../../src/datasets/DatasetFactory.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,OAAO,EAAoB,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"DatasetFactory.d.ts","sourceRoot":"","sources":["../../src/datasets/DatasetFactory.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,OAAO,EAAoB,MAAM,iBAAiB,CAAC;AAE5D,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC;;;;;;;;;;GAUG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAgB;IAC3C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;gBAGpC,MAAM,EAAE,iBAAiB,EACzB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,WAAW,EAAE,MAAM;IAOrB;;;;;OAKG;IACG,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC;IAyBhD;;;;;;;;OAQG;IACG,MAAM,CACV,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE;QACP,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;QACrB,SAAS,CAAC,EAAE,OAAO,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;KACf,GACL,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC;IA4B1B;;;;OAIG;IACH,IAAI,IAAI,OAAO,CAAC,WAAW,EAAE,GAAG,IAAI,CAAC;IAOrC,OAAO,CAAC,gBAAgB;CASzB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Evaluation.d.ts","sourceRoot":"","sources":["../../src/evaluation/Evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"Evaluation.d.ts","sourceRoot":"","sources":["../../src/evaluation/Evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAKxC,MAAM,WAAW,oBAAoB;IACnC,gCAAgC;IAChC,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB;;;OAGG;IACH,OAAO,EAAE,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;IAC5B,qDAAqD;IACrD,WAAW,EAAE,MAAM,CAAC;IACpB;;;OAGG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAuB;IAC9C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAG9C,MAAM,EAAE,iBAAiB,EACzB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,WAAW,EAAE,MAAM;IAMrB;;;;;;;;OAQG;IACH,GAAG,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;CA+C7D"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"EvaluatorRunner.d.ts","sourceRoot":"","sources":["../../src/evaluation/EvaluatorRunner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,6CAA6C,CAAC;AACxF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,0CAA0C,CAAC;AAClF,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"EvaluatorRunner.d.ts","sourceRoot":"","sources":["../../src/evaluation/EvaluatorRunner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,6CAA6C,CAAC;AACxF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,0CAA0C,CAAC;AAClF,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAK7C;;;;;GAKG;AACH,8BAAsB,eAAe,CAAC,CAAC,SAAS,MAAM,GAAG,KAAK;IAC5D,SAAS,CAAC,QAAQ,CAAC,OAAO,EAAE,iBAAiB,CAAC;IAC9C,SAAS,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAS,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;gBAGtC,MAAM,EAAE,iBAAiB,EACzB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,WAAW,EAAE,MAAM;IAOrB,SAAS,CAAC,QAAQ,CAAC,aAAa,CAC9B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,CAAC,EAAE,GACX,oBAAoB;IAEvB,SAAS,CAAC,QAAQ,CAAC,OAAO,CACxB,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,CAAC,EAAE,EACZ,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;cAEF,KAAK,CACnB,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC;QAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC;IA4BzD,SAAS,CAAC,eAAe,CACvB,QAAQ,EAAE,OAAO,EAAE,EACnB,WAAW,EAAE,iBAAiB,EAAE,EAChC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,OAAO,GAClB,aAAa,EAAE;IAsEZ,GAAG,CACP,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,CAAC,EAAE,EACZ,WAAW,EAAE,MAAM,EACnB,UAAU,GAAE,OAAe,EAC3B,cAAc,GAAE,MAAY,GAC3B,OAAO,CAAC,aAAa,EAAE,CAAC;CA8C5B"}
|
package/dist/index.cjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
var zq=Object.create;var{getPrototypeOf:
|
|
2
|
-
`)}static debug(R){
|
|
3
|
-
${$.stack}`:"";return C.error(`[Caught] An exception was raised in ${R}: ${String($)}${m}`),E}}var O=W(()=>{y()});var lR=W(()=>{O()});function v(R,q={}){let{pre:E,post:$,error:m,finally:P}=q;return async function(...D){let f=E?Y("immutableWrapAsync.pre",()=>E(...D)):void 0,V;try{let Q=await R.apply(this,D);if($)V=Y("immutableWrapAsync.post",()=>$(f,Q,D));return Q}catch(Q){if(m)V=Y("immutableWrapAsync.error",()=>m(f,Q,D));throw Q}finally{if(P)Y("immutableWrapAsync.finally",()=>{P(V)})}}}var sR=W(()=>{O()});var iR=W(()=>{O()});function UR(R,q={}){let{pre:E,yield:$,post:m,error:P,finally:F}=q;return async function*(...f){let V=E?Y("immutableWrapAsyncIterator.pre",()=>E(...f)):void 0,Q;try{for await(let H of R(...f)){if($)Y("immutableWrapAsyncIterator.yield",()=>{$(V,H)});yield H}if(m)Q=Y("immutableWrapAsyncIterator.post",()=>m(V))}catch(H){if(P)Q=Y("immutableWrapAsyncIterator.error",()=>P(V,H));throw H}finally{if(F)Y("immutableWrapAsyncIterator.finally",()=>{F(Q)})}}}var FR=W(()=>{O()});function i(R,q){let E=R[Symbol.asyncIterator].bind(R),$=UR(()=>({[Symbol.asyncIterator]:E}),{yield:(m,P)=>{q.onYield(P)},post:()=>{q.onDone()},error:(m,P)=>{q.onError(P)},finally:()=>{q.onFinally()}});R[Symbol.asyncIterator]=()=>$()}var nR=W(()=>{FR()});var V1=W(()=>{lR();sR();iR();FR();nR()});function W1(R,q){Y("recordChatUsage",()=>{let E=q.prompt_tokens_details?.cached_tokens??0,$=q.prompt_tokens+q.completion_tokens+E;U.recordLLMMetadata({non_cached_input_tokens:$>q.total_tokens?q.prompt_tokens-E:q.prompt_tokens,output_tokens:q.completion_tokens||void 0,cache_read_input_tokens:E||void 0},R),U.setAttribute("judgment.usage.metadata",h(q),R)})}var fR=W(()=>{E1();A();O();L()});function gR(R){R.chat.completions.parse=v(R.chat.completions.parse.bind(R.chat.completions),{pre:(q)=>{let E=U.startSpan("OPENAI_API_CALL");return U.setSpanKind("llm",E),U.recordLLMMetadata({model:q.model},E),U.setInput(q,E),E},post:(q,E)=>{if(!q)return;if(U.setOutput(h(E),q),E.usage)W1(q,E.usage);return 
U.recordLLMMetadata({model:E.model},q),q},error:(q,E)=>{if(q)U.setError(E,q);return q},finally:(q)=>{q?.end()}})}var cR=W(()=>{A();L();V1();fR()});function tR(R){R.chat.completions.create=v(R.chat.completions.create.bind(R.chat.completions),{pre:(q)=>{if(q.stream)q.stream_options??={include_usage:!0};let E=U.startSpan("OPENAI_API_CALL");return U.setSpanKind("llm",E),U.recordLLMMetadata({model:q.model},E),U.setInput(q,E),{span:E,proxied:!1}},post:(q,E,$)=>{if(!q)return;let{span:m}=q;if($[0].stream){let F=E,D="";return i(F,{onYield(f){if(typeof f.choices[0]?.delta.content==="string")D+=f.choices[0].delta.content;if(f.usage)W1(m,f.usage)},onDone(){U.setOutput(D,m)},onError(f){U.setError(f,m)},onFinally(){m.end()}}),{span:m,proxied:!0}}let P=E;if(U.setOutput(h(P),m),P.usage)W1(m,P.usage);return U.recordLLMMetadata({model:P.model},m),q},error:(q,E)=>{if(q)U.setError(E,q.span);return q},finally:(q)=>{if(q&&!q.proxied)q.span.end()}})}var rR=W(()=>{A();L();V1();fR()});function eR(R,q){Y("images.recordUsage",()=>{let E="input_tokens_details"in q?q.input_tokens_details:void 0,$=E?.image_tokens??0;if(U.recordLLMMetadata({non_cached_input_tokens:E?.text_tokens??0,output_tokens:q.output_tokens||void 0},R),$)U.setAttribute("judgment.usage.non_cached_input_image_tokens",$,R);if(q.output_tokens)U.setAttribute("judgment.usage.output_image_tokens",q.output_tokens,R);U.setAttribute("judgment.usage.metadata",h(q),R)})}function aR(R){R.images.generate=v(R.images.generate.bind(R.images),{pre:(q)=>{let E=U.startSpan("OPENAI_API_CALL");return U.setSpanKind("llm",E),U.recordLLMMetadata({model:q.model},E),U.setInput(q,E),{span:E,proxied:!1}},post:(q,E,$)=>{if(!q)return;let{span:m}=q;if($[0].stream){let F=E,D;return i(F,{onYield(f){if(lq.has(f.type))D=f,eR(m,D.usage)},onDone(){U.setOutput(h(D??{}),m)},onError(f){U.setError(f,m)},onFinally(){m.end()}}),{span:m,proxied:!0}}let P=E;if(U.setOutput(h(P),m),P.usage)eR(m,P.usage);return q},error:(q,E)=>{if(q)U.setError(E,q.span);return 
q},finally:(q)=>{if(q&&!q.proxied)q.span.end()}})}var lq;var Rq=W(()=>{E1();A();O();L();V1();lq=new Set(["image_generation.completed","image_edit.completed"])});function qq(R,q){Y("responses.recordUsage",()=>{let E=q.input_tokens_details.cached_tokens,$=q.input_tokens+q.output_tokens+E;U.recordLLMMetadata({non_cached_input_tokens:$>q.total_tokens?q.input_tokens-E:q.input_tokens,output_tokens:q.output_tokens||void 0,cache_read_input_tokens:E||void 0},R),U.setAttribute("judgment.usage.metadata",h(q),R)})}function Eq(R){R.responses.create=v(R.responses.create.bind(R.responses),{pre:(q)=>{let E=U.startSpan("OPENAI_API_CALL");return U.setSpanKind("llm",E),U.recordLLMMetadata({model:q.model},E),U.setInput(q,E),{span:E,proxied:!1}},post:(q,E,$)=>{if(!q)return;let{span:m}=q;if($[0].stream){let F=E,D="";return i(F,{onYield(f){if(f.type==="response.output_text.delta")D+=f.delta;if(f.type==="response.completed"){let V=f.response;if(V.usage)qq(m,V.usage);U.recordLLMMetadata({model:V.model},m)}},onDone(){U.setOutput(D,m)},onError(f){U.setError(f,m)},onFinally(){m.end()}}),{span:m,proxied:!0}}let P=E;if(U.setOutput(h(P),m),P.usage)qq(m,P.usage);if(typeof P.model==="string")U.recordLLMMetadata({model:P.model},m);return q},error:(q,E)=>{if(q)U.setError(E,q.span);return q},finally:(q)=>{if(q&&!q.proxied)q.span.end()}})}var $q=W(()=>{E1();A();O();L();V1()});function B1(R){return Y("wrapOpenAI",()=>{tR(R),gR(R),Eq(R),aR(R)}),R}var mq=W(()=>{O();cR();rR();Rq();$q()});function A1(R){return B1(R)}var DR=W(()=>{mq()});function tq(R){return Function.prototype.toString.call(R)}function rq(R){let q=tq(R).replace(cq,"");return q.match(sq)||q.match(iq)}function Pq(R){let q=rq(R);if(!q||!q[1])return[];return q[1].split(nq).map((E)=>{return E.replace(gq,(m,P,F)=>F).trim()}).filter((E)=>E.length>0)}var sq,iq,nq,gq,cq;var Uq=W(()=>{sq=/^([^(]+?)=>/,iq=/^[^(]*\(\s*([^)]*)\)/m,nq=/,/,gq=/^\s*(_?)(\S+?)\1\s*$/,cq=/((\/\/.*$)|(\/\*[\s\S]*?\*\/))/gm});var 
Fq="=",T1=";",Q1=",",K1="baggage",fq=180,Dq=4096,Cq=8192;function Wq(R){return R.reduce((q,E)=>{let $=`${q}${q!==""?Q1:""}${E}`;return $.length>Cq?q:$},"")}function Qq(R){return R.getAllEntries().map(([q,E])=>{let $=`${encodeURIComponent(q)}=${encodeURIComponent(E.value)}`;if(E.metadata!==void 0)$+=T1+E.metadata.toString();return $})}function Yq(R){let q=R.split(T1);if(q.length<=0)return;let E=q.shift();if(!E)return;let $=E.indexOf(Fq);if($<=0)return;let m=decodeURIComponent(E.substring(0,$).trim()),P=decodeURIComponent(E.substring($+1).trim()),F;if(q.length>0)F=Vq.baggageEntryMetadataFromString(q.join(T1));return{key:m,value:P,metadata:F}}var Vq;var Hq=W(()=>{Vq=require("@opentelemetry/api")});class T{inject(R,q,E){let $=K(R);if(!$||Zq.isTracingSuppressed(R))return;let m=Qq($).filter((F)=>F.length<=Dq).slice(0,fq),P=Wq(m);if(P.length>0)E.set(q,K1,P)}extract(R,q,E){let $=E.get(q,K1),m=Array.isArray($)?$.join(Q1):$;if(!m)return R;let P={};if(m.length===0)return R;if(m.split(Q1).forEach((D)=>{let f=Yq(D);if(f){let V={value:f.value};if(f.metadata)V.metadata=f.metadata;P[f.key]=V}}),Object.entries(P).length===0)return R;return H1(R,Y1(P))}fields(){return[K1]}}var Zq;var o1=W(()=>{Z1();Hq();Zq=require("@opentelemetry/core")});var u1={};h1(u1,{setBaggage:()=>H1,getBaggage:()=>K,getActiveBaggage:()=>eq,deleteBaggage:()=>aq,createBaggage:()=>Y1,baggageEntryMetadataFromString:()=>Sq.baggageEntryMetadataFromString,JudgmentBaggagePropagator:()=>T});function K(R){return R.getValue(CR)??void 0}function eq(){return K(X.getInstance().getCurrentContext())}function H1(R,q){return R.setValue(CR,q)}function aq(R){return R.deleteValue(CR)}var S1,Sq,Y1,CR;var Z1=W(()=>{B();o1();S1=require("@opentelemetry/api"),Sq=require("@opentelemetry/api"),Y1=S1.propagation.createBaggage.bind(S1.propagation),CR=S1.createContextKey("baggage")});var l1={};h1(l1,{setGlobalTextmap:()=>RE,inject:()=>qE,getGlobalTextmap:()=>VR,extract:()=>WR});function VR(){return Xq}function RE(R){Xq=R}function 
jq(R){if(R!==void 0)return R;return X.getInstance().getCurrentContext()}function qE(R,q,E=p1.defaultTextMapSetter){Y("propagation.inject",()=>{VR().inject(jq(q),R,E)})}function WR(R,q,E=p1.defaultTextMapGetter){let $=jq(q);return Y("propagation.extract",()=>VR().extract($,R,E),$)}var p1,d1,Xq;var QR=W(()=>{O();o1();B();p1=require("@opentelemetry/api"),d1=require("@opentelemetry/core"),Xq=new d1.CompositePropagator({propagators:[new d1.W3CTraceContextPropagator,new T]})});class U{projectName;projectId;apiKey;organizationId;apiUrl;environment;serializer;_tracerProvider;_client;_enableMonitoring;supportsLiveInstrumentation=!0;constructor(R,q,E,$,m,P,F,D,f,V){this.projectName=R,this.projectId=q,this.apiKey=E,this.organizationId=$,this.apiUrl=m,this.environment=P,this.serializer=F,this._tracerProvider=D,this._client=f,this._enableMonitoring=V}setActive(){return X.getInstance().setActive(this)}static _getProxyProvider(){return X.getInstance()}static _getSerializer(){return U._getProxyProvider().getActiveTracer()?.serializer??h}static _getCurrentTraceAndSpanId(){let q=U._getProxyProvider().getCurrentSpan();if(!q?.isRecording())return null;let E=q.spanContext();if(!E.traceId||!(E.traceFlags&1))return null;return[E.traceId,E.spanId]}static _emitPartial(){Y("BaseTracer._emitPartial",()=>{let R=U._getProxyProvider().getActiveTracer();if(!R||!R.supportsLiveInstrumentation)return;R.getSpanProcessor().emitPartial()})}static getCurrentSpan(){return U._getProxyProvider().getCurrentSpan()}static async forceFlush(){await U._getProxyProvider().forceFlush()}static async shutdown(){await U._getProxyProvider().shutdown()}static registerOTELInstrumentation(R){Y("BaseTracer.registerOTELInstrumentation",()=>{U._getProxyProvider().addInstrumentation(R)})}static wrap(R){return A1(R)}static getOTELTracer(){return U._getProxyProvider().getTracer(wq)}static startSpan(R,q){let E=U.getOTELTracer().startSpan(R,{attributes:q});return U._emitPartial(),E}static 
startActiveSpan(R,q){let{name:E,attributes:$}=R;return U.getOTELTracer().startActiveSpan(E,{attributes:$},(m)=>{U._emitPartial();try{let P=q(m);if(P instanceof Promise)return P.finally(()=>{m.end()});return m.end(),P}catch(P){throw m.end(),P}})}static span(R,q){return U.startActiveSpan({name:R},(E)=>{try{let $=q(E);if($ instanceof Promise)return $.catch((m)=>{throw E.setStatus({code:k.SpanStatusCode.ERROR,message:String(m)}),E.recordException(m),m});return $}catch($){throw E.setStatus({code:k.SpanStatusCode.ERROR,message:String($)}),E.recordException($),$}})}static with(R,q){return U.span(R,q)}static continueTrace(R,q){let E=U._getProxyProvider(),$=WR(R);return E.withContext($,()=>q($))}static observe(R,q){let E;if(typeof R==="function")E=R;else q=R;let{spanType:$="span",spanName:m,recordInput:P=!0,recordOutput:F=!0,fork:D=!1}=q??{},f=U._getProxyProvider(),V=(Q)=>{let H=m??Q.name;return function(...S){let b=f.getTracer(wq);if(D&&f.getActiveTracer()!==null&&f.getCurrentSpan()?.isRecording()===!0){let w=U._getSerializer(),j=b.startSpan(H),I=j.spanContext();if($)j.setAttribute("judgment.span_kind",$);let hR={"judgment.link.source_trace_id":I.traceId,"judgment.link.source_span_id":I.spanId};if($)hR["judgment.span_kind"]=$;let Jq=f.setSpan(f.getCurrentContext(),f.wrapSpanContext(k.INVALID_SPAN_CONTEXT)),g=b.startSpan(H,{attributes:hR},Jq),xR=g.spanContext();j.setAttribute("judgment.link.target_trace_id",xR.traceId),j.setAttribute("judgment.link.target_span_id",xR.spanId);let t1=()=>{g.end(),j.end()},MR=(x)=>{for(let J of[g,j])J.recordException(x),J.setStatus({code:k.SpanStatusCode.ERROR,message:String(x)})},GR=(x)=>{let J=s(x,w);g.setAttribute("judgment.output",J),j.setAttribute("judgment.output",J)};if(P){let x=s(hq(Q,S),w);g.setAttribute("judgment.input",x),j.setAttribute("judgment.input",x)}return U._emitPartial(),f.useSpan(g,!1,!1,!1,()=>{try{let x=Q.call(this,...S);if(x instanceof Promise)return x.then((J)=>{if(F)GR(J);return J}).catch((J)=>{throw 
MR(J),J}).finally(t1);if(F)GR(x);return t1(),x}catch(x){throw MR(x),t1(),x}})}return b.startActiveSpan(H,(w)=>{if($)w.setAttribute("judgment.span_kind",$);try{if(P)w.setAttribute("judgment.input",s(hq(Q,S),U._getSerializer()));U._emitPartial();let j=Q.call(this,...S);if(j instanceof Promise)return j.then((I)=>{if(F)w.setAttribute("judgment.output",s(I,U._getSerializer()));return I}).catch((I)=>{throw w.recordException(I),w.setStatus({code:k.SpanStatusCode.ERROR,message:String(I)}),I}).finally(()=>{w.end()});if(F)w.setAttribute("judgment.output",s(j,U._getSerializer()));return w.end(),j}catch(j){throw w.recordException(j),w.setStatus({code:k.SpanStatusCode.ERROR,message:String(j)}),w.end(),j}})}};if(!E)return V;return V(E)}static _resolveSpan(R){if(R)return R;return U._getProxyProvider().getCurrentSpan()}static setSpanKind(R,q){Y("BaseTracer.setSpanKind",()=>{if(!R)return;let E=U._resolveSpan(q);if(E?.isRecording())E.setAttribute("judgment.span_kind",R)})}static setLLMSpan(){U.setSpanKind("llm")}static setToolSpan(){U.setSpanKind("tool")}static setGeneralSpan(){U.setSpanKind("span")}static setAttribute(R,q,E){Y("BaseTracer.setAttribute",()=>{let $=U._resolveSpan(E);if(!$?.isRecording())return;if(!R||q==null)return;$.setAttribute(R,s(q,U._getSerializer()))})}static setAttributes(R,q){for(let[E,$]of Object.entries(R))if(q)U.setAttribute(E,$,q);else U.setAttribute(E,$)}static setInput(R,q){if(q)U.setAttribute("judgment.input",R,q);else U.setAttribute("judgment.input",R)}static setOutput(R,q){if(q)U.setAttribute("judgment.output",R,q);else U.setAttribute("judgment.output",R)}static setError(R,q){Y("BaseTracer.setError",()=>{let E=U._resolveSpan(q);if(!E?.isRecording())return;E.recordException(R),E.setStatus({code:k.SpanStatusCode.ERROR,message:String(R)})})}static recordLLMMetadata(R,q){Y("BaseTracer.recordLLMMetadata",()=>{let E=U._resolveSpan(q);if(!E?.isRecording())return;if(typeof R.model==="string")E.setAttribute("judgment.llm.model",R.model);if(typeof 
R.provider==="string")E.setAttribute("judgment.llm.provider",R.provider);if(typeof R.non_cached_input_tokens==="number")E.setAttribute("judgment.usage.non_cached_input_tokens",R.non_cached_input_tokens);if(typeof R.output_tokens==="number")E.setAttribute("judgment.usage.output_tokens",R.output_tokens);if(typeof R.cache_read_input_tokens==="number")E.setAttribute("judgment.usage.cache_read_input_tokens",R.cache_read_input_tokens);if(typeof R.cache_creation_input_tokens==="number")E.setAttribute("judgment.usage.cache_creation_input_tokens",R.cache_creation_input_tokens);if(typeof R.total_cost_usd==="number")E.setAttribute("judgment.usage.total_cost_usd",R.total_cost_usd)})}static _setPropagatingBaggageKey(R,q){Y("BaseTracer._setPropagatingBaggageKey",()=>{let E=U._getProxyProvider(),$=E.getCurrentSpan();if(!$?.isRecording())return;$.setAttribute(R,q);let m=E.getCurrentContext(),P=(K(m)??Y1()).setEntry(R,{value:q});E.attachContext(H1(m,P))})}static setCustomerId(R){U._setPropagatingBaggageKey("judgment.customer_id",R)}static setCustomerUserId(R){U._setPropagatingBaggageKey("judgment.customer_user_id",R)}static setSessionId(R){U._setPropagatingBaggageKey("judgment.session_id",R)}static tag(R){Y("BaseTracer.tag",()=>{if(!R||Array.isArray(R)&&R.length===0)return;let E=U._getProxyProvider().getActiveTracer();if(!E?.projectId||!E._client)return;if(!E.supportsLiveInstrumentation)return;let $=U._getCurrentTraceAndSpanId();if(!$)return;let[m]=$,P=Array.isArray(R)?R:[R];E._client.postV1projectsTracesByTraceIdTags(E.projectId,m,{tags:P}).catch((F)=>{C.error(`tag failed: ${String(F)}`)})})}static asyncEvaluate(R,q){Y("BaseTracer.asyncEvaluate",()=>{let{judge:E,example:$}=R,P=U._getProxyProvider().getActiveTracer();if(!P?.projectId)return;if(!P.supportsLiveInstrumentation)return;let F=U._resolveSpan(q);if(!F?.isRecording())return;let 
D=P.getSpanProcessor(),f=F.spanContext(),V=D.stateIncr(f,"pending_evals_count"),Q={project_id:P.projectId,eval_name:`async_evaluate_${E}_${V}`,judges:[{name:E}],examples:[{...$,example_id:xq.randomUUID(),created_at:new Date().toISOString(),trace_id:f.traceId,span_id:f.spanId}],is_offline:!1,is_behavior:!1},H=D.stateAppend(f,"pending_evals",Q);F.setAttribute("judgment.pending_trace_eval",JSON.stringify(H))})}}function hq(R,q){try{let E=Pq(R).map((m)=>m.replace(/^\.\.\./,"").split("=")[0].trim()).filter((m)=>m.length>0),$={};return E.forEach((m,P)=>{if(P<q.length)$[m]=q[P]}),$}catch{return{}}}var k,xq,wq="judgeval";var A=W(()=>{E1();DR();Uq();O();y();L();Z1();B();QR();k=require("@opentelemetry/api"),xq=require("crypto")});class N{_delegate;constructor(R,q,E,$){this._delegate=new Mq.OTLPTraceExporter({url:R,headers:{Authorization:`Bearer ${q}`,"X-Organization-Id":E,"X-Project-Id":$}})}export(R,q){C.info(`Exported ${R.length} spans`),this._delegate?.export(R,q)}shutdown(){return this._delegate?.shutdown()??Promise.resolve()}forceFlush(){return this._delegate?.forceFlush()??Promise.resolve()}}var Mq;var X1=W(()=>{y();Mq=require("@opentelemetry/exporter-trace-otlp-http")});var Gq,o;var s1=W(()=>{X1();Gq=require("@opentelemetry/core");o=class o extends N{constructor(){super("https://api.judgmentlabs.ai/otel/v1/traces","","","");this._delegate=null}export(R,q){q({code:Gq.ExportResultCode.SUCCESS})}shutdown(){return Promise.resolve()}forceFlush(){return Promise.resolve()}}});class $1{_keyPredicate;constructor(R=i1){this._keyPredicate=R}onStart(R,q){let E=K(q)?.getAllEntries()??[];for(let[$,m]of E)if(this._keyPredicate($))R.setAttribute($,m.value)}onEnd(R){}forceFlush(){return Promise.resolve()}shutdown(){return Promise.resolve()}}var i1=()=>!0;var YR=W(()=>{Z1()});function m1(R){return`${R.traceId}:${R.spanId}`}function $E(R){return R[0]===0&&R[1]===0}var _q,u;var j1=W(()=>{E1();O();B();YR();_q=require("@opentelemetry/sdk-trace-base");u=class u extends 
_q.BatchSpanProcessor{/** Fields: tracer is the owning tracer; _state maps a "traceId:spanId" key (see m1) to a per-span Map of values. */tracer;_state=new Map;_spanFinalizers;_baggageProcessor;/** Passes exporter q and config E to the base processor, keeps tracer R, creates a FinalizationRegistry whose callback deletes the state entry for the held key, and creates a $1 baggage processor. */constructor(R,q,E){super(q,E);this.tracer=R,this._spanFinalizers=new FinalizationRegistry(($)=>{this._cleanupSpanState($)}),this._baggageProcessor=new $1}/** Deletes the state entry stored under key R. */_cleanupSpanState(R){this._state.delete(R)}/** Registers span R with the finalization registry under its state key; spans without a traceId or spanId are skipped. */_registerSpan(R){let q=R.spanContext();if(!q.traceId||!q.spanId)return;let E=m1(q);this._spanFinalizers.register(R,E)}/** Stores E under name q in the state of span context R, creating the per-span Map on first use. */stateSet(R,q,E){let $=m1(R),m=this._state.get($);if(!m)m=new Map,this._state.set($,m);m.set(q,E)}/** Returns the value stored under q for span context R, or the default E when nothing is stored. */stateGet(R,q,E){let $=m1(R),m=this._state.get($);if(!m?.has(q))return E;return m.get(q)}/** Increments the numeric counter q for span context R and returns the value BEFORE the increment (0 on first use, or when the stored value is not a number). */stateIncr(R,q){let E=m1(R),$=this._state.get(E);if(!$)$=new Map,this._state.set(E,$);let m=$.get(q),P=typeof m==="number"?m:0;return $.set(q,P+1),P}/** Appends E to the array stored under q for span context R (building a new array rather than mutating) and returns the new array. */stateAppend(R,q,E){let $=m1(R),m=this._state.get($);if(!m)m=new Map,this._state.set($,m);let P=m.get(q),F=Array.isArray(P)?[...P,E]:[E];return m.set(q,F),F}/** Sends a snapshot of span R to the base onEnd. The snapshot is Object.create(R) with two overridden read-only properties: attributes (a copy carrying the next "judgment.update_id", minus "judgment.pending_trace_eval" when q is true) and endTime (startTime is substituted when endTime is all zeros). Spans without a traceId are ignored. */_emitSpan(R,q=!1){let E=R.spanContext();if(!E.traceId)return;let $=this.stateIncr(E,"judgment.update_id"),m={...R.attributes,"judgment.update_id":$};if(q)delete m["judgment.pending_trace_eval"];let P=Object.create(R);Object.defineProperty(P,"attributes",{value:m,writable:!1});let F=$E(R.endTime)?R.startTime:R.endTime;Object.defineProperty(P,"endTime",{value:F,writable:!1}),super.onEnd(P)}/** Emits a snapshot of the current span (with the pending-eval attribute stripped) unless the span is not recording, has no traceId, or has the "disable_partial_emit" state set. */emitPartial(){Y("JudgmentSpanProcessor.emitPartial",()=>{let q=X.getInstance().getCurrentSpan();if(!q?.isRecording())return;let E=q.spanContext();if(!E.traceId)return;if(this.stateGet(E,"disable_partial_emit",!1))return;this._emitSpan(q,!0)})}/** Runs the baggage processor's onStart for the span, then registers the span for state cleanup. */onStart(R,q){Y("JudgmentSpanProcessor.onStart",()=>{this._baggageProcessor.onStart(R,q),this._registerSpan(R)})}/** Spans without a traceId go straight to the base onEnd. Otherwise the span is emitted unless its "cancelled" state is set, and its state entry is always removed afterwards (finally). */onEnd(R){Y("JudgmentSpanProcessor.onEnd",()=>{let q=R.spanContext();if(!q.traceId){super.onEnd(R);return}let E=m1(q);try{if(!this.stateGet(q,"cancelled",!1))this._emitSpan(R)}finally{this._cleanupSpanState(E)}})}}});var P1;var HR=W(()=>{s1();j1();/** Processor (exported as NoOpSpanProcessor) built with a null tracer and a no-op exporter; its span hooks are empty. */P1=class P1 extends u{constructor(){super(null,new o)}onStart(R,q){}onEnd(R){}shutdown(){return 
Promise.resolve()}forceFlush(){return Promise.resolve()}/** The state helpers below are stubs: nothing is stored, stateGet returns the supplied default, stateIncr returns 0 and stateAppend returns a one-element array. */emitPartial(){}stateSet(R,q,E){}stateGet(R,q,E){return E}stateIncr(R,q){return 0}stateAppend(R,q,E){return[E]}}});var n1,ZR,n;var SR=W(()=>{D1();M1();y();G1();L();qR();A();B();X1();s1();j1();HR();n1=require("@opentelemetry/resources"),ZR=require("@opentelemetry/sdk-trace-node");/** Tracer (exported as Tracer): a BaseTracer subclass that lazily builds its own span exporter and span processor. */n=class n extends U{_spanExporter=null;_spanProcessor=null;constructor(R,q,E,$,m,P,F,D,f,V){super(R,q,E,$,m,P,F,D,f,V)}/** Builds, registers and (unless R.setActive is false) activates a tracer. Options fall back to the module-level defaults c / t / r / h (presumably env-derived - confirm at their definitions). Export is disabled, with a warning, when project name, API key, organization id or API URL is missing, or when the project lookup e(client, name) rejects or resolves falsy. Only when export is enabled is a second NodeTracerProvider created that includes this tracer's span processor plus any R.spanProcessors. */static async init(R={}){let q=R.apiKey??c,E=R.organizationId??t,$=R.apiUrl??r,m=R.projectName??null,P=R.serializer??h,F=!0;if(!m)C.warning("project_name not provided. Tracer will not export spans."),F=!1;if(!q)C.warning("api_key not provided. Tracer will not export spans."),F=!1;if(!E)C.warning("organization_id not provided. Tracer will not export spans."),F=!1;if(!$)C.warning("api_url not provided. Tracer will not export spans."),F=!1;let D=null,f=null;if(F&&m&&q&&E&&$){if(D=new d($,q,E),f=await e(D,m).catch(()=>null),!f)C.warning(`Project '${m}' not found. 
Tracer will not export spans.`),F=!1}let V={"service.name":m??"unknown","telemetry.sdk.name":"judgeval","telemetry.sdk.version":L1};if(R.environment)V["deployment.environment"]=R.environment;if(R.resourceAttributes)Object.assign(V,R.resourceAttributes);let Q=n1.defaultResource().merge(n1.resourceFromAttributes(V)),H=new ZR.NodeTracerProvider({resource:Q,sampler:R.sampler,spanLimits:R.spanLimits}),S=new n(m,f,q,E,$,R.environment??null,P,H,D,F);if(F){let _=new ZR.NodeTracerProvider({resource:Q,sampler:R.sampler,spanLimits:R.spanLimits,spanProcessors:[S.getSpanProcessor(),...R.spanProcessors??[]]});S._tracerProvider=_}if(X.getInstance().register(S),R.setActive??!0)S.setActive();return S}/** Lazily creates and caches the exporter: a no-op exporter when _enableMonitoring is off or any of projectId / apiKey / organizationId / apiUrl is missing, otherwise an N exporter aimed at "<apiUrl>/otel/v1/traces" (a trailing slash on apiUrl is handled). */getSpanExporter(){if(this._spanExporter)return this._spanExporter;if(!this._enableMonitoring||!this.projectId||!this.apiKey||!this.organizationId||!this.apiUrl)this._spanExporter=new o;else{let R=this.apiUrl.endsWith("/")?this.apiUrl+"otel/v1/traces":this.apiUrl+"/otel/v1/traces";this._spanExporter=new N(R,this.apiKey,this.organizationId,this.projectId)}return this._spanExporter}/** Lazily creates and caches the span processor: a no-op processor when _enableMonitoring is off, otherwise a u processor bound to this tracer and its exporter. */getSpanProcessor(){if(this._spanProcessor)return this._spanProcessor;if(!this._enableMonitoring)this._spanProcessor=new P1;else this._spanProcessor=new u(this,this.getSpanExporter());return this._spanProcessor}}});var U1;var XR=W(()=>{C1();j1();/** Processor (exported as OfflineJudgmentSpanProcessor) that also pushes one example per trace into a dataset. */U1=class U1 extends u{_dataset;_exampleFields;_seenTraceIds=new Set;/** R and q go to the base processor (tracer, exporter); E supplies the dataset and optional exampleFields, which are shallow-copied. */constructor(R,q,E){super(R,q);this._dataset=E.dataset,this._exampleFields={...E.exampleFields??{}}}/** For a span without parentSpanContext, and only the first time its traceId is seen, pushes G.create({...exampleFields, offline_trace_id}) onto the dataset. NOTE(review): _seenTraceIds is never pruned in the code visible here. */_maybeCreateExample(R){if(R.parentSpanContext)return;let q=R.spanContext();if(!q?.traceId)return;if(this._seenTraceIds.has(q.traceId))return;this._seenTraceIds.add(q.traceId);let E=G.create({...this._exampleFields,offline_trace_id:q.traceId});this._dataset.push(E)}/** Attempts example creation, then always runs the base onEnd (finally). */onEnd(R){try{this._maybeCreateExample(R)}finally{super.onEnd(R)}}}});var bq={};h1(bq,{OfflineTracer:()=>F1});var g1,jR,Oq="otel/v1/offline-traces",F1;var 
wR=W(()=>{D1();M1();G1();L();qR();B();SR();X1();XR();g1=require("@opentelemetry/resources"),jR=require("@opentelemetry/sdk-trace-node");/** Tracer variant (exported as OfflineTracer) with supportsLiveInstrumentation=false; it exports to the "otel/v1/offline-traces" path and feeds a dataset through a U1 processor. */F1=class F1 extends n{supportsLiveInstrumentation=!1;_offlineApiUrl;_offlineApiKey;_offlineOrganizationId;_offlineProjectId;_dataset;_exampleFields;_offlineSpanExporter=null;_offlineSpanProcessor=null;/** Takes a single options object R, forwards its fields positionally to the base constructor (last argument true), and keeps the API credentials, dataset and exampleFields. */constructor(R){super(R.projectName,R.projectId,R.apiKey,R.organizationId,R.apiUrl,R.environment,R.serializer,R.tracerProvider,R.client,!0);this._offlineApiUrl=R.apiUrl,this._offlineApiKey=R.apiKey,this._offlineOrganizationId=R.organizationId,this._offlineProjectId=R.projectId,this._dataset=R.dataset,this._exampleFields=R.exampleFields}/** Creates, registers and (unless R.setActive is false) activates an offline tracer. Throws when projectName, apiKey, organizationId or apiUrl is missing, or when the project lookup e(client, name) rejects. The resource carries "judgment.offline":"true". NOTE(review): a lookup that resolves falsy is not rejected here, unlike the not-found check in Tracer.init - confirm whether e can resolve to null. */static async create(R){let q=R.apiKey??c,E=R.organizationId??t,$=R.apiUrl??r,m=R.projectName,P=R.serializer??h;if(!m)throw Error("projectName is required for OfflineTracer");if(!q)throw Error("apiKey is required for OfflineTracer");if(!E)throw Error("organizationId is required for OfflineTracer");if(!$)throw Error("apiUrl is required for OfflineTracer");let F=new d($,q,E),D;try{D=await e(F,m)}catch(b){throw Error(`Project '${m}' not found; cannot start OfflineTracer: ${String(b)}`)}let f={"service.name":m,"telemetry.sdk.name":"judgeval","telemetry.sdk.version":L1,"judgment.offline":"true"};if(R.environment)f["deployment.environment"]=R.environment;if(R.resourceAttributes)Object.assign(f,R.resourceAttributes);let V=g1.defaultResource().merge(g1.resourceFromAttributes(f)),Q=new F1({projectName:m,projectId:D,apiKey:q,organizationId:E,apiUrl:$,environment:R.environment??null,serializer:P,tracerProvider:new jR.NodeTracerProvider({resource:V}),client:F,dataset:R.dataset,exampleFields:{...R.exampleFields??{}}}),H=new jR.NodeTracerProvider({resource:V,sampler:R.sampler,spanLimits:R.spanLimits,spanProcessors:[Q.getSpanProcessor(),...R.spanProcessors??[]]});if(Q._tracerProvider=H,X.getInstance().register(Q),R.setActive??!0)Q.setActive();return Q}/** Lazily creates and caches an N exporter aimed at "<apiUrl>/otel/v1/offline-traces" (a trailing slash on the API URL is handled). */getSpanExporter(){if(this._offlineSpanExporter)return 
this._offlineSpanExporter;let R=this._offlineApiUrl.endsWith("/")?this._offlineApiUrl+Oq:this._offlineApiUrl+"/"+Oq;return this._offlineSpanExporter=new N(R,this._offlineApiKey,this._offlineOrganizationId,this._offlineProjectId),this._offlineSpanExporter}/** Lazily creates and caches a U1 processor bound to this tracer, its exporter, the dataset and the example fields. */getSpanProcessor(){if(this._offlineSpanProcessor)return this._offlineSpanProcessor;return this._offlineSpanProcessor=new U1(this,this.getSpanExporter(),{dataset:this._dataset,exampleFields:this._exampleFields}),this._offlineSpanProcessor}}});/* Public export map: each public name is exposed as a getter over its minified binding. */var mE={};h1(mE,{wrapOpenAI:()=>B1,wrap:()=>A1,propagation:()=>l1,baggage:()=>u1,Tracer:()=>n,OfflineTracer:()=>F1,OfflineJudgmentSpanProcessor:()=>U1,NoOpSpanProcessor:()=>P1,NoOpSpanExporter:()=>o,JudgmentTracerProvider:()=>X,JudgmentSpanExporter:()=>N,JudgmentBaggageSpanProcessor:()=>$1,JudgmentBaggagePropagator:()=>T,Judgeval:()=>c1,Judge:()=>a,Example:()=>G,Evaluation:()=>q1,Dataset:()=>l,BaseTracer:()=>U,ALLOW_ALL_BAGGAGE_KEYS:()=>i1});module.exports=Lq(mE);D1();M1();G1();/** Empty class exported as Judge. */class a{}var a1=w1(b1(),1);/* Z.default supplies the colour/style functions used for console output; Aq is the delay in milliseconds between polls in _poll. */var Z=w1(b1(),1),Aq=2000;/** Helper holding an API client plus a project id and name; polls for evaluation results and prints them. */class R1{_client;_projectId;_projectName;constructor(R,q,E){this._client=R,this._projectId=q,this._projectName=E}/** Polls getV1projectsExperimentsByRunId(R, q) every Aq ms until at least E results are returned, then logs the elapsed time and resolves to {results, url}. The url falls back to the literal "Failed to get UI results URL" when the response has no ui_results_url. Throws Error once more than $ seconds have elapsed; the check runs before each request. */async _poll(R,q,E,$){let m=Date.now();while(!0){let P=(Date.now()-m)/1000;if(P>$)throw Error(`Evaluation timed out after ${$}s`);let F=await this._client.getV1projectsExperimentsByRunId(R,q),D=F.results??[];if(D.length>=E){let V=F.ui_results_url??"Failed to get UI results URL";return console.log(`${Z.default.green("✓")} Evals completed and saved in ${Z.default.bold(`${P.toFixed(1)}s`)}`),{results:D,url:V}}await new Promise((V)=>setTimeout(V,Aq))}}_displayResults(R,q,E,$){let m=[],P=0,F=0;console.log();for(let D=0;D<q.length;D++){let f=q[D],V=f.scorers.every((Q)=>Boolean(Q.success));if(V)P++,console.log(`${Z.default.green("✓")} Example ${D+1}: ${Z.default.green("PASSED")}`);else F++,console.log(`${Z.default.red("✗")} Example ${D+1}: ${Z.default.red("FAILED")}`);for(let Q of f.scorers){let 
H=Q.score!==null?Q.score.toFixed(3):"N/A",S=Q.success?Z.default.green(H):Z.default.red(H);console.log(` ${Z.default.dim(`${Q.name}:`)} ${S} ${Z.default.dim(`(threshold: ${Q.threshold})`)}`)}m.push({success:V,scorers:f.scorers,example:R[D]})}if(console.log(),P===m.length)console.log(`${Z.default.bold(Z.default.green("✓ All tests passed!"))} (${P}/${m.length})`);else console.log(`${Z.default.bold(Z.default.yellow("⚠ Results:"))} ${Z.default.green(`${P} passed`)} | ${Z.default.red(`${F} failed`)}`);if(console.log(`${Z.default.dim("View full details:")} ${Z.default.underline(E)}`),console.log(),$&&m.some((D)=>!D.success)){let D=[`Evaluation failed: ${F}/${m.length} examples failed`];for(let f=0;f<m.length;f++)if(!m[f].success){D.push(` Example ${f+1}:`);for(let V of m[f].scorers)if(!V.success){if(D.push(` ${V.name}: ${V.score!==null?V.score.toFixed(3):"N/A"} (threshold: ${V.threshold})`),V.reason)D.push(` ${V.reason}`)}}throw Error(D.join(`
|
|
4
|
-
`))}return
|
|
1
|
+
// ---------------------------------------------------------------------------
// Module-interop helpers.
// NOTE(review): these look bundler-generated (minified names, no call sites
// for some of them in this part of the file). Their logic is unchanged; they
// are only expanded and commented.
// ---------------------------------------------------------------------------
var zq = Object.create;
var {
  getPrototypeOf: vq,
  defineProperty: F1,
  getOwnPropertyNames: jR,
  getOwnPropertyDescriptor: Nq
} = Object;
var MR = Object.prototype.hasOwnProperty;

/**
 * Builds a namespace-style view of `mod`.
 *
 * A fresh object is created whose prototype is `mod`'s prototype. A `default`
 * property pointing at `mod` is defined when `isNodeMode` is truthy, when
 * `mod` is falsy, or when `mod` has no truthy `__esModule` flag. Every own
 * property name of `mod` that the result does not already own is exposed
 * through an enumerable getter that reads `mod` live.
 *
 * The third parameter is only used as a local; whatever is passed is discarded.
 */
var w1 = (mod, isNodeMode, target) => {
  target = mod != null ? zq(vq(mod)) : {};
  let namespace =
    isNodeMode || !mod || !mod.__esModule
      ? F1(target, "default", { value: mod, enumerable: true })
      : target;
  for (let key of jR(mod)) {
    if (!MR.call(namespace, key)) {
      F1(namespace, key, { get: () => mod[key], enumerable: true });
    }
  }
  return namespace;
};

// Cache used by `Iq`, keyed by the source object.
var _R = new WeakMap();

/**
 * Returns an object flagged with `__esModule: true` that mirrors `source`
 * through getters. The result is memoised per `source` in `_R`.
 *
 * Each mirrored property is enumerable unless `source`'s own descriptor for
 * that key says otherwise.
 */
var Iq = (source) => {
  var cached = _R.get(source),
    descriptor;
  if (cached) return cached;
  cached = F1({}, "__esModule", { value: true });
  if ((source && typeof source === "object") || typeof source === "function") {
    // `.map` is used purely for its side effects (the original does the same).
    jR(source).map(
      (key) =>
        !MR.call(cached, key) &&
        F1(cached, key, {
          get: () => source[key],
          enumerable: !(descriptor = Nq(source, key)) || descriptor.enumerable
        })
    );
  }
  _R.set(source, cached);
  return cached;
};

/**
 * Wraps a `(exports, module) => void` factory so it runs at most once.
 * The returned thunk yields `module.exports`; the module record is created
 * before the factory is invoked, so a throwing factory is not retried.
 */
var kq = (factory, moduleRecord) => () => {
  if (!moduleRecord) {
    moduleRecord = { exports: {} };
    factory(moduleRecord.exports, moduleRecord);
  }
  return moduleRecord.exports;
};

/**
 * Defines one enumerable, configurable accessor on `target` per key of
 * `getters`, using `getters[key]` as the getter.
 *
 * NOTE(review): `name` is `var`-scoped, so every setter closes over the same
 * binding and, once the loop has finished, writes to the last key. The getter
 * installed on `target` is captured by value and is not affected by that
 * write. Left exactly as in the original.
 */
var G1 = (target, getters) => {
  for (var name in getters) {
    F1(target, name, {
      get: getters[name],
      enumerable: true,
      configurable: true,
      set: (value) => (getters[name] = () => value)
    });
  }
};

/**
 * Turns `init` into a run-once thunk. On the first call `init` is cleared
 * (so a throwing initialiser is never retried), invoked with the argument `0`,
 * and its return value is stored; later calls return the stored value.
 */
var V = (init, result) => () => {
  if (init) {
    let run = init;
    init = 0;
    result = run(0);
  }
  return result;
};

/**
 * Reads an environment variable.
 *
 * @param name     Variable name looked up in `process.env`.
 * @param fallback Value returned when the variable is unset or empty.
 * @returns The variable's value; otherwise `fallback`, or `null` when
 *          `fallback` is `undefined`/`null`.
 */
function J1(name, fallback) {
  let value = process.env[name];
  if (!value) return fallback ?? null;
  return value;
}

// Environment-derived configuration, populated by the first call to `C1()`.
//   i  <- JUDGMENT_API_KEY   (null when unset)
//   t  <- JUDGMENT_ORG_ID    (null when unset)
//   r  <- JUDGMENT_API_URL   (default "https://api.judgmentlabs.ai")
//   bR <- JUDGMENT_LOG_LEVEL (default "warn")
var i, t, r, bR;
var C1 = V(() => {
  i = J1("JUDGMENT_API_KEY");
  t = J1("JUDGMENT_ORG_ID");
  r = J1("JUDGMENT_API_URL", "https://api.judgmentlabs.ai");
  bR = J1("JUDGMENT_LOG_LEVEL", "warn");
});

/**
 * Thin HTTP client: one method per endpoint, all funnelled through
 * `request()`.
 *
 * Every value substituted into a URL path is percent-encoded with
 * `encodeURIComponent`, so identifiers containing `/`, `?`, `#`, `%` or
 * whitespace address the intended resource instead of altering the request
 * target. Query parameters are serialised with `URLSearchParams`.
 */
class p {
  baseUrl;
  apiKey;
  organizationId;

  /**
   * @param baseUrl        Prefix prepended verbatim to every endpoint path.
   * @param apiKey         Sent as `Authorization: Bearer <apiKey>`.
   * @param organizationId Sent as the `X-Organization-Id` header.
   */
  constructor(baseUrl, apiKey, organizationId) {
    this.baseUrl = baseUrl;
    this.apiKey = apiKey;
    this.organizationId = organizationId;
  }

  getBaseUrl() {
    return this.baseUrl;
  }

  getApiKey() {
    return this.apiKey;
  }

  getOrganizationId() {
    return this.organizationId;
  }

  /**
   * Template tag that builds an absolute endpoint URL. Literal parts are kept
   * as written; each interpolated value is stringified and percent-encoded as
   * a single path segment.
   */
  _endpoint(literals, ...params) {
    let path = literals[0];
    for (let index = 0; index < params.length; index++) {
      path += encodeURIComponent(String(params[index])) + literals[index + 1];
    }
    return this.baseUrl + path;
  }

  /**
   * Serialises `[key, value]` pairs into a query string, skipping pairs whose
   * value is `undefined`.
   *
   * @returns `"?a=1&b=2"`, or `""` when nothing is left to send.
   */
  _query(entries) {
    let search = new URLSearchParams();
    for (let [key, value] of entries) {
      if (value !== undefined) search.set(key, value);
    }
    let encoded = search.toString();
    return encoded ? "?" + encoded : "";
  }

  /**
   * Performs one JSON request.
   *
   * @param method HTTP verb.
   * @param url    Absolute URL.
   * @param body   JSON-serialised when not `undefined`; otherwise no body is sent.
   * @returns The parsed JSON response body.
   * @throws Error `HTTP <status>: <response text>` when `response.ok` is false.
   */
  async request(method, url, body) {
    let response = await fetch(url, {
      method,
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`,
        "X-Organization-Id": this.organizationId
      },
      body: body !== undefined ? JSON.stringify(body) : undefined
    });
    if (!response.ok) {
      let detail = await response.text();
      throw Error(`HTTP ${response.status}: ${detail}`);
    }
    return response.json();
  }

  // --- OTEL ----------------------------------------------------------------

  /** POST /otel/v1/traces with an empty JSON object. */
  async postOtelV1traces() {
    return this.request("POST", this.baseUrl + "/otel/v1/traces", {});
  }

  /** POST /otel/v1/offline-traces with an empty JSON object. */
  async postOtelV1offlineTraces() {
    return this.request("POST", this.baseUrl + "/otel/v1/offline-traces", {});
  }

  // --- Projects ------------------------------------------------------------

  /** POST /v1/projects/resolve/ */
  async postV1projectsResolve(payload) {
    return this.request("POST", this.baseUrl + "/v1/projects/resolve/", payload);
  }

  /** POST /v1/projects */
  async postV1projects(payload) {
    return this.request("POST", this.baseUrl + "/v1/projects", payload);
  }

  /** DELETE /v1/projects/{projectId} with an empty JSON object as body. */
  async deleteV1projects(projectId) {
    return this.request("DELETE", this._endpoint`/v1/projects/${projectId}`, {});
  }

  // --- Datasets ------------------------------------------------------------

  /** POST /v1/projects/{projectId}/datasets */
  async postV1projectsDatasets(projectId, payload) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/datasets`, payload);
  }

  /** GET /v1/projects/{projectId}/datasets */
  async getV1projectsDatasets(projectId) {
    return this.request("GET", this._endpoint`/v1/projects/${projectId}/datasets`, undefined);
  }

  /** POST /v1/projects/{projectId}/datasets/{datasetName}/examples */
  async postV1projectsDatasetsByDatasetNameExamples(projectId, datasetName, payload) {
    return this.request(
      "POST",
      this._endpoint`/v1/projects/${projectId}/datasets/${datasetName}/examples`,
      payload
    );
  }

  /** GET /v1/projects/{projectId}/datasets/{datasetName} */
  async getV1projectsDatasetsByDatasetName(projectId, datasetName) {
    return this.request(
      "GET",
      this._endpoint`/v1/projects/${projectId}/datasets/${datasetName}`,
      undefined
    );
  }

  // --- Evaluation ----------------------------------------------------------

  /** POST /v1/projects/{projectId}/evaluate/examples */
  async postV1projectsEvaluateExamples(projectId, payload) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/evaluate/examples`, payload);
  }

  /** POST /v1/projects/{projectId}/evaluate/traces */
  async postV1projectsEvaluateTraces(projectId, payload) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/evaluate/traces`, payload);
  }

  /** POST /v1/projects/{projectId}/eval-results */
  async postV1projectsEvalResults(projectId, payload) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/eval-results`, payload);
  }

  /** POST /v1/projects/{projectId}/eval-results/examples */
  async postV1projectsEvalResultsExamples(projectId, payload) {
    return this.request(
      "POST",
      this._endpoint`/v1/projects/${projectId}/eval-results/examples`,
      payload
    );
  }

  /** GET /v1/projects/{projectId}/experiments/{runId} */
  async getV1projectsExperimentsByRunId(projectId, runId) {
    return this.request(
      "GET",
      this._endpoint`/v1/projects/${projectId}/experiments/${runId}`,
      undefined
    );
  }

  /** POST /v1/projects/{projectId}/eval-queue/examples */
  async postV1projectsEvalQueueExamples(projectId, payload) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/eval-queue/examples`, payload);
  }

  /** POST /v1/projects/{projectId}/eval-queue/traces */
  async postV1projectsEvalQueueTraces(projectId, payload) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/eval-queue/traces`, payload);
  }

  // --- Prompts -------------------------------------------------------------

  /**
   * GET /v1/projects/{projectId}/prompts/{name}
   * Optional query parameters: `commit_id`, `tag` (each sent only when not `undefined`).
   */
  async getV1projectsPromptsByName(projectId, name, commitId, tag) {
    let url =
      this._endpoint`/v1/projects/${projectId}/prompts/${name}` +
      this._query([
        ["commit_id", commitId],
        ["tag", tag]
      ]);
    return this.request("GET", url, undefined);
  }

  /** POST /v1/projects/{projectId}/prompts */
  async postV1projectsPrompts(projectId, payload) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/prompts`, payload);
  }

  /** POST /v1/projects/{projectId}/prompts/{name}/tags */
  async postV1projectsPromptsByNameTags(projectId, name, payload) {
    return this.request(
      "POST",
      this._endpoint`/v1/projects/${projectId}/prompts/${name}/tags`,
      payload
    );
  }

  /** DELETE /v1/projects/{projectId}/prompts/{name}/tags */
  async deleteV1projectsPromptsByNameTags(projectId, name, payload) {
    return this.request(
      "DELETE",
      this._endpoint`/v1/projects/${projectId}/prompts/${name}/tags`,
      payload
    );
  }

  /** GET /v1/projects/{projectId}/prompts/{name}/versions */
  async getV1projectsPromptsByNameVersions(projectId, name) {
    return this.request(
      "GET",
      this._endpoint`/v1/projects/${projectId}/prompts/${name}/versions`,
      undefined
    );
  }

  // --- Scorers -------------------------------------------------------------

  /**
   * GET /v1/projects/{projectId}/scorers
   * Optional query parameters: `names`, `is_trace` (each sent only when not `undefined`).
   */
  async getV1projectsScorers(projectId, names, isTrace) {
    let url =
      this._endpoint`/v1/projects/${projectId}/scorers` +
      this._query([
        ["names", names],
        ["is_trace", isTrace]
      ]);
    return this.request("GET", url, undefined);
  }

  /** GET /v1/projects/{projectId}/scorers/{name}/exists */
  async getV1projectsScorersByNameExists(projectId, name) {
    return this.request(
      "GET",
      this._endpoint`/v1/projects/${projectId}/scorers/${name}/exists`,
      undefined
    );
  }

  /** POST /v1/projects/{projectId}/scorers/custom with an empty JSON object. */
  async postV1projectsScorersCustom(projectId) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/scorers/custom`, {});
  }

  /** POST /v1/projects/{projectId}/scorers/custom/bundle with an empty JSON object. */
  async postV1projectsScorersCustomBundle(projectId) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/scorers/custom/bundle`, {});
  }

  /** GET /v1/projects/{projectId}/scorers/custom/{name}/exists */
  async getV1projectsScorersCustomByNameExists(projectId, name) {
    return this.request(
      "GET",
      this._endpoint`/v1/projects/${projectId}/scorers/custom/${name}/exists`,
      undefined
    );
  }

  // --- Traces --------------------------------------------------------------

  /** POST /v1/projects/{projectId}/traces/{traceId}/tags */
  async postV1projectsTracesByTraceIdTags(projectId, traceId, payload) {
    return this.request(
      "POST",
      this._endpoint`/v1/projects/${projectId}/traces/${traceId}/tags`,
      payload
    );
  }

  /** GET /v1/e2e_fetch_trace/{projectName}/{traceId} */
  async getV1e2eFetchTraceByProjectNameByTraceId(projectName, traceId) {
    return this.request(
      "GET",
      this._endpoint`/v1/e2e_fetch_trace/${projectName}/${traceId}`,
      undefined
    );
  }

  /**
   * GET /v1/e2e_traces_per_project/{projectName}
   * Optional query parameters: `limit`, `offset` (each sent only when not `undefined`).
   */
  async getV1e2eTracesPerProject(projectName, limit, offset) {
    let url =
      this._endpoint`/v1/e2e_traces_per_project/${projectName}` +
      this._query([
        ["limit", limit],
        ["offset", offset]
      ]);
    return this.request("GET", url, undefined);
  }

  /** POST /v1/e2e_fetch_span_score/ */
  async postV1e2eFetchSpanScore(payload) {
    return this.request("POST", this.baseUrl + "/v1/e2e_fetch_span_score/", payload);
  }

  // --- Judges --------------------------------------------------------------

  /** POST /v1/projects/{projectId}/judges */
  async postV1projectsJudges(projectId, payload) {
    return this.request("POST", this._endpoint`/v1/projects/${projectId}/judges`, payload);
  }

  /** PATCH /v1/projects/{projectId}/judges/{judgeId} */
  async patchV1projectsJudgesByJudgeId(projectId, judgeId, payload) {
    return this.request(
      "PATCH",
      this._endpoint`/v1/projects/${projectId}/judges/${judgeId}`,
      payload
    );
  }
}

// No-op module initialiser.
var O1 = () => {};

// In the original, the `var` keyword for `F` ends this span and the
// identifier starts the next physical line; the declaration is completed here
// so this span is self-contained. `F` is assigned further down the file.
var F;
F;var O=V(()=>{C1();F=class F{static RESET="\x1B[0m";static RED="\x1B[31m";static YELLOW="\x1B[33m";static GRAY="\x1B[90m";static Level={DEBUG:0,INFO:1,WARNING:2,ERROR:3,CRITICAL:4};static initialized=!1;static levelSetManually=!1;static currentLevel=F.Level.WARNING;static useColor=!0;static initialize(){if(!F.initialized){let R=process.env.JUDGMENT_NO_COLOR;if(F.useColor=!R&&process.stdout.isTTY,!F.levelSetManually){let q=bR.toLowerCase();if(q){let E={debug:F.Level.DEBUG,info:F.Level.INFO,warning:F.Level.WARNING,warn:F.Level.WARNING,error:F.Level.ERROR,critical:F.Level.CRITICAL};F.currentLevel=E[q]??F.Level.WARNING}}F.initialized=!0}}static setLevel(R){F.currentLevel=R,F.levelSetManually=!0}static setUseColor(R){F.useColor=R}static log(R,q){if(F.initialize(),R<F.currentLevel)return;let E=new Date().toISOString().replace("T"," ").substring(0,19),$=Object.keys(F.Level).find((U)=>F.Level[U]===R)??"UNKNOWN",f=`${E} - judgeval - ${$} - ${q}`;if(F.useColor)f=`${R===F.Level.DEBUG||R===F.Level.INFO?F.GRAY:R===F.Level.WARNING?F.YELLOW:F.RED}${f}${F.RESET}`;(R>=F.Level.ERROR?process.stderr:process.stdout).write(f+`
|
|
2
|
+
`)}static debug(R){F.log(F.Level.DEBUG,R)}static info(R){F.log(F.Level.INFO,R)}static warning(R){F.log(F.Level.WARNING,R)}static warn(R){F.log(F.Level.WARNING,R)}static error(R){F.log(F.Level.ERROR,R)}static critical(R){F.log(F.Level.CRITICAL,R)}}});async function zR(R,q={}){let{maxRetries:E=3,backoff:$=()=>1000,onRetry:f}=q;for(let P=1;P<=E;P++)try{return await R()}catch(U){if(P===E)throw U;f?.(P,U),await new Promise((C)=>setTimeout(C,$(P)))}throw Error("retry: exhausted all attempts")}async function e(R,q){let E=`org:${R.getOrganizationId()}:project:${q}`,$=vR.get(E);if($)return $;let f=e1.get(E);if(f)return f;let P=(async()=>{F.info(`Resolving project ID for project: ${q}`);let U=await zR(async()=>{let D=(await R.postV1projectsResolve({project_name:q})).project_id;if(!D)throw Error(`Project ID not found for project: ${q}`);return D},{maxRetries:3,backoff:(C)=>C*1000,onRetry:(C,D)=>{F.warning(`Failed to resolve project ID for '${q}' (attempt ${C}): ${String(D)}`)}});return F.info(`Resolved project ID: ${U}`),vR.set(E,U),U})();e1.set(E,P);try{return await P}finally{e1.delete(E)}}var vR,e1;var _1=V(()=>{O();vR=new Map,e1=new Map});var b1=kq((hE,a1)=>{var M1=process||{},NR=M1.argv||[],j1=M1.env||{},Lq=!(!!j1.NO_COLOR||NR.includes("--no-color"))&&(!!j1.FORCE_COLOR||NR.includes("--color")||M1.platform==="win32"||(M1.stdout||{}).isTTY&&j1.TERM!=="dumb"||!!j1.CI),yq=(R,q,E=R)=>($)=>{let f=""+$,P=f.indexOf(q,R.length);return~P?R+Aq(f,q,E,P)+q:R+f+q},Aq=(R,q,E,$)=>{let f="",P=0;do f+=R.substring(P,$)+E,P=$+q.length,$=R.indexOf(q,P);while(~$);return f+R.substring(P)},IR=(R=Lq)=>{let 
q=R?yq:()=>String;return{isColorSupported:R,reset:q("\x1B[0m","\x1B[0m"),bold:q("\x1B[1m","\x1B[22m","\x1B[22m\x1B[1m"),dim:q("\x1B[2m","\x1B[22m","\x1B[22m\x1B[2m"),italic:q("\x1B[3m","\x1B[23m"),underline:q("\x1B[4m","\x1B[24m"),inverse:q("\x1B[7m","\x1B[27m"),hidden:q("\x1B[8m","\x1B[28m"),strikethrough:q("\x1B[9m","\x1B[29m"),black:q("\x1B[30m","\x1B[39m"),red:q("\x1B[31m","\x1B[39m"),green:q("\x1B[32m","\x1B[39m"),yellow:q("\x1B[33m","\x1B[39m"),blue:q("\x1B[34m","\x1B[39m"),magenta:q("\x1B[35m","\x1B[39m"),cyan:q("\x1B[36m","\x1B[39m"),white:q("\x1B[37m","\x1B[39m"),gray:q("\x1B[90m","\x1B[39m"),bgBlack:q("\x1B[40m","\x1B[49m"),bgRed:q("\x1B[41m","\x1B[49m"),bgGreen:q("\x1B[42m","\x1B[49m"),bgYellow:q("\x1B[43m","\x1B[49m"),bgBlue:q("\x1B[44m","\x1B[49m"),bgMagenta:q("\x1B[45m","\x1B[49m"),bgCyan:q("\x1B[46m","\x1B[49m"),bgWhite:q("\x1B[47m","\x1B[49m"),blackBright:q("\x1B[90m","\x1B[39m"),redBright:q("\x1B[91m","\x1B[39m"),greenBright:q("\x1B[92m","\x1B[39m"),yellowBright:q("\x1B[93m","\x1B[39m"),blueBright:q("\x1B[94m","\x1B[39m"),magentaBright:q("\x1B[95m","\x1B[39m"),cyanBright:q("\x1B[96m","\x1B[39m"),whiteBright:q("\x1B[97m","\x1B[39m"),bgBlackBright:q("\x1B[100m","\x1B[49m"),bgRedBright:q("\x1B[101m","\x1B[49m"),bgGreenBright:q("\x1B[102m","\x1B[49m"),bgYellowBright:q("\x1B[103m","\x1B[49m"),bgBlueBright:q("\x1B[104m","\x1B[49m"),bgMagentaBright:q("\x1B[105m","\x1B[49m"),bgCyanBright:q("\x1B[106m","\x1B[49m"),bgWhiteBright:q("\x1B[107m","\x1B[49m")}};a1.exports=IR();a1.exports.createColors=IR});var _;var S1=V(()=>{_=class _{exampleId;createdAt;name;_properties;constructor(R,q,E,$){this.exampleId=R,this.createdAt=q,this.name=E,this._properties=$}static create(R={}){return new _(crypto.randomUUID(),new Date().toISOString(),null,{...R})}static META_KEYS=new Set(["example_id","created_at","name","trace_id","span_id","offline_trace_id"]);static from(R){let q={};for(let E of Object.keys(R))if(!_.META_KEYS.has(E))q[E]=R[E];return new 
_(R.example_id??"",R.created_at??"",R.name??null,q)}get(R){return this._properties[R]}has(R){return R in this._properties}get properties(){return{...this._properties}}toJSON(){let R={example_id:this.exampleId,created_at:this.createdAt,name:this.name};for(let[q,E]of Object.entries(this._properties))R[q]=E;return R}}});function Tq(){let R=new WeakSet;return function(q,E){if(typeof E==="bigint")return E.toString();if(typeof E==="object"&&E!==null){if(R.has(E))return"[Circular]";R.add(E)}return E}}function w(R){try{let q=JSON.stringify(R);if(typeof q==="string")return q;return String(q)}catch{try{let q=JSON.stringify(R,Tq());return typeof q==="string"?q:String(R)}catch(q){return F.error(`safeStringify failed: ${q}`),String(R)}}}function s(R,q){if(typeof R==="string"||typeof R==="number"||typeof R==="boolean")return R;return q(R)}var k=V(()=>{O()});var yR="1.0.1";var LR=()=>{};var k1;var qR=V(()=>{LR();k1=yR});function $R(){return BR.getStore()===!0}function KR(R){if(ER=R,AR)return;let q=v.context;q.active=()=>{if(!$R())return dq();let E=L1.getStore();if(E)return E;return ER?ER():v.ROOT_CONTEXT},q.with=(E,$,f,...P)=>{if(!$R())return lq(E,$,f,...P);return L1.run(E,()=>$.apply(f,P))},q.bind=(E,$)=>{if(!$R())return pq(E,$);if(typeof $!=="function")return $;let f=$;return(...P)=>L1.run(E,()=>f(...P))},AR=!0}function PR(R,q){return BR.run(!0,()=>L1.run(R,q))}var v,fR,AR=!1,ER=null,BR,L1,dq,lq,pq;var TR=V(()=>{v=require("@opentelemetry/api"),fR=require("async_hooks"),BR=new fR.AsyncLocalStorage,L1=new fR.AsyncLocalStorage,dq=v.context.active.bind(v.context),lq=v.context.with.bind(v.context),pq=v.context.bind.bind(v.context)});class lR{_provider;constructor(R){this._provider=R}startSpan(R,q,E){let $=E??this._provider.getCurrentContext();return this._provider._getDelegateTracer().startSpan(R,q,$)}startActiveSpan(R,...q){let E={},$=this._provider.getCurrentContext(),f;if(q.length===1)f=q[0];else if(q.length===2)E=q[0],f=q[1];else E=q[0],$=q[1],f=q[2];let 
P=this.startSpan(R,E,$);return this._provider.useSpan(P,!1,!1,!1,()=>f(P))}}class pR{startSpan(){return J.trace.wrapSpanContext(J.INVALID_SPAN_CONTEXT)}startActiveSpan(R,...q){return(q.length===1?q[0]:q.length===2?q[1]:q[2])(this.startSpan())}}class X{static _instance=null;_activeTracer=null;_instrumentations=[];_noOpTracer;_proxyTracer;_tracers=new Set;constructor(){this._noOpTracer=new pR,this._proxyTracer=new lR(this),KR(()=>this.getCurrentContext())}static getInstance(){return X._instance??=new X,X._instance}static installAsGlobalTracerProvider(){let R=X.getInstance();return J.trace.setGlobalTracerProvider(R)}register(R){this._tracers.add(R)}deregister(R){this._tracers.delete(R)}setActive(R){let q=this.getCurrentSpan();if(q?.isRecording()){if(J.trace.getSpan(this.getCurrentContext())===q)return F.error("Cannot set_active() while a root span is active. Keeping existing tracer provider."),!1}return this.register(R),this._activeTracer=R,!0}getActiveTracer(){return this._activeTracer}getCurrentContext(){return y1.getStore()??J.ROOT_CONTEXT}setSpan(R,q){return J.trace.setSpan(R,q)}wrapSpanContext(R){return J.trace.wrapSpanContext(R)}getCurrentSpan(){let R=this.getCurrentContext();return J.trace.getSpan(R)}hasActiveRootSpan(){if(!this.getCurrentSpan()?.isRecording())return!1;return!0}_getDelegateTracer(){let R=this._activeTracer;if(!R)return F.debug("No active tracer, returning NoOpTracer"),this._noOpTracer;return R._tracerProvider.getTracer(oq)}getTracer(R,q,E){return this._proxyTracer}addInstrumentation(R){try{uR.registerInstrumentations({tracerProvider:this,instrumentations:[R]}),this._instrumentations.push(R)}catch(q){F.error(`Failed to add instrumentation: ${String(q)}`)}}useSpan(R,q,E,$,f){let P=this.getCurrentContext(),U=J.trace.setSpan(P,R);return y1.run(U,()=>PR(U,()=>{try{let C=f();if(C instanceof Promise)return C.catch((D)=>{if(R.isRecording()){if(E)R.recordException(D);if($){let S=D;R.setStatus({code:J.SpanStatusCode.ERROR,message:`${S.name}: 
${S.message}`})}}throw D}).finally(()=>{if(q)R.end()});if(q)R.end();return C}catch(C){if(R.isRecording()){if(E)R.recordException(C);if($){let D=C;R.setStatus({code:J.SpanStatusCode.ERROR,message:`${D.name}: ${D.message}`})}}if(q)R.end();throw C}}))}attachContext(R){y1.enterWith(R)}withContext(R,q){return y1.run(R,()=>PR(R,q))}async forceFlush(){let R=await Promise.allSettled(Array.from(this._tracers).map((q)=>q._tracerProvider.forceFlush()));for(let q of R)if(q.status==="rejected")F.error(`forceFlush failed: ${String(q.reason)}`)}async shutdown(){let R=await Promise.allSettled(Array.from(this._tracers).map((q)=>q._tracerProvider.shutdown()));for(let q of R)if(q.status==="rejected")F.error(`shutdown failed: ${String(q.reason)}`);this._activeTracer=null,this._tracers.clear()}}var J,uR,dR,oq="judgeval",y1;var A=V(()=>{O();TR();J=require("@opentelemetry/api"),uR=require("@opentelemetry/instrumentation"),dR=require("async_hooks"),y1=new dR.AsyncLocalStorage});var $1=()=>{};function Q(R,q,E){try{return q()}catch($){let f=$ instanceof Error&&$.stack?`
|
|
3
|
+
${$.stack}`:"";return F.error(`[Caught] An exception was raised in ${R}: ${String($)}${f}`),E}}var M=V(()=>{O()});var oR=V(()=>{M()});function L(R,q={}){let{pre:E,post:$,error:f,finally:P}=q;return async function(...C){let D=E?Q("immutableWrapAsync.pre",()=>E(...C)):void 0,S;try{let W=await R.apply(this,C);if($)S=Q("immutableWrapAsync.post",()=>$(D,W,C));return W}catch(W){if(f)S=Q("immutableWrapAsync.error",()=>f(D,W,C));throw W}finally{if(P)Q("immutableWrapAsync.finally",()=>{P(S)})}}}var sR=V(()=>{M()});var gR=V(()=>{M()});function mR(R,q={}){let{pre:E,yield:$,post:f,error:P,finally:U}=q;return async function*(...D){let S=E?Q("immutableWrapAsyncIterator.pre",()=>E(...D)):void 0,W;try{for await(let Y of R(...D)){if($)Q("immutableWrapAsyncIterator.yield",()=>{$(S,Y)});yield Y}if(f)W=Q("immutableWrapAsyncIterator.post",()=>f(S))}catch(Y){if(P)W=Q("immutableWrapAsyncIterator.error",()=>P(S,Y));throw Y}finally{if(U)Q("immutableWrapAsyncIterator.finally",()=>{U(W)})}}}var UR=V(()=>{M()});function g(R,q){let E=R[Symbol.asyncIterator].bind(R),$=mR(()=>({[Symbol.asyncIterator]:E}),{yield:(f,P)=>{q.onYield(P)},post:()=>{q.onDone()},error:(f,P)=>{q.onError(P)},finally:()=>{q.onFinally()}});R[Symbol.asyncIterator]=()=>$()}var cR=V(()=>{UR()});var V1=V(()=>{oR();sR();gR();UR();cR()});function W1(R,q){Q("recordChatUsage",()=>{let E=q.prompt_tokens_details?.cached_tokens??0,$=q.prompt_tokens+q.completion_tokens+E;m.recordLLMMetadata({non_cached_input_tokens:$>q.total_tokens?q.prompt_tokens-E:q.prompt_tokens,output_tokens:q.completion_tokens||void 0,cache_read_input_tokens:E||void 0},R),m.setAttribute("judgment.usage.metadata",w(q),R)})}var DR=V(()=>{$1();B();M();k()});function nR(R){R.chat.completions.parse=L(R.chat.completions.parse.bind(R.chat.completions),{pre:(q)=>{let E=m.startSpan("OPENAI_API_CALL");return m.setSpanKind("llm",E),m.recordLLMMetadata({model:q.model},E),m.setInput(q,E),E},post:(q,E)=>{if(!q)return;if(m.setOutput(w(E),q),E.usage)W1(q,E.usage);return 
m.recordLLMMetadata({model:E.model},q),q},error:(q,E)=>{if(q)m.setError(E,q);return q},finally:(q)=>{q?.end()}})}var iR=V(()=>{B();k();V1();DR()});function tR(R){R.chat.completions.create=L(R.chat.completions.create.bind(R.chat.completions),{pre:(q)=>{if(q.stream)q.stream_options??={include_usage:!0};let E=m.startSpan("OPENAI_API_CALL");return m.setSpanKind("llm",E),m.recordLLMMetadata({model:q.model},E),m.setInput(q,E),{span:E,proxied:!1}},post:(q,E,$)=>{if(!q)return;let{span:f}=q;if($[0].stream){let U=E,C="";return g(U,{onYield(D){if(typeof D.choices[0]?.delta.content==="string")C+=D.choices[0].delta.content;if(D.usage)W1(f,D.usage)},onDone(){m.setOutput(C,f)},onError(D){m.setError(D,f)},onFinally(){f.end()}}),{span:f,proxied:!0}}let P=E;if(m.setOutput(w(P),f),P.usage)W1(f,P.usage);return m.recordLLMMetadata({model:P.model},f),q},error:(q,E)=>{if(q)m.setError(E,q.span);return q},finally:(q)=>{if(q&&!q.proxied)q.span.end()}})}var rR=V(()=>{B();k();V1();DR()});function eR(R,q){Q("images.recordUsage",()=>{let E="input_tokens_details"in q?q.input_tokens_details:void 0,$=E?.image_tokens??0;if(m.recordLLMMetadata({non_cached_input_tokens:E?.text_tokens??0,output_tokens:q.output_tokens||void 0},R),$)m.setAttribute("judgment.usage.non_cached_input_image_tokens",$,R);if(q.output_tokens)m.setAttribute("judgment.usage.output_image_tokens",q.output_tokens,R);m.setAttribute("judgment.usage.metadata",w(q),R)})}function aR(R){R.images.generate=L(R.images.generate.bind(R.images),{pre:(q)=>{let E=m.startSpan("OPENAI_API_CALL");return m.setSpanKind("llm",E),m.recordLLMMetadata({model:q.model},E),m.setInput(q,E),{span:E,proxied:!1}},post:(q,E,$)=>{if(!q)return;let{span:f}=q;if($[0].stream){let U=E,C;return g(U,{onYield(D){if(sq.has(D.type))C=D,eR(f,C.usage)},onDone(){m.setOutput(w(C??{}),f)},onError(D){m.setError(D,f)},onFinally(){f.end()}}),{span:f,proxied:!0}}let P=E;if(m.setOutput(w(P),f),P.usage)eR(f,P.usage);return q},error:(q,E)=>{if(q)m.setError(E,q.span);return 
q},finally:(q)=>{if(q&&!q.proxied)q.span.end()}})}var sq;var Rq=V(()=>{$1();B();M();k();V1();sq=new Set(["image_generation.completed","image_edit.completed"])});function qq(R,q){Q("responses.recordUsage",()=>{let E=q.input_tokens_details.cached_tokens,$=q.input_tokens+q.output_tokens+E;m.recordLLMMetadata({non_cached_input_tokens:$>q.total_tokens?q.input_tokens-E:q.input_tokens,output_tokens:q.output_tokens||void 0,cache_read_input_tokens:E||void 0},R),m.setAttribute("judgment.usage.metadata",w(q),R)})}function Eq(R){R.responses.create=L(R.responses.create.bind(R.responses),{pre:(q)=>{let E=m.startSpan("OPENAI_API_CALL");return m.setSpanKind("llm",E),m.recordLLMMetadata({model:q.model},E),m.setInput(q,E),{span:E,proxied:!1}},post:(q,E,$)=>{if(!q)return;let{span:f}=q;if($[0].stream){let U=E,C="";return g(U,{onYield(D){if(D.type==="response.output_text.delta")C+=D.delta;if(D.type==="response.completed"){let S=D.response;if(S.usage)qq(f,S.usage);m.recordLLMMetadata({model:S.model},f)}},onDone(){m.setOutput(C,f)},onError(D){m.setError(D,f)},onFinally(){f.end()}}),{span:f,proxied:!0}}let P=E;if(m.setOutput(w(P),f),P.usage)qq(f,P.usage);if(typeof P.model==="string")m.recordLLMMetadata({model:P.model},f);return q},error:(q,E)=>{if(q)m.setError(E,q.span);return q},finally:(q)=>{if(q&&!q.proxied)q.span.end()}})}var $q=V(()=>{$1();B();M();k();V1()});function B1(R){return Q("wrapOpenAI",()=>{tR(R),nR(R),Eq(R),aR(R)}),R}var fq=V(()=>{M();iR();rR();Rq();$q()});function K1(R){return B1(R)}var FR=V(()=>{fq()});function rq(R){return Function.prototype.toString.call(R)}function eq(R){let q=rq(R).replace(tq,"");return q.match(gq)||q.match(cq)}function Pq(R){let q=eq(R);if(!q||!q[1])return[];return q[1].split(nq).map((E)=>{return E.replace(iq,(f,P,U)=>U).trim()}).filter((E)=>E.length>0)}var gq,cq,nq,iq,tq;var mq=V(()=>{gq=/^([^(]+?)=>/,cq=/^[^(]*\(\s*([^)]*)\)/m,nq=/,/,iq=/^\s*(_?)(\S+?)\1\s*$/,tq=/((\/\/.*$)|(\/\*[\s\S]*?\*\/))/gm});var 
Uq="=",T1=";",Q1=",",u1="baggage",Dq=180,Fq=4096,Cq=8192;function Vq(R){return R.reduce((q,E)=>{let $=`${q}${q!==""?Q1:""}${E}`;return $.length>Cq?q:$},"")}function Wq(R){return R.getAllEntries().map(([q,E])=>{let $=`${encodeURIComponent(q)}=${encodeURIComponent(E.value)}`;if(E.metadata!==void 0)$+=T1+E.metadata.toString();return $})}function Qq(R){let q=R.split(T1);if(q.length<=0)return;let E=q.shift();if(!E)return;let $=E.indexOf(Uq);if($<=0)return;let f=decodeURIComponent(E.substring(0,$).trim()),P=decodeURIComponent(E.substring($+1).trim()),U;if(q.length>0)U=Sq.baggageEntryMetadataFromString(q.join(T1));return{key:f,value:P,metadata:U}}var Sq;var Yq=V(()=>{Sq=require("@opentelemetry/api")});class K{inject(R,q,E){let $=T(R);if(!$||Hq.isTracingSuppressed(R))return;let f=Wq($).filter((U)=>U.length<=Fq).slice(0,Dq),P=Vq(f);if(P.length>0)E.set(q,u1,P)}extract(R,q,E){let $=E.get(q,u1),f=Array.isArray($)?$.join(Q1):$;if(!f)return R;let P={};if(f.length===0)return R;if(f.split(Q1).forEach((C)=>{let D=Qq(C);if(D){let S={value:D.value};if(D.metadata)S.metadata=D.metadata;P[D.key]=S}}),Object.entries(P).length===0)return R;return H1(R,Y1(P))}fields(){return[u1]}}var Hq;var d1=V(()=>{Z1();Yq();Hq=require("@opentelemetry/core")});var l1={};G1(l1,{setBaggage:()=>H1,getBaggage:()=>T,getActiveBaggage:()=>aq,deleteBaggage:()=>RE,createBaggage:()=>Y1,baggageEntryMetadataFromString:()=>Zq.baggageEntryMetadataFromString,JudgmentBaggagePropagator:()=>K});function T(R){return R.getValue(CR)}function aq(){return T(X.getInstance().getCurrentContext())}function H1(R,q){return R.setValue(CR,q)}function RE(R){return R.deleteValue(CR)}var X1,Zq,Y1,CR;var Z1=V(()=>{A();d1();X1=require("@opentelemetry/api"),Zq=require("@opentelemetry/api"),Y1=X1.propagation.createBaggage.bind(X1.propagation),CR=X1.createContextKey("baggage")});var s1={};G1(s1,{setGlobalTextmap:()=>qE,inject:()=>EE,getGlobalTextmap:()=>SR,extract:()=>VR});function SR(){return Xq}function qE(R){Xq=R}function hq(R){if(R!==void 
0)return R;return X.getInstance().getCurrentContext()}function EE(R,q,E=p1.defaultTextMapSetter){Q("propagation.inject",()=>{SR().inject(hq(q),R,E)})}function VR(R,q,E=p1.defaultTextMapGetter){let $=hq(q);return Q("propagation.extract",()=>SR().extract($,R,E),$)}var p1,o1,Xq;var WR=V(()=>{M();d1();A();p1=require("@opentelemetry/api"),o1=require("@opentelemetry/core"),Xq=new o1.CompositePropagator({propagators:[new o1.W3CTraceContextPropagator,new K]})});class m{projectName;projectId;apiKey;organizationId;apiUrl;environment;serializer;_tracerProvider;_client;_enableMonitoring;supportsLiveInstrumentation=!0;constructor(R,q,E,$,f,P,U,C,D,S){this.projectName=R,this.projectId=q,this.apiKey=E,this.organizationId=$,this.apiUrl=f,this.environment=P,this.serializer=U,this._tracerProvider=C,this._client=D,this._enableMonitoring=S}setActive(){return X.getInstance().setActive(this)}static _getProxyProvider(){return X.getInstance()}static _getSerializer(){return m._getProxyProvider().getActiveTracer()?.serializer??w}static _getCurrentTraceAndSpanId(){let q=m._getProxyProvider().getCurrentSpan();if(!q?.isRecording())return null;let E=q.spanContext();if(!E.traceId||!(E.traceFlags&1))return null;return[E.traceId,E.spanId]}static _emitPartial(){Q("BaseTracer._emitPartial",()=>{let R=m._getProxyProvider().getActiveTracer();if(!R||!R.supportsLiveInstrumentation)return;R.getSpanProcessor().emitPartial()})}static getCurrentSpan(){return m._getProxyProvider().getCurrentSpan()}static async forceFlush(){await m._getProxyProvider().forceFlush()}static async shutdown(){await m._getProxyProvider().shutdown()}static registerOTELInstrumentation(R){Q("BaseTracer.registerOTELInstrumentation",()=>{m._getProxyProvider().addInstrumentation(R)})}static wrap(R){return K1(R)}static getOTELTracer(){return m._getProxyProvider().getTracer(xq)}static startSpan(R,q){let E=m.getOTELTracer().startSpan(R,{attributes:q});return m._emitPartial(),E}static startActiveSpan(R,q){let{name:E,attributes:$}=R;return 
m.getOTELTracer().startActiveSpan(E,{attributes:$},(f)=>{m._emitPartial();try{let P=q(f);if(P instanceof Promise)return P.finally(()=>{f.end()});return f.end(),P}catch(P){throw f.end(),P}})}static span(R,q){return m.startActiveSpan({name:R},(E)=>{try{let $=q(E);if($ instanceof Promise)return $.catch((f)=>{throw E.setStatus({code:y.SpanStatusCode.ERROR,message:String(f)}),E.recordException(f),f});return $}catch($){throw E.setStatus({code:y.SpanStatusCode.ERROR,message:String($)}),E.recordException($),$}})}static with(R,q){return m.span(R,q)}static continueTrace(R,q){let E=m._getProxyProvider(),$=VR(R);return E.withContext($,()=>q($))}static observe(R,q){let E;if(typeof R==="function")E=R;else q=R;let{spanType:$="span",spanName:f,recordInput:P=!0,recordOutput:U=!0,fork:C=!1}=q??{},D=m._getProxyProvider(),S=(W)=>{let Y=f??W.name;return function(...Z){let b=D.getTracer(xq);if(C&&D.getActiveTracer()!==null&&D.getCurrentSpan()?.isRecording()===!0){let x=m._getSerializer(),h=b.startSpan(Y),I=h.spanContext();if($)h.setAttribute("judgment.span_kind",$);let wR={"judgment.link.source_trace_id":I.traceId,"judgment.link.source_span_id":I.spanId};if($)wR["judgment.span_kind"]=$;let bq=D.setSpan(D.getCurrentContext(),D.wrapSpanContext(y.INVALID_SPAN_CONTEXT)),n=b.startSpan(Y,{attributes:wR},bq),GR=n.spanContext();h.setAttribute("judgment.link.target_trace_id",GR.traceId),h.setAttribute("judgment.link.target_span_id",GR.spanId);let r1=()=>{n.end(),h.end()},JR=(G)=>{for(let z of[n,h])z.recordException(G),z.setStatus({code:y.SpanStatusCode.ERROR,message:String(G)})},OR=(G)=>{let z=s(G,x);n.setAttribute("judgment.output",z),h.setAttribute("judgment.output",z)};if(P){let G=s(wq(W,Z),x);n.setAttribute("judgment.input",G),h.setAttribute("judgment.input",G)}return m._emitPartial(),D.useSpan(n,!1,!1,!1,()=>{try{let G=W.call(this,...Z);if(G instanceof Promise)return G.then((z)=>{if(U)OR(z);return z}).catch((z)=>{throw JR(z),z}).finally(r1);if(U)OR(G);return r1(),G}catch(G){throw 
JR(G),r1(),G}})}return b.startActiveSpan(Y,(x)=>{if($)x.setAttribute("judgment.span_kind",$);try{if(P)x.setAttribute("judgment.input",s(wq(W,Z),m._getSerializer()));m._emitPartial();let h=W.call(this,...Z);if(h instanceof Promise)return h.then((I)=>{if(U)x.setAttribute("judgment.output",s(I,m._getSerializer()));return I}).catch((I)=>{throw x.recordException(I),x.setStatus({code:y.SpanStatusCode.ERROR,message:String(I)}),I}).finally(()=>{x.end()});if(U)x.setAttribute("judgment.output",s(h,m._getSerializer()));return x.end(),h}catch(h){throw x.recordException(h),x.setStatus({code:y.SpanStatusCode.ERROR,message:String(h)}),x.end(),h}})}};if(!E)return S;return S(E)}static _resolveSpan(R){if(R)return R;return m._getProxyProvider().getCurrentSpan()}static setSpanKind(R,q){Q("BaseTracer.setSpanKind",()=>{if(!R)return;let E=m._resolveSpan(q);if(E?.isRecording())E.setAttribute("judgment.span_kind",R)})}static setLLMSpan(){m.setSpanKind("llm")}static setToolSpan(){m.setSpanKind("tool")}static setGeneralSpan(){m.setSpanKind("span")}static setAttribute(R,q,E){Q("BaseTracer.setAttribute",()=>{let $=m._resolveSpan(E);if(!$?.isRecording())return;if(!R||q==null)return;$.setAttribute(R,s(q,m._getSerializer()))})}static setAttributes(R,q){for(let[E,$]of Object.entries(R))if(q)m.setAttribute(E,$,q);else m.setAttribute(E,$)}static setInput(R,q){if(q)m.setAttribute("judgment.input",R,q);else m.setAttribute("judgment.input",R)}static setOutput(R,q){if(q)m.setAttribute("judgment.output",R,q);else m.setAttribute("judgment.output",R)}static setError(R,q){Q("BaseTracer.setError",()=>{let E=m._resolveSpan(q);if(!E?.isRecording())return;E.recordException(R),E.setStatus({code:y.SpanStatusCode.ERROR,message:String(R)})})}static recordLLMMetadata(R,q){Q("BaseTracer.recordLLMMetadata",()=>{let E=m._resolveSpan(q);if(!E?.isRecording())return;if(typeof R.model==="string")E.setAttribute("judgment.llm.model",R.model);if(typeof 
R.provider==="string")E.setAttribute("judgment.llm.provider",R.provider);if(typeof R.non_cached_input_tokens==="number")E.setAttribute("judgment.usage.non_cached_input_tokens",R.non_cached_input_tokens);if(typeof R.output_tokens==="number")E.setAttribute("judgment.usage.output_tokens",R.output_tokens);if(typeof R.cache_read_input_tokens==="number")E.setAttribute("judgment.usage.cache_read_input_tokens",R.cache_read_input_tokens);if(typeof R.cache_creation_input_tokens==="number")E.setAttribute("judgment.usage.cache_creation_input_tokens",R.cache_creation_input_tokens);if(typeof R.total_cost_usd==="number")E.setAttribute("judgment.usage.total_cost_usd",R.total_cost_usd)})}static _setPropagatingBaggageKey(R,q){Q("BaseTracer._setPropagatingBaggageKey",()=>{let E=m._getProxyProvider(),$=E.getCurrentSpan();if(!$?.isRecording())return;$.setAttribute(R,q);let f=E.getCurrentContext(),P=(T(f)??Y1()).setEntry(R,{value:q});E.attachContext(H1(f,P))})}static setCustomerId(R){m._setPropagatingBaggageKey("judgment.customer_id",R)}static setCustomerUserId(R){m._setPropagatingBaggageKey("judgment.customer_user_id",R)}static setSessionId(R){m._setPropagatingBaggageKey("judgment.session_id",R)}static tag(R){Q("BaseTracer.tag",()=>{if(!R||Array.isArray(R)&&R.length===0)return;let E=m._getProxyProvider().getActiveTracer();if(!E?.projectId||!E._client)return;if(!E.supportsLiveInstrumentation)return;let $=m._getCurrentTraceAndSpanId();if(!$)return;let[f]=$,P=Array.isArray(R)?R:[R];E._client.postV1projectsTracesByTraceIdTags(E.projectId,f,{tags:P}).catch((U)=>{F.error(`tag failed: ${String(U)}`)})})}static asyncEvaluate(R,q){Q("BaseTracer.asyncEvaluate",()=>{let{judge:E,example:$}=R,P=m._getProxyProvider().getActiveTracer();if(!P?.projectId)return;if(!P.supportsLiveInstrumentation)return;let U=m._resolveSpan(q);if(!U?.isRecording())return;let 
C=P.getSpanProcessor(),D=U.spanContext(),S=C.stateIncr(D,"pending_evals_count"),W={project_id:P.projectId,eval_name:`async_evaluate_${E}_${S}`,judges:[{name:E}],examples:[{...$,example_id:Gq.randomUUID(),created_at:new Date().toISOString(),trace_id:D.traceId,span_id:D.spanId}],is_offline:!1,is_behavior:!1},Y=C.stateAppend(D,"pending_evals",W);U.setAttribute("judgment.pending_trace_eval",JSON.stringify(Y))})}}function wq(R,q){try{let E=Pq(R).map((f)=>f.replace(/^\.\.\./,"").split("=")[0].trim()).filter((f)=>f.length>0),$={};return E.forEach((f,P)=>{if(P<q.length)$[f]=q[P]}),$}catch{return{}}}var y,Gq,xq="judgeval";var B=V(()=>{$1();FR();mq();M();O();k();Z1();A();WR();y=require("@opentelemetry/api"),Gq=require("crypto")});class N{_delegate;constructor(R,q,E,$){if(!R){this._delegate=null;return}this._delegate=new Jq.OTLPTraceExporter({url:R,headers:{Authorization:`Bearer ${q}`,"X-Organization-Id":E,"X-Project-Id":$}})}export(R,q){F.info(`Exported ${R.length} spans`),this._delegate?.export(R,q)}shutdown(){return this._delegate?.shutdown()??Promise.resolve()}forceFlush(){return this._delegate?.forceFlush()??Promise.resolve()}}var Jq;var h1=V(()=>{O();Jq=require("@opentelemetry/exporter-trace-otlp-http")});var Oq,u;var g1=V(()=>{h1();Oq=require("@opentelemetry/core");u=class u extends N{constructor(){super("","","","")}export(R,q){q({code:Oq.ExportResultCode.SUCCESS})}shutdown(){return Promise.resolve()}forceFlush(){return Promise.resolve()}}});class f1{_keyPredicate;constructor(R=c1){this._keyPredicate=R}onStart(R,q){let E=T(q)?.getAllEntries()??[];for(let[$,f]of E)if(this._keyPredicate($))R.setAttribute($,f.value)}onEnd(R){}forceFlush(){return Promise.resolve()}shutdown(){return Promise.resolve()}}var c1=()=>!0;var QR=V(()=>{Z1()});function P1(R){return`${R.traceId}:${R.spanId}`}function fE(R){return R[0]===0&&R[1]===0}var _q,d;var x1=V(()=>{$1();M();A();QR();_q=require("@opentelemetry/sdk-trace-base");d=class d extends _q.BatchSpanProcessor{tracer;_state=new 
Map;_spanFinalizers;_baggageProcessor;constructor(R,q,E){super(q,E);this.tracer=R,this._spanFinalizers=new FinalizationRegistry(($)=>{this._cleanupSpanState($)}),this._baggageProcessor=new f1}_cleanupSpanState(R){this._state.delete(R)}_registerSpan(R){let q=R.spanContext();if(!q.traceId||!q.spanId)return;let E=P1(q);this._spanFinalizers.register(R,E)}stateSet(R,q,E){let $=P1(R),f=this._state.get($);if(!f)f=new Map,this._state.set($,f);f.set(q,E)}stateGet(R,q,E){let $=P1(R),f=this._state.get($);if(!f?.has(q))return E;return f.get(q)}stateIncr(R,q){let E=P1(R),$=this._state.get(E);if(!$)$=new Map,this._state.set(E,$);let f=$.get(q),P=typeof f==="number"?f:0;return $.set(q,P+1),P}stateAppend(R,q,E){let $=P1(R),f=this._state.get($);if(!f)f=new Map,this._state.set($,f);let P=f.get(q),U=Array.isArray(P)?[...P,E]:[E];return f.set(q,U),U}_emitSpan(R,q=!1){let E=R.spanContext();if(!E.traceId)return;let $=this.stateIncr(E,"judgment.update_id"),f={...R.attributes,"judgment.update_id":$};if(q)delete f["judgment.pending_trace_eval"];let P=Object.create(R);Object.defineProperty(P,"attributes",{value:f,writable:!1});let U=fE(R.endTime)?R.startTime:R.endTime;Object.defineProperty(P,"endTime",{value:U,writable:!1}),super.onEnd(P)}emitPartial(){Q("JudgmentSpanProcessor.emitPartial",()=>{let q=X.getInstance().getCurrentSpan();if(!q?.isRecording())return;let E=q.spanContext();if(!E.traceId)return;if(this.stateGet(E,"disable_partial_emit",!1))return;this._emitSpan(q,!0)})}onStart(R,q){Q("JudgmentSpanProcessor.onStart",()=>{this._baggageProcessor.onStart(R,q),this._registerSpan(R)})}onEnd(R){Q("JudgmentSpanProcessor.onEnd",()=>{let q=R.spanContext();if(!q.traceId){super.onEnd(R);return}let E=P1(q);try{if(!this.stateGet(q,"cancelled",!1))this._emitSpan(R)}finally{this._cleanupSpanState(E)}})}}});var m1;var YR=V(()=>{g1();x1();m1=class m1 extends d{constructor(){super(null,new u)}onStart(R,q){}onEnd(R){}shutdown(){return Promise.resolve()}forceFlush(){return 
Promise.resolve()}emitPartial(){}stateSet(R,q,E){}stateGet(R,q,E){return E}stateIncr(R,q){return 0}stateAppend(R,q,E){return[E]}}});var n1,HR,c;var ZR=V(()=>{C1();O1();O();_1();k();qR();B();A();h1();g1();x1();YR();n1=require("@opentelemetry/resources"),HR=require("@opentelemetry/sdk-trace-node");c=class c extends m{_spanExporter=null;_spanProcessor=null;constructor(R,q,E,$,f,P,U,C,D,S){super(R,q,E,$,f,P,U,C,D,S)}static async init(R={}){let q=R.apiKey??i,E=R.organizationId??t,$=R.apiUrl??r,f=R.projectName??null,P=R.serializer??w,U=!0;if(!f)F.warning("project_name not provided. Tracer will not export spans."),U=!1;if(!q)F.warning("api_key not provided. Tracer will not export spans."),U=!1;if(!E)F.warning("organization_id not provided. Tracer will not export spans."),U=!1;if(!$)F.warning("api_url not provided. Tracer will not export spans."),U=!1;let C=null,D=null;if(U&&f&&q&&E&&$){if(C=new p($,q,E),D=await e(C,f).catch(()=>null),!D)F.warning(`Project '${f}' not found. Tracer will not export spans.`),U=!1}let S={"service.name":f??"unknown","telemetry.sdk.name":"judgeval","telemetry.sdk.version":k1};if(R.environment)S["deployment.environment"]=R.environment;if(R.resourceAttributes)Object.assign(S,R.resourceAttributes);let W=n1.defaultResource().merge(n1.resourceFromAttributes(S)),Y=new HR.NodeTracerProvider({resource:W,sampler:R.sampler,spanLimits:R.spanLimits}),Z=new c(f,D,q,E,$,R.environment??null,P,Y,C,U);if(U){let j=new HR.NodeTracerProvider({resource:W,sampler:R.sampler,spanLimits:R.spanLimits,spanProcessors:[Z.getSpanProcessor(),...R.spanProcessors??[]]});Z._tracerProvider=j}if(X.getInstance().register(Z),R.setActive??!0)Z.setActive();return Z}getSpanExporter(){if(this._spanExporter)return this._spanExporter;if(!this._enableMonitoring||!this.projectId||!this.apiKey||!this.organizationId||!this.apiUrl)this._spanExporter=new u;else{let R=this.apiUrl.endsWith("/")?this.apiUrl+"otel/v1/traces":this.apiUrl+"/otel/v1/traces";this._spanExporter=new 
N(R,this.apiKey,this.organizationId,this.projectId)}return this._spanExporter}getSpanProcessor(){if(this._spanProcessor)return this._spanProcessor;if(!this._enableMonitoring)this._spanProcessor=new m1;else this._spanProcessor=new d(this,this.getSpanExporter());return this._spanProcessor}}});var U1;var XR=V(()=>{S1();x1();U1=class U1 extends d{_dataset;_exampleFields;_seenTraceIds=new Set;constructor(R,q,E){super(R,q);this._dataset=E.dataset,this._exampleFields={...E.exampleFields??{}}}_maybeCreateExample(R){if(R.parentSpanContext)return;let q=R.spanContext();if(!q?.traceId)return;if(this._seenTraceIds.has(q.traceId))return;this._seenTraceIds.add(q.traceId);let E=_.create({...this._exampleFields,offline_trace_id:q.traceId});this._dataset.push(E)}onEnd(R){try{this._maybeCreateExample(R)}finally{super.onEnd(R)}}}});var Mq={};G1(Mq,{OfflineTracer:()=>D1});var i1,hR,jq="otel/v1/offline-traces",D1;var xR=V(()=>{C1();O1();_1();k();qR();A();ZR();h1();XR();i1=require("@opentelemetry/resources"),hR=require("@opentelemetry/sdk-trace-node");D1=class D1 extends c{supportsLiveInstrumentation=!1;_offlineApiUrl;_offlineApiKey;_offlineOrganizationId;_offlineProjectId;_dataset;_exampleFields;_offlineSpanExporter=null;_offlineSpanProcessor=null;constructor(R){super(R.projectName,R.projectId,R.apiKey,R.organizationId,R.apiUrl,R.environment,R.serializer,R.tracerProvider,R.client,!0);this._offlineApiUrl=R.apiUrl,this._offlineApiKey=R.apiKey,this._offlineOrganizationId=R.organizationId,this._offlineProjectId=R.projectId,this._dataset=R.dataset,this._exampleFields=R.exampleFields}static async create(R){let q=R.apiKey??i,E=R.organizationId??t,$=R.apiUrl??r,f=R.projectName,P=R.serializer??w;if(!f)throw Error("projectName is required for OfflineTracer");if(!q)throw Error("apiKey is required for OfflineTracer");if(!E)throw Error("organizationId is required for OfflineTracer");if(!$)throw Error("apiUrl is required for OfflineTracer");let U=new p($,q,E),C;try{C=await e(U,f)}catch(b){throw 
Error(`Project '${f}' not found; cannot start OfflineTracer: ${String(b)}`)}let D={"service.name":f,"telemetry.sdk.name":"judgeval","telemetry.sdk.version":k1,"judgment.offline":"true"};if(R.environment)D["deployment.environment"]=R.environment;if(R.resourceAttributes)Object.assign(D,R.resourceAttributes);let S=i1.defaultResource().merge(i1.resourceFromAttributes(D)),W=new D1({projectName:f,projectId:C,apiKey:q,organizationId:E,apiUrl:$,environment:R.environment??null,serializer:P,tracerProvider:new hR.NodeTracerProvider({resource:S}),client:U,dataset:R.dataset,exampleFields:{...R.exampleFields??{}}}),Y=new hR.NodeTracerProvider({resource:S,sampler:R.sampler,spanLimits:R.spanLimits,spanProcessors:[W.getSpanProcessor(),...R.spanProcessors??[]]});if(W._tracerProvider=Y,X.getInstance().register(W),R.setActive??!0)W.setActive();return W}getSpanExporter(){if(this._offlineSpanExporter)return this._offlineSpanExporter;let R=this._offlineApiUrl.endsWith("/")?this._offlineApiUrl+jq:this._offlineApiUrl+"/"+jq;return this._offlineSpanExporter=new N(R,this._offlineApiKey,this._offlineOrganizationId,this._offlineProjectId),this._offlineSpanExporter}getSpanProcessor(){if(this._offlineSpanProcessor)return this._offlineSpanProcessor;return this._offlineSpanProcessor=new U1(this,this.getSpanExporter(),{dataset:this._dataset,exampleFields:this._exampleFields}),this._offlineSpanProcessor}}});var PE={};G1(PE,{wrapOpenAI:()=>B1,wrap:()=>K1,propagation:()=>s1,baggage:()=>l1,Tracer:()=>c,OfflineTracer:()=>D1,OfflineJudgmentSpanProcessor:()=>U1,NoOpSpanProcessor:()=>m1,NoOpSpanExporter:()=>u,JudgmentTracerProvider:()=>X,JudgmentSpanExporter:()=>N,JudgmentBaggageSpanProcessor:()=>f1,JudgmentBaggagePropagator:()=>K,Judgeval:()=>t1,Judge:()=>a,Example:()=>_,Evaluation:()=>q1,Dataset:()=>o,BaseTracer:()=>m,AgentJudgeFactory:()=>E1,ALLOW_ALL_BAGGAGE_KEYS:()=>c1});module.exports=Iq(PE);C1();O1();_1();O();class a{}O();var RR=w1(b1(),1);O();var H=w1(b1(),1),Bq=2000;class 
R1{_client;_projectId;_projectName;constructor(R,q,E){this._client=R,this._projectId=q,this._projectName=E}async _poll(R,q,E,$){let f=Date.now();while(!0){let P=(Date.now()-f)/1000;if(P>$)throw Error(`Evaluation timed out after ${$}s`);let U=await this._client.getV1projectsExperimentsByRunId(R,q),C=U.results??[];if(C.length>=E){let S=U.ui_results_url??"Failed to get UI results URL";return console.log(`${H.default.green("✓")} Evals completed and saved in ${H.default.bold(`${P.toFixed(1)}s`)}`),{results:C,url:S}}await new Promise((S)=>setTimeout(S,Bq))}}_displayResults(R,q,E,$){let f=[],P=0,U=0;console.log();for(let C=0;C<q.length;C++){let D=q[C],S=D.scorers.every((W)=>Boolean(W.success));if(S)P++,console.log(`${H.default.green("✓")} Example ${C+1}: ${H.default.green("PASSED")}`);else U++,console.log(`${H.default.red("✗")} Example ${C+1}: ${H.default.red("FAILED")}`);for(let W of D.scorers){let Y=W.score!==null?W.score.toFixed(3):"N/A",Z=W.success?H.default.green(Y):H.default.red(Y);console.log(` ${H.default.dim(`${W.name}:`)} ${Z} ${H.default.dim(`(threshold: ${W.threshold})`)}`)}f.push({success:S,scorers:D.scorers,example:R[C]})}if(console.log(),P===f.length)console.log(`${H.default.bold(H.default.green("✓ All tests passed!"))} (${P}/${f.length})`);else console.log(`${H.default.bold(H.default.yellow("⚠ Results:"))} ${H.default.green(`${P} passed`)} | ${H.default.red(`${U} failed`)}`);if(console.log(`${H.default.dim("View full details:")} ${H.default.underline(E)}`),console.log(),$&&f.some((C)=>!C.success)){let C=[`Evaluation failed: ${U}/${f.length} examples failed`];for(let D=0;D<f.length;D++)if(!f[D].success){C.push(` Example ${D+1}:`);for(let S of f[D].scorers)if(!S.success){if(C.push(` ${S.name}: ${S.score!==null?S.score.toFixed(3):"N/A"} (threshold: ${S.threshold})`),S.reason)C.push(` ${S.reason}`)}}throw Error(C.join(`
|
|
4
|
+
`))}return f}async run(R,q,E,$=!1,f=300){if(!this._projectId)return F.error("Project ID is not resolved. Evaluation requires a valid project."),[];let P=this._projectId,U=crypto.randomUUID(),C=new Date().toISOString();console.log(),console.log(H.default.bold(H.default.cyan("Starting Evaluation"))),console.log(`${H.default.dim("Run:")} ${E}`),console.log(`${H.default.dim("Project:")} ${this._projectName}`),console.log(`${H.default.dim("Examples:")} ${R.length} | ${H.default.dim("Scorers:")} ${q.length}`),console.log();let D=this._buildPayload(U,P,E,C,R,q),S=await this._submit(P,U,R,q,D),{results:W,url:Y}=await this._poll(P,U,S,f);return this._displayResults(R,W,Y,$)}}class z1 extends R1{_buildPayload(R,q,E,$,f,P){return{id:R,project_id:q,eval_name:E,created_at:$,examples:f.map((U)=>U.toJSON()),judgment_scorers:[],custom_scorers:[]}}async _submit(R,q,E,$,f){let P=Date.now(),U=E.flatMap((Y,Z)=>$.map((b)=>b.score(Y).then((j)=>({exampleIdx:Z,scorer:b,result:j,error:null})).catch((j)=>({exampleIdx:Z,scorer:b,result:null,error:String(j)})))),C=await Promise.all(U),D=((Date.now()-P)/1000).toFixed(1);console.log(`${RR.default.green("✓")} Scoring completed in ${RR.default.bold(`${D}s`)}`);let S=new Map;for(let Y of C){let Z=S.get(Y.exampleIdx);if(!Z)Z=[],S.set(Y.exampleIdx,Z);Z.push(Y)}let W=E.map((Y,Z)=>{return{scorers_data:(S.get(Z)??[]).map((j)=>{if(j.error!==null)return{scorer_name:j.scorer.constructor.name,value:0,reason:"",error:j.error};let x=j.result;return{scorer_name:j.scorer.constructor.name,value:x.value,reason:x.reason,...x.citations&&{citations:x.citations.map((h)=>({span_id:h.spanId,span_attribute:h.spanAttribute}))}}}),data_object:Y.toJSON()}});return await this._client.postV1projectsEvalResultsExamples(R,{results:W,run:f}),E.length}}var kR=w1(b1(),1);class v1 extends R1{_buildPayload(R,q,E,$,f,P){return{id:R,project_id:q,eval_name:E,created_at:$,examples:f.map((U)=>U.toJSON()),judgment_scorers:P.map((U)=>({name:U})),custom_scorers:[]}}async 
_submit(R,q,E,$,f){return await this._client.postV1projectsEvalQueueExamples(R,f),console.log(`${kR.default.green("✓")} Evaluation submitted`),E.length}}class q1{_local;_hosted;constructor(R,q,E){this._local=new z1(R,q,E),this._hosted=new v1(R,q,E)}run(R){let{examples:q,scorers:E,evalRunName:$,assertTest:f=!1,timeoutSeconds:P=300}=R,U=E.filter((D)=>typeof D==="string"),C=E.filter((D)=>D instanceof a);if(C.length>0&&U.length>0)return F.error("Running both local and hosted scorers is not supported. Please run your evaluation with either local or hosted scorers, but not both."),Promise.resolve([]);if(C.length===0&&U.length===0)return F.error("No valid local or hosted scorers provided. Please provide at least one local or hosted scorer."),Promise.resolve([]);if(C.length>0)return this._local.run(q,C,$,f,P);return this._hosted.run(q,U,$,f,P)}}class N1{_client;_projectId;_projectName;constructor(R,q,E){this._client=R,this._projectId=q,this._projectName=E}create(){return new q1(this._client,this._projectId,this._projectName)}}S1();O();S1();class o{name;projectId;projectName;datasetKind;examples;_client;constructor(R){this.name=R.name,this.projectId=R.projectId,this.projectName=R.projectName,this.datasetKind=R.datasetKind??"example",this.examples=R.examples??[],this._client=R.client??null}async addExamples(R,q=100){if(!this._client)return;for(let E=0;E<R.length;E+=q){let $=R.slice(E,E+q);await this._client.postV1projectsDatasetsByDatasetNameExamples(this.projectId,this.name,{examples:$.map((f)=>f.toJSON())})}}async addFromJson(R,q=100){let{readFile:E}=await import("fs/promises"),$=await E(R,"utf-8"),P=JSON.parse($).map((U)=>{if(typeof U!=="object"||U===null)throw Error("Each item in the JSON array must be an object");return _.create(U)});await this.addExamples(P,q)}get length(){return this.examples.length}[Symbol.iterator](){return this.examples[Symbol.iterator]()}toString(){return`Dataset(name=${this.name}, examples=${this.examples.length})`}}class 
I1{_client;_projectId;_projectName;constructor(R,q,E){this._client=R,this._projectId=q,this._projectName=E}async get(R){let q=this._expectProjectId();if(!q)return null;let E=await this._client.getV1projectsDatasetsByDatasetName(q,R),$=E.dataset_kind??"example",P=(E.examples??[]).map((U)=>_.from(U));return new o({name:R,projectId:q,projectName:this._projectName,datasetKind:$,examples:P,client:this._client})}async create(R,q={}){let E=this._expectProjectId();if(!E)return null;let{examples:$=[],overwrite:f=!1,batchSize:P=100}=q;await this._client.postV1projectsDatasets(E,{name:R,examples:[],dataset_kind:"example",overwrite:f});let U=new o({name:R,projectId:E,projectName:this._projectName,examples:$,client:this._client});if($.length>0)await U.addExamples($,P);return U}list(){let R=this._expectProjectId();if(!R)return Promise.resolve(null);return this._client.getV1projectsDatasets(R)}_expectProjectId(){if(!this._projectId)return F.error("Project ID is not resolved. Dataset operations require a valid project."),null;return this._projectId}}O();class E1{_client;_projectId;_projectName;constructor(R,q,E){this._client=R,this._projectId=q,this._projectName=E}async create(R){let q=this._expectProjectId();if(!q)return null;let E={name:R.name,prompt:R.prompt,model:R.model,score_type:R.scoreType};if(R.description!==void 0)E.description=R.description;if(R.judgeDescription!==void 0)E.judge_description=R.judgeDescription;if(R.categories!==void 0)E.categories=R.categories;if(R.minScore!==void 0)E.min_score=R.minScore;if(R.maxScore!==void 0)E.max_score=R.maxScore;return{judgeId:(await this._client.postV1projectsJudges(q,E)).judge_id,name:R.name,prompt:R.prompt,model:R.model,scoreType:R.scoreType,description:R.description??null,judgeDescription:R.judgeDescription??null,categories:R.categories??null,minScore:R.minScore??null,maxScore:R.maxScore??null,majorVersion:0,minorVersion:0}}async update(R){let q=this._expectProjectId();if(!q)return null;let E={};if(R.prompt!==void 
0)E.prompt=R.prompt;if(R.model!==void 0)E.model=R.model;if(R.scoreType!==void 0)E.score_type=R.scoreType;if(R.description!==void 0)E.description=R.description;if(R.judgeDescription!==void 0)E.judge_description=R.judgeDescription;if(R.categories!==void 0)E.categories=R.categories;if(R.minScore!==void 0)E.min_score=R.minScore;if(R.maxScore!==void 0)E.max_score=R.maxScore;if(R.sourceMajorVersion!==void 0)E.source_major_version=R.sourceMajorVersion;if(R.sourceMinorVersion!==void 0)E.source_minor_version=R.sourceMinorVersion;if(R.targetMajorVersion!==void 0)E.target_major_version=R.targetMajorVersion;if(R.targetMinorVersion!==void 0)E.target_minor_version=R.targetMinorVersion;let $=await this._client.patchV1projectsJudgesByJudgeId(q,R.judgeId,E);return Kq($)}_expectProjectId(){if(!this._projectId)return F.error("Project ID is not resolved. Agent judge operations require a valid project."),null;return this._projectId}}function Kq(R){let q=R.judge;return{judgeId:q.id,name:q.name,prompt:q.prompt??"",model:q.model??"",scoreType:q.score_type,description:q.description??null,judgeDescription:q.judge_description??null,categories:q.categories??null,minScore:q.min_score??null,maxScore:q.max_score??null,majorVersion:q.major_version??null,minorVersion:q.minor_version??null}}class t1{_client;_projectName;_projectId;constructor(R,q,E){this._client=R,this._projectName=q,this._projectId=E}static async create(R){let q=R.apiKey??i,E=R.organizationId??t,$=R.apiUrl??r;if(!q)throw Error("API key is required");if(!E)throw Error("Organization ID is required");if(!$)throw Error("API URL is required");if(!R.projectName)throw Error("Project name is required");let f=new p($,q,E),P=null;try{P=await e(f,R.projectName)}catch{F.warning(`Project '${R.projectName}' not found. 
Some operations requiring project_id will be skipped.`)}return new t1(f,R.projectName,P)}async offlineTracer(R){let{OfflineTracer:q}=await Promise.resolve().then(() => (xR(),Mq));return q.create({...R,projectName:this._projectName,apiKey:this._client.getApiKey(),organizationId:this._client.getOrganizationId(),apiUrl:this._client.getBaseUrl()})}get datasets(){return new I1(this._client,this._projectId,this._projectName)}get evaluation(){return new N1(this._client,this._projectId,this._projectName)}get agentJudges(){return new E1(this._client,this._projectId,this._projectName)}}B();h1();g1();A();x1();YR();XR();QR();d1();Z1();WR();ZR();xR();FR();S1();
|
|
5
5
|
|
|
6
|
-
//# debugId=
|
|
6
|
+
//# debugId=F41ABCF440A36DDC64756E2164756E21
|
|
7
7
|
//# sourceMappingURL=index.cjs.map
|