judgeval 0.9.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +1 -1
  2. package/dist/Judgeval.d.ts +45 -6
  3. package/dist/Judgeval.d.ts.map +1 -1
  4. package/dist/data/Example.d.ts +55 -0
  5. package/dist/data/Example.d.ts.map +1 -0
  6. package/dist/data/ScoringResult.d.ts +12 -0
  7. package/dist/data/ScoringResult.d.ts.map +1 -0
  8. package/dist/data/index.d.ts +3 -0
  9. package/dist/data/index.d.ts.map +1 -0
  10. package/dist/datasets/Dataset.d.ts +56 -0
  11. package/dist/datasets/Dataset.d.ts.map +1 -0
  12. package/dist/datasets/DatasetFactory.d.ts +50 -0
  13. package/dist/datasets/DatasetFactory.d.ts.map +1 -0
  14. package/dist/datasets/index.d.ts +4 -0
  15. package/dist/datasets/index.d.ts.map +1 -0
  16. package/dist/evaluation/Evaluation.d.ts +65 -0
  17. package/dist/evaluation/Evaluation.d.ts.map +1 -0
  18. package/dist/evaluation/EvaluationFactory.d.ts +22 -0
  19. package/dist/evaluation/EvaluationFactory.d.ts.map +1 -0
  20. package/dist/evaluation/EvaluatorRunner.d.ts +27 -0
  21. package/dist/evaluation/EvaluatorRunner.d.ts.map +1 -0
  22. package/dist/evaluation/HostedEvaluatorRunner.d.ts +14 -0
  23. package/dist/evaluation/HostedEvaluatorRunner.d.ts.map +1 -0
  24. package/dist/evaluation/LocalEvaluatorRunner.d.ts +16 -0
  25. package/dist/evaluation/LocalEvaluatorRunner.d.ts.map +1 -0
  26. package/dist/evaluation/index.d.ts +6 -0
  27. package/dist/evaluation/index.d.ts.map +1 -0
  28. package/dist/index.cjs +5 -4
  29. package/dist/index.cjs.map +44 -16
  30. package/dist/index.d.ts +11 -2
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.mjs +5 -4
  33. package/dist/index.mjs.map +44 -16
  34. package/dist/instrumentation/index.d.ts +22 -0
  35. package/dist/instrumentation/index.d.ts.map +1 -0
  36. package/dist/instrumentation/llm/index.d.ts +2 -0
  37. package/dist/instrumentation/llm/index.d.ts.map +1 -0
  38. package/dist/instrumentation/llm/openai/beta-chat-completions.d.ts +7 -0
  39. package/dist/instrumentation/llm/openai/beta-chat-completions.d.ts.map +1 -0
  40. package/dist/instrumentation/llm/openai/chat-completions.d.ts +7 -0
  41. package/dist/instrumentation/llm/openai/chat-completions.d.ts.map +1 -0
  42. package/dist/instrumentation/llm/openai/images.d.ts +7 -0
  43. package/dist/instrumentation/llm/openai/images.d.ts.map +1 -0
  44. package/dist/instrumentation/llm/openai/index.d.ts +14 -0
  45. package/dist/instrumentation/llm/openai/index.d.ts.map +1 -0
  46. package/dist/instrumentation/llm/openai/responses.d.ts +7 -0
  47. package/dist/instrumentation/llm/openai/responses.d.ts.map +1 -0
  48. package/dist/instrumentation/llm/openai/utils.d.ts +4 -0
  49. package/dist/instrumentation/llm/openai/utils.d.ts.map +1 -0
  50. package/dist/internal/api/models/Example.d.ts +1 -0
  51. package/dist/internal/api/models/Example.d.ts.map +1 -1
  52. package/dist/judges/Judge.d.ts +27 -0
  53. package/dist/judges/Judge.d.ts.map +1 -0
  54. package/dist/judges/index.d.ts +3 -0
  55. package/dist/judges/index.d.ts.map +1 -0
  56. package/dist/judges/responses.d.ts +61 -0
  57. package/dist/judges/responses.d.ts.map +1 -0
  58. package/dist/trace/BaseTracer.d.ts +157 -25
  59. package/dist/trace/BaseTracer.d.ts.map +1 -1
  60. package/dist/trace/JudgmentTracerProvider.d.ts +9 -1
  61. package/dist/trace/JudgmentTracerProvider.d.ts.map +1 -1
  62. package/dist/trace/OfflineTracer.d.ts +105 -0
  63. package/dist/trace/OfflineTracer.d.ts.map +1 -0
  64. package/dist/trace/Tracer.d.ts +3 -1
  65. package/dist/trace/Tracer.d.ts.map +1 -1
  66. package/dist/trace/index.d.ts +3 -1
  67. package/dist/trace/index.d.ts.map +1 -1
  68. package/dist/trace/processors/OfflineJudgmentSpanProcessor.d.ts +25 -0
  69. package/dist/trace/processors/OfflineJudgmentSpanProcessor.d.ts.map +1 -0
  70. package/dist/trace/processors/index.d.ts +1 -0
  71. package/dist/trace/processors/index.d.ts.map +1 -1
  72. package/dist/trace/propagation/index.d.ts.map +1 -1
  73. package/dist/utils/wrappers/immutable-wrap-async-iterator.d.ts +3 -0
  74. package/dist/utils/wrappers/immutable-wrap-async-iterator.d.ts.map +1 -0
  75. package/dist/utils/wrappers/immutable-wrap-async.d.ts +3 -0
  76. package/dist/utils/wrappers/immutable-wrap-async.d.ts.map +1 -0
  77. package/dist/utils/wrappers/immutable-wrap-sync-iterator.d.ts +3 -0
  78. package/dist/utils/wrappers/immutable-wrap-sync-iterator.d.ts.map +1 -0
  79. package/dist/utils/wrappers/immutable-wrap-sync.d.ts +3 -0
  80. package/dist/utils/wrappers/immutable-wrap-sync.d.ts.map +1 -0
  81. package/dist/utils/wrappers/index.d.ts +7 -0
  82. package/dist/utils/wrappers/index.d.ts.map +1 -0
  83. package/dist/utils/wrappers/proxy-async-iterable.d.ts +13 -0
  84. package/dist/utils/wrappers/proxy-async-iterable.d.ts.map +1 -0
  85. package/dist/utils/wrappers/types.d.ts +15 -0
  86. package/dist/utils/wrappers/types.d.ts.map +1 -0
  87. package/package.json +18 -23
package/README.md CHANGED
@@ -49,7 +49,7 @@ const tracedChat = Tracer.observe(async (userMessage: string) => {
49
49
  messages: [{ role: "user", content: userMessage }],
50
50
  });
51
51
 
52
- Tracer.asyncEvaluate("Relevancy");
52
+ Tracer.asyncEvaluate({ judge: "Relevancy" });
53
53
 
54
54
  return response.choices[0].message.content || "";
55
55
  });
@@ -1,3 +1,13 @@
1
+ import { EvaluationFactory } from "./evaluation/EvaluationFactory";
2
+ import { DatasetFactory } from "./datasets/DatasetFactory";
3
+ import type { OfflineTracer, OfflineTracerConfig } from "./trace/OfflineTracer";
4
+ /**
5
+ * Options for {@link Judgeval.offlineTracer}.
6
+ *
7
+ * Mirrors `OfflineTracerConfig` minus credentials and `projectName`,
8
+ * which are reused from the parent `Judgeval` instance.
9
+ */
10
+ export type JudgevalOfflineTracerOptions = Omit<OfflineTracerConfig, "projectName" | "apiKey" | "organizationId" | "apiUrl">;
1
11
  /**
2
12
  * Configuration options for the Judgeval client.
3
13
  *
@@ -6,6 +16,8 @@
6
16
  * `JUDGMENT_API_URL`.
7
17
  */
8
18
  export interface JudgevalConfig {
19
+ /** The project name on the Judgment platform. */
20
+ projectName: string;
9
21
  /** Judgment API key. Defaults to `JUDGMENT_API_KEY` env var. */
10
22
  apiKey?: string;
11
23
  /** Judgment organization ID. Defaults to `JUDGMENT_ORG_ID` env var. */
@@ -17,34 +29,61 @@ export interface JudgevalConfig {
17
29
  * The main entry point for interacting with the Judgment platform.
18
30
  *
19
31
  * `Judgeval` connects to your Judgment project and gives you access to
20
- * tracing, evaluation, and monitoring through the Judgment platform.
32
+ * evaluation, datasets, and monitoring through the Judgment platform.
21
33
  *
22
34
  * @example
23
35
  * ```typescript
24
36
  * import { Judgeval } from "judgeval";
25
37
  *
26
- * const client = Judgeval.create();
38
+ * const client = await Judgeval.create({ projectName: "my-project" });
27
39
  * ```
28
40
  *
29
41
  * @throws Error if any required credential is missing.
30
42
  */
31
43
  export declare class Judgeval {
32
- private readonly internalClient;
33
- protected constructor(config?: JudgevalConfig);
44
+ private readonly _client;
45
+ private readonly _projectName;
46
+ private readonly _projectId;
47
+ private constructor();
34
48
  /**
35
49
  * Create a new Judgeval client instance.
36
50
  *
51
+ * Resolves the `projectName` to a `projectId` via the Judgment API.
52
+ *
37
53
  * @param config - Configuration options. Credentials default to environment variables.
38
54
  * @returns A new `Judgeval` instance.
39
55
  *
40
56
  * @example
41
57
  * ```typescript
42
- * const client = Judgeval.create({
58
+ * const client = await Judgeval.create({
59
+ * projectName: "my-project",
43
60
  * apiKey: "<your-api-key>",
44
61
  * organizationId: "<your-organization-id>",
45
62
  * });
46
63
  * ```
47
64
  */
48
- static create(config?: JudgevalConfig): Judgeval;
65
+ static create(config: JudgevalConfig): Promise<Judgeval>;
66
+ /**
67
+ * Create and activate an `OfflineTracer` for this project.
68
+ *
69
+ * Reuses the credentials supplied to this `Judgeval` instance. Each
70
+ * completed root span appends an `Example` to `dataset`, carrying
71
+ * the offline trace id and the static `exampleFields`.
72
+ *
73
+ * @example
74
+ * ```typescript
75
+ * const judgeval = await Judgeval.create({ projectName: "my-project" });
76
+ * const dataset: Example[] = [];
77
+ * const tracer = await judgeval.offlineTracer({
78
+ * dataset,
79
+ * exampleFields: { input: item.input, golden_output: item.goldenOutput },
80
+ * });
81
+ * ```
82
+ */
83
+ offlineTracer(options: JudgevalOfflineTracerOptions): Promise<OfflineTracer>;
84
+ /** Access dataset management (create, get, list). */
85
+ get datasets(): DatasetFactory;
86
+ /** Access evaluation (create evaluation runs). */
87
+ get evaluation(): EvaluationFactory;
49
88
  }
50
89
  //# sourceMappingURL=Judgeval.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"Judgeval.d.ts","sourceRoot":"","sources":["../src/Judgeval.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,MAAM,WAAW,cAAc;IAC7B,gEAAgE;IAChE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gEAAgE;IAChE,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAoB;IAEnD,SAAS,aAAa,MAAM,GAAE,cAAmB;IAkBjD;;;;;;;;;;;;;OAaG;IACH,MAAM,CAAC,MAAM,CAAC,MAAM,GAAE,cAAmB,GAAG,QAAQ;CAGrD"}
1
+ {"version":3,"file":"Judgeval.d.ts","sourceRoot":"","sources":["../src/Judgeval.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,KAAK,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAEhF;;;;;GAKG;AACH,MAAM,MAAM,4BAA4B,GAAG,IAAI,CAC7C,mBAAmB,EACnB,aAAa,GAAG,QAAQ,GAAG,gBAAgB,GAAG,QAAQ,CACvD,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,WAAW,cAAc;IAC7B,iDAAiD;IACjD,WAAW,EAAE,MAAM,CAAC;IACpB,gEAAgE;IAChE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gEAAgE;IAChE,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;IACtC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAgB;IAE3C,OAAO;IAUP;;;;;;;;;;;;;;;;OAgBG;WACU,MAAM,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC;IAgC9D;;;;;;;;;;;;;;;;OAgBG;IACG,aAAa,CACjB,OAAO,EAAE,4BAA4B,GACpC,OAAO,CAAC,aAAa,CAAC;IAWzB,qDAAqD;IACrD,IAAI,QAAQ,IAAI,cAAc,CAE7B;IAED,kDAAkD;IAClD,IAAI,UAAU,IAAI,iBAAiB,CAMlC;CACF"}
@@ -0,0 +1,55 @@
1
+ import type { Example as APIExample } from "../internal/api/models/Example";
2
+ /**
3
+ * The wire format for examples: the fixed API fields plus arbitrary
4
+ * user-defined properties (input, actual_output, etc.).
5
+ */
6
+ export type ExampleDict = APIExample & Record<string, unknown>;
7
+ /**
8
+ * A single evaluation example with flexible key-value properties.
9
+ *
10
+ * Use `Example.create()` to construct an example with arbitrary fields
11
+ * such as `input`, `actualOutput`, `expectedOutput`, etc.
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * const example = Example.create({
16
+ * input: "What is the capital of France?",
17
+ * actual_output: "Paris is the capital of France.",
18
+ * expected_output: "Paris",
19
+ * });
20
+ *
21
+ * example.get("input"); // "What is the capital of France?"
22
+ * ```
23
+ */
24
+ export declare class Example {
25
+ readonly exampleId: string;
26
+ readonly createdAt: string;
27
+ readonly name: string | null;
28
+ private readonly _properties;
29
+ private constructor();
30
+ /**
31
+ * Create an example with the given properties.
32
+ *
33
+ * Any key-value pairs passed in `props` become accessible via `.get()`.
34
+ * Common keys: `input`, `actual_output`, `expected_output`, `retrieval_context`.
35
+ */
36
+ static create(props?: Record<string, unknown>): Example;
37
+ /** Known keys on the API Example interface that are not user properties. */
38
+ private static readonly META_KEYS;
39
+ /**
40
+ * Reconstruct an Example from an API response dict.
41
+ *
42
+ * Separates the fixed metadata fields (`example_id`, `created_at`, `name`)
43
+ * from user-defined properties.
44
+ */
45
+ static from(data: ExampleDict): Example;
46
+ /** Get a property by key. */
47
+ get(key: string): unknown;
48
+ /** Check if a property key exists. */
49
+ has(key: string): boolean;
50
+ /** Return a shallow copy of all custom properties. */
51
+ get properties(): Record<string, unknown>;
52
+ /** Serialize to the API wire format. */
53
+ toJSON(): ExampleDict;
54
+ }
55
+ //# sourceMappingURL=Example.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Example.d.ts","sourceRoot":"","sources":["../../src/data/Example.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,IAAI,UAAU,EAAE,MAAM,gCAAgC,CAAC;AAE5E;;;GAGG;AACH,MAAM,MAAM,WAAW,GAAG,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAE/D;;;;;;;;;;;;;;;;GAgBG;AACH,qBAAa,OAAO;IAClB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAA0B;IAEtD,OAAO;IAYP;;;;;OAKG;IACH,MAAM,CAAC,MAAM,CAAC,KAAK,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,OAAO;IAM3D,4EAA4E;IAC5E,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAO9B;IAEH;;;;;OAKG;IACH,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO;IAevC,6BAA6B;IAC7B,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAIzB,sCAAsC;IACtC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAIzB,sDAAsD;IACtD,IAAI,UAAU,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAExC;IAED,wCAAwC;IACxC,MAAM,IAAI,WAAW;CAatB"}
@@ -0,0 +1,12 @@
1
+ import type { Example } from "./Example";
2
+ import type { ExperimentScorer } from "../internal/api/models/ExperimentScorer";
3
+ /** The combined result of running scorers against a single example. */
4
+ export interface ScoringResult {
5
+ /** True only if every scorer passed its threshold. */
6
+ success: boolean;
7
+ /** Per-scorer results, directly from the API. */
8
+ scorers: ExperimentScorer[];
9
+ /** The original example that was evaluated. */
10
+ example: Example;
11
+ }
12
+ //# sourceMappingURL=ScoringResult.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ScoringResult.d.ts","sourceRoot":"","sources":["../../src/data/ScoringResult.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yCAAyC,CAAC;AAEhF,uEAAuE;AACvE,MAAM,WAAW,aAAa;IAC5B,sDAAsD;IACtD,OAAO,EAAE,OAAO,CAAC;IACjB,iDAAiD;IACjD,OAAO,EAAE,gBAAgB,EAAE,CAAC;IAC5B,+CAA+C;IAC/C,OAAO,EAAE,OAAO,CAAC;CAClB"}
@@ -0,0 +1,3 @@
1
+ export { Example, type ExampleDict } from "./Example";
2
+ export type { ScoringResult } from "./ScoringResult";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/data/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,WAAW,CAAC;AACtD,YAAY,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,56 @@
1
+ import type { JudgmentApiClient } from "../internal/api/client";
2
+ import { Example } from "../data/Example";
3
+ /**
4
+ * A collection of {@link Example} objects stored on the Judgment platform.
5
+ *
6
+ * Datasets are retrieved via {@link DatasetFactory.get} or created via
7
+ * {@link DatasetFactory.create}. Once obtained, you can iterate over
8
+ * the examples directly, or add new ones.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * const dataset = await client.datasets.get("golden-set");
13
+ * for (const example of dataset) {
14
+ * console.log(example.get("input"));
15
+ * }
16
+ * ```
17
+ */
18
+ export declare class Dataset {
19
+ readonly name: string;
20
+ readonly projectId: string;
21
+ readonly projectName: string;
22
+ readonly datasetKind: string;
23
+ readonly examples: Example[];
24
+ private readonly _client;
25
+ constructor(opts: {
26
+ name: string;
27
+ projectId: string;
28
+ projectName: string;
29
+ datasetKind?: string;
30
+ examples?: Example[];
31
+ client?: JudgmentApiClient | null;
32
+ });
33
+ /**
34
+ * Upload examples to this dataset in batches.
35
+ *
36
+ * @param examples - The examples to upload.
37
+ * @param batchSize - Number of examples per batch request. Defaults to 100.
38
+ */
39
+ addExamples(examples: Example[], batchSize?: number): Promise<void>;
40
+ /**
41
+ * Load examples from a JSON file and add them to the dataset.
42
+ *
43
+ * Expects the file to contain a JSON array of objects, each with
44
+ * properties like `input`, `actual_output`, etc.
45
+ *
46
+ * @param filePath - Path to the JSON file.
47
+ * @param batchSize - Number of examples per batch request. Defaults to 100.
48
+ */
49
+ addFromJson(filePath: string, batchSize?: number): Promise<void>;
50
+ /** Number of examples in this dataset. */
51
+ get length(): number;
52
+ /** Iterate over examples. */
53
+ [Symbol.iterator](): Iterator<Example>;
54
+ toString(): string;
55
+ }
56
+ //# sourceMappingURL=Dataset.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Dataset.d.ts","sourceRoot":"","sources":["../../src/datasets/Dataset.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAE1C;;;;;;;;;;;;;;GAcG;AACH,qBAAa,OAAO;IAClB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,OAAO,EAAE,CAAC;IAC7B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA2B;gBAEvC,IAAI,EAAE;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;QACrB,MAAM,CAAC,EAAE,iBAAiB,GAAG,IAAI,CAAC;KACnC;IASD;;;;;OAKG;IACG,WAAW,CACf,QAAQ,EAAE,OAAO,EAAE,EACnB,SAAS,GAAE,MAAY,GACtB,OAAO,CAAC,IAAI,CAAC;IAahB;;;;;;;;OAQG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAe3E,0CAA0C;IAC1C,IAAI,MAAM,IAAI,MAAM,CAEnB;IAED,6BAA6B;IAC7B,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,OAAO,CAAC;IAItC,QAAQ,IAAI,MAAM;CAGnB"}
@@ -0,0 +1,50 @@
1
+ import type { JudgmentApiClient } from "../internal/api/client";
2
+ import type { DatasetInfo } from "../internal/api/models/DatasetInfo";
3
+ import { Example } from "../data/Example";
4
+ import { Dataset } from "./Dataset";
5
+ /**
6
+ * Creates, retrieves, and lists datasets in your project.
7
+ *
8
+ * Access via `client.datasets`.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * const datasets = await client.datasets.list();
13
+ * const dataset = await client.datasets.get("golden-set");
14
+ * ```
15
+ */
16
+ export declare class DatasetFactory {
17
+ private readonly _client;
18
+ private readonly _projectId;
19
+ private readonly _projectName;
20
+ constructor(client: JudgmentApiClient, projectId: string | null, projectName: string);
21
+ /**
22
+ * Retrieve a dataset by name, including all its examples.
23
+ *
24
+ * @param name - The dataset name.
25
+ * @returns The dataset with all examples hydrated, or `null` if the project is unresolved.
26
+ */
27
+ get(name: string): Promise<Dataset | null>;
28
+ /**
29
+ * Create a new dataset, optionally pre-populated with examples.
30
+ *
31
+ * @param name - The dataset name.
32
+ * @param options.examples - Examples to upload after creation.
33
+ * @param options.overwrite - If `true`, overwrite an existing dataset with the same name.
34
+ * @param options.batchSize - Number of examples per batch upload request. Defaults to 100.
35
+ * @returns The newly created dataset, or `null` if the project is unresolved.
36
+ */
37
+ create(name: string, options?: {
38
+ examples?: Example[];
39
+ overwrite?: boolean;
40
+ batchSize?: number;
41
+ }): Promise<Dataset | null>;
42
+ /**
43
+ * List all datasets in the project.
44
+ *
45
+ * @returns An array of dataset metadata, or `null` if the project is unresolved.
46
+ */
47
+ list(): Promise<DatasetInfo[] | null>;
48
+ private _expectProjectId;
49
+ }
50
+ //# sourceMappingURL=DatasetFactory.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DatasetFactory.d.ts","sourceRoot":"","sources":["../../src/datasets/DatasetFactory.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oCAAoC,CAAC;AACtE,OAAO,EAAE,OAAO,EAAoB,MAAM,iBAAiB,CAAC;AAC5D,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC;;;;;;;;;;GAUG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAgB;IAC3C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;gBAGpC,MAAM,EAAE,iBAAiB,EACzB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,WAAW,EAAE,MAAM;IAOrB;;;;;OAKG;IACG,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC;IAyBhD;;;;;;;;OAQG;IACG,MAAM,CACV,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE;QACP,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;QACrB,SAAS,CAAC,EAAE,OAAO,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;KACf,GACL,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC;IA4B1B;;;;OAIG;IACH,IAAI,IAAI,OAAO,CAAC,WAAW,EAAE,GAAG,IAAI,CAAC;IAOrC,OAAO,CAAC,gBAAgB;CASzB"}
@@ -0,0 +1,4 @@
1
+ export { Dataset } from "./Dataset";
2
+ export { DatasetFactory } from "./DatasetFactory";
3
+ export type { DatasetInfo } from "../internal/api/models/DatasetInfo";
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/datasets/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAClD,YAAY,EAAE,WAAW,EAAE,MAAM,oCAAoC,CAAC"}
@@ -0,0 +1,65 @@
1
+ import type { JudgmentApiClient } from "../internal/api/client";
2
+ import type { Example } from "../data/Example";
3
+ import type { ScoringResult } from "../data/ScoringResult";
4
+ import { Judge } from "../judges/Judge";
5
+ export interface EvaluationRunOptions {
6
+ /** The examples to evaluate. */
7
+ examples: Example[];
8
+ /**
9
+ * Hosted scorer names (strings like `"faithfulness"`) **or**
10
+ * custom `Judge` instances. Cannot mix both.
11
+ */
12
+ scorers: string[] | Judge[];
13
+ /** A name for this run, visible in the dashboard. */
14
+ evalRunName: string;
15
+ /**
16
+ * If true, throws an error when any scorer fails its threshold.
17
+ * Useful in CI/CD pipelines.
18
+ */
19
+ assertTest?: boolean;
20
+ /**
21
+ * Maximum seconds to wait for hosted scorer results before timing out.
22
+ * @default 300
23
+ */
24
+ timeoutSeconds?: number;
25
+ }
26
+ /**
27
+ * Score a batch of examples using hosted scorers or custom judges.
28
+ *
29
+ * Two modes are supported:
30
+ *
31
+ * - **Hosted scorers** — pass scorer names as strings (e.g.
32
+ * `"faithfulness"`, `"answer_relevancy"`). Evaluation runs server-side
33
+ * on the Judgment platform.
34
+ * - **Custom judges** — pass {@link Judge} subclass instances for
35
+ * in-process evaluation with your own scoring logic.
36
+ *
37
+ * Create an `Evaluation` via `client.evaluation.create()`, then call
38
+ * `.run()` to execute scorers against your examples.
39
+ *
40
+ * @example
41
+ * ```typescript
42
+ * const evaluation = client.evaluation.create();
43
+ * const results = await evaluation.run({
44
+ * examples,
45
+ * scorers: ["faithfulness", "answer_relevancy"],
46
+ * evalRunName: "nightly-eval",
47
+ * });
48
+ * ```
49
+ */
50
+ export declare class Evaluation {
51
+ private readonly _local;
52
+ private readonly _hosted;
53
+ constructor(client: JudgmentApiClient, projectId: string | null, projectName: string);
54
+ /**
55
+ * Run scorers against your examples and return results.
56
+ *
57
+ * Pass **either** hosted scorer names (strings) **or** custom {@link Judge}
58
+ * instances. Mixing both in one call is not supported.
59
+ *
60
+ * @param options - Evaluation configuration including examples, scorers, and run name.
61
+ * @returns A list of {@link ScoringResult} objects, one per example.
62
+ */
63
+ run(options: EvaluationRunOptions): Promise<ScoringResult[]>;
64
+ }
65
+ //# sourceMappingURL=Evaluation.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Evaluation.d.ts","sourceRoot":"","sources":["../../src/evaluation/Evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAIxC,MAAM,WAAW,oBAAoB;IACnC,gCAAgC;IAChC,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB;;;OAGG;IACH,OAAO,EAAE,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;IAC5B,qDAAqD;IACrD,WAAW,EAAE,MAAM,CAAC;IACpB;;;OAGG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAuB;IAC9C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAG9C,MAAM,EAAE,iBAAiB,EACzB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,WAAW,EAAE,MAAM;IAMrB;;;;;;;;OAQG;IACH,GAAG,CAAC,OAAO,EAAE,oBAAoB,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;CA+C7D"}
@@ -0,0 +1,22 @@
1
+ import type { JudgmentApiClient } from "../internal/api/client";
2
+ import { Evaluation } from "./Evaluation";
3
+ /**
4
+ * Creates {@link Evaluation} instances for running batch scoring.
5
+ *
6
+ * Access via `client.evaluation`.
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * const evaluation = client.evaluation.create();
11
+ * const results = await evaluation.run({ examples, scorers, evalRunName: "my-eval" });
12
+ * ```
13
+ */
14
+ export declare class EvaluationFactory {
15
+ private readonly _client;
16
+ private readonly _projectId;
17
+ private readonly _projectName;
18
+ constructor(client: JudgmentApiClient, projectId: string | null, projectName: string);
19
+ /** Create a new `Evaluation` bound to the current project. */
20
+ create(): Evaluation;
21
+ }
22
+ //# sourceMappingURL=EvaluationFactory.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EvaluationFactory.d.ts","sourceRoot":"","sources":["../../src/evaluation/EvaluationFactory.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAE1C;;;;;;;;;;GAUG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;IAC5C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAgB;IAC3C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;gBAGpC,MAAM,EAAE,iBAAiB,EACzB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,WAAW,EAAE,MAAM;IAOrB,8DAA8D;IAC9D,MAAM,IAAI,UAAU;CAGrB"}
@@ -0,0 +1,27 @@
1
+ import type { JudgmentApiClient } from "../internal/api/client";
2
+ import type { ExampleEvaluationRun } from "../internal/api/models/ExampleEvaluationRun";
3
+ import type { ExperimentRunItem } from "../internal/api/models/ExperimentRunItem";
4
+ import type { Example } from "../data/Example";
5
+ import type { ScoringResult } from "../data/ScoringResult";
6
+ import type { Judge } from "../judges/Judge";
7
+ /**
8
+ * Abstract base for evaluation runners.
9
+ *
10
+ * Provides the shared run -> poll -> display flow.
11
+ * Subclasses implement `_buildPayload` and `_submit` for local vs hosted mode.
12
+ */
13
+ export declare abstract class EvaluatorRunner<S extends string | Judge> {
14
+ protected readonly _client: JudgmentApiClient;
15
+ protected readonly _projectId: string | null;
16
+ protected readonly _projectName: string;
17
+ constructor(client: JudgmentApiClient, projectId: string | null, projectName: string);
18
+ protected abstract _buildPayload(evalId: string, projectId: string, evalRunName: string, createdAt: string, examples: Example[], scorers: S[]): ExampleEvaluationRun;
19
+ protected abstract _submit(projectId: string, evalId: string, examples: Example[], scorers: S[], payload: ExampleEvaluationRun): Promise<number>;
20
+ protected _poll(projectId: string, evalId: string, expectedCount: number, timeoutSeconds: number): Promise<{
21
+ results: ExperimentRunItem[];
22
+ url: string;
23
+ }>;
24
+ protected _displayResults(examples: Example[], resultsData: ExperimentRunItem[], url: string, assertTest: boolean): ScoringResult[];
25
+ run(examples: Example[], scorers: S[], evalRunName: string, assertTest?: boolean, timeoutSeconds?: number): Promise<ScoringResult[]>;
26
+ }
27
+ //# sourceMappingURL=EvaluatorRunner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EvaluatorRunner.d.ts","sourceRoot":"","sources":["../../src/evaluation/EvaluatorRunner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,6CAA6C,CAAC;AACxF,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,0CAA0C,CAAC;AAClF,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAI7C;;;;;GAKG;AACH,8BAAsB,eAAe,CAAC,CAAC,SAAS,MAAM,GAAG,KAAK;IAC5D,SAAS,CAAC,QAAQ,CAAC,OAAO,EAAE,iBAAiB,CAAC;IAC9C,SAAS,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAS,CAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;gBAGtC,MAAM,EAAE,iBAAiB,EACzB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,WAAW,EAAE,MAAM;IAOrB,SAAS,CAAC,QAAQ,CAAC,aAAa,CAC9B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,CAAC,EAAE,GACX,oBAAoB;IAEvB,SAAS,CAAC,QAAQ,CAAC,OAAO,CACxB,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,CAAC,EAAE,EACZ,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;cAEF,KAAK,CACnB,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,aAAa,EAAE,MAAM,EACrB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC;QAAE,OAAO,EAAE,iBAAiB,EAAE,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC;IA4BzD,SAAS,CAAC,eAAe,CACvB,QAAQ,EAAE,OAAO,EAAE,EACnB,WAAW,EAAE,iBAAiB,EAAE,EAChC,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,OAAO,GAClB,aAAa,EAAE;IAsEZ,GAAG,CACP,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,CAAC,EAAE,EACZ,WAAW,EAAE,MAAM,EACnB,UAAU,GAAE,OAAe,EAC3B,cAAc,GAAE,MAAY,GAC3B,OAAO,CAAC,aAAa,EAAE,CAAC;CA8C5B"}
@@ -0,0 +1,14 @@
1
+ import type { ExampleEvaluationRun } from "../internal/api/models/ExampleEvaluationRun";
2
+ import type { Example } from "../data/Example";
3
+ import { EvaluatorRunner } from "./EvaluatorRunner";
4
+ /**
5
+ * Evaluation runner for hosted (server-side) scorers.
6
+ *
7
+ * Submits scorer names to the Judgment platform's evaluation queue
8
+ * and polls for results. Used internally by {@link Evaluation}.
9
+ */
10
+ export declare class HostedEvaluatorRunner extends EvaluatorRunner<string> {
11
+ protected _buildPayload(evalId: string, projectId: string, evalRunName: string, createdAt: string, examples: Example[], scorers: string[]): ExampleEvaluationRun;
12
+ protected _submit(projectId: string, _evalId: string, examples: Example[], _scorers: string[], payload: ExampleEvaluationRun): Promise<number>;
13
+ }
14
+ //# sourceMappingURL=HostedEvaluatorRunner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"HostedEvaluatorRunner.d.ts","sourceRoot":"","sources":["../../src/evaluation/HostedEvaluatorRunner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,6CAA6C,CAAC;AACxF,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD;;;;;GAKG;AACH,qBAAa,qBAAsB,SAAQ,eAAe,CAAC,MAAM,CAAC;IAChE,SAAS,CAAC,aAAa,CACrB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,MAAM,EAAE,GAChB,oBAAoB;cAYP,OAAO,CACrB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,OAAO,EAAE,EACnB,QAAQ,EAAE,MAAM,EAAE,EAClB,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;CAKnB"}
@@ -0,0 +1,16 @@
1
+ import type { ExampleEvaluationRun } from "../internal/api/models/ExampleEvaluationRun";
2
+ import type { Example } from "../data/Example";
3
+ import type { Judge } from "../judges/Judge";
4
+ import { EvaluatorRunner } from "./EvaluatorRunner";
5
+ /**
6
+ * Evaluation runner for custom (in-process) scorers.
7
+ *
8
+ * Runs all {@link Judge} instances locally against the provided examples,
9
+ * posts results to the Judgment platform, then polls for finalized scores.
10
+ * Used internally by {@link Evaluation}.
11
+ */
12
+ export declare class LocalEvaluatorRunner extends EvaluatorRunner<Judge> {
13
+ protected _buildPayload(evalId: string, projectId: string, evalRunName: string, createdAt: string, examples: Example[], _scorers: Judge[]): ExampleEvaluationRun;
14
+ protected _submit(projectId: string, _evalId: string, examples: Example[], scorers: Judge[], payload: ExampleEvaluationRun): Promise<number>;
15
+ }
16
+ //# sourceMappingURL=LocalEvaluatorRunner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LocalEvaluatorRunner.d.ts","sourceRoot":"","sources":["../../src/evaluation/LocalEvaluatorRunner.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,6CAA6C,CAAC;AAExF,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAE7C,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AASpD;;;;;;GAMG;AACH,qBAAa,oBAAqB,SAAQ,eAAe,CAAC,KAAK,CAAC;IAC9D,SAAS,CAAC,aAAa,CACrB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,OAAO,EAAE,EACnB,QAAQ,EAAE,KAAK,EAAE,GAChB,oBAAoB;cAYP,OAAO,CACrB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,OAAO,EAAE,EACnB,OAAO,EAAE,KAAK,EAAE,EAChB,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;CAgFnB"}
@@ -0,0 +1,6 @@
1
+ export { Evaluation, type EvaluationRunOptions } from "./Evaluation";
2
+ export { EvaluationFactory } from "./EvaluationFactory";
3
+ export { EvaluatorRunner } from "./EvaluatorRunner";
4
+ export { LocalEvaluatorRunner } from "./LocalEvaluatorRunner";
5
+ export { HostedEvaluatorRunner } from "./HostedEvaluatorRunner";
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evaluation/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,KAAK,oBAAoB,EAAE,MAAM,cAAc,CAAC;AACrE,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,qBAAqB,EAAE,MAAM,yBAAyB,CAAC"}