@arizeai/phoenix-evals 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -0
- package/dist/esm/index.d.ts +2 -0
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/index.js +2 -0
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm/createClassifier.d.ts.map +1 -1
- package/dist/esm/llm/createClassifier.js +2 -1
- package/dist/esm/llm/createClassifier.js.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.d.ts +1 -2
- package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.js +2 -1
- package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/esm/llm/generateClassification.d.ts +1 -2
- package/dist/esm/llm/generateClassification.d.ts.map +1 -1
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/evals.d.ts +2 -4
- package/dist/esm/types/evals.d.ts.map +1 -1
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +2 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/llm/createClassifier.d.ts.map +1 -1
- package/dist/src/llm/createClassifier.js +13 -6
- package/dist/src/llm/createClassifier.js.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.d.ts +1 -2
- package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.js +14 -3
- package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/src/llm/generateClassification.d.ts +1 -2
- package/dist/src/llm/generateClassification.d.ts.map +1 -1
- package/dist/src/types/evals.d.ts +2 -4
- package/dist/src/types/evals.d.ts.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -2
- package/src/index.ts +2 -0
- package/src/llm/createClassifier.ts +2 -1
- package/src/llm/createHallucinationEvaluator.ts +3 -1
- package/src/llm/generateClassification.ts +1 -1
- package/src/types/evals.ts +3 -2
package/dist/src/index.d.ts
CHANGED
package/dist/src/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC;AACtB,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC"}
|
package/dist/src/index.js
CHANGED
|
@@ -15,4 +15,6 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
__exportStar(require("./llm"), exports);
|
|
18
|
+
__exportStar(require("./template"), exports);
|
|
19
|
+
__exportStar(require("./types"), exports);
|
|
18
20
|
//# sourceMappingURL=index.js.map
|
package/dist/src/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,wCAAsB"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,wCAAsB;AACtB,6CAA2B;AAC3B,0CAAwB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createClassifier.d.ts","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,oBAAoB,EACpB,WAAW,EACZ,MAAM,gBAAgB,CAAC;AAkBxB;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,SAAS,EACpD,IAAI,EAAE,oBAAoB,GACzB,WAAW,CAAC,UAAU,EAAE,SAAS,CAAC,
|
|
1
|
+
{"version":3,"file":"createClassifier.d.ts","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,oBAAoB,EACpB,WAAW,EACZ,MAAM,gBAAgB,CAAC;AAkBxB;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,SAAS,EACpD,IAAI,EAAE,oBAAoB,GACzB,WAAW,CAAC,UAAU,EAAE,SAAS,CAAC,CA8BpC"}
|
|
@@ -1,4 +1,15 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __rest = (this && this.__rest) || function (s, e) {
|
|
3
|
+
var t = {};
|
|
4
|
+
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)
|
|
5
|
+
t[p] = s[p];
|
|
6
|
+
if (s != null && typeof Object.getOwnPropertySymbols === "function")
|
|
7
|
+
for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {
|
|
8
|
+
if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))
|
|
9
|
+
t[p[i]] = s[p[i]];
|
|
10
|
+
}
|
|
11
|
+
return t;
|
|
12
|
+
};
|
|
2
13
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
14
|
exports.createClassifier = createClassifier;
|
|
4
15
|
const generateClassification_1 = require("./generateClassification");
|
|
@@ -18,18 +29,14 @@ function choicesToLabels(choices) {
|
|
|
18
29
|
* A function that serves as a factory that will output a classification evaluator
|
|
19
30
|
*/
|
|
20
31
|
function createClassifier(args) {
|
|
21
|
-
const { model, choices, promptTemplate } = args;
|
|
32
|
+
const { model, choices, promptTemplate } = args, rest = __rest(args, ["model", "choices", "promptTemplate"]);
|
|
22
33
|
return async (args) => {
|
|
23
34
|
const templateVariables = Object.assign({}, args);
|
|
24
35
|
const prompt = (0, template_1.formatTemplate)({
|
|
25
36
|
template: promptTemplate,
|
|
26
37
|
variables: templateVariables,
|
|
27
38
|
});
|
|
28
|
-
const classification = await (0, generateClassification_1.generateClassification)({
|
|
29
|
-
model,
|
|
30
|
-
labels: choicesToLabels(choices),
|
|
31
|
-
prompt,
|
|
32
|
-
});
|
|
39
|
+
const classification = await (0, generateClassification_1.generateClassification)(Object.assign({ model, labels: choicesToLabels(choices), prompt }, rest));
|
|
33
40
|
// Post-process the classification result and map it to the choices
|
|
34
41
|
const score = choices[classification.label];
|
|
35
42
|
return Object.assign({ score }, classification);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createClassifier.js","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"createClassifier.js","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":";;;;;;;;;;;;;AA2BA,4CAgCC;AApDD,qEAAkE;AAClE,0CAA6C;AAE7C;;;GAGG;AACH,SAAS,eAAe,CACtB,OAAiC;IAEjC,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,MAA+B,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB,CAC9B,IAA0B;IAE1B,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,KAAc,IAAI,EAAb,IAAI,UAAK,IAAI,EAAlD,sCAA2C,CAAO,CAAC;IAEzD,OAAO,KAAK,EACV,IAA2C,EAChB,EAAE;QAC7B,MAAM,iBAAiB,qBAClB,IAAI,CACR,CAAC;QAEF,MAAM,MAAM,GAAG,IAAA,yBAAc,EAAC;YAC5B,QAAQ,EAAE,cAAc;YACxB,SAAS,EAAE,iBAAiB;SAC7B,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,MAAM,IAAA,+CAAsB,kBACjD,KAAK,EACL,MAAM,EAAE,eAAe,CAAC,OAAO,CAAC,EAChC,MAAM,IACH,IAAI,EACP,CAAC;QAEH,mEAAmE;QACnE,MAAM,KAAK,GAAG,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QAE5C,uBACE,KAAK,IACF,cAAc,EACjB;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { CreateClassifierArgs, EvaluatorFn } from "../types/evals";
|
|
2
|
-
interface HallucinationEvaluatorArgs extends Omit<CreateClassifierArgs, "promptTemplate" | "choices"> {
|
|
2
|
+
export interface HallucinationEvaluatorArgs extends Omit<CreateClassifierArgs, "promptTemplate" | "choices"> {
|
|
3
3
|
choices?: CreateClassifierArgs["choices"];
|
|
4
4
|
promptTemplate?: CreateClassifierArgs["promptTemplate"];
|
|
5
5
|
}
|
|
@@ -10,5 +10,4 @@ interface HallucinationEvaluatorArgs extends Omit<CreateClassifierArgs, "promptT
|
|
|
10
10
|
* @returns A function that evaluates whether an answer is factual or hallucinated based on a query and reference text.
|
|
11
11
|
*/
|
|
12
12
|
export declare function createHallucinationEvaluator(args: HallucinationEvaluatorArgs): EvaluatorFn<string, string>;
|
|
13
|
-
export {};
|
|
14
13
|
//# sourceMappingURL=createHallucinationEvaluator.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createHallucinationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/createHallucinationEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAMnE,
|
|
1
|
+
{"version":3,"file":"createHallucinationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/createHallucinationEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAMnE,MAAM,WAAW,0BACf,SAAQ,IAAI,CAAC,oBAAoB,EAAE,gBAAgB,GAAG,SAAS,CAAC;IAChE,OAAO,CAAC,EAAE,oBAAoB,CAAC,SAAS,CAAC,CAAC;IAC1C,cAAc,CAAC,EAAE,oBAAoB,CAAC,gBAAgB,CAAC,CAAC;CACzD;AACD;;;;;GAKG;AACH,wBAAgB,4BAA4B,CAC1C,IAAI,EAAE,0BAA0B,GAC/B,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAa7B"}
|
|
@@ -1,4 +1,15 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __rest = (this && this.__rest) || function (s, e) {
|
|
3
|
+
var t = {};
|
|
4
|
+
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)
|
|
5
|
+
t[p] = s[p];
|
|
6
|
+
if (s != null && typeof Object.getOwnPropertySymbols === "function")
|
|
7
|
+
for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {
|
|
8
|
+
if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))
|
|
9
|
+
t[p[i]] = s[p[i]];
|
|
10
|
+
}
|
|
11
|
+
return t;
|
|
12
|
+
};
|
|
2
13
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
14
|
exports.createHallucinationEvaluator = createHallucinationEvaluator;
|
|
4
15
|
const createClassifier_1 = require("./createClassifier");
|
|
@@ -10,9 +21,9 @@ const HALLUCINATION_TEMPLATE_1 = require("../default_templates/HALLUCINATION_TEM
|
|
|
10
21
|
* @returns A function that evaluates whether an answer is factual or hallucinated based on a query and reference text.
|
|
11
22
|
*/
|
|
12
23
|
function createHallucinationEvaluator(args) {
|
|
13
|
-
const { choices = HALLUCINATION_TEMPLATE_1.HALLUCINATION_CHOICES, promptTemplate = HALLUCINATION_TEMPLATE_1.HALLUCINATION_TEMPLATE
|
|
14
|
-
const hallucinationEvaluatorFn = (0, createClassifier_1.createClassifier)(Object.assign(Object.assign({}, args), { promptTemplate,
|
|
15
|
-
choices }));
|
|
24
|
+
const { choices = HALLUCINATION_TEMPLATE_1.HALLUCINATION_CHOICES, promptTemplate = HALLUCINATION_TEMPLATE_1.HALLUCINATION_TEMPLATE } = args, rest = __rest(args, ["choices", "promptTemplate"]);
|
|
25
|
+
const hallucinationEvaluatorFn = (0, createClassifier_1.createClassifier)(Object.assign(Object.assign(Object.assign({}, args), { promptTemplate,
|
|
26
|
+
choices }), rest));
|
|
16
27
|
return hallucinationEvaluatorFn;
|
|
17
28
|
}
|
|
18
29
|
//# sourceMappingURL=createHallucinationEvaluator.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createHallucinationEvaluator.js","sourceRoot":"","sources":["../../../src/llm/createHallucinationEvaluator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"createHallucinationEvaluator.js","sourceRoot":"","sources":["../../../src/llm/createHallucinationEvaluator.ts"],"names":[],"mappings":";;;;;;;;;;;;;AAkBA,oEAeC;AAjCD,yDAAsD;AAEtD,wFAGqD;AAOrD;;;;;GAKG;AACH,SAAgB,4BAA4B,CAC1C,IAAgC;IAEhC,MAAM,EACJ,OAAO,GAAG,8CAAqB,EAC/B,cAAc,GAAG,+CAAsB,KAErC,IAAI,EADH,IAAI,UACL,IAAI,EAJF,6BAIL,CAAO,CAAC;IACT,MAAM,wBAAwB,GAAG,IAAA,mCAAgB,gDAC5C,IAAI,KACP,cAAc;QACd,OAAO,KACJ,IAAI,EACP,CAAC;IACH,OAAO,wBAAwB,CAAC;AAClC,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { ClassificationResult, WithLLM } from "../types/evals";
|
|
2
2
|
import { WithTelemetry } from "../types/otel";
|
|
3
3
|
import type { WithPrompt } from "../types/prompts";
|
|
4
|
-
interface ClassifyArgs extends WithLLM, WithPrompt, WithTelemetry {
|
|
4
|
+
export interface ClassifyArgs extends WithLLM, WithPrompt, WithTelemetry {
|
|
5
5
|
/**
|
|
6
6
|
* The labels to classify the example into. E.x. ["correct", "incorrect"]
|
|
7
7
|
*/
|
|
@@ -19,5 +19,4 @@ interface ClassifyArgs extends WithLLM, WithPrompt, WithTelemetry {
|
|
|
19
19
|
* A function that leverages an llm to perform a classification
|
|
20
20
|
*/
|
|
21
21
|
export declare function generateClassification(args: ClassifyArgs): Promise<ClassificationResult>;
|
|
22
|
-
export {};
|
|
23
22
|
//# sourceMappingURL=generateClassification.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generateClassification.d.ts","sourceRoot":"","sources":["../../../src/llm/generateClassification.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,OAAO,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAInD,
|
|
1
|
+
{"version":3,"file":"generateClassification.d.ts","sourceRoot":"","sources":["../../../src/llm/generateClassification.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,OAAO,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAInD,MAAM,WAAW,YAAa,SAAQ,OAAO,EAAE,UAAU,EAAE,aAAa;IACtE;;OAEG;IACH,MAAM,EAAE,CAAC,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC,CAAC;IAC9B;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AACD;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,YAAY,GACjB,OAAO,CAAC,oBAAoB,CAAC,CAyB/B"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { LanguageModel } from "ai";
|
|
2
|
+
import { WithTelemetry } from "./otel";
|
|
2
3
|
/**
|
|
3
4
|
* The arguments for an evaluation
|
|
4
5
|
*/
|
|
@@ -54,10 +55,7 @@ export type ClassificationChoicesMap = Record<string, number>;
|
|
|
54
55
|
/**
|
|
55
56
|
* The arguments for creating a classification-based evaluator
|
|
56
57
|
*/
|
|
57
|
-
export interface CreateClassifierArgs {
|
|
58
|
-
/**
|
|
59
|
-
* The LLM to use for classification / evaluation
|
|
60
|
-
*/
|
|
58
|
+
export interface CreateClassifierArgs extends WithTelemetry {
|
|
61
59
|
model: LanguageModel;
|
|
62
60
|
/**
|
|
63
61
|
* The choices to classify the example into.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evals.d.ts","sourceRoot":"","sources":["../../../src/types/evals.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"evals.d.ts","sourceRoot":"","sources":["../../../src/types/evals.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC;;GAEG;AACH,MAAM,WAAW,cAAc,CAAC,UAAU,EAAE,SAAS;IACnD,MAAM,EAAE,UAAU,CAAC;IACnB,QAAQ,CAAC,EAAE,UAAU,CAAC;IACtB,KAAK,CAAC,EAAE,SAAS,CAAC;IAClB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,aAAa,CAAC;CACtB;AAED,MAAM,WAAW,iBAAiB,CAAC,UAAU,EAAE,SAAS,CACtD,SAAQ,cAAc,CAAC,UAAU,EAAE,SAAS,CAAC,EAC3C,OAAO;CAAG;AAEd;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,MAAM,wBAAwB,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAE9D;;GAEG;AACH,MAAM,WAAW,oBAAqB,SAAQ,aAAa;IAIzD,KAAK,EAAE,aAAa,CAAC;IACrB;;;OAGG;IACH,OAAO,EAAE,wBAAwB,CAAC;IAClC;;OAEG;IACH,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,WAAW,CAAC,UAAU,EAAE,SAAS,IAAI,CAC/C,IAAI,EAAE,cAAc,CAAC,UAAU,EAAE,SAAS,CAAC,KACxC,OAAO,CAAC,gBAAgB,CAAC,CAAC"}
|