braintrust 0.0.175 → 0.0.176
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +79 -6
- package/dist/index.d.mts +47 -13
- package/dist/index.d.ts +47 -13
- package/dist/index.js +118 -43
- package/dist/index.mjs +117 -43
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -1236,7 +1236,7 @@ var require_package = __commonJS({
|
|
|
1236
1236
|
"package.json"(exports2, module2) {
|
|
1237
1237
|
module2.exports = {
|
|
1238
1238
|
name: "braintrust",
|
|
1239
|
-
version: "0.0.175",
|
|
1239
|
+
version: "0.0.176",
|
|
1240
1240
|
description: "SDK for integrating Braintrust",
|
|
1241
1241
|
repository: {
|
|
1242
1242
|
type: "git",
|
|
@@ -1311,7 +1311,7 @@ var require_package = __commonJS({
|
|
|
1311
1311
|
},
|
|
1312
1312
|
dependencies: {
|
|
1313
1313
|
"@ai-sdk/provider": "^1.0.1",
|
|
1314
|
-
"@braintrust/core": "0.0.
|
|
1314
|
+
"@braintrust/core": "0.0.70",
|
|
1315
1315
|
"@next/env": "^14.2.3",
|
|
1316
1316
|
"@vercel/functions": "^1.0.2",
|
|
1317
1317
|
ai: "^3.2.16",
|
|
@@ -4574,9 +4574,7 @@ var BarProgressReporter = class {
|
|
|
4574
4574
|
};
|
|
4575
4575
|
|
|
4576
4576
|
// src/framework.ts
|
|
4577
|
-
var import_chalk = __toESM(require("chalk"));
|
|
4578
4577
|
var import_core2 = require("@braintrust/core");
|
|
4579
|
-
var import_pluralize = __toESM(require("pluralize"));
|
|
4580
4578
|
|
|
4581
4579
|
// ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
|
|
4582
4580
|
function initialParams(fn) {
|
|
@@ -5682,6 +5680,8 @@ function waterfall(tasks, callback) {
|
|
|
5682
5680
|
var waterfall$1 = awaitify(waterfall);
|
|
5683
5681
|
|
|
5684
5682
|
// src/framework.ts
|
|
5683
|
+
var import_chalk = __toESM(require("chalk"));
|
|
5684
|
+
var import_pluralize = __toESM(require("pluralize"));
|
|
5685
5685
|
var EvalResultWithSummary = class {
|
|
5686
5686
|
constructor(summary, results) {
|
|
5687
5687
|
this.summary = summary;
|
|
@@ -5858,7 +5858,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
5858
5858
|
const meta = (o) => metadata = { ...metadata, ...o };
|
|
5859
5859
|
await rootSpan.traced(
|
|
5860
5860
|
async (span) => {
|
|
5861
|
-
const outputResult = evaluator.task(datum.input, { meta, span });
|
|
5861
|
+
const outputResult = evaluator.task(datum.input, {
|
|
5862
|
+
meta,
|
|
5863
|
+
metadata,
|
|
5864
|
+
span
|
|
5865
|
+
});
|
|
5862
5866
|
if (outputResult instanceof Promise) {
|
|
5863
5867
|
output = await outputResult;
|
|
5864
5868
|
} else {
|
|
@@ -6720,6 +6724,7 @@ async function uploadHandleBundles({
|
|
|
6720
6724
|
function_data: {
|
|
6721
6725
|
type: "prompt"
|
|
6722
6726
|
},
|
|
6727
|
+
function_type: prompt.functionType,
|
|
6723
6728
|
prompt_data,
|
|
6724
6729
|
if_exists: prompt.ifExists
|
|
6725
6730
|
});
|
|
@@ -7068,12 +7073,14 @@ var Project = class {
|
|
|
7068
7073
|
id;
|
|
7069
7074
|
tools;
|
|
7070
7075
|
prompts;
|
|
7076
|
+
scorers;
|
|
7071
7077
|
constructor(args) {
|
|
7072
7078
|
_initializeSpanContext();
|
|
7073
7079
|
this.name = "name" in args ? args.name : void 0;
|
|
7074
7080
|
this.id = "id" in args ? args.id : void 0;
|
|
7075
7081
|
this.tools = new ToolBuilder(this);
|
|
7076
7082
|
this.prompts = new PromptBuilder(this);
|
|
7083
|
+
this.scorers = new ScorerBuilder(this);
|
|
7077
7084
|
}
|
|
7078
7085
|
};
|
|
7079
7086
|
var ToolBuilder = class {
|
|
@@ -7107,6 +7114,70 @@ var ToolBuilder = class {
|
|
|
7107
7114
|
return tool;
|
|
7108
7115
|
}
|
|
7109
7116
|
};
|
|
7117
|
+
var ScorerBuilder = class {
|
|
7118
|
+
constructor(project) {
|
|
7119
|
+
this.project = project;
|
|
7120
|
+
}
|
|
7121
|
+
taskCounter = 0;
|
|
7122
|
+
create(opts) {
|
|
7123
|
+
this.taskCounter++;
|
|
7124
|
+
let resolvedName = opts.name;
|
|
7125
|
+
if (!resolvedName && "handler" in opts) {
|
|
7126
|
+
resolvedName = opts.handler.name;
|
|
7127
|
+
}
|
|
7128
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
7129
|
+
resolvedName = `Scorer ${import_path4.default.basename(__filename)} ${this.taskCounter}`;
|
|
7130
|
+
}
|
|
7131
|
+
const slug = opts.slug ?? (0, import_slugify2.default)(resolvedName, { lower: true, strict: true });
|
|
7132
|
+
if ("handler" in opts) {
|
|
7133
|
+
const scorer = new CodeFunction(this.project, {
|
|
7134
|
+
...opts,
|
|
7135
|
+
name: resolvedName,
|
|
7136
|
+
slug,
|
|
7137
|
+
type: "scorer"
|
|
7138
|
+
});
|
|
7139
|
+
if (globalThis._lazy_load) {
|
|
7140
|
+
globalThis._evals.functions.push(
|
|
7141
|
+
scorer
|
|
7142
|
+
);
|
|
7143
|
+
}
|
|
7144
|
+
} else {
|
|
7145
|
+
const promptBlock = "messages" in opts ? {
|
|
7146
|
+
type: "chat",
|
|
7147
|
+
messages: opts.messages
|
|
7148
|
+
} : {
|
|
7149
|
+
type: "completion",
|
|
7150
|
+
content: opts.prompt
|
|
7151
|
+
};
|
|
7152
|
+
const promptData = {
|
|
7153
|
+
prompt: promptBlock,
|
|
7154
|
+
options: {
|
|
7155
|
+
model: opts.model,
|
|
7156
|
+
params: opts.params
|
|
7157
|
+
},
|
|
7158
|
+
parser: {
|
|
7159
|
+
type: "llm_classifier",
|
|
7160
|
+
use_cot: opts.useCot,
|
|
7161
|
+
choice_scores: opts.choiceScores
|
|
7162
|
+
}
|
|
7163
|
+
};
|
|
7164
|
+
const codePrompt = new CodePrompt(
|
|
7165
|
+
this.project,
|
|
7166
|
+
promptData,
|
|
7167
|
+
[],
|
|
7168
|
+
{
|
|
7169
|
+
...opts,
|
|
7170
|
+
name: resolvedName,
|
|
7171
|
+
slug
|
|
7172
|
+
},
|
|
7173
|
+
"scorer"
|
|
7174
|
+
);
|
|
7175
|
+
if (globalThis._lazy_load) {
|
|
7176
|
+
globalThis._evals.prompts.push(codePrompt);
|
|
7177
|
+
}
|
|
7178
|
+
}
|
|
7179
|
+
}
|
|
7180
|
+
};
|
|
7110
7181
|
var CodeFunction = class {
|
|
7111
7182
|
constructor(project, opts) {
|
|
7112
7183
|
this.project = project;
|
|
@@ -7146,8 +7217,9 @@ var CodePrompt = class {
|
|
|
7146
7217
|
ifExists;
|
|
7147
7218
|
description;
|
|
7148
7219
|
id;
|
|
7220
|
+
functionType;
|
|
7149
7221
|
toolFunctions;
|
|
7150
|
-
constructor(project, prompt, toolFunctions, opts) {
|
|
7222
|
+
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
7151
7223
|
this.project = project;
|
|
7152
7224
|
this.name = opts.name;
|
|
7153
7225
|
this.slug = opts.slug;
|
|
@@ -7156,6 +7228,7 @@ var CodePrompt = class {
|
|
|
7156
7228
|
this.ifExists = opts.ifExists;
|
|
7157
7229
|
this.description = opts.description;
|
|
7158
7230
|
this.id = opts.id;
|
|
7231
|
+
this.functionType = functionType;
|
|
7159
7232
|
}
|
|
7160
7233
|
};
|
|
7161
7234
|
var toolFunctionDefinitionSchema = import_zod4.z.object({
|
package/dist/index.d.mts
CHANGED
|
@@ -1572,13 +1572,20 @@ declare class Project {
|
|
|
1572
1572
|
readonly id?: string;
|
|
1573
1573
|
tools: ToolBuilder;
|
|
1574
1574
|
prompts: PromptBuilder;
|
|
1575
|
+
scorers: ScorerBuilder;
|
|
1575
1576
|
constructor(args: CreateProjectOpts);
|
|
1576
1577
|
}
|
|
1577
1578
|
declare class ToolBuilder {
|
|
1578
1579
|
private readonly project;
|
|
1579
1580
|
private taskCounter;
|
|
1580
1581
|
constructor(project: Project);
|
|
1581
|
-
create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: ToolOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
|
|
1582
|
+
create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: CodeOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
|
|
1583
|
+
}
|
|
1584
|
+
declare class ScorerBuilder {
|
|
1585
|
+
private readonly project;
|
|
1586
|
+
private taskCounter;
|
|
1587
|
+
constructor(project: Project);
|
|
1588
|
+
create<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>>(opts: ScorerOpts<Output, Input, Params, Returns, Fn>): void;
|
|
1582
1589
|
}
|
|
1583
1590
|
type Schema<Input, Output> = Partial<{
|
|
1584
1591
|
parameters: z.ZodSchema<Input>;
|
|
@@ -1590,9 +1597,21 @@ interface BaseFnOpts {
|
|
|
1590
1597
|
description: string;
|
|
1591
1598
|
ifExists: IfExists;
|
|
1592
1599
|
}
|
|
1593
|
-
type ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
|
|
1600
|
+
type CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
|
|
1594
1601
|
handler: Fn;
|
|
1595
1602
|
} & Schema<Params, Returns>;
|
|
1603
|
+
type ScorerPromptOpts = Partial<BaseFnOpts> & PromptOpts<false, false, false, false> & {
|
|
1604
|
+
useCot: boolean;
|
|
1605
|
+
choiceScores: Record<string, number>;
|
|
1606
|
+
};
|
|
1607
|
+
type ScorerArgs<Output, Input> = {
|
|
1608
|
+
output: Output;
|
|
1609
|
+
expected?: Output;
|
|
1610
|
+
input?: Input;
|
|
1611
|
+
metadata?: Record<string, unknown>;
|
|
1612
|
+
};
|
|
1613
|
+
type Exact<T, Shape> = T extends Shape ? Exclude<keyof T, keyof Shape> extends never ? T : never : never;
|
|
1614
|
+
type ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = CodeOpts<Exact<Params, ScorerArgs<Output, Input>>, Returns, Fn> | ScorerPromptOpts;
|
|
1596
1615
|
declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> {
|
|
1597
1616
|
readonly project: Project;
|
|
1598
1617
|
readonly handler: Fn;
|
|
@@ -1603,7 +1622,7 @@ declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Outp
|
|
|
1603
1622
|
readonly parameters?: z.ZodSchema<Input>;
|
|
1604
1623
|
readonly returns?: z.ZodSchema<Output>;
|
|
1605
1624
|
readonly ifExists?: IfExists;
|
|
1606
|
-
constructor(project: Project, opts: Omit<ToolOpts<Input, Output, Fn>, "name" | "slug"> & {
|
|
1625
|
+
constructor(project: Project, opts: Omit<CodeOpts<Input, Output, Fn>, "name" | "slug"> & {
|
|
1607
1626
|
name: string;
|
|
1608
1627
|
slug: string;
|
|
1609
1628
|
type: FunctionType;
|
|
@@ -1619,11 +1638,12 @@ declare class CodePrompt {
|
|
|
1619
1638
|
readonly ifExists?: IfExists;
|
|
1620
1639
|
readonly description?: string;
|
|
1621
1640
|
readonly id?: string;
|
|
1641
|
+
readonly functionType?: FunctionType;
|
|
1622
1642
|
readonly toolFunctions: (SavedFunctionId | GenericCodeFunction)[];
|
|
1623
|
-
constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false>, "name" | "slug"> & {
|
|
1643
|
+
constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false, false, false>, "name" | "slug"> & {
|
|
1624
1644
|
name: string;
|
|
1625
1645
|
slug: string;
|
|
1626
|
-
});
|
|
1646
|
+
}, functionType?: FunctionType);
|
|
1627
1647
|
}
|
|
1628
1648
|
declare const toolFunctionDefinitionSchema: z.ZodObject<{
|
|
1629
1649
|
type: z.ZodLiteral<"function">;
|
|
@@ -1667,18 +1687,22 @@ interface PromptId {
|
|
|
1667
1687
|
interface PromptVersion {
|
|
1668
1688
|
version: TransactionId;
|
|
1669
1689
|
}
|
|
1690
|
+
interface PromptTools {
|
|
1691
|
+
tools: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
|
|
1692
|
+
}
|
|
1693
|
+
interface PromptNoTrace {
|
|
1694
|
+
noTrace: boolean;
|
|
1695
|
+
}
|
|
1670
1696
|
type PromptContents = {
|
|
1671
1697
|
prompt: string;
|
|
1672
1698
|
} | {
|
|
1673
1699
|
messages: Message[];
|
|
1674
1700
|
};
|
|
1675
|
-
type PromptOpts<HasId extends boolean, HasVersion extends boolean> = (Partial<Omit<BaseFnOpts, "name">> & {
|
|
1701
|
+
type PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = (Partial<Omit<BaseFnOpts, "name">> & {
|
|
1676
1702
|
name: string;
|
|
1677
|
-
}) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & PromptContents & {
|
|
1703
|
+
}) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & (HasTools extends true ? Partial<PromptTools> : {}) & (HasNoTrace extends true ? Partial<PromptNoTrace> : {}) & PromptContents & {
|
|
1678
1704
|
model: string;
|
|
1679
1705
|
params?: ModelParams;
|
|
1680
|
-
tools?: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
|
|
1681
|
-
noTrace?: boolean;
|
|
1682
1706
|
};
|
|
1683
1707
|
declare class PromptBuilder {
|
|
1684
1708
|
private readonly project;
|
|
@@ -1707,7 +1731,14 @@ declare function BaseExperiment<Input = unknown, Expected = unknown, Metadata ex
|
|
|
1707
1731
|
type EvalData<Input, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata>[] | (() => EvalCase<Input, Expected, Metadata>[]) | Promise<EvalCase<Input, Expected, Metadata>[]> | (() => Promise<EvalCase<Input, Expected, Metadata>[]>) | AsyncGenerator<EvalCase<Input, Expected, Metadata>> | AsyncIterable<EvalCase<Input, Expected, Metadata>> | BaseExperiment<Input, Expected, Metadata> | (() => BaseExperiment<Input, Expected, Metadata>);
|
|
1708
1732
|
type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
|
|
1709
1733
|
interface EvalHooks {
|
|
1734
|
+
/**
|
|
1735
|
+
* @deprecated Use `metadata` instead.
|
|
1736
|
+
*/
|
|
1710
1737
|
meta: (info: Record<string, unknown>) => void;
|
|
1738
|
+
/**
|
|
1739
|
+
* The metadata object for the current evaluation. You can mutate this object to add or remove metadata.
|
|
1740
|
+
*/
|
|
1741
|
+
metadata: Record<string, unknown>;
|
|
1711
1742
|
span: Span;
|
|
1712
1743
|
}
|
|
1713
1744
|
type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
|
|
@@ -1923,6 +1954,7 @@ type braintrust_BraintrustStreamChunk = BraintrustStreamChunk;
|
|
|
1923
1954
|
type braintrust_ChatPrompt = ChatPrompt;
|
|
1924
1955
|
type braintrust_CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> = CodeFunction<Input, Output, Fn>;
|
|
1925
1956
|
declare const braintrust_CodeFunction: typeof CodeFunction;
|
|
1957
|
+
type braintrust_CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = CodeOpts<Params, Returns, Fn>;
|
|
1926
1958
|
type braintrust_CodePrompt = CodePrompt;
|
|
1927
1959
|
declare const braintrust_CodePrompt: typeof CodePrompt;
|
|
1928
1960
|
declare const braintrust_CommentEvent: typeof CommentEvent;
|
|
@@ -1986,7 +2018,7 @@ type braintrust_Prompt<HasId extends boolean = true, HasVersion extends boolean
|
|
|
1986
2018
|
declare const braintrust_Prompt: typeof Prompt;
|
|
1987
2019
|
type braintrust_PromptBuilder = PromptBuilder;
|
|
1988
2020
|
declare const braintrust_PromptBuilder: typeof PromptBuilder;
|
|
1989
|
-
type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean> = PromptOpts<HasId, HasVersion>;
|
|
2021
|
+
type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = PromptOpts<HasId, HasVersion, HasTools, HasNoTrace>;
|
|
1990
2022
|
type braintrust_PromptRowWithId<HasId extends boolean = true, HasVersion extends boolean = true> = PromptRowWithId<HasId, HasVersion>;
|
|
1991
2023
|
type braintrust_ReadonlyAttachment = ReadonlyAttachment;
|
|
1992
2024
|
declare const braintrust_ReadonlyAttachment: typeof ReadonlyAttachment;
|
|
@@ -1995,6 +2027,9 @@ declare const braintrust_ReadonlyExperiment: typeof ReadonlyExperiment;
|
|
|
1995
2027
|
declare const braintrust_Reporter: typeof Reporter;
|
|
1996
2028
|
type braintrust_ReporterBody<EvalReport> = ReporterBody<EvalReport>;
|
|
1997
2029
|
type braintrust_ScoreSummary = ScoreSummary;
|
|
2030
|
+
type braintrust_ScorerBuilder = ScorerBuilder;
|
|
2031
|
+
declare const braintrust_ScorerBuilder: typeof ScorerBuilder;
|
|
2032
|
+
type braintrust_ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = ScorerOpts<Output, Input, Params, Returns, Fn>;
|
|
1998
2033
|
type braintrust_SerializedBraintrustState = SerializedBraintrustState;
|
|
1999
2034
|
type braintrust_SetCurrentArg = SetCurrentArg;
|
|
2000
2035
|
type braintrust_Span = Span;
|
|
@@ -2005,7 +2040,6 @@ type braintrust_StartSpanArgs = StartSpanArgs;
|
|
|
2005
2040
|
type braintrust_ToolBuilder = ToolBuilder;
|
|
2006
2041
|
declare const braintrust_ToolBuilder: typeof ToolBuilder;
|
|
2007
2042
|
type braintrust_ToolFunctionDefinition = ToolFunctionDefinition;
|
|
2008
|
-
type braintrust_ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = ToolOpts<Params, Returns, Fn>;
|
|
2009
2043
|
type braintrust_WithTransactionId<R> = WithTransactionId<R>;
|
|
2010
2044
|
declare const braintrust_X_CACHED_HEADER: typeof X_CACHED_HEADER;
|
|
2011
2045
|
declare const braintrust__exportsForTestingOnly: typeof _exportsForTestingOnly;
|
|
@@ -2053,7 +2087,7 @@ declare const braintrust_wrapOpenAI: typeof wrapOpenAI;
|
|
|
2053
2087
|
declare const braintrust_wrapOpenAIv4: typeof wrapOpenAIv4;
|
|
2054
2088
|
declare const braintrust_wrapTraced: typeof wrapTraced;
|
|
2055
2089
|
declare namespace braintrust {
|
|
2056
|
-
export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, braintrust_IdField as IdField, type 
braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, type braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type
|
|
2090
|
+
export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, type braintrust_CodeOpts as CodeOpts, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, 
braintrust_IdField as IdField, type braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, braintrust_ScorerBuilder as ScorerBuilder, type braintrust_ScorerOpts as ScorerOpts, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, type braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type braintrust_WithTransactionId as WithTransactionId, braintrust_X_CACHED_HEADER as X_CACHED_HEADER, braintrust__exportsForTestingOnly as _exportsForTestingOnly, 
braintrust__internalGetGlobalState as _internalGetGlobalState, braintrust__internalSetInitialState as _internalSetInitialState, braintrust_braintrustStreamChunkSchema as braintrustStreamChunkSchema, braintrust_buildLocalSummary as buildLocalSummary, braintrust_createFinalValuePassThroughStream as createFinalValuePassThroughStream, braintrust_currentExperiment as currentExperiment, braintrust_currentLogger as currentLogger, braintrust_currentSpan as currentSpan, braintrust_devNullWritableStream as devNullWritableStream, braintrust_flush as flush, braintrust_getSpanParentObject as getSpanParentObject, braintrust_init as init, braintrust_initDataset as initDataset, braintrust_initExperiment as initExperiment, braintrust_initLogger as initLogger, braintrust_invoke as invoke, braintrust_loadPrompt as loadPrompt, braintrust_log as log, braintrust_logError as logError, braintrust_login as login, braintrust_loginToState as loginToState, braintrust_newId as newId, braintrust_parseCachedHeader as parseCachedHeader, braintrust_permalink as permalink, braintrust_projects as projects, braintrust_renderMessage as renderMessage, braintrust_reportFailures as reportFailures, braintrust_setFetch as setFetch, braintrust_spanComponentsToObjectId as spanComponentsToObjectId, braintrust_startSpan as startSpan, braintrust_summarize as summarize, braintrust_toolFunctionDefinitionSchema as toolFunctionDefinitionSchema, braintrust_traceable as traceable, braintrust_traced as traced, braintrust_updateSpan as updateSpan, braintrust_withCurrent as withCurrent, braintrust_withDataset as withDataset, braintrust_withExperiment as withExperiment, braintrust_withLogger as withLogger, braintrust_wrapAISDKModel as wrapAISDKModel, braintrust_wrapOpenAI as wrapOpenAI, braintrust_wrapOpenAIv4 as wrapOpenAIv4, braintrust_wrapTraced as wrapTraced };
|
|
2057
2091
|
}
|
|
2058
2092
|
|
|
2059
2093
|
/**
|
|
@@ -2109,4 +2143,4 @@ declare namespace braintrust {
|
|
|
2109
2143
|
* @module braintrust
|
|
2110
2144
|
*/
|
|
2111
2145
|
|
|
2112
|
-
export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type ToolFunctionDefinition, type
|
|
2146
|
+
export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, type CodeOpts, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, ScorerBuilder, type ScorerOpts, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type ToolFunctionDefinition, type WithTransactionId, X_CACHED_HEADER, _exportsForTestingOnly, _internalGetGlobalState, _internalSetInitialState, braintrustStreamChunkSchema, buildLocalSummary, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, braintrust as default, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, logError, login, loginToState, newId, parseCachedHeader, permalink, projects, renderMessage, reportFailures, setFetch, spanComponentsToObjectId, startSpan, summarize, toolFunctionDefinitionSchema, traceable, traced, updateSpan, withCurrent, withDataset, withExperiment, withLogger, wrapAISDKModel, wrapOpenAI, wrapOpenAIv4, 
wrapTraced };
|
package/dist/index.d.ts
CHANGED
|
@@ -1572,13 +1572,20 @@ declare class Project {
|
|
|
1572
1572
|
readonly id?: string;
|
|
1573
1573
|
tools: ToolBuilder;
|
|
1574
1574
|
prompts: PromptBuilder;
|
|
1575
|
+
scorers: ScorerBuilder;
|
|
1575
1576
|
constructor(args: CreateProjectOpts);
|
|
1576
1577
|
}
|
|
1577
1578
|
declare class ToolBuilder {
|
|
1578
1579
|
private readonly project;
|
|
1579
1580
|
private taskCounter;
|
|
1580
1581
|
constructor(project: Project);
|
|
1581
|
-
create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: ToolOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
|
|
1582
|
+
create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: CodeOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
|
|
1583
|
+
}
|
|
1584
|
+
declare class ScorerBuilder {
|
|
1585
|
+
private readonly project;
|
|
1586
|
+
private taskCounter;
|
|
1587
|
+
constructor(project: Project);
|
|
1588
|
+
create<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>>(opts: ScorerOpts<Output, Input, Params, Returns, Fn>): void;
|
|
1582
1589
|
}
|
|
1583
1590
|
type Schema<Input, Output> = Partial<{
|
|
1584
1591
|
parameters: z.ZodSchema<Input>;
|
|
@@ -1590,9 +1597,21 @@ interface BaseFnOpts {
|
|
|
1590
1597
|
description: string;
|
|
1591
1598
|
ifExists: IfExists;
|
|
1592
1599
|
}
|
|
1593
|
-
type ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
|
|
1600
|
+
type CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
|
|
1594
1601
|
handler: Fn;
|
|
1595
1602
|
} & Schema<Params, Returns>;
|
|
1603
|
+
type ScorerPromptOpts = Partial<BaseFnOpts> & PromptOpts<false, false, false, false> & {
|
|
1604
|
+
useCot: boolean;
|
|
1605
|
+
choiceScores: Record<string, number>;
|
|
1606
|
+
};
|
|
1607
|
+
type ScorerArgs<Output, Input> = {
|
|
1608
|
+
output: Output;
|
|
1609
|
+
expected?: Output;
|
|
1610
|
+
input?: Input;
|
|
1611
|
+
metadata?: Record<string, unknown>;
|
|
1612
|
+
};
|
|
1613
|
+
type Exact<T, Shape> = T extends Shape ? Exclude<keyof T, keyof Shape> extends never ? T : never : never;
|
|
1614
|
+
type ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = CodeOpts<Exact<Params, ScorerArgs<Output, Input>>, Returns, Fn> | ScorerPromptOpts;
|
|
1596
1615
|
declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> {
|
|
1597
1616
|
readonly project: Project;
|
|
1598
1617
|
readonly handler: Fn;
|
|
@@ -1603,7 +1622,7 @@ declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Outp
|
|
|
1603
1622
|
readonly parameters?: z.ZodSchema<Input>;
|
|
1604
1623
|
readonly returns?: z.ZodSchema<Output>;
|
|
1605
1624
|
readonly ifExists?: IfExists;
|
|
1606
|
-
constructor(project: Project, opts: Omit<
|
|
1625
|
+
constructor(project: Project, opts: Omit<CodeOpts<Input, Output, Fn>, "name" | "slug"> & {
|
|
1607
1626
|
name: string;
|
|
1608
1627
|
slug: string;
|
|
1609
1628
|
type: FunctionType;
|
|
@@ -1619,11 +1638,12 @@ declare class CodePrompt {
|
|
|
1619
1638
|
readonly ifExists?: IfExists;
|
|
1620
1639
|
readonly description?: string;
|
|
1621
1640
|
readonly id?: string;
|
|
1641
|
+
readonly functionType?: FunctionType;
|
|
1622
1642
|
readonly toolFunctions: (SavedFunctionId | GenericCodeFunction)[];
|
|
1623
|
-
constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false>, "name" | "slug"> & {
|
|
1643
|
+
constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false, false, false>, "name" | "slug"> & {
|
|
1624
1644
|
name: string;
|
|
1625
1645
|
slug: string;
|
|
1626
|
-
});
|
|
1646
|
+
}, functionType?: FunctionType);
|
|
1627
1647
|
}
|
|
1628
1648
|
declare const toolFunctionDefinitionSchema: z.ZodObject<{
|
|
1629
1649
|
type: z.ZodLiteral<"function">;
|
|
@@ -1667,18 +1687,22 @@ interface PromptId {
|
|
|
1667
1687
|
interface PromptVersion {
|
|
1668
1688
|
version: TransactionId;
|
|
1669
1689
|
}
|
|
1690
|
+
interface PromptTools {
|
|
1691
|
+
tools: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
|
|
1692
|
+
}
|
|
1693
|
+
interface PromptNoTrace {
|
|
1694
|
+
noTrace: boolean;
|
|
1695
|
+
}
|
|
1670
1696
|
type PromptContents = {
|
|
1671
1697
|
prompt: string;
|
|
1672
1698
|
} | {
|
|
1673
1699
|
messages: Message[];
|
|
1674
1700
|
};
|
|
1675
|
-
type PromptOpts<HasId extends boolean, HasVersion extends boolean> = (Partial<Omit<BaseFnOpts, "name">> & {
|
|
1701
|
+
type PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = (Partial<Omit<BaseFnOpts, "name">> & {
|
|
1676
1702
|
name: string;
|
|
1677
|
-
}) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & PromptContents & {
|
|
1703
|
+
}) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & (HasTools extends true ? Partial<PromptTools> : {}) & (HasNoTrace extends true ? Partial<PromptNoTrace> : {}) & PromptContents & {
|
|
1678
1704
|
model: string;
|
|
1679
1705
|
params?: ModelParams;
|
|
1680
|
-
tools?: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
|
|
1681
|
-
noTrace?: boolean;
|
|
1682
1706
|
};
|
|
1683
1707
|
declare class PromptBuilder {
|
|
1684
1708
|
private readonly project;
|
|
@@ -1707,7 +1731,14 @@ declare function BaseExperiment<Input = unknown, Expected = unknown, Metadata ex
|
|
|
1707
1731
|
type EvalData<Input, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata>[] | (() => EvalCase<Input, Expected, Metadata>[]) | Promise<EvalCase<Input, Expected, Metadata>[]> | (() => Promise<EvalCase<Input, Expected, Metadata>[]>) | AsyncGenerator<EvalCase<Input, Expected, Metadata>> | AsyncIterable<EvalCase<Input, Expected, Metadata>> | BaseExperiment<Input, Expected, Metadata> | (() => BaseExperiment<Input, Expected, Metadata>);
|
|
1708
1732
|
type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
|
|
1709
1733
|
interface EvalHooks {
|
|
1734
|
+
/**
|
|
1735
|
+
* @deprecated Use `metadata` instead.
|
|
1736
|
+
*/
|
|
1710
1737
|
meta: (info: Record<string, unknown>) => void;
|
|
1738
|
+
/**
|
|
1739
|
+
* The metadata object for the current evaluation. You can mutate this object to add or remove metadata.
|
|
1740
|
+
*/
|
|
1741
|
+
metadata: Record<string, unknown>;
|
|
1711
1742
|
span: Span;
|
|
1712
1743
|
}
|
|
1713
1744
|
type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
|
|
@@ -1923,6 +1954,7 @@ type braintrust_BraintrustStreamChunk = BraintrustStreamChunk;
|
|
|
1923
1954
|
type braintrust_ChatPrompt = ChatPrompt;
|
|
1924
1955
|
type braintrust_CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> = CodeFunction<Input, Output, Fn>;
|
|
1925
1956
|
declare const braintrust_CodeFunction: typeof CodeFunction;
|
|
1957
|
+
type braintrust_CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = CodeOpts<Params, Returns, Fn>;
|
|
1926
1958
|
type braintrust_CodePrompt = CodePrompt;
|
|
1927
1959
|
declare const braintrust_CodePrompt: typeof CodePrompt;
|
|
1928
1960
|
declare const braintrust_CommentEvent: typeof CommentEvent;
|
|
@@ -1986,7 +2018,7 @@ type braintrust_Prompt<HasId extends boolean = true, HasVersion extends boolean
|
|
|
1986
2018
|
declare const braintrust_Prompt: typeof Prompt;
|
|
1987
2019
|
type braintrust_PromptBuilder = PromptBuilder;
|
|
1988
2020
|
declare const braintrust_PromptBuilder: typeof PromptBuilder;
|
|
1989
|
-
type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean> = PromptOpts<HasId, HasVersion>;
|
|
2021
|
+
type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = PromptOpts<HasId, HasVersion, HasTools, HasNoTrace>;
|
|
1990
2022
|
type braintrust_PromptRowWithId<HasId extends boolean = true, HasVersion extends boolean = true> = PromptRowWithId<HasId, HasVersion>;
|
|
1991
2023
|
type braintrust_ReadonlyAttachment = ReadonlyAttachment;
|
|
1992
2024
|
declare const braintrust_ReadonlyAttachment: typeof ReadonlyAttachment;
|
|
@@ -1995,6 +2027,9 @@ declare const braintrust_ReadonlyExperiment: typeof ReadonlyExperiment;
|
|
|
1995
2027
|
declare const braintrust_Reporter: typeof Reporter;
|
|
1996
2028
|
type braintrust_ReporterBody<EvalReport> = ReporterBody<EvalReport>;
|
|
1997
2029
|
type braintrust_ScoreSummary = ScoreSummary;
|
|
2030
|
+
type braintrust_ScorerBuilder = ScorerBuilder;
|
|
2031
|
+
declare const braintrust_ScorerBuilder: typeof ScorerBuilder;
|
|
2032
|
+
type braintrust_ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = ScorerOpts<Output, Input, Params, Returns, Fn>;
|
|
1998
2033
|
type braintrust_SerializedBraintrustState = SerializedBraintrustState;
|
|
1999
2034
|
type braintrust_SetCurrentArg = SetCurrentArg;
|
|
2000
2035
|
type braintrust_Span = Span;
|
|
@@ -2005,7 +2040,6 @@ type braintrust_StartSpanArgs = StartSpanArgs;
|
|
|
2005
2040
|
type braintrust_ToolBuilder = ToolBuilder;
|
|
2006
2041
|
declare const braintrust_ToolBuilder: typeof ToolBuilder;
|
|
2007
2042
|
type braintrust_ToolFunctionDefinition = ToolFunctionDefinition;
|
|
2008
|
-
type braintrust_ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = ToolOpts<Params, Returns, Fn>;
|
|
2009
2043
|
type braintrust_WithTransactionId<R> = WithTransactionId<R>;
|
|
2010
2044
|
declare const braintrust_X_CACHED_HEADER: typeof X_CACHED_HEADER;
|
|
2011
2045
|
declare const braintrust__exportsForTestingOnly: typeof _exportsForTestingOnly;
|
|
@@ -2053,7 +2087,7 @@ declare const braintrust_wrapOpenAI: typeof wrapOpenAI;
|
|
|
2053
2087
|
declare const braintrust_wrapOpenAIv4: typeof wrapOpenAIv4;
|
|
2054
2088
|
declare const braintrust_wrapTraced: typeof wrapTraced;
|
|
2055
2089
|
declare namespace braintrust {
|
|
2056
|
-
export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, braintrust_IdField as IdField, type 
braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, type braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type
|
|
2090
|
+
export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, type braintrust_CodeOpts as CodeOpts, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, 
braintrust_IdField as IdField, type braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, braintrust_ScorerBuilder as ScorerBuilder, type braintrust_ScorerOpts as ScorerOpts, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, type braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type braintrust_WithTransactionId as WithTransactionId, braintrust_X_CACHED_HEADER as X_CACHED_HEADER, braintrust__exportsForTestingOnly as _exportsForTestingOnly, 
braintrust__internalGetGlobalState as _internalGetGlobalState, braintrust__internalSetInitialState as _internalSetInitialState, braintrust_braintrustStreamChunkSchema as braintrustStreamChunkSchema, braintrust_buildLocalSummary as buildLocalSummary, braintrust_createFinalValuePassThroughStream as createFinalValuePassThroughStream, braintrust_currentExperiment as currentExperiment, braintrust_currentLogger as currentLogger, braintrust_currentSpan as currentSpan, braintrust_devNullWritableStream as devNullWritableStream, braintrust_flush as flush, braintrust_getSpanParentObject as getSpanParentObject, braintrust_init as init, braintrust_initDataset as initDataset, braintrust_initExperiment as initExperiment, braintrust_initLogger as initLogger, braintrust_invoke as invoke, braintrust_loadPrompt as loadPrompt, braintrust_log as log, braintrust_logError as logError, braintrust_login as login, braintrust_loginToState as loginToState, braintrust_newId as newId, braintrust_parseCachedHeader as parseCachedHeader, braintrust_permalink as permalink, braintrust_projects as projects, braintrust_renderMessage as renderMessage, braintrust_reportFailures as reportFailures, braintrust_setFetch as setFetch, braintrust_spanComponentsToObjectId as spanComponentsToObjectId, braintrust_startSpan as startSpan, braintrust_summarize as summarize, braintrust_toolFunctionDefinitionSchema as toolFunctionDefinitionSchema, braintrust_traceable as traceable, braintrust_traced as traced, braintrust_updateSpan as updateSpan, braintrust_withCurrent as withCurrent, braintrust_withDataset as withDataset, braintrust_withExperiment as withExperiment, braintrust_withLogger as withLogger, braintrust_wrapAISDKModel as wrapAISDKModel, braintrust_wrapOpenAI as wrapOpenAI, braintrust_wrapOpenAIv4 as wrapOpenAIv4, braintrust_wrapTraced as wrapTraced };
|
|
2057
2091
|
}
|
|
2058
2092
|
|
|
2059
2093
|
/**
|
|
@@ -2109,4 +2143,4 @@ declare namespace braintrust {
|
|
|
2109
2143
|
* @module braintrust
|
|
2110
2144
|
*/
|
|
2111
2145
|
|
|
2112
|
-
export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type ToolFunctionDefinition, type
|
|
2146
|
+
export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, type CodeOpts, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, ScorerBuilder, type ScorerOpts, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type ToolFunctionDefinition, type WithTransactionId, X_CACHED_HEADER, _exportsForTestingOnly, _internalGetGlobalState, _internalSetInitialState, braintrustStreamChunkSchema, buildLocalSummary, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, braintrust as default, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, logError, login, loginToState, newId, parseCachedHeader, permalink, projects, renderMessage, reportFailures, setFetch, spanComponentsToObjectId, startSpan, summarize, toolFunctionDefinitionSchema, traceable, traced, updateSpan, withCurrent, withDataset, withExperiment, withLogger, wrapAISDKModel, wrapOpenAI, wrapOpenAIv4, 
wrapTraced };
|
package/dist/index.js
CHANGED
|
@@ -51,6 +51,7 @@ __export(src_exports, {
|
|
|
51
51
|
ReadonlyAttachment: () => ReadonlyAttachment,
|
|
52
52
|
ReadonlyExperiment: () => ReadonlyExperiment,
|
|
53
53
|
Reporter: () => Reporter,
|
|
54
|
+
ScorerBuilder: () => ScorerBuilder,
|
|
54
55
|
SpanImpl: () => SpanImpl,
|
|
55
56
|
ToolBuilder: () => ToolBuilder,
|
|
56
57
|
X_CACHED_HEADER: () => X_CACHED_HEADER,
|
|
@@ -3834,6 +3835,7 @@ __export(exports_node_exports, {
|
|
|
3834
3835
|
ReadonlyAttachment: () => ReadonlyAttachment,
|
|
3835
3836
|
ReadonlyExperiment: () => ReadonlyExperiment,
|
|
3836
3837
|
Reporter: () => Reporter,
|
|
3838
|
+
ScorerBuilder: () => ScorerBuilder,
|
|
3837
3839
|
SpanImpl: () => SpanImpl,
|
|
3838
3840
|
ToolBuilder: () => ToolBuilder,
|
|
3839
3841
|
X_CACHED_HEADER: () => X_CACHED_HEADER,
|
|
@@ -3948,49 +3950,8 @@ async function invoke(args) {
|
|
|
3948
3950
|
}
|
|
3949
3951
|
|
|
3950
3952
|
// src/framework.ts
|
|
3951
|
-
var import_chalk = __toESM(require("chalk"));
|
|
3952
3953
|
var import_core2 = require("@braintrust/core");
|
|
3953
3954
|
|
|
3954
|
-
// src/progress.ts
|
|
3955
|
-
var cliProgress = __toESM(require("cli-progress"));
|
|
3956
|
-
var MAX_NAME_LENGTH = 40;
|
|
3957
|
-
function fitNameToSpaces(name, length) {
|
|
3958
|
-
const padded = name.padEnd(length);
|
|
3959
|
-
if (padded.length <= length) {
|
|
3960
|
-
return padded;
|
|
3961
|
-
}
|
|
3962
|
-
return padded.substring(0, length - 3) + "...";
|
|
3963
|
-
}
|
|
3964
|
-
var BarProgressReporter = class {
|
|
3965
|
-
multiBar;
|
|
3966
|
-
bars = {};
|
|
3967
|
-
constructor() {
|
|
3968
|
-
this.multiBar = new cliProgress.MultiBar(
|
|
3969
|
-
{
|
|
3970
|
-
clearOnComplete: false,
|
|
3971
|
-
format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
|
|
3972
|
-
autopadding: true
|
|
3973
|
-
},
|
|
3974
|
-
cliProgress.Presets.shades_grey
|
|
3975
|
-
);
|
|
3976
|
-
}
|
|
3977
|
-
start(name, total) {
|
|
3978
|
-
const bar = this.multiBar.create(total, 0);
|
|
3979
|
-
this.bars[name] = bar;
|
|
3980
|
-
}
|
|
3981
|
-
stop() {
|
|
3982
|
-
this.multiBar.stop();
|
|
3983
|
-
}
|
|
3984
|
-
increment(name) {
|
|
3985
|
-
this.bars[name].increment({
|
|
3986
|
-
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
3987
|
-
});
|
|
3988
|
-
}
|
|
3989
|
-
};
|
|
3990
|
-
|
|
3991
|
-
// src/framework.ts
|
|
3992
|
-
var import_pluralize = __toESM(require("pluralize"));
|
|
3993
|
-
|
|
3994
3955
|
// ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
|
|
3995
3956
|
function initialParams(fn) {
|
|
3996
3957
|
return function(...args) {
|
|
@@ -5094,6 +5055,47 @@ function waterfall(tasks, callback) {
|
|
|
5094
5055
|
}
|
|
5095
5056
|
var waterfall$1 = awaitify(waterfall);
|
|
5096
5057
|
|
|
5058
|
+
// src/framework.ts
|
|
5059
|
+
var import_chalk = __toESM(require("chalk"));
|
|
5060
|
+
var import_pluralize = __toESM(require("pluralize"));
|
|
5061
|
+
|
|
5062
|
+
// src/progress.ts
|
|
5063
|
+
var cliProgress = __toESM(require("cli-progress"));
|
|
5064
|
+
var MAX_NAME_LENGTH = 40;
|
|
5065
|
+
function fitNameToSpaces(name, length) {
|
|
5066
|
+
const padded = name.padEnd(length);
|
|
5067
|
+
if (padded.length <= length) {
|
|
5068
|
+
return padded;
|
|
5069
|
+
}
|
|
5070
|
+
return padded.substring(0, length - 3) + "...";
|
|
5071
|
+
}
|
|
5072
|
+
var BarProgressReporter = class {
|
|
5073
|
+
multiBar;
|
|
5074
|
+
bars = {};
|
|
5075
|
+
constructor() {
|
|
5076
|
+
this.multiBar = new cliProgress.MultiBar(
|
|
5077
|
+
{
|
|
5078
|
+
clearOnComplete: false,
|
|
5079
|
+
format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
|
|
5080
|
+
autopadding: true
|
|
5081
|
+
},
|
|
5082
|
+
cliProgress.Presets.shades_grey
|
|
5083
|
+
);
|
|
5084
|
+
}
|
|
5085
|
+
start(name, total) {
|
|
5086
|
+
const bar = this.multiBar.create(total, 0);
|
|
5087
|
+
this.bars[name] = bar;
|
|
5088
|
+
}
|
|
5089
|
+
stop() {
|
|
5090
|
+
this.multiBar.stop();
|
|
5091
|
+
}
|
|
5092
|
+
increment(name) {
|
|
5093
|
+
this.bars[name].increment({
|
|
5094
|
+
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
5095
|
+
});
|
|
5096
|
+
}
|
|
5097
|
+
};
|
|
5098
|
+
|
|
5097
5099
|
// src/framework.ts
|
|
5098
5100
|
function BaseExperiment(options = {}) {
|
|
5099
5101
|
return { _type: "BaseExperiment", ...options };
|
|
@@ -5339,7 +5341,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
5339
5341
|
const meta = (o) => metadata = { ...metadata, ...o };
|
|
5340
5342
|
await rootSpan.traced(
|
|
5341
5343
|
async (span) => {
|
|
5342
|
-
const outputResult = evaluator.task(datum.input, {
|
|
5344
|
+
const outputResult = evaluator.task(datum.input, {
|
|
5345
|
+
meta,
|
|
5346
|
+
metadata,
|
|
5347
|
+
span
|
|
5348
|
+
});
|
|
5343
5349
|
if (outputResult instanceof Promise) {
|
|
5344
5350
|
output = await outputResult;
|
|
5345
5351
|
} else {
|
|
@@ -5634,12 +5640,14 @@ var Project = class {
|
|
|
5634
5640
|
id;
|
|
5635
5641
|
tools;
|
|
5636
5642
|
prompts;
|
|
5643
|
+
scorers;
|
|
5637
5644
|
constructor(args) {
|
|
5638
5645
|
_initializeSpanContext();
|
|
5639
5646
|
this.name = "name" in args ? args.name : void 0;
|
|
5640
5647
|
this.id = "id" in args ? args.id : void 0;
|
|
5641
5648
|
this.tools = new ToolBuilder(this);
|
|
5642
5649
|
this.prompts = new PromptBuilder(this);
|
|
5650
|
+
this.scorers = new ScorerBuilder(this);
|
|
5643
5651
|
}
|
|
5644
5652
|
};
|
|
5645
5653
|
var ToolBuilder = class {
|
|
@@ -5673,6 +5681,70 @@ var ToolBuilder = class {
|
|
|
5673
5681
|
return tool;
|
|
5674
5682
|
}
|
|
5675
5683
|
};
|
|
5684
|
+
var ScorerBuilder = class {
|
|
5685
|
+
constructor(project) {
|
|
5686
|
+
this.project = project;
|
|
5687
|
+
}
|
|
5688
|
+
taskCounter = 0;
|
|
5689
|
+
create(opts) {
|
|
5690
|
+
this.taskCounter++;
|
|
5691
|
+
let resolvedName = opts.name;
|
|
5692
|
+
if (!resolvedName && "handler" in opts) {
|
|
5693
|
+
resolvedName = opts.handler.name;
|
|
5694
|
+
}
|
|
5695
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
5696
|
+
resolvedName = `Scorer ${import_path.default.basename(__filename)} ${this.taskCounter}`;
|
|
5697
|
+
}
|
|
5698
|
+
const slug = opts.slug ?? (0, import_slugify.default)(resolvedName, { lower: true, strict: true });
|
|
5699
|
+
if ("handler" in opts) {
|
|
5700
|
+
const scorer = new CodeFunction(this.project, {
|
|
5701
|
+
...opts,
|
|
5702
|
+
name: resolvedName,
|
|
5703
|
+
slug,
|
|
5704
|
+
type: "scorer"
|
|
5705
|
+
});
|
|
5706
|
+
if (globalThis._lazy_load) {
|
|
5707
|
+
globalThis._evals.functions.push(
|
|
5708
|
+
scorer
|
|
5709
|
+
);
|
|
5710
|
+
}
|
|
5711
|
+
} else {
|
|
5712
|
+
const promptBlock = "messages" in opts ? {
|
|
5713
|
+
type: "chat",
|
|
5714
|
+
messages: opts.messages
|
|
5715
|
+
} : {
|
|
5716
|
+
type: "completion",
|
|
5717
|
+
content: opts.prompt
|
|
5718
|
+
};
|
|
5719
|
+
const promptData = {
|
|
5720
|
+
prompt: promptBlock,
|
|
5721
|
+
options: {
|
|
5722
|
+
model: opts.model,
|
|
5723
|
+
params: opts.params
|
|
5724
|
+
},
|
|
5725
|
+
parser: {
|
|
5726
|
+
type: "llm_classifier",
|
|
5727
|
+
use_cot: opts.useCot,
|
|
5728
|
+
choice_scores: opts.choiceScores
|
|
5729
|
+
}
|
|
5730
|
+
};
|
|
5731
|
+
const codePrompt = new CodePrompt(
|
|
5732
|
+
this.project,
|
|
5733
|
+
promptData,
|
|
5734
|
+
[],
|
|
5735
|
+
{
|
|
5736
|
+
...opts,
|
|
5737
|
+
name: resolvedName,
|
|
5738
|
+
slug
|
|
5739
|
+
},
|
|
5740
|
+
"scorer"
|
|
5741
|
+
);
|
|
5742
|
+
if (globalThis._lazy_load) {
|
|
5743
|
+
globalThis._evals.prompts.push(codePrompt);
|
|
5744
|
+
}
|
|
5745
|
+
}
|
|
5746
|
+
}
|
|
5747
|
+
};
|
|
5676
5748
|
var CodeFunction = class {
|
|
5677
5749
|
constructor(project, opts) {
|
|
5678
5750
|
this.project = project;
|
|
@@ -5712,8 +5784,9 @@ var CodePrompt = class {
|
|
|
5712
5784
|
ifExists;
|
|
5713
5785
|
description;
|
|
5714
5786
|
id;
|
|
5787
|
+
functionType;
|
|
5715
5788
|
toolFunctions;
|
|
5716
|
-
constructor(project, prompt, toolFunctions, opts) {
|
|
5789
|
+
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
5717
5790
|
this.project = project;
|
|
5718
5791
|
this.name = opts.name;
|
|
5719
5792
|
this.slug = opts.slug;
|
|
@@ -5722,6 +5795,7 @@ var CodePrompt = class {
|
|
|
5722
5795
|
this.ifExists = opts.ifExists;
|
|
5723
5796
|
this.description = opts.description;
|
|
5724
5797
|
this.id = opts.id;
|
|
5798
|
+
this.functionType = functionType;
|
|
5725
5799
|
}
|
|
5726
5800
|
};
|
|
5727
5801
|
var toolFunctionDefinitionSchema = import_zod3.z.object({
|
|
@@ -6456,6 +6530,7 @@ var src_default = exports_node_exports;
|
|
|
6456
6530
|
ReadonlyAttachment,
|
|
6457
6531
|
ReadonlyExperiment,
|
|
6458
6532
|
Reporter,
|
|
6533
|
+
ScorerBuilder,
|
|
6459
6534
|
SpanImpl,
|
|
6460
6535
|
ToolBuilder,
|
|
6461
6536
|
X_CACHED_HEADER,
|
package/dist/index.mjs
CHANGED
|
@@ -3770,6 +3770,7 @@ __export(exports_node_exports, {
|
|
|
3770
3770
|
ReadonlyAttachment: () => ReadonlyAttachment,
|
|
3771
3771
|
ReadonlyExperiment: () => ReadonlyExperiment,
|
|
3772
3772
|
Reporter: () => Reporter,
|
|
3773
|
+
ScorerBuilder: () => ScorerBuilder,
|
|
3773
3774
|
SpanImpl: () => SpanImpl,
|
|
3774
3775
|
ToolBuilder: () => ToolBuilder,
|
|
3775
3776
|
X_CACHED_HEADER: () => X_CACHED_HEADER,
|
|
@@ -3886,49 +3887,8 @@ async function invoke(args) {
|
|
|
3886
3887
|
}
|
|
3887
3888
|
|
|
3888
3889
|
// src/framework.ts
|
|
3889
|
-
import chalk from "chalk";
|
|
3890
3890
|
import { SpanTypeAttribute as SpanTypeAttribute2, mergeDicts as mergeDicts2 } from "@braintrust/core";
|
|
3891
3891
|
|
|
3892
|
-
// src/progress.ts
|
|
3893
|
-
import * as cliProgress from "cli-progress";
|
|
3894
|
-
var MAX_NAME_LENGTH = 40;
|
|
3895
|
-
function fitNameToSpaces(name, length) {
|
|
3896
|
-
const padded = name.padEnd(length);
|
|
3897
|
-
if (padded.length <= length) {
|
|
3898
|
-
return padded;
|
|
3899
|
-
}
|
|
3900
|
-
return padded.substring(0, length - 3) + "...";
|
|
3901
|
-
}
|
|
3902
|
-
var BarProgressReporter = class {
|
|
3903
|
-
multiBar;
|
|
3904
|
-
bars = {};
|
|
3905
|
-
constructor() {
|
|
3906
|
-
this.multiBar = new cliProgress.MultiBar(
|
|
3907
|
-
{
|
|
3908
|
-
clearOnComplete: false,
|
|
3909
|
-
format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
|
|
3910
|
-
autopadding: true
|
|
3911
|
-
},
|
|
3912
|
-
cliProgress.Presets.shades_grey
|
|
3913
|
-
);
|
|
3914
|
-
}
|
|
3915
|
-
start(name, total) {
|
|
3916
|
-
const bar = this.multiBar.create(total, 0);
|
|
3917
|
-
this.bars[name] = bar;
|
|
3918
|
-
}
|
|
3919
|
-
stop() {
|
|
3920
|
-
this.multiBar.stop();
|
|
3921
|
-
}
|
|
3922
|
-
increment(name) {
|
|
3923
|
-
this.bars[name].increment({
|
|
3924
|
-
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
3925
|
-
});
|
|
3926
|
-
}
|
|
3927
|
-
};
|
|
3928
|
-
|
|
3929
|
-
// src/framework.ts
|
|
3930
|
-
import pluralize from "pluralize";
|
|
3931
|
-
|
|
3932
3892
|
// ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
|
|
3933
3893
|
function initialParams(fn) {
|
|
3934
3894
|
return function(...args) {
|
|
@@ -5032,6 +4992,47 @@ function waterfall(tasks, callback) {
|
|
|
5032
4992
|
}
|
|
5033
4993
|
var waterfall$1 = awaitify(waterfall);
|
|
5034
4994
|
|
|
4995
|
+
// src/framework.ts
|
|
4996
|
+
import chalk from "chalk";
|
|
4997
|
+
import pluralize from "pluralize";
|
|
4998
|
+
|
|
4999
|
+
// src/progress.ts
|
|
5000
|
+
import * as cliProgress from "cli-progress";
|
|
5001
|
+
var MAX_NAME_LENGTH = 40;
|
|
5002
|
+
function fitNameToSpaces(name, length) {
|
|
5003
|
+
const padded = name.padEnd(length);
|
|
5004
|
+
if (padded.length <= length) {
|
|
5005
|
+
return padded;
|
|
5006
|
+
}
|
|
5007
|
+
return padded.substring(0, length - 3) + "...";
|
|
5008
|
+
}
|
|
5009
|
+
var BarProgressReporter = class {
|
|
5010
|
+
multiBar;
|
|
5011
|
+
bars = {};
|
|
5012
|
+
constructor() {
|
|
5013
|
+
this.multiBar = new cliProgress.MultiBar(
|
|
5014
|
+
{
|
|
5015
|
+
clearOnComplete: false,
|
|
5016
|
+
format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
|
|
5017
|
+
autopadding: true
|
|
5018
|
+
},
|
|
5019
|
+
cliProgress.Presets.shades_grey
|
|
5020
|
+
);
|
|
5021
|
+
}
|
|
5022
|
+
start(name, total) {
|
|
5023
|
+
const bar = this.multiBar.create(total, 0);
|
|
5024
|
+
this.bars[name] = bar;
|
|
5025
|
+
}
|
|
5026
|
+
stop() {
|
|
5027
|
+
this.multiBar.stop();
|
|
5028
|
+
}
|
|
5029
|
+
increment(name) {
|
|
5030
|
+
this.bars[name].increment({
|
|
5031
|
+
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
5032
|
+
});
|
|
5033
|
+
}
|
|
5034
|
+
};
|
|
5035
|
+
|
|
5035
5036
|
// src/framework.ts
|
|
5036
5037
|
function BaseExperiment(options = {}) {
|
|
5037
5038
|
return { _type: "BaseExperiment", ...options };
|
|
@@ -5277,7 +5278,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
5277
5278
|
const meta = (o) => metadata = { ...metadata, ...o };
|
|
5278
5279
|
await rootSpan.traced(
|
|
5279
5280
|
async (span) => {
|
|
5280
|
-
const outputResult = evaluator.task(datum.input, {
|
|
5281
|
+
const outputResult = evaluator.task(datum.input, {
|
|
5282
|
+
meta,
|
|
5283
|
+
metadata,
|
|
5284
|
+
span
|
|
5285
|
+
});
|
|
5281
5286
|
if (outputResult instanceof Promise) {
|
|
5282
5287
|
output = await outputResult;
|
|
5283
5288
|
} else {
|
|
@@ -5572,12 +5577,14 @@ var Project = class {
|
|
|
5572
5577
|
id;
|
|
5573
5578
|
tools;
|
|
5574
5579
|
prompts;
|
|
5580
|
+
scorers;
|
|
5575
5581
|
constructor(args) {
|
|
5576
5582
|
_initializeSpanContext();
|
|
5577
5583
|
this.name = "name" in args ? args.name : void 0;
|
|
5578
5584
|
this.id = "id" in args ? args.id : void 0;
|
|
5579
5585
|
this.tools = new ToolBuilder(this);
|
|
5580
5586
|
this.prompts = new PromptBuilder(this);
|
|
5587
|
+
this.scorers = new ScorerBuilder(this);
|
|
5581
5588
|
}
|
|
5582
5589
|
};
|
|
5583
5590
|
var ToolBuilder = class {
|
|
@@ -5611,6 +5618,70 @@ var ToolBuilder = class {
|
|
|
5611
5618
|
return tool;
|
|
5612
5619
|
}
|
|
5613
5620
|
};
|
|
5621
|
+
var ScorerBuilder = class {
|
|
5622
|
+
constructor(project) {
|
|
5623
|
+
this.project = project;
|
|
5624
|
+
}
|
|
5625
|
+
taskCounter = 0;
|
|
5626
|
+
create(opts) {
|
|
5627
|
+
this.taskCounter++;
|
|
5628
|
+
let resolvedName = opts.name;
|
|
5629
|
+
if (!resolvedName && "handler" in opts) {
|
|
5630
|
+
resolvedName = opts.handler.name;
|
|
5631
|
+
}
|
|
5632
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
5633
|
+
resolvedName = `Scorer ${path2.basename(__filename)} ${this.taskCounter}`;
|
|
5634
|
+
}
|
|
5635
|
+
const slug = opts.slug ?? slugifyLib(resolvedName, { lower: true, strict: true });
|
|
5636
|
+
if ("handler" in opts) {
|
|
5637
|
+
const scorer = new CodeFunction(this.project, {
|
|
5638
|
+
...opts,
|
|
5639
|
+
name: resolvedName,
|
|
5640
|
+
slug,
|
|
5641
|
+
type: "scorer"
|
|
5642
|
+
});
|
|
5643
|
+
if (globalThis._lazy_load) {
|
|
5644
|
+
globalThis._evals.functions.push(
|
|
5645
|
+
scorer
|
|
5646
|
+
);
|
|
5647
|
+
}
|
|
5648
|
+
} else {
|
|
5649
|
+
const promptBlock = "messages" in opts ? {
|
|
5650
|
+
type: "chat",
|
|
5651
|
+
messages: opts.messages
|
|
5652
|
+
} : {
|
|
5653
|
+
type: "completion",
|
|
5654
|
+
content: opts.prompt
|
|
5655
|
+
};
|
|
5656
|
+
const promptData = {
|
|
5657
|
+
prompt: promptBlock,
|
|
5658
|
+
options: {
|
|
5659
|
+
model: opts.model,
|
|
5660
|
+
params: opts.params
|
|
5661
|
+
},
|
|
5662
|
+
parser: {
|
|
5663
|
+
type: "llm_classifier",
|
|
5664
|
+
use_cot: opts.useCot,
|
|
5665
|
+
choice_scores: opts.choiceScores
|
|
5666
|
+
}
|
|
5667
|
+
};
|
|
5668
|
+
const codePrompt = new CodePrompt(
|
|
5669
|
+
this.project,
|
|
5670
|
+
promptData,
|
|
5671
|
+
[],
|
|
5672
|
+
{
|
|
5673
|
+
...opts,
|
|
5674
|
+
name: resolvedName,
|
|
5675
|
+
slug
|
|
5676
|
+
},
|
|
5677
|
+
"scorer"
|
|
5678
|
+
);
|
|
5679
|
+
if (globalThis._lazy_load) {
|
|
5680
|
+
globalThis._evals.prompts.push(codePrompt);
|
|
5681
|
+
}
|
|
5682
|
+
}
|
|
5683
|
+
}
|
|
5684
|
+
};
|
|
5614
5685
|
var CodeFunction = class {
|
|
5615
5686
|
constructor(project, opts) {
|
|
5616
5687
|
this.project = project;
|
|
@@ -5650,8 +5721,9 @@ var CodePrompt = class {
|
|
|
5650
5721
|
ifExists;
|
|
5651
5722
|
description;
|
|
5652
5723
|
id;
|
|
5724
|
+
functionType;
|
|
5653
5725
|
toolFunctions;
|
|
5654
|
-
constructor(project, prompt, toolFunctions, opts) {
|
|
5726
|
+
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
5655
5727
|
this.project = project;
|
|
5656
5728
|
this.name = opts.name;
|
|
5657
5729
|
this.slug = opts.slug;
|
|
@@ -5660,6 +5732,7 @@ var CodePrompt = class {
|
|
|
5660
5732
|
this.ifExists = opts.ifExists;
|
|
5661
5733
|
this.description = opts.description;
|
|
5662
5734
|
this.id = opts.id;
|
|
5735
|
+
this.functionType = functionType;
|
|
5663
5736
|
}
|
|
5664
5737
|
};
|
|
5665
5738
|
var toolFunctionDefinitionSchema = z3.object({
|
|
@@ -6393,6 +6466,7 @@ export {
|
|
|
6393
6466
|
ReadonlyAttachment,
|
|
6394
6467
|
ReadonlyExperiment,
|
|
6395
6468
|
Reporter,
|
|
6469
|
+
ScorerBuilder,
|
|
6396
6470
|
SpanImpl,
|
|
6397
6471
|
ToolBuilder,
|
|
6398
6472
|
X_CACHED_HEADER,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "braintrust",
|
|
3
|
-
"version": "0.0.175",
|
|
3
|
+
"version": "0.0.176",
|
|
4
4
|
"description": "SDK for integrating Braintrust",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
},
|
|
76
76
|
"dependencies": {
|
|
77
77
|
"@ai-sdk/provider": "^1.0.1",
|
|
78
|
-
"@braintrust/core": "0.0.
|
|
78
|
+
"@braintrust/core": "0.0.70",
|
|
79
79
|
"@next/env": "^14.2.3",
|
|
80
80
|
"@vercel/functions": "^1.0.2",
|
|
81
81
|
"ai": "^3.2.16",
|