braintrust 0.0.175 → 0.0.176

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1236,7 +1236,7 @@ var require_package = __commonJS({
1236
1236
  "package.json"(exports2, module2) {
1237
1237
  module2.exports = {
1238
1238
  name: "braintrust",
1239
- version: "0.0.175",
1239
+ version: "0.0.176",
1240
1240
  description: "SDK for integrating Braintrust",
1241
1241
  repository: {
1242
1242
  type: "git",
@@ -1311,7 +1311,7 @@ var require_package = __commonJS({
1311
1311
  },
1312
1312
  dependencies: {
1313
1313
  "@ai-sdk/provider": "^1.0.1",
1314
- "@braintrust/core": "0.0.69",
1314
+ "@braintrust/core": "0.0.70",
1315
1315
  "@next/env": "^14.2.3",
1316
1316
  "@vercel/functions": "^1.0.2",
1317
1317
  ai: "^3.2.16",
@@ -4574,9 +4574,7 @@ var BarProgressReporter = class {
4574
4574
  };
4575
4575
 
4576
4576
  // src/framework.ts
4577
- var import_chalk = __toESM(require("chalk"));
4578
4577
  var import_core2 = require("@braintrust/core");
4579
- var import_pluralize = __toESM(require("pluralize"));
4580
4578
 
4581
4579
  // ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
4582
4580
  function initialParams(fn) {
@@ -5682,6 +5680,8 @@ function waterfall(tasks, callback) {
5682
5680
  var waterfall$1 = awaitify(waterfall);
5683
5681
 
5684
5682
  // src/framework.ts
5683
+ var import_chalk = __toESM(require("chalk"));
5684
+ var import_pluralize = __toESM(require("pluralize"));
5685
5685
  var EvalResultWithSummary = class {
5686
5686
  constructor(summary, results) {
5687
5687
  this.summary = summary;
@@ -5858,7 +5858,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
5858
5858
  const meta = (o) => metadata = { ...metadata, ...o };
5859
5859
  await rootSpan.traced(
5860
5860
  async (span) => {
5861
- const outputResult = evaluator.task(datum.input, { meta, span });
5861
+ const outputResult = evaluator.task(datum.input, {
5862
+ meta,
5863
+ metadata,
5864
+ span
5865
+ });
5862
5866
  if (outputResult instanceof Promise) {
5863
5867
  output = await outputResult;
5864
5868
  } else {
@@ -6720,6 +6724,7 @@ async function uploadHandleBundles({
6720
6724
  function_data: {
6721
6725
  type: "prompt"
6722
6726
  },
6727
+ function_type: prompt.functionType,
6723
6728
  prompt_data,
6724
6729
  if_exists: prompt.ifExists
6725
6730
  });
@@ -7068,12 +7073,14 @@ var Project = class {
7068
7073
  id;
7069
7074
  tools;
7070
7075
  prompts;
7076
+ scorers;
7071
7077
  constructor(args) {
7072
7078
  _initializeSpanContext();
7073
7079
  this.name = "name" in args ? args.name : void 0;
7074
7080
  this.id = "id" in args ? args.id : void 0;
7075
7081
  this.tools = new ToolBuilder(this);
7076
7082
  this.prompts = new PromptBuilder(this);
7083
+ this.scorers = new ScorerBuilder(this);
7077
7084
  }
7078
7085
  };
7079
7086
  var ToolBuilder = class {
@@ -7107,6 +7114,70 @@ var ToolBuilder = class {
7107
7114
  return tool;
7108
7115
  }
7109
7116
  };
7117
+ var ScorerBuilder = class {
7118
+ constructor(project) {
7119
+ this.project = project;
7120
+ }
7121
+ taskCounter = 0;
7122
+ create(opts) {
7123
+ this.taskCounter++;
7124
+ let resolvedName = opts.name;
7125
+ if (!resolvedName && "handler" in opts) {
7126
+ resolvedName = opts.handler.name;
7127
+ }
7128
+ if (!resolvedName || resolvedName.trim().length === 0) {
7129
+ resolvedName = `Scorer ${import_path4.default.basename(__filename)} ${this.taskCounter}`;
7130
+ }
7131
+ const slug = opts.slug ?? (0, import_slugify2.default)(resolvedName, { lower: true, strict: true });
7132
+ if ("handler" in opts) {
7133
+ const scorer = new CodeFunction(this.project, {
7134
+ ...opts,
7135
+ name: resolvedName,
7136
+ slug,
7137
+ type: "scorer"
7138
+ });
7139
+ if (globalThis._lazy_load) {
7140
+ globalThis._evals.functions.push(
7141
+ scorer
7142
+ );
7143
+ }
7144
+ } else {
7145
+ const promptBlock = "messages" in opts ? {
7146
+ type: "chat",
7147
+ messages: opts.messages
7148
+ } : {
7149
+ type: "completion",
7150
+ content: opts.prompt
7151
+ };
7152
+ const promptData = {
7153
+ prompt: promptBlock,
7154
+ options: {
7155
+ model: opts.model,
7156
+ params: opts.params
7157
+ },
7158
+ parser: {
7159
+ type: "llm_classifier",
7160
+ use_cot: opts.useCot,
7161
+ choice_scores: opts.choiceScores
7162
+ }
7163
+ };
7164
+ const codePrompt = new CodePrompt(
7165
+ this.project,
7166
+ promptData,
7167
+ [],
7168
+ {
7169
+ ...opts,
7170
+ name: resolvedName,
7171
+ slug
7172
+ },
7173
+ "scorer"
7174
+ );
7175
+ if (globalThis._lazy_load) {
7176
+ globalThis._evals.prompts.push(codePrompt);
7177
+ }
7178
+ }
7179
+ }
7180
+ };
7110
7181
  var CodeFunction = class {
7111
7182
  constructor(project, opts) {
7112
7183
  this.project = project;
@@ -7146,8 +7217,9 @@ var CodePrompt = class {
7146
7217
  ifExists;
7147
7218
  description;
7148
7219
  id;
7220
+ functionType;
7149
7221
  toolFunctions;
7150
- constructor(project, prompt, toolFunctions, opts) {
7222
+ constructor(project, prompt, toolFunctions, opts, functionType) {
7151
7223
  this.project = project;
7152
7224
  this.name = opts.name;
7153
7225
  this.slug = opts.slug;
@@ -7156,6 +7228,7 @@ var CodePrompt = class {
7156
7228
  this.ifExists = opts.ifExists;
7157
7229
  this.description = opts.description;
7158
7230
  this.id = opts.id;
7231
+ this.functionType = functionType;
7159
7232
  }
7160
7233
  };
7161
7234
  var toolFunctionDefinitionSchema = import_zod4.z.object({
package/dist/index.d.mts CHANGED
@@ -1572,13 +1572,20 @@ declare class Project {
1572
1572
  readonly id?: string;
1573
1573
  tools: ToolBuilder;
1574
1574
  prompts: PromptBuilder;
1575
+ scorers: ScorerBuilder;
1575
1576
  constructor(args: CreateProjectOpts);
1576
1577
  }
1577
1578
  declare class ToolBuilder {
1578
1579
  private readonly project;
1579
1580
  private taskCounter;
1580
1581
  constructor(project: Project);
1581
- create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: ToolOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
1582
+ create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: CodeOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
1583
+ }
1584
+ declare class ScorerBuilder {
1585
+ private readonly project;
1586
+ private taskCounter;
1587
+ constructor(project: Project);
1588
+ create<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>>(opts: ScorerOpts<Output, Input, Params, Returns, Fn>): void;
1582
1589
  }
1583
1590
  type Schema<Input, Output> = Partial<{
1584
1591
  parameters: z.ZodSchema<Input>;
@@ -1590,9 +1597,21 @@ interface BaseFnOpts {
1590
1597
  description: string;
1591
1598
  ifExists: IfExists;
1592
1599
  }
1593
- type ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
1600
+ type CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
1594
1601
  handler: Fn;
1595
1602
  } & Schema<Params, Returns>;
1603
+ type ScorerPromptOpts = Partial<BaseFnOpts> & PromptOpts<false, false, false, false> & {
1604
+ useCot: boolean;
1605
+ choiceScores: Record<string, number>;
1606
+ };
1607
+ type ScorerArgs<Output, Input> = {
1608
+ output: Output;
1609
+ expected?: Output;
1610
+ input?: Input;
1611
+ metadata?: Record<string, unknown>;
1612
+ };
1613
+ type Exact<T, Shape> = T extends Shape ? Exclude<keyof T, keyof Shape> extends never ? T : never : never;
1614
+ type ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = CodeOpts<Exact<Params, ScorerArgs<Output, Input>>, Returns, Fn> | ScorerPromptOpts;
1596
1615
  declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> {
1597
1616
  readonly project: Project;
1598
1617
  readonly handler: Fn;
@@ -1603,7 +1622,7 @@ declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Outp
1603
1622
  readonly parameters?: z.ZodSchema<Input>;
1604
1623
  readonly returns?: z.ZodSchema<Output>;
1605
1624
  readonly ifExists?: IfExists;
1606
- constructor(project: Project, opts: Omit<ToolOpts<Input, Output, Fn>, "name" | "slug"> & {
1625
+ constructor(project: Project, opts: Omit<CodeOpts<Input, Output, Fn>, "name" | "slug"> & {
1607
1626
  name: string;
1608
1627
  slug: string;
1609
1628
  type: FunctionType;
@@ -1619,11 +1638,12 @@ declare class CodePrompt {
1619
1638
  readonly ifExists?: IfExists;
1620
1639
  readonly description?: string;
1621
1640
  readonly id?: string;
1641
+ readonly functionType?: FunctionType;
1622
1642
  readonly toolFunctions: (SavedFunctionId | GenericCodeFunction)[];
1623
- constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false>, "name" | "slug"> & {
1643
+ constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false, false, false>, "name" | "slug"> & {
1624
1644
  name: string;
1625
1645
  slug: string;
1626
- });
1646
+ }, functionType?: FunctionType);
1627
1647
  }
1628
1648
  declare const toolFunctionDefinitionSchema: z.ZodObject<{
1629
1649
  type: z.ZodLiteral<"function">;
@@ -1667,18 +1687,22 @@ interface PromptId {
1667
1687
  interface PromptVersion {
1668
1688
  version: TransactionId;
1669
1689
  }
1690
+ interface PromptTools {
1691
+ tools: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
1692
+ }
1693
+ interface PromptNoTrace {
1694
+ noTrace: boolean;
1695
+ }
1670
1696
  type PromptContents = {
1671
1697
  prompt: string;
1672
1698
  } | {
1673
1699
  messages: Message[];
1674
1700
  };
1675
- type PromptOpts<HasId extends boolean, HasVersion extends boolean> = (Partial<Omit<BaseFnOpts, "name">> & {
1701
+ type PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = (Partial<Omit<BaseFnOpts, "name">> & {
1676
1702
  name: string;
1677
- }) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & PromptContents & {
1703
+ }) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & (HasTools extends true ? Partial<PromptTools> : {}) & (HasNoTrace extends true ? Partial<PromptNoTrace> : {}) & PromptContents & {
1678
1704
  model: string;
1679
1705
  params?: ModelParams;
1680
- tools?: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
1681
- noTrace?: boolean;
1682
1706
  };
1683
1707
  declare class PromptBuilder {
1684
1708
  private readonly project;
@@ -1707,7 +1731,14 @@ declare function BaseExperiment<Input = unknown, Expected = unknown, Metadata ex
1707
1731
  type EvalData<Input, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata>[] | (() => EvalCase<Input, Expected, Metadata>[]) | Promise<EvalCase<Input, Expected, Metadata>[]> | (() => Promise<EvalCase<Input, Expected, Metadata>[]>) | AsyncGenerator<EvalCase<Input, Expected, Metadata>> | AsyncIterable<EvalCase<Input, Expected, Metadata>> | BaseExperiment<Input, Expected, Metadata> | (() => BaseExperiment<Input, Expected, Metadata>);
1708
1732
  type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
1709
1733
  interface EvalHooks {
1734
+ /**
1735
+ * @deprecated Use `metadata` instead.
1736
+ */
1710
1737
  meta: (info: Record<string, unknown>) => void;
1738
+ /**
1739
+ * The metadata object for the current evaluation. You can mutate this object to add or remove metadata.
1740
+ */
1741
+ metadata: Record<string, unknown>;
1711
1742
  span: Span;
1712
1743
  }
1713
1744
  type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
@@ -1923,6 +1954,7 @@ type braintrust_BraintrustStreamChunk = BraintrustStreamChunk;
1923
1954
  type braintrust_ChatPrompt = ChatPrompt;
1924
1955
  type braintrust_CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> = CodeFunction<Input, Output, Fn>;
1925
1956
  declare const braintrust_CodeFunction: typeof CodeFunction;
1957
+ type braintrust_CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = CodeOpts<Params, Returns, Fn>;
1926
1958
  type braintrust_CodePrompt = CodePrompt;
1927
1959
  declare const braintrust_CodePrompt: typeof CodePrompt;
1928
1960
  declare const braintrust_CommentEvent: typeof CommentEvent;
@@ -1986,7 +2018,7 @@ type braintrust_Prompt<HasId extends boolean = true, HasVersion extends boolean
1986
2018
  declare const braintrust_Prompt: typeof Prompt;
1987
2019
  type braintrust_PromptBuilder = PromptBuilder;
1988
2020
  declare const braintrust_PromptBuilder: typeof PromptBuilder;
1989
- type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean> = PromptOpts<HasId, HasVersion>;
2021
+ type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = PromptOpts<HasId, HasVersion, HasTools, HasNoTrace>;
1990
2022
  type braintrust_PromptRowWithId<HasId extends boolean = true, HasVersion extends boolean = true> = PromptRowWithId<HasId, HasVersion>;
1991
2023
  type braintrust_ReadonlyAttachment = ReadonlyAttachment;
1992
2024
  declare const braintrust_ReadonlyAttachment: typeof ReadonlyAttachment;
@@ -1995,6 +2027,9 @@ declare const braintrust_ReadonlyExperiment: typeof ReadonlyExperiment;
1995
2027
  declare const braintrust_Reporter: typeof Reporter;
1996
2028
  type braintrust_ReporterBody<EvalReport> = ReporterBody<EvalReport>;
1997
2029
  type braintrust_ScoreSummary = ScoreSummary;
2030
+ type braintrust_ScorerBuilder = ScorerBuilder;
2031
+ declare const braintrust_ScorerBuilder: typeof ScorerBuilder;
2032
+ type braintrust_ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = ScorerOpts<Output, Input, Params, Returns, Fn>;
1998
2033
  type braintrust_SerializedBraintrustState = SerializedBraintrustState;
1999
2034
  type braintrust_SetCurrentArg = SetCurrentArg;
2000
2035
  type braintrust_Span = Span;
@@ -2005,7 +2040,6 @@ type braintrust_StartSpanArgs = StartSpanArgs;
2005
2040
  type braintrust_ToolBuilder = ToolBuilder;
2006
2041
  declare const braintrust_ToolBuilder: typeof ToolBuilder;
2007
2042
  type braintrust_ToolFunctionDefinition = ToolFunctionDefinition;
2008
- type braintrust_ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = ToolOpts<Params, Returns, Fn>;
2009
2043
  type braintrust_WithTransactionId<R> = WithTransactionId<R>;
2010
2044
  declare const braintrust_X_CACHED_HEADER: typeof X_CACHED_HEADER;
2011
2045
  declare const braintrust__exportsForTestingOnly: typeof _exportsForTestingOnly;
@@ -2053,7 +2087,7 @@ declare const braintrust_wrapOpenAI: typeof wrapOpenAI;
2053
2087
  declare const braintrust_wrapOpenAIv4: typeof wrapOpenAIv4;
2054
2088
  declare const braintrust_wrapTraced: typeof wrapTraced;
2055
2089
  declare namespace braintrust {
2056
- export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, braintrust_IdField as IdField, type braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, type braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type braintrust_ToolOpts as ToolOpts, type braintrust_WithTransactionId as WithTransactionId, braintrust_X_CACHED_HEADER as X_CACHED_HEADER, braintrust__exportsForTestingOnly as _exportsForTestingOnly, braintrust__internalGetGlobalState as _internalGetGlobalState, braintrust__internalSetInitialState as _internalSetInitialState, braintrust_braintrustStreamChunkSchema as braintrustStreamChunkSchema, braintrust_buildLocalSummary as buildLocalSummary, braintrust_createFinalValuePassThroughStream as createFinalValuePassThroughStream, braintrust_currentExperiment as currentExperiment, braintrust_currentLogger as currentLogger, braintrust_currentSpan as currentSpan, braintrust_devNullWritableStream as devNullWritableStream, braintrust_flush as flush, braintrust_getSpanParentObject as getSpanParentObject, braintrust_init as init, braintrust_initDataset as initDataset, braintrust_initExperiment as initExperiment, braintrust_initLogger as initLogger, braintrust_invoke as invoke, braintrust_loadPrompt as loadPrompt, braintrust_log as log, braintrust_logError as logError, braintrust_login as login, braintrust_loginToState as loginToState, braintrust_newId as newId, braintrust_parseCachedHeader as parseCachedHeader, braintrust_permalink as permalink, braintrust_projects as projects, braintrust_renderMessage as renderMessage, braintrust_reportFailures as reportFailures, braintrust_setFetch as setFetch, braintrust_spanComponentsToObjectId as spanComponentsToObjectId, braintrust_startSpan as startSpan, braintrust_summarize as summarize, braintrust_toolFunctionDefinitionSchema as toolFunctionDefinitionSchema, braintrust_traceable as traceable, braintrust_traced as traced, braintrust_updateSpan as updateSpan, braintrust_withCurrent as withCurrent, braintrust_withDataset as withDataset, braintrust_withExperiment as withExperiment, braintrust_withLogger as withLogger, braintrust_wrapAISDKModel as wrapAISDKModel, braintrust_wrapOpenAI as wrapOpenAI, braintrust_wrapOpenAIv4 as wrapOpenAIv4, braintrust_wrapTraced as wrapTraced };
2090
+ export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, type braintrust_CodeOpts as CodeOpts, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, braintrust_IdField as IdField, type braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, braintrust_ScorerBuilder as ScorerBuilder, type braintrust_ScorerOpts as ScorerOpts, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, type braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type braintrust_WithTransactionId as WithTransactionId, braintrust_X_CACHED_HEADER as X_CACHED_HEADER, braintrust__exportsForTestingOnly as _exportsForTestingOnly, braintrust__internalGetGlobalState as _internalGetGlobalState, braintrust__internalSetInitialState as _internalSetInitialState, braintrust_braintrustStreamChunkSchema as braintrustStreamChunkSchema, braintrust_buildLocalSummary as buildLocalSummary, braintrust_createFinalValuePassThroughStream as createFinalValuePassThroughStream, braintrust_currentExperiment as currentExperiment, braintrust_currentLogger as currentLogger, braintrust_currentSpan as currentSpan, braintrust_devNullWritableStream as devNullWritableStream, braintrust_flush as flush, braintrust_getSpanParentObject as getSpanParentObject, braintrust_init as init, braintrust_initDataset as initDataset, braintrust_initExperiment as initExperiment, braintrust_initLogger as initLogger, braintrust_invoke as invoke, braintrust_loadPrompt as loadPrompt, braintrust_log as log, braintrust_logError as logError, braintrust_login as login, braintrust_loginToState as loginToState, braintrust_newId as newId, braintrust_parseCachedHeader as parseCachedHeader, braintrust_permalink as permalink, braintrust_projects as projects, braintrust_renderMessage as renderMessage, braintrust_reportFailures as reportFailures, braintrust_setFetch as setFetch, braintrust_spanComponentsToObjectId as spanComponentsToObjectId, braintrust_startSpan as startSpan, braintrust_summarize as summarize, braintrust_toolFunctionDefinitionSchema as toolFunctionDefinitionSchema, braintrust_traceable as traceable, braintrust_traced as traced, braintrust_updateSpan as updateSpan, braintrust_withCurrent as withCurrent, braintrust_withDataset as withDataset, braintrust_withExperiment as withExperiment, braintrust_withLogger as withLogger, braintrust_wrapAISDKModel as wrapAISDKModel, braintrust_wrapOpenAI as wrapOpenAI, braintrust_wrapOpenAIv4 as wrapOpenAIv4, braintrust_wrapTraced as wrapTraced };
2057
2091
  }
2058
2092
 
2059
2093
  /**
@@ -2109,4 +2143,4 @@ declare namespace braintrust {
2109
2143
  * @module braintrust
2110
2144
  */
2111
2145
 
2112
- export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type ToolFunctionDefinition, type ToolOpts, type WithTransactionId, X_CACHED_HEADER, _exportsForTestingOnly, _internalGetGlobalState, _internalSetInitialState, braintrustStreamChunkSchema, buildLocalSummary, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, braintrust as default, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, logError, login, loginToState, newId, parseCachedHeader, permalink, projects, renderMessage, reportFailures, setFetch, spanComponentsToObjectId, startSpan, summarize, toolFunctionDefinitionSchema, traceable, traced, updateSpan, withCurrent, withDataset, withExperiment, withLogger, wrapAISDKModel, wrapOpenAI, wrapOpenAIv4, wrapTraced };
2146
+ export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, type CodeOpts, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, ScorerBuilder, type ScorerOpts, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type ToolFunctionDefinition, type WithTransactionId, X_CACHED_HEADER, _exportsForTestingOnly, _internalGetGlobalState, _internalSetInitialState, braintrustStreamChunkSchema, buildLocalSummary, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, braintrust as default, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, logError, login, loginToState, newId, parseCachedHeader, permalink, projects, renderMessage, reportFailures, setFetch, spanComponentsToObjectId, startSpan, summarize, toolFunctionDefinitionSchema, traceable, traced, updateSpan, withCurrent, withDataset, withExperiment, withLogger, wrapAISDKModel, wrapOpenAI, wrapOpenAIv4, wrapTraced };
package/dist/index.d.ts CHANGED
@@ -1572,13 +1572,20 @@ declare class Project {
1572
1572
  readonly id?: string;
1573
1573
  tools: ToolBuilder;
1574
1574
  prompts: PromptBuilder;
1575
+ scorers: ScorerBuilder;
1575
1576
  constructor(args: CreateProjectOpts);
1576
1577
  }
1577
1578
  declare class ToolBuilder {
1578
1579
  private readonly project;
1579
1580
  private taskCounter;
1580
1581
  constructor(project: Project);
1581
- create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: ToolOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
1582
+ create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: CodeOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
1583
+ }
1584
+ declare class ScorerBuilder {
1585
+ private readonly project;
1586
+ private taskCounter;
1587
+ constructor(project: Project);
1588
+ create<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>>(opts: ScorerOpts<Output, Input, Params, Returns, Fn>): void;
1582
1589
  }
1583
1590
  type Schema<Input, Output> = Partial<{
1584
1591
  parameters: z.ZodSchema<Input>;
@@ -1590,9 +1597,21 @@ interface BaseFnOpts {
1590
1597
  description: string;
1591
1598
  ifExists: IfExists;
1592
1599
  }
1593
- type ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
1600
+ type CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
1594
1601
  handler: Fn;
1595
1602
  } & Schema<Params, Returns>;
1603
+ type ScorerPromptOpts = Partial<BaseFnOpts> & PromptOpts<false, false, false, false> & {
1604
+ useCot: boolean;
1605
+ choiceScores: Record<string, number>;
1606
+ };
1607
+ type ScorerArgs<Output, Input> = {
1608
+ output: Output;
1609
+ expected?: Output;
1610
+ input?: Input;
1611
+ metadata?: Record<string, unknown>;
1612
+ };
1613
+ type Exact<T, Shape> = T extends Shape ? Exclude<keyof T, keyof Shape> extends never ? T : never : never;
1614
+ type ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = CodeOpts<Exact<Params, ScorerArgs<Output, Input>>, Returns, Fn> | ScorerPromptOpts;
1596
1615
  declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> {
1597
1616
  readonly project: Project;
1598
1617
  readonly handler: Fn;
@@ -1603,7 +1622,7 @@ declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Outp
1603
1622
  readonly parameters?: z.ZodSchema<Input>;
1604
1623
  readonly returns?: z.ZodSchema<Output>;
1605
1624
  readonly ifExists?: IfExists;
1606
- constructor(project: Project, opts: Omit<ToolOpts<Input, Output, Fn>, "name" | "slug"> & {
1625
+ constructor(project: Project, opts: Omit<CodeOpts<Input, Output, Fn>, "name" | "slug"> & {
1607
1626
  name: string;
1608
1627
  slug: string;
1609
1628
  type: FunctionType;
@@ -1619,11 +1638,12 @@ declare class CodePrompt {
1619
1638
  readonly ifExists?: IfExists;
1620
1639
  readonly description?: string;
1621
1640
  readonly id?: string;
1641
+ readonly functionType?: FunctionType;
1622
1642
  readonly toolFunctions: (SavedFunctionId | GenericCodeFunction)[];
1623
- constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false>, "name" | "slug"> & {
1643
+ constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false, false, false>, "name" | "slug"> & {
1624
1644
  name: string;
1625
1645
  slug: string;
1626
- });
1646
+ }, functionType?: FunctionType);
1627
1647
  }
1628
1648
  declare const toolFunctionDefinitionSchema: z.ZodObject<{
1629
1649
  type: z.ZodLiteral<"function">;
@@ -1667,18 +1687,22 @@ interface PromptId {
1667
1687
  interface PromptVersion {
1668
1688
  version: TransactionId;
1669
1689
  }
1690
+ interface PromptTools {
1691
+ tools: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
1692
+ }
1693
+ interface PromptNoTrace {
1694
+ noTrace: boolean;
1695
+ }
1670
1696
  type PromptContents = {
1671
1697
  prompt: string;
1672
1698
  } | {
1673
1699
  messages: Message[];
1674
1700
  };
1675
- type PromptOpts<HasId extends boolean, HasVersion extends boolean> = (Partial<Omit<BaseFnOpts, "name">> & {
1701
+ type PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = (Partial<Omit<BaseFnOpts, "name">> & {
1676
1702
  name: string;
1677
- }) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & PromptContents & {
1703
+ }) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & (HasTools extends true ? Partial<PromptTools> : {}) & (HasNoTrace extends true ? Partial<PromptNoTrace> : {}) & PromptContents & {
1678
1704
  model: string;
1679
1705
  params?: ModelParams;
1680
- tools?: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
1681
- noTrace?: boolean;
1682
1706
  };
1683
1707
  declare class PromptBuilder {
1684
1708
  private readonly project;
@@ -1707,7 +1731,14 @@ declare function BaseExperiment<Input = unknown, Expected = unknown, Metadata ex
1707
1731
  type EvalData<Input, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata>[] | (() => EvalCase<Input, Expected, Metadata>[]) | Promise<EvalCase<Input, Expected, Metadata>[]> | (() => Promise<EvalCase<Input, Expected, Metadata>[]>) | AsyncGenerator<EvalCase<Input, Expected, Metadata>> | AsyncIterable<EvalCase<Input, Expected, Metadata>> | BaseExperiment<Input, Expected, Metadata> | (() => BaseExperiment<Input, Expected, Metadata>);
1708
1732
  type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
1709
1733
  interface EvalHooks {
1734
+ /**
1735
+ * @deprecated Use `metadata` instead.
1736
+ */
1710
1737
  meta: (info: Record<string, unknown>) => void;
1738
+ /**
1739
+ * The metadata object for the current evaluation. You can mutate this object to add or remove metadata.
1740
+ */
1741
+ metadata: Record<string, unknown>;
1711
1742
  span: Span;
1712
1743
  }
1713
1744
  type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
@@ -1923,6 +1954,7 @@ type braintrust_BraintrustStreamChunk = BraintrustStreamChunk;
1923
1954
  type braintrust_ChatPrompt = ChatPrompt;
1924
1955
  type braintrust_CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> = CodeFunction<Input, Output, Fn>;
1925
1956
  declare const braintrust_CodeFunction: typeof CodeFunction;
1957
+ type braintrust_CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = CodeOpts<Params, Returns, Fn>;
1926
1958
  type braintrust_CodePrompt = CodePrompt;
1927
1959
  declare const braintrust_CodePrompt: typeof CodePrompt;
1928
1960
  declare const braintrust_CommentEvent: typeof CommentEvent;
@@ -1986,7 +2018,7 @@ type braintrust_Prompt<HasId extends boolean = true, HasVersion extends boolean
1986
2018
  declare const braintrust_Prompt: typeof Prompt;
1987
2019
  type braintrust_PromptBuilder = PromptBuilder;
1988
2020
  declare const braintrust_PromptBuilder: typeof PromptBuilder;
1989
- type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean> = PromptOpts<HasId, HasVersion>;
2021
+ type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = PromptOpts<HasId, HasVersion, HasTools, HasNoTrace>;
1990
2022
  type braintrust_PromptRowWithId<HasId extends boolean = true, HasVersion extends boolean = true> = PromptRowWithId<HasId, HasVersion>;
1991
2023
  type braintrust_ReadonlyAttachment = ReadonlyAttachment;
1992
2024
  declare const braintrust_ReadonlyAttachment: typeof ReadonlyAttachment;
@@ -1995,6 +2027,9 @@ declare const braintrust_ReadonlyExperiment: typeof ReadonlyExperiment;
1995
2027
  declare const braintrust_Reporter: typeof Reporter;
1996
2028
  type braintrust_ReporterBody<EvalReport> = ReporterBody<EvalReport>;
1997
2029
  type braintrust_ScoreSummary = ScoreSummary;
2030
+ type braintrust_ScorerBuilder = ScorerBuilder;
2031
+ declare const braintrust_ScorerBuilder: typeof ScorerBuilder;
2032
+ type braintrust_ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = ScorerOpts<Output, Input, Params, Returns, Fn>;
1998
2033
  type braintrust_SerializedBraintrustState = SerializedBraintrustState;
1999
2034
  type braintrust_SetCurrentArg = SetCurrentArg;
2000
2035
  type braintrust_Span = Span;
@@ -2005,7 +2040,6 @@ type braintrust_StartSpanArgs = StartSpanArgs;
2005
2040
  type braintrust_ToolBuilder = ToolBuilder;
2006
2041
  declare const braintrust_ToolBuilder: typeof ToolBuilder;
2007
2042
  type braintrust_ToolFunctionDefinition = ToolFunctionDefinition;
2008
- type braintrust_ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = ToolOpts<Params, Returns, Fn>;
2009
2043
  type braintrust_WithTransactionId<R> = WithTransactionId<R>;
2010
2044
  declare const braintrust_X_CACHED_HEADER: typeof X_CACHED_HEADER;
2011
2045
  declare const braintrust__exportsForTestingOnly: typeof _exportsForTestingOnly;
@@ -2053,7 +2087,7 @@ declare const braintrust_wrapOpenAI: typeof wrapOpenAI;
2053
2087
  declare const braintrust_wrapOpenAIv4: typeof wrapOpenAIv4;
2054
2088
  declare const braintrust_wrapTraced: typeof wrapTraced;
2055
2089
  declare namespace braintrust {
2056
- export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, braintrust_IdField as IdField, type braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, type braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type braintrust_ToolOpts as ToolOpts, type braintrust_WithTransactionId as WithTransactionId, braintrust_X_CACHED_HEADER as X_CACHED_HEADER, braintrust__exportsForTestingOnly as _exportsForTestingOnly, braintrust__internalGetGlobalState as _internalGetGlobalState, braintrust__internalSetInitialState as _internalSetInitialState, braintrust_braintrustStreamChunkSchema as braintrustStreamChunkSchema, braintrust_buildLocalSummary as buildLocalSummary, braintrust_createFinalValuePassThroughStream as createFinalValuePassThroughStream, braintrust_currentExperiment as currentExperiment, braintrust_currentLogger as currentLogger, braintrust_currentSpan as currentSpan, braintrust_devNullWritableStream as devNullWritableStream, braintrust_flush as flush, braintrust_getSpanParentObject as getSpanParentObject, braintrust_init as init, braintrust_initDataset as initDataset, braintrust_initExperiment as initExperiment, braintrust_initLogger as initLogger, braintrust_invoke as invoke, braintrust_loadPrompt as loadPrompt, braintrust_log as log, braintrust_logError as logError, braintrust_login as login, braintrust_loginToState as loginToState, braintrust_newId as newId, braintrust_parseCachedHeader as parseCachedHeader, braintrust_permalink as permalink, braintrust_projects as projects, braintrust_renderMessage as renderMessage, braintrust_reportFailures as reportFailures, braintrust_setFetch as setFetch, braintrust_spanComponentsToObjectId as spanComponentsToObjectId, braintrust_startSpan as startSpan, braintrust_summarize as summarize, braintrust_toolFunctionDefinitionSchema as toolFunctionDefinitionSchema, braintrust_traceable as traceable, braintrust_traced as traced, braintrust_updateSpan as updateSpan, braintrust_withCurrent as withCurrent, braintrust_withDataset as withDataset, braintrust_withExperiment as withExperiment, braintrust_withLogger as withLogger, braintrust_wrapAISDKModel as wrapAISDKModel, braintrust_wrapOpenAI as wrapOpenAI, braintrust_wrapOpenAIv4 as wrapOpenAIv4, braintrust_wrapTraced as wrapTraced };
2090
+ export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, type braintrust_CodeOpts as CodeOpts, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, braintrust_IdField as IdField, type braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, braintrust_ScorerBuilder as ScorerBuilder, type braintrust_ScorerOpts as ScorerOpts, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, type braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type braintrust_WithTransactionId as WithTransactionId, braintrust_X_CACHED_HEADER as X_CACHED_HEADER, braintrust__exportsForTestingOnly as _exportsForTestingOnly, braintrust__internalGetGlobalState as _internalGetGlobalState, braintrust__internalSetInitialState as _internalSetInitialState, braintrust_braintrustStreamChunkSchema as braintrustStreamChunkSchema, braintrust_buildLocalSummary as buildLocalSummary, braintrust_createFinalValuePassThroughStream as createFinalValuePassThroughStream, braintrust_currentExperiment as currentExperiment, braintrust_currentLogger as currentLogger, braintrust_currentSpan as currentSpan, braintrust_devNullWritableStream as devNullWritableStream, braintrust_flush as flush, braintrust_getSpanParentObject as getSpanParentObject, braintrust_init as init, braintrust_initDataset as initDataset, braintrust_initExperiment as initExperiment, braintrust_initLogger as initLogger, braintrust_invoke as invoke, braintrust_loadPrompt as loadPrompt, braintrust_log as log, braintrust_logError as logError, braintrust_login as login, braintrust_loginToState as loginToState, braintrust_newId as newId, braintrust_parseCachedHeader as parseCachedHeader, braintrust_permalink as permalink, braintrust_projects as projects, braintrust_renderMessage as renderMessage, braintrust_reportFailures as reportFailures, braintrust_setFetch as setFetch, braintrust_spanComponentsToObjectId as spanComponentsToObjectId, braintrust_startSpan as startSpan, braintrust_summarize as summarize, braintrust_toolFunctionDefinitionSchema as toolFunctionDefinitionSchema, braintrust_traceable as traceable, braintrust_traced as traced, braintrust_updateSpan as updateSpan, braintrust_withCurrent as withCurrent, braintrust_withDataset as withDataset, braintrust_withExperiment as withExperiment, braintrust_withLogger as withLogger, braintrust_wrapAISDKModel as wrapAISDKModel, braintrust_wrapOpenAI as wrapOpenAI, braintrust_wrapOpenAIv4 as wrapOpenAIv4, braintrust_wrapTraced as wrapTraced };
2057
2091
  }
2058
2092
 
2059
2093
  /**
@@ -2109,4 +2143,4 @@ declare namespace braintrust {
2109
2143
  * @module braintrust
2110
2144
  */
2111
2145
 
2112
- export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type ToolFunctionDefinition, type ToolOpts, type WithTransactionId, X_CACHED_HEADER, _exportsForTestingOnly, _internalGetGlobalState, _internalSetInitialState, braintrustStreamChunkSchema, buildLocalSummary, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, braintrust as default, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, logError, login, loginToState, newId, parseCachedHeader, permalink, projects, renderMessage, reportFailures, setFetch, spanComponentsToObjectId, startSpan, summarize, toolFunctionDefinitionSchema, traceable, traced, updateSpan, withCurrent, withDataset, withExperiment, withLogger, wrapAISDKModel, wrapOpenAI, wrapOpenAIv4, wrapTraced };
2146
+ export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, type CodeOpts, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, ScorerBuilder, type ScorerOpts, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type ToolFunctionDefinition, type WithTransactionId, X_CACHED_HEADER, _exportsForTestingOnly, _internalGetGlobalState, _internalSetInitialState, braintrustStreamChunkSchema, buildLocalSummary, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, braintrust as default, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, logError, login, loginToState, newId, parseCachedHeader, permalink, projects, renderMessage, reportFailures, setFetch, spanComponentsToObjectId, startSpan, summarize, toolFunctionDefinitionSchema, traceable, traced, updateSpan, withCurrent, withDataset, withExperiment, withLogger, wrapAISDKModel, wrapOpenAI, wrapOpenAIv4, wrapTraced };
package/dist/index.js CHANGED
@@ -51,6 +51,7 @@ __export(src_exports, {
51
51
  ReadonlyAttachment: () => ReadonlyAttachment,
52
52
  ReadonlyExperiment: () => ReadonlyExperiment,
53
53
  Reporter: () => Reporter,
54
+ ScorerBuilder: () => ScorerBuilder,
54
55
  SpanImpl: () => SpanImpl,
55
56
  ToolBuilder: () => ToolBuilder,
56
57
  X_CACHED_HEADER: () => X_CACHED_HEADER,
@@ -3834,6 +3835,7 @@ __export(exports_node_exports, {
3834
3835
  ReadonlyAttachment: () => ReadonlyAttachment,
3835
3836
  ReadonlyExperiment: () => ReadonlyExperiment,
3836
3837
  Reporter: () => Reporter,
3838
+ ScorerBuilder: () => ScorerBuilder,
3837
3839
  SpanImpl: () => SpanImpl,
3838
3840
  ToolBuilder: () => ToolBuilder,
3839
3841
  X_CACHED_HEADER: () => X_CACHED_HEADER,
@@ -3948,49 +3950,8 @@ async function invoke(args) {
3948
3950
  }
3949
3951
 
3950
3952
  // src/framework.ts
3951
- var import_chalk = __toESM(require("chalk"));
3952
3953
  var import_core2 = require("@braintrust/core");
3953
3954
 
3954
- // src/progress.ts
3955
- var cliProgress = __toESM(require("cli-progress"));
3956
- var MAX_NAME_LENGTH = 40;
3957
- function fitNameToSpaces(name, length) {
3958
- const padded = name.padEnd(length);
3959
- if (padded.length <= length) {
3960
- return padded;
3961
- }
3962
- return padded.substring(0, length - 3) + "...";
3963
- }
3964
- var BarProgressReporter = class {
3965
- multiBar;
3966
- bars = {};
3967
- constructor() {
3968
- this.multiBar = new cliProgress.MultiBar(
3969
- {
3970
- clearOnComplete: false,
3971
- format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
3972
- autopadding: true
3973
- },
3974
- cliProgress.Presets.shades_grey
3975
- );
3976
- }
3977
- start(name, total) {
3978
- const bar = this.multiBar.create(total, 0);
3979
- this.bars[name] = bar;
3980
- }
3981
- stop() {
3982
- this.multiBar.stop();
3983
- }
3984
- increment(name) {
3985
- this.bars[name].increment({
3986
- evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
3987
- });
3988
- }
3989
- };
3990
-
3991
- // src/framework.ts
3992
- var import_pluralize = __toESM(require("pluralize"));
3993
-
3994
3955
  // ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
3995
3956
  function initialParams(fn) {
3996
3957
  return function(...args) {
@@ -5094,6 +5055,47 @@ function waterfall(tasks, callback) {
5094
5055
  }
5095
5056
  var waterfall$1 = awaitify(waterfall);
5096
5057
 
5058
+ // src/framework.ts
5059
+ var import_chalk = __toESM(require("chalk"));
5060
+ var import_pluralize = __toESM(require("pluralize"));
5061
+
5062
+ // src/progress.ts
5063
+ var cliProgress = __toESM(require("cli-progress"));
5064
+ var MAX_NAME_LENGTH = 40;
5065
+ function fitNameToSpaces(name, length) {
5066
+ const padded = name.padEnd(length);
5067
+ if (padded.length <= length) {
5068
+ return padded;
5069
+ }
5070
+ return padded.substring(0, length - 3) + "...";
5071
+ }
5072
+ var BarProgressReporter = class {
5073
+ multiBar;
5074
+ bars = {};
5075
+ constructor() {
5076
+ this.multiBar = new cliProgress.MultiBar(
5077
+ {
5078
+ clearOnComplete: false,
5079
+ format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
5080
+ autopadding: true
5081
+ },
5082
+ cliProgress.Presets.shades_grey
5083
+ );
5084
+ }
5085
+ start(name, total) {
5086
+ const bar = this.multiBar.create(total, 0);
5087
+ this.bars[name] = bar;
5088
+ }
5089
+ stop() {
5090
+ this.multiBar.stop();
5091
+ }
5092
+ increment(name) {
5093
+ this.bars[name].increment({
5094
+ evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
5095
+ });
5096
+ }
5097
+ };
5098
+
5097
5099
  // src/framework.ts
5098
5100
  function BaseExperiment(options = {}) {
5099
5101
  return { _type: "BaseExperiment", ...options };
@@ -5339,7 +5341,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
5339
5341
  const meta = (o) => metadata = { ...metadata, ...o };
5340
5342
  await rootSpan.traced(
5341
5343
  async (span) => {
5342
- const outputResult = evaluator.task(datum.input, { meta, span });
5344
+ const outputResult = evaluator.task(datum.input, {
5345
+ meta,
5346
+ metadata,
5347
+ span
5348
+ });
5343
5349
  if (outputResult instanceof Promise) {
5344
5350
  output = await outputResult;
5345
5351
  } else {
@@ -5634,12 +5640,14 @@ var Project = class {
5634
5640
  id;
5635
5641
  tools;
5636
5642
  prompts;
5643
+ scorers;
5637
5644
  constructor(args) {
5638
5645
  _initializeSpanContext();
5639
5646
  this.name = "name" in args ? args.name : void 0;
5640
5647
  this.id = "id" in args ? args.id : void 0;
5641
5648
  this.tools = new ToolBuilder(this);
5642
5649
  this.prompts = new PromptBuilder(this);
5650
+ this.scorers = new ScorerBuilder(this);
5643
5651
  }
5644
5652
  };
5645
5653
  var ToolBuilder = class {
@@ -5673,6 +5681,70 @@ var ToolBuilder = class {
5673
5681
  return tool;
5674
5682
  }
5675
5683
  };
5684
+ var ScorerBuilder = class {
5685
+ constructor(project) {
5686
+ this.project = project;
5687
+ }
5688
+ taskCounter = 0;
5689
+ create(opts) {
5690
+ this.taskCounter++;
5691
+ let resolvedName = opts.name;
5692
+ if (!resolvedName && "handler" in opts) {
5693
+ resolvedName = opts.handler.name;
5694
+ }
5695
+ if (!resolvedName || resolvedName.trim().length === 0) {
5696
+ resolvedName = `Scorer ${import_path.default.basename(__filename)} ${this.taskCounter}`;
5697
+ }
5698
+ const slug = opts.slug ?? (0, import_slugify.default)(resolvedName, { lower: true, strict: true });
5699
+ if ("handler" in opts) {
5700
+ const scorer = new CodeFunction(this.project, {
5701
+ ...opts,
5702
+ name: resolvedName,
5703
+ slug,
5704
+ type: "scorer"
5705
+ });
5706
+ if (globalThis._lazy_load) {
5707
+ globalThis._evals.functions.push(
5708
+ scorer
5709
+ );
5710
+ }
5711
+ } else {
5712
+ const promptBlock = "messages" in opts ? {
5713
+ type: "chat",
5714
+ messages: opts.messages
5715
+ } : {
5716
+ type: "completion",
5717
+ content: opts.prompt
5718
+ };
5719
+ const promptData = {
5720
+ prompt: promptBlock,
5721
+ options: {
5722
+ model: opts.model,
5723
+ params: opts.params
5724
+ },
5725
+ parser: {
5726
+ type: "llm_classifier",
5727
+ use_cot: opts.useCot,
5728
+ choice_scores: opts.choiceScores
5729
+ }
5730
+ };
5731
+ const codePrompt = new CodePrompt(
5732
+ this.project,
5733
+ promptData,
5734
+ [],
5735
+ {
5736
+ ...opts,
5737
+ name: resolvedName,
5738
+ slug
5739
+ },
5740
+ "scorer"
5741
+ );
5742
+ if (globalThis._lazy_load) {
5743
+ globalThis._evals.prompts.push(codePrompt);
5744
+ }
5745
+ }
5746
+ }
5747
+ };
5676
5748
  var CodeFunction = class {
5677
5749
  constructor(project, opts) {
5678
5750
  this.project = project;
@@ -5712,8 +5784,9 @@ var CodePrompt = class {
5712
5784
  ifExists;
5713
5785
  description;
5714
5786
  id;
5787
+ functionType;
5715
5788
  toolFunctions;
5716
- constructor(project, prompt, toolFunctions, opts) {
5789
+ constructor(project, prompt, toolFunctions, opts, functionType) {
5717
5790
  this.project = project;
5718
5791
  this.name = opts.name;
5719
5792
  this.slug = opts.slug;
@@ -5722,6 +5795,7 @@ var CodePrompt = class {
5722
5795
  this.ifExists = opts.ifExists;
5723
5796
  this.description = opts.description;
5724
5797
  this.id = opts.id;
5798
+ this.functionType = functionType;
5725
5799
  }
5726
5800
  };
5727
5801
  var toolFunctionDefinitionSchema = import_zod3.z.object({
@@ -6456,6 +6530,7 @@ var src_default = exports_node_exports;
6456
6530
  ReadonlyAttachment,
6457
6531
  ReadonlyExperiment,
6458
6532
  Reporter,
6533
+ ScorerBuilder,
6459
6534
  SpanImpl,
6460
6535
  ToolBuilder,
6461
6536
  X_CACHED_HEADER,
package/dist/index.mjs CHANGED
@@ -3770,6 +3770,7 @@ __export(exports_node_exports, {
3770
3770
  ReadonlyAttachment: () => ReadonlyAttachment,
3771
3771
  ReadonlyExperiment: () => ReadonlyExperiment,
3772
3772
  Reporter: () => Reporter,
3773
+ ScorerBuilder: () => ScorerBuilder,
3773
3774
  SpanImpl: () => SpanImpl,
3774
3775
  ToolBuilder: () => ToolBuilder,
3775
3776
  X_CACHED_HEADER: () => X_CACHED_HEADER,
@@ -3886,49 +3887,8 @@ async function invoke(args) {
3886
3887
  }
3887
3888
 
3888
3889
  // src/framework.ts
3889
- import chalk from "chalk";
3890
3890
  import { SpanTypeAttribute as SpanTypeAttribute2, mergeDicts as mergeDicts2 } from "@braintrust/core";
3891
3891
 
3892
- // src/progress.ts
3893
- import * as cliProgress from "cli-progress";
3894
- var MAX_NAME_LENGTH = 40;
3895
- function fitNameToSpaces(name, length) {
3896
- const padded = name.padEnd(length);
3897
- if (padded.length <= length) {
3898
- return padded;
3899
- }
3900
- return padded.substring(0, length - 3) + "...";
3901
- }
3902
- var BarProgressReporter = class {
3903
- multiBar;
3904
- bars = {};
3905
- constructor() {
3906
- this.multiBar = new cliProgress.MultiBar(
3907
- {
3908
- clearOnComplete: false,
3909
- format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
3910
- autopadding: true
3911
- },
3912
- cliProgress.Presets.shades_grey
3913
- );
3914
- }
3915
- start(name, total) {
3916
- const bar = this.multiBar.create(total, 0);
3917
- this.bars[name] = bar;
3918
- }
3919
- stop() {
3920
- this.multiBar.stop();
3921
- }
3922
- increment(name) {
3923
- this.bars[name].increment({
3924
- evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
3925
- });
3926
- }
3927
- };
3928
-
3929
- // src/framework.ts
3930
- import pluralize from "pluralize";
3931
-
3932
3892
  // ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
3933
3893
  function initialParams(fn) {
3934
3894
  return function(...args) {
@@ -5032,6 +4992,47 @@ function waterfall(tasks, callback) {
5032
4992
  }
5033
4993
  var waterfall$1 = awaitify(waterfall);
5034
4994
 
4995
+ // src/framework.ts
4996
+ import chalk from "chalk";
4997
+ import pluralize from "pluralize";
4998
+
4999
+ // src/progress.ts
5000
+ import * as cliProgress from "cli-progress";
5001
+ var MAX_NAME_LENGTH = 40;
5002
+ function fitNameToSpaces(name, length) {
5003
+ const padded = name.padEnd(length);
5004
+ if (padded.length <= length) {
5005
+ return padded;
5006
+ }
5007
+ return padded.substring(0, length - 3) + "...";
5008
+ }
5009
+ var BarProgressReporter = class {
5010
+ multiBar;
5011
+ bars = {};
5012
+ constructor() {
5013
+ this.multiBar = new cliProgress.MultiBar(
5014
+ {
5015
+ clearOnComplete: false,
5016
+ format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
5017
+ autopadding: true
5018
+ },
5019
+ cliProgress.Presets.shades_grey
5020
+ );
5021
+ }
5022
+ start(name, total) {
5023
+ const bar = this.multiBar.create(total, 0);
5024
+ this.bars[name] = bar;
5025
+ }
5026
+ stop() {
5027
+ this.multiBar.stop();
5028
+ }
5029
+ increment(name) {
5030
+ this.bars[name].increment({
5031
+ evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
5032
+ });
5033
+ }
5034
+ };
5035
+
5035
5036
  // src/framework.ts
5036
5037
  function BaseExperiment(options = {}) {
5037
5038
  return { _type: "BaseExperiment", ...options };
@@ -5277,7 +5278,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
5277
5278
  const meta = (o) => metadata = { ...metadata, ...o };
5278
5279
  await rootSpan.traced(
5279
5280
  async (span) => {
5280
- const outputResult = evaluator.task(datum.input, { meta, span });
5281
+ const outputResult = evaluator.task(datum.input, {
5282
+ meta,
5283
+ metadata,
5284
+ span
5285
+ });
5281
5286
  if (outputResult instanceof Promise) {
5282
5287
  output = await outputResult;
5283
5288
  } else {
@@ -5572,12 +5577,14 @@ var Project = class {
5572
5577
  id;
5573
5578
  tools;
5574
5579
  prompts;
5580
+ scorers;
5575
5581
  constructor(args) {
5576
5582
  _initializeSpanContext();
5577
5583
  this.name = "name" in args ? args.name : void 0;
5578
5584
  this.id = "id" in args ? args.id : void 0;
5579
5585
  this.tools = new ToolBuilder(this);
5580
5586
  this.prompts = new PromptBuilder(this);
5587
+ this.scorers = new ScorerBuilder(this);
5581
5588
  }
5582
5589
  };
5583
5590
  var ToolBuilder = class {
@@ -5611,6 +5618,70 @@ var ToolBuilder = class {
5611
5618
  return tool;
5612
5619
  }
5613
5620
  };
5621
+ var ScorerBuilder = class {
5622
+ constructor(project) {
5623
+ this.project = project;
5624
+ }
5625
+ taskCounter = 0;
5626
+ create(opts) {
5627
+ this.taskCounter++;
5628
+ let resolvedName = opts.name;
5629
+ if (!resolvedName && "handler" in opts) {
5630
+ resolvedName = opts.handler.name;
5631
+ }
5632
+ if (!resolvedName || resolvedName.trim().length === 0) {
5633
+ resolvedName = `Scorer ${path2.basename(__filename)} ${this.taskCounter}`;
5634
+ }
5635
+ const slug = opts.slug ?? slugifyLib(resolvedName, { lower: true, strict: true });
5636
+ if ("handler" in opts) {
5637
+ const scorer = new CodeFunction(this.project, {
5638
+ ...opts,
5639
+ name: resolvedName,
5640
+ slug,
5641
+ type: "scorer"
5642
+ });
5643
+ if (globalThis._lazy_load) {
5644
+ globalThis._evals.functions.push(
5645
+ scorer
5646
+ );
5647
+ }
5648
+ } else {
5649
+ const promptBlock = "messages" in opts ? {
5650
+ type: "chat",
5651
+ messages: opts.messages
5652
+ } : {
5653
+ type: "completion",
5654
+ content: opts.prompt
5655
+ };
5656
+ const promptData = {
5657
+ prompt: promptBlock,
5658
+ options: {
5659
+ model: opts.model,
5660
+ params: opts.params
5661
+ },
5662
+ parser: {
5663
+ type: "llm_classifier",
5664
+ use_cot: opts.useCot,
5665
+ choice_scores: opts.choiceScores
5666
+ }
5667
+ };
5668
+ const codePrompt = new CodePrompt(
5669
+ this.project,
5670
+ promptData,
5671
+ [],
5672
+ {
5673
+ ...opts,
5674
+ name: resolvedName,
5675
+ slug
5676
+ },
5677
+ "scorer"
5678
+ );
5679
+ if (globalThis._lazy_load) {
5680
+ globalThis._evals.prompts.push(codePrompt);
5681
+ }
5682
+ }
5683
+ }
5684
+ };
5614
5685
  var CodeFunction = class {
5615
5686
  constructor(project, opts) {
5616
5687
  this.project = project;
@@ -5650,8 +5721,9 @@ var CodePrompt = class {
5650
5721
  ifExists;
5651
5722
  description;
5652
5723
  id;
5724
+ functionType;
5653
5725
  toolFunctions;
5654
- constructor(project, prompt, toolFunctions, opts) {
5726
+ constructor(project, prompt, toolFunctions, opts, functionType) {
5655
5727
  this.project = project;
5656
5728
  this.name = opts.name;
5657
5729
  this.slug = opts.slug;
@@ -5660,6 +5732,7 @@ var CodePrompt = class {
5660
5732
  this.ifExists = opts.ifExists;
5661
5733
  this.description = opts.description;
5662
5734
  this.id = opts.id;
5735
+ this.functionType = functionType;
5663
5736
  }
5664
5737
  };
5665
5738
  var toolFunctionDefinitionSchema = z3.object({
@@ -6393,6 +6466,7 @@ export {
6393
6466
  ReadonlyAttachment,
6394
6467
  ReadonlyExperiment,
6395
6468
  Reporter,
6469
+ ScorerBuilder,
6396
6470
  SpanImpl,
6397
6471
  ToolBuilder,
6398
6472
  X_CACHED_HEADER,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "braintrust",
3
- "version": "0.0.175",
3
+ "version": "0.0.176",
4
4
  "description": "SDK for integrating Braintrust",
5
5
  "repository": {
6
6
  "type": "git",
@@ -75,7 +75,7 @@
75
75
  },
76
76
  "dependencies": {
77
77
  "@ai-sdk/provider": "^1.0.1",
78
- "@braintrust/core": "0.0.69",
78
+ "@braintrust/core": "0.0.70",
79
79
  "@next/env": "^14.2.3",
80
80
  "@vercel/functions": "^1.0.2",
81
81
  "ai": "^3.2.16",