@anvia/core 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/agent/index.d.ts +4 -4
  2. package/dist/agent/index.js +6 -5
  3. package/dist/{agent-0UeJ9Rad.d.ts → agent-B-ls5y_g.d.ts} +2 -15
  4. package/dist/{chunk-PP4VIN3Y.js → chunk-4PHDOEKY.js} +3 -3
  5. package/dist/{chunk-6U6PZ5MD.js → chunk-BD66WU2B.js} +6 -15
  6. package/dist/chunk-BD66WU2B.js.map +1 -0
  7. package/dist/{chunk-I2HOMD3R.js → chunk-CY7SB5FG.js} +3 -3
  8. package/dist/{chunk-A7VDIZQN.js → chunk-EFGX3EX5.js} +5 -14
  9. package/dist/chunk-EFGX3EX5.js.map +1 -0
  10. package/dist/{chunk-TILE6Z2N.js → chunk-L6JCKFAX.js} +2 -2
  11. package/dist/{chunk-65QV627O.js → chunk-OIA4CN5V.js} +2 -2
  12. package/dist/chunk-OIMLU4SF.js +20 -0
  13. package/dist/chunk-OIMLU4SF.js.map +1 -0
  14. package/dist/{chunk-TP32W7XT.js → chunk-TVIADATY.js} +597 -555
  15. package/dist/chunk-TVIADATY.js.map +1 -0
  16. package/dist/embeddings/index.d.ts +4 -24
  17. package/dist/embeddings/index.js +2 -1
  18. package/dist/evals/index.d.ts +23 -19
  19. package/dist/evals/index.js +92 -90
  20. package/dist/evals/index.js.map +1 -1
  21. package/dist/extractor/index.d.ts +3 -3
  22. package/dist/extractor/index.js +7 -6
  23. package/dist/index.d.ts +3 -3
  24. package/dist/index.js +7 -6
  25. package/dist/internal/agent.d.ts +3 -3
  26. package/dist/internal/agent.js +5 -4
  27. package/dist/{middleware-BQ7fkEEe.d.ts → middleware-CGiEIaBx.d.ts} +1 -1
  28. package/dist/pipeline/index.d.ts +5 -3
  29. package/dist/pipeline/index.js +156 -160
  30. package/dist/pipeline/index.js.map +1 -1
  31. package/dist/skills/index.js +5 -4
  32. package/dist/tool/index.d.ts +2 -2
  33. package/dist/tool/index.js +4 -3
  34. package/dist/types-IB2e9u5M.d.ts +25 -0
  35. package/dist/vector-store/index.d.ts +1 -1
  36. package/dist/vector-store/index.js +3 -2
  37. package/package.json +1 -1
  38. package/dist/chunk-6U6PZ5MD.js.map +0 -1
  39. package/dist/chunk-A7VDIZQN.js.map +0 -1
  40. package/dist/chunk-TP32W7XT.js.map +0 -1
  41. /package/dist/{chunk-PP4VIN3Y.js.map → chunk-4PHDOEKY.js.map} +0 -0
  42. /package/dist/{chunk-I2HOMD3R.js.map → chunk-CY7SB5FG.js.map} +0 -0
  43. /package/dist/{chunk-TILE6Z2N.js.map → chunk-L6JCKFAX.js.map} +0 -0
  44. /package/dist/{chunk-65QV627O.js.map → chunk-OIA4CN5V.js.map} +0 -0
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/evals/index.ts"],"sourcesContent":["import { z } from \"zod\";\nimport type { Agent } from \"../agent/agent\";\nimport type { PromptResponse } from \"../agent/request\";\nimport type { CompletionModel, JsonValue, Message } from \"../completion\";\nimport { cosineSimilarity, type EmbeddingModel, embedText } from \"../embeddings\";\nimport { ExtractorBuilder } from \"../extractor\";\nimport type { ZodSchema } from \"../schema\";\n\nexport type EvalMetadata = Record<string, JsonValue | undefined>;\n\nexport type EvalCase<Input, Expected = unknown> = {\n id: string;\n input: Input;\n expected?: Expected | undefined;\n metadata?: EvalMetadata | undefined;\n};\n\nexport type EvalTarget<Input, Output, Expected = unknown> = (\n input: Input,\n testCase: EvalCase<Input, Expected>,\n) => Output | Promise<Output>;\n\nexport type EvalOutcomeStatus = \"pass\" | \"fail\" | \"invalid\";\n\nexport type EvalOutcome<Score = unknown> =\n | {\n outcome: \"pass\";\n score?: Score | undefined;\n comment?: string | undefined;\n metadata?: EvalMetadata | undefined;\n }\n | {\n outcome: \"fail\";\n score?: Score | undefined;\n comment?: string | undefined;\n metadata?: EvalMetadata | undefined;\n }\n | {\n outcome: \"invalid\";\n reason: string;\n score?: Score | undefined;\n comment?: string | undefined;\n metadata?: EvalMetadata | undefined;\n };\n\nexport const EvalOutcome = {\n pass<Score>(\n score?: Score,\n options: { comment?: string | undefined; metadata?: EvalMetadata | undefined } = {},\n ): EvalOutcome<Score> {\n return {\n outcome: \"pass\",\n ...(score === undefined ? {} : { score }),\n ...(options.comment === undefined ? {} : { comment: options.comment }),\n ...(options.metadata === undefined ? {} : { metadata: options.metadata }),\n };\n },\n\n fail<Score>(\n score?: Score,\n options: { comment?: string | undefined; metadata?: EvalMetadata | undefined } = {},\n ): EvalOutcome<Score> {\n return {\n outcome: \"fail\",\n ...(score === undefined ? {} : { score }),\n ...(options.comment === undefined ? {} : { comment: options.comment }),\n ...(options.metadata === undefined ? {} : { metadata: options.metadata }),\n };\n },\n\n invalid<Score = never>(\n reason: string,\n options: {\n score?: Score | undefined;\n comment?: string | undefined;\n metadata?: EvalMetadata | undefined;\n } = {},\n ): EvalOutcome<Score> {\n return {\n outcome: \"invalid\",\n reason,\n ...(options.score === undefined ? {} : { score: options.score }),\n ...(options.comment === undefined ? {} : { comment: options.comment }),\n ...(options.metadata === undefined ? {} : { metadata: options.metadata }),\n };\n },\n};\n\nexport type EvalMetricArgs<Input, Output, Expected = unknown> = {\n suiteName: string;\n case: EvalCase<Input, Expected>;\n output: Output;\n};\n\nexport type EvalMetric<Input, Output, Score = unknown, Expected = unknown> = {\n name: string;\n evaluate(\n args: EvalMetricArgs<Input, Output, Expected>,\n ): EvalOutcome<Score> | Promise<EvalOutcome<Score>>;\n};\n\nexport type EvalMetricResult<Score = unknown> = {\n metricName: string;\n outcome: EvalOutcome<Score>;\n reporterErrors: unknown[];\n};\n\nexport type EvalCaseResult<Input, Output, Expected = unknown> = {\n case: EvalCase<Input, Expected>;\n output?: Output | undefined;\n targetError?: unknown;\n metrics: EvalMetricResult[];\n};\n\nexport type EvalSuiteResult<Input, Output, Expected = unknown> = {\n name: string;\n results: Array<EvalCaseResult<Input, Output, Expected>>;\n passed: number;\n failed: number;\n invalid: number;\n durationMs: number;\n};\n\nexport type EvalReportArgs<Input, Output, Score = unknown, Expected = unknown> = {\n suiteName: string;\n case: EvalCase<Input, Expected>;\n output?: Output | undefined;\n targetError?: unknown;\n metric: EvalMetric<Input, Output, Score, Expected>;\n outcome: EvalOutcome<Score>;\n};\n\nexport type EvalReporter<Input = unknown, Output = unknown, Expected = unknown> = {\n report(args: EvalReportArgs<Input, Output, unknown, Expected>): void | Promise<void>;\n};\n\nexport type RunEvalSuiteOptions<Input, Output, Expected = unknown> = {\n name: string;\n cases: Array<EvalCase<Input, Expected>>;\n target: EvalTarget<Input, Output, Expected>;\n metrics: Array<EvalMetric<NoInfer<Input>, NoInfer<Output>, unknown, NoInfer<Expected>>>;\n concurrency?: number | undefined;\n reporters?: Array<EvalReporter<NoInfer<Input>, NoInfer<Output>, NoInfer<Expected>>> | undefined;\n failOnReporterError?: boolean | undefined;\n};\n\nexport async function runEvalSuite<Input, Output, Expected = unknown>(\n options: RunEvalSuiteOptions<Input, Output, Expected>,\n): Promise<EvalSuiteResult<Input, Output, Expected>> {\n const startedAt = Date.now();\n const results = await mapWithConcurrency(\n options.cases,\n Math.max(1, Math.trunc(options.concurrency ?? 1)),\n (testCase) => runEvalCase(options, testCase),\n );\n const counts = countOutcomes(results);\n return {\n name: options.name,\n results,\n ...counts,\n durationMs: Date.now() - startedAt,\n };\n}\n\nexport type ValueSelector<Input, Output, Expected, Value> = (\n args: EvalMetricArgs<Input, Output, Expected>,\n) => Value | Promise<Value>;\n\nexport type SelectorOrValue<Input, Output, Expected, Value> =\n | Value\n | ValueSelector<Input, Output, Expected, Value>;\n\nexport type ExactMatchOptions<Input, Output, Expected = unknown> = {\n name?: string | undefined;\n actual?: ValueSelector<Input, Output, Expected, unknown> | undefined;\n expected?: SelectorOrValue<Input, Output, Expected, unknown> | undefined;\n};\n\nexport function exactMatch<Input, Output, Expected = unknown>(\n options: ExactMatchOptions<Input, Output, Expected> = {},\n): EvalMetric<Input, Output, boolean, Expected> {\n return {\n name: options.name ?? \"exact_match\",\n async evaluate(args) {\n const actual = await resolveActual(options.actual, args);\n const expected = await resolveExpected(options.expected, args);\n if (expected === undefined) {\n return EvalOutcome.invalid(\"No expected value provided for exact match.\");\n }\n const passed = stableComparable(actual) === stableComparable(expected);\n return passed\n ? EvalOutcome.pass(true)\n : EvalOutcome.fail(false, { comment: `Expected ${formatValue(expected)}.` });\n },\n };\n}\n\nexport type ContainsOptions<Input, Output, Expected = unknown> = {\n name?: string | undefined;\n actual?: ValueSelector<Input, Output, Expected, string> | undefined;\n expected?: SelectorOrValue<Input, Output, Expected, string | RegExp> | undefined;\n};\n\nexport function contains<Input, Output, Expected = unknown>(\n options: ContainsOptions<Input, Output, Expected> = {},\n): EvalMetric<Input, Output, boolean, Expected> {\n return {\n name: options.name ?? \"contains\",\n async evaluate(args) {\n const actual = await resolveActualText(options.actual, args);\n const expected = await resolveExpected(options.expected, args);\n if (expected === undefined) {\n return EvalOutcome.invalid(\"No expected value provided for contains.\");\n }\n if (typeof expected !== \"string\" && !(expected instanceof RegExp)) {\n return EvalOutcome.invalid(\"Contains expected value must be a string or RegExp.\");\n }\n const passed = expected instanceof RegExp ? expected.test(actual) : actual.includes(expected);\n return passed\n ? EvalOutcome.pass(true)\n : EvalOutcome.fail(false, { comment: `Output did not contain ${String(expected)}.` });\n },\n };\n}\n\nexport type SemanticSimilarityOptions<Input, Output, Expected = unknown> = {\n name?: string | undefined;\n model: EmbeddingModel;\n threshold: number;\n actual?: ValueSelector<Input, Output, Expected, string> | undefined;\n expected?: SelectorOrValue<Input, Output, Expected, string> | undefined;\n};\n\nexport function semanticSimilarity<Input, Output, Expected = unknown>(\n options: SemanticSimilarityOptions<Input, Output, Expected>,\n): EvalMetric<Input, Output, number, Expected> {\n return {\n name: options.name ?? \"semantic_similarity\",\n async evaluate(args) {\n const actual = await resolveActualText(options.actual, args);\n const expected = await resolveExpected(options.expected, args);\n if (expected === undefined) {\n return EvalOutcome.invalid(\"No expected value provided for semantic similarity.\");\n }\n if (typeof expected !== \"string\") {\n return EvalOutcome.invalid(\"Semantic similarity expected value must be a string.\");\n }\n const [actualEmbedding, expectedEmbedding] = await Promise.all([\n embedText(options.model, actual),\n embedText(options.model, expected),\n ]);\n const score = cosineSimilarity(actualEmbedding.vector, expectedEmbedding.vector);\n return score >= options.threshold\n ? EvalOutcome.pass(score)\n : EvalOutcome.fail(score, { comment: `Similarity below threshold ${options.threshold}.` });\n },\n };\n}\n\nexport type LlmJudgeOptions<Input, Output, SchemaOutput, Expected = unknown> = {\n name?: string | undefined;\n model: CompletionModel;\n schema: ZodSchema<SchemaOutput>;\n passes(value: SchemaOutput): boolean;\n instructions?: string | undefined;\n retries?: number | undefined;\n prompt?: ValueSelector<Input, Output, Expected, string> | undefined;\n};\n\nexport function llmJudge<Input, Output, SchemaOutput, Expected = unknown>(\n options: LlmJudgeOptions<Input, Output, SchemaOutput, Expected>,\n): EvalMetric<Input, Output, SchemaOutput, Expected> {\n const extractor = new ExtractorBuilder(options.model, options.schema)\n .instructions(\n options.instructions ??\n \"Judge the eval case by the requested schema. Submit the judgment using the schema.\",\n )\n .retries(options.retries ?? 0)\n .build();\n\n return {\n name: options.name ?? \"llm_judge\",\n async evaluate(args) {\n try {\n const judgment = await extractor.extract(await resolveJudgePrompt(options.prompt, args));\n return options.passes(judgment) ? EvalOutcome.pass(judgment) : EvalOutcome.fail(judgment);\n } catch (error) {\n return EvalOutcome.invalid(errorMessage(error));\n }\n },\n };\n}\n\nexport type LlmScoreMetricScore = {\n score: number;\n feedback: string;\n};\n\nexport type LlmScoreOptions<Input, Output, Expected = unknown> = {\n name?: string | undefined;\n model: CompletionModel;\n threshold: number;\n criteria: string | string[];\n instructions?: string | undefined;\n retries?: number | undefined;\n prompt?: ValueSelector<Input, Output, Expected, string> | undefined;\n};\n\nexport function llmScore<Input, Output, Expected = unknown>(\n options: LlmScoreOptions<Input, Output, Expected>,\n): EvalMetric<Input, Output, LlmScoreMetricScore, Expected> {\n const criteria = Array.isArray(options.criteria) ? options.criteria.join(\"\\n\") : options.criteria;\n const extractor = new ExtractorBuilder(\n options.model,\n z.object({\n score: z.number(),\n feedback: z.string(),\n }),\n )\n .instructions(\n options.instructions ??\n `Score the eval case against these criteria:\\n${criteria}\\n\\nReturn a score between 0 and 1 and brief feedback.`,\n )\n .retries(options.retries ?? 0)\n .build();\n\n return {\n name: options.name ?? \"llm_score\",\n async evaluate(args) {\n try {\n const score = await extractor.extract(await resolveJudgePrompt(options.prompt, args));\n if (score.score < 0 || score.score > 1) {\n return EvalOutcome.invalid(`Score ${score.score} outside valid range [0, 1].`, {\n score,\n });\n }\n return score.score >= options.threshold\n ? EvalOutcome.pass(score, { comment: score.feedback })\n : EvalOutcome.fail(score, { comment: score.feedback });\n } catch (error) {\n return EvalOutcome.invalid(errorMessage(error));\n }\n },\n };\n}\n\nexport type AgentEvalTargetOptions<Input, Output = PromptResponse> = {\n prompt?: ((input: Input, testCase: EvalCase<Input>) => string | Message) | undefined;\n output?: ((response: PromptResponse, testCase: EvalCase<Input>) => Output) | undefined;\n};\n\nexport function agentEvalTarget<Input>(\n agent: Agent,\n options?: AgentEvalTargetOptions<Input, PromptResponse>,\n): EvalTarget<Input, PromptResponse>;\nexport function agentEvalTarget<Input, Output>(\n agent: Agent,\n options: AgentEvalTargetOptions<Input, Output>,\n): EvalTarget<Input, Output>;\nexport function agentEvalTarget<Input, Output>(\n agent: Agent,\n options: AgentEvalTargetOptions<Input, Output | PromptResponse> = {},\n): EvalTarget<Input, Output | PromptResponse> {\n return async (input, testCase) => {\n const prompt = options.prompt?.(input, testCase) ?? String(input);\n const response = await agent.prompt(prompt).send();\n return options.output === undefined ? response : options.output(response, testCase);\n };\n}\n\nasync function runEvalCase<Input, Output, Expected>(\n options: RunEvalSuiteOptions<Input, Output, Expected>,\n testCase: EvalCase<Input, Expected>,\n): Promise<EvalCaseResult<Input, Output, Expected>> {\n let output: Output | undefined;\n let targetError: unknown;\n try {\n output = await options.target(testCase.input, testCase);\n } catch (error) {\n targetError = error;\n }\n\n const metrics: EvalMetricResult[] = [];\n for (const metric of options.metrics) {\n const outcome =\n targetError === undefined\n ? await safeEvaluate(options.name, testCase, output as Output, metric)\n : EvalOutcome.invalid(`Target failed: ${errorMessage(targetError)}`);\n const reporterErrors = await reportOutcome({\n suiteName: options.name,\n testCase,\n output,\n targetError,\n metric,\n outcome,\n reporters: options.reporters ?? [],\n failOnReporterError: options.failOnReporterError === true,\n });\n metrics.push({ metricName: metric.name, outcome, reporterErrors });\n }\n\n return {\n case: testCase,\n ...(output === undefined ? {} : { output }),\n ...(targetError === undefined ? {} : { targetError }),\n metrics,\n };\n}\n\nasync function safeEvaluate<Input, Output, Expected>(\n suiteName: string,\n testCase: EvalCase<Input, Expected>,\n output: Output,\n metric: EvalMetric<Input, Output, unknown, Expected>,\n): Promise<EvalOutcome> {\n try {\n return await metric.evaluate({ suiteName, case: testCase, output });\n } catch (error) {\n return EvalOutcome.invalid(errorMessage(error));\n }\n}\n\nasync function reportOutcome<Input, Output, Expected>(args: {\n suiteName: string;\n testCase: EvalCase<Input, Expected>;\n output: Output | undefined;\n targetError: unknown;\n metric: EvalMetric<Input, Output, unknown, Expected>;\n outcome: EvalOutcome;\n reporters: Array<EvalReporter<Input, Output, Expected>>;\n failOnReporterError: boolean;\n}): Promise<unknown[]> {\n const errors: unknown[] = [];\n for (const reporter of args.reporters) {\n try {\n await reporter.report({\n suiteName: args.suiteName,\n case: args.testCase,\n output: args.output,\n targetError: args.targetError,\n metric: args.metric,\n outcome: args.outcome,\n });\n } catch (error) {\n if (args.failOnReporterError) {\n throw error;\n }\n errors.push(error);\n }\n }\n return errors;\n}\n\nfunction countOutcomes(results: Array<EvalCaseResult<unknown, unknown, unknown>>): {\n passed: number;\n failed: number;\n invalid: number;\n} {\n let passed = 0;\n let failed = 0;\n let invalid = 0;\n for (const result of results) {\n for (const metric of result.metrics) {\n if (metric.outcome.outcome === \"pass\") passed += 1;\n if (metric.outcome.outcome === \"fail\") failed += 1;\n if (metric.outcome.outcome === \"invalid\") invalid += 1;\n }\n }\n return { passed, failed, invalid };\n}\n\nasync function resolveActual<Input, Output, Expected>(\n selector: ValueSelector<Input, Output, Expected, unknown> | undefined,\n args: EvalMetricArgs<Input, Output, Expected>,\n): Promise<unknown> {\n return selector === undefined ? defaultOutputValue(args.output) : selector(args);\n}\n\nasync function resolveActualText<Input, Output, Expected>(\n selector: ValueSelector<Input, Output, Expected, string> | undefined,\n args: EvalMetricArgs<Input, Output, Expected>,\n): Promise<string> {\n const value = selector === undefined ? defaultOutputValue(args.output) : await selector(args);\n return typeof value === \"string\" ? value : JSON.stringify(value);\n}\n\nasync function resolveExpected<Input, Output, Expected, Value>(\n selectorOrValue: SelectorOrValue<Input, Output, Expected, Value> | undefined,\n args: EvalMetricArgs<Input, Output, Expected>,\n): Promise<Value | Expected | undefined> {\n if (selectorOrValue === undefined) {\n return args.case.expected;\n }\n return typeof selectorOrValue === \"function\"\n ? (selectorOrValue as ValueSelector<Input, Output, Expected, Value>)(args)\n : selectorOrValue;\n}\n\nasync function resolveJudgePrompt<Input, Output, Expected>(\n selector: ValueSelector<Input, Output, Expected, string> | undefined,\n args: EvalMetricArgs<Input, Output, Expected>,\n): Promise<string> {\n if (selector !== undefined) {\n return selector(args);\n }\n return [\n `Suite: ${args.suiteName}`,\n `Case: ${args.case.id}`,\n `Input: ${formatValue(args.case.input)}`,\n `Expected: ${formatValue(args.case.expected)}`,\n `Output: ${formatValue(defaultOutputValue(args.output))}`,\n ].join(\"\\n\\n\");\n}\n\nfunction defaultOutputValue(output: unknown): unknown {\n if (\n typeof output === \"object\" &&\n output !== null &&\n \"output\" in output &&\n typeof (output as { output?: unknown }).output === \"string\"\n ) {\n return (output as { output: string }).output;\n }\n return output;\n}\n\nfunction stableComparable(value: unknown): string {\n if (typeof value === \"string\") {\n return value;\n }\n return JSON.stringify(value);\n}\n\nfunction formatValue(value: unknown): string {\n if (typeof value === \"string\") {\n return value;\n }\n try {\n return JSON.stringify(value);\n } catch {\n return String(value);\n }\n}\n\nfunction errorMessage(error: unknown): string {\n return error instanceof Error ? error.message : String(error);\n}\n\nasync function mapWithConcurrency<Input, Output>(\n inputs: Input[],\n concurrency: number,\n mapper: (input: Input) => Promise<Output>,\n): Promise<Output[]> {\n const results = new Array<Output>(inputs.length);\n let next = 0;\n\n async function worker(): Promise<void> {\n while (next < inputs.length) {\n const index = next;\n next += 1;\n results[index] = await mapper(inputs[index] as Input);\n }\n }\n\n await Promise.all(Array.from({ length: Math.min(concurrency, inputs.length) }, () => worker()));\n return results;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;AAAA,SAAS,SAAS;AA6CX,IAAM,cAAc;AAAA,EACzB,KACE,OACA,UAAiF,CAAC,GAC9D;AACpB,WAAO;AAAA,MACL,SAAS;AAAA,MACT,GAAI,UAAU,SAAY,CAAC,IAAI,EAAE,MAAM;AAAA,MACvC,GAAI,QAAQ,YAAY,SAAY,CAAC,IAAI,EAAE,SAAS,QAAQ,QAAQ;AAAA,MACpE,GAAI,QAAQ,aAAa,SAAY,CAAC,IAAI,EAAE,UAAU,QAAQ,SAAS;AAAA,IACzE;AAAA,EACF;AAAA,EAEA,KACE,OACA,UAAiF,CAAC,GAC9D;AACpB,WAAO;AAAA,MACL,SAAS;AAAA,MACT,GAAI,UAAU,SAAY,CAAC,IAAI,EAAE,MAAM;AAAA,MACvC,GAAI,QAAQ,YAAY,SAAY,CAAC,IAAI,EAAE,SAAS,QAAQ,QAAQ;AAAA,MACpE,GAAI,QAAQ,aAAa,SAAY,CAAC,IAAI,EAAE,UAAU,QAAQ,SAAS;AAAA,IACzE;AAAA,EACF;AAAA,EAEA,QACE,QACA,UAII,CAAC,GACe;AACpB,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,MACA,GAAI,QAAQ,UAAU,SAAY,CAAC,IAAI,EAAE,OAAO,QAAQ,MAAM;AAAA,MAC9D,GAAI,QAAQ,YAAY,SAAY,CAAC,IAAI,EAAE,SAAS,QAAQ,QAAQ;AAAA,MACpE,GAAI,QAAQ,aAAa,SAAY,CAAC,IAAI,EAAE,UAAU,QAAQ,SAAS;AAAA,IACzE;AAAA,EACF;AACF;AA4DA,eAAsB,aACpB,SACmD;AACnD,QAAM,YAAY,KAAK,IAAI;AAC3B,QAAM,UAAU,MAAM;AAAA,IACpB,QAAQ;AAAA,IACR,KAAK,IAAI,GAAG,KAAK,MAAM,QAAQ,eAAe,CAAC,CAAC;AAAA,IAChD,CAAC,aAAa,YAAY,SAAS,QAAQ;AAAA,EAC7C;AACA,QAAM,SAAS,cAAc,OAAO;AACpC,SAAO;AAAA,IACL,MAAM,QAAQ;AAAA,IACd;AAAA,IACA,GAAG;AAAA,IACH,YAAY,KAAK,IAAI,IAAI;AAAA,EAC3B;AACF;AAgBO,SAAS,WACd,UAAsD,CAAC,GACT;AAC9C,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,YAAM,SAAS,MAAM,cAAc,QAAQ,QAAQ,IAAI;AACvD,YAAM,WAAW,MAAM,gBAAgB,QAAQ,UAAU,IAAI;AAC7D,UAAI,aAAa,QAAW;AAC1B,eAAO,YAAY,QAAQ,6CAA6C;AAAA,MAC1E;AACA,YAAM,SAAS,iBAAiB,MAAM,MAAM,iBAAiB,QAAQ;AACrE,aAAO,SACH,YAAY,KAAK,IAAI,IACrB,YAAY,KAAK,OAAO,EAAE,SAAS,YAAY,YAAY,QAAQ,CAAC,IAAI,CAAC;AAAA,IAC/E;AAAA,EACF;AACF;AAQO,SAAS,SACd,UAAoD,CAAC,GACP;AAC9C,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,YAAM,SAAS,MAAM,kBAAkB,QAAQ,QAAQ,IAAI;AAC3D,YAAM,WAAW,MAAM,gBAAgB,QAAQ,UAAU,IAAI;AAC7D,UAAI,aAAa,QAAW;AAC1B,eAAO,YAAY,QAAQ,0CAA0C;AAAA,MACvE;AACA,UAAI,OAAO,aAAa,YAAY,EAAE,oBAAoB,SAAS;AACjE,eAAO,YAAY,QAAQ,qDAAqD;AAAA,MAClF;AACA,YAAM,SAAS,oBAAoB,SAAS,SAAS,KAAK,MAAM,IAAI,OAAO,SAAS,QAAQ;AAC5F,aAAO,SACH,YAAY,KAAK,IAAI,IACrB,YAAY,KAAK,OAAO,EAAE,SAAS,0BAA0B,OAAO,QAAQ,CAAC,IAAI,CAAC;AAAA,IACxF;AAAA,EACF;AACF;AAUO,SAAS,mBACd,SAC6C;AAC7C,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,YAAM,SAAS,MAAM,kBAAkB,QAAQ,QAAQ,IAAI;AAC3D,YAAM,WAAW,MAAM,gBAAgB,QAAQ,UAAU,IAAI;AAC7D,UAAI,aAAa,QAAW;AAC1B,eAAO,YAAY,QAAQ,qDAAqD;AAAA,MAClF;AACA,UAAI,OAAO,aAAa,UAAU;AAChC,eAAO,YAAY,QAAQ,sDAAsD;AAAA,MACnF;AACA,YAAM,CAAC,iBAAiB,iBAAiB,IAAI,MAAM,QAAQ,IAAI;AAAA,QAC7D,UAAU,QAAQ,OAAO,MAAM;AAAA,QAC/B,UAAU,QAAQ,OAAO,QAAQ;AAAA,MACnC,CAAC;AACD,YAAM,QAAQ,iBAAiB,gBAAgB,QAAQ,kBAAkB,MAAM;AAC/E,aAAO,SAAS,QAAQ,YACpB,YAAY,KAAK,KAAK,IACtB,YAAY,KAAK,OAAO,EAAE,SAAS,8BAA8B,QAAQ,SAAS,IAAI,CAAC;AAAA,IAC7F;AAAA,EACF;AACF;AAYO,SAAS,SACd,SACmD;AACnD,QAAM,YAAY,IAAI,iBAAiB,QAAQ,OAAO,QAAQ,MAAM,EACjE;AAAA,IACC,QAAQ,gBACN;AAAA,EACJ,EACC,QAAQ,QAAQ,WAAW,CAAC,EAC5B,MAAM;AAET,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,UAAI;AACF,cAAM,WAAW,MAAM,UAAU,QAAQ,MAAM,mBAAmB,QAAQ,QAAQ,IAAI,CAAC;AACvF,eAAO,QAAQ,OAAO,QAAQ,IAAI,YAAY,KAAK,QAAQ,IAAI,YAAY,KAAK,QAAQ;AAAA,MAC1F,SAAS,OAAO;AACd,eAAO,YAAY,QAAQ,aAAa,KAAK,CAAC;AAAA,MAChD;AAAA,IACF;AAAA,EACF;AACF;AAiBO,SAAS,SACd,SAC0D;AAC1D,QAAM,WAAW,MAAM,QAAQ,QAAQ,QAAQ,IAAI,QAAQ,SAAS,KAAK,IAAI,IAAI,QAAQ;AACzF,QAAM,YAAY,IAAI;AAAA,IACpB,QAAQ;AAAA,IACR,EAAE,OAAO;AAAA,MACP,OAAO,EAAE,OAAO;AAAA,MAChB,UAAU,EAAE,OAAO;AAAA,IACrB,CAAC;AAAA,EACH,EACG;AAAA,IACC,QAAQ,gBACN;AAAA,EAAgD,QAAQ;AAAA;AAAA;AAAA,EAC5D,EACC,QAAQ,QAAQ,WAAW,CAAC,EAC5B,MAAM;AAET,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,UAAI;AACF,cAAM,QAAQ,MAAM,UAAU,QAAQ,MAAM,mBAAmB,QAAQ,QAAQ,IAAI,CAAC;AACpF,YAAI,MAAM,QAAQ,KAAK,MAAM,QAAQ,GAAG;AACtC,iBAAO,YAAY,QAAQ,SAAS,MAAM,KAAK,gCAAgC;AAAA,YAC7E;AAAA,UACF,CAAC;AAAA,QACH;AACA,eAAO,MAAM,SAAS,QAAQ,YAC1B,YAAY,KAAK,OAAO,EAAE,SAAS,MAAM,SAAS,CAAC,IACnD,YAAY,KAAK,OAAO,EAAE,SAAS,MAAM,SAAS,CAAC;AAAA,MACzD,SAAS,OAAO;AACd,eAAO,YAAY,QAAQ,aAAa,KAAK,CAAC;AAAA,MAChD;AAAA,IACF;AAAA,EACF;AACF;AAeO,SAAS,gBACd,OACA,UAAkE,CAAC,GACvB;AAC5C,SAAO,OAAO,OAAO,aAAa;AAChC,UAAM,SAAS,QAAQ,SAAS,OAAO,QAAQ,KAAK,OAAO,KAAK;AAChE,UAAM,WAAW,MAAM,MAAM,OAAO,MAAM,EAAE,KAAK;AACjD,WAAO,QAAQ,WAAW,SAAY,WAAW,QAAQ,OAAO,UAAU,QAAQ;AAAA,EACpF;AACF;AAEA,eAAe,YACb,SACA,UACkD;AAClD,MAAI;AACJ,MAAI;AACJ,MAAI;AACF,aAAS,MAAM,QAAQ,OAAO,SAAS,OAAO,QAAQ;AAAA,EACxD,SAAS,OAAO;AACd,kBAAc;AAAA,EAChB;AAEA,QAAM,UAA8B,CAAC;AACrC,aAAW,UAAU,QAAQ,SAAS;AACpC,UAAM,UACJ,gBAAgB,SACZ,MAAM,aAAa,QAAQ,MAAM,UAAU,QAAkB,MAAM,IACnE,YAAY,QAAQ,kBAAkB,aAAa,WAAW,CAAC,EAAE;AACvE,UAAM,iBAAiB,MAAM,cAAc;AAAA,MACzC,WAAW,QAAQ;AAAA,MACnB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW,QAAQ,aAAa,CAAC;AAAA,MACjC,qBAAqB,QAAQ,wBAAwB;AAAA,IACvD,CAAC;AACD,YAAQ,KAAK,EAAE,YAAY,OAAO,MAAM,SAAS,eAAe,CAAC;AAAA,EACnE;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,GAAI,WAAW,SAAY,CAAC,IAAI,EAAE,OAAO;AAAA,IACzC,GAAI,gBAAgB,SAAY,CAAC,IAAI,EAAE,YAAY;AAAA,IACnD;AAAA,EACF;AACF;AAEA,eAAe,aACb,WACA,UACA,QACA,QACsB;AACtB,MAAI;AACF,WAAO,MAAM,OAAO,SAAS,EAAE,WAAW,MAAM,UAAU,OAAO,CAAC;AAAA,EACpE,SAAS,OAAO;AACd,WAAO,YAAY,QAAQ,aAAa,KAAK,CAAC;AAAA,EAChD;AACF;AAEA,eAAe,cAAuC,MAS/B;AACrB,QAAM,SAAoB,CAAC;AAC3B,aAAW,YAAY,KAAK,WAAW;AACrC,QAAI;AACF,YAAM,SAAS,OAAO;AAAA,QACpB,WAAW,KAAK;AAAA,QAChB,MAAM,KAAK;AAAA,QACX,QAAQ,KAAK;AAAA,QACb,aAAa,KAAK;AAAA,QAClB,QAAQ,KAAK;AAAA,QACb,SAAS,KAAK;AAAA,MAChB,CAAC;AAAA,IACH,SAAS,OAAO;AACd,UAAI,KAAK,qBAAqB;AAC5B,cAAM;AAAA,MACR;AACA,aAAO,KAAK,KAAK;AAAA,IACnB;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,cAAc,SAIrB;AACA,MAAI,SAAS;AACb,MAAI,SAAS;AACb,MAAI,UAAU;AACd,aAAW,UAAU,SAAS;AAC5B,eAAW,UAAU,OAAO,SAAS;AACnC,UAAI,OAAO,QAAQ,YAAY,OAAQ,WAAU;AACjD,UAAI,OAAO,QAAQ,YAAY,OAAQ,WAAU;AACjD,UAAI,OAAO,QAAQ,YAAY,UAAW,YAAW;AAAA,IACvD;AAAA,EACF;AACA,SAAO,EAAE,QAAQ,QAAQ,QAAQ;AACnC;AAEA,eAAe,cACb,UACA,MACkB;AAClB,SAAO,aAAa,SAAY,mBAAmB,KAAK,MAAM,IAAI,SAAS,IAAI;AACjF;AAEA,eAAe,kBACb,UACA,MACiB;AACjB,QAAM,QAAQ,aAAa,SAAY,mBAAmB,KAAK,MAAM,IAAI,MAAM,SAAS,IAAI;AAC5F,SAAO,OAAO,UAAU,WAAW,QAAQ,KAAK,UAAU,KAAK;AACjE;AAEA,eAAe,gBACb,iBACA,MACuC;AACvC,MAAI,oBAAoB,QAAW;AACjC,WAAO,KAAK,KAAK;AAAA,EACnB;AACA,SAAO,OAAO,oBAAoB,aAC7B,gBAAkE,IAAI,IACvE;AACN;AAEA,eAAe,mBACb,UACA,MACiB;AACjB,MAAI,aAAa,QAAW;AAC1B,WAAO,SAAS,IAAI;AAAA,EACtB;AACA,SAAO;AAAA,IACL,UAAU,KAAK,SAAS;AAAA,IACxB,SAAS,KAAK,KAAK,EAAE;AAAA,IACrB,UAAU,YAAY,KAAK,KAAK,KAAK,CAAC;AAAA,IACtC,aAAa,YAAY,KAAK,KAAK,QAAQ,CAAC;AAAA,IAC5C,WAAW,YAAY,mBAAmB,KAAK,MAAM,CAAC,CAAC;AAAA,EACzD,EAAE,KAAK,MAAM;AACf;AAEA,SAAS,mBAAmB,QAA0B;AACpD,MACE,OAAO,WAAW,YAClB,WAAW,QACX,YAAY,UACZ,OAAQ,OAAgC,WAAW,UACnD;AACA,WAAQ,OAA8B;AAAA,EACxC;AACA,SAAO;AACT;AAEA,SAAS,iBAAiB,OAAwB;AAChD,MAAI,OAAO,UAAU,UAAU;AAC7B,WAAO;AAAA,EACT;AACA,SAAO,KAAK,UAAU,KAAK;AAC7B;AAEA,SAAS,YAAY,OAAwB;AAC3C,MAAI,OAAO,UAAU,UAAU;AAC7B,WAAO;AAAA,EACT;AACA,MAAI;AACF,WAAO,KAAK,UAAU,KAAK;AAAA,EAC7B,QAAQ;AACN,WAAO,OAAO,KAAK;AAAA,EACrB;AACF;AAEA,SAAS,aAAa,OAAwB;AAC5C,SAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAC9D;AAEA,eAAe,mBACb,QACA,aACA,QACmB;AACnB,QAAM,UAAU,IAAI,MAAc,OAAO,MAAM;AAC/C,MAAI,OAAO;AAEX,iBAAe,SAAwB;AACrC,WAAO,OAAO,OAAO,QAAQ;AAC3B,YAAM,QAAQ;AACd,cAAQ;AACR,cAAQ,KAAK,IAAI,MAAM,OAAO,OAAO,KAAK,CAAU;AAAA,IACtD;AAAA,EACF;AAEA,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,aAAa,OAAO,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC,CAAC;AAC9F,SAAO;AACT;","names":[]}
1
+ {"version":3,"sources":["../../src/evals/agent-target.ts","../../src/evals/metrics.ts","../../src/evals/format.ts","../../src/evals/outcome.ts","../../src/evals/selectors.ts","../../src/evals/runner.ts"],"sourcesContent":["import type { Agent } from \"../agent/agent\";\nimport type { PromptResponse } from \"../agent/request\";\nimport type { Message } from \"../completion\";\nimport type { EvalCase, EvalTarget } from \"./types\";\n\nexport type AgentEvalTargetOptions<Input, Output = PromptResponse> = {\n prompt?: ((input: Input, testCase: EvalCase<Input>) => string | Message) | undefined;\n output?: ((response: PromptResponse, testCase: EvalCase<Input>) => Output) | undefined;\n};\n\nexport function agentEvalTarget<Input>(\n agent: Agent,\n options?: AgentEvalTargetOptions<Input, PromptResponse>,\n): EvalTarget<Input, PromptResponse>;\nexport function agentEvalTarget<Input, Output>(\n agent: Agent,\n options: AgentEvalTargetOptions<Input, Output>,\n): EvalTarget<Input, Output>;\nexport function agentEvalTarget<Input, Output>(\n agent: Agent,\n options: AgentEvalTargetOptions<Input, Output | PromptResponse> = {},\n): EvalTarget<Input, Output | PromptResponse> {\n return async (input, testCase) => {\n const prompt = options.prompt?.(input, testCase) ?? String(input);\n const response = await agent.prompt(prompt).send();\n return options.output === undefined ? response : options.output(response, testCase);\n };\n}\n","import { z } from \"zod\";\nimport type { CompletionModel } from \"../completion\";\nimport { cosineSimilarity, type EmbeddingModel, embedText } from \"../embeddings\";\nimport { ExtractorBuilder } from \"../extractor\";\nimport type { ZodSchema } from \"../schema\";\nimport { errorMessage, formatValue, stableComparable } from \"./format\";\nimport { EvalOutcome } from \"./outcome\";\nimport { resolveActual, resolveActualText, resolveExpected, resolveJudgePrompt } from \"./selectors\";\nimport type { EvalMetric, SelectorOrValue, ValueSelector } from \"./types\";\n\nexport type ExactMatchOptions<Input, Output, Expected = unknown> = {\n name?: string | undefined;\n actual?: ValueSelector<Input, Output, Expected, unknown> | undefined;\n expected?: SelectorOrValue<Input, Output, Expected, unknown> | undefined;\n};\n\nexport function exactMatch<Input, Output, Expected = unknown>(\n options: ExactMatchOptions<Input, Output, Expected> = {},\n): EvalMetric<Input, Output, boolean, Expected> {\n return {\n name: options.name ?? \"exact_match\",\n async evaluate(args) {\n const actual = await resolveActual(options.actual, args);\n const expected = await resolveExpected(options.expected, args);\n if (expected === undefined) {\n return EvalOutcome.invalid(\"No expected value provided for exact match.\");\n }\n const passed = stableComparable(actual) === stableComparable(expected);\n return passed\n ? EvalOutcome.pass(true)\n : EvalOutcome.fail(false, { comment: `Expected ${formatValue(expected)}.` });\n },\n };\n}\n\nexport type ContainsOptions<Input, Output, Expected = unknown> = {\n name?: string | undefined;\n actual?: ValueSelector<Input, Output, Expected, string> | undefined;\n expected?: SelectorOrValue<Input, Output, Expected, string | RegExp> | undefined;\n};\n\nexport function contains<Input, Output, Expected = unknown>(\n options: ContainsOptions<Input, Output, Expected> = {},\n): EvalMetric<Input, Output, boolean, Expected> {\n return {\n name: options.name ?? \"contains\",\n async evaluate(args) {\n const actual = await resolveActualText(options.actual, args);\n const expected = await resolveExpected(options.expected, args);\n if (expected === undefined) {\n return EvalOutcome.invalid(\"No expected value provided for contains.\");\n }\n if (typeof expected !== \"string\" && !(expected instanceof RegExp)) {\n return EvalOutcome.invalid(\"Contains expected value must be a string or RegExp.\");\n }\n const passed = expected instanceof RegExp ? expected.test(actual) : actual.includes(expected);\n return passed\n ? EvalOutcome.pass(true)\n : EvalOutcome.fail(false, { comment: `Output did not contain ${String(expected)}.` });\n },\n };\n}\n\nexport type SemanticSimilarityOptions<Input, Output, Expected = unknown> = {\n name?: string | undefined;\n model: EmbeddingModel;\n threshold: number;\n actual?: ValueSelector<Input, Output, Expected, string> | undefined;\n expected?: SelectorOrValue<Input, Output, Expected, string> | undefined;\n};\n\nexport function semanticSimilarity<Input, Output, Expected = unknown>(\n options: SemanticSimilarityOptions<Input, Output, Expected>,\n): EvalMetric<Input, Output, number, Expected> {\n return {\n name: options.name ?? \"semantic_similarity\",\n async evaluate(args) {\n const actual = await resolveActualText(options.actual, args);\n const expected = await resolveExpected(options.expected, args);\n if (expected === undefined) {\n return EvalOutcome.invalid(\"No expected value provided for semantic similarity.\");\n }\n if (typeof expected !== \"string\") {\n return EvalOutcome.invalid(\"Semantic similarity expected value must be a string.\");\n }\n const [actualEmbedding, expectedEmbedding] = await Promise.all([\n embedText(options.model, actual),\n embedText(options.model, expected),\n ]);\n const score = cosineSimilarity(actualEmbedding.vector, expectedEmbedding.vector);\n return score >= options.threshold\n ? EvalOutcome.pass(score)\n : EvalOutcome.fail(score, { comment: `Similarity below threshold ${options.threshold}.` });\n },\n };\n}\n\nexport type LlmJudgeOptions<Input, Output, SchemaOutput, Expected = unknown> = {\n name?: string | undefined;\n model: CompletionModel;\n schema: ZodSchema<SchemaOutput>;\n passes(value: SchemaOutput): boolean;\n instructions?: string | undefined;\n retries?: number | undefined;\n prompt?: ValueSelector<Input, Output, Expected, string> | undefined;\n};\n\nexport function llmJudge<Input, Output, SchemaOutput, Expected = unknown>(\n options: LlmJudgeOptions<Input, Output, SchemaOutput, Expected>,\n): EvalMetric<Input, Output, SchemaOutput, Expected> {\n const extractor = new ExtractorBuilder(options.model, options.schema)\n .instructions(\n options.instructions ??\n \"Judge the eval case by the requested schema. Submit the judgment using the schema.\",\n )\n .retries(options.retries ?? 0)\n .build();\n\n return {\n name: options.name ?? \"llm_judge\",\n async evaluate(args) {\n try {\n const judgment = await extractor.extract(await resolveJudgePrompt(options.prompt, args));\n return options.passes(judgment) ? EvalOutcome.pass(judgment) : EvalOutcome.fail(judgment);\n } catch (error) {\n return EvalOutcome.invalid(errorMessage(error));\n }\n },\n };\n}\n\nexport type LlmScoreMetricScore = {\n score: number;\n feedback: string;\n};\n\nexport type LlmScoreOptions<Input, Output, Expected = unknown> = {\n name?: string | undefined;\n model: CompletionModel;\n threshold: number;\n criteria: string | string[];\n instructions?: string | undefined;\n retries?: number | undefined;\n prompt?: ValueSelector<Input, Output, Expected, string> | undefined;\n};\n\nexport function llmScore<Input, Output, Expected = unknown>(\n options: LlmScoreOptions<Input, Output, Expected>,\n): EvalMetric<Input, Output, LlmScoreMetricScore, Expected> {\n const criteria = Array.isArray(options.criteria) ? options.criteria.join(\"\\n\") : options.criteria;\n const extractor = new ExtractorBuilder(\n options.model,\n z.object({\n score: z.number(),\n feedback: z.string(),\n }),\n )\n .instructions(\n options.instructions ??\n `Score the eval case against these criteria:\\n${criteria}\\n\\nReturn a score between 0 and 1 and brief feedback.`,\n )\n .retries(options.retries ?? 0)\n .build();\n\n return {\n name: options.name ?? \"llm_score\",\n async evaluate(args) {\n try {\n const score = await extractor.extract(await resolveJudgePrompt(options.prompt, args));\n if (score.score < 0 || score.score > 1) {\n return EvalOutcome.invalid(`Score ${score.score} outside valid range [0, 1].`, {\n score,\n });\n }\n return score.score >= options.threshold\n ? EvalOutcome.pass(score, { comment: score.feedback })\n : EvalOutcome.fail(score, { comment: score.feedback });\n } catch (error) {\n return EvalOutcome.invalid(errorMessage(error));\n }\n },\n };\n}\n","export function defaultOutputValue(output: unknown): unknown {\n if (\n typeof output === \"object\" &&\n output !== null &&\n \"output\" in output &&\n typeof (output as { output?: unknown }).output === \"string\"\n ) {\n return (output as { output: string }).output;\n }\n return output;\n}\n\nexport function stableComparable(value: unknown): string {\n if (typeof value === \"string\") {\n return value;\n }\n return JSON.stringify(value);\n}\n\nexport function formatValue(value: unknown): string {\n if (typeof value === \"string\") {\n return value;\n }\n try {\n return JSON.stringify(value);\n } catch {\n return String(value);\n }\n}\n\nexport function errorMessage(error: unknown): string {\n return error instanceof Error ? error.message : String(error);\n}\n","import type { EvalMetadata } from \"./types\";\n\nexport type EvalOutcome<Score = unknown> =\n | {\n outcome: \"pass\";\n score?: Score | undefined;\n comment?: string | undefined;\n metadata?: EvalMetadata | undefined;\n }\n | {\n outcome: \"fail\";\n score?: Score | undefined;\n comment?: string | undefined;\n metadata?: EvalMetadata | undefined;\n }\n | {\n outcome: \"invalid\";\n reason: string;\n score?: Score | undefined;\n comment?: string | undefined;\n metadata?: EvalMetadata | undefined;\n };\n\nexport const EvalOutcome = {\n pass<Score>(\n score?: Score,\n options: { comment?: string | undefined; metadata?: EvalMetadata | undefined } = {},\n ): EvalOutcome<Score> {\n return {\n outcome: \"pass\",\n ...(score === undefined ? {} : { score }),\n ...(options.comment === undefined ? {} : { comment: options.comment }),\n ...(options.metadata === undefined ? {} : { metadata: options.metadata }),\n };\n },\n\n fail<Score>(\n score?: Score,\n options: { comment?: string | undefined; metadata?: EvalMetadata | undefined } = {},\n ): EvalOutcome<Score> {\n return {\n outcome: \"fail\",\n ...(score === undefined ? {} : { score }),\n ...(options.comment === undefined ? {} : { comment: options.comment }),\n ...(options.metadata === undefined ? {} : { metadata: options.metadata }),\n };\n },\n\n invalid<Score = never>(\n reason: string,\n options: {\n score?: Score | undefined;\n comment?: string | undefined;\n metadata?: EvalMetadata | undefined;\n } = {},\n ): EvalOutcome<Score> {\n return {\n outcome: \"invalid\",\n reason,\n ...(options.score === undefined ? {} : { score: options.score }),\n ...(options.comment === undefined ? {} : { comment: options.comment }),\n ...(options.metadata === undefined ? {} : { metadata: options.metadata }),\n };\n },\n};\n","import { defaultOutputValue, formatValue } from \"./format\";\nimport type { EvalMetricArgs, SelectorOrValue, ValueSelector } from \"./types\";\n\nexport async function resolveActual<Input, Output, Expected>(\n selector: ValueSelector<Input, Output, Expected, unknown> | undefined,\n args: EvalMetricArgs<Input, Output, Expected>,\n): Promise<unknown> {\n return selector === undefined ? defaultOutputValue(args.output) : selector(args);\n}\n\nexport async function resolveActualText<Input, Output, Expected>(\n selector: ValueSelector<Input, Output, Expected, string> | undefined,\n args: EvalMetricArgs<Input, Output, Expected>,\n): Promise<string> {\n const value = selector === undefined ? defaultOutputValue(args.output) : await selector(args);\n return typeof value === \"string\" ? value : JSON.stringify(value);\n}\n\nexport async function resolveExpected<Input, Output, Expected, Value>(\n selectorOrValue: SelectorOrValue<Input, Output, Expected, Value> | undefined,\n args: EvalMetricArgs<Input, Output, Expected>,\n): Promise<Value | Expected | undefined> {\n if (selectorOrValue === undefined) {\n return args.case.expected;\n }\n return typeof selectorOrValue === \"function\"\n ? (selectorOrValue as ValueSelector<Input, Output, Expected, Value>)(args)\n : selectorOrValue;\n}\n\nexport async function resolveJudgePrompt<Input, Output, Expected>(\n selector: ValueSelector<Input, Output, Expected, string> | undefined,\n args: EvalMetricArgs<Input, Output, Expected>,\n): Promise<string> {\n if (selector !== undefined) {\n return selector(args);\n }\n return [\n `Suite: ${args.suiteName}`,\n `Case: ${args.case.id}`,\n `Input: ${formatValue(args.case.input)}`,\n `Expected: ${formatValue(args.case.expected)}`,\n `Output: ${formatValue(defaultOutputValue(args.output))}`,\n ].join(\"\\n\\n\");\n}\n","import { mapWithConcurrency } from \"../internal/concurrency\";\nimport { errorMessage } from \"./format\";\nimport { EvalOutcome, type EvalOutcome as EvalOutcomeType } from \"./outcome\";\nimport type {\n EvalCase,\n EvalCaseResult,\n EvalMetric,\n EvalMetricResult,\n EvalReporter,\n EvalSuiteResult,\n RunEvalSuiteOptions,\n} from \"./types\";\n\nexport async function runEvalSuite<Input, Output, Expected = unknown>(\n options: RunEvalSuiteOptions<Input, Output, Expected>,\n): Promise<EvalSuiteResult<Input, Output, Expected>> {\n const startedAt = Date.now();\n const results = await mapWithConcurrency(\n options.cases,\n Math.max(1, Math.trunc(options.concurrency ?? 1)),\n (testCase) => runEvalCase(options, testCase),\n );\n const counts = countOutcomes(results);\n return {\n name: options.name,\n results,\n ...counts,\n durationMs: Date.now() - startedAt,\n };\n}\n\nasync function runEvalCase<Input, Output, Expected>(\n options: RunEvalSuiteOptions<Input, Output, Expected>,\n testCase: EvalCase<Input, Expected>,\n): Promise<EvalCaseResult<Input, Output, Expected>> {\n let output: Output | undefined;\n let targetError: unknown;\n try {\n output = await options.target(testCase.input, testCase);\n } catch (error) {\n targetError = error;\n }\n\n const metrics: EvalMetricResult[] = [];\n for (const metric of options.metrics) {\n const outcome =\n targetError === undefined\n ? await safeEvaluate(options.name, testCase, output as Output, metric)\n : EvalOutcome.invalid(`Target failed: ${errorMessage(targetError)}`);\n const reporterErrors = await reportOutcome({\n suiteName: options.name,\n testCase,\n output,\n targetError,\n metric,\n outcome,\n reporters: options.reporters ?? [],\n failOnReporterError: options.failOnReporterError === true,\n });\n metrics.push({ metricName: metric.name, outcome, reporterErrors });\n }\n\n return {\n case: testCase,\n ...(output === undefined ? {} : { output }),\n ...(targetError === undefined ? {} : { targetError }),\n metrics,\n };\n}\n\nasync function safeEvaluate<Input, Output, Expected>(\n suiteName: string,\n testCase: EvalCase<Input, Expected>,\n output: Output,\n metric: EvalMetric<Input, Output, unknown, Expected>,\n): Promise<EvalOutcomeType> {\n try {\n return await metric.evaluate({ suiteName, case: testCase, output });\n } catch (error) {\n return EvalOutcome.invalid(errorMessage(error));\n }\n}\n\nasync function reportOutcome<Input, Output, Expected>(args: {\n suiteName: string;\n testCase: EvalCase<Input, Expected>;\n output: Output | undefined;\n targetError: unknown;\n metric: EvalMetric<Input, Output, unknown, Expected>;\n outcome: EvalOutcomeType;\n reporters: Array<EvalReporter<Input, Output, Expected>>;\n failOnReporterError: boolean;\n}): Promise<unknown[]> {\n const errors: unknown[] = [];\n for (const reporter of args.reporters) {\n try {\n await reporter.report({\n suiteName: args.suiteName,\n case: args.testCase,\n output: args.output,\n targetError: args.targetError,\n metric: args.metric,\n outcome: args.outcome,\n });\n } catch (error) {\n if (args.failOnReporterError) {\n throw error;\n }\n errors.push(error);\n }\n }\n return errors;\n}\n\nfunction countOutcomes(results: Array<EvalCaseResult<unknown, unknown, unknown>>): {\n passed: number;\n failed: number;\n invalid: number;\n} {\n let passed = 0;\n let failed = 0;\n let invalid = 0;\n for (const result of results) {\n for (const metric of result.metrics) {\n if (metric.outcome.outcome === \"pass\") passed += 1;\n if (metric.outcome.outcome === \"fail\") failed += 1;\n if (metric.outcome.outcome === \"invalid\") invalid += 1;\n }\n }\n return { passed, failed, invalid };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAkBO,SAAS,gBACd,OACA,UAAkE,CAAC,GACvB;AAC5C,SAAO,OAAO,OAAO,aAAa;AAChC,UAAM,SAAS,QAAQ,SAAS,OAAO,QAAQ,KAAK,OAAO,KAAK;AAChE,UAAM,WAAW,MAAM,MAAM,OAAO,MAAM,EAAE,KAAK;AACjD,WAAO,QAAQ,WAAW,SAAY,WAAW,QAAQ,OAAO,UAAU,QAAQ;AAAA,EACpF;AACF;;;AC3BA,SAAS,SAAS;;;ACAX,SAAS,mBAAmB,QAA0B;AAC3D,MACE,OAAO,WAAW,YAClB,WAAW,QACX,YAAY,UACZ,OAAQ,OAAgC,WAAW,UACnD;AACA,WAAQ,OAA8B;AAAA,EACxC;AACA,SAAO;AACT;AAEO,SAAS,iBAAiB,OAAwB;AACvD,MAAI,OAAO,UAAU,UAAU;AAC7B,WAAO;AAAA,EACT;AACA,SAAO,KAAK,UAAU,KAAK;AAC7B;AAEO,SAAS,YAAY,OAAwB;AAClD,MAAI,OAAO,UAAU,UAAU;AAC7B,WAAO;AAAA,EACT;AACA,MAAI;AACF,WAAO,KAAK,UAAU,KAAK;AAAA,EAC7B,QAAQ;AACN,WAAO,OAAO,KAAK;AAAA,EACrB;AACF;AAEO,SAAS,aAAa,OAAwB;AACnD,SAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAC9D;;;ACTO,IAAM,cAAc;AAAA,EACzB,KACE,OACA,UAAiF,CAAC,GAC9D;AACpB,WAAO;AAAA,MACL,SAAS;AAAA,MACT,GAAI,UAAU,SAAY,CAAC,IAAI,EAAE,MAAM;AAAA,MACvC,GAAI,QAAQ,YAAY,SAAY,CAAC,IAAI,EAAE,SAAS,QAAQ,QAAQ;AAAA,MACpE,GAAI,QAAQ,aAAa,SAAY,CAAC,IAAI,EAAE,UAAU,QAAQ,SAAS;AAAA,IACzE;AAAA,EACF;AAAA,EAEA,KACE,OACA,UAAiF,CAAC,GAC9D;AACpB,WAAO;AAAA,MACL,SAAS;AAAA,MACT,GAAI,UAAU,SAAY,CAAC,IAAI,EAAE,MAAM;AAAA,MACvC,GAAI,QAAQ,YAAY,SAAY,CAAC,IAAI,EAAE,SAAS,QAAQ,QAAQ;AAAA,MACpE,GAAI,QAAQ,aAAa,SAAY,CAAC,IAAI,EAAE,UAAU,QAAQ,SAAS;AAAA,IACzE;AAAA,EACF;AAAA,EAEA,QACE,QACA,UAII,CAAC,GACe;AACpB,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,MACA,GAAI,QAAQ,UAAU,SAAY,CAAC,IAAI,EAAE,OAAO,QAAQ,MAAM;AAAA,MAC9D,GAAI,QAAQ,YAAY,SAAY,CAAC,IAAI,EAAE,SAAS,QAAQ,QAAQ;AAAA,MACpE,GAAI,QAAQ,aAAa,SAAY,CAAC,IAAI,EAAE,UAAU,QAAQ,SAAS;AAAA,IACzE;AAAA,EACF;AACF;;;AC7DA,eAAsB,cACpB,UACA,MACkB;AAClB,SAAO,aAAa,SAAY,mBAAmB,KAAK,MAAM,IAAI,SAAS,IAAI;AACjF;AAEA,eAAsB,kBACpB,UACA,MACiB;AACjB,QAAM,QAAQ,aAAa,SAAY,mBAAmB,KAAK,MAAM,IAAI,MAAM,SAAS,IAAI;AAC5F,SAAO,OAAO,UAAU,WAAW,QAAQ,KAAK,UAAU,KAAK;AACjE;AAEA,eAAsB,gBACpB,iBACA,MACuC;AACvC,MAAI,oBAAoB,QAAW;AACjC,WAAO,KAAK,KAAK;AAAA,EACnB;AACA,SAAO,OAAO,oBAAoB,aAC7B,gBAAkE,IAAI,IACvE;AACN;AAEA,eAAsB,mBACpB,UACA,MACiB;AACjB,MAAI,aAAa,QAAW;AAC1B,WAAO,SAAS,IAAI;AAAA,EACtB;AACA,SAAO;AAAA,IACL,UAAU,KAAK,SAAS;AAAA,IACxB,SAAS,KAAK,KAAK,EAAE;AAAA,IACrB,UAAU,YAAY,KAAK,KAAK,KAAK,CAAC;AAAA,IACtC,aAAa,YAAY,KAAK,KAAK,QAAQ,CAAC;AAAA,IAC5C,WAAW,YAAY,mBAAmB,KAAK,MAAM,CAAC,CAAC;AAAA,EACzD,EAAE,KAAK,MAAM;AACf;;;AH5BO,SAAS,WACd,UAAsD,CAAC,GACT;AAC9C,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,YAAM,SAAS,MAAM,cAAc,QAAQ,QAAQ,IAAI;AACvD,YAAM,WAAW,MAAM,gBAAgB,QAAQ,UAAU,IAAI;AAC7D,UAAI,aAAa,QAAW;AAC1B,eAAO,YAAY,QAAQ,6CAA6C;AAAA,MAC1E;AACA,YAAM,SAAS,iBAAiB,MAAM,MAAM,iBAAiB,QAAQ;AACrE,aAAO,SACH,YAAY,KAAK,IAAI,IACrB,YAAY,KAAK,OAAO,EAAE,SAAS,YAAY,YAAY,QAAQ,CAAC,IAAI,CAAC;AAAA,IAC/E;AAAA,EACF;AACF;AAQO,SAAS,SACd,UAAoD,CAAC,GACP;AAC9C,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,YAAM,SAAS,MAAM,kBAAkB,QAAQ,QAAQ,IAAI;AAC3D,YAAM,WAAW,MAAM,gBAAgB,QAAQ,UAAU,IAAI;AAC7D,UAAI,aAAa,QAAW;AAC1B,eAAO,YAAY,QAAQ,0CAA0C;AAAA,MACvE;AACA,UAAI,OAAO,aAAa,YAAY,EAAE,oBAAoB,SAAS;AACjE,eAAO,YAAY,QAAQ,qDAAqD;AAAA,MAClF;AACA,YAAM,SAAS,oBAAoB,SAAS,SAAS,KAAK,MAAM,IAAI,OAAO,SAAS,QAAQ;AAC5F,aAAO,SACH,YAAY,KAAK,IAAI,IACrB,YAAY,KAAK,OAAO,EAAE,SAAS,0BAA0B,OAAO,QAAQ,CAAC,IAAI,CAAC;AAAA,IACxF;AAAA,EACF;AACF;AAUO,SAAS,mBACd,SAC6C;AAC7C,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,YAAM,SAAS,MAAM,kBAAkB,QAAQ,QAAQ,IAAI;AAC3D,YAAM,WAAW,MAAM,gBAAgB,QAAQ,UAAU,IAAI;AAC7D,UAAI,aAAa,QAAW;AAC1B,eAAO,YAAY,QAAQ,qDAAqD;AAAA,MAClF;AACA,UAAI,OAAO,aAAa,UAAU;AAChC,eAAO,YAAY,QAAQ,sDAAsD;AAAA,MACnF;AACA,YAAM,CAAC,iBAAiB,iBAAiB,IAAI,MAAM,QAAQ,IAAI;AAAA,QAC7D,UAAU,QAAQ,OAAO,MAAM;AAAA,QAC/B,UAAU,QAAQ,OAAO,QAAQ;AAAA,MACnC,CAAC;AACD,YAAM,QAAQ,iBAAiB,gBAAgB,QAAQ,kBAAkB,MAAM;AAC/E,aAAO,SAAS,QAAQ,YACpB,YAAY,KAAK,KAAK,IACtB,YAAY,KAAK,OAAO,EAAE,SAAS,8BAA8B,QAAQ,SAAS,IAAI,CAAC;AAAA,IAC7F;AAAA,EACF;AACF;AAYO,SAAS,SACd,SACmD;AACnD,QAAM,YAAY,IAAI,iBAAiB,QAAQ,OAAO,QAAQ,MAAM,EACjE;AAAA,IACC,QAAQ,gBACN;AAAA,EACJ,EACC,QAAQ,QAAQ,WAAW,CAAC,EAC5B,MAAM;AAET,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,UAAI;AACF,cAAM,WAAW,MAAM,UAAU,QAAQ,MAAM,mBAAmB,QAAQ,QAAQ,IAAI,CAAC;AACvF,eAAO,QAAQ,OAAO,QAAQ,IAAI,YAAY,KAAK,QAAQ,IAAI,YAAY,KAAK,QAAQ;AAAA,MAC1F,SAAS,OAAO;AACd,eAAO,YAAY,QAAQ,aAAa,KAAK,CAAC;AAAA,MAChD;AAAA,IACF;AAAA,EACF;AACF;AAiBO,SAAS,SACd,SAC0D;AAC1D,QAAM,WAAW,MAAM,QAAQ,QAAQ,QAAQ,IAAI,QAAQ,SAAS,KAAK,IAAI,IAAI,QAAQ;AACzF,QAAM,YAAY,IAAI;AAAA,IACpB,QAAQ;AAAA,IACR,EAAE,OAAO;AAAA,MACP,OAAO,EAAE,OAAO;AAAA,MAChB,UAAU,EAAE,OAAO;AAAA,IACrB,CAAC;AAAA,EACH,EACG;AAAA,IACC,QAAQ,gBACN;AAAA,EAAgD,QAAQ;AAAA;AAAA;AAAA,EAC5D,EACC,QAAQ,QAAQ,WAAW,CAAC,EAC5B,MAAM;AAET,SAAO;AAAA,IACL,MAAM,QAAQ,QAAQ;AAAA,IACtB,MAAM,SAAS,MAAM;AACnB,UAAI;AACF,cAAM,QAAQ,MAAM,UAAU,QAAQ,MAAM,mBAAmB,QAAQ,QAAQ,IAAI,CAAC;AACpF,YAAI,MAAM,QAAQ,KAAK,MAAM,QAAQ,GAAG;AACtC,iBAAO,YAAY,QAAQ,SAAS,MAAM,KAAK,gCAAgC;AAAA,YAC7E;AAAA,UACF,CAAC;AAAA,QACH;AACA,eAAO,MAAM,SAAS,QAAQ,YAC1B,YAAY,KAAK,OAAO,EAAE,SAAS,MAAM,SAAS,CAAC,IACnD,YAAY,KAAK,OAAO,EAAE,SAAS,MAAM,SAAS,CAAC;AAAA,MACzD,SAAS,OAAO;AACd,eAAO,YAAY,QAAQ,aAAa,KAAK,CAAC;AAAA,MAChD;AAAA,IACF;AAAA,EACF;AACF;;;AIzKA,eAAsB,aACpB,SACmD;AACnD,QAAM,YAAY,KAAK,IAAI;AAC3B,QAAM,UAAU,MAAM;AAAA,IACpB,QAAQ;AAAA,IACR,KAAK,IAAI,GAAG,KAAK,MAAM,QAAQ,eAAe,CAAC,CAAC;AAAA,IAChD,CAAC,aAAa,YAAY,SAAS,QAAQ;AAAA,EAC7C;AACA,QAAM,SAAS,cAAc,OAAO;AACpC,SAAO;AAAA,IACL,MAAM,QAAQ;AAAA,IACd;AAAA,IACA,GAAG;AAAA,IACH,YAAY,KAAK,IAAI,IAAI;AAAA,EAC3B;AACF;AAEA,eAAe,YACb,SACA,UACkD;AAClD,MAAI;AACJ,MAAI;AACJ,MAAI;AACF,aAAS,MAAM,QAAQ,OAAO,SAAS,OAAO,QAAQ;AAAA,EACxD,SAAS,OAAO;AACd,kBAAc;AAAA,EAChB;AAEA,QAAM,UAA8B,CAAC;AACrC,aAAW,UAAU,QAAQ,SAAS;AACpC,UAAM,UACJ,gBAAgB,SACZ,MAAM,aAAa,QAAQ,MAAM,UAAU,QAAkB,MAAM,IACnE,YAAY,QAAQ,kBAAkB,aAAa,WAAW,CAAC,EAAE;AACvE,UAAM,iBAAiB,MAAM,cAAc;AAAA,MACzC,WAAW,QAAQ;AAAA,MACnB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW,QAAQ,aAAa,CAAC;AAAA,MACjC,qBAAqB,QAAQ,wBAAwB;AAAA,IACvD,CAAC;AACD,YAAQ,KAAK,EAAE,YAAY,OAAO,MAAM,SAAS,eAAe,CAAC;AAAA,EACnE;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,GAAI,WAAW,SAAY,CAAC,IAAI,EAAE,OAAO;AAAA,IACzC,GAAI,gBAAgB,SAAY,CAAC,IAAI,EAAE,YAAY;AAAA,IACnD;AAAA,EACF;AACF;AAEA,eAAe,aACb,WACA,UACA,QACA,QAC0B;AAC1B,MAAI;AACF,WAAO,MAAM,OAAO,SAAS,EAAE,WAAW,MAAM,UAAU,OAAO,CAAC;AAAA,EACpE,SAAS,OAAO;AACd,WAAO,YAAY,QAAQ,aAAa,KAAK,CAAC;AAAA,EAChD;AACF;AAEA,eAAe,cAAuC,MAS/B;AACrB,QAAM,SAAoB,CAAC;AAC3B,aAAW,YAAY,KAAK,WAAW;AACrC,QAAI;AACF,YAAM,SAAS,OAAO;AAAA,QACpB,WAAW,KAAK;AAAA,QAChB,MAAM,KAAK;AAAA,QACX,QAAQ,KAAK;AAAA,QACb,aAAa,KAAK;AAAA,QAClB,QAAQ,KAAK;AAAA,QACb,SAAS,KAAK;AAAA,MAChB,CAAC;AAAA,IACH,SAAS,OAAO;AACd,UAAI,KAAK,qBAAqB;AAC5B,cAAM;AAAA,MACR;AACA,aAAO,KAAK,KAAK;AAAA,IACnB;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,cAAc,SAIrB;AACA,MAAI,SAAS;AACb,MAAI,SAAS;AACb,MAAI,UAAU;AACd,aAAW,UAAU,SAAS;AAC5B,eAAW,UAAU,OAAO,SAAS;AACnC,UAAI,OAAO,QAAQ,YAAY,OAAQ,WAAU;AACjD,UAAI,OAAO,QAAQ,YAAY,OAAQ,WAAU;AACjD,UAAI,OAAO,QAAQ,YAAY,UAAW,YAAW;AAAA,IACvD;AAAA,EACF;AACA,SAAO,EAAE,QAAQ,QAAQ,QAAQ;AACnC;","names":[]}
@@ -1,11 +1,11 @@
1
- import { e as Agent } from '../agent-0UeJ9Rad.js';
1
+ import { e as Agent } from '../agent-B-ls5y_g.js';
2
2
  import { U as Usage, M as Message, C as CompletionModel, e as JsonValue, m as ToolChoice } from '../types-C54aNoCd.js';
3
3
  import { Z as ZodSchema } from '../zod-schema-C7F4clpm.js';
4
4
  import '../memory/index.js';
5
5
  import '../observability/index.js';
6
6
  import '../tool-C3ciF-VG.js';
7
- import '../middleware-BQ7fkEEe.js';
8
- import '../embeddings/index.js';
7
+ import '../middleware-CGiEIaBx.js';
8
+ import '../types-IB2e9u5M.js';
9
9
  import '../vector-store/index.js';
10
10
  import 'zod';
11
11
 
@@ -2,16 +2,17 @@ import {
2
2
  ExtractionError,
3
3
  Extractor,
4
4
  ExtractorBuilder
5
- } from "../chunk-PP4VIN3Y.js";
6
- import "../chunk-6U6PZ5MD.js";
7
- import "../chunk-TP32W7XT.js";
5
+ } from "../chunk-4PHDOEKY.js";
6
+ import "../chunk-BD66WU2B.js";
7
+ import "../chunk-TVIADATY.js";
8
8
  import "../chunk-XUUY2L2D.js";
9
9
  import "../chunk-XXT2UCAR.js";
10
10
  import "../chunk-YK4WAAS4.js";
11
- import "../chunk-I2HOMD3R.js";
12
- import "../chunk-65QV627O.js";
13
- import "../chunk-A7VDIZQN.js";
11
+ import "../chunk-CY7SB5FG.js";
12
+ import "../chunk-OIA4CN5V.js";
14
13
  import "../chunk-6GJDBBDC.js";
14
+ import "../chunk-EFGX3EX5.js";
15
+ import "../chunk-OIMLU4SF.js";
15
16
  export {
16
17
  ExtractionError,
17
18
  Extractor,
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  export { AgentBuilder, MaxTurnsError, PromptCancelledError } from './agent/index.js';
2
- export { A as AgentChildStreamEvent, a as AgentStreamEvent, P as PromptResponse, c as cancelPrompt, b as createHook, r as requestToolApproval, d as runControl, s as skipTool, t as toolCallControl } from './agent-0UeJ9Rad.js';
2
+ export { A as AgentChildStreamEvent, a as AgentStreamEvent, P as PromptResponse, c as cancelPrompt, b as createHook, r as requestToolApproval, d as runControl, s as skipTool, t as toolCallControl } from './agent-B-ls5y_g.js';
3
3
  export { A as AssistantContent, a as AssistantMessage, C as CompletionModel, b as CompletionRequest, c as CompletionResponse, D as Document, I as ImageContent, J as JsonObject, d as JsonPrimitive, e as JsonValue, M as Message, S as SystemMessage, T as Text, f as ToolCall, g as ToolDefinition, h as ToolMessage, i as ToolResult, j as ToolResultContent, U as Usage, k as UserContent, l as UserMessage } from './types-C54aNoCd.js';
4
4
  export { MemoryStore } from './memory/index.js';
5
5
  export { Z as ZodSchema } from './zod-schema-C7F4clpm.js';
@@ -12,7 +12,7 @@ import '@modelcontextprotocol/sdk/client/sse.js';
12
12
  import '@modelcontextprotocol/sdk/client/stdio.js';
13
13
  import '@modelcontextprotocol/sdk/client/streamableHttp.js';
14
14
  import './observability/index.js';
15
- import './middleware-BQ7fkEEe.js';
16
- import './embeddings/index.js';
15
+ import './middleware-CGiEIaBx.js';
16
+ import './types-IB2e9u5M.js';
17
17
  import './vector-store/index.js';
18
18
  import 'zod';
package/dist/index.js CHANGED
@@ -2,10 +2,10 @@ import {
2
2
  SkillValidationError,
3
3
  loadSkills,
4
4
  skill
5
- } from "./chunk-TILE6Z2N.js";
5
+ } from "./chunk-L6JCKFAX.js";
6
6
  import {
7
7
  AgentBuilder
8
- } from "./chunk-6U6PZ5MD.js";
8
+ } from "./chunk-BD66WU2B.js";
9
9
  import {
10
10
  MaxTurnsError,
11
11
  PromptCancelledError,
@@ -15,23 +15,24 @@ import {
15
15
  runControl,
16
16
  skipTool,
17
17
  toolCallControl
18
- } from "./chunk-TP32W7XT.js";
18
+ } from "./chunk-TVIADATY.js";
19
19
  import "./chunk-XUUY2L2D.js";
20
20
  import "./chunk-XXT2UCAR.js";
21
21
  import "./chunk-YK4WAAS4.js";
22
22
  import {
23
23
  createThinkTool
24
- } from "./chunk-I2HOMD3R.js";
24
+ } from "./chunk-CY7SB5FG.js";
25
25
  import {
26
26
  createTool
27
- } from "./chunk-65QV627O.js";
28
- import "./chunk-A7VDIZQN.js";
27
+ } from "./chunk-OIA4CN5V.js";
29
28
  import {
30
29
  AssistantContent,
31
30
  Message,
32
31
  Usage,
33
32
  UserContent
34
33
  } from "./chunk-6GJDBBDC.js";
34
+ import "./chunk-EFGX3EX5.js";
35
+ import "./chunk-OIMLU4SF.js";
35
36
  export {
36
37
  AgentBuilder,
37
38
  AssistantContent,
@@ -1,8 +1,8 @@
1
- export { e as Agent, j as AgentEventAppendInput, k as AgentEventRecord, h as AgentEventStore, l as AgentEventStoreInclude, i as AgentEventStoreOptions, x as AgentEventStoreRegistration, y as AgentOptions, z as AgentSession, B as AgentToolOptions, E as DEFAULT_MAX_TURNS, D as DynamicContextOptions, F as DynamicContextRegistration, f as DynamicToolOptions, G as DynamicToolRegistration } from '../agent-0UeJ9Rad.js';
1
+ export { e as Agent, j as AgentEventAppendInput, k as AgentEventRecord, h as AgentEventStore, l as AgentEventStoreInclude, i as AgentEventStoreOptions, x as AgentEventStoreRegistration, y as AgentOptions, z as AgentSession, B as AgentToolOptions, E as DEFAULT_MAX_TURNS, D as DynamicContextOptions, F as DynamicContextRegistration, f as DynamicToolOptions, G as DynamicToolRegistration } from '../agent-B-ls5y_g.js';
2
2
  import '../types-C54aNoCd.js';
3
3
  import '../memory/index.js';
4
4
  import '../observability/index.js';
5
5
  import '../tool-C3ciF-VG.js';
6
- import '../middleware-BQ7fkEEe.js';
7
- import '../embeddings/index.js';
6
+ import '../middleware-CGiEIaBx.js';
7
+ import '../types-IB2e9u5M.js';
8
8
  import '../vector-store/index.js';
@@ -2,13 +2,14 @@ import {
2
2
  Agent,
3
3
  AgentSession,
4
4
  DEFAULT_MAX_TURNS
5
- } from "../chunk-TP32W7XT.js";
5
+ } from "../chunk-TVIADATY.js";
6
6
  import "../chunk-XUUY2L2D.js";
7
7
  import "../chunk-YK4WAAS4.js";
8
- import "../chunk-I2HOMD3R.js";
9
- import "../chunk-65QV627O.js";
10
- import "../chunk-A7VDIZQN.js";
8
+ import "../chunk-CY7SB5FG.js";
9
+ import "../chunk-OIA4CN5V.js";
11
10
  import "../chunk-6GJDBBDC.js";
11
+ import "../chunk-EFGX3EX5.js";
12
+ import "../chunk-OIMLU4SF.js";
12
13
  export {
13
14
  Agent,
14
15
  AgentSession,
@@ -1,5 +1,5 @@
1
1
  import { g as ToolDefinition, j as ToolResultContent } from './types-C54aNoCd.js';
2
- import { VectorMetadata, EmbeddingModel, EmbeddedDocument } from './embeddings/index.js';
2
+ import { V as VectorMetadata, E as EmbeddingModel, b as EmbeddedDocument } from './types-IB2e9u5M.js';
3
3
  import { VectorSearchIndex } from './vector-store/index.js';
4
4
  import { A as AnyTool, c as ToolCallContext, N as NormalizedToolOutput } from './tool-C3ciF-VG.js';
5
5
 
@@ -1,11 +1,11 @@
1
- import { e as Agent } from '../agent-0UeJ9Rad.js';
1
+ import { e as Agent } from '../agent-B-ls5y_g.js';
2
2
  import { J as JsonObject, C as CompletionModel } from '../types-C54aNoCd.js';
3
3
  import { Extractor } from '../extractor/index.js';
4
4
  import '../memory/index.js';
5
5
  import '../observability/index.js';
6
6
  import '../tool-C3ciF-VG.js';
7
- import '../middleware-BQ7fkEEe.js';
8
- import '../embeddings/index.js';
7
+ import '../middleware-CGiEIaBx.js';
8
+ import '../types-IB2e9u5M.js';
9
9
  import '../vector-store/index.js';
10
10
  import '../zod-schema-C7F4clpm.js';
11
11
  import 'zod';
@@ -87,6 +87,7 @@ type PipelineBuilderState = {
87
87
  nextNodeIndex: number;
88
88
  nextEdgeIndex: number;
89
89
  };
90
+
90
91
  /** Runnable pipeline returned by `PipelineBuilder.build()`. */
91
92
  declare class Pipeline<Input, Output> implements PipelineOp<Input, Awaited<Output>> {
92
93
  private readonly executor;
@@ -102,6 +103,7 @@ declare class Pipeline<Input, Output> implements PipelineOp<Input, Awaited<Outpu
102
103
  batch<I extends Iterable<Input>>(inputs: I, options: PipelineBatchOptions): Promise<Array<Awaited<Output>>>;
103
104
  graph(): PipelineGraph;
104
105
  }
106
+
105
107
  /** Builds a typed pipeline from an original input type to an inferred output type. */
106
108
  declare class PipelineBuilder<Input, Output = Input> {
107
109
  private readonly executor;
@@ -1,4 +1,134 @@
1
- // src/pipeline/index.ts
1
+ import {
2
+ mapWithConcurrency
3
+ } from "../chunk-OIMLU4SF.js";
4
+
5
+ // src/pipeline/graph.ts
6
+ function initialBuilderState(metadata) {
7
+ return {
8
+ graph: initialGraph(metadata),
9
+ terminalNodeId: "input",
10
+ terminalNodeIds: ["input"],
11
+ nextNodeIndex: 1,
12
+ nextEdgeIndex: 1
13
+ };
14
+ }
15
+ function initialGraph(metadata) {
16
+ const id = normalizeId(metadata.id ?? "pipeline");
17
+ return {
18
+ id,
19
+ ...metadata.name === void 0 ? {} : { name: metadata.name },
20
+ ...metadata.description === void 0 ? {} : { description: metadata.description },
21
+ ...metadata.metadata === void 0 ? {} : { metadata: metadata.metadata },
22
+ nodes: [{ id: "input", kind: "input", label: "Input" }],
23
+ edges: []
24
+ };
25
+ }
26
+ function appendNode(state, kind, label, options = {}) {
27
+ const node = graphNode(kind, label, state.nextNodeIndex, {
28
+ ...options,
29
+ existingIds: new Set(state.graph.nodes.map((item) => item.id))
30
+ });
31
+ return {
32
+ node,
33
+ state: appendGraphNode(state, node, activeTerminalNodeIds(state), [node.id])
34
+ };
35
+ }
36
+ function appendChildNode(state, parentId, kind, label, options = {}) {
37
+ const node = graphNode(kind, label, state.nextNodeIndex, {
38
+ ...options,
39
+ existingIds: new Set(state.graph.nodes.map((item) => item.id))
40
+ });
41
+ return {
42
+ node,
43
+ state: appendGraphNode(state, node, [parentId], activeTerminalNodeIds(state))
44
+ };
45
+ }
46
+ function activeTerminalNodeIds(state) {
47
+ return state.terminalNodeIds.length > 0 ? state.terminalNodeIds : [state.terminalNodeId];
48
+ }
49
+ function withTerminalNodes(state, terminalNodeIds) {
50
+ return {
51
+ ...state,
52
+ terminalNodeId: terminalNodeIds.at(-1) ?? state.terminalNodeId,
53
+ terminalNodeIds
54
+ };
55
+ }
56
+ function withOutputNode(state) {
57
+ const graph = cloneGraph(state.graph);
58
+ if (graph.nodes.some((node) => node.id === "output")) {
59
+ return graph;
60
+ }
61
+ graph.nodes.push({ id: "output", kind: "output", label: "Output" });
62
+ graph.edges.push(
63
+ ...activeTerminalNodeIds(state).map((sourceId, index) => ({
64
+ id: `edge_${state.nextEdgeIndex + index}`,
65
+ source: sourceId,
66
+ target: "output"
67
+ }))
68
+ );
69
+ return graph;
70
+ }
71
+ function nextStageLabel(state, prefix) {
72
+ return `${prefix} ${state.nextNodeIndex}`;
73
+ }
74
+ function cloneGraph(graph) {
75
+ return {
76
+ ...graph,
77
+ nodes: graph.nodes.map((node) => ({ ...node })),
78
+ edges: graph.edges.map((edge) => ({ ...edge }))
79
+ };
80
+ }
81
+ function appendGraphNode(state, node, sourceIds, terminalNodeIds) {
82
+ const edges = sourceIds.map((sourceId, index) => ({
83
+ id: `edge_${state.nextEdgeIndex + index}`,
84
+ source: sourceId,
85
+ target: node.id
86
+ }));
87
+ const terminalNodeId = terminalNodeIds.at(-1) ?? state.terminalNodeId;
88
+ return {
89
+ graph: {
90
+ ...state.graph,
91
+ nodes: [...state.graph.nodes, node],
92
+ edges: [...state.graph.edges, ...edges]
93
+ },
94
+ terminalNodeId,
95
+ terminalNodeIds,
96
+ nextNodeIndex: state.nextNodeIndex + 1,
97
+ nextEdgeIndex: state.nextEdgeIndex + edges.length
98
+ };
99
+ }
100
+ function graphNode(kind, label, index, options = {}) {
101
+ const id = uniqueGraphNodeId(
102
+ normalizeId(options.preferredId ?? `${kind}_${index}`),
103
+ options.existingIds ?? /* @__PURE__ */ new Set()
104
+ );
105
+ return {
106
+ id,
107
+ kind,
108
+ label,
109
+ ...options.description === void 0 ? {} : { description: options.description },
110
+ ...options.metadata === void 0 ? {} : { metadata: options.metadata },
111
+ ...options.agentId === void 0 ? {} : { agentId: options.agentId },
112
+ ...options.agentName === void 0 ? {} : { agentName: options.agentName },
113
+ ...options.pipelineId === void 0 ? {} : { pipelineId: options.pipelineId },
114
+ ...options.branchKey === void 0 ? {} : { branchKey: options.branchKey }
115
+ };
116
+ }
117
+ function normalizeId(value) {
118
+ const normalized = value.trim().toLowerCase().replace(/[^a-z0-9_-]+/g, "_").replace(/^_+|_+$/g, "");
119
+ return normalized.length === 0 ? "pipeline" : normalized;
120
+ }
121
+ function uniqueGraphNodeId(baseId, existingIds) {
122
+ let id = baseId;
123
+ let suffix = 2;
124
+ while (existingIds.has(id)) {
125
+ id = `${baseId}_${suffix}`;
126
+ suffix += 1;
127
+ }
128
+ return id;
129
+ }
130
+
131
+ // src/pipeline/pipeline.ts
2
132
  var Pipeline = class {
3
133
  constructor(executor, pipelineGraph = initialGraph({})) {
4
134
  this.executor = executor;
@@ -26,6 +156,31 @@ var Pipeline = class {
26
156
  return cloneGraph(this.pipelineGraph);
27
157
  }
28
158
  };
159
+
160
+ // src/pipeline/runtime.ts
161
+ async function runNode(context, node, fn) {
162
+ const startedAt = Date.now();
163
+ await context.observer?.onEvent({ type: "stage_started", node });
164
+ try {
165
+ const output = await fn();
166
+ await context.observer?.onEvent({
167
+ type: "stage_completed",
168
+ node,
169
+ durationMs: Date.now() - startedAt
170
+ });
171
+ return output;
172
+ } catch (error) {
173
+ await context.observer?.onEvent({
174
+ type: "stage_failed",
175
+ node,
176
+ durationMs: Date.now() - startedAt,
177
+ error
178
+ });
179
+ throw error;
180
+ }
181
+ }
182
+
183
+ // src/pipeline/builder.ts
29
184
  var PipelineBuilder = class _PipelineBuilder {
30
185
  executor;
31
186
  state;
@@ -175,168 +330,9 @@ var PipelineBuilder = class _PipelineBuilder {
175
330
  return await this.executor(input, context);
176
331
  }
177
332
  };
178
- function initialBuilderState(metadata) {
179
- return {
180
- graph: initialGraph(metadata),
181
- terminalNodeId: "input",
182
- terminalNodeIds: ["input"],
183
- nextNodeIndex: 1,
184
- nextEdgeIndex: 1
185
- };
186
- }
187
- function initialGraph(metadata) {
188
- const id = normalizeId(metadata.id ?? "pipeline");
189
- return {
190
- id,
191
- ...metadata.name === void 0 ? {} : { name: metadata.name },
192
- ...metadata.description === void 0 ? {} : { description: metadata.description },
193
- ...metadata.metadata === void 0 ? {} : { metadata: metadata.metadata },
194
- nodes: [{ id: "input", kind: "input", label: "Input" }],
195
- edges: []
196
- };
197
- }
198
- function appendNode(state, kind, label, options = {}) {
199
- const node = graphNode(kind, label, state.nextNodeIndex, {
200
- ...options,
201
- existingIds: new Set(state.graph.nodes.map((item) => item.id))
202
- });
203
- return {
204
- node,
205
- state: appendGraphNode(state, node, activeTerminalNodeIds(state), [node.id])
206
- };
207
- }
208
- function appendChildNode(state, parentId, kind, label, options = {}) {
209
- const node = graphNode(kind, label, state.nextNodeIndex, {
210
- ...options,
211
- existingIds: new Set(state.graph.nodes.map((item) => item.id))
212
- });
213
- return {
214
- node,
215
- state: appendGraphNode(state, node, [parentId], activeTerminalNodeIds(state))
216
- };
217
- }
218
- function appendGraphNode(state, node, sourceIds, terminalNodeIds) {
219
- const edges = sourceIds.map((sourceId, index) => ({
220
- id: `edge_${state.nextEdgeIndex + index}`,
221
- source: sourceId,
222
- target: node.id
223
- }));
224
- const terminalNodeId = terminalNodeIds.at(-1) ?? state.terminalNodeId;
225
- return {
226
- graph: {
227
- ...state.graph,
228
- nodes: [...state.graph.nodes, node],
229
- edges: [...state.graph.edges, ...edges]
230
- },
231
- terminalNodeId,
232
- terminalNodeIds,
233
- nextNodeIndex: state.nextNodeIndex + 1,
234
- nextEdgeIndex: state.nextEdgeIndex + edges.length
235
- };
236
- }
237
- function activeTerminalNodeIds(state) {
238
- return state.terminalNodeIds.length > 0 ? state.terminalNodeIds : [state.terminalNodeId];
239
- }
240
- function withTerminalNodes(state, terminalNodeIds) {
241
- return {
242
- ...state,
243
- terminalNodeId: terminalNodeIds.at(-1) ?? state.terminalNodeId,
244
- terminalNodeIds
245
- };
246
- }
247
- function graphNode(kind, label, index, options = {}) {
248
- const id = uniqueGraphNodeId(
249
- normalizeId(options.preferredId ?? `${kind}_${index}`),
250
- options.existingIds ?? /* @__PURE__ */ new Set()
251
- );
252
- return {
253
- id,
254
- kind,
255
- label,
256
- ...options.description === void 0 ? {} : { description: options.description },
257
- ...options.metadata === void 0 ? {} : { metadata: options.metadata },
258
- ...options.agentId === void 0 ? {} : { agentId: options.agentId },
259
- ...options.agentName === void 0 ? {} : { agentName: options.agentName },
260
- ...options.pipelineId === void 0 ? {} : { pipelineId: options.pipelineId },
261
- ...options.branchKey === void 0 ? {} : { branchKey: options.branchKey }
262
- };
263
- }
264
- function withOutputNode(state) {
265
- const graph = cloneGraph(state.graph);
266
- if (graph.nodes.some((node) => node.id === "output")) {
267
- return graph;
268
- }
269
- graph.nodes.push({ id: "output", kind: "output", label: "Output" });
270
- graph.edges.push(
271
- ...activeTerminalNodeIds(state).map((sourceId, index) => ({
272
- id: `edge_${state.nextEdgeIndex + index}`,
273
- source: sourceId,
274
- target: "output"
275
- }))
276
- );
277
- return graph;
278
- }
279
- async function runNode(context, node, fn) {
280
- const startedAt = Date.now();
281
- await context.observer?.onEvent({ type: "stage_started", node });
282
- try {
283
- const output = await fn();
284
- await context.observer?.onEvent({
285
- type: "stage_completed",
286
- node,
287
- durationMs: Date.now() - startedAt
288
- });
289
- return output;
290
- } catch (error) {
291
- await context.observer?.onEvent({
292
- type: "stage_failed",
293
- node,
294
- durationMs: Date.now() - startedAt,
295
- error
296
- });
297
- throw error;
298
- }
299
- }
300
- function nextStageLabel(state, prefix) {
301
- return `${prefix} ${state.nextNodeIndex}`;
302
- }
303
- function cloneGraph(graph) {
304
- return {
305
- ...graph,
306
- nodes: graph.nodes.map((node) => ({ ...node })),
307
- edges: graph.edges.map((edge) => ({ ...edge }))
308
- };
309
- }
310
- function normalizeId(value) {
311
- const normalized = value.trim().toLowerCase().replace(/[^a-z0-9_-]+/g, "_").replace(/^_+|_+$/g, "");
312
- return normalized.length === 0 ? "pipeline" : normalized;
313
- }
314
- function uniqueGraphNodeId(baseId, existingIds) {
315
- let id = baseId;
316
- let suffix = 2;
317
- while (existingIds.has(id)) {
318
- id = `${baseId}_${suffix}`;
319
- suffix += 1;
320
- }
321
- return id;
322
- }
323
333
  function identity(input) {
324
334
  return input;
325
335
  }
326
- async function mapWithConcurrency(inputs, concurrency, fn) {
327
- const limit = Math.max(1, Math.trunc(concurrency));
328
- const results = new Array(inputs.length);
329
- let nextIndex = 0;
330
- async function worker() {
331
- while (nextIndex < inputs.length) {
332
- const index = nextIndex;
333
- nextIndex += 1;
334
- results[index] = await fn(inputs[index]);
335
- }
336
- }
337
- await Promise.all(Array.from({ length: Math.min(limit, inputs.length) }, () => worker()));
338
- return results;
339
- }
340
336
  export {
341
337
  Pipeline,
342
338
  PipelineBuilder