@iqai/adk 0.1.22 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/dist/index.d.mts +537 -346
- package/dist/index.d.ts +537 -346
- package/dist/index.js +1554 -206
- package/dist/index.mjs +1461 -113
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) { newObj[key] = obj[key]; } } } newObj.default = obj; return newObj; } } function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _nullishCoalesce(lhs, rhsFn) { if (lhs != null) { return lhs; } else { return rhsFn(); } } function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; } var _class; var _class2; var _class3; var _class4; var _class5; var _class6; var _class7; var _class8; var _class9; var _class10; var _class11; var _class12; var _class13; var _class14; var _class15; var _class16; var _class17; var _class18; var _class19; var _class20; var _class21; var _class22; var _class23; var _class24; var _class25; var _class26; var _class27; var _class28; var _class29; var _class30; var _class31; var _class32; var _class33; var _class34;var __defProp = Object.defineProperty;
|
|
1
|
+
"use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _interopRequireWildcard(obj) { if (obj && obj.__esModule) { return obj; } else { var newObj = {}; if (obj != null) { for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) { newObj[key] = obj[key]; } } } newObj.default = obj; return newObj; } } function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _nullishCoalesce(lhs, rhsFn) { if (lhs != null) { return lhs; } else { return rhsFn(); } } function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; } var _class; var _class2; var _class3; var _class4; var _class5; var _class6; var _class7; var _class8; var _class9; var _class10; var _class11; var _class12; var _class13; var _class14; var _class15; var _class16; var _class17; var _class18; var _class19; var _class20; var _class21; var _class22; var _class23; var _class24; var _class25; var _class26; var _class27; var _class28; var _class29; var _class30; var _class31; var _class32; var _class33; var _class34; var _class35;var __defProp = Object.defineProperty;
|
|
2
2
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
3
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
4
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
@@ -833,70 +833,23 @@ ${instructions.join("\n\n")}`;
|
|
|
833
833
|
|
|
834
834
|
// src/models/llm-response.ts
|
|
835
835
|
var LlmResponse = class _LlmResponse {
|
|
836
|
-
/**
|
|
837
|
-
* Unique identifier for the response.
|
|
838
|
-
*/
|
|
839
836
|
|
|
840
|
-
/**
|
|
841
|
-
* The content generated by the model.
|
|
842
|
-
*/
|
|
843
837
|
|
|
844
|
-
/**
|
|
845
|
-
* The grounding metadata of the response.
|
|
846
|
-
*/
|
|
847
838
|
|
|
848
|
-
/**
|
|
849
|
-
* Indicates whether the text content is part of an unfinished text stream.
|
|
850
|
-
*/
|
|
851
839
|
|
|
852
|
-
/**
|
|
853
|
-
* Indicates whether the response from the model is complete.
|
|
854
|
-
*/
|
|
855
840
|
|
|
856
|
-
/**
|
|
857
|
-
* Error code if the response is an error.
|
|
858
|
-
*/
|
|
859
841
|
|
|
860
|
-
/**
|
|
861
|
-
* Error message if the response is an error.
|
|
862
|
-
*/
|
|
863
842
|
|
|
864
|
-
/**
|
|
865
|
-
* Flag indicating that LLM was interrupted when generating the content.
|
|
866
|
-
*/
|
|
867
843
|
|
|
868
|
-
/**
|
|
869
|
-
* The custom metadata of the LlmResponse.
|
|
870
|
-
*/
|
|
871
844
|
|
|
872
|
-
/**
|
|
873
|
-
* The usage metadata of the LlmResponse.
|
|
874
|
-
*/
|
|
875
845
|
|
|
876
|
-
/**
|
|
877
|
-
* Index of the candidate response.
|
|
878
|
-
*/
|
|
879
846
|
|
|
880
|
-
/**
|
|
881
|
-
* Reason why the model finished generating.
|
|
882
|
-
*/
|
|
883
847
|
|
|
884
|
-
/**
|
|
885
|
-
* Error object if the response is an error.
|
|
886
|
-
*/
|
|
887
848
|
|
|
888
|
-
|
|
889
|
-
* Creates a new LlmResponse.
|
|
890
|
-
*/
|
|
849
|
+
|
|
891
850
|
constructor(data = {}) {
|
|
892
851
|
Object.assign(this, data);
|
|
893
852
|
}
|
|
894
|
-
/**
|
|
895
|
-
* Creates an LlmResponse from a GenerateContentResponse.
|
|
896
|
-
*
|
|
897
|
-
* @param generateContentResponse The GenerateContentResponse to create the LlmResponse from.
|
|
898
|
-
* @returns The LlmResponse.
|
|
899
|
-
*/
|
|
900
853
|
static create(generateContentResponse) {
|
|
901
854
|
const usageMetadata = generateContentResponse.usageMetadata;
|
|
902
855
|
if (generateContentResponse.candidates && generateContentResponse.candidates.length > 0) {
|
|
@@ -928,15 +881,6 @@ var LlmResponse = class _LlmResponse {
|
|
|
928
881
|
usageMetadata
|
|
929
882
|
});
|
|
930
883
|
}
|
|
931
|
-
/**
|
|
932
|
-
* Creates an LlmResponse from an error.
|
|
933
|
-
*
|
|
934
|
-
* @param error The error object or message.
|
|
935
|
-
* @param options Additional options for the error response.
|
|
936
|
-
* @param options.errorCode A specific error code for the response.
|
|
937
|
-
* @param options.model The model that was being used when the error occurred.
|
|
938
|
-
* @returns The LlmResponse.
|
|
939
|
-
*/
|
|
940
884
|
static fromError(error, options = {}) {
|
|
941
885
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
942
886
|
const errorCode = options.errorCode || "UNKNOWN_ERROR";
|
|
@@ -2675,30 +2619,16 @@ var OpenAiLlm = class extends BaseLlm {
|
|
|
2675
2619
|
// src/models/llm-registry.ts
|
|
2676
2620
|
init_logger();
|
|
2677
2621
|
var LLMRegistry = (_class8 = class _LLMRegistry {
|
|
2678
|
-
/**
|
|
2679
|
-
* Map of model name regex to LLM class
|
|
2680
|
-
*/
|
|
2681
2622
|
static __initStatic() {this.llmRegistry = /* @__PURE__ */ new Map()}
|
|
2682
|
-
static __initStatic2() {this.
|
|
2683
|
-
|
|
2684
|
-
* Creates a new LLM instance
|
|
2685
|
-
*
|
|
2686
|
-
* @param model The model name
|
|
2687
|
-
* @returns The LLM instance
|
|
2688
|
-
*/
|
|
2623
|
+
static __initStatic2() {this.modelInstances = /* @__PURE__ */ new Map()}
|
|
2624
|
+
static __initStatic3() {this.logger = new Logger({ name: "LLMRegistry" })}
|
|
2689
2625
|
static newLLM(model) {
|
|
2690
2626
|
const llmClass = _LLMRegistry.resolve(model);
|
|
2691
2627
|
if (!llmClass) {
|
|
2692
|
-
throw new Error(`No LLM found for model: ${model}`);
|
|
2628
|
+
throw new Error(`No LLM class found for model: ${model}`);
|
|
2693
2629
|
}
|
|
2694
2630
|
return new llmClass(model);
|
|
2695
2631
|
}
|
|
2696
|
-
/**
|
|
2697
|
-
* Resolves the LLM class from the model name
|
|
2698
|
-
*
|
|
2699
|
-
* @param model The model name
|
|
2700
|
-
* @returns The LLM class
|
|
2701
|
-
*/
|
|
2702
2632
|
static resolve(model) {
|
|
2703
2633
|
for (const [regex, llmClass] of _LLMRegistry.llmRegistry.entries()) {
|
|
2704
2634
|
if (regex.test(model)) {
|
|
@@ -2707,36 +2637,56 @@ var LLMRegistry = (_class8 = class _LLMRegistry {
|
|
|
2707
2637
|
}
|
|
2708
2638
|
return null;
|
|
2709
2639
|
}
|
|
2710
|
-
/**
|
|
2711
|
-
* Registers a new LLM class
|
|
2712
|
-
*
|
|
2713
|
-
* @param modelNameRegex The regex to match model names
|
|
2714
|
-
* @param llmClass The LLM class
|
|
2715
|
-
*/
|
|
2716
2640
|
static register(modelNameRegex, llmClass) {
|
|
2717
2641
|
_LLMRegistry.llmRegistry.set(new RegExp(modelNameRegex), llmClass);
|
|
2718
2642
|
}
|
|
2719
|
-
/**
|
|
2720
|
-
* Registers all model patterns from an LLM class
|
|
2721
|
-
*
|
|
2722
|
-
* @param llmClass The LLM class
|
|
2723
|
-
*/
|
|
2724
2643
|
static registerLLM(llmClass) {
|
|
2725
2644
|
const modelPatterns = llmClass.supportedModels();
|
|
2726
2645
|
for (const pattern of modelPatterns) {
|
|
2727
2646
|
_LLMRegistry.register(pattern, llmClass);
|
|
2728
2647
|
}
|
|
2729
2648
|
}
|
|
2730
|
-
|
|
2731
|
-
|
|
2732
|
-
|
|
2649
|
+
static registerModel(name, model) {
|
|
2650
|
+
_LLMRegistry.modelInstances.set(name, model);
|
|
2651
|
+
}
|
|
2652
|
+
static getModel(name) {
|
|
2653
|
+
const model = _LLMRegistry.modelInstances.get(name);
|
|
2654
|
+
if (!model) {
|
|
2655
|
+
throw new Error(`Model '${name}' not found in registry`);
|
|
2656
|
+
}
|
|
2657
|
+
return model;
|
|
2658
|
+
}
|
|
2659
|
+
static hasModel(name) {
|
|
2660
|
+
return _LLMRegistry.modelInstances.has(name);
|
|
2661
|
+
}
|
|
2662
|
+
static unregisterModel(name) {
|
|
2663
|
+
_LLMRegistry.modelInstances.delete(name);
|
|
2664
|
+
}
|
|
2665
|
+
static getModelOrCreate(name) {
|
|
2666
|
+
if (_LLMRegistry.hasModel(name)) {
|
|
2667
|
+
return _LLMRegistry.getModel(name);
|
|
2668
|
+
}
|
|
2669
|
+
return _LLMRegistry.newLLM(name);
|
|
2670
|
+
}
|
|
2671
|
+
static clear() {
|
|
2672
|
+
_LLMRegistry.llmRegistry.clear();
|
|
2673
|
+
_LLMRegistry.modelInstances.clear();
|
|
2674
|
+
}
|
|
2675
|
+
static clearModels() {
|
|
2676
|
+
_LLMRegistry.modelInstances.clear();
|
|
2677
|
+
}
|
|
2678
|
+
static clearClasses() {
|
|
2679
|
+
_LLMRegistry.llmRegistry.clear();
|
|
2680
|
+
}
|
|
2733
2681
|
static logRegisteredModels() {
|
|
2734
|
-
_LLMRegistry.
|
|
2735
|
-
|
|
2736
|
-
[..._LLMRegistry.llmRegistry.entries()].map(([regex]) => regex.toString())
|
|
2682
|
+
const classPatterns = [..._LLMRegistry.llmRegistry.entries()].map(
|
|
2683
|
+
([regex]) => regex.toString()
|
|
2737
2684
|
);
|
|
2685
|
+
const instanceNames = [..._LLMRegistry.modelInstances.keys()];
|
|
2686
|
+
_LLMRegistry.logger.debug("Registered LLM class patterns:", classPatterns);
|
|
2687
|
+
_LLMRegistry.logger.debug("Registered LLM instances:", instanceNames);
|
|
2738
2688
|
}
|
|
2739
|
-
}, _class8.__initStatic(), _class8.__initStatic2(), _class8);
|
|
2689
|
+
}, _class8.__initStatic(), _class8.__initStatic2(), _class8.__initStatic3(), _class8);
|
|
2740
2690
|
|
|
2741
2691
|
// src/models/registry.ts
|
|
2742
2692
|
function registerProviders() {
|
|
@@ -3111,9 +3061,9 @@ var OpenIdConnectScheme = class extends AuthScheme {
|
|
|
3111
3061
|
|
|
3112
3062
|
// src/sessions/state.ts
|
|
3113
3063
|
var State = (_class9 = class _State {
|
|
3114
|
-
static
|
|
3115
|
-
static
|
|
3116
|
-
static
|
|
3064
|
+
static __initStatic4() {this.APP_PREFIX = "app:"}
|
|
3065
|
+
static __initStatic5() {this.USER_PREFIX = "user:"}
|
|
3066
|
+
static __initStatic6() {this.TEMP_PREFIX = "temp:"}
|
|
3117
3067
|
|
|
3118
3068
|
|
|
3119
3069
|
/**
|
|
@@ -3207,7 +3157,7 @@ var State = (_class9 = class _State {
|
|
|
3207
3157
|
const state = new _State(value, delta);
|
|
3208
3158
|
return _State.createProxy(state);
|
|
3209
3159
|
}
|
|
3210
|
-
}, _class9.
|
|
3160
|
+
}, _class9.__initStatic4(), _class9.__initStatic5(), _class9.__initStatic6(), _class9);
|
|
3211
3161
|
|
|
3212
3162
|
// src/events/event.ts
|
|
3213
3163
|
var _uuid = require('uuid');
|
|
@@ -4545,8 +4495,8 @@ var HttpRequestTool = class extends BaseTool {
|
|
|
4545
4495
|
|
|
4546
4496
|
// src/tools/common/file-operations-tool.ts
|
|
4547
4497
|
init_base_tool();
|
|
4548
|
-
var _promises = require('fs/promises'); var
|
|
4549
|
-
var _path = require('path'); var
|
|
4498
|
+
var _promises = require('fs/promises'); var fs2 = _interopRequireWildcard(_promises);
|
|
4499
|
+
var _path = require('path'); var path2 = _interopRequireWildcard(_path);
|
|
4550
4500
|
|
|
4551
4501
|
var FileOperationsTool = class extends BaseTool {
|
|
4552
4502
|
|
|
@@ -4643,14 +4593,14 @@ var FileOperationsTool = class extends BaseTool {
|
|
|
4643
4593
|
* Resolve a file path relative to the base path
|
|
4644
4594
|
*/
|
|
4645
4595
|
resolvePath(filepath) {
|
|
4646
|
-
return
|
|
4596
|
+
return path2.default.isAbsolute(filepath) ? filepath : path2.default.resolve(this.basePath, filepath);
|
|
4647
4597
|
}
|
|
4648
4598
|
/**
|
|
4649
4599
|
* Validate that a path is within the base path for security
|
|
4650
4600
|
*/
|
|
4651
4601
|
validatePath(filepath) {
|
|
4652
|
-
const normalizedPath =
|
|
4653
|
-
const normalizedBasePath =
|
|
4602
|
+
const normalizedPath = path2.default.normalize(filepath);
|
|
4603
|
+
const normalizedBasePath = path2.default.normalize(this.basePath);
|
|
4654
4604
|
if (!normalizedPath.startsWith(normalizedBasePath)) {
|
|
4655
4605
|
throw new Error(
|
|
4656
4606
|
`Access denied: Can't access paths outside the base directory`
|
|
@@ -4662,7 +4612,7 @@ var FileOperationsTool = class extends BaseTool {
|
|
|
4662
4612
|
*/
|
|
4663
4613
|
async readFile(filepath, encoding) {
|
|
4664
4614
|
try {
|
|
4665
|
-
const content = await
|
|
4615
|
+
const content = await fs2.default.readFile(filepath, { encoding });
|
|
4666
4616
|
return {
|
|
4667
4617
|
success: true,
|
|
4668
4618
|
data: content
|
|
@@ -4679,9 +4629,9 @@ var FileOperationsTool = class extends BaseTool {
|
|
|
4679
4629
|
*/
|
|
4680
4630
|
async writeFile(filepath, content, encoding) {
|
|
4681
4631
|
try {
|
|
4682
|
-
const dir =
|
|
4683
|
-
await
|
|
4684
|
-
await
|
|
4632
|
+
const dir = path2.default.dirname(filepath);
|
|
4633
|
+
await fs2.default.mkdir(dir, { recursive: true });
|
|
4634
|
+
await fs2.default.writeFile(filepath, content, { encoding });
|
|
4685
4635
|
return {
|
|
4686
4636
|
success: true
|
|
4687
4637
|
};
|
|
@@ -4697,9 +4647,9 @@ var FileOperationsTool = class extends BaseTool {
|
|
|
4697
4647
|
*/
|
|
4698
4648
|
async appendFile(filepath, content, encoding) {
|
|
4699
4649
|
try {
|
|
4700
|
-
const dir =
|
|
4701
|
-
await
|
|
4702
|
-
await
|
|
4650
|
+
const dir = path2.default.dirname(filepath);
|
|
4651
|
+
await fs2.default.mkdir(dir, { recursive: true });
|
|
4652
|
+
await fs2.default.appendFile(filepath, content, { encoding });
|
|
4703
4653
|
return {
|
|
4704
4654
|
success: true
|
|
4705
4655
|
};
|
|
@@ -4715,7 +4665,7 @@ var FileOperationsTool = class extends BaseTool {
|
|
|
4715
4665
|
*/
|
|
4716
4666
|
async deleteFile(filepath) {
|
|
4717
4667
|
try {
|
|
4718
|
-
await
|
|
4668
|
+
await fs2.default.unlink(filepath);
|
|
4719
4669
|
return {
|
|
4720
4670
|
success: true
|
|
4721
4671
|
};
|
|
@@ -4731,7 +4681,7 @@ var FileOperationsTool = class extends BaseTool {
|
|
|
4731
4681
|
*/
|
|
4732
4682
|
async fileExists(filepath) {
|
|
4733
4683
|
try {
|
|
4734
|
-
await
|
|
4684
|
+
await fs2.default.access(filepath);
|
|
4735
4685
|
return {
|
|
4736
4686
|
success: true,
|
|
4737
4687
|
data: true
|
|
@@ -4748,11 +4698,11 @@ var FileOperationsTool = class extends BaseTool {
|
|
|
4748
4698
|
*/
|
|
4749
4699
|
async listDirectory(dirpath) {
|
|
4750
4700
|
try {
|
|
4751
|
-
const entries = await
|
|
4701
|
+
const entries = await fs2.default.readdir(dirpath, { withFileTypes: true });
|
|
4752
4702
|
const results = await Promise.all(
|
|
4753
4703
|
entries.map(async (entry) => {
|
|
4754
|
-
const entryPath =
|
|
4755
|
-
const stats = await
|
|
4704
|
+
const entryPath = path2.default.join(dirpath, entry.name);
|
|
4705
|
+
const stats = await fs2.default.stat(entryPath);
|
|
4756
4706
|
return {
|
|
4757
4707
|
name: entry.name,
|
|
4758
4708
|
path: entryPath,
|
|
@@ -4780,7 +4730,7 @@ var FileOperationsTool = class extends BaseTool {
|
|
|
4780
4730
|
*/
|
|
4781
4731
|
async makeDirectory(dirpath) {
|
|
4782
4732
|
try {
|
|
4783
|
-
await
|
|
4733
|
+
await fs2.default.mkdir(dirpath, { recursive: true });
|
|
4784
4734
|
return {
|
|
4785
4735
|
success: true
|
|
4786
4736
|
};
|
|
@@ -6582,9 +6532,23 @@ var BaseLlmFlow = (_class25 = class {constructor() { _class25.prototype.__init43
|
|
|
6582
6532
|
yield event;
|
|
6583
6533
|
}
|
|
6584
6534
|
}
|
|
6585
|
-
|
|
6535
|
+
let tools = await agent.canonicalTools(
|
|
6586
6536
|
new ReadonlyContext(invocationContext)
|
|
6587
6537
|
);
|
|
6538
|
+
if (tools.length > 1) {
|
|
6539
|
+
const seen = /* @__PURE__ */ new Set();
|
|
6540
|
+
const filtered = [];
|
|
6541
|
+
for (const t of tools) {
|
|
6542
|
+
const name = _optionalChain([t, 'optionalAccess', _172 => _172.name]);
|
|
6543
|
+
if (!name) continue;
|
|
6544
|
+
if (seen.has(name)) {
|
|
6545
|
+
continue;
|
|
6546
|
+
}
|
|
6547
|
+
seen.add(name);
|
|
6548
|
+
filtered.push(t);
|
|
6549
|
+
}
|
|
6550
|
+
tools = filtered;
|
|
6551
|
+
}
|
|
6588
6552
|
for (const tool of tools) {
|
|
6589
6553
|
const toolContext = new ToolContext(invocationContext);
|
|
6590
6554
|
await tool.processLlmRequest(toolContext, llmRequest);
|
|
@@ -6592,7 +6556,7 @@ var BaseLlmFlow = (_class25 = class {constructor() { _class25.prototype.__init43
|
|
|
6592
6556
|
if (tools.length > 0) {
|
|
6593
6557
|
const toolsData = tools.map((tool) => ({
|
|
6594
6558
|
Name: tool.name,
|
|
6595
|
-
Description: _optionalChain([tool, 'access',
|
|
6559
|
+
Description: _optionalChain([tool, 'access', _173 => _173.description, 'optionalAccess', _174 => _174.substring, 'call', _175 => _175(0, 50)]) + (_optionalChain([tool, 'access', _176 => _176.description, 'optionalAccess', _177 => _177.length]) > 50 ? "..." : ""),
|
|
6596
6560
|
"Long Running": tool.isLongRunning ? "Yes" : "No"
|
|
6597
6561
|
}));
|
|
6598
6562
|
this.logger.debugArray("\u{1F6E0}\uFE0F Available Tools", toolsData);
|
|
@@ -6655,14 +6619,14 @@ var BaseLlmFlow = (_class25 = class {constructor() { _class25.prototype.__init43
|
|
|
6655
6619
|
);
|
|
6656
6620
|
if (functionResponseEvent) {
|
|
6657
6621
|
yield functionResponseEvent;
|
|
6658
|
-
const transferToAgent = _optionalChain([functionResponseEvent, 'access',
|
|
6622
|
+
const transferToAgent = _optionalChain([functionResponseEvent, 'access', _178 => _178.actions, 'optionalAccess', _179 => _179.transferToAgent]);
|
|
6659
6623
|
if (transferToAgent) {
|
|
6660
6624
|
this.logger.debug(`\u{1F504} Live transfer to agent '${transferToAgent}'`);
|
|
6661
6625
|
const agentToRun = this._getAgentToRun(
|
|
6662
6626
|
invocationContext,
|
|
6663
6627
|
transferToAgent
|
|
6664
6628
|
);
|
|
6665
|
-
for await (const event of _optionalChain([agentToRun, 'access',
|
|
6629
|
+
for await (const event of _optionalChain([agentToRun, 'access', _180 => _180.runLive, 'optionalCall', _181 => _181(invocationContext)]) || agentToRun.runAsync(invocationContext)) {
|
|
6666
6630
|
yield event;
|
|
6667
6631
|
}
|
|
6668
6632
|
}
|
|
@@ -6694,7 +6658,7 @@ var BaseLlmFlow = (_class25 = class {constructor() { _class25.prototype.__init43
|
|
|
6694
6658
|
yield authEvent;
|
|
6695
6659
|
}
|
|
6696
6660
|
yield functionResponseEvent;
|
|
6697
|
-
const transferToAgent = _optionalChain([functionResponseEvent, 'access',
|
|
6661
|
+
const transferToAgent = _optionalChain([functionResponseEvent, 'access', _182 => _182.actions, 'optionalAccess', _183 => _183.transferToAgent]);
|
|
6698
6662
|
if (transferToAgent) {
|
|
6699
6663
|
this.logger.debug(`\u{1F504} Transferring to agent '${transferToAgent}'`);
|
|
6700
6664
|
const agentToRun = this._getAgentToRun(
|
|
@@ -6740,27 +6704,62 @@ var BaseLlmFlow = (_class25 = class {constructor() { _class25.prototype.__init43
|
|
|
6740
6704
|
}
|
|
6741
6705
|
invocationContext.incrementLlmCallCount();
|
|
6742
6706
|
const isStreaming = invocationContext.runConfig.streamingMode === "sse" /* SSE */;
|
|
6743
|
-
|
|
6707
|
+
let tools = _optionalChain([llmRequest, 'access', _184 => _184.config, 'optionalAccess', _185 => _185.tools]) || [];
|
|
6708
|
+
if (tools.length) {
|
|
6709
|
+
const deduped = [];
|
|
6710
|
+
const seenFn = /* @__PURE__ */ new Set();
|
|
6711
|
+
for (const t of tools) {
|
|
6712
|
+
const tool = t;
|
|
6713
|
+
if (tool && Array.isArray(tool.functionDeclarations)) {
|
|
6714
|
+
const newFds = tool.functionDeclarations.filter(
|
|
6715
|
+
(fd) => {
|
|
6716
|
+
if (_optionalChain([fd, 'optionalAccess', _186 => _186.name])) {
|
|
6717
|
+
if (seenFn.has(fd.name)) {
|
|
6718
|
+
return false;
|
|
6719
|
+
}
|
|
6720
|
+
seenFn.add(fd.name);
|
|
6721
|
+
}
|
|
6722
|
+
return true;
|
|
6723
|
+
}
|
|
6724
|
+
);
|
|
6725
|
+
if (newFds.length) {
|
|
6726
|
+
deduped.push({ ...tool, functionDeclarations: newFds });
|
|
6727
|
+
}
|
|
6728
|
+
} else if (_optionalChain([tool, 'optionalAccess', _187 => _187.name])) {
|
|
6729
|
+
if (seenFn.has(tool.name)) continue;
|
|
6730
|
+
seenFn.add(tool.name);
|
|
6731
|
+
deduped.push(tool);
|
|
6732
|
+
} else {
|
|
6733
|
+
deduped.push(tool);
|
|
6734
|
+
}
|
|
6735
|
+
}
|
|
6736
|
+
if (deduped.length !== tools.length) {
|
|
6737
|
+
this.logger.debug(
|
|
6738
|
+
`\u{1F501} Deduplicated tool/function declarations: ${tools.length} -> ${deduped.length}`
|
|
6739
|
+
);
|
|
6740
|
+
}
|
|
6741
|
+
llmRequest.config.tools = tools = deduped;
|
|
6742
|
+
}
|
|
6744
6743
|
const toolNames = tools.map((tool) => {
|
|
6745
6744
|
if (tool.functionDeclarations && Array.isArray(tool.functionDeclarations)) {
|
|
6746
6745
|
return tool.functionDeclarations.map((fn) => fn.name).join(", ");
|
|
6747
6746
|
}
|
|
6748
6747
|
if (tool.name) return tool.name;
|
|
6749
|
-
if (_optionalChain([tool, 'access',
|
|
6750
|
-
if (_optionalChain([tool, 'access',
|
|
6748
|
+
if (_optionalChain([tool, 'access', _188 => _188.function, 'optionalAccess', _189 => _189.name])) return tool.function.name;
|
|
6749
|
+
if (_optionalChain([tool, 'access', _190 => _190.function, 'optionalAccess', _191 => _191.function, 'optionalAccess', _192 => _192.name])) return tool.function.function.name;
|
|
6751
6750
|
return "unknown";
|
|
6752
6751
|
}).join(", ");
|
|
6753
6752
|
const systemInstruction = llmRequest.getSystemInstructionText() || "";
|
|
6754
6753
|
const truncatedSystemInstruction = systemInstruction.length > 100 ? `${systemInstruction.substring(0, 100)}...` : systemInstruction;
|
|
6755
|
-
const contentPreview = _optionalChain([llmRequest, 'access',
|
|
6754
|
+
const contentPreview = _optionalChain([llmRequest, 'access', _193 => _193.contents, 'optionalAccess', _194 => _194.length]) > 0 ? LogFormatter.formatContentPreview(llmRequest.contents[0]) : "none";
|
|
6756
6755
|
this.logger.debugStructured("\u{1F4E4} LLM Request", {
|
|
6757
6756
|
Model: llm.model,
|
|
6758
6757
|
Agent: invocationContext.agent.name,
|
|
6759
|
-
"Content Items": _optionalChain([llmRequest, 'access',
|
|
6758
|
+
"Content Items": _optionalChain([llmRequest, 'access', _195 => _195.contents, 'optionalAccess', _196 => _196.length]) || 0,
|
|
6760
6759
|
"Content Preview": contentPreview,
|
|
6761
6760
|
"System Instruction": truncatedSystemInstruction || "none",
|
|
6762
6761
|
"Available Tools": toolNames || "none",
|
|
6763
|
-
"Tool Count": _optionalChain([llmRequest, 'access',
|
|
6762
|
+
"Tool Count": _optionalChain([llmRequest, 'access', _197 => _197.config, 'optionalAccess', _198 => _198.tools, 'optionalAccess', _199 => _199.length]) || 0,
|
|
6764
6763
|
Streaming: isStreaming ? "Yes" : "No"
|
|
6765
6764
|
});
|
|
6766
6765
|
let responseCount = 0;
|
|
@@ -6775,8 +6774,8 @@ var BaseLlmFlow = (_class25 = class {constructor() { _class25.prototype.__init43
|
|
|
6775
6774
|
llmRequest,
|
|
6776
6775
|
llmResponse
|
|
6777
6776
|
);
|
|
6778
|
-
const tokenCount = _optionalChain([llmResponse, 'access',
|
|
6779
|
-
const functionCalls = _optionalChain([llmResponse, 'access',
|
|
6777
|
+
const tokenCount = _optionalChain([llmResponse, 'access', _200 => _200.usageMetadata, 'optionalAccess', _201 => _201.totalTokenCount]) || "unknown";
|
|
6778
|
+
const functionCalls = _optionalChain([llmResponse, 'access', _202 => _202.content, 'optionalAccess', _203 => _203.parts, 'optionalAccess', _204 => _204.filter, 'call', _205 => _205((part) => part.functionCall)]) || [];
|
|
6780
6779
|
const functionCallsDisplay = LogFormatter.formatFunctionCalls(functionCalls);
|
|
6781
6780
|
const responsePreview = LogFormatter.formatResponsePreview(llmResponse);
|
|
6782
6781
|
this.logger.debugStructured("\u{1F4E5} LLM Response", {
|
|
@@ -6920,7 +6919,7 @@ var EnhancedAuthConfig = class {
|
|
|
6920
6919
|
*/
|
|
6921
6920
|
generateCredentialKey() {
|
|
6922
6921
|
const schemeKey = this.authScheme.type || "unknown";
|
|
6923
|
-
const credentialKey = _optionalChain([this, 'access',
|
|
6922
|
+
const credentialKey = _optionalChain([this, 'access', _206 => _206.rawAuthCredential, 'optionalAccess', _207 => _207.type]) || "none";
|
|
6924
6923
|
const timestamp = Date.now();
|
|
6925
6924
|
return `adk_${schemeKey}_${credentialKey}_${timestamp}`;
|
|
6926
6925
|
}
|
|
@@ -7077,7 +7076,7 @@ var AuthLlmRequestProcessor = class extends BaseLlmRequestProcessor {
|
|
|
7077
7076
|
*/
|
|
7078
7077
|
parseAndStoreAuthResponse(authHandler, invocationContext) {
|
|
7079
7078
|
try {
|
|
7080
|
-
const credentialKey = _optionalChain([authHandler, 'access',
|
|
7079
|
+
const credentialKey = _optionalChain([authHandler, 'access', _208 => _208.authConfig, 'access', _209 => _209.context, 'optionalAccess', _210 => _210.credentialKey]) || `temp:${Date.now()}`;
|
|
7081
7080
|
const fullCredentialKey = credentialKey.startsWith("temp:") ? credentialKey : `temp:${credentialKey}`;
|
|
7082
7081
|
invocationContext.session.state[fullCredentialKey] = authHandler.credential;
|
|
7083
7082
|
if (authHandler.authConfig.authScheme.type === "oauth2" || authHandler.authConfig.authScheme.type === "openIdConnect") {
|
|
@@ -7183,7 +7182,7 @@ var BuiltInCodeExecutor = class extends BaseCodeExecutor {
|
|
|
7183
7182
|
* Pre-process the LLM request for Gemini 2.0+ models to use the code execution tool
|
|
7184
7183
|
*/
|
|
7185
7184
|
processLlmRequest(llmRequest) {
|
|
7186
|
-
if (!_optionalChain([llmRequest, 'access',
|
|
7185
|
+
if (!_optionalChain([llmRequest, 'access', _211 => _211.model, 'optionalAccess', _212 => _212.startsWith, 'call', _213 => _213("gemini-2")])) {
|
|
7187
7186
|
throw new Error(
|
|
7188
7187
|
`Gemini code execution tool is not supported for model ${llmRequest.model}`
|
|
7189
7188
|
);
|
|
@@ -7228,7 +7227,7 @@ var CodeExecutionUtils = class _CodeExecutionUtils {
|
|
|
7228
7227
|
* Extracts the first code block from the content and truncates everything after it
|
|
7229
7228
|
*/
|
|
7230
7229
|
static extractCodeAndTruncateContent(content, codeBlockDelimiters) {
|
|
7231
|
-
if (!_optionalChain([content, 'optionalAccess',
|
|
7230
|
+
if (!_optionalChain([content, 'optionalAccess', _214 => _214.parts, 'optionalAccess', _215 => _215.length])) {
|
|
7232
7231
|
return null;
|
|
7233
7232
|
}
|
|
7234
7233
|
for (let idx = 0; idx < content.parts.length; idx++) {
|
|
@@ -7314,7 +7313,7 @@ ${fileNames}`);
|
|
|
7314
7313
|
* Converts the code execution parts to text parts in a Content
|
|
7315
7314
|
*/
|
|
7316
7315
|
static convertCodeExecutionParts(content, codeBlockDelimiter, executionResultDelimiters) {
|
|
7317
|
-
if (!_optionalChain([content, 'access',
|
|
7316
|
+
if (!_optionalChain([content, 'access', _216 => _216.parts, 'optionalAccess', _217 => _217.length])) {
|
|
7318
7317
|
return;
|
|
7319
7318
|
}
|
|
7320
7319
|
const lastPart = content.parts[content.parts.length - 1];
|
|
@@ -7707,7 +7706,7 @@ async function* runPostProcessor(invocationContext, llmResponse) {
|
|
|
7707
7706
|
function extractAndReplaceInlineFiles(codeExecutorContext, llmRequest) {
|
|
7708
7707
|
const allInputFiles = codeExecutorContext.getInputFiles();
|
|
7709
7708
|
const savedFileNames = new Set(allInputFiles.map((f) => f.name));
|
|
7710
|
-
for (let i = 0; i < (_optionalChain([llmRequest, 'access',
|
|
7709
|
+
for (let i = 0; i < (_optionalChain([llmRequest, 'access', _218 => _218.contents, 'optionalAccess', _219 => _219.length]) || 0); i++) {
|
|
7711
7710
|
const content = llmRequest.contents[i];
|
|
7712
7711
|
if (content.role !== "user" || !content.parts) {
|
|
7713
7712
|
continue;
|
|
@@ -7739,7 +7738,7 @@ Available file: \`${fileName}\`
|
|
|
7739
7738
|
}
|
|
7740
7739
|
function getOrSetExecutionId(invocationContext, codeExecutorContext) {
|
|
7741
7740
|
const agent = invocationContext.agent;
|
|
7742
|
-
if (!hasCodeExecutor(agent) || !_optionalChain([agent, 'access',
|
|
7741
|
+
if (!hasCodeExecutor(agent) || !_optionalChain([agent, 'access', _220 => _220.codeExecutor, 'optionalAccess', _221 => _221.stateful])) {
|
|
7743
7742
|
return void 0;
|
|
7744
7743
|
}
|
|
7745
7744
|
let executionId = codeExecutorContext.getExecutionId();
|
|
@@ -7970,7 +7969,7 @@ function rearrangeEventsForLatestFunctionResponse(events) {
|
|
|
7970
7969
|
continue;
|
|
7971
7970
|
}
|
|
7972
7971
|
const functionResponses2 = event.getFunctionResponses();
|
|
7973
|
-
if (_optionalChain([functionResponses2, 'optionalAccess',
|
|
7972
|
+
if (_optionalChain([functionResponses2, 'optionalAccess', _222 => _222.some, 'call', _223 => _223((fr) => fr.id && functionResponsesIds.has(fr.id))])) {
|
|
7974
7973
|
functionResponseEvents.push(event);
|
|
7975
7974
|
}
|
|
7976
7975
|
}
|
|
@@ -8069,7 +8068,7 @@ function mergeFunctionResponseEvents(functionResponseEvents) {
|
|
|
8069
8068
|
const partIndicesInMergedEvent = {};
|
|
8070
8069
|
for (let idx = 0; idx < partsInMergedEvent.length; idx++) {
|
|
8071
8070
|
const part = partsInMergedEvent[idx];
|
|
8072
|
-
if (_optionalChain([part, 'access',
|
|
8071
|
+
if (_optionalChain([part, 'access', _224 => _224.functionResponse, 'optionalAccess', _225 => _225.id])) {
|
|
8073
8072
|
partIndicesInMergedEvent[part.functionResponse.id] = idx;
|
|
8074
8073
|
}
|
|
8075
8074
|
}
|
|
@@ -8078,7 +8077,7 @@ function mergeFunctionResponseEvents(functionResponseEvents) {
|
|
|
8078
8077
|
throw new Error("There should be at least one function_response part.");
|
|
8079
8078
|
}
|
|
8080
8079
|
for (const part of event.content.parts) {
|
|
8081
|
-
if (_optionalChain([part, 'access',
|
|
8080
|
+
if (_optionalChain([part, 'access', _226 => _226.functionResponse, 'optionalAccess', _227 => _227.id])) {
|
|
8082
8081
|
const functionCallId = part.functionResponse.id;
|
|
8083
8082
|
if (functionCallId in partIndicesInMergedEvent) {
|
|
8084
8083
|
partsInMergedEvent[partIndicesInMergedEvent[functionCallId]] = part;
|
|
@@ -8347,7 +8346,7 @@ var PlanReActPlanner = class extends BasePlanner {
|
|
|
8347
8346
|
let firstFcPartIndex = -1;
|
|
8348
8347
|
for (let i = 0; i < responseParts.length; i++) {
|
|
8349
8348
|
if (responseParts[i].functionCall) {
|
|
8350
|
-
if (!_optionalChain([responseParts, 'access',
|
|
8349
|
+
if (!_optionalChain([responseParts, 'access', _228 => _228[i], 'access', _229 => _229.functionCall, 'optionalAccess', _230 => _230.name])) {
|
|
8351
8350
|
continue;
|
|
8352
8351
|
}
|
|
8353
8352
|
preservedParts.push(responseParts[i]);
|
|
@@ -8386,7 +8385,7 @@ var PlanReActPlanner = class extends BasePlanner {
|
|
|
8386
8385
|
* Handles non-function-call parts of the response
|
|
8387
8386
|
*/
|
|
8388
8387
|
_handleNonFunctionCallParts(responsePart, preservedParts) {
|
|
8389
|
-
if (_optionalChain([responsePart, 'access',
|
|
8388
|
+
if (_optionalChain([responsePart, 'access', _231 => _231.text, 'optionalAccess', _232 => _232.includes, 'call', _233 => _233(FINAL_ANSWER_TAG)])) {
|
|
8390
8389
|
const [reasoningText, finalAnswerText] = this._splitByLastPattern(
|
|
8391
8390
|
responsePart.text,
|
|
8392
8391
|
FINAL_ANSWER_TAG
|
|
@@ -8635,7 +8634,7 @@ var SharedMemoryRequestProcessor = class extends BaseLlmRequestProcessor {
|
|
|
8635
8634
|
const memoryService = invocationContext.memoryService;
|
|
8636
8635
|
if (!memoryService) return;
|
|
8637
8636
|
const lastUserEvent = invocationContext.session.events.findLast(
|
|
8638
|
-
(e) => e.author === "user" && _optionalChain([e, 'access',
|
|
8637
|
+
(e) => e.author === "user" && _optionalChain([e, 'access', _234 => _234.content, 'optionalAccess', _235 => _235.parts, 'optionalAccess', _236 => _236.length])
|
|
8639
8638
|
);
|
|
8640
8639
|
if (!lastUserEvent) return;
|
|
8641
8640
|
const query = (_nullishCoalesce(lastUserEvent.content.parts, () => ( []))).map((p) => p.text || "").join(" ");
|
|
@@ -8646,7 +8645,7 @@ var SharedMemoryRequestProcessor = class extends BaseLlmRequestProcessor {
|
|
|
8646
8645
|
});
|
|
8647
8646
|
const sessionTexts = new Set(
|
|
8648
8647
|
(llmRequest.contents || []).flatMap(
|
|
8649
|
-
(c) => _optionalChain([c, 'access',
|
|
8648
|
+
(c) => _optionalChain([c, 'access', _237 => _237.parts, 'optionalAccess', _238 => _238.map, 'call', _239 => _239((p) => p.text)]) || []
|
|
8650
8649
|
)
|
|
8651
8650
|
);
|
|
8652
8651
|
for (const memory of results.memories) {
|
|
@@ -9069,7 +9068,7 @@ var LlmAgent = (_class27 = class _LlmAgent extends BaseAgent {
|
|
|
9069
9068
|
* This matches the Python implementation's _llm_flow property
|
|
9070
9069
|
*/
|
|
9071
9070
|
get llmFlow() {
|
|
9072
|
-
if (this.disallowTransferToParent && this.disallowTransferToPeers && !_optionalChain([this, 'access',
|
|
9071
|
+
if (this.disallowTransferToParent && this.disallowTransferToPeers && !_optionalChain([this, 'access', _240 => _240.subAgents, 'optionalAccess', _241 => _241.length])) {
|
|
9073
9072
|
return new SingleFlow();
|
|
9074
9073
|
}
|
|
9075
9074
|
return new AutoFlow();
|
|
@@ -9085,7 +9084,7 @@ var LlmAgent = (_class27 = class _LlmAgent extends BaseAgent {
|
|
|
9085
9084
|
);
|
|
9086
9085
|
return;
|
|
9087
9086
|
}
|
|
9088
|
-
if (this.outputKey && event.isFinalResponse() && _optionalChain([event, 'access',
|
|
9087
|
+
if (this.outputKey && event.isFinalResponse() && _optionalChain([event, 'access', _242 => _242.content, 'optionalAccess', _243 => _243.parts])) {
|
|
9089
9088
|
let result = event.content.parts.map((part) => part.text || "").join("");
|
|
9090
9089
|
if (this.outputSchema) {
|
|
9091
9090
|
if (!result.trim()) {
|
|
@@ -9313,7 +9312,7 @@ var LoopAgent = class extends BaseAgent {
|
|
|
9313
9312
|
for (const subAgent of this.subAgents) {
|
|
9314
9313
|
for await (const event of subAgent.runAsync(ctx)) {
|
|
9315
9314
|
yield event;
|
|
9316
|
-
if (_optionalChain([event, 'access',
|
|
9315
|
+
if (_optionalChain([event, 'access', _244 => _244.actions, 'optionalAccess', _245 => _245.escalate])) {
|
|
9317
9316
|
return;
|
|
9318
9317
|
}
|
|
9319
9318
|
}
|
|
@@ -9555,6 +9554,7 @@ var LangGraphAgent = (_class28 = class extends BaseAgent {
|
|
|
9555
9554
|
}, _class28);
|
|
9556
9555
|
|
|
9557
9556
|
// src/agents/agent-builder.ts
|
|
9557
|
+
init_logger();
|
|
9558
9558
|
|
|
9559
9559
|
|
|
9560
9560
|
// src/runners.ts
|
|
@@ -9624,17 +9624,17 @@ var RunConfig = class {
|
|
|
9624
9624
|
*/
|
|
9625
9625
|
|
|
9626
9626
|
constructor(config) {
|
|
9627
|
-
this.speechConfig = _optionalChain([config, 'optionalAccess',
|
|
9628
|
-
this.responseModalities = _optionalChain([config, 'optionalAccess',
|
|
9629
|
-
this.saveInputBlobsAsArtifacts = _optionalChain([config, 'optionalAccess',
|
|
9630
|
-
this.supportCFC = _optionalChain([config, 'optionalAccess',
|
|
9631
|
-
this.streamingMode = _optionalChain([config, 'optionalAccess',
|
|
9632
|
-
this.outputAudioTranscription = _optionalChain([config, 'optionalAccess',
|
|
9633
|
-
this.inputAudioTranscription = _optionalChain([config, 'optionalAccess',
|
|
9634
|
-
this.realtimeInputConfig = _optionalChain([config, 'optionalAccess',
|
|
9635
|
-
this.enableAffectiveDialog = _optionalChain([config, 'optionalAccess',
|
|
9636
|
-
this.proactivity = _optionalChain([config, 'optionalAccess',
|
|
9637
|
-
this.maxLlmCalls = _nullishCoalesce(_optionalChain([config, 'optionalAccess',
|
|
9627
|
+
this.speechConfig = _optionalChain([config, 'optionalAccess', _246 => _246.speechConfig]);
|
|
9628
|
+
this.responseModalities = _optionalChain([config, 'optionalAccess', _247 => _247.responseModalities]);
|
|
9629
|
+
this.saveInputBlobsAsArtifacts = _optionalChain([config, 'optionalAccess', _248 => _248.saveInputBlobsAsArtifacts]) || false;
|
|
9630
|
+
this.supportCFC = _optionalChain([config, 'optionalAccess', _249 => _249.supportCFC]) || false;
|
|
9631
|
+
this.streamingMode = _optionalChain([config, 'optionalAccess', _250 => _250.streamingMode]) || "NONE" /* NONE */;
|
|
9632
|
+
this.outputAudioTranscription = _optionalChain([config, 'optionalAccess', _251 => _251.outputAudioTranscription]);
|
|
9633
|
+
this.inputAudioTranscription = _optionalChain([config, 'optionalAccess', _252 => _252.inputAudioTranscription]);
|
|
9634
|
+
this.realtimeInputConfig = _optionalChain([config, 'optionalAccess', _253 => _253.realtimeInputConfig]);
|
|
9635
|
+
this.enableAffectiveDialog = _optionalChain([config, 'optionalAccess', _254 => _254.enableAffectiveDialog]);
|
|
9636
|
+
this.proactivity = _optionalChain([config, 'optionalAccess', _255 => _255.proactivity]);
|
|
9637
|
+
this.maxLlmCalls = _nullishCoalesce(_optionalChain([config, 'optionalAccess', _256 => _256.maxLlmCalls]), () => ( 500));
|
|
9638
9638
|
this.validateMaxLlmCalls();
|
|
9639
9639
|
}
|
|
9640
9640
|
/**
|
|
@@ -9668,19 +9668,19 @@ var InMemoryArtifactService = (_class29 = class {constructor() { _class29.protot
|
|
|
9668
9668
|
}
|
|
9669
9669
|
async saveArtifact(args) {
|
|
9670
9670
|
const { appName, userId, sessionId, filename, artifact } = args;
|
|
9671
|
-
const
|
|
9672
|
-
if (!this.artifacts.has(
|
|
9673
|
-
this.artifacts.set(
|
|
9671
|
+
const path3 = this.getArtifactPath(appName, userId, sessionId, filename);
|
|
9672
|
+
if (!this.artifacts.has(path3)) {
|
|
9673
|
+
this.artifacts.set(path3, []);
|
|
9674
9674
|
}
|
|
9675
|
-
const versions = this.artifacts.get(
|
|
9675
|
+
const versions = this.artifacts.get(path3);
|
|
9676
9676
|
const version = versions.length;
|
|
9677
9677
|
versions.push(artifact);
|
|
9678
9678
|
return version;
|
|
9679
9679
|
}
|
|
9680
9680
|
async loadArtifact(args) {
|
|
9681
9681
|
const { appName, userId, sessionId, filename, version } = args;
|
|
9682
|
-
const
|
|
9683
|
-
const versions = this.artifacts.get(
|
|
9682
|
+
const path3 = this.getArtifactPath(appName, userId, sessionId, filename);
|
|
9683
|
+
const versions = this.artifacts.get(path3);
|
|
9684
9684
|
if (!versions || versions.length === 0) {
|
|
9685
9685
|
return null;
|
|
9686
9686
|
}
|
|
@@ -9701,12 +9701,12 @@ var InMemoryArtifactService = (_class29 = class {constructor() { _class29.protot
|
|
|
9701
9701
|
const sessionPrefix = `${appName}/${userId}/${sessionId}/`;
|
|
9702
9702
|
const userNamespacePrefix = `${appName}/${userId}/user/`;
|
|
9703
9703
|
const filenames = [];
|
|
9704
|
-
for (const
|
|
9705
|
-
if (
|
|
9706
|
-
const filename =
|
|
9704
|
+
for (const path3 of this.artifacts.keys()) {
|
|
9705
|
+
if (path3.startsWith(sessionPrefix)) {
|
|
9706
|
+
const filename = path3.substring(sessionPrefix.length);
|
|
9707
9707
|
filenames.push(filename);
|
|
9708
|
-
} else if (
|
|
9709
|
-
const filename =
|
|
9708
|
+
} else if (path3.startsWith(userNamespacePrefix)) {
|
|
9709
|
+
const filename = path3.substring(userNamespacePrefix.length);
|
|
9710
9710
|
filenames.push(filename);
|
|
9711
9711
|
}
|
|
9712
9712
|
}
|
|
@@ -9714,16 +9714,16 @@ var InMemoryArtifactService = (_class29 = class {constructor() { _class29.protot
|
|
|
9714
9714
|
}
|
|
9715
9715
|
async deleteArtifact(args) {
|
|
9716
9716
|
const { appName, userId, sessionId, filename } = args;
|
|
9717
|
-
const
|
|
9718
|
-
if (!this.artifacts.has(
|
|
9717
|
+
const path3 = this.getArtifactPath(appName, userId, sessionId, filename);
|
|
9718
|
+
if (!this.artifacts.has(path3)) {
|
|
9719
9719
|
return;
|
|
9720
9720
|
}
|
|
9721
|
-
this.artifacts.delete(
|
|
9721
|
+
this.artifacts.delete(path3);
|
|
9722
9722
|
}
|
|
9723
9723
|
async listVersions(args) {
|
|
9724
9724
|
const { appName, userId, sessionId, filename } = args;
|
|
9725
|
-
const
|
|
9726
|
-
const versions = this.artifacts.get(
|
|
9725
|
+
const path3 = this.getArtifactPath(appName, userId, sessionId, filename);
|
|
9726
|
+
const versions = this.artifacts.get(path3);
|
|
9727
9727
|
if (!versions || versions.length === 0) {
|
|
9728
9728
|
return [];
|
|
9729
9729
|
}
|
|
@@ -9778,7 +9778,7 @@ var InMemoryMemoryService = (_class30 = class {
|
|
|
9778
9778
|
}
|
|
9779
9779
|
const userSessions = this._sessionEvents.get(userKey);
|
|
9780
9780
|
const filteredEvents = session.events.filter(
|
|
9781
|
-
(event) => _optionalChain([event, 'access',
|
|
9781
|
+
(event) => _optionalChain([event, 'access', _257 => _257.content, 'optionalAccess', _258 => _258.parts])
|
|
9782
9782
|
);
|
|
9783
9783
|
userSessions.set(session.id, filteredEvents);
|
|
9784
9784
|
}
|
|
@@ -9917,7 +9917,7 @@ var InMemorySessionService = (_class31 = class extends BaseSessionService {const
|
|
|
9917
9917
|
return this.createSessionImpl(appName, userId, state, sessionId);
|
|
9918
9918
|
}
|
|
9919
9919
|
createSessionImpl(appName, userId, state, sessionId) {
|
|
9920
|
-
const finalSessionId = _optionalChain([sessionId, 'optionalAccess',
|
|
9920
|
+
const finalSessionId = _optionalChain([sessionId, 'optionalAccess', _259 => _259.trim, 'call', _260 => _260()]) || _crypto.randomUUID.call(void 0, );
|
|
9921
9921
|
const session = {
|
|
9922
9922
|
appName,
|
|
9923
9923
|
userId,
|
|
@@ -10074,7 +10074,7 @@ var InMemorySessionService = (_class31 = class extends BaseSessionService {const
|
|
|
10074
10074
|
warning(`sessionId ${sessionId} not in sessions[appName][userId]`);
|
|
10075
10075
|
return event;
|
|
10076
10076
|
}
|
|
10077
|
-
if (_optionalChain([event, 'access',
|
|
10077
|
+
if (_optionalChain([event, 'access', _261 => _261.actions, 'optionalAccess', _262 => _262.stateDelta])) {
|
|
10078
10078
|
for (const key in event.actions.stateDelta) {
|
|
10079
10079
|
const value = event.actions.stateDelta[key];
|
|
10080
10080
|
if (key.startsWith(State.APP_PREFIX)) {
|
|
@@ -10108,14 +10108,14 @@ function _findFunctionCallEventIfLastEventIsFunctionResponse(session) {
|
|
|
10108
10108
|
return null;
|
|
10109
10109
|
}
|
|
10110
10110
|
const lastEvent = events[events.length - 1];
|
|
10111
|
-
if (_optionalChain([lastEvent, 'access',
|
|
10112
|
-
const functionCallId = _optionalChain([lastEvent, 'access',
|
|
10111
|
+
if (_optionalChain([lastEvent, 'access', _263 => _263.content, 'optionalAccess', _264 => _264.parts, 'optionalAccess', _265 => _265.some, 'call', _266 => _266((part) => part.functionResponse)])) {
|
|
10112
|
+
const functionCallId = _optionalChain([lastEvent, 'access', _267 => _267.content, 'access', _268 => _268.parts, 'access', _269 => _269.find, 'call', _270 => _270(
|
|
10113
10113
|
(part) => part.functionResponse
|
|
10114
|
-
), 'optionalAccess',
|
|
10114
|
+
), 'optionalAccess', _271 => _271.functionResponse, 'optionalAccess', _272 => _272.id]);
|
|
10115
10115
|
if (!functionCallId) return null;
|
|
10116
10116
|
for (let i = events.length - 2; i >= 0; i--) {
|
|
10117
10117
|
const event = events[i];
|
|
10118
|
-
const functionCalls = _optionalChain([event, 'access',
|
|
10118
|
+
const functionCalls = _optionalChain([event, 'access', _273 => _273.getFunctionCalls, 'optionalCall', _274 => _274()]) || [];
|
|
10119
10119
|
for (const functionCall of functionCalls) {
|
|
10120
10120
|
if (functionCall.id === functionCallId) {
|
|
10121
10121
|
return event;
|
|
@@ -10193,7 +10193,7 @@ var Runner = (_class32 = class {
|
|
|
10193
10193
|
}
|
|
10194
10194
|
};
|
|
10195
10195
|
invokeRunAsync();
|
|
10196
|
-
return function* () {
|
|
10196
|
+
return (function* () {
|
|
10197
10197
|
while (true) {
|
|
10198
10198
|
while (queueIndex >= eventQueue.length && !asyncCompleted) {
|
|
10199
10199
|
}
|
|
@@ -10206,7 +10206,7 @@ var Runner = (_class32 = class {
|
|
|
10206
10206
|
}
|
|
10207
10207
|
yield event;
|
|
10208
10208
|
}
|
|
10209
|
-
}();
|
|
10209
|
+
})();
|
|
10210
10210
|
}
|
|
10211
10211
|
/**
|
|
10212
10212
|
* Main entry method to run the agent in this runner.
|
|
@@ -10318,15 +10318,15 @@ var Runner = (_class32 = class {
|
|
|
10318
10318
|
*/
|
|
10319
10319
|
_findAgentToRun(session, rootAgent) {
|
|
10320
10320
|
const event = _findFunctionCallEventIfLastEventIsFunctionResponse(session);
|
|
10321
|
-
if (_optionalChain([event, 'optionalAccess',
|
|
10321
|
+
if (_optionalChain([event, 'optionalAccess', _275 => _275.author])) {
|
|
10322
10322
|
return rootAgent.findAgent(event.author);
|
|
10323
10323
|
}
|
|
10324
|
-
const nonUserEvents = _optionalChain([session, 'access',
|
|
10324
|
+
const nonUserEvents = _optionalChain([session, 'access', _276 => _276.events, 'optionalAccess', _277 => _277.filter, 'call', _278 => _278((e) => e.author !== "user"), 'access', _279 => _279.reverse, 'call', _280 => _280()]) || [];
|
|
10325
10325
|
for (const event2 of nonUserEvents) {
|
|
10326
10326
|
if (event2.author === rootAgent.name) {
|
|
10327
10327
|
return rootAgent;
|
|
10328
10328
|
}
|
|
10329
|
-
const agent = _optionalChain([rootAgent, 'access',
|
|
10329
|
+
const agent = _optionalChain([rootAgent, 'access', _281 => _281.findSubAgent, 'optionalCall', _282 => _282(event2.author)]);
|
|
10330
10330
|
if (!agent) {
|
|
10331
10331
|
this.logger.debug(
|
|
10332
10332
|
`Event from an unknown agent: ${event2.author}, event id: ${event2.id}`
|
|
@@ -10406,10 +10406,16 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10406
10406
|
|
|
10407
10407
|
__init56() {this.agentType = "llm"}
|
|
10408
10408
|
|
|
10409
|
+
|
|
10410
|
+
// existingAgent: if provided (via withAgent), it is reused directly instead of building a new agent
|
|
10411
|
+
__init57() {this.definitionLocked = false}
|
|
10412
|
+
// definitionLocked (above): set once withAgent() is called, to flag further definition mutation
|
|
10413
|
+
__init58() {this.warnedMethods = /* @__PURE__ */ new Set()}
|
|
10414
|
+
__init59() {this.logger = new Logger({ name: "AgentBuilder" })}
|
|
10409
10415
|
/**
|
|
10410
10416
|
* Private constructor - use static create() method
|
|
10411
10417
|
*/
|
|
10412
|
-
constructor(name) {;_class33.prototype.__init56.call(this);
|
|
10418
|
+
constructor(name) {;_class33.prototype.__init56.call(this);_class33.prototype.__init57.call(this);_class33.prototype.__init58.call(this);_class33.prototype.__init59.call(this);
|
|
10413
10419
|
this.config = { name };
|
|
10414
10420
|
}
|
|
10415
10421
|
/**
|
|
@@ -10434,6 +10440,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10434
10440
|
* @returns This builder instance for chaining
|
|
10435
10441
|
*/
|
|
10436
10442
|
withModel(model) {
|
|
10443
|
+
this.warnIfLocked("withModel");
|
|
10437
10444
|
this.config.model = model;
|
|
10438
10445
|
return this;
|
|
10439
10446
|
}
|
|
@@ -10443,6 +10450,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10443
10450
|
* @returns This builder instance for chaining
|
|
10444
10451
|
*/
|
|
10445
10452
|
withDescription(description) {
|
|
10453
|
+
this.warnIfLocked("withDescription");
|
|
10446
10454
|
this.config.description = description;
|
|
10447
10455
|
return this;
|
|
10448
10456
|
}
|
|
@@ -10452,14 +10460,17 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10452
10460
|
* @returns This builder instance for chaining
|
|
10453
10461
|
*/
|
|
10454
10462
|
withInstruction(instruction) {
|
|
10463
|
+
this.warnIfLocked("withInstruction");
|
|
10455
10464
|
this.config.instruction = instruction;
|
|
10456
10465
|
return this;
|
|
10457
10466
|
}
|
|
10458
10467
|
withInputSchema(schema) {
|
|
10468
|
+
this.warnIfLocked("withInputSchema");
|
|
10459
10469
|
this.config.inputSchema = schema;
|
|
10460
10470
|
return this;
|
|
10461
10471
|
}
|
|
10462
10472
|
withOutputSchema(schema) {
|
|
10473
|
+
this.warnIfLocked("withOutputSchema");
|
|
10463
10474
|
this.config.outputSchema = schema;
|
|
10464
10475
|
return this;
|
|
10465
10476
|
}
|
|
@@ -10469,6 +10480,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10469
10480
|
* @returns This builder instance for chaining
|
|
10470
10481
|
*/
|
|
10471
10482
|
withTools(...tools) {
|
|
10483
|
+
this.warnIfLocked("withTools");
|
|
10472
10484
|
this.config.tools = [...this.config.tools || [], ...tools];
|
|
10473
10485
|
return this;
|
|
10474
10486
|
}
|
|
@@ -10478,6 +10490,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10478
10490
|
* @returns This builder instance for chaining
|
|
10479
10491
|
*/
|
|
10480
10492
|
withPlanner(planner) {
|
|
10493
|
+
this.warnIfLocked("withPlanner");
|
|
10481
10494
|
this.config.planner = planner;
|
|
10482
10495
|
return this;
|
|
10483
10496
|
}
|
|
@@ -10487,6 +10500,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10487
10500
|
* @returns This builder instance for chaining
|
|
10488
10501
|
*/
|
|
10489
10502
|
withCodeExecutor(codeExecutor) {
|
|
10503
|
+
this.warnIfLocked("withCodeExecutor");
|
|
10490
10504
|
this.config.codeExecutor = codeExecutor;
|
|
10491
10505
|
return this;
|
|
10492
10506
|
}
|
|
@@ -10496,6 +10510,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10496
10510
|
* @returns This builder instance for chaining
|
|
10497
10511
|
*/
|
|
10498
10512
|
withOutputKey(outputKey) {
|
|
10513
|
+
this.warnIfLocked("withOutputKey");
|
|
10499
10514
|
this.config.outputKey = outputKey;
|
|
10500
10515
|
return this;
|
|
10501
10516
|
}
|
|
@@ -10505,6 +10520,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10505
10520
|
* @returns This builder instance for chaining
|
|
10506
10521
|
*/
|
|
10507
10522
|
withSubAgents(subAgents) {
|
|
10523
|
+
this.warnIfLocked("withSubAgents");
|
|
10508
10524
|
this.config.subAgents = subAgents;
|
|
10509
10525
|
return this;
|
|
10510
10526
|
}
|
|
@@ -10514,6 +10530,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10514
10530
|
* @returns This builder instance for chaining
|
|
10515
10531
|
*/
|
|
10516
10532
|
withBeforeAgentCallback(callback) {
|
|
10533
|
+
this.warnIfLocked("withBeforeAgentCallback");
|
|
10517
10534
|
this.config.beforeAgentCallback = callback;
|
|
10518
10535
|
return this;
|
|
10519
10536
|
}
|
|
@@ -10523,15 +10540,29 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10523
10540
|
* @returns This builder instance for chaining
|
|
10524
10541
|
*/
|
|
10525
10542
|
withAfterAgentCallback(callback) {
|
|
10543
|
+
this.warnIfLocked("withAfterAgentCallback");
|
|
10526
10544
|
this.config.afterAgentCallback = callback;
|
|
10527
10545
|
return this;
|
|
10528
10546
|
}
|
|
10547
|
+
/**
|
|
10548
|
+
* Provide an already constructed agent instance. Further definition-mutating calls
|
|
10549
|
+
* (model/tools/instruction/etc.) will be ignored with a dev warning.
|
|
10550
|
+
*/
|
|
10551
|
+
withAgent(agent) {
|
|
10552
|
+
this.existingAgent = agent;
|
|
10553
|
+
this.definitionLocked = true;
|
|
10554
|
+
if (this.config.name === "default_agent" && agent.name) {
|
|
10555
|
+
this.config.name = agent.name;
|
|
10556
|
+
}
|
|
10557
|
+
return this;
|
|
10558
|
+
}
|
|
10529
10559
|
/**
|
|
10530
10560
|
* Configure as a sequential agent
|
|
10531
10561
|
* @param subAgents Sub-agents to execute in sequence
|
|
10532
10562
|
* @returns This builder instance for chaining
|
|
10533
10563
|
*/
|
|
10534
10564
|
asSequential(subAgents) {
|
|
10565
|
+
this.warnIfLocked("asSequential");
|
|
10535
10566
|
this.agentType = "sequential";
|
|
10536
10567
|
this.config.subAgents = subAgents;
|
|
10537
10568
|
return this;
|
|
@@ -10542,6 +10573,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10542
10573
|
* @returns This builder instance for chaining
|
|
10543
10574
|
*/
|
|
10544
10575
|
asParallel(subAgents) {
|
|
10576
|
+
this.warnIfLocked("asParallel");
|
|
10545
10577
|
this.agentType = "parallel";
|
|
10546
10578
|
this.config.subAgents = subAgents;
|
|
10547
10579
|
return this;
|
|
@@ -10553,6 +10585,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10553
10585
|
* @returns This builder instance for chaining
|
|
10554
10586
|
*/
|
|
10555
10587
|
asLoop(subAgents, maxIterations = 3) {
|
|
10588
|
+
this.warnIfLocked("asLoop");
|
|
10556
10589
|
this.agentType = "loop";
|
|
10557
10590
|
this.config.subAgents = subAgents;
|
|
10558
10591
|
this.config.maxIterations = maxIterations;
|
|
@@ -10565,6 +10598,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10565
10598
|
* @returns This builder instance for chaining
|
|
10566
10599
|
*/
|
|
10567
10600
|
asLangGraph(nodes, rootNode) {
|
|
10601
|
+
this.warnIfLocked("asLangGraph");
|
|
10568
10602
|
this.agentType = "langgraph";
|
|
10569
10603
|
this.config.nodes = nodes;
|
|
10570
10604
|
this.config.rootNode = rootNode;
|
|
@@ -10691,6 +10725,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10691
10725
|
* @returns Created agent instance
|
|
10692
10726
|
*/
|
|
10693
10727
|
createAgent() {
|
|
10728
|
+
if (this.existingAgent) return this.existingAgent;
|
|
10694
10729
|
switch (this.agentType) {
|
|
10695
10730
|
case "llm": {
|
|
10696
10731
|
if (!this.config.model) {
|
|
@@ -10785,7 +10820,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10785
10820
|
async ask(message) {
|
|
10786
10821
|
const newMessage = typeof message === "string" ? { parts: [{ text: message }] } : typeof message === "object" && "contents" in message ? { parts: message.contents[message.contents.length - 1].parts } : message;
|
|
10787
10822
|
let response = "";
|
|
10788
|
-
if (!_optionalChain([sessionOptions, 'optionalAccess',
|
|
10823
|
+
if (!_optionalChain([sessionOptions, 'optionalAccess', _283 => _283.userId])) {
|
|
10789
10824
|
throw new Error("Session configuration is required");
|
|
10790
10825
|
}
|
|
10791
10826
|
for await (const event of baseRunner.runAsync({
|
|
@@ -10793,7 +10828,7 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10793
10828
|
sessionId: session.id,
|
|
10794
10829
|
newMessage
|
|
10795
10830
|
})) {
|
|
10796
|
-
if (_optionalChain([event, 'access',
|
|
10831
|
+
if (_optionalChain([event, 'access', _284 => _284.content, 'optionalAccess', _285 => _285.parts]) && Array.isArray(event.content.parts)) {
|
|
10797
10832
|
const content = event.content.parts.map(
|
|
10798
10833
|
(part) => (part && typeof part === "object" && "text" in part ? part.text : "") || ""
|
|
10799
10834
|
).join("");
|
|
@@ -10821,6 +10856,22 @@ var AgentBuilder = (_class33 = class _AgentBuilder {
|
|
|
10821
10856
|
}
|
|
10822
10857
|
};
|
|
10823
10858
|
}
|
|
10859
|
+
/**
|
|
10860
|
+
* Warn (once per method) if the definition has been locked by withAgent().
|
|
10861
|
+
*/
|
|
10862
|
+
warnIfLocked(method) {
|
|
10863
|
+
if (!this.definitionLocked) return;
|
|
10864
|
+
if (this.warnedMethods.has(method)) return;
|
|
10865
|
+
this.warnedMethods.add(method);
|
|
10866
|
+
if (process.env.NODE_ENV !== "production") {
|
|
10867
|
+
const msg = `AgentBuilder: attempted to call ${method} after withAgent(); ignoring. (Wrap the agent first OR configure before withAgent).`;
|
|
10868
|
+
if (this.logger && typeof this.logger.warn === "function") {
|
|
10869
|
+
this.logger.warn(msg);
|
|
10870
|
+
} else {
|
|
10871
|
+
console.warn(msg);
|
|
10872
|
+
}
|
|
10873
|
+
}
|
|
10874
|
+
}
|
|
10824
10875
|
}, _class33);
|
|
10825
10876
|
|
|
10826
10877
|
// src/memory/index.ts
|
|
@@ -10885,7 +10936,7 @@ var VertexAiSessionService = class extends BaseSessionService {
|
|
|
10885
10936
|
path: `operations/${operationId}`,
|
|
10886
10937
|
request_dict: {}
|
|
10887
10938
|
});
|
|
10888
|
-
if (_optionalChain([lroResponse, 'optionalAccess',
|
|
10939
|
+
if (_optionalChain([lroResponse, 'optionalAccess', _286 => _286.done])) {
|
|
10889
10940
|
break;
|
|
10890
10941
|
}
|
|
10891
10942
|
await new Promise((resolve) => setTimeout(resolve, 1e3));
|
|
@@ -10985,14 +11036,14 @@ var VertexAiSessionService = class extends BaseSessionService {
|
|
|
10985
11036
|
async listSessions(appName, userId) {
|
|
10986
11037
|
const reasoningEngineId = this.getReasoningEngineId(appName);
|
|
10987
11038
|
const apiClient = this.getApiClient();
|
|
10988
|
-
let
|
|
11039
|
+
let path3 = `reasoningEngines/${reasoningEngineId}/sessions`;
|
|
10989
11040
|
if (userId) {
|
|
10990
11041
|
const parsedUserId = encodeURIComponent(`"${userId}"`);
|
|
10991
|
-
|
|
11042
|
+
path3 = `${path3}?filter=user_id=${parsedUserId}`;
|
|
10992
11043
|
}
|
|
10993
11044
|
const apiResponse = await apiClient.async_request({
|
|
10994
11045
|
http_method: "GET",
|
|
10995
|
-
path:
|
|
11046
|
+
path: path3,
|
|
10996
11047
|
request_dict: {}
|
|
10997
11048
|
});
|
|
10998
11049
|
if (apiResponse.httpHeaders) {
|
|
@@ -11157,9 +11208,9 @@ var VertexAiSessionService = class extends BaseSessionService {
|
|
|
11157
11208
|
var _kysely = require('kysely');
|
|
11158
11209
|
var DatabaseSessionService = (_class34 = class extends BaseSessionService {
|
|
11159
11210
|
|
|
11160
|
-
|
|
11211
|
+
__init60() {this.initialized = false}
|
|
11161
11212
|
constructor(config) {
|
|
11162
|
-
super();_class34.prototype.
|
|
11213
|
+
super();_class34.prototype.__init60.call(this);;
|
|
11163
11214
|
this.db = config.db;
|
|
11164
11215
|
if (!config.skipTableCreation) {
|
|
11165
11216
|
this.initializeDatabase().catch((error) => {
|
|
@@ -11256,12 +11307,12 @@ var DatabaseSessionService = (_class34 = class extends BaseSessionService {
|
|
|
11256
11307
|
}
|
|
11257
11308
|
async createSession(appName, userId, state, sessionId) {
|
|
11258
11309
|
await this.ensureInitialized();
|
|
11259
|
-
const id = _optionalChain([sessionId, 'optionalAccess',
|
|
11310
|
+
const id = _optionalChain([sessionId, 'optionalAccess', _287 => _287.trim, 'call', _288 => _288()]) || this.generateSessionId();
|
|
11260
11311
|
return await this.db.transaction().execute(async (trx) => {
|
|
11261
11312
|
const appState = await trx.selectFrom("app_states").selectAll().where("app_name", "=", appName).executeTakeFirst();
|
|
11262
11313
|
const userState = await trx.selectFrom("user_states").selectAll().where("app_name", "=", appName).where("user_id", "=", userId).executeTakeFirst();
|
|
11263
|
-
let currentAppState = this.parseJsonSafely(_optionalChain([appState, 'optionalAccess',
|
|
11264
|
-
let currentUserState = this.parseJsonSafely(_optionalChain([userState, 'optionalAccess',
|
|
11314
|
+
let currentAppState = this.parseJsonSafely(_optionalChain([appState, 'optionalAccess', _289 => _289.state]), {});
|
|
11315
|
+
let currentUserState = this.parseJsonSafely(_optionalChain([userState, 'optionalAccess', _290 => _290.state]), {});
|
|
11265
11316
|
if (!appState) {
|
|
11266
11317
|
await trx.insertInto("app_states").values({
|
|
11267
11318
|
app_name: appName,
|
|
@@ -11320,21 +11371,21 @@ var DatabaseSessionService = (_class34 = class extends BaseSessionService {
|
|
|
11320
11371
|
return void 0;
|
|
11321
11372
|
}
|
|
11322
11373
|
let eventQuery = trx.selectFrom("events").selectAll().where("session_id", "=", sessionId).orderBy("timestamp", "desc");
|
|
11323
|
-
if (_optionalChain([config, 'optionalAccess',
|
|
11374
|
+
if (_optionalChain([config, 'optionalAccess', _291 => _291.afterTimestamp])) {
|
|
11324
11375
|
eventQuery = eventQuery.where(
|
|
11325
11376
|
"timestamp",
|
|
11326
11377
|
">=",
|
|
11327
11378
|
new Date(config.afterTimestamp * 1e3)
|
|
11328
11379
|
);
|
|
11329
11380
|
}
|
|
11330
|
-
if (_optionalChain([config, 'optionalAccess',
|
|
11381
|
+
if (_optionalChain([config, 'optionalAccess', _292 => _292.numRecentEvents])) {
|
|
11331
11382
|
eventQuery = eventQuery.limit(config.numRecentEvents);
|
|
11332
11383
|
}
|
|
11333
11384
|
const storageEvents = await eventQuery.execute();
|
|
11334
11385
|
const appState = await trx.selectFrom("app_states").selectAll().where("app_name", "=", appName).executeTakeFirst();
|
|
11335
11386
|
const userState = await trx.selectFrom("user_states").selectAll().where("app_name", "=", appName).where("user_id", "=", userId).executeTakeFirst();
|
|
11336
|
-
const currentAppState = this.parseJsonSafely(_optionalChain([appState, 'optionalAccess',
|
|
11337
|
-
const currentUserState = this.parseJsonSafely(_optionalChain([userState, 'optionalAccess',
|
|
11387
|
+
const currentAppState = this.parseJsonSafely(_optionalChain([appState, 'optionalAccess', _293 => _293.state]), {});
|
|
11388
|
+
const currentUserState = this.parseJsonSafely(_optionalChain([userState, 'optionalAccess', _294 => _294.state]), {});
|
|
11338
11389
|
const sessionState = this.parseJsonSafely(storageSession.state, {});
|
|
11339
11390
|
const mergedState = this.mergeState(
|
|
11340
11391
|
currentAppState,
|
|
@@ -11392,13 +11443,13 @@ var DatabaseSessionService = (_class34 = class extends BaseSessionService {
|
|
|
11392
11443
|
}
|
|
11393
11444
|
const appState = await trx.selectFrom("app_states").selectAll().where("app_name", "=", session.appName).executeTakeFirst();
|
|
11394
11445
|
const userState = await trx.selectFrom("user_states").selectAll().where("app_name", "=", session.appName).where("user_id", "=", session.userId).executeTakeFirst();
|
|
11395
|
-
let currentAppState = this.parseJsonSafely(_optionalChain([appState, 'optionalAccess',
|
|
11396
|
-
let currentUserState = this.parseJsonSafely(_optionalChain([userState, 'optionalAccess',
|
|
11446
|
+
let currentAppState = this.parseJsonSafely(_optionalChain([appState, 'optionalAccess', _295 => _295.state]), {});
|
|
11447
|
+
let currentUserState = this.parseJsonSafely(_optionalChain([userState, 'optionalAccess', _296 => _296.state]), {});
|
|
11397
11448
|
let sessionState = this.parseJsonSafely(storageSession.state, {});
|
|
11398
11449
|
let appStateDelta = {};
|
|
11399
11450
|
let userStateDelta = {};
|
|
11400
11451
|
let sessionStateDelta = {};
|
|
11401
|
-
if (_optionalChain([event, 'access',
|
|
11452
|
+
if (_optionalChain([event, 'access', _297 => _297.actions, 'optionalAccess', _298 => _298.stateDelta])) {
|
|
11402
11453
|
const deltas = this.extractStateDelta(event.actions.stateDelta);
|
|
11403
11454
|
appStateDelta = deltas.appStateDelta;
|
|
11404
11455
|
userStateDelta = deltas.userStateDelta;
|
|
@@ -11544,7 +11595,7 @@ var DatabaseSessionService = (_class34 = class extends BaseSessionService {
|
|
|
11544
11595
|
* Overrides the base class method to work with plain object state.
|
|
11545
11596
|
*/
|
|
11546
11597
|
updateSessionState(session, event) {
|
|
11547
|
-
if (!_optionalChain([event, 'access',
|
|
11598
|
+
if (!_optionalChain([event, 'access', _299 => _299.actions, 'optionalAccess', _300 => _300.stateDelta])) {
|
|
11548
11599
|
return;
|
|
11549
11600
|
}
|
|
11550
11601
|
for (const [key, value] of Object.entries(event.actions.stateDelta)) {
|
|
@@ -11714,7 +11765,7 @@ var GcsArtifactService = class {
|
|
|
11714
11765
|
};
|
|
11715
11766
|
return part;
|
|
11716
11767
|
} catch (error) {
|
|
11717
|
-
if (_optionalChain([error, 'optionalAccess',
|
|
11768
|
+
if (_optionalChain([error, 'optionalAccess', _301 => _301.code]) === 404) {
|
|
11718
11769
|
return null;
|
|
11719
11770
|
}
|
|
11720
11771
|
throw error;
|
|
@@ -11808,6 +11859,1292 @@ __export(flows_exports, {
|
|
|
11808
11859
|
removeClientFunctionCallId: () => removeClientFunctionCallId
|
|
11809
11860
|
});
|
|
11810
11861
|
|
|
11862
|
+
// src/evaluation/index.ts
|
|
11863
|
+
var evaluation_exports = {};
|
|
11864
|
+
__export(evaluation_exports, {
|
|
11865
|
+
AgentEvaluator: () => AgentEvaluator,
|
|
11866
|
+
EvalResult: () => EvalResult,
|
|
11867
|
+
EvalStatus: () => EvalStatus,
|
|
11868
|
+
Evaluator: () => Evaluator,
|
|
11869
|
+
FinalResponseMatchV2Evaluator: () => FinalResponseMatchV2Evaluator,
|
|
11870
|
+
LocalEvalService: () => LocalEvalService,
|
|
11871
|
+
PrebuiltMetrics: () => PrebuiltMetrics,
|
|
11872
|
+
RougeEvaluator: () => RougeEvaluator,
|
|
11873
|
+
SafetyEvaluatorV1: () => SafetyEvaluatorV1,
|
|
11874
|
+
TrajectoryEvaluator: () => TrajectoryEvaluator
|
|
11875
|
+
});
|
|
11876
|
+
|
|
11877
|
+
// src/evaluation/evaluator.ts
|
|
11878
|
+
/**
 * Outcome of an evaluation.
 *
 * Shaped like a numeric enum: each member can be looked up by name
 * (`EvalStatus.PASSED === 1`) and by value (`EvalStatus[1] === "PASSED"`).
 */
var EvalStatus = /* @__PURE__ */ ((registry) => {
  const members = [
    ["PASSED", 1],
    ["FAILED", 2],
    ["NOT_EVALUATED", 3]
  ];
  for (const [label, code] of members) {
    // Forward entry (name -> code) followed by the reverse entry (code -> name).
    registry[label] = code;
    registry[code] = label;
  }
  return registry;
})(EvalStatus || {});
|
|
11884
|
+
/**
 * Base class for metric evaluators.
 *
 * Stores the metric configuration on `this.metric`; subclasses are expected
 * to override the static `getMetricInfo`.
 */
var Evaluator = class {
  /**
   * @param {object} metric - Metric configuration kept on the instance as-is.
   */
  constructor(metric) {
    this.metric = metric;
  }

  /**
   * Placeholder that always throws; concrete evaluators provide the real
   * metric description.
   *
   * @param {string} metricName - Unused by the base implementation.
   * @throws {Error} Always.
   */
  static getMetricInfo(metricName) {
    const reason = "getMetricInfo() must be implemented by subclass";
    throw new Error(reason);
  }
};
|
|
11892
|
+
|
|
11893
|
+
// src/evaluation/eval-metrics.ts
|
|
11894
|
+
/**
 * Names of the metrics known to the evaluation module.
 *
 * String enum: each member maps an upper-case constant to the metric's
 * snake_case identifier (there is no reverse mapping).
 */
var PrebuiltMetrics = /* @__PURE__ */ ((target) => Object.assign(target, {
  TOOL_TRAJECTORY_AVG_SCORE: "tool_trajectory_avg_score",
  RESPONSE_EVALUATION_SCORE: "response_evaluation_score",
  RESPONSE_MATCH_SCORE: "response_match_score",
  SAFETY_V1: "safety_v1",
  FINAL_RESPONSE_MATCH_V2: "final_response_match_v2",
  TOOL_TRAJECTORY_SCORE: "tool_trajectory_score",
  SAFETY: "safety",
  RESPONSE_MATCH: "response_match"
}))(PrebuiltMetrics || {});
|
|
11905
|
+
|
|
11906
|
+
// src/evaluation/eval-result.ts
|
|
11907
|
+
/**
 * Result record for one evaluation-set run.
 */
var EvalResult = class {
  /**
   * Copies the known fields from `init`, substituting defaults for falsy
   * values: empty strings for the ids, an empty array for the case results,
   * and the current time in seconds for the creation timestamp.
   * `evalSetResultName` is copied without a default.
   *
   * @param {object} init - Source of the field values.
   */
  constructor(init) {
    const {
      evalSetResultId,
      evalSetResultName,
      evalSetId,
      evalCaseResults,
      creationTimestamp
    } = init;
    this.evalSetResultId = evalSetResultId ? evalSetResultId : "";
    this.evalSetResultName = evalSetResultName;
    this.evalSetId = evalSetId ? evalSetId : "";
    this.evalCaseResults = evalCaseResults ? evalCaseResults : [];
    // Date.now() is in milliseconds; the stored timestamp is in seconds.
    this.creationTimestamp = creationTimestamp ? creationTimestamp : Date.now() / 1e3;
  }
};
|
|
11921
|
+
|
|
11922
|
+
// src/evaluation/agent-evaluator.ts
|
|
11923
|
+
|
|
11924
|
+
|
|
11925
|
+
|
|
11926
|
+
// src/evaluation/base-eval-service.ts
|
|
11927
|
+
/**
 * Base evaluation service. Subclasses supply `performInference` and
 * `evaluate`; this class chains them together.
 */
var BaseEvalService = class {
  /**
   * Runs inference for every eval case of the session, then evaluates the
   * collected inference results and yields each evaluation result.
   *
   * All inference results are gathered before evaluation starts.
   *
   * @param {object} session - Provides `evalSetId`, `evalCases` and `evaluateConfig`.
   * @yields Each result produced by `this.evaluate`.
   */
  async *evaluateSession(session) {
    const inferenceRequest = {
      evalSetId: session.evalSetId,
      evalCases: session.evalCases
    };
    const inferenceResults = [];
    for await (const inferred of this.performInference(inferenceRequest)) {
      inferenceResults.push(inferred);
    }

    const evaluationRequest = {
      inferenceResults,
      evaluateConfig: session.evaluateConfig
    };
    for await (const evaluated of this.evaluate(evaluationRequest)) {
      yield evaluated;
    }
  }
};
|
|
11944
|
+
|
|
11945
|
+
// src/evaluation/vertex-ai-eval-facade.ts
|
|
11946
|
+
var ERROR_MESSAGE_SUFFIX = `
|
|
11947
|
+
You should specify both project id and location. This metric uses Vertex Gen AI
|
|
11948
|
+
Eval SDK, and it requires google cloud credentials.
|
|
11949
|
+
|
|
11950
|
+
If using an .env file add the values there, or explicitly set in the code using
|
|
11951
|
+
the template below:
|
|
11952
|
+
|
|
11953
|
+
process.env.GOOGLE_CLOUD_LOCATION = <LOCATION>
|
|
11954
|
+
process.env.GOOGLE_CLOUD_PROJECT = <PROJECT ID>
|
|
11955
|
+
`;
|
|
11956
|
+
/**
 * Scores pairs of actual/expected invocations with a single Vertex Gen AI
 * evaluation metric and aggregates the per-invocation scores.
 *
 * NOTE: `_performEval` is currently a placeholder that returns a random mock
 * score after validating the environment; it does not call Vertex AI.
 */
var VertexAiEvalFacade = class _VertexAiEvalFacade {
  /**
   * @param {{threshold: number, metricName: string}} config - Pass threshold
   *   and the name of the metric to evaluate.
   */
  constructor(config) {
    this.threshold = config.threshold;
    this.metricName = config.metricName;
  }

  /**
   * Evaluates every actual invocation against the expected invocation at the
   * same index.
   *
   * An actual invocation without an expected counterpart (shorter or missing
   * `expectedInvocations`) is recorded as NOT_EVALUATED instead of raising a
   * TypeError. Invocations whose evaluation throws are logged and recorded as
   * NOT_EVALUATED as well. Only scored invocations contribute to the average.
   *
   * @param {Array<object>} actualInvocations
   * @param {Array<object>} expectedInvocations
   * @returns {Promise<{overallScore: (number|undefined), overallEvalStatus: number, perInvocationResults: Array<object>}>}
   */
  async evaluateInvocations(actualInvocations, expectedInvocations) {
    const perInvocationResults = [];
    let totalScore = 0;
    let numInvocations = 0;
    for (let i = 0; i < actualInvocations.length; i++) {
      const actual = actualInvocations[i];
      const expected = expectedInvocations == null ? void 0 : expectedInvocations[i];
      if (expected == null) {
        // Nothing to compare against: report it rather than crash.
        perInvocationResults.push({
          actualInvocation: actual,
          expectedInvocation: expected,
          score: void 0,
          evalStatus: 3 /* NOT_EVALUATED */
        });
        continue;
      }
      const evalCase = {
        prompt: this._getText(expected.userContent),
        reference: this._getText(expected.finalResponse),
        response: this._getText(actual.finalResponse)
      };
      try {
        const evalCaseResult = await _VertexAiEvalFacade._performEval(
          [evalCase],
          [this.metricName]
        );
        const score = this._getScore(evalCaseResult);
        perInvocationResults.push({
          actualInvocation: actual,
          expectedInvocation: expected,
          score,
          evalStatus: this._getEvalStatus(score)
        });
        if (score !== null && score !== void 0) {
          totalScore += score;
          numInvocations++;
        }
      } catch (error) {
        console.error("Error evaluating invocation:", error);
        perInvocationResults.push({
          actualInvocation: actual,
          expectedInvocation: expected,
          score: void 0,
          evalStatus: 3 /* NOT_EVALUATED */
        });
      }
    }
    if (perInvocationResults.length === 0) {
      return {
        overallScore: void 0,
        overallEvalStatus: 3 /* NOT_EVALUATED */,
        perInvocationResults: []
      };
    }
    const overallScore = numInvocations > 0 ? totalScore / numInvocations : void 0;
    return {
      overallScore,
      overallEvalStatus: this._getEvalStatus(overallScore),
      perInvocationResults
    };
  }

  /**
   * Joins the non-empty `text` of each part with newlines.
   *
   * @param {{parts?: Array<{text?: string}>}|null|undefined} content
   * @returns {string} Joined text, or "" when there are no parts.
   */
  _getText(content) {
    const parts = content == null ? void 0 : content.parts;
    if (!parts) {
      return "";
    }
    return parts.map((p) => p.text || "").filter((text) => text.length > 0).join("\n");
  }

  /**
   * Extracts `summaryMetrics[0].meanScore` when it is a real number.
   *
   * @param {object|null|undefined} evalResult
   * @returns {number|undefined} The mean score, or undefined when absent or NaN.
   */
  _getScore(evalResult) {
    const summaryMetrics = evalResult == null ? void 0 : evalResult.summaryMetrics;
    const firstMetric = summaryMetrics == null ? void 0 : summaryMetrics[0];
    const meanScore = firstMetric == null ? void 0 : firstMetric.meanScore;
    if (typeof meanScore === "number" && !Number.isNaN(meanScore)) {
      return meanScore;
    }
    return void 0;
  }

  /**
   * Maps a score to an eval status using `this.threshold`.
   *
   * @param {number|null|undefined} score
   * @returns {number} 1 (PASSED), 2 (FAILED) or 3 (NOT_EVALUATED) for a missing score.
   */
  _getEvalStatus(score) {
    if (score !== null && score !== void 0) {
      return score >= this.threshold ? 1 /* PASSED */ : 2 /* FAILED */;
    }
    return 3 /* NOT_EVALUATED */;
  }

  /**
   * Validates that the Google Cloud project and location are configured, then
   * returns a mock result with a random `meanScore` in [0.5, 1).
   *
   * @param {Array<object>} dataset - Eval cases (currently unused by the mock).
   * @param {Array<string>} metrics - Metric names (currently unused by the mock).
   * @returns {Promise<{summaryMetrics: Array<{meanScore: number}>}>}
   * @throws {Error} When GOOGLE_CLOUD_PROJECT or GOOGLE_CLOUD_LOCATION is unset.
   */
  static async _performEval(dataset, metrics) {
    const projectId = process.env.GOOGLE_CLOUD_PROJECT;
    const location = process.env.GOOGLE_CLOUD_LOCATION;
    if (!projectId) {
      throw new Error(`Missing project id. ${ERROR_MESSAGE_SUFFIX}`);
    }
    if (!location) {
      throw new Error(`Missing location. ${ERROR_MESSAGE_SUFFIX}`);
    }
    console.warn(
      "Vertex AI evaluation is not fully implemented. Using mock response."
    );
    return {
      summaryMetrics: [
        {
          meanScore: Math.random() * 0.5 + 0.5
        }
      ]
    };
  }
};
|
|
12057
|
+
|
|
12058
|
+
// src/evaluation/response-evaluator.ts
|
|
12059
|
+
/**
 * Evaluates an agent's final response with one of two metrics:
 * "response_match_score" (computed locally from unigram overlap) or
 * "response_evaluation_score" (delegated to `VertexAiEvalFacade`).
 */
var ResponseEvaluator = class extends Evaluator {
  /**
   * @param {{metricName: string, threshold: number}} evalMetric
   * @throws {Error} When `evalMetric.metricName` is neither supported metric.
   */
  constructor(evalMetric) {
    super(evalMetric);
    const requested = evalMetric.metricName;
    const supported = requested === "response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */ || requested === "response_match_score" /* RESPONSE_MATCH_SCORE */;
    if (!supported) {
      throw new Error(`Metric ${evalMetric.metricName} is not supported.`);
    }
    this.metricName = requested;
    this.threshold = evalMetric.threshold;
  }

  /**
   * Describes one of the two supported metrics.
   *
   * @param {string} metricName
   * @returns {{metricName: string, description: string, metricValueInfo: object}}
   * @throws {Error} When `metricName` is not supported.
   */
  static getMetricInfo(metricName) {
    // Both metrics use a closed interval; only the bounds and text differ.
    const describe = (description, minValue, maxValue) => ({
      metricName,
      description,
      metricValueInfo: {
        interval: {
          minValue,
          maxValue,
          openAtMin: false,
          openAtMax: false
        }
      }
    });
    switch (metricName) {
      case "response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */:
        return describe(
          "This metric evaluates how coherent agent's response was. Value range of this metric is [1,5], with values closer to 5 more desirable.",
          1,
          5
        );
      case "response_match_score" /* RESPONSE_MATCH_SCORE */:
        return describe(
          "This metric evaluates if agent's final response matches a golden/expected final response using Rouge_1 metric. Value range for this metric is [0,1], with values closer to 1 more desirable.",
          0,
          1
        );
      default:
        throw new Error(`Metric ${metricName} is not supported.`);
    }
  }

  /**
   * Dispatches to the local overlap score or to the Vertex AI facade,
   * depending on the configured metric.
   *
   * @param {Array<object>} actualInvocations
   * @param {Array<object>} expectedInvocations
   * @returns {Promise<object>} Overall score/status plus per-invocation results.
   */
  async evaluateInvocations(actualInvocations, expectedInvocations) {
    if (this.metricName === "response_match_score" /* RESPONSE_MATCH_SCORE */) {
      return this.evaluateRougeScore(actualInvocations, expectedInvocations);
    }
    const vertexAiFacade = new VertexAiEvalFacade({
      threshold: this.threshold,
      metricName: this.metricName
    });
    return vertexAiFacade.evaluateInvocations(
      actualInvocations,
      expectedInvocations
    );
  }

  /**
   * Scores each invocation pair and averages the scores that exist.
   *
   * When no invocation could be scored (for example every pair lacks a final
   * response) the overall status is NOT_EVALUATED rather than FAILED.
   *
   * @param {Array<object>} actualInvocations
   * @param {Array<object>} expectedInvocations
   * @returns {Promise<object>}
   * @throws {Error} When the two lists differ in length.
   */
  async evaluateRougeScore(actualInvocations, expectedInvocations) {
    if (actualInvocations.length !== expectedInvocations.length) {
      throw new Error("Number of actual and expected invocations must match");
    }
    const results = [];
    for (let i = 0; i < actualInvocations.length; i++) {
      results.push(
        await this.evaluateInvocation(actualInvocations[i], expectedInvocations[i])
      );
    }
    const scores = results.map((r) => r.score).filter((s) => s !== void 0);
    if (scores.length === 0) {
      return {
        overallScore: void 0,
        overallEvalStatus: 3 /* NOT_EVALUATED */,
        perInvocationResults: results
      };
    }
    const overallScore = scores.reduce((a, b) => a + b, 0) / scores.length;
    return {
      overallScore,
      overallEvalStatus: overallScore >= this.threshold ? 1 /* PASSED */ : 2 /* FAILED */,
      perInvocationResults: results
    };
  }

  /**
   * Scores a single pair; pairs missing either final response are
   * NOT_EVALUATED and carry no `score`.
   *
   * @param {object} actual
   * @param {object} expected
   * @returns {Promise<object>}
   */
  async evaluateInvocation(actual, expected) {
    if (!actual.finalResponse || !expected.finalResponse) {
      return {
        actualInvocation: actual,
        expectedInvocation: expected,
        evalStatus: 3 /* NOT_EVALUATED */
      };
    }
    const score = await this.computeRougeScore(
      actual.finalResponse,
      expected.finalResponse
    );
    return {
      actualInvocation: actual,
      expectedInvocation: expected,
      score,
      evalStatus: score >= this.threshold ? 1 /* PASSED */ : 2 /* FAILED */
    };
  }

  /**
   * F-measure of unigram overlap between the two texts.
   *
   * NOTE(review): tokens are de-duplicated (Set-based), so repeated words are
   * counted once; this differs from count-based ROUGE-1 implementations.
   *
   * @param {object} actual - Content of the actual final response.
   * @param {object} expected - Content of the expected final response.
   * @returns {Promise<number>} Value in [0, 1]; 0 when either text is blank.
   */
  async computeRougeScore(actual, expected) {
    const actualText = this.extractText(actual);
    const expectedText = this.extractText(expected);
    if (!actualText.trim() || !expectedText.trim()) {
      return 0;
    }
    const actualUnigrams = new Set(this.tokenizeText(actualText));
    const expectedUnigrams = new Set(this.tokenizeText(expectedText));
    let commonCount = 0;
    for (const token of actualUnigrams) {
      if (expectedUnigrams.has(token)) {
        commonCount++;
      }
    }
    const precision = actualUnigrams.size > 0 ? commonCount / actualUnigrams.size : 0;
    const recall = expectedUnigrams.size > 0 ? commonCount / expectedUnigrams.size : 0;
    return precision + recall > 0 ? 2 * precision * recall / (precision + recall) : 0;
  }

  /**
   * Joins the non-empty `text` of each part with single spaces.
   *
   * @param {{parts?: Array<{text?: string}>}|null|undefined} content
   * @returns {string}
   */
  extractText(content) {
    const parts = content == null ? void 0 : content.parts;
    if (!parts) {
      return "";
    }
    return parts.map((p) => p.text || "").filter((text) => text.length > 0).join(" ");
  }

  /**
   * Lower-cases the text, turns every character that is neither a word
   * character nor whitespace into a space, and splits on whitespace.
   *
   * @param {string} text
   * @returns {Array<string>} Non-empty tokens.
   */
  tokenizeText(text) {
    return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((token) => token.length > 0);
  }
};
|
|
12184
|
+
|
|
12185
|
+
// src/evaluation/trajectory-evaluator.ts
|
|
12186
|
+
/**
 * Compares the tool-call trajectory of each actual invocation with the
 * expected one. An invocation scores 1 when every tool call matches by name
 * and arguments, in order, and 0 otherwise.
 */
var TrajectoryEvaluator = class extends Evaluator {
  /**
   * @returns {{metricName: string, description: string, metricValueInfo: object}}
   */
  static getMetricInfo() {
    return {
      metricName: "tool_trajectory_avg_score" /* TOOL_TRAJECTORY_AVG_SCORE */,
      description: "This metric compares two tool call trajectories (expected vs. actual) for the same user interaction. It performs an exact match on the tool name and arguments for each step in the trajectory. A score of 1.0 indicates a perfect match, while 0.0 indicates a mismatch. Higher values are better.",
      metricValueInfo: {
        interval: {
          minValue: 0,
          maxValue: 1,
          openAtMin: false,
          openAtMax: false
        }
      }
    };
  }

  /**
   * Scores each actual invocation against the expected invocation at the
   * same index and averages the scores.
   *
   * A pair is NOT_EVALUATED (and excluded from the average) when the expected
   * invocation is missing or either side has no `intermediateData.toolUses`.
   * When no pair could be scored the overall result is NOT_EVALUATED with an
   * undefined score, instead of a misleading score of 0.
   *
   * @param {Array<object>} actualInvocations
   * @param {Array<object>} expectedInvocations
   * @returns {Promise<{overallScore: (number|undefined), overallEvalStatus: number, perInvocationResults: Array<object>}>}
   */
  async evaluateInvocations(actualInvocations, expectedInvocations) {
    const threshold = this.metric.threshold;
    const perInvocationResults = [];
    let totalToolUseAccuracy = 0;
    let numInvocations = 0;
    for (let i = 0; i < actualInvocations.length; i++) {
      const actual = actualInvocations[i];
      const expected = expectedInvocations == null ? void 0 : expectedInvocations[i];
      const actualToolUses = actual.intermediateData == null ? void 0 : actual.intermediateData.toolUses;
      const expectedToolUses = expected == null || expected.intermediateData == null ? void 0 : expected.intermediateData.toolUses;
      if (!actualToolUses || !expectedToolUses) {
        perInvocationResults.push({
          actualInvocation: actual,
          expectedInvocation: expected,
          evalStatus: 3 /* NOT_EVALUATED */
        });
        continue;
      }
      const toolUseAccuracy = this.areToolCallsEqual(actualToolUses, expectedToolUses) ? 1 : 0;
      perInvocationResults.push({
        actualInvocation: actual,
        expectedInvocation: expected,
        score: toolUseAccuracy,
        evalStatus: toolUseAccuracy >= threshold ? 1 /* PASSED */ : 2 /* FAILED */
      });
      totalToolUseAccuracy += toolUseAccuracy;
      numInvocations++;
    }
    if (numInvocations === 0) {
      return {
        overallScore: void 0,
        overallEvalStatus: 3 /* NOT_EVALUATED */,
        perInvocationResults
      };
    }
    const overallScore = totalToolUseAccuracy / numInvocations;
    return {
      overallScore,
      overallEvalStatus: overallScore >= threshold ? 1 /* PASSED */ : 2 /* FAILED */,
      perInvocationResults
    };
  }

  /**
   * @param {Array<object>} actual - Tool calls made.
   * @param {Array<object>} expected - Tool calls expected.
   * @returns {boolean} True when both lists have the same length and every
   *   call matches the call at the same position.
   */
  areToolCallsEqual(actual, expected) {
    if (actual.length !== expected.length) {
      return false;
    }
    return actual.every(
      (actualCall, index) => this.isToolCallEqual(actualCall, expected[index])
    );
  }

  /**
   * Two calls are equal when their names match and their `args` have the
   * same keys with equal values. Values are compared through a key-sorted
   * JSON form, so nested objects match regardless of property order.
   *
   * @param {{name: string, args?: object}} actual
   * @param {{name: string, args?: object}} expected
   * @returns {boolean}
   */
  isToolCallEqual(actual, expected) {
    if (actual.name !== expected.name) {
      return false;
    }
    const actualArgs = actual.args || {};
    const expectedArgs = expected.args || {};
    const actualKeys = Object.keys(actualArgs).sort();
    const expectedKeys = Object.keys(expectedArgs).sort();
    if (actualKeys.length !== expectedKeys.length) {
      return false;
    }
    return actualKeys.every((key, index) => {
      if (key !== expectedKeys[index]) {
        return false;
      }
      return this.canonicalJson(actualArgs[key]) === this.canonicalJson(expectedArgs[key]);
    });
  }

  /**
   * Serializes a value to JSON with the keys of every plain (non-array)
   * object sorted, so structurally equal values serialize identically.
   *
   * @param {*} value
   * @returns {string|undefined} Same result type as `JSON.stringify`.
   */
  canonicalJson(value) {
    return JSON.stringify(value, (_key, current) => {
      if (current === null || typeof current !== "object" || Array.isArray(current)) {
        return current;
      }
      const sorted = {};
      for (const name of Object.keys(current).sort()) {
        sorted[name] = current[name];
      }
      return sorted;
    });
  }
};
|
|
12265
|
+
|
|
12266
|
+
// src/evaluation/safety-evaluator.ts
|
|
12267
|
+
/**
 * Safety metric evaluator that delegates scoring to `VertexAiEvalFacade`
 * with the "safety_v1" metric.
 */
var SafetyEvaluatorV1 = class extends Evaluator {
  /**
   * @returns {{metricName: string, description: string, metricValueInfo: object}}
   */
  static getMetricInfo() {
    const interval = {
      minValue: 0,
      maxValue: 1,
      openAtMin: false,
      openAtMax: false
    };
    return {
      metricName: "safety_v1" /* SAFETY_V1 */,
      description: "This metric evaluates the safety (harmlessness) of an Agent's Response. Value range of the metric is [0, 1], with values closer to 1 to be more desirable (safe).",
      metricValueInfo: { interval }
    };
  }

  /**
   * Builds a facade from the metric's threshold and forwards both invocation
   * lists to it.
   *
   * @param {Array<object>} actualInvocations
   * @param {Array<object>} expectedInvocations
   * @returns {Promise<object>} Whatever the facade resolves with.
   */
  async evaluateInvocations(actualInvocations, expectedInvocations) {
    const { threshold } = this.metric;
    const facade = new VertexAiEvalFacade({
      threshold,
      metricName: "safety_v1" /* SAFETY_V1 */
    });
    const verdict = await facade.evaluateInvocations(
      actualInvocations,
      expectedInvocations
    );
    return verdict;
  }
};
|
|
12293
|
+
|
|
12294
|
+
// src/evaluation/llm-as-judge-utils.ts
|
|
12295
|
+
/**
 * Joins the truthy `text` values of a content's parts with newlines.
 *
 * @param {{parts?: Array<{text?: string}>}|null|undefined} content
 * @returns {string} The joined text, or "" when `content` has no parts.
 */
function getTextFromContent(content) {
  const parts = content == null ? void 0 : content.parts;
  if (!parts) {
    return "";
  }
  const texts = parts.map(({ text }) => text);
  const nonEmpty = texts.filter((text) => Boolean(text));
  return nonEmpty.join("\n");
}
|
|
12301
|
+
/**
 * Maps a score to an eval status.
 *
 * A missing or NaN score yields NOT_EVALUATED instead of being coerced by
 * the comparison (`null >= 0` is true, `NaN >= x` is always false).
 *
 * @param {number|null|undefined} score
 * @param {number} threshold - Minimum score that counts as a pass.
 * @returns {number} 1 (PASSED), 2 (FAILED) or 3 (NOT_EVALUATED).
 */
function getEvalStatus(score, threshold) {
  if (score == null || Number.isNaN(score)) {
    return 3 /* NOT_EVALUATED */;
  }
  return score >= threshold ? 1 /* PASSED */ : 2 /* FAILED */;
}
|
|
12304
|
+
|
|
12305
|
+
// src/evaluation/llm-as-judge.ts
|
|
12306
|
+
/**
 * Samples a judge model several times and collects the parsed labels.
 */
var LlmAsJudge = class {
  /**
   * Sends `prompt` to the judge model `numSamples` times, one request after
   * another, and parses each reply with `critiqueParser`.
   *
   * Replies parsed as "not_found" are dropped. A failing request or parser
   * is logged and skipped, so fewer than `numSamples` labels may be returned.
   *
   * @param {string} prompt
   * @param {number} numSamples
   * @param {function(string): string} critiqueParser - Maps reply text to a label.
   * @param {{judgeModel?: string, judgeModelConfig?: object}} [judgeModelOptions]
   *   Model name (defaults to "gemini-2.5-flash") and extra request fields.
   * @returns {Promise<Array<string>>} The labels that were found.
   */
  async sampleJudge(prompt, numSamples, critiqueParser, judgeModelOptions) {
    const options = judgeModelOptions == null ? {} : judgeModelOptions;
    const judgeName = options.judgeModel || "gemini-2.5-flash";
    const judge = LLMRegistry.getModelOrCreate(judgeName);
    const extraRequestFields = options.judgeModelConfig || {};
    const collected = [];
    for (let attempt = 0; attempt < numSamples; attempt += 1) {
      try {
        // Config fields are spread last, as in the request shape used so far.
        const reply = await judge.generateContent({
          prompt,
          ...extraRequestFields
        });
        const verdict = critiqueParser(reply.text);
        if (verdict === "not_found" /* NOT_FOUND */) {
          continue;
        }
        collected.push(verdict);
      } catch (error) {
        console.error("Error sampling judge model:", error);
      }
    }
    return collected;
  }
};
|
|
12329
|
+
|
|
12330
|
+
// src/evaluation/final-response-match-v2.ts
|
|
12331
|
+
var FINAL_RESPONSE_MATCH_V2_PROMPT = `You are an expert rater for an AI agent. The AI agent is going to call an API to answer the user query and generate API tool use code based for the choice of the API and API arguments. The ideal model response should be a function call that fulfills user query, or a natural language response hedges or asks users for further clarification if a function call does not apply.
|
|
12332
|
+
The primary focus of this rating task is to check correctness of the model responses.
|
|
12333
|
+
|
|
12334
|
+
The data consists of:
|
|
12335
|
+
- A user query.
|
|
12336
|
+
- A model generated response for the prompt. The responses can consist of:
|
|
12337
|
+
- Natural language, when the model is asking for clarification, or tells the user it does not possess the requested functionality / option.
|
|
12338
|
+
- Code, in the form of one or multiple python function calls, and additional code as needed, for when the model is fulfilling the user request.
|
|
12339
|
+
You can use the help from a reference response annotated by a human rater. This reference response is of high quality. You can compare the agent's response with the reference response and decide if the agent's response is valid.
|
|
12340
|
+
Note sometimes the reference response only contains the key entities of the correct answer and you need to be flexible to allow the agent response to contain more information than the reference response, or to present the key entities in a different format or structure or in shorter or longer format.
|
|
12341
|
+
When the agent response is provided in the form of tables/dataframes or should be best provided in the form of tables/dataframes: focus on the key entities and main components requested in the user query and check whether you can retrieve those from the agent response. Likewise, if you have the reference response, then find out the key entities and main components in them and check whether you can retrieve those from the agent response. If the prompt does not specify any format instructions and the main items/components are included in the response then tolerate the differences in the formatting of those tables/dataframes.
|
|
12342
|
+
|
|
12343
|
+
You should follow the constitutions below very carefully to rate the model response:
|
|
12344
|
+
- Allow flexibility of format even when reference code only uses one of the possible format, unless API spec or user prompt has explicit format requirement
|
|
12345
|
+
- e.g. For state name, allow both abbreviation and full name unless API spec has explicit requirement. e.g. both 'tx' and 'Texas' should be allowed in the agent response even when reference code only uses one of them.
|
|
12346
|
+
- e.g. If a reference response list outputs in a list format, the agent response is allowed to use sentence format and vice versa unless user prompt explicitly asks for a specific format.
|
|
12347
|
+
- e.g. For numbers, allow flexibility of formatting, e.g. 1000000 vs 1,000,000.
|
|
12348
|
+
- The model shouldn't assume that it doesn't have access to according data or incapable of answering the question if reference response is able to find a legit answer.
|
|
12349
|
+
- If the model response contains the correct final answer, rate it as valid even when the model response contains more information than the reference response.
|
|
12350
|
+
- If the user prompt has csv or other table format data, don't read it yourself. Trust the reference response final answer instead.
|
|
12351
|
+
- When the validation needs maths, date calculations, do not use your own calculator. Trust the reference response final answer instead.
|
|
12352
|
+
- Be mindful about unit of numbers. For example, if the reference response says 100 miles, but the model response says 100 km, it is invalid.
|
|
12353
|
+
- When the agent response or the reference response is provided in the form of tables/dataframes: focus on the key entities and main components requested in the user query and check whether you can retrieve those from the agent response and whether those match the reference response. If the user query does not specify any format instructions and the main items/components are included in the response then tolerate the differences in the formatting of those tables/dataframes.
|
|
12354
|
+
- When the answer is in numeric format, check whether there are any format requirements in the numeric format, rounding, precision, number of decimals, etc. specified in the user query and the prompt. If there are no such instructions, then tolerate different numerical formats.
|
|
12355
|
+
- When the answer is in numeric format and there are rounding or precision differences between the agent response and the reference response, if no further instructions are provided evaluate if the rounding strategy or precision in the agent response follows the standards for that entity. For instance, model accuracy scores must be reported with at least two decimal places (e.g., 0.798 \u2192 0.80 is acceptable, but 0.7 is not).
|
|
12356
|
+
|
|
12357
|
+
Below are the inputs:
|
|
12358
|
+
{{
|
|
12359
|
+
"User prompt": {prompt},
|
|
12360
|
+
"Agent response": {response},
|
|
12361
|
+
"Reference response": {golden_response},
|
|
12362
|
+
}}
|
|
12363
|
+
|
|
12364
|
+
The answer should be a json alone which follows the json structure below:
|
|
12365
|
+
{{
|
|
12366
|
+
"reasoning": [reasoning],
|
|
12367
|
+
"is_the_agent_response_valid": [valid or invalid],
|
|
12368
|
+
}}
|
|
12369
|
+
Answer with assertiveness:
|
|
12370
|
+
`;
|
|
12371
|
+
var DEFAULT_NUM_SAMPLES = 5;
|
|
12372
|
+
function parseCritique(response) {
|
|
12373
|
+
const labelMatchIsResponseValid = response.match(
|
|
12374
|
+
/"is_the_agent_response_valid":\s*\[*[\n\s]*"*([^"^\]^\s]*)"*[\n\s]*\]*\s*[,\n\}]/
|
|
12375
|
+
);
|
|
12376
|
+
if (_optionalChain([labelMatchIsResponseValid, 'optionalAccess', _314 => _314[1]])) {
|
|
12377
|
+
const label = labelMatchIsResponseValid[1].toLowerCase();
|
|
12378
|
+
return label === "valid" ? "valid" /* VALID */ : "invalid" /* INVALID */;
|
|
12379
|
+
}
|
|
12380
|
+
return "not_found" /* NOT_FOUND */;
|
|
12381
|
+
}
|
|
12382
|
+
/**
 * Judges whether the agent's final response matches the expected response by
 * sampling an LLM judge. The per-invocation score is the fraction of judge
 * samples labelled "valid".
 */
var FinalResponseMatchV2Evaluator = class extends Evaluator {
  /**
   * @param {object} evalMetric - Metric configuration (`threshold`, optional
   *   `judgeModelOptions`).
   * @param {{sampleJudge: Function}} [llmAsJudge] - Judge used for sampling;
   *   a new `LlmAsJudge` by default.
   */
  constructor(evalMetric, llmAsJudge = new LlmAsJudge()) {
    super(evalMetric);
    this.llmAsJudge = llmAsJudge;
  }

  /**
   * @returns {{metricName: string, description: string, metricValueInfo: object}}
   */
  static getMetricInfo() {
    return {
      metricName: "final_response_match_v2" /* FINAL_RESPONSE_MATCH_V2 */,
      description: "This metric evaluates if the agent's final response matches a golden/expected final response using an LLM judge. Value range for this metric is [0,1], with values closer to 1 more desirable.",
      metricValueInfo: {
        interval: {
          minValue: 0,
          maxValue: 1,
          openAtMin: false,
          openAtMax: false
        }
      }
    };
  }

  /**
   * Evaluates every actual invocation against the expected invocation at the
   * same index.
   *
   * An invocation is NOT_EVALUATED (and excluded from the average) when it
   * has no expected counterpart or when the judge produced no usable label;
   * previously the latter yielded a NaN score that also turned the overall
   * score into NaN. When nothing could be scored the overall result is
   * NOT_EVALUATED.
   *
   * @param {Array<object>} actualInvocations
   * @param {Array<object>} expectedInvocations
   * @returns {Promise<{overallScore?: (number|undefined), overallEvalStatus: number, perInvocationResults: Array<object>}>}
   */
  async evaluateInvocations(actualInvocations, expectedInvocations) {
    if (!actualInvocations.length) {
      return {
        overallEvalStatus: 3 /* NOT_EVALUATED */,
        perInvocationResults: []
      };
    }
    const judgeOptions = this.metric.judgeModelOptions;
    const configuredSamples = judgeOptions == null ? void 0 : judgeOptions.numSamples;
    const numSamples = configuredSamples == null ? DEFAULT_NUM_SAMPLES : configuredSamples;
    const perInvocationResults = [];
    let totalScore = 0;
    let numInvocations = 0;
    for (let i = 0; i < actualInvocations.length; i++) {
      const actual = actualInvocations[i];
      const expected = expectedInvocations == null ? void 0 : expectedInvocations[i];
      if (expected == null) {
        perInvocationResults.push({
          actualInvocation: actual,
          expectedInvocation: expected,
          evalStatus: 3 /* NOT_EVALUATED */
        });
        continue;
      }
      const formattedPrompt = this.fillPromptTemplate({
        prompt: getTextFromContent(expected.userContent),
        response: getTextFromContent(actual.finalResponse),
        golden_response: getTextFromContent(expected.finalResponse)
      });
      const labels = await this.llmAsJudge.sampleJudge(
        formattedPrompt,
        numSamples,
        parseCritique,
        judgeOptions
      );
      if (labels.length === 0) {
        // No usable verdict: avoid 0 / 0 and report the gap explicitly.
        perInvocationResults.push({
          actualInvocation: actual,
          expectedInvocation: expected,
          evalStatus: 3 /* NOT_EVALUATED */
        });
        continue;
      }
      const score = labels.filter((l) => l === "valid" /* VALID */).length / labels.length;
      perInvocationResults.push({
        actualInvocation: actual,
        expectedInvocation: expected,
        score,
        evalStatus: getEvalStatus(score, this.metric.threshold)
      });
      totalScore += score;
      numInvocations++;
    }
    if (numInvocations === 0) {
      return {
        overallScore: void 0,
        overallEvalStatus: 3 /* NOT_EVALUATED */,
        perInvocationResults
      };
    }
    const overallScore = totalScore / numInvocations;
    return {
      overallScore,
      overallEvalStatus: getEvalStatus(overallScore, this.metric.threshold),
      perInvocationResults
    };
  }

  /**
   * Substitutes the `{prompt}`, `{response}` and `{golden_response}`
   * placeholders of the judge prompt in a single pass.
   *
   * A replacer function is used so that `$`-sequences in the inserted text
   * are taken literally, and the single pass keeps placeholder-like text
   * inside an inserted value from being substituted again.
   *
   * @param {{prompt: string, response: string, golden_response: string}} values
   * @returns {string} The filled-in judge prompt.
   */
  fillPromptTemplate(values) {
    return FINAL_RESPONSE_MATCH_V2_PROMPT.replace(
      /\{(prompt|response|golden_response)\}/g,
      (_match, key) => values[key]
    );
  }
};
|
|
12446
|
+
|
|
12447
|
+
// src/evaluation/metric-evaluator-registry.ts
|
|
12448
|
+
/**
 * Registry that maps a metric name to the evaluator class implementing it,
 * together with a copy of the metric's descriptive info.
 */
var MetricEvaluatorRegistry = (_class35 = class {
  constructor() {
    _class35.prototype.__init61.call(this);
  }

  // Field initializer: starts with an empty metric-name -> entry map.
  __init61() {
    this.registry = /* @__PURE__ */ new Map();
  }

  /**
   * Instantiates the evaluator registered for `evalMetric.metricName`.
   *
   * @param {{metricName: string}} evalMetric - Passed to the evaluator's constructor.
   * @returns {object} A new evaluator instance.
   * @throws {Error} When no evaluator is registered under that name.
   */
  getEvaluator(evalMetric) {
    const registered = this.registry.get(evalMetric.metricName);
    if (registered) {
      const EvaluatorClass = registered.evaluator;
      return new EvaluatorClass(evalMetric);
    }
    throw new Error(`${evalMetric.metricName} not found in registry.`);
  }

  /**
   * Registers (or replaces) the evaluator class for a metric. Replacing an
   * existing registration is reported through `console.info`.
   *
   * @param {{metricName: string}} metricInfo - Shallow-copied into the registry.
   * @param {Function} evaluator - Evaluator class (constructor).
   */
  registerEvaluator(metricInfo, evaluator) {
    const { metricName } = metricInfo;
    if (this.registry.has(metricName)) {
      const previous = this.registry.get(metricName);
      const previousName = previous == null ? void 0 : previous.evaluator.name;
      console.info(
        `Updating Evaluator class for ${metricName} from ${previousName} to ${evaluator.name}`
      );
    }
    const entry = {
      evaluator,
      metricInfo: { ...metricInfo }
    };
    this.registry.set(metricName, entry);
  }

  /**
   * @returns {Array<object>} A shallow copy of every registered metric info,
   *   in registration order.
   */
  getRegisteredMetrics() {
    const infos = [];
    for (const { metricInfo } of this.registry.values()) {
      infos.push({ ...metricInfo });
    }
    return infos;
  }
}, _class35);
|
|
12475
|
+
/**
 * Builds a registry pre-populated with the built-in evaluators.
 *
 * Registration order: trajectory, response evaluation score, response match
 * score, safety v1, final response match v2.
 *
 * @returns {MetricEvaluatorRegistry} A new registry instance.
 */
function getDefaultMetricEvaluatorRegistry() {
  // Each row: evaluator class plus the arguments for its getMetricInfo().
  const builtIns = [
    [TrajectoryEvaluator, []],
    [ResponseEvaluator, ["response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */]],
    [ResponseEvaluator, ["response_match_score" /* RESPONSE_MATCH_SCORE */]],
    [SafetyEvaluatorV1, []],
    [FinalResponseMatchV2Evaluator, []]
  ];
  const registry = new MetricEvaluatorRegistry();
  for (const [evaluatorClass, infoArgs] of builtIns) {
    registry.registerEvaluator(
      evaluatorClass.getMetricInfo(...infoArgs),
      evaluatorClass
    );
  }
  return registry;
}
|
|
12499
|
+
// Shared registry instance, built once when this module is evaluated.
// LocalEvalService.evaluate resolves its evaluators through this instance.
var DEFAULT_METRIC_EVALUATOR_REGISTRY = getDefaultMetricEvaluatorRegistry();
|
|
12500
|
+
|
|
12501
|
+
// src/evaluation/local-eval-service.ts
|
|
12502
|
+
/**
 * Eval service that runs inference and metric evaluation in-process.
 *
 * Inference results are arrays of invocations whose ids follow the scheme
 * produced by this class:
 *   - expected (golden) invocations: `<evalId>-expected-<n>`
 *   - actual invocations:            `<evalId>-<n>`
 * `evaluate` relies on that scheme to regroup and split the invocations.
 * An eval id that itself ends in `-expected` is ambiguous under this scheme.
 */
var LocalEvalService = class extends BaseEvalService {
  /**
   * @param agent Either a runner-like object exposing `ask`, or any other
   *   value (in which case a runner is created by `initializeRunner`).
   * @param parallelism Stored on the instance; no method of this class reads it.
   */
  constructor(agent, parallelism = 4) {
    super();
    this.agent = agent;
    this.parallelism = parallelism;
    // Keep the in-flight initialization so runInference can await it instead
    // of starting a second, racing initialization.
    this.runnerReady = Promise.resolve(this.initializeRunner());
    // A failure is surfaced when runInference awaits runnerReady; the no-op
    // handler only prevents an unhandled-rejection at construction time.
    this.runnerReady.catch(() => {
    });
  }

  /**
   * Populate `this.runner`.
   *
   * If the agent exposes `ask` it is used directly. Otherwise a runner is
   * built through AgentBuilder; if that fails, a mock runner that echoes the
   * message is installed and a warning is logged.
   *
   * NOTE(review): the AgentBuilder branch does not reference `this.agent`, so
   * the supplied agent is not the one being exercised there, and the mock
   * fallback yields synthetic responses — confirm this is intended.
   */
  async initializeRunner() {
    if ("ask" in this.agent) {
      this.runner = this.agent;
      return;
    }
    try {
      const { runner } = await AgentBuilder.create("eval_agent").withModel("gemini-2.5-flash").withDescription("Agent for evaluation purposes").build();
      this.runner = {
        ask: async (message) => {
          return await runner.ask(message);
        }
      };
    } catch (error) {
      console.warn(
        "Failed to create AgentBuilder runner, falling back to mock:",
        error
      );
      this.runner = {
        ask: async (message) => {
          return `Mock response to: ${message}`;
        }
      };
    }
  }

  /**
   * Run every eval case of every eval set in `request.evalCases`.
   *
   * @yields One array per eval case: the expected invocations (only the
   *   conversation turns that carry a `finalResponse`) followed by the actual
   *   invocations produced by `runInference`.
   */
  async *performInference(request) {
    for (const evalSet of request.evalCases) {
      for (const evalCase of evalSet.evalCases) {
        const expected = [];
        for (const convo of evalCase.conversation) {
          if (convo.finalResponse) {
            expected.push({
              invocationId: `${evalCase.evalId}-expected-${expected.length}`,
              userContent: convo.userContent,
              finalResponse: convo.finalResponse,
              intermediateData: convo.intermediateData,
              creationTimestamp: convo.creationTimestamp
            });
          }
        }
        const actual = await this.runInference(evalCase);
        yield [...expected, ...actual];
      }
    }
  }

  /**
   * Score previously collected inference results with every configured metric.
   *
   * @param request `{ inferenceResults, evaluateConfig: { evalMetrics } }`.
   * @yields One eval result per eval id, holding one case result per metric.
   */
  async *evaluate(request) {
    const { inferenceResults, evaluateConfig } = request;
    const resultsByCase = /* @__PURE__ */ new Map();
    for (const result of inferenceResults) {
      // An eval case can legitimately produce no invocations at all.
      if (!result || result.length === 0) continue;
      const first = result[0];
      const invocationId = first ? first.invocationId : void 0;
      if (!invocationId) continue;
      const evalId = this._evalIdFromInvocationId(invocationId);
      const existing = resultsByCase.get(evalId) || [];
      resultsByCase.set(evalId, [...existing, ...result]);
    }
    for (const [evalId, results] of resultsByCase) {
      const evalResult = {
        evalSetResultId: `${evalId}-result-${Date.now()}`,
        evalSetId: evalId,
        evalCaseResults: [],
        creationTimestamp: Date.now()
      };
      // The actual/expected split does not depend on the metric.
      const actual = results.filter(
        (r) => !this._isExpectedInvocationId(r.invocationId)
      );
      const expected = results.filter(
        (r) => this._isExpectedInvocationId(r.invocationId)
      );
      for (const evalMetric of evaluateConfig.evalMetrics) {
        const evaluator = DEFAULT_METRIC_EVALUATOR_REGISTRY.getEvaluator(evalMetric);
        const result = await evaluator.evaluateInvocations(actual, expected);
        evalResult.evalCaseResults.push({
          evalSetId: evalId,
          evalId,
          // NOTE(review): this is the status of the first invocation only, not
          // an aggregate over all invocations — confirm that is intended.
          finalEvalStatus: result.perInvocationResults.length > 0 ? result.perInvocationResults[0].evalStatus : 3 /* NOT_EVALUATED */,
          overallEvalMetricResults: [],
          sessionId: evalId,
          evalMetricResultPerInvocation: result.perInvocationResults.map(
            (r) => ({
              actualInvocation: r.actualInvocation,
              expectedInvocation: r.expectedInvocation,
              evalMetricResults: [
                {
                  metricName: evalMetric.metricName,
                  threshold: evalMetric.threshold,
                  score: r.score,
                  evalStatus: r.evalStatus
                }
              ]
            })
          )
        });
      }
      yield evalResult;
    }
  }

  /**
   * Run the conversation of one eval case through the runner.
   *
   * Session input, when present, is handed to `initializeSession` or
   * `setSessionState` if the runner offers one of them; otherwise it is only
   * logged. A failing `ask` does not abort the case: the error text becomes
   * the response of that invocation.
   *
   * @returns The actual invocations, with ids `<evalId>-<index>`.
   */
  async runInference(evalCase) {
    const results = [];
    if (!this.runner) {
      // Wait for the initialization started by the constructor rather than
      // kicking off a second one.
      await (this.runnerReady || this.initializeRunner());
    }
    if (evalCase.sessionInput) {
      try {
        if (this.runner.initializeSession) {
          await this.runner.initializeSession(evalCase.sessionInput);
        } else if (this.runner.setSessionState) {
          await this.runner.setSessionState(evalCase.sessionInput);
        } else {
          console.log(
            `Session input provided for ${evalCase.evalId}:`,
            evalCase.sessionInput
          );
        }
      } catch (error) {
        console.warn(
          `Failed to initialize session for ${evalCase.evalId}:`,
          error
        );
      }
    }
    for (const invocation of evalCase.conversation) {
      try {
        const response = await this.runner.ask(invocation.userContent);
        results.push({
          invocationId: `${evalCase.evalId}-${results.length}`,
          userContent: invocation.userContent,
          finalResponse: {
            role: "model",
            parts: [{ text: response || "" }]
          },
          intermediateData: {
            toolUses: [],
            intermediateResponses: []
          },
          creationTimestamp: Date.now()
        });
      } catch (error) {
        console.error(`Error running inference for ${evalCase.evalId}:`, error);
        results.push({
          invocationId: `${evalCase.evalId}-${results.length}`,
          userContent: invocation.userContent,
          finalResponse: {
            role: "model",
            parts: [
              {
                text: `Error: ${error instanceof Error ? error.message : "Unknown error"}`
              }
            ]
          },
          intermediateData: {
            toolUses: [],
            intermediateResponses: []
          },
          creationTimestamp: Date.now()
        });
      }
    }
    return results;
  }

  /**
   * True when `invocationId` has the `<evalId>-expected-<n>` shape that
   * performInference gives to expected invocations. Matching the suffix
   * (instead of searching for "expected" anywhere) keeps eval ids that merely
   * contain the word from being misclassified.
   */
  _isExpectedInvocationId(invocationId) {
    return typeof invocationId === "string" && /-expected-\d+$/.test(invocationId);
  }

  /**
   * Recover the eval id from an invocation id: strips `-expected-<n>` when
   * present, otherwise everything from the last hyphen on. Ids without a
   * hyphen are returned unchanged.
   */
  _evalIdFromInvocationId(invocationId) {
    const expectedSuffix = /-expected-\d+$/.exec(invocationId);
    if (expectedSuffix) {
      return invocationId.slice(0, expectedSuffix.index);
    }
    const lastHyphenIndex = invocationId.lastIndexOf("-");
    return lastHyphenIndex !== -1 ? invocationId.substring(0, lastHyphenIndex) : invocationId;
  }
};
|
|
12670
|
+
|
|
12671
|
+
// src/evaluation/agent-evaluator.ts
|
|
12672
|
+
// Default for the `numRuns` parameter of AgentEvaluator.evaluate / evaluateEvalSet.
var NUM_RUNS = 2;

// Metric names accepted as keys of a criteria object.
var TOOL_TRAJECTORY_SCORE_KEY = "tool_trajectory_avg_score" /* TOOL_TRAJECTORY_AVG_SCORE */;
var RESPONSE_EVALUATION_SCORE_KEY = "response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */;
var RESPONSE_MATCH_SCORE_KEY = "response_match_score" /* RESPONSE_MATCH_SCORE */;
var SAFETY_V1_KEY = "safety_v1" /* SAFETY_V1 */;
var ALLOWED_CRITERIA = [TOOL_TRAJECTORY_SCORE_KEY, RESPONSE_EVALUATION_SCORE_KEY, RESPONSE_MATCH_SCORE_KEY, SAFETY_V1_KEY];

// Keys read from each sample of an old-format dataset.
var QUERY_COLUMN = "query";
var REFERENCE_COLUMN = "reference";
var EXPECTED_TOOL_USE_COLUMN = "expected_tool_use";

// Thresholds used when no usable test_config.json accompanies a test file.
var DEFAULT_CRITERIA = { [TOOL_TRAJECTORY_SCORE_KEY]: 1, [RESPONSE_MATCH_SCORE_KEY]: 0.8 };
|
|
12690
|
+
/**
 * Read a UTF-8 file and parse it as JSON.
 *
 * @param filePath Path of the file to read.
 * @returns The parsed JSON value.
 * @throws {Error} When reading or parsing fails. The message names the file
 *   and the underlying error, which is also attached as `cause`.
 */
var loadJson = async (filePath) => {
  try {
    const fileContent = await fs2.readFile(filePath, "utf-8");
    return JSON.parse(fileContent);
  } catch (error) {
    // Keep the original error object reachable instead of only its text.
    throw new Error(`Failed to load JSON from ${filePath}: ${error}`, { cause: error });
  }
};
|
|
12698
|
+
/**
 * Static helpers that run evaluation datasets against an agent and throw
 * when any metric falls below its threshold.
 */
var AgentEvaluator = class _AgentEvaluator {
  /**
   * Load the criteria from the `test_config.json` next to `testFile`.
   *
   * @param testFile Path of a test file; only its directory is used.
   * @returns The `criteria` object of the config, or DEFAULT_CRITERIA when
   *   the config file is not accessible.
   * @throws {Error} When the config exists but cannot be loaded, or does not
   *   contain a `criteria` dictionary.
   */
  static async findConfigForTestFile(testFile) {
    const testFolder = path2.dirname(testFile);
    const configPath = path2.join(testFolder, "test_config.json");
    try {
      await fs2.access(configPath);
    } catch (error) {
      // Only a missing/inaccessible config falls back to the defaults; a
      // config that exists but is broken must not be silently ignored.
      return DEFAULT_CRITERIA;
    }
    const configData = await loadJson(configPath);
    const criteria = configData !== null && typeof configData === "object" ? configData.criteria : void 0;
    if (criteria !== null && typeof criteria === "object" && !Array.isArray(criteria)) {
      return criteria;
    }
    throw new Error(
      `Invalid format for test_config.json at ${configPath}. Expected a 'criteria' dictionary.`
    );
  }

  /**
   * Evaluate one eval set and throw if any metric fails.
   *
   * @param agent Agent (or runner) handed to LocalEvalService.
   * @param evalSet Eval set to run.
   * @param criteria Object mapping metric name -> threshold.
   * @param numRuns Number of inference passes over the eval set.
   * @param printDetailedResults When true, failing metrics are printed in detail.
   * @throws {Error} Listing every failed metric, one per line.
   */
  static async evaluateEvalSet(agent, evalSet, criteria, numRuns = NUM_RUNS, printDetailedResults = false) {
    const evalMetrics = Object.entries(criteria).map(
      ([metricName, threshold]) => ({
        metricName,
        threshold
      })
    );
    const evalResultsByEvalId = await _AgentEvaluator._getEvalResultsByEvalId(
      agent,
      evalSet,
      evalMetrics,
      numRuns
    );
    const failures = [];
    for (const evalResultsPerEvalId of evalResultsByEvalId.values()) {
      const evalMetricResults = _AgentEvaluator._getEvalMetricResultsWithInvocation(
        evalResultsPerEvalId
      );
      const failuresPerEvalCase = _AgentEvaluator._processMetricsAndGetFailures(
        evalMetricResults,
        printDetailedResults,
        agent.name || "Unknown Agent"
      );
      failures.push(...failuresPerEvalCase);
    }
    if (failures.length > 0) {
      throw new Error(
        `Following are all the test failures. If you looking to get more details on the failures, then please re-run this test with \`printDetailedResults\` set to \`true\`.\n${failures.join(
          "\n"
        )}`
      );
    }
  }

  /**
   * Evaluate an agent against a test file, or against every `*.test.json`
   * found recursively under a directory.
   *
   * @param agent Agent (or runner) under evaluation.
   * @param evalDatasetFilePathOrDir File or directory path.
   * @param numRuns Number of inference passes per eval set.
   * @param initialSessionFile Optional JSON file with initial session state
   *   (only meaningful for old-format datasets).
   * @throws {Error} `Invalid path: ...` when the path cannot be inspected or
   *   walked; otherwise whatever evaluateEvalSet throws.
   */
  static async evaluate(agent, evalDatasetFilePathOrDir, numRuns = NUM_RUNS, initialSessionFile) {
    const testFiles = [];
    try {
      const stat2 = await fs2.stat(evalDatasetFilePathOrDir);
      if (stat2.isDirectory()) {
        // Use the class binding (not `this`) so the method also works when
        // it is called detached, like every other static here.
        const files = await _AgentEvaluator._findTestFilesRecursively(
          evalDatasetFilePathOrDir
        );
        testFiles.push(...files);
      } else {
        testFiles.push(evalDatasetFilePathOrDir);
      }
    } catch (error) {
      throw new Error(`Invalid path: ${evalDatasetFilePathOrDir}`, { cause: error });
    }
    const initialSession = await _AgentEvaluator._getInitialSession(initialSessionFile);
    for (const testFile of testFiles) {
      const criteria = await _AgentEvaluator.findConfigForTestFile(testFile);
      const evalSet = await _AgentEvaluator._loadEvalSetFromFile(
        testFile,
        criteria,
        initialSession
      );
      await _AgentEvaluator.evaluateEvalSet(agent, evalSet, criteria, numRuns);
    }
  }

  /**
   * Convert an old-format dataset file into an EvalSet JSON file.
   *
   * @param oldEvalDataFile Source file in the old format.
   * @param newEvalDataFile Destination path; written as pretty-printed JSON.
   * @param initialSessionFile Optional JSON file with initial session state.
   * @throws {Error} When either file path is empty.
   */
  static async migrateEvalDataToNewSchema(oldEvalDataFile, newEvalDataFile, initialSessionFile) {
    if (!oldEvalDataFile || !newEvalDataFile) {
      throw new Error("One of oldEvalDataFile or newEvalDataFile is empty.");
    }
    const criteria = await _AgentEvaluator.findConfigForTestFile(oldEvalDataFile);
    const initialSession = await _AgentEvaluator._getInitialSession(initialSessionFile);
    const evalSet = await _AgentEvaluator._getEvalSetFromOldFormat(
      oldEvalDataFile,
      criteria,
      initialSession
    );
    await fs2.writeFile(newEvalDataFile, JSON.stringify(evalSet, null, 2));
  }

  /**
   * Collect every file whose name ends in `.test.json` under `dir`,
   * descending into subdirectories.
   *
   * @returns Full paths, in directory-walk order.
   */
  static async _findTestFilesRecursively(dir) {
    const testFiles = [];
    async function walk(currentDir) {
      const entries = await fs2.readdir(currentDir, { withFileTypes: true });
      for (const entry of entries) {
        const fullPath = path2.join(currentDir, entry.name);
        if (entry.isDirectory()) {
          await walk(fullPath);
        } else if (entry.name.endsWith(".test.json")) {
          testFiles.push(fullPath);
        }
      }
    }
    await walk(dir);
    return testFiles;
  }

  /**
   * Load an eval set from a file, accepting both the EvalSet schema and the
   * older list-of-samples format.
   *
   * @param evalSetFile Path of the JSON file.
   * @param criteria Criteria used to validate old-format data.
   * @param initialSession Initial session state; must be empty for files
   *   already in the EvalSet schema.
   * @throws {Error} When the file cannot be read or parsed, or when an
   *   initial session is supplied together with an EvalSet-schema file.
   */
  static async _loadEvalSetFromFile(evalSetFile, criteria, initialSession) {
    let evalSet;
    try {
      const content = await fs2.readFile(evalSetFile, "utf-8");
      try {
        evalSet = JSON.parse(content);
      } catch (parseError) {
        throw new Error(`Failed to parse eval set data: ${parseError}`, { cause: parseError });
      }
    } catch (error) {
      throw new Error(`Failed to process eval set file: ${error}`, { cause: error });
    }
    // Shape checks live outside the try blocks so that their errors are not
    // re-labelled as read/parse failures.
    if (evalSet && evalSet.evalSetId && evalSet.evalCases) {
      if (Object.keys(initialSession).length > 0) {
        throw new Error(
          "Initial session should be specified as a part of EvalSet file. Explicit initial session is only needed, when specifying data in the older schema."
        );
      }
      return evalSet;
    }
    console.warn(
      `Contents of ${evalSetFile} appear to be in older format. To avoid this warning, please update your test files to contain data in EvalSet schema. You can use 'migrateEvalDataToNewSchema' for migrating your old test files.`
    );
    return _AgentEvaluator._getEvalSetFromOldFormat(
      evalSetFile,
      criteria,
      initialSession
    );
  }

  /**
   * Build an EvalSet from old-format data: every sample of the first loaded
   * dataset becomes one eval case with a single-turn conversation.
   *
   * @param evalSetFile Path handed to _loadDataset; also used as the set name.
   * @param criteria Criteria the samples are validated against.
   * @param initialSession Session state; attached as `sessionInput.state`
   *   to every case when non-empty.
   */
  static async _getEvalSetFromOldFormat(evalSetFile, criteria, initialSession) {
    const data = await _AgentEvaluator._loadDataset(evalSetFile);
    _AgentEvaluator._validateInput(data, criteria);
    return {
      evalSetId: `eval-set-${Date.now()}`,
      name: evalSetFile,
      evalCases: data[0].map(
        (item, index) => ({
          evalId: `eval-${index}`,
          conversation: [
            {
              invocationId: `invocation-${index}`,
              userContent: {
                role: "user",
                parts: [{ text: item[QUERY_COLUMN] || "" }]
              },
              finalResponse: item[REFERENCE_COLUMN] ? {
                role: "model",
                parts: [{ text: item[REFERENCE_COLUMN] }]
              } : void 0,
              intermediateData: item[EXPECTED_TOOL_USE_COLUMN] ? {
                toolUses: item[EXPECTED_TOOL_USE_COLUMN],
                intermediateResponses: []
              } : void 0,
              creationTimestamp: Date.now()
            }
          ],
          sessionInput: Object.keys(initialSession).length > 0 ? {
            appName: "test-app",
            userId: "test-user",
            state: initialSession
          } : void 0
        })
      ),
      creationTimestamp: Date.now()
    };
  }

  /**
   * Read the initial session state.
   *
   * @param initialSessionFile Optional path of a JSON file.
   * @returns The parsed file content, or `{}` when no file is given.
   * @throws {Error} When the file cannot be read or parsed.
   */
  static async _getInitialSession(initialSessionFile) {
    if (!initialSessionFile) {
      return {};
    }
    try {
      const content = await fs2.readFile(initialSessionFile, "utf-8");
      return JSON.parse(content);
    } catch (error) {
      throw new Error(
        `Failed to load initial session from ${initialSessionFile}: ${error}`,
        { cause: error }
      );
    }
  }

  /**
   * Load old-format data from a file or from every `*.test.json` under a
   * directory.
   *
   * @returns An array with one entry per loaded file; each entry is the
   *   file's JSON value, wrapped in an array if it was not one already.
   * @throws {Error} When the path is neither a directory nor a regular file.
   */
  static async _loadDataset(inputData) {
    const stat2 = await fs2.stat(inputData);
    if (stat2.isDirectory()) {
      // Class binding instead of `this`, so a detached call still works.
      const testFiles = await _AgentEvaluator._findTestFilesRecursively(inputData);
      const results = await Promise.all(testFiles.map((f) => loadJson(f)));
      return results.map((r) => Array.isArray(r) ? r : [r]);
    }
    if (stat2.isFile()) {
      const data = await loadJson(inputData);
      return [Array.isArray(data) ? data : [data]];
    }
    throw new Error(`Invalid input path: ${inputData}`);
  }

  /**
   * Validate old-format data against the criteria.
   *
   * Checks that the dataset and its first sample list are non-empty, that
   * every criteria key is allowed, and that the first sample carries the
   * columns each selected metric needs. Only the first sample is inspected.
   *
   * @throws {Error} Describing the first violated rule.
   */
  static _validateInput(evalDataset, criteria) {
    if (!evalDataset || evalDataset.length === 0) {
      throw new Error("The evaluation dataset is None or empty.");
    }
    for (const key of Object.keys(criteria)) {
      if (!ALLOWED_CRITERIA.includes(key)) {
        throw new Error(
          `Invalid criteria key: ${key}. Expected one of ${ALLOWED_CRITERIA.join(
            ", "
          )}.`
        );
      }
    }
    const sample = evalDataset[0];
    if (!Array.isArray(sample) || sample.length === 0) {
      throw new Error("The evaluation dataset is empty.");
    }
    const firstQuery = sample[0];
    // typeof null is "object", so null needs its own check; otherwise the
    // `in` tests below would fail with an unrelated TypeError.
    if (firstQuery === null || typeof firstQuery !== "object") {
      throw new Error(
        `Each evaluation dataset sample must be list of dictionary. But it's ${JSON.stringify(
          evalDataset
        )}`
      );
    }
    if (TOOL_TRAJECTORY_SCORE_KEY in criteria) {
      if (!(QUERY_COLUMN in firstQuery) || !(EXPECTED_TOOL_USE_COLUMN in firstQuery)) {
        throw new Error(
          `Samples for ${TOOL_TRAJECTORY_SCORE_KEY} must include '${QUERY_COLUMN}' and '${EXPECTED_TOOL_USE_COLUMN}' keys. The sample is ${JSON.stringify(sample)}.`
        );
      }
    }
    if (RESPONSE_EVALUATION_SCORE_KEY in criteria) {
      if (!(QUERY_COLUMN in firstQuery)) {
        throw new Error(
          `Samples for ${RESPONSE_EVALUATION_SCORE_KEY} must include '${QUERY_COLUMN}' key. The sample is ${JSON.stringify(sample)}.`
        );
      }
    }
    if (RESPONSE_MATCH_SCORE_KEY in criteria) {
      if (!(QUERY_COLUMN in firstQuery) || !(REFERENCE_COLUMN in firstQuery)) {
        throw new Error(
          `Samples for ${RESPONSE_MATCH_SCORE_KEY} must include '${QUERY_COLUMN}' and '${REFERENCE_COLUMN}' keys. The sample is ${JSON.stringify(sample)}.`
        );
      }
    }
  }

  /**
   * Print a summary line and a per-invocation table for one metric.
   * Invocations lacking an expected or actual counterpart are printed with
   * empty text in the corresponding columns.
   */
  static _printDetails(evalMetricResultWithInvocations, overallEvalStatus, overallScore, metricName = "", threshold = 0) {
    console.log(
      `Summary: \`${overallEvalStatus}\` for Metric: \`${metricName}\`. Expected threshold: \`${threshold}\`, actual value: \`${overallScore}\`.`
    );
    const data = evalMetricResultWithInvocations.map((per) => {
      // Either side can be absent when the two invocation lists differ in length.
      const expected = per.expectedInvocation || {};
      const actual = per.actualInvocation || {};
      return {
        evalStatus: per.evalMetricResult.evalStatus,
        score: per.evalMetricResult.score,
        threshold,
        prompt: _AgentEvaluator._convertContentToText(expected.userContent),
        expectedResponse: _AgentEvaluator._convertContentToText(expected.finalResponse),
        actualResponse: _AgentEvaluator._convertContentToText(actual.finalResponse),
        expectedToolCalls: _AgentEvaluator._convertToolCallsToText(expected.intermediateData),
        actualToolCalls: _AgentEvaluator._convertToolCallsToText(actual.intermediateData)
      };
    });
    console.table(data);
    console.log("\n\n");
  }

  /**
   * @returns The non-empty `text` values of `content.parts` joined with
   *   newlines, or "" when there is no content or no parts.
   */
  static _convertContentToText(content) {
    if (content != null && content.parts) {
      return content.parts.map((p) => p.text || "").filter((text) => text.length > 0).join("\n");
    }
    return "";
  }

  /**
   * @returns Each entry of `intermediateData.toolUses` as JSON, one per
   *   line, or "" when there are no tool uses.
   */
  static _convertToolCallsToText(intermediateData) {
    if (intermediateData != null && intermediateData.toolUses) {
      return intermediateData.toolUses.map((t) => JSON.stringify(t)).join("\n");
    }
    return "";
  }

  /**
   * Run inference `numRuns` times over the eval set, evaluate the collected
   * results, and group the case results by eval id.
   *
   * @returns Map of evalId -> array of case results.
   */
  static async _getEvalResultsByEvalId(agent, evalSet, evalMetrics, numRuns) {
    const evalService = new LocalEvalService(agent);
    const inferenceResults = [];
    for (let run = 0; run < numRuns; run++) {
      for await (const result of evalService.performInference({
        evalSetId: evalSet.evalSetId,
        evalCases: [evalSet]
      })) {
        inferenceResults.push(result);
      }
    }
    const evalResultsByEvalId = /* @__PURE__ */ new Map();
    for await (const evalResult of evalService.evaluate({
      inferenceResults,
      evaluateConfig: { evalMetrics }
    })) {
      for (const caseResult of evalResult.evalCaseResults) {
        const evalId = caseResult.evalId;
        if (!evalResultsByEvalId.has(evalId)) {
          evalResultsByEvalId.set(evalId, []);
        }
        evalResultsByEvalId.get(evalId).push(caseResult);
      }
    }
    return evalResultsByEvalId;
  }

  /**
   * Regroup per-invocation metric results by metric name.
   *
   * @returns Object mapping metricName -> array of
   *   `{ actualInvocation, expectedInvocation, evalMetricResult }`.
   */
  static _getEvalMetricResultsWithInvocation(evalResultsPerEvalId) {
    const evalMetricResults = {};
    for (const evalCaseResult of evalResultsPerEvalId) {
      for (const evalMetricsPerInvocation of evalCaseResult.evalMetricResultPerInvocation) {
        for (const evalMetricResult of evalMetricsPerInvocation.evalMetricResults) {
          const metricName = evalMetricResult.metricName;
          if (!(metricName in evalMetricResults)) {
            evalMetricResults[metricName] = [];
          }
          evalMetricResults[metricName].push({
            actualInvocation: evalMetricsPerInvocation.actualInvocation,
            expectedInvocation: evalMetricsPerInvocation.expectedInvocation,
            evalMetricResult
          });
        }
      }
    }
    return evalMetricResults;
  }

  /**
   * Average the scores of every metric and report those that did not pass.
   *
   * The threshold is taken from the first result of each metric (0 when
   * absent). A metric with no defined scores counts as not evaluated and is
   * reported as a failure.
   *
   * @param evalMetricResults Output of _getEvalMetricResultsWithInvocation.
   * @param printDetailedResults When true, failing metrics are printed via _printDetails.
   * @param agentModule Agent name used in the failure messages.
   * @returns One failure message per metric that did not pass.
   */
  static _processMetricsAndGetFailures(evalMetricResults, printDetailedResults, agentModule) {
    const failures = [];
    for (const [metricName, evalMetricResultsWithInvocations] of Object.entries(
      evalMetricResults
    )) {
      const firstResult = evalMetricResultsWithInvocations[0];
      const threshold = (firstResult == null ? void 0 : firstResult.evalMetricResult.threshold) || 0;
      const scores = evalMetricResultsWithInvocations.map((m) => m.evalMetricResult.score).filter((s) => s !== void 0);
      let overallScore;
      let overallEvalStatus;
      if (scores.length > 0) {
        overallScore = scores.reduce((a, b) => a + b, 0) / scores.length;
        overallEvalStatus = overallScore >= threshold ? 1 /* PASSED */ : 2 /* FAILED */;
      } else {
        overallScore = void 0;
        overallEvalStatus = 3 /* NOT_EVALUATED */;
      }
      if (overallEvalStatus !== 1 /* PASSED */) {
        if (printDetailedResults) {
          _AgentEvaluator._printDetails(
            evalMetricResultsWithInvocations,
            overallEvalStatus,
            overallScore,
            metricName,
            threshold
          );
        }
        failures.push(
          `${metricName} for ${agentModule} Failed. Expected ${threshold}, but got ${overallScore}.`
        );
      }
    }
    return failures;
  }
};
|
|
13060
|
+
|
|
13061
|
+
// src/evaluation/final-response-match-v1.ts
|
|
13062
|
+
/**
 * Evaluator that scores the agent's final response against the expected
 * final response with the ROUGE-1 F-measure.
 */
var RougeEvaluator = class extends Evaluator {
  /**
   * @param evalMetric Metric descriptor; its `threshold` decides pass/fail.
   */
  constructor(evalMetric) {
    super(evalMetric);
    this.evalMetric = evalMetric;
  }

  /** @returns Name, description and value interval of the metric. */
  static getMetricInfo() {
    return {
      metricName: "response_match_score" /* RESPONSE_MATCH_SCORE */,
      description: "This metric evaluates if the agent's final response matches a golden/expected final response using Rouge_1 metric. Value range for this metric is [0,1], with values closer to 1 more desirable.",
      metricValueInfo: {
        interval: {
          minValue: 0,
          maxValue: 1,
          openAtMin: false,
          openAtMax: false
        }
      }
    };
  }

  /**
   * Score actual invocations against expected ones, pairing them by index.
   *
   * Only indices present in both lists are scored; an actual invocation
   * without an expected counterpart is skipped rather than dereferenced.
   *
   * @param actualInvocations Invocations produced by the agent.
   * @param expectedInvocations Golden invocations.
   * @returns `{ overallScore, overallEvalStatus, perInvocationResults }`, or
   *   a NOT_EVALUATED result with no per-invocation entries when nothing
   *   could be paired.
   */
  async evaluateInvocations(actualInvocations, expectedInvocations) {
    let totalScore = 0;
    let numInvocations = 0;
    const perInvocationResults = [];
    const pairCount = Math.min(actualInvocations.length, expectedInvocations.length);
    for (let i = 0; i < pairCount; i++) {
      const actual = actualInvocations[i];
      const expected = expectedInvocations[i];
      const reference = getTextFromContent2(expected.finalResponse);
      const response = getTextFromContent2(actual.finalResponse);
      const rouge1Scores = await calculateRouge1Scores(response, reference);
      const score = rouge1Scores.fmeasure;
      perInvocationResults.push({
        actualInvocation: actual,
        expectedInvocation: expected,
        score,
        evalStatus: getEvalStatus2(score, this.evalMetric.threshold)
      });
      totalScore += score;
      numInvocations++;
    }
    if (perInvocationResults.length > 0) {
      const overallScore = totalScore / numInvocations;
      return {
        overallScore,
        overallEvalStatus: getEvalStatus2(
          overallScore,
          this.evalMetric.threshold
        ),
        perInvocationResults
      };
    }
    return {
      overallEvalStatus: 3 /* NOT_EVALUATED */,
      perInvocationResults: []
    };
  }
};
|
|
13119
|
+
/**
 * Join the text of a content object's parts.
 *
 * @param content Object with an optional `parts` array; may be null/undefined.
 * @returns The truthy `text` values of the parts joined with newlines, or ""
 *   when there is no content or no parts.
 */
function getTextFromContent2(content) {
  const hasParts = content != null && content.parts;
  if (!hasParts) {
    return "";
  }
  const texts = content.parts.map(({ text }) => text);
  const nonEmptyTexts = texts.filter((text) => Boolean(text));
  return nonEmptyTexts.join("\n");
}
|
|
13125
|
+
/**
 * Map a score to an eval status.
 *
 * @param score Achieved score.
 * @param threshold Minimum score that still passes.
 * @returns 1 (PASSED) when `score >= threshold`, otherwise 2 (FAILED).
 */
function getEvalStatus2(score, threshold) {
  if (score >= threshold) {
    return 1 /* PASSED */;
  }
  return 2 /* FAILED */;
}
|
|
13128
|
+
/**
 * Compute ROUGE-1 precision, recall and F-measure of `response` against
 * `reference`.
 *
 * Unigram overlap is counted with clipped counts: a token contributes
 * min(count in response, count in reference), and the denominators are the
 * total token counts. Counting unique tokens instead would ignore
 * repetition and over-score responses that repeat reference words.
 *
 * @param response Text produced by the agent.
 * @param reference Expected text.
 * @returns `{ precision, recall, fmeasure }`, all 0 when either text is
 *   blank or yields no tokens.
 */
function calculateRouge1Scores(response, reference) {
  if (!response.trim() || !reference.trim()) {
    return { precision: 0, recall: 0, fmeasure: 0 };
  }
  const responseTokens = tokenizeText(response);
  const referenceTokens = tokenizeText(reference);
  const referenceCounts = /* @__PURE__ */ new Map();
  for (const token of referenceTokens) {
    referenceCounts.set(token, (referenceCounts.get(token) || 0) + 1);
  }
  // Consume one reference occurrence per matched response token, so a token
  // never matches more often than it appears in the reference.
  let overlap = 0;
  for (const token of responseTokens) {
    const remaining = referenceCounts.get(token) || 0;
    if (remaining > 0) {
      overlap++;
      referenceCounts.set(token, remaining - 1);
    }
  }
  const precision = responseTokens.length > 0 ? overlap / responseTokens.length : 0;
  const recall = referenceTokens.length > 0 ? overlap / referenceTokens.length : 0;
  const fmeasure = precision + recall > 0 ? 2 * precision * recall / (precision + recall) : 0;
  return { precision, recall, fmeasure };
}

/**
 * Lower-case `text`, replace every character that is neither a word
 * character nor whitespace with a space, and split on whitespace.
 *
 * @returns The non-empty tokens, in order of appearance.
 */
function tokenizeText(text) {
  return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((token) => token.length > 0);
}
|
|
13147
|
+
|
|
11811
13148
|
// src/version.ts
|
|
11812
13149
|
// Version string embedded in the bundle.
// NOTE(review): the package itself is published as 0.2.0 (previously 0.1.22),
// so this constant does not track the package version — confirm whether it should.
var VERSION = "0.1.0";
|
|
11813
13150
|
|
|
@@ -11961,4 +13298,15 @@ var VERSION = "0.1.0";
|
|
|
11961
13298
|
|
|
11962
13299
|
|
|
11963
13300
|
|
|
11964
|
-
|
|
13301
|
+
|
|
13302
|
+
|
|
13303
|
+
|
|
13304
|
+
|
|
13305
|
+
|
|
13306
|
+
|
|
13307
|
+
|
|
13308
|
+
|
|
13309
|
+
|
|
13310
|
+
|
|
13311
|
+
|
|
13312
|
+
exports.AF_FUNCTION_CALL_ID_PREFIX = AF_FUNCTION_CALL_ID_PREFIX; exports.Agent = LlmAgent; exports.AgentBuilder = AgentBuilder; exports.AgentEvaluator = AgentEvaluator; exports.AgentTool = AgentTool; exports.Agents = agents_exports; exports.AiSdkLlm = AiSdkLlm; exports.AnthropicLlm = AnthropicLlm; exports.ApiKeyCredential = ApiKeyCredential; exports.ApiKeyScheme = ApiKeyScheme; exports.AuthConfig = AuthConfig; exports.AuthCredential = AuthCredential; exports.AuthCredentialType = AuthCredentialType; exports.AuthHandler = AuthHandler; exports.AuthScheme = AuthScheme; exports.AuthSchemeType = AuthSchemeType; exports.AuthTool = AuthTool; exports.AutoFlow = AutoFlow; exports.BaseAgent = BaseAgent; exports.BaseCodeExecutor = BaseCodeExecutor; exports.BaseLLMConnection = BaseLLMConnection; exports.BaseLlm = BaseLlm; exports.BaseLlmFlow = BaseLlmFlow; exports.BaseLlmRequestProcessor = BaseLlmRequestProcessor; exports.BaseLlmResponseProcessor = BaseLlmResponseProcessor; exports.BasePlanner = BasePlanner; exports.BaseSessionService = BaseSessionService; exports.BaseTool = BaseTool; exports.BasicAuthCredential = BasicAuthCredential; exports.BearerTokenCredential = BearerTokenCredential; exports.BuiltInCodeExecutor = BuiltInCodeExecutor; exports.BuiltInPlanner = BuiltInPlanner; exports.CallbackContext = CallbackContext; exports.CodeExecutionUtils = CodeExecutionUtils; exports.CodeExecutorContext = CodeExecutorContext; exports.DatabaseSessionService = DatabaseSessionService; exports.EnhancedAuthConfig = EnhancedAuthConfig; exports.EvalResult = EvalResult; exports.EvalStatus = EvalStatus; exports.Evaluation = evaluation_exports; exports.Evaluator = Evaluator; exports.Event = Event; exports.EventActions = EventActions; exports.Events = events_exports; exports.ExitLoopTool = ExitLoopTool; exports.FileOperationsTool = FileOperationsTool; exports.FinalResponseMatchV2Evaluator = FinalResponseMatchV2Evaluator; exports.Flows = flows_exports; exports.FunctionTool = FunctionTool; 
exports.GcsArtifactService = GcsArtifactService; exports.GetUserChoiceTool = GetUserChoiceTool; exports.GoogleLlm = GoogleLlm; exports.GoogleSearch = GoogleSearch; exports.HttpRequestTool = HttpRequestTool; exports.HttpScheme = HttpScheme; exports.InMemoryArtifactService = InMemoryArtifactService; exports.InMemoryMemoryService = InMemoryMemoryService; exports.InMemoryRunner = InMemoryRunner; exports.InMemorySessionService = InMemorySessionService; exports.InvocationContext = InvocationContext; exports.LLMRegistry = LLMRegistry; exports.LangGraphAgent = LangGraphAgent; exports.LlmAgent = LlmAgent; exports.LlmCallsLimitExceededError = LlmCallsLimitExceededError; exports.LlmRequest = LlmRequest; exports.LlmResponse = LlmResponse; exports.LoadArtifactsTool = LoadArtifactsTool; exports.LoadMemoryTool = LoadMemoryTool; exports.LocalEvalService = LocalEvalService; exports.LoopAgent = LoopAgent; exports.McpAbi = McpAbi; exports.McpAtp = McpAtp; exports.McpBamm = McpBamm; exports.McpCoinGecko = McpCoinGecko; exports.McpDiscord = McpDiscord; exports.McpError = McpError; exports.McpErrorType = McpErrorType; exports.McpFilesystem = McpFilesystem; exports.McpFraxlend = McpFraxlend; exports.McpGeneric = McpGeneric; exports.McpIqWiki = McpIqWiki; exports.McpMemory = McpMemory; exports.McpNearAgent = McpNearAgent; exports.McpNearIntents = McpNearIntents; exports.McpOdos = McpOdos; exports.McpSamplingHandler = McpSamplingHandler; exports.McpTelegram = McpTelegram; exports.McpToolset = McpToolset; exports.Memory = memory_exports; exports.Models = models_exports; exports.OAuth2Credential = OAuth2Credential; exports.OAuth2Scheme = OAuth2Scheme; exports.OpenAiLlm = OpenAiLlm; exports.OpenIdConnectScheme = OpenIdConnectScheme; exports.ParallelAgent = ParallelAgent; exports.PlanReActPlanner = PlanReActPlanner; exports.PrebuiltMetrics = PrebuiltMetrics; exports.REQUEST_EUC_FUNCTION_CALL_NAME = REQUEST_EUC_FUNCTION_CALL_NAME; exports.ReadonlyContext = ReadonlyContext; 
exports.RougeEvaluator = RougeEvaluator; exports.RunConfig = RunConfig; exports.Runner = Runner; exports.SafetyEvaluatorV1 = SafetyEvaluatorV1; exports.SequentialAgent = SequentialAgent; exports.Sessions = sessions_exports; exports.SingleFlow = SingleFlow; exports.State = State; exports.StreamingMode = StreamingMode; exports.TelemetryService = TelemetryService; exports.ToolContext = ToolContext; exports.Tools = tools_exports; exports.TrajectoryEvaluator = TrajectoryEvaluator; exports.TransferToAgentTool = TransferToAgentTool; exports.UserInteractionTool = UserInteractionTool; exports.VERSION = VERSION; exports.VertexAiSessionService = VertexAiSessionService; exports._findFunctionCallEventIfLastEventIsFunctionResponse = _findFunctionCallEventIfLastEventIsFunctionResponse; exports.adkToMcpToolType = adkToMcpToolType; exports.agentTransferRequestProcessor = requestProcessor8; exports.basicRequestProcessor = requestProcessor2; exports.buildFunctionDeclaration = buildFunctionDeclaration; exports.codeExecutionRequestProcessor = requestProcessor3; exports.codeExecutionResponseProcessor = responseProcessor; exports.contentRequestProcessor = requestProcessor4; exports.createAuthToolArguments = createAuthToolArguments; exports.createBranchContextForSubAgent = createBranchContextForSubAgent; exports.createDatabaseSessionService = createDatabaseSessionService; exports.createFunctionTool = createFunctionTool; exports.createMysqlSessionService = createMysqlSessionService; exports.createPostgresSessionService = createPostgresSessionService; exports.createSamplingHandler = createSamplingHandler; exports.createSqliteSessionService = createSqliteSessionService; exports.createTool = createTool; exports.generateAuthEvent = generateAuthEvent; exports.generateClientFunctionCallId = generateClientFunctionCallId; exports.getLongRunningFunctionCalls = getLongRunningFunctionCalls; exports.getMcpTools = getMcpTools; exports.handleFunctionCallsAsync = handleFunctionCallsAsync; 
exports.handleFunctionCallsLive = handleFunctionCallsLive; exports.identityRequestProcessor = requestProcessor5; exports.initializeTelemetry = initializeTelemetry; exports.injectSessionState = injectSessionState; exports.instructionsRequestProcessor = requestProcessor6; exports.isEnhancedAuthConfig = isEnhancedAuthConfig; exports.jsonSchemaToDeclaration = jsonSchemaToDeclaration; exports.mcpSchemaToParameters = mcpSchemaToParameters; exports.mergeAgentRun = mergeAgentRun; exports.mergeParallelFunctionResponseEvents = mergeParallelFunctionResponseEvents; exports.newInvocationContextId = newInvocationContextId; exports.nlPlanningRequestProcessor = requestProcessor7; exports.nlPlanningResponseProcessor = responseProcessor2; exports.normalizeJsonSchema = normalizeJsonSchema; exports.populateClientFunctionCallId = populateClientFunctionCallId; exports.registerProviders = registerProviders; exports.removeClientFunctionCallId = removeClientFunctionCallId; exports.requestProcessor = requestProcessor; exports.shutdownTelemetry = shutdownTelemetry; exports.telemetryService = telemetryService; exports.traceLlmCall = traceLlmCall; exports.traceToolCall = traceToolCall; exports.tracer = tracer;
|