@launchdarkly/server-sdk-ai 0.15.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +4 -4
- package/dist/index.cjs +78 -55
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +24 -5
- package/dist/index.d.ts +24 -5
- package/dist/index.js +78 -55
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.cts
CHANGED
|
@@ -440,8 +440,14 @@ interface LDAIJudgeConfigDefault extends LDAIConfigDefault {
|
|
|
440
440
|
*/
|
|
441
441
|
messages?: LDMessage[];
|
|
442
442
|
/**
|
|
443
|
-
* Evaluation metric
|
|
443
|
+
* Evaluation metric key for judge configurations.
|
|
444
|
+
* The key of the metric that this judge can evaluate.
|
|
445
|
+
*/
|
|
446
|
+
evaluationMetricKey?: string;
|
|
447
|
+
/**
|
|
448
|
+
* Evaluation metric keys for judge configurations (legacy).
|
|
444
449
|
* The keys of the metrics that this judge can evaluate.
|
|
450
|
+
* @deprecated Use evaluationMetricKey instead. This field is kept for legacy support.
|
|
445
451
|
*/
|
|
446
452
|
evaluationMetricKeys?: string[];
|
|
447
453
|
}
|
|
@@ -486,10 +492,16 @@ interface LDAIJudgeConfig extends LDAIConfig {
|
|
|
486
492
|
*/
|
|
487
493
|
messages?: LDMessage[];
|
|
488
494
|
/**
|
|
489
|
-
* Evaluation metric
|
|
495
|
+
* Evaluation metric key for judge configurations.
|
|
496
|
+
* The key of the metric that this judge can evaluate.
|
|
497
|
+
*/
|
|
498
|
+
evaluationMetricKey?: string;
|
|
499
|
+
/**
|
|
500
|
+
* Evaluation metric keys for judge configurations (legacy).
|
|
490
501
|
* The keys of the metrics that this judge can evaluate.
|
|
502
|
+
* @deprecated Use evaluationMetricKey instead. This field is kept for legacy support.
|
|
491
503
|
*/
|
|
492
|
-
evaluationMetricKeys
|
|
504
|
+
evaluationMetricKeys?: string[];
|
|
493
505
|
}
|
|
494
506
|
/**
|
|
495
507
|
* Union type for all AI Config variants.
|
|
@@ -597,6 +609,13 @@ declare class Judge {
|
|
|
597
609
|
private readonly _logger?;
|
|
598
610
|
private readonly _evaluationResponseStructure;
|
|
599
611
|
constructor(_aiConfig: LDAIJudgeConfig, _aiConfigTracker: LDAIConfigTracker, _aiProvider: AIProvider, logger?: LDLogger$1);
|
|
612
|
+
/**
|
|
613
|
+
* Gets the evaluation metric key, prioritizing evaluationMetricKey over evaluationMetricKeys.
|
|
614
|
+
* Falls back to the first valid (non-empty, non-whitespace) value in evaluationMetricKeys if evaluationMetricKey is not provided.
|
|
615
|
+
* Treats empty strings and whitespace-only strings as invalid.
|
|
616
|
+
* @returns The evaluation metric key, or undefined if not available
|
|
617
|
+
*/
|
|
618
|
+
private _getEvaluationMetricKey;
|
|
600
619
|
/**
|
|
601
620
|
* Evaluates an AI response using the judge's configuration.
|
|
602
621
|
*
|
|
@@ -858,7 +877,7 @@ interface LDAIClient {
|
|
|
858
877
|
* enabled: true,
|
|
859
878
|
* model: { name: 'gpt-4' },
|
|
860
879
|
* provider: { name: 'openai' },
|
|
861
|
-
*
|
|
880
|
+
* evaluationMetricKey: '$ld:ai:judge:relevance',
|
|
862
881
|
* messages: [{ role: 'system', content: 'You are a relevance judge.' }]
|
|
863
882
|
* }, variables);
|
|
864
883
|
*
|
|
@@ -977,7 +996,7 @@ interface LDAIClient {
|
|
|
977
996
|
* enabled: true,
|
|
978
997
|
* model: { name: "gpt-4" },
|
|
979
998
|
* provider: { name: "openai" },
|
|
980
|
-
*
|
|
999
|
+
* evaluationMetricKey: '$ld:ai:judge:relevance',
|
|
981
1000
|
* messages: [{ role: 'system', content: 'You are a relevance judge.' }]
|
|
982
1001
|
* },
|
|
983
1002
|
* { metric: "relevance" }
|
package/dist/index.d.ts
CHANGED
|
@@ -440,8 +440,14 @@ interface LDAIJudgeConfigDefault extends LDAIConfigDefault {
|
|
|
440
440
|
*/
|
|
441
441
|
messages?: LDMessage[];
|
|
442
442
|
/**
|
|
443
|
-
* Evaluation metric
|
|
443
|
+
* Evaluation metric key for judge configurations.
|
|
444
|
+
* The key of the metric that this judge can evaluate.
|
|
445
|
+
*/
|
|
446
|
+
evaluationMetricKey?: string;
|
|
447
|
+
/**
|
|
448
|
+
* Evaluation metric keys for judge configurations (legacy).
|
|
444
449
|
* The keys of the metrics that this judge can evaluate.
|
|
450
|
+
* @deprecated Use evaluationMetricKey instead. This field is kept for legacy support.
|
|
445
451
|
*/
|
|
446
452
|
evaluationMetricKeys?: string[];
|
|
447
453
|
}
|
|
@@ -486,10 +492,16 @@ interface LDAIJudgeConfig extends LDAIConfig {
|
|
|
486
492
|
*/
|
|
487
493
|
messages?: LDMessage[];
|
|
488
494
|
/**
|
|
489
|
-
* Evaluation metric
|
|
495
|
+
* Evaluation metric key for judge configurations.
|
|
496
|
+
* The key of the metric that this judge can evaluate.
|
|
497
|
+
*/
|
|
498
|
+
evaluationMetricKey?: string;
|
|
499
|
+
/**
|
|
500
|
+
* Evaluation metric keys for judge configurations (legacy).
|
|
490
501
|
* The keys of the metrics that this judge can evaluate.
|
|
502
|
+
* @deprecated Use evaluationMetricKey instead. This field is kept for legacy support.
|
|
491
503
|
*/
|
|
492
|
-
evaluationMetricKeys
|
|
504
|
+
evaluationMetricKeys?: string[];
|
|
493
505
|
}
|
|
494
506
|
/**
|
|
495
507
|
* Union type for all AI Config variants.
|
|
@@ -597,6 +609,13 @@ declare class Judge {
|
|
|
597
609
|
private readonly _logger?;
|
|
598
610
|
private readonly _evaluationResponseStructure;
|
|
599
611
|
constructor(_aiConfig: LDAIJudgeConfig, _aiConfigTracker: LDAIConfigTracker, _aiProvider: AIProvider, logger?: LDLogger$1);
|
|
612
|
+
/**
|
|
613
|
+
* Gets the evaluation metric key, prioritizing evaluationMetricKey over evaluationMetricKeys.
|
|
614
|
+
* Falls back to the first valid (non-empty, non-whitespace) value in evaluationMetricKeys if evaluationMetricKey is not provided.
|
|
615
|
+
* Treats empty strings and whitespace-only strings as invalid.
|
|
616
|
+
* @returns The evaluation metric key, or undefined if not available
|
|
617
|
+
*/
|
|
618
|
+
private _getEvaluationMetricKey;
|
|
600
619
|
/**
|
|
601
620
|
* Evaluates an AI response using the judge's configuration.
|
|
602
621
|
*
|
|
@@ -858,7 +877,7 @@ interface LDAIClient {
|
|
|
858
877
|
* enabled: true,
|
|
859
878
|
* model: { name: 'gpt-4' },
|
|
860
879
|
* provider: { name: 'openai' },
|
|
861
|
-
*
|
|
880
|
+
* evaluationMetricKey: '$ld:ai:judge:relevance',
|
|
862
881
|
* messages: [{ role: 'system', content: 'You are a relevance judge.' }]
|
|
863
882
|
* }, variables);
|
|
864
883
|
*
|
|
@@ -977,7 +996,7 @@ interface LDAIClient {
|
|
|
977
996
|
* enabled: true,
|
|
978
997
|
* model: { name: "gpt-4" },
|
|
979
998
|
* provider: { name: "openai" },
|
|
980
|
-
*
|
|
999
|
+
* evaluationMetricKey: '$ld:ai:judge:relevance',
|
|
981
1000
|
* messages: [{ role: 'system', content: 'You are a relevance judge.' }]
|
|
982
1001
|
* },
|
|
983
1002
|
* { metric: "relevance" }
|
package/dist/index.js
CHANGED
|
@@ -147,6 +147,9 @@ var LDAIConfigUtils = class {
|
|
|
147
147
|
if ("instructions" in config && config.instructions !== void 0) {
|
|
148
148
|
flagValue.instructions = config.instructions;
|
|
149
149
|
}
|
|
150
|
+
if ("evaluationMetricKey" in config && config.evaluationMetricKey !== void 0) {
|
|
151
|
+
flagValue.evaluationMetricKey = config.evaluationMetricKey;
|
|
152
|
+
}
|
|
150
153
|
if ("evaluationMetricKeys" in config && config.evaluationMetricKeys !== void 0) {
|
|
151
154
|
flagValue.evaluationMetricKeys = config.evaluationMetricKeys;
|
|
152
155
|
}
|
|
@@ -192,8 +195,7 @@ var LDAIConfigUtils = class {
|
|
|
192
195
|
return {
|
|
193
196
|
key,
|
|
194
197
|
enabled: false,
|
|
195
|
-
tracker: void 0
|
|
196
|
-
evaluationMetricKeys: []
|
|
198
|
+
tracker: void 0
|
|
197
199
|
};
|
|
198
200
|
case "completion":
|
|
199
201
|
default:
|
|
@@ -257,11 +259,20 @@ var LDAIConfigUtils = class {
|
|
|
257
259
|
* @returns A judge configuration
|
|
258
260
|
*/
|
|
259
261
|
static toJudgeConfig(key, flagValue, tracker) {
|
|
262
|
+
let evaluationMetricKey;
|
|
263
|
+
if (flagValue.evaluationMetricKey && flagValue.evaluationMetricKey.trim().length > 0) {
|
|
264
|
+
evaluationMetricKey = flagValue.evaluationMetricKey.trim();
|
|
265
|
+
} else if (flagValue.evaluationMetricKeys && flagValue.evaluationMetricKeys.length > 0) {
|
|
266
|
+
const validKey = flagValue.evaluationMetricKeys.find(
|
|
267
|
+
(metricKey) => metricKey && metricKey.trim().length > 0
|
|
268
|
+
);
|
|
269
|
+
evaluationMetricKey = validKey ? validKey.trim() : void 0;
|
|
270
|
+
}
|
|
260
271
|
return {
|
|
261
272
|
...this._toBaseConfig(key, flagValue),
|
|
262
273
|
tracker,
|
|
263
274
|
messages: flagValue.messages,
|
|
264
|
-
|
|
275
|
+
evaluationMetricKey
|
|
265
276
|
};
|
|
266
277
|
}
|
|
267
278
|
};
|
|
@@ -271,15 +282,20 @@ import Mustache from "mustache";
|
|
|
271
282
|
|
|
272
283
|
// src/api/judge/EvaluationSchemaBuilder.ts
|
|
273
284
|
var EvaluationSchemaBuilder = class {
|
|
274
|
-
static build(
|
|
285
|
+
static build(evaluationMetricKey) {
|
|
286
|
+
if (!evaluationMetricKey) {
|
|
287
|
+
return {};
|
|
288
|
+
}
|
|
275
289
|
return {
|
|
276
290
|
type: "object",
|
|
277
291
|
properties: {
|
|
278
292
|
evaluations: {
|
|
279
293
|
type: "object",
|
|
280
|
-
description: `Object containing evaluation results for ${
|
|
281
|
-
properties:
|
|
282
|
-
|
|
294
|
+
description: `Object containing evaluation results for ${evaluationMetricKey} metric`,
|
|
295
|
+
properties: {
|
|
296
|
+
[evaluationMetricKey]: this._buildKeySchema(evaluationMetricKey)
|
|
297
|
+
},
|
|
298
|
+
required: [evaluationMetricKey],
|
|
283
299
|
additionalProperties: false
|
|
284
300
|
}
|
|
285
301
|
},
|
|
@@ -287,15 +303,6 @@ var EvaluationSchemaBuilder = class {
|
|
|
287
303
|
additionalProperties: false
|
|
288
304
|
};
|
|
289
305
|
}
|
|
290
|
-
static _buildKeyProperties(evaluationMetricKeys) {
|
|
291
|
-
return evaluationMetricKeys.reduce(
|
|
292
|
-
(acc, key) => {
|
|
293
|
-
acc[key] = this._buildKeySchema(key);
|
|
294
|
-
return acc;
|
|
295
|
-
},
|
|
296
|
-
{}
|
|
297
|
-
);
|
|
298
|
-
}
|
|
299
306
|
static _buildKeySchema(key) {
|
|
300
307
|
return {
|
|
301
308
|
type: "object",
|
|
@@ -324,9 +331,26 @@ var Judge = class {
|
|
|
324
331
|
this._aiConfigTracker = _aiConfigTracker;
|
|
325
332
|
this._aiProvider = _aiProvider;
|
|
326
333
|
this._logger = logger;
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
334
|
+
const evaluationMetricKey = this._getEvaluationMetricKey();
|
|
335
|
+
this._evaluationResponseStructure = EvaluationSchemaBuilder.build(evaluationMetricKey);
|
|
336
|
+
}
|
|
337
|
+
/**
|
|
338
|
+
* Gets the evaluation metric key, prioritizing evaluationMetricKey over evaluationMetricKeys.
|
|
339
|
+
* Falls back to the first valid (non-empty, non-whitespace) value in evaluationMetricKeys if evaluationMetricKey is not provided.
|
|
340
|
+
* Treats empty strings and whitespace-only strings as invalid.
|
|
341
|
+
* @returns The evaluation metric key, or undefined if not available
|
|
342
|
+
*/
|
|
343
|
+
_getEvaluationMetricKey() {
|
|
344
|
+
if (this._aiConfig.evaluationMetricKey && this._aiConfig.evaluationMetricKey.trim().length > 0) {
|
|
345
|
+
return this._aiConfig.evaluationMetricKey.trim();
|
|
346
|
+
}
|
|
347
|
+
if (this._aiConfig.evaluationMetricKeys && this._aiConfig.evaluationMetricKeys.length > 0) {
|
|
348
|
+
const validKey = this._aiConfig.evaluationMetricKeys.find(
|
|
349
|
+
(key) => key && key.trim().length > 0
|
|
350
|
+
);
|
|
351
|
+
return validKey ? validKey.trim() : void 0;
|
|
352
|
+
}
|
|
353
|
+
return void 0;
|
|
330
354
|
}
|
|
331
355
|
/**
|
|
332
356
|
* Evaluates an AI response using the judge's configuration.
|
|
@@ -338,9 +362,10 @@ var Judge = class {
|
|
|
338
362
|
*/
|
|
339
363
|
async evaluate(input, output, samplingRate = 1) {
|
|
340
364
|
try {
|
|
341
|
-
|
|
365
|
+
const evaluationMetricKey = this._getEvaluationMetricKey();
|
|
366
|
+
if (!evaluationMetricKey) {
|
|
342
367
|
this._logger?.warn(
|
|
343
|
-
"Judge configuration is missing required
|
|
368
|
+
"Judge configuration is missing required evaluation metric key",
|
|
344
369
|
this._aiConfigTracker.getTrackData()
|
|
345
370
|
);
|
|
346
371
|
return void 0;
|
|
@@ -362,10 +387,10 @@ var Judge = class {
|
|
|
362
387
|
() => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure)
|
|
363
388
|
);
|
|
364
389
|
let { success } = response.metrics;
|
|
365
|
-
const evals = this._parseEvaluationResponse(response.data);
|
|
366
|
-
if (
|
|
390
|
+
const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey);
|
|
391
|
+
if (!evals[evaluationMetricKey]) {
|
|
367
392
|
this._logger?.warn(
|
|
368
|
-
"Judge evaluation did not return
|
|
393
|
+
"Judge evaluation did not return the expected evaluation",
|
|
369
394
|
this._aiConfigTracker.getTrackData()
|
|
370
395
|
);
|
|
371
396
|
success = false;
|
|
@@ -438,42 +463,40 @@ var Judge = class {
|
|
|
438
463
|
/**
|
|
439
464
|
* Parses the structured evaluation response from the AI provider.
|
|
440
465
|
*/
|
|
441
|
-
_parseEvaluationResponse(data) {
|
|
466
|
+
_parseEvaluationResponse(data, evaluationMetricKey) {
|
|
442
467
|
const evaluations = data.evaluations;
|
|
443
468
|
const results = {};
|
|
444
469
|
if (!data.evaluations || typeof data.evaluations !== "object") {
|
|
445
470
|
this._logger?.warn("Invalid response: missing or invalid evaluations object");
|
|
446
471
|
return results;
|
|
447
472
|
}
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
};
|
|
476
|
-
});
|
|
473
|
+
const evaluation = evaluations[evaluationMetricKey];
|
|
474
|
+
if (!evaluation || typeof evaluation !== "object") {
|
|
475
|
+
this._logger?.warn(
|
|
476
|
+
`Missing evaluation for metric key: ${evaluationMetricKey}`,
|
|
477
|
+
this._aiConfigTracker.getTrackData()
|
|
478
|
+
);
|
|
479
|
+
return results;
|
|
480
|
+
}
|
|
481
|
+
const evalData = evaluation;
|
|
482
|
+
if (typeof evalData.score !== "number" || evalData.score < 0 || evalData.score > 1) {
|
|
483
|
+
this._logger?.warn(
|
|
484
|
+
`Invalid score evaluated for ${evaluationMetricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`,
|
|
485
|
+
this._aiConfigTracker.getTrackData()
|
|
486
|
+
);
|
|
487
|
+
return results;
|
|
488
|
+
}
|
|
489
|
+
if (typeof evalData.reasoning !== "string") {
|
|
490
|
+
this._logger?.warn(
|
|
491
|
+
`Invalid reasoning evaluated for ${evaluationMetricKey}: ${evalData.reasoning}. Reasoning must be a string`,
|
|
492
|
+
this._aiConfigTracker.getTrackData()
|
|
493
|
+
);
|
|
494
|
+
return results;
|
|
495
|
+
}
|
|
496
|
+
results[evaluationMetricKey] = {
|
|
497
|
+
score: evalData.score,
|
|
498
|
+
reasoning: evalData.reasoning
|
|
499
|
+
};
|
|
477
500
|
return results;
|
|
478
501
|
}
|
|
479
502
|
};
|
|
@@ -675,7 +698,7 @@ function createVercelAISDKTokenUsage(data) {
|
|
|
675
698
|
|
|
676
699
|
// src/sdkInfo.ts
|
|
677
700
|
var aiSdkName = "@launchdarkly/server-sdk-ai";
|
|
678
|
-
var aiSdkVersion = "0.
|
|
701
|
+
var aiSdkVersion = "0.16.0";
|
|
679
702
|
|
|
680
703
|
// src/LDAIConfigTrackerImpl.ts
|
|
681
704
|
var LDAIConfigTrackerImpl = class {
|