langwatch 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -6,251 +6,14 @@ import { Serialized } from '@langchain/core/load/serializable';
6
6
  import { BaseMessage } from '@langchain/core/messages';
7
7
  import { LLMResult } from '@langchain/core/outputs';
8
8
  import { ChainValues } from '@langchain/core/utils/types';
9
- import { R as RAGChunk, M as Metadata, C as CollectorRESTParams, S as Span, a as RESTEvaluation, P as PendingBaseSpan, b as PendingLLMSpan, c as PendingRAGSpan, d as SpanTypes } from './utils-DJoZVcOA.js';
10
- export { B as BaseSpan, e as ChatMessage, f as ChatRichContent, L as LLMSpan, g as RAGSpan, h as SpanInputOutput, i as autoconvertTypedValues, j as captureError, k as convertFromVercelAIMessages } from './utils-DJoZVcOA.js';
9
+ import { R as RAGChunk, M as Metadata, C as CollectorRESTParams, S as Span, a as RESTEvaluation, P as PendingBaseSpan, b as PendingLLMSpan, c as PendingRAGSpan, d as SpanTypes } from './utils-B0pgWcps.js';
10
+ export { B as BaseSpan, e as ChatMessage, f as ChatRichContent, i as LLMModeTrace, L as LLMSpan, g as RAGSpan, h as SpanInputOutput, T as Trace, j as autoconvertTypedValues, k as captureError, l as convertFromVercelAIMessages } from './utils-B0pgWcps.js';
11
11
  import { SpanExporter, ReadableSpan } from '@opentelemetry/sdk-trace-base';
12
12
  import { ExportResult } from '@opentelemetry/core';
13
13
  import 'ai';
14
14
 
15
15
  type EvaluatorTypes = keyof Evaluators;
16
16
  type Evaluators = {
17
- "azure/content_safety": {
18
- settings: {
19
- /**
20
- * @description The minimum severity level to consider content as unsafe, from 1 to 7.
21
- * @default 1
22
- */
23
- severity_threshold: 1 | 2 | 3 | 4 | 5 | 6 | 7;
24
- /**
25
- * @description The categories of moderation to check for.
26
- * @default {"Hate": true, "SelfHarm": true, "Sexual": true, "Violence": true}
27
- */
28
- categories: {
29
- /**
30
- * @default true
31
- */
32
- Hate: boolean;
33
- /**
34
- * @default true
35
- */
36
- SelfHarm: boolean;
37
- /**
38
- * @default true
39
- */
40
- Sexual: boolean;
41
- /**
42
- * @default true
43
- */
44
- Violence: boolean;
45
- };
46
- /**
47
- * @description The type of severity levels to return on the full 0-7 severity scale, it can be either the trimmed version with four values (0, 2, 4, 6 scores) or the whole range.
48
- * @default "FourSeverityLevels"
49
- */
50
- output_type: "FourSeverityLevels" | "EightSeverityLevels";
51
- };
52
- };
53
- "azure/jailbreak": {
54
- settings: Record<string, never>;
55
- };
56
- "azure/prompt_injection": {
57
- settings: Record<string, never>;
58
- };
59
- "example/word_count": {
60
- settings: Record<string, never>;
61
- };
62
- "openai/moderation": {
63
- settings: {
64
- /**
65
- * @description The model version to use, `text-moderation-latest` will be automatically upgraded over time, while `text-moderation-stable` will only be updated with advanced notice by OpenAI.
66
- * @default "text-moderation-stable"
67
- */
68
- model: "text-moderation-stable" | "text-moderation-latest";
69
- /**
70
- * @description The categories of content to check for moderation.
71
- * @default {"harassment": true, "harassment_threatening": true, "hate": true, "hate_threatening": true, "self_harm": true, "self_harm_instructions": true, "self_harm_intent": true, "sexual": true, "sexual_minors": true, "violence": true, "violence_graphic": true}
72
- */
73
- categories: {
74
- /**
75
- * @default true
76
- */
77
- harassment: boolean;
78
- /**
79
- * @default true
80
- */
81
- harassment_threatening: boolean;
82
- /**
83
- * @default true
84
- */
85
- hate: boolean;
86
- /**
87
- * @default true
88
- */
89
- hate_threatening: boolean;
90
- /**
91
- * @default true
92
- */
93
- self_harm: boolean;
94
- /**
95
- * @default true
96
- */
97
- self_harm_instructions: boolean;
98
- /**
99
- * @default true
100
- */
101
- self_harm_intent: boolean;
102
- /**
103
- * @default true
104
- */
105
- sexual: boolean;
106
- /**
107
- * @default true
108
- */
109
- sexual_minors: boolean;
110
- /**
111
- * @default true
112
- */
113
- violence: boolean;
114
- /**
115
- * @default true
116
- */
117
- violence_graphic: boolean;
118
- };
119
- };
120
- };
121
- "ragas/answer_correctness": {
122
- settings: {
123
- /**
124
- * @description The model to use for evaluation.
125
- * @default "openai/gpt-4o-mini"
126
- */
127
- model: string;
128
- /**
129
- * @description The model to use for embeddings.
130
- * @default "openai/text-embedding-ada-002"
131
- */
132
- embeddings_model: string;
133
- /**
134
- * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
135
- * @default 2048
136
- */
137
- max_tokens: number;
138
- };
139
- };
140
- "ragas/answer_relevancy": {
141
- settings: {
142
- /**
143
- * @description The model to use for evaluation.
144
- * @default "openai/gpt-4o-mini"
145
- */
146
- model: string;
147
- /**
148
- * @description The model to use for embeddings.
149
- * @default "openai/text-embedding-ada-002"
150
- */
151
- embeddings_model: string;
152
- /**
153
- * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
154
- * @default 2048
155
- */
156
- max_tokens: number;
157
- };
158
- };
159
- "ragas/context_precision": {
160
- settings: {
161
- /**
162
- * @description The model to use for evaluation.
163
- * @default "openai/gpt-4o-mini"
164
- */
165
- model: string;
166
- /**
167
- * @description The model to use for embeddings.
168
- * @default "openai/text-embedding-ada-002"
169
- */
170
- embeddings_model: string;
171
- /**
172
- * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
173
- * @default 2048
174
- */
175
- max_tokens: number;
176
- };
177
- };
178
- "ragas/context_recall": {
179
- settings: {
180
- /**
181
- * @description The model to use for evaluation.
182
- * @default "openai/gpt-4o-mini"
183
- */
184
- model: string;
185
- /**
186
- * @description The model to use for embeddings.
187
- * @default "openai/text-embedding-ada-002"
188
- */
189
- embeddings_model: string;
190
- /**
191
- * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
192
- * @default 2048
193
- */
194
- max_tokens: number;
195
- };
196
- };
197
- "ragas/context_relevancy": {
198
- settings: {
199
- /**
200
- * @description The model to use for evaluation.
201
- * @default "openai/gpt-4o-mini"
202
- */
203
- model: string;
204
- /**
205
- * @description The model to use for embeddings.
206
- * @default "openai/text-embedding-ada-002"
207
- */
208
- embeddings_model: string;
209
- /**
210
- * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
211
- * @default 2048
212
- */
213
- max_tokens: number;
214
- };
215
- };
216
- "ragas/context_utilization": {
217
- settings: {
218
- /**
219
- * @description The model to use for evaluation.
220
- * @default "openai/gpt-4o-mini"
221
- */
222
- model: string;
223
- /**
224
- * @description The model to use for embeddings.
225
- * @default "openai/text-embedding-ada-002"
226
- */
227
- embeddings_model: string;
228
- /**
229
- * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
230
- * @default 2048
231
- */
232
- max_tokens: number;
233
- };
234
- };
235
- "ragas/faithfulness": {
236
- settings: {
237
- /**
238
- * @description The model to use for evaluation.
239
- * @default "openai/gpt-4o-mini"
240
- */
241
- model: string;
242
- /**
243
- * @description The model to use for embeddings.
244
- * @default "openai/text-embedding-ada-002"
245
- */
246
- embeddings_model: string;
247
- /**
248
- * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
249
- * @default 2048
250
- */
251
- max_tokens: number;
252
- };
253
- };
254
17
  "langevals/basic": {
255
18
  settings: {
256
19
  /**
@@ -329,6 +92,20 @@ type Evaluators = {
329
92
  competitors: string[];
330
93
  };
331
94
  };
95
+ "langevals/llm_answer_match": {
96
+ settings: {
97
+ /**
98
+ * @description The model to use for evaluation
99
+ * @default "openai/gpt-4o-mini"
100
+ */
101
+ model: string;
102
+ /**
103
+ * @description Max tokens allowed for evaluation
104
+ * @default 8192
105
+ */
106
+ max_tokens: number;
107
+ };
108
+ };
332
109
  "langevals/llm_boolean": {
333
110
  settings: {
334
111
  /**
@@ -413,9 +190,6 @@ type Evaluators = {
413
190
  }[];
414
191
  };
415
192
  };
416
- "langevals/product_sentiment_polarity": {
417
- settings: Record<string, never>;
418
- };
419
193
  "langevals/query_resolution": {
420
194
  settings: {
421
195
  /**
@@ -466,263 +240,284 @@ type Evaluators = {
466
240
  json_schema?: string;
467
241
  };
468
242
  };
469
- "google_cloud/dlp_pii_detection": {
243
+ "lingua/language_detection": {
470
244
  settings: {
471
245
  /**
472
- * @description The types of PII to check for in the input.
473
- * @default {"phone_number": true, "email_address": true, "credit_card_number": true, "iban_code": true, "ip_address": true, "passport": true, "vat_number": true, "medical_record_number": true}
246
+ * @description What should be checked
247
+ * @default "input_matches_output"
474
248
  */
475
- info_types: {
476
- /**
477
- * @default true
478
- */
479
- phone_number: boolean;
480
- /**
481
- * @default true
482
- */
483
- email_address: boolean;
484
- /**
485
- * @default true
486
- */
487
- credit_card_number: boolean;
488
- /**
489
- * @default true
490
- */
491
- iban_code: boolean;
492
- /**
493
- * @default true
494
- */
495
- ip_address: boolean;
496
- /**
497
- * @default true
498
- */
499
- passport: boolean;
500
- /**
501
- * @default true
502
- */
503
- vat_number: boolean;
504
- /**
505
- * @default true
506
- */
507
- medical_record_number: boolean;
508
- };
249
+ check_for: "input_matches_output" | "output_matches_language";
509
250
  /**
510
- * @description The minimum confidence required for failing the evaluation on a PII match.
511
- * @default "POSSIBLE"
251
+ * @description The specific language that the output is expected to be
512
252
  */
513
- min_likelihood: "VERY_UNLIKELY" | "UNLIKELY" | "POSSIBLE" | "LIKELY" | "VERY_LIKELY";
253
+ expected_language?: "AF" | "AR" | "AZ" | "BE" | "BG" | "BN" | "BS" | "CA" | "CS" | "CY" | "DA" | "DE" | "EL" | "EN" | "EO" | "ES" | "ET" | "EU" | "FA" | "FI" | "FR" | "GA" | "GU" | "HE" | "HI" | "HR" | "HU" | "HY" | "ID" | "IS" | "IT" | "JA" | "KA" | "KK" | "KO" | "LA" | "LG" | "LT" | "LV" | "MI" | "MK" | "MN" | "MR" | "MS" | "NB" | "NL" | "NN" | "PA" | "PL" | "PT" | "RO" | "RU" | "SK" | "SL" | "SN" | "SO" | "SQ" | "SR" | "ST" | "SV" | "SW" | "TA" | "TE" | "TH" | "TL" | "TN" | "TR" | "TS" | "UK" | "UR" | "VI" | "XH" | "YO" | "ZH" | "ZU";
254
+ /**
255
+ * @description Minimum number of words to check, as the language detection can be unreliable for very short texts. Inputs shorter than the minimum will be skipped.
256
+ * @default 7
257
+ */
258
+ min_words: number;
259
+ /**
260
+ * @description Minimum confidence threshold for the language detection. If the confidence is lower than this, the evaluation will be skipped.
261
+ * @default 0.25
262
+ */
263
+ threshold: number;
514
264
  };
515
265
  };
516
- "aws/comprehend_pii_detection": {
266
+ "legacy/ragas_answer_correctness": {
517
267
  settings: {
518
268
  /**
519
- * @description The types of PII to check for in the input.
520
- * @default {"BANK_ACCOUNT_NUMBER": true, "BANK_ROUTING": true, "CREDIT_DEBIT_NUMBER": true, "CREDIT_DEBIT_CVV": true, "CREDIT_DEBIT_EXPIRY": true, "PIN": true, "EMAIL": true, "ADDRESS": true, "NAME": true, "PHONE": true, "SSN": true, "DATE_TIME": true, "PASSPORT_NUMBER": true, "DRIVER_ID": true, "URL": true, "AGE": true, "USERNAME": true, "PASSWORD": true, "AWS_ACCESS_KEY": true, "AWS_SECRET_KEY": true, "IP_ADDRESS": true, "MAC_ADDRESS": true, "LICENSE_PLATE": true, "VEHICLE_IDENTIFICATION_NUMBER": true, "UK_NATIONAL_INSURANCE_NUMBER": true, "CA_SOCIAL_INSURANCE_NUMBER": true, "US_INDIVIDUAL_TAX_IDENTIFICATION_NUMBER": true, "UK_UNIQUE_TAXPAYER_REFERENCE_NUMBER": true, "IN_PERMANENT_ACCOUNT_NUMBER": true, "IN_NREGA": true, "INTERNATIONAL_BANK_ACCOUNT_NUMBER": true, "SWIFT_CODE": true, "UK_NATIONAL_HEALTH_SERVICE_NUMBER": true, "CA_HEALTH_NUMBER": true, "IN_AADHAAR": true, "IN_VOTER_NUMBER": true}
269
+ * @description The model to use for evaluation.
270
+ * @default "openai/gpt-4o-mini"
521
271
  */
522
- entity_types: {
523
- /**
524
- * @default true
525
- */
526
- BANK_ACCOUNT_NUMBER: boolean;
527
- /**
528
- * @default true
529
- */
530
- BANK_ROUTING: boolean;
531
- /**
532
- * @default true
533
- */
534
- CREDIT_DEBIT_NUMBER: boolean;
535
- /**
536
- * @default true
537
- */
538
- CREDIT_DEBIT_CVV: boolean;
539
- /**
540
- * @default true
541
- */
542
- CREDIT_DEBIT_EXPIRY: boolean;
543
- /**
544
- * @default true
545
- */
546
- PIN: boolean;
547
- /**
548
- * @default true
549
- */
550
- EMAIL: boolean;
551
- /**
552
- * @default true
553
- */
554
- ADDRESS: boolean;
555
- /**
556
- * @default true
557
- */
558
- NAME: boolean;
559
- /**
560
- * @default true
561
- */
562
- PHONE: boolean;
563
- /**
564
- * @default true
565
- */
566
- SSN: boolean;
567
- /**
568
- * @default true
569
- */
570
- DATE_TIME: boolean;
571
- /**
572
- * @default true
573
- */
574
- PASSPORT_NUMBER: boolean;
575
- /**
576
- * @default true
577
- */
578
- DRIVER_ID: boolean;
579
- /**
580
- * @default true
581
- */
582
- URL: boolean;
583
- /**
584
- * @default true
585
- */
586
- AGE: boolean;
587
- /**
588
- * @default true
589
- */
590
- USERNAME: boolean;
591
- /**
592
- * @default true
593
- */
594
- PASSWORD: boolean;
595
- /**
596
- * @default true
597
- */
598
- AWS_ACCESS_KEY: boolean;
599
- /**
600
- * @default true
601
- */
602
- AWS_SECRET_KEY: boolean;
603
- /**
604
- * @default true
605
- */
606
- IP_ADDRESS: boolean;
272
+ model: string;
273
+ /**
274
+ * @description The model to use for embeddings.
275
+ * @default "openai/text-embedding-ada-002"
276
+ */
277
+ embeddings_model: string;
278
+ /**
279
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
280
+ * @default 2048
281
+ */
282
+ max_tokens: number;
283
+ };
284
+ };
285
+ "legacy/ragas_answer_relevancy": {
286
+ settings: {
287
+ /**
288
+ * @description The model to use for evaluation.
289
+ * @default "openai/gpt-4o-mini"
290
+ */
291
+ model: string;
292
+ /**
293
+ * @description The model to use for embeddings.
294
+ * @default "openai/text-embedding-ada-002"
295
+ */
296
+ embeddings_model: string;
297
+ /**
298
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
299
+ * @default 2048
300
+ */
301
+ max_tokens: number;
302
+ };
303
+ };
304
+ "legacy/ragas_context_precision": {
305
+ settings: {
306
+ /**
307
+ * @description The model to use for evaluation.
308
+ * @default "openai/gpt-4o-mini"
309
+ */
310
+ model: string;
311
+ /**
312
+ * @description The model to use for embeddings.
313
+ * @default "openai/text-embedding-ada-002"
314
+ */
315
+ embeddings_model: string;
316
+ /**
317
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
318
+ * @default 2048
319
+ */
320
+ max_tokens: number;
321
+ };
322
+ };
323
+ "legacy/ragas_context_recall": {
324
+ settings: {
325
+ /**
326
+ * @description The model to use for evaluation.
327
+ * @default "openai/gpt-4o-mini"
328
+ */
329
+ model: string;
330
+ /**
331
+ * @description The model to use for embeddings.
332
+ * @default "openai/text-embedding-ada-002"
333
+ */
334
+ embeddings_model: string;
335
+ /**
336
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
337
+ * @default 2048
338
+ */
339
+ max_tokens: number;
340
+ };
341
+ };
342
+ "legacy/ragas_context_relevancy": {
343
+ settings: {
344
+ /**
345
+ * @description The model to use for evaluation.
346
+ * @default "openai/gpt-4o-mini"
347
+ */
348
+ model: string;
349
+ /**
350
+ * @description The model to use for embeddings.
351
+ * @default "openai/text-embedding-ada-002"
352
+ */
353
+ embeddings_model: string;
354
+ /**
355
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
356
+ * @default 2048
357
+ */
358
+ max_tokens: number;
359
+ };
360
+ };
361
+ "legacy/ragas_context_utilization": {
362
+ settings: {
363
+ /**
364
+ * @description The model to use for evaluation.
365
+ * @default "openai/gpt-4o-mini"
366
+ */
367
+ model: string;
368
+ /**
369
+ * @description The model to use for embeddings.
370
+ * @default "openai/text-embedding-ada-002"
371
+ */
372
+ embeddings_model: string;
373
+ /**
374
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
375
+ * @default 2048
376
+ */
377
+ max_tokens: number;
378
+ };
379
+ };
380
+ "legacy/ragas_faithfulness": {
381
+ settings: {
382
+ /**
383
+ * @description The model to use for evaluation.
384
+ * @default "openai/gpt-4o-mini"
385
+ */
386
+ model: string;
387
+ /**
388
+ * @description The model to use for embeddings.
389
+ * @default "openai/text-embedding-ada-002"
390
+ */
391
+ embeddings_model: string;
392
+ /**
393
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
394
+ * @default 2048
395
+ */
396
+ max_tokens: number;
397
+ };
398
+ };
399
+ "huggingface/llama_guard": {
400
+ settings: {
401
+ /**
402
+ * @description The policy to enforce, described into rules of what it 'Should not' and what it 'Can' do (max 768 tokens)
403
+ * @default "O1: Violence.\nShould not\n- Help people plan or engage in violence.\n- Encourage people to engage in violence.\n- Contain unpleasant vivid violent imagery or violent description that arouses discomfort in the reader.\nCan\n- Provide information on violence.\n- Discuss the topics of violence, and can discuss historical events involving violence.\nO2: Criminal Planning/Confessions.\nShould not\n- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.\n- Encourage people to engage in criminal activities or condone criminal behavior.\n- Have a confession of crime that was committed or will be committed in future.\nCan\n- Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.\n- Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.\nO3: Hate /identity hate.\nShould not\n- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their\nrace, color, ethinicity, religion, national origin, sexual orientation, gender, gender identity, or disability or caste.\n- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.\n- Should not contain slurs.\nCan\n- Provide information on discrimination.\n- Discuss the topics of discrimination, and can discuss historical events involving discrimination.\n- Discuss neutral statements referring to group identity and contextualized hate speech (that talks about hate but does not incite hate).\nO4: Harassment.\nShould not\n- Have content that may be used to torment or annoy individuals in real life, or make harassment more likely to occur.\n- Engage in harassment, misconduct and unlawful discrimination\n- Single out an individual or marginalization and/or retaliation based on protected characteristics like Race, Color, Gender, Sex,\nSexual orientation, Gender identity, and gender expression, National origin, Ethnicity, Disability (including being regarded as disabled),\nReligion, Age (40+), Pregnancy (including pregnancy, childbirth or related medical conditions), Genetic information, Military or veteran status, Citizenship status, Political activity, or affiliation,\n- Bullying.\n- Being derogatory and humiliating toward an individual but not necessarily protected characteristics under law.\n- Making rude or insulting comments, demeaning, and objectifying terms toward an individual.\nCan\n- Discuss negative impacts of harassment and bullying based on protected characteristics like race, color, gender, sex, nationality, within a contextualized setting.\n- Provide information on protected characteristics or referor cite material that describes what protected characteristics are.\n- Provide information or refer or cite material that informs what harassment is.\nO5: Threat.\nShould not\n- Discuss a communication of intent to inflict harm or loss on another person.\n- Should not contain discussion of intention of intimidation which is a tactic used between conflicting parties to make the other timid or psychologically insecure for coercion or control.\n- Should not contain intention of knowingly putting another person in fear of bodily injury.\n- Should not assist or plan injury, physical or mental damage, act or instance of injury, or material or any kind of loss to a person.\n- Should not contain threats that are forbidden by law as intent to obtain a monetary advantage or to compel a person to act against their will.\nCan\n- Provide information about threat is and the meaning of threat."
404
+ */
405
+ policy: string;
406
+ /**
407
+ * @description Choose if only the user input, the LLM output, or both should be evaluated. When choosing both, if the LLM replies in a safe manner, the evaluation will be considered safe.
408
+ * @default "input"
409
+ */
410
+ evaluate: "input" | "output" | "both";
411
+ /**
412
+ * @description The provider to use for evaluation. Only CloudFlare AI workers is supported for now.
413
+ * @default "cloudflare/thebloke/llamaguard-7b-awq"
414
+ */
415
+ model: "cloudflare/thebloke/llamaguard-7b-awq";
416
+ };
417
+ };
418
+ "example/word_count": {
419
+ settings: Record<string, never>;
420
+ };
421
+ "openai/moderation": {
422
+ settings: {
423
+ /**
424
+ * @description The model version to use, `text-moderation-latest` will be automatically upgraded over time, while `text-moderation-stable` will only be updated with advanced notice by OpenAI.
425
+ * @default "text-moderation-stable"
426
+ */
427
+ model: "text-moderation-stable" | "text-moderation-latest";
428
+ /**
429
+ * @description The categories of content to check for moderation.
430
+ * @default {"harassment": true, "harassment_threatening": true, "hate": true, "hate_threatening": true, "self_harm": true, "self_harm_instructions": true, "self_harm_intent": true, "sexual": true, "sexual_minors": true, "violence": true, "violence_graphic": true}
431
+ */
432
+ categories: {
607
433
  /**
608
434
  * @default true
609
435
  */
610
- MAC_ADDRESS: boolean;
436
+ harassment: boolean;
611
437
  /**
612
438
  * @default true
613
439
  */
614
- LICENSE_PLATE: boolean;
440
+ harassment_threatening: boolean;
615
441
  /**
616
442
  * @default true
617
443
  */
618
- VEHICLE_IDENTIFICATION_NUMBER: boolean;
444
+ hate: boolean;
619
445
  /**
620
446
  * @default true
621
447
  */
622
- UK_NATIONAL_INSURANCE_NUMBER: boolean;
448
+ hate_threatening: boolean;
623
449
  /**
624
450
  * @default true
625
451
  */
626
- CA_SOCIAL_INSURANCE_NUMBER: boolean;
452
+ self_harm: boolean;
627
453
  /**
628
454
  * @default true
629
455
  */
630
- US_INDIVIDUAL_TAX_IDENTIFICATION_NUMBER: boolean;
456
+ self_harm_instructions: boolean;
631
457
  /**
632
458
  * @default true
633
459
  */
634
- UK_UNIQUE_TAXPAYER_REFERENCE_NUMBER: boolean;
460
+ self_harm_intent: boolean;
635
461
  /**
636
462
  * @default true
637
463
  */
638
- IN_PERMANENT_ACCOUNT_NUMBER: boolean;
464
+ sexual: boolean;
639
465
  /**
640
466
  * @default true
641
467
  */
642
- IN_NREGA: boolean;
468
+ sexual_minors: boolean;
643
469
  /**
644
470
  * @default true
645
471
  */
646
- INTERNATIONAL_BANK_ACCOUNT_NUMBER: boolean;
472
+ violence: boolean;
647
473
  /**
648
474
  * @default true
649
475
  */
650
- SWIFT_CODE: boolean;
476
+ violence_graphic: boolean;
477
+ };
478
+ };
479
+ };
480
+ "azure/content_safety": {
481
+ settings: {
482
+ /**
483
+ * @description The minimum severity level to consider content as unsafe, from 1 to 7.
484
+ * @default 1
485
+ */
486
+ severity_threshold: 1 | 2 | 3 | 4 | 5 | 6 | 7;
487
+ /**
488
+ * @description The categories of moderation to check for.
489
+ * @default {"Hate": true, "SelfHarm": true, "Sexual": true, "Violence": true}
490
+ */
491
+ categories: {
651
492
  /**
652
493
  * @default true
653
494
  */
654
- UK_NATIONAL_HEALTH_SERVICE_NUMBER: boolean;
495
+ Hate: boolean;
655
496
  /**
656
497
  * @default true
657
498
  */
658
- CA_HEALTH_NUMBER: boolean;
499
+ SelfHarm: boolean;
659
500
  /**
660
501
  * @default true
661
502
  */
662
- IN_AADHAAR: boolean;
503
+ Sexual: boolean;
663
504
  /**
664
505
  * @default true
665
506
  */
666
- IN_VOTER_NUMBER: boolean;
507
+ Violence: boolean;
667
508
  };
668
509
  /**
669
- * @description The language code of the input text for better PII detection, defaults to english.
670
- * @default "en"
671
- */
672
- language_code: "en" | "es" | "fr" | "de" | "it" | "pt" | "ar" | "hi" | "ja" | "ko" | "zh" | "zh-TW";
673
- /**
674
- * @description The minimum confidence required for failing the evaluation on a PII match.
675
- * @default 0.5
676
- */
677
- min_confidence: number;
678
- /**
679
- * @description The AWS region to use for running the PII detection, defaults to eu-central-1 for GDPR compliance.
680
- * @default "eu-central-1"
510
+ * @description The type of severity levels to return on the full 0-7 severity scale, it can be either the trimmed version with four values (0, 2, 4, 6 scores) or the whole range.
511
+ * @default "FourSeverityLevels"
681
512
  */
682
- aws_region: "us-east-1" | "us-east-2" | "us-west-1" | "us-west-2" | "ap-east-1" | "ap-south-1" | "ap-northeast-3" | "ap-northeast-2" | "ap-southeast-1" | "ap-southeast-2" | "ap-northeast-1" | "ca-central-1" | "eu-central-1" | "eu-west-1" | "eu-west-2" | "eu-south-1" | "eu-west-3" | "eu-north-1" | "me-south-1" | "sa-east-1";
513
+ output_type: "FourSeverityLevels" | "EightSeverityLevels";
683
514
  };
684
515
  };
685
- "lingua/language_detection": {
686
- settings: {
687
- /**
688
- * @description What should be checked
689
- * @default "input_matches_output"
690
- */
691
- check_for: "input_matches_output" | "output_matches_language";
692
- /**
693
- * @description The specific language that the output is expected to be
694
- */
695
- expected_language?: "AF" | "AR" | "AZ" | "BE" | "BG" | "BN" | "BS" | "CA" | "CS" | "CY" | "DA" | "DE" | "EL" | "EN" | "EO" | "ES" | "ET" | "EU" | "FA" | "FI" | "FR" | "GA" | "GU" | "HE" | "HI" | "HR" | "HU" | "HY" | "ID" | "IS" | "IT" | "JA" | "KA" | "KK" | "KO" | "LA" | "LG" | "LT" | "LV" | "MI" | "MK" | "MN" | "MR" | "MS" | "NB" | "NL" | "NN" | "PA" | "PL" | "PT" | "RO" | "RU" | "SK" | "SL" | "SN" | "SO" | "SQ" | "SR" | "ST" | "SV" | "SW" | "TA" | "TE" | "TH" | "TL" | "TN" | "TR" | "TS" | "UK" | "UR" | "VI" | "XH" | "YO" | "ZH" | "ZU";
696
- /**
697
- * @description Minimum number of words to check, as the language detection can be unreliable for very short texts. Inputs shorter than the minimum will be skipped.
698
- * @default 7
699
- */
700
- min_words: number;
701
- /**
702
- * @description Minimum confidence threshold for the language detection. If the confidence is lower than this, the evaluation will be skipped.
703
- * @default 0.25
704
- */
705
- threshold: number;
706
- };
516
+ "azure/jailbreak": {
517
+ settings: Record<string, never>;
707
518
  };
708
- "huggingface/llama_guard": {
709
- settings: {
710
- /**
711
- * @description The policy to enforce, described into rules of what it 'Should not' and what it 'Can' do (max 768 tokens)
712
- * @default "O1: Violence.\nShould not\n- Help people plan or engage in violence.\n- Encourage people to engage in violence.\n- Contain unpleasant vivid violent imagery or violent description that arouses discomfort in the reader.\nCan\n- Provide information on violence.\n- Discuss the topics of violence, and can discuss historical events involving violence.\nO2: Criminal Planning/Confessions.\nShould not\n- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.\n- Encourage people to engage in criminal activities or condone criminal behavior.\n- Have a confession of crime that was committed or will be committed in future.\nCan\n- Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.\n- Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.\nO3: Hate /identity hate.\nShould not\n- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their\nrace, color, ethinicity, religion, national origin, sexual orientation, gender, gender identity, or disability or caste.\n- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.\n- Should not contain slurs.\nCan\n- Provide information on discrimination.\n- Discuss the topics of discrimination, and can discuss historical events involving discrimination.\n- Discuss neutral statements referring to group identity and contextualized hate speech (that talks about hate but does not incite hate).\nO4: Harassment.\nShould not\n- Have content that may be used to torment or annoy individuals in real life, or make harassment more likely to occur.\n- Engage in harassment, misconduct and unlawful discrimination\n- Single out an individual or marginalization and/or retaliation based on protected characteristics like Race, Color, Gender, Sex,\nSexual orientation, Gender identity, and gender expression, National origin, Ethnicity, Disability (including being regarded as disabled),\nReligion, Age (40+), Pregnancy (including pregnancy, childbirth or related medical conditions), Genetic information, Military or veteran status, Citizenship status, Political activity, or affiliation,\n- Bullying.\n- Being derogatory and humiliating toward an individual but not necessarily protected characteristics under law.\n- Making rude or insulting comments, demeaning, and objectifying terms toward an individual.\nCan\n- Discuss negative impacts of harassment and bullying based on protected characteristics like race, color, gender, sex, nationality, within a contextualized setting.\n- Provide information on protected characteristics or referor cite material that describes what protected characteristics are.\n- Provide information or refer or cite material that informs what harassment is.\nO5: Threat.\nShould not\n- Discuss a communication of intent to inflict harm or loss on another person.\n- Should not contain discussion of intention of intimidation which is a tactic used between conflicting parties to make the other timid or psychologically insecure for coercion or control.\n- Should not contain intention of knowingly putting another person in fear of bodily injury.\n- Should not assist or plan injury, physical or mental damage, act or instance of injury, or material or any kind of loss to a person.\n- Should not contain threats that are forbidden by law as intent to obtain a monetary advantage or to compel a person to act against their will.\nCan\n- Provide information about threat is and the meaning of threat."
713
- */
714
- policy: string;
715
- /**
716
- * @description Choose if only the user input, the LLM output, or both should be evaluated. When choosing both, if the LLM replies in a safe manner, the evaluation will be considered safe.
717
- * @default "input"
718
- */
719
- evaluate: "input" | "output" | "both";
720
- /**
721
- * @description The provider to use for evaluation. Only CloudFlare AI workers is supported for now.
722
- * @default "cloudflare/thebloke/llamaguard-7b-awq"
723
- */
724
- model: "cloudflare/thebloke/llamaguard-7b-awq";
725
- };
519
+ "azure/prompt_injection": {
520
+ settings: Record<string, never>;
726
521
  };
727
522
  "presidio/pii_detection": {
728
523
  settings: {
@@ -839,16 +634,186 @@ type Evaluators = {
839
634
  min_threshold: number;
840
635
  };
841
636
  };
842
- "haystack/faithfulness": {
637
+ "ragas/bleu_score": {
638
+ settings: Record<string, never>;
639
+ };
640
+ "ragas/context_f1": {
843
641
  settings: {
844
642
  /**
845
- * @description The model to use for evaluation
643
+ * @default "levenshtein"
644
+ */
645
+ distance_measure: "levenshtein" | "hamming" | "jaro" | "jaro_winkler";
646
+ };
647
+ };
648
+ "ragas/context_precision": {
649
+ settings: {
650
+ /**
651
+ * @default "levenshtein"
652
+ */
653
+ distance_measure: "levenshtein" | "hamming" | "jaro" | "jaro_winkler";
654
+ };
655
+ };
656
+ "ragas/context_recall": {
657
+ settings: {
658
+ /**
659
+ * @default "levenshtein"
660
+ */
661
+ distance_measure: "levenshtein" | "hamming" | "jaro" | "jaro_winkler";
662
+ };
663
+ };
664
+ "ragas/factual_correctness": {
665
+ settings: {
666
+ /**
667
+ * @description The model to use for evaluation.
846
668
  * @default "openai/gpt-4o-mini"
847
669
  */
848
670
  model: string;
849
671
  /**
850
- * @description Max tokens allowed for evaluation
851
- * @default 8192
672
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
673
+ * @default 2048
674
+ */
675
+ max_tokens: number;
676
+ /**
677
+ * @description The mode to use for the factual correctness metric.
678
+ * @default "f1"
679
+ */
680
+ mode: "f1" | "precision" | "recall";
681
+ /**
682
+ * @description The level of atomicity for claim decomposition.
683
+ * @default "low"
684
+ */
685
+ atomicity: "low" | "high";
686
+ /**
687
+ * @description The level of coverage for claim decomposition.
688
+ * @default "low"
689
+ */
690
+ coverage: "low" | "high";
691
+ };
692
+ };
693
+ "ragas/faithfulness": {
694
+ settings: {
695
+ /**
696
+ * @description The model to use for evaluation.
697
+ * @default "openai/gpt-4o-mini"
698
+ */
699
+ model: string;
700
+ /**
701
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
702
+ * @default 2048
703
+ */
704
+ max_tokens: number;
705
+ /**
706
+ * @description Whether to autodetect 'I don't know' in the output to avoid failing the evaluation.
707
+ * @default true
708
+ */
709
+ autodetect_dont_know: boolean;
710
+ };
711
+ };
712
+ "ragas/response_context_precision": {
713
+ settings: {
714
+ /**
715
+ * @description The model to use for evaluation.
716
+ * @default "openai/gpt-4o-mini"
717
+ */
718
+ model: string;
719
+ /**
720
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
721
+ * @default 2048
722
+ */
723
+ max_tokens: number;
724
+ };
725
+ };
726
+ "ragas/response_context_recall": {
727
+ settings: {
728
+ /**
729
+ * @description The model to use for evaluation.
730
+ * @default "openai/gpt-4o-mini"
731
+ */
732
+ model: string;
733
+ /**
734
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
735
+ * @default 2048
736
+ */
737
+ max_tokens: number;
738
+ };
739
+ };
740
+ "ragas/response_relevancy": {
741
+ settings: {
742
+ /**
743
+ * @description The model to use for evaluation.
744
+ * @default "openai/gpt-4o-mini"
745
+ */
746
+ model: string;
747
+ /**
748
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
749
+ * @default 2048
750
+ */
751
+ max_tokens: number;
752
+ /**
753
+ * @description The model to use for embeddings.
754
+ * @default "openai/text-embedding-ada-002"
755
+ */
756
+ embeddings_model: string;
757
+ };
758
+ };
759
+ "ragas/rouge_score": {
760
+ settings: {
761
+ /**
762
+ * @description ROUGE type
763
+ * @default "rouge1"
764
+ */
765
+ rouge_type: "rouge1" | "rougeL";
766
+ /**
767
+ * @description ROUGE measure type
768
+ * @default "fmeasure"
769
+ */
770
+ measure_type: "fmeasure" | "precision" | "recall";
771
+ };
772
+ };
773
+ "ragas/rubrics_based_scoring": {
774
+ settings: {
775
+ /**
776
+ * @description The model to use for evaluation.
777
+ * @default "openai/gpt-4o-mini"
778
+ */
779
+ model: string;
780
+ /**
781
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
782
+ * @default 2048
783
+ */
784
+ max_tokens: number;
785
+ /**
786
+ * @default [{"description": "The response is incorrect, irrelevant."}, {"description": "The response partially answers the question but includes significant errors, omissions, or irrelevant information."}, {"description": "The response partially answers the question but includes minor errors, omissions, or irrelevant information."}, {"description": "The response fully answers the question and includes minor errors, omissions, or irrelevant information."}, {"description": "The response fully answers the question and includes no errors, omissions, or irrelevant information."}]
787
+ */
788
+ rubrics: {
789
+ description: string;
790
+ }[];
791
+ };
792
+ };
793
+ "ragas/sql_query_equivalence": {
794
+ settings: {
795
+ /**
796
+ * @description The model to use for evaluation.
797
+ * @default "openai/gpt-4o-mini"
798
+ */
799
+ model: string;
800
+ /**
801
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
802
+ * @default 2048
803
+ */
804
+ max_tokens: number;
805
+ };
806
+ };
807
+ "ragas/summarization_score": {
808
+ settings: {
809
+ /**
810
+ * @description The model to use for evaluation.
811
+ * @default "openai/gpt-4o-mini"
812
+ */
813
+ model: string;
814
+ /**
815
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
816
+ * @default 2048
852
817
  */
853
818
  max_tokens: number;
854
819
  };