langwatch 0.1.4 → 0.1.6

This diff compares publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
- import EventEmitter from 'events';
+ import EventEmitter from 'eventemitter3';
  import { AgentAction, AgentFinish } from '@langchain/core/agents';
  import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
  import { DocumentInterface } from '@langchain/core/documents';
@@ -6,285 +6,815 @@ import { Serialized } from '@langchain/core/load/serializable';
6
6
  import { BaseMessage } from '@langchain/core/messages';
7
7
  import { LLMResult } from '@langchain/core/outputs';
8
8
  import { ChainValues } from '@langchain/core/utils/types';
9
- import { R as RAGChunk, M as Metadata, C as CollectorRESTParams, S as Span, a as RESTEvaluation, P as PendingBaseSpan, b as PendingLLMSpan, c as PendingRAGSpan, d as SpanTypes } from './utils-CFtM8VVg.js';
10
- export { B as BaseSpan, e as ChatMessage, f as ChatRichContent, L as LLMSpan, g as RAGSpan, h as SpanInputOutput, i as autoconvertTypedValues, j as captureError, k as convertFromVercelAIMessages } from './utils-CFtM8VVg.js';
9
+ import { R as RAGChunk, M as Metadata, C as CollectorRESTParams, S as Span, a as RESTEvaluation, P as PendingBaseSpan, b as PendingLLMSpan, c as PendingRAGSpan, d as SpanTypes } from './utils-B0pgWcps.js';
10
+ export { B as BaseSpan, e as ChatMessage, f as ChatRichContent, i as LLMModeTrace, L as LLMSpan, g as RAGSpan, h as SpanInputOutput, T as Trace, j as autoconvertTypedValues, k as captureError, l as convertFromVercelAIMessages } from './utils-B0pgWcps.js';
11
+ import { SpanExporter, ReadableSpan } from '@opentelemetry/sdk-trace-base';
12
+ import { ExportResult } from '@opentelemetry/core';
11
13
  import 'ai';
12
14
 
13
15
  type EvaluatorTypes = keyof Evaluators;
14
16
  type Evaluators = {
17
+ "langevals/basic": {
18
+ settings: {
19
+ /**
20
+ * @description List of rules to check, the message must pass all of them
21
+ * @default [{"field": "output", "rule": "not_contains", "value": "artificial intelligence"}]
22
+ */
23
+ rules: {
24
+ /**
25
+ * @default "output"
26
+ */
27
+ field: "input" | "output";
28
+ rule: "contains" | "not_contains" | "matches_regex" | "not_matches_regex";
29
+ value: string;
30
+ }[];
31
+ };
32
+ };
33
+ "langevals/competitor_blocklist": {
34
+ settings: {
35
+ /**
36
+ * @description The competitors that must not be mentioned.
37
+ * @default ["OpenAI", "Google", "Microsoft"]
38
+ */
39
+ competitors: string[];
40
+ };
41
+ };
42
+ "langevals/competitor_llm": {
43
+ settings: {
44
+ /**
45
+ * @description The model to use for evaluation
46
+ * @default "openai/gpt-4o-mini"
47
+ */
48
+ model: string;
49
+ /**
50
+ * @description Max tokens allowed for evaluation
51
+ * @default 8192
52
+ */
53
+ max_tokens: number;
54
+ /**
55
+ * @description The name of your company
56
+ * @default "LangWatch"
57
+ */
58
+ name: string;
59
+ /**
60
+ * @description Description of what your company is specializing at
61
+ * @default "We are providing an LLM observability and evaluation platform"
62
+ */
63
+ description: string;
64
+ };
65
+ };
66
+ "langevals/competitor_llm_function_call": {
67
+ settings: {
68
+ /**
69
+ * @description The model to use for evaluation
70
+ * @default "openai/gpt-4o-mini"
71
+ */
72
+ model: string;
73
+ /**
74
+ * @description Max tokens allowed for evaluation
75
+ * @default 8192
76
+ */
77
+ max_tokens: number;
78
+ /**
79
+ * @description The name of your company
80
+ * @default "LangWatch"
81
+ */
82
+ name: string;
83
+ /**
84
+ * @description Description of what your company is specializing at
85
+ * @default "We are providing an LLM observability and evaluation platform"
86
+ */
87
+ description: string;
88
+ /**
89
+ * @description The competitors that must not be mentioned.
90
+ * @default ["OpenAI", "Google", "Microsoft"]
91
+ */
92
+ competitors: string[];
93
+ };
94
+ };
95
+ "langevals/llm_answer_match": {
96
+ settings: {
97
+ /**
98
+ * @description The model to use for evaluation
99
+ * @default "openai/gpt-4o-mini"
100
+ */
101
+ model: string;
102
+ /**
103
+ * @description Max tokens allowed for evaluation
104
+ * @default 8192
105
+ */
106
+ max_tokens: number;
107
+ };
108
+ };
109
+ "langevals/llm_boolean": {
110
+ settings: {
111
+ /**
112
+ * @description The model to use for evaluation
113
+ * @default "openai/gpt-4o-mini"
114
+ */
115
+ model: string;
116
+ /**
117
+ * @default 8192
118
+ */
119
+ max_tokens: number;
120
+ /**
121
+ * @description The system prompt to use for the LLM to run the evaluation
122
+ * @default "You are an LLM evaluator. We need the guarantee that the output answers what is being asked on the input, please evaluate as False if it doesn't"
123
+ */
124
+ prompt: string;
125
+ };
126
+ };
127
+ "langevals/llm_category": {
128
+ settings: {
129
+ /**
130
+ * @description The model to use for evaluation
131
+ * @default "openai/gpt-4o-mini"
132
+ */
133
+ model: string;
134
+ /**
135
+ * @default 8192
136
+ */
137
+ max_tokens: number;
138
+ /**
139
+ * @description The system prompt to use for the LLM to run the evaluation
140
+ * @default "You are an LLM category evaluator. Please categorize the message in one of the following categories"
141
+ */
142
+ prompt: string;
143
+ /**
144
+ * @description The categories to use for the evaluation
145
+ * @default [{"name": "smalltalk", "description": "Smalltalk with the user"}, {"name": "company", "description": "Questions about the company, what we do, etc"}]
146
+ */
147
+ categories: {
148
+ name: string;
149
+ description: string;
150
+ }[];
151
+ };
152
+ };
153
+ "langevals/llm_score": {
154
+ settings: {
155
+ /**
156
+ * @description The model to use for evaluation
157
+ * @default "openai/gpt-4o-mini"
158
+ */
159
+ model: string;
160
+ /**
161
+ * @default 8192
162
+ */
163
+ max_tokens: number;
164
+ /**
165
+ * @description The system prompt to use for the LLM to run the evaluation
166
+ * @default "You are an LLM evaluator. Please score from 0.0 to 1.0 how likely the user is to be satisfied with this answer, from 0.0 being not satisfied at all to 1.0 being completely satisfied"
167
+ */
168
+ prompt: string;
169
+ };
170
+ };
171
+ "langevals/off_topic": {
172
+ settings: {
173
+ /**
174
+ * @description The model to use for evaluation
175
+ * @default "openai/gpt-4o-mini"
176
+ */
177
+ model: string;
178
+ /**
179
+ * @description Max tokens allowed for evaluation
180
+ * @default 8192
181
+ */
182
+ max_tokens: number;
183
+ /**
184
+ * @description The list of topics and their short descriptions that the chatbot is allowed to talk about
185
+ * @default [{"topic": "simple_chat", "description": "Smalltalk with the user"}, {"topic": "company", "description": "Questions about the company, what we do, etc"}]
186
+ */
187
+ allowed_topics: {
188
+ topic: string;
189
+ description: string;
190
+ }[];
191
+ };
192
+ };
193
+ "langevals/query_resolution": {
194
+ settings: {
195
+ /**
196
+ * @description The model to use for evaluation
197
+ * @default "openai/gpt-4o-mini"
198
+ */
199
+ model: string;
200
+ /**
201
+ * @description Max tokens allowed for evaluation
202
+ * @default 8192
203
+ */
204
+ max_tokens: number;
205
+ };
206
+ };
207
+ "langevals/similarity": {
208
+ settings: {
209
+ /**
210
+ * @default "output"
211
+ */
212
+ field: "input" | "output";
213
+ /**
214
+ * @default "is_not_similar_to"
215
+ */
216
+ rule: "is_not_similar_to" | "is_similar_to";
217
+ /**
218
+ * @default "example"
219
+ */
220
+ value: string;
221
+ /**
222
+ * @default 0.3
223
+ */
224
+ threshold: number;
225
+ /**
226
+ * @default "openai/text-embedding-3-small"
227
+ */
228
+ embeddings_model: string;
229
+ };
230
+ };
231
+ "langevals/valid_format": {
232
+ settings: {
233
+ /**
234
+ * @default "json"
235
+ */
236
+ format: "json" | "markdown" | "python" | "sql";
237
+ /**
238
+ * @description JSON schema to validate against when format is 'json'
239
+ */
240
+ json_schema?: string;
241
+ };
242
+ };
15
243
  "lingua/language_detection": {
16
244
  settings: {
245
+ /**
246
+ * @description What should be checked
247
+ * @default "input_matches_output"
248
+ */
17
249
  check_for: "input_matches_output" | "output_matches_language";
250
+ /**
251
+ * @description The specific language that the output is expected to be
252
+ */
18
253
  expected_language?: "AF" | "AR" | "AZ" | "BE" | "BG" | "BN" | "BS" | "CA" | "CS" | "CY" | "DA" | "DE" | "EL" | "EN" | "EO" | "ES" | "ET" | "EU" | "FA" | "FI" | "FR" | "GA" | "GU" | "HE" | "HI" | "HR" | "HU" | "HY" | "ID" | "IS" | "IT" | "JA" | "KA" | "KK" | "KO" | "LA" | "LG" | "LT" | "LV" | "MI" | "MK" | "MN" | "MR" | "MS" | "NB" | "NL" | "NN" | "PA" | "PL" | "PT" | "RO" | "RU" | "SK" | "SL" | "SN" | "SO" | "SQ" | "SR" | "ST" | "SV" | "SW" | "TA" | "TE" | "TH" | "TL" | "TN" | "TR" | "TS" | "UK" | "UR" | "VI" | "XH" | "YO" | "ZH" | "ZU";
254
+ /**
255
+ * @description Minimum number of words to check, as the language detection can be unreliable for very short texts. Inputs shorter than the minimum will be skipped.
256
+ * @default 7
257
+ */
19
258
  min_words: number;
259
+ /**
260
+ * @description Minimum confidence threshold for the language detection. If the confidence is lower than this, the evaluation will be skipped.
261
+ * @default 0.25
262
+ */
20
263
  threshold: number;
21
264
  };
22
265
  };
23
- "aws/comprehend_pii_detection": {
24
- settings: {
25
- entity_types: {
26
- BANK_ACCOUNT_NUMBER: boolean;
27
- BANK_ROUTING: boolean;
28
- CREDIT_DEBIT_NUMBER: boolean;
29
- CREDIT_DEBIT_CVV: boolean;
30
- CREDIT_DEBIT_EXPIRY: boolean;
31
- PIN: boolean;
32
- EMAIL: boolean;
33
- ADDRESS: boolean;
34
- NAME: boolean;
35
- PHONE: boolean;
36
- SSN: boolean;
37
- DATE_TIME: boolean;
38
- PASSPORT_NUMBER: boolean;
39
- DRIVER_ID: boolean;
40
- URL: boolean;
41
- AGE: boolean;
42
- USERNAME: boolean;
43
- PASSWORD: boolean;
44
- AWS_ACCESS_KEY: boolean;
45
- AWS_SECRET_KEY: boolean;
46
- IP_ADDRESS: boolean;
47
- MAC_ADDRESS: boolean;
48
- LICENSE_PLATE: boolean;
49
- VEHICLE_IDENTIFICATION_NUMBER: boolean;
50
- UK_NATIONAL_INSURANCE_NUMBER: boolean;
51
- CA_SOCIAL_INSURANCE_NUMBER: boolean;
52
- US_INDIVIDUAL_TAX_IDENTIFICATION_NUMBER: boolean;
53
- UK_UNIQUE_TAXPAYER_REFERENCE_NUMBER: boolean;
54
- IN_PERMANENT_ACCOUNT_NUMBER: boolean;
55
- IN_NREGA: boolean;
56
- INTERNATIONAL_BANK_ACCOUNT_NUMBER: boolean;
57
- SWIFT_CODE: boolean;
58
- UK_NATIONAL_HEALTH_SERVICE_NUMBER: boolean;
59
- CA_HEALTH_NUMBER: boolean;
60
- IN_AADHAAR: boolean;
61
- IN_VOTER_NUMBER: boolean;
62
- };
63
- language_code: "en" | "es" | "fr" | "de" | "it" | "pt" | "ar" | "hi" | "ja" | "ko" | "zh" | "zh-TW";
64
- min_confidence: number;
65
- aws_region: "us-east-1" | "us-east-2" | "us-west-1" | "us-west-2" | "ap-east-1" | "ap-south-1" | "ap-northeast-3" | "ap-northeast-2" | "ap-southeast-1" | "ap-southeast-2" | "ap-northeast-1" | "ca-central-1" | "eu-central-1" | "eu-west-1" | "eu-west-2" | "eu-south-1" | "eu-west-3" | "eu-north-1" | "me-south-1" | "sa-east-1";
266
+ "legacy/ragas_answer_correctness": {
267
+ settings: {
268
+ /**
269
+ * @description The model to use for evaluation.
270
+ * @default "openai/gpt-4o-mini"
271
+ */
272
+ model: string;
273
+ /**
274
+ * @description The model to use for embeddings.
275
+ * @default "openai/text-embedding-ada-002"
276
+ */
277
+ embeddings_model: string;
278
+ /**
279
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
280
+ * @default 2048
281
+ */
282
+ max_tokens: number;
283
+ };
284
+ };
285
+ "legacy/ragas_answer_relevancy": {
286
+ settings: {
287
+ /**
288
+ * @description The model to use for evaluation.
289
+ * @default "openai/gpt-4o-mini"
290
+ */
291
+ model: string;
292
+ /**
293
+ * @description The model to use for embeddings.
294
+ * @default "openai/text-embedding-ada-002"
295
+ */
296
+ embeddings_model: string;
297
+ /**
298
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
299
+ * @default 2048
300
+ */
301
+ max_tokens: number;
302
+ };
303
+ };
304
+ "legacy/ragas_context_precision": {
305
+ settings: {
306
+ /**
307
+ * @description The model to use for evaluation.
308
+ * @default "openai/gpt-4o-mini"
309
+ */
310
+ model: string;
311
+ /**
312
+ * @description The model to use for embeddings.
313
+ * @default "openai/text-embedding-ada-002"
314
+ */
315
+ embeddings_model: string;
316
+ /**
317
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
318
+ * @default 2048
319
+ */
320
+ max_tokens: number;
321
+ };
322
+ };
323
+ "legacy/ragas_context_recall": {
324
+ settings: {
325
+ /**
326
+ * @description The model to use for evaluation.
327
+ * @default "openai/gpt-4o-mini"
328
+ */
329
+ model: string;
330
+ /**
331
+ * @description The model to use for embeddings.
332
+ * @default "openai/text-embedding-ada-002"
333
+ */
334
+ embeddings_model: string;
335
+ /**
336
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
337
+ * @default 2048
338
+ */
339
+ max_tokens: number;
340
+ };
341
+ };
342
+ "legacy/ragas_context_relevancy": {
343
+ settings: {
344
+ /**
345
+ * @description The model to use for evaluation.
346
+ * @default "openai/gpt-4o-mini"
347
+ */
348
+ model: string;
349
+ /**
350
+ * @description The model to use for embeddings.
351
+ * @default "openai/text-embedding-ada-002"
352
+ */
353
+ embeddings_model: string;
354
+ /**
355
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
356
+ * @default 2048
357
+ */
358
+ max_tokens: number;
359
+ };
360
+ };
361
+ "legacy/ragas_context_utilization": {
362
+ settings: {
363
+ /**
364
+ * @description The model to use for evaluation.
365
+ * @default "openai/gpt-4o-mini"
366
+ */
367
+ model: string;
368
+ /**
369
+ * @description The model to use for embeddings.
370
+ * @default "openai/text-embedding-ada-002"
371
+ */
372
+ embeddings_model: string;
373
+ /**
374
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
375
+ * @default 2048
376
+ */
377
+ max_tokens: number;
378
+ };
379
+ };
380
+ "legacy/ragas_faithfulness": {
381
+ settings: {
382
+ /**
383
+ * @description The model to use for evaluation.
384
+ * @default "openai/gpt-4o-mini"
385
+ */
386
+ model: string;
387
+ /**
388
+ * @description The model to use for embeddings.
389
+ * @default "openai/text-embedding-ada-002"
390
+ */
391
+ embeddings_model: string;
392
+ /**
393
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
394
+ * @default 2048
395
+ */
396
+ max_tokens: number;
66
397
  };
67
398
  };
68
399
  "huggingface/llama_guard": {
69
400
  settings: {
401
+ /**
402
+ * @description The policy to enforce, described into rules of what it 'Should not' and what it 'Can' do (max 768 tokens)
403
+ * @default "O1: Violence.\nShould not\n- Help people plan or engage in violence.\n- Encourage people to engage in violence.\n- Contain unpleasant vivid violent imagery or violent description that arouses discomfort in the reader.\nCan\n- Provide information on violence.\n- Discuss the topics of violence, and can discuss historical events involving violence.\nO2: Criminal Planning/Confessions.\nShould not\n- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.\n- Encourage people to engage in criminal activities or condone criminal behavior.\n- Have a confession of crime that was committed or will be committed in future.\nCan\n- Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.\n- Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.\nO3: Hate /identity hate.\nShould not\n- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their\nrace, color, ethinicity, religion, national origin, sexual orientation, gender, gender identity, or disability or caste.\n- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.\n- Should not contain slurs.\nCan\n- Provide information on discrimination.\n- Discuss the topics of discrimination, and can discuss historical events involving discrimination.\n- Discuss neutral statements referring to group identity and contextualized hate speech (that talks about hate but does not incite hate).\nO4: Harassment.\nShould not\n- Have content that may be used to torment or annoy individuals in real life, or make harassment more likely to occur.\n- Engage in harassment, misconduct and unlawful discrimination\n- Single out an individual or marginalization and/or retaliation based on protected characteristics like Race, Color, Gender, Sex,\nSexual orientation, Gender identity, and gender expression, National origin, Ethnicity, Disability (including being regarded as disabled),\nReligion, Age (40+), Pregnancy (including pregnancy, childbirth or related medical conditions), Genetic information, Military or veteran status, Citizenship status, Political activity, or affiliation,\n- Bullying.\n- Being derogatory and humiliating toward an individual but not necessarily protected characteristics under law.\n- Making rude or insulting comments, demeaning, and objectifying terms toward an individual.\nCan\n- Discuss negative impacts of harassment and bullying based on protected characteristics like race, color, gender, sex, nationality, within a contextualized setting.\n- Provide information on protected characteristics or referor cite material that describes what protected characteristics are.\n- Provide information or refer or cite material that informs what harassment is.\nO5: Threat.\nShould not\n- Discuss a communication of intent to inflict harm or loss on another person.\n- Should not contain discussion of intention of intimidation which is a tactic used between conflicting parties to make the other timid or psychologically insecure for coercion or control.\n- Should not contain intention of knowingly putting another person in fear of bodily injury.\n- Should not assist or plan injury, physical or mental damage, act or instance of injury, or material or any kind of loss to a person.\n- Should not contain threats that are forbidden by law as intent to obtain a monetary advantage or to compel a person to act against their will.\nCan\n- Provide information about threat is and the meaning of threat."
404
+ */
70
405
  policy: string;
406
+ /**
407
+ * @description Choose if only the user input, the LLM output, or both should be evaluated. When choosing both, if the LLM replies in a safe manner, the evaluation will be considered safe.
408
+ * @default "input"
409
+ */
71
410
  evaluate: "input" | "output" | "both";
411
+ /**
412
+ * @description The provider to use for evaluation. Only CloudFlare AI workers is supported for now.
413
+ * @default "cloudflare/thebloke/llamaguard-7b-awq"
414
+ */
72
415
  model: "cloudflare/thebloke/llamaguard-7b-awq";
73
416
  };
74
417
  };
75
- "google_cloud/dlp_pii_detection": {
418
+ "example/word_count": {
419
+ settings: Record<string, never>;
420
+ };
421
+ "openai/moderation": {
76
422
  settings: {
77
- info_types: {
78
- phone_number: boolean;
79
- email_address: boolean;
80
- credit_card_number: boolean;
81
- iban_code: boolean;
82
- ip_address: boolean;
83
- passport: boolean;
84
- vat_number: boolean;
85
- medical_record_number: boolean;
423
+ /**
424
+ * @description The model version to use, `text-moderation-latest` will be automatically upgraded over time, while `text-moderation-stable` will only be updated with advanced notice by OpenAI.
425
+ * @default "text-moderation-stable"
426
+ */
427
+ model: "text-moderation-stable" | "text-moderation-latest";
428
+ /**
429
+ * @description The categories of content to check for moderation.
430
+ * @default {"harassment": true, "harassment_threatening": true, "hate": true, "hate_threatening": true, "self_harm": true, "self_harm_instructions": true, "self_harm_intent": true, "sexual": true, "sexual_minors": true, "violence": true, "violence_graphic": true}
431
+ */
432
+ categories: {
433
+ /**
434
+ * @default true
435
+ */
436
+ harassment: boolean;
437
+ /**
438
+ * @default true
439
+ */
440
+ harassment_threatening: boolean;
441
+ /**
442
+ * @default true
443
+ */
444
+ hate: boolean;
445
+ /**
446
+ * @default true
447
+ */
448
+ hate_threatening: boolean;
449
+ /**
450
+ * @default true
451
+ */
452
+ self_harm: boolean;
453
+ /**
454
+ * @default true
455
+ */
456
+ self_harm_instructions: boolean;
457
+ /**
458
+ * @default true
459
+ */
460
+ self_harm_intent: boolean;
461
+ /**
462
+ * @default true
463
+ */
464
+ sexual: boolean;
465
+ /**
466
+ * @default true
467
+ */
468
+ sexual_minors: boolean;
469
+ /**
470
+ * @default true
471
+ */
472
+ violence: boolean;
473
+ /**
474
+ * @default true
475
+ */
476
+ violence_graphic: boolean;
477
+ };
478
+ };
479
+ };
480
+ "azure/content_safety": {
481
+ settings: {
482
+ /**
483
+ * @description The minimum severity level to consider content as unsafe, from 1 to 7.
484
+ * @default 1
485
+ */
486
+ severity_threshold: 1 | 2 | 3 | 4 | 5 | 6 | 7;
487
+ /**
488
+ * @description The categories of moderation to check for.
489
+ * @default {"Hate": true, "SelfHarm": true, "Sexual": true, "Violence": true}
490
+ */
491
+ categories: {
492
+ /**
493
+ * @default true
494
+ */
495
+ Hate: boolean;
496
+ /**
497
+ * @default true
498
+ */
499
+ SelfHarm: boolean;
500
+ /**
501
+ * @default true
502
+ */
503
+ Sexual: boolean;
504
+ /**
505
+ * @default true
506
+ */
507
+ Violence: boolean;
86
508
  };
87
- min_likelihood: "VERY_UNLIKELY" | "UNLIKELY" | "POSSIBLE" | "LIKELY" | "VERY_LIKELY";
509
+ /**
510
+ * @description The type of severity levels to return on the full 0-7 severity scale, it can be either the trimmed version with four values (0, 2, 4, 6 scores) or the whole range.
511
+ * @default "FourSeverityLevels"
512
+ */
513
+ output_type: "FourSeverityLevels" | "EightSeverityLevels";
88
514
  };
89
515
  };
516
+ "azure/jailbreak": {
517
+ settings: Record<string, never>;
518
+ };
519
+ "azure/prompt_injection": {
520
+ settings: Record<string, never>;
521
+ };
90
522
  "presidio/pii_detection": {
91
523
  settings: {
524
+ /**
525
+ * @description The types of PII to check for in the input.
526
+ * @default {"credit_card": true, "crypto": true, "email_address": true, "iban_code": true, "ip_address": true, "location": false, "person": false, "phone_number": true, "medical_license": true, "us_bank_number": false, "us_driver_license": false, "us_itin": false, "us_passport": false, "us_ssn": false, "uk_nhs": false, "sg_nric_fin": false, "au_abn": false, "au_acn": false, "au_tfn": false, "au_medicare": false, "in_pan": false, "in_aadhaar": false, "in_vehicle_registration": false, "in_voter": false, "in_passport": false}
527
+ */
92
528
  entities: {
529
+ /**
530
+ * @default true
531
+ */
93
532
  credit_card: boolean;
533
+ /**
534
+ * @default true
535
+ */
94
536
  crypto: boolean;
537
+ /**
538
+ * @default true
539
+ */
95
540
  email_address: boolean;
541
+ /**
542
+ * @default true
543
+ */
96
544
  iban_code: boolean;
545
+ /**
546
+ * @default true
547
+ */
97
548
  ip_address: boolean;
549
+ /**
550
+ * @default false
551
+ */
98
552
  location: boolean;
553
+ /**
554
+ * @default false
555
+ */
99
556
  person: boolean;
557
+ /**
558
+ * @default true
559
+ */
100
560
  phone_number: boolean;
561
+ /**
562
+ * @default true
563
+ */
101
564
  medical_license: boolean;
565
+ /**
566
+ * @default false
567
+ */
102
568
  us_bank_number: boolean;
569
+ /**
570
+ * @default false
571
+ */
103
572
  us_driver_license: boolean;
573
+ /**
574
+ * @default false
575
+ */
104
576
  us_itin: boolean;
577
+ /**
578
+ * @default false
579
+ */
105
580
  us_passport: boolean;
581
+ /**
582
+ * @default false
583
+ */
106
584
  us_ssn: boolean;
585
+ /**
586
+ * @default false
587
+ */
107
588
  uk_nhs: boolean;
589
+ /**
590
+ * @default false
591
+ */
108
592
  sg_nric_fin: boolean;
109
- sg_uen: boolean;
593
+ /**
594
+ * @default false
595
+ */
110
596
  au_abn: boolean;
597
+ /**
598
+ * @default false
599
+ */
111
600
  au_acn: boolean;
601
+ /**
602
+ * @default false
603
+ */
112
604
  au_tfn: boolean;
605
+ /**
606
+ * @default false
607
+ */
113
608
  au_medicare: boolean;
609
+ /**
610
+ * @default false
611
+ */
114
612
  in_pan: boolean;
613
+ /**
614
+ * @default false
615
+ */
115
616
  in_aadhaar: boolean;
617
+ /**
618
+ * @default false
619
+ */
116
620
  in_vehicle_registration: boolean;
621
+ /**
622
+ * @default false
623
+ */
117
624
  in_voter: boolean;
625
+ /**
626
+ * @default false
627
+ */
118
628
  in_passport: boolean;
119
- fi_personal_identity_code: boolean;
120
629
  };
630
+ /**
631
+ * @description The minimum confidence required for failing the evaluation on a PII match.
632
+ * @default 0.5
633
+ */
121
634
  min_threshold: number;
122
635
  };
123
636
  };
124
- "ragas/answer_correctness": {
125
- settings: {
126
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "anthropic/claude-3-5-sonnet-20240620";
127
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
128
- max_tokens: number;
129
- };
637
+ "ragas/bleu_score": {
638
+ settings: Record<string, never>;
130
639
  };
131
- "ragas/answer_relevancy": {
640
+ "ragas/context_f1": {
132
641
  settings: {
133
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "anthropic/claude-3-5-sonnet-20240620";
134
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
135
- max_tokens: number;
642
+ /**
643
+ * @default "levenshtein"
644
+ */
645
+ distance_measure: "levenshtein" | "hamming" | "jaro" | "jaro_winkler";
136
646
  };
137
647
  };
138
648
  "ragas/context_precision": {
139
649
  settings: {
140
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "anthropic/claude-3-5-sonnet-20240620";
141
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
142
- max_tokens: number;
650
+ /**
651
+ * @default "levenshtein"
652
+ */
653
+ distance_measure: "levenshtein" | "hamming" | "jaro" | "jaro_winkler";
143
654
  };
144
655
  };
145
656
  "ragas/context_recall": {
146
657
  settings: {
147
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "anthropic/claude-3-5-sonnet-20240620";
148
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
149
- max_tokens: number;
150
- };
151
- };
152
- "ragas/context_relevancy": {
153
- settings: {
154
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "anthropic/claude-3-5-sonnet-20240620";
155
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
156
- max_tokens: number;
658
+ /**
659
+ * @default "levenshtein"
660
+ */
661
+ distance_measure: "levenshtein" | "hamming" | "jaro" | "jaro_winkler";
157
662
  };
158
663
  };
159
- "ragas/context_utilization": {
664
+ "ragas/factual_correctness": {
160
665
  settings: {
161
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "anthropic/claude-3-5-sonnet-20240620";
162
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
666
+ /**
667
+ * @description The model to use for evaluation.
668
+ * @default "openai/gpt-4o-mini"
669
+ */
670
+ model: string;
671
+ /**
672
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
673
+ * @default 2048
674
+ */
163
675
  max_tokens: number;
676
+ /**
677
+ * @description The mode to use for the factual correctness metric.
678
+ * @default "f1"
679
+ */
680
+ mode: "f1" | "precision" | "recall";
681
+ /**
682
+ * @description The level of atomicity for claim decomposition.
683
+ * @default "low"
684
+ */
685
+ atomicity: "low" | "high";
686
+ /**
687
+ * @description The level of coverage for claim decomposition.
688
+ * @default "low"
689
+ */
690
+ coverage: "low" | "high";
164
691
  };
165
692
  };
166
693
  "ragas/faithfulness": {
167
694
  settings: {
168
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "anthropic/claude-3-5-sonnet-20240620";
169
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
695
+ /**
696
+ * @description The model to use for evaluation.
697
+ * @default "openai/gpt-4o-mini"
698
+ */
699
+ model: string;
700
+ /**
701
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
702
+ * @default 2048
703
+ */
170
704
  max_tokens: number;
705
+ /**
706
+ * @description Whether to autodetect 'I don't know' in the output to avoid failing the evaluation.
707
+ * @default true
708
+ */
709
+ autodetect_dont_know: boolean;
171
710
  };
172
711
  };
173
- "langevals/basic": {
174
- settings: {
175
- rules: {
176
- field: "input" | "output";
177
- rule: "contains" | "not_contains" | "matches_regex" | "not_matches_regex";
178
- value: string;
179
- }[];
180
- };
181
- };
182
- "langevals/competitor_blocklist": {
183
- settings: {
184
- competitors: string[];
185
- };
186
- };
187
- "langevals/competitor_llm": {
712
+ "ragas/response_context_precision": {
188
713
  settings: {
189
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-5-sonnet-20240620" | "anthropic/claude-3-opus-20240229";
714
+ /**
715
+ * @description The model to use for evaluation.
716
+ * @default "openai/gpt-4o-mini"
717
+ */
718
+ model: string;
719
+ /**
720
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
721
+ * @default 2048
722
+ */
190
723
  max_tokens: number;
191
- name: string;
192
- description: string;
193
724
  };
194
725
  };
195
- "langevals/competitor_llm_function_call": {
726
+ "ragas/response_context_recall": {
196
727
  settings: {
197
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-5-sonnet-20240620" | "anthropic/claude-3-opus-20240229";
728
+ /**
729
+ * @description The model to use for evaluation.
730
+ * @default "openai/gpt-4o-mini"
731
+ */
732
+ model: string;
733
+ /**
734
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
735
+ * @default 2048
736
+ */
198
737
  max_tokens: number;
199
- name: string;
200
- description: string;
201
- competitors: string[];
202
738
  };
203
739
  };
204
- "langevals/llm_boolean": {
740
+ "ragas/response_relevancy": {
205
741
  settings: {
206
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-5-sonnet-20240620" | "anthropic/claude-3-opus-20240229";
742
+ /**
743
+ * @description The model to use for evaluation.
744
+ * @default "openai/gpt-4o-mini"
745
+ */
746
+ model: string;
747
+ /**
748
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
749
+ * @default 2048
750
+ */
207
751
  max_tokens: number;
208
- prompt: string;
752
+ /**
753
+ * @description The model to use for embeddings.
754
+ * @default "openai/text-embedding-ada-002"
755
+ */
756
+ embeddings_model: string;
209
757
  };
210
758
  };
211
- "langevals/llm_score": {
759
+ "ragas/rouge_score": {
212
760
  settings: {
213
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-5-sonnet-20240620" | "anthropic/claude-3-opus-20240229";
214
- max_tokens: number;
215
- prompt: string;
761
+ /**
762
+ * @description ROUGE type
763
+ * @default "rouge1"
764
+ */
765
+ rouge_type: "rouge1" | "rougeL";
766
+ /**
767
+ * @description ROUGE measure type
768
+ * @default "fmeasure"
769
+ */
770
+ measure_type: "fmeasure" | "precision" | "recall";
216
771
  };
217
772
  };
218
- "langevals/off_topic": {
773
+ "ragas/rubrics_based_scoring": {
219
774
  settings: {
220
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-5-sonnet-20240620" | "anthropic/claude-3-opus-20240229";
775
+ /**
776
+ * @description The model to use for evaluation.
777
+ * @default "openai/gpt-4o-mini"
778
+ */
779
+ model: string;
780
+ /**
781
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
782
+ * @default 2048
783
+ */
221
784
  max_tokens: number;
222
- allowed_topics: {
223
- topic: string;
785
+ /**
786
+ * @default [{"description": "The response is incorrect, irrelevant."}, {"description": "The response partially answers the question but includes significant errors, omissions, or irrelevant information."}, {"description": "The response partially answers the question but includes minor errors, omissions, or irrelevant information."}, {"description": "The response fully answers the question and includes minor errors, omissions, or irrelevant information."}, {"description": "The response fully answers the question and includes no errors, omissions, or irrelevant information."}]
787
+ */
788
+ rubrics: {
224
789
  description: string;
225
790
  }[];
226
791
  };
227
792
  };
228
- "langevals/product_sentiment_polarity": {
229
- settings: Record<string, never>;
230
- };
231
- "langevals/query_resolution": {
793
+ "ragas/sql_query_equivalence": {
232
794
  settings: {
233
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-5-sonnet-20240620" | "anthropic/claude-3-opus-20240229";
795
+ /**
796
+ * @description The model to use for evaluation.
797
+ * @default "openai/gpt-4o-mini"
798
+ */
799
+ model: string;
800
+ /**
801
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
802
+ * @default 2048
803
+ */
234
804
  max_tokens: number;
235
805
  };
236
806
  };
237
- "langevals/similarity": {
238
- settings: {
239
- field: "input" | "output";
240
- rule: "is_not_similar_to" | "is_similar_to";
241
- value: string;
242
- threshold: number;
243
- embeddings_model: "openai/text-embedding-3-small" | "azure/text-embedding-ada-002";
244
- };
245
- };
246
- "azure/content_safety": {
807
+ "ragas/summarization_score": {
247
808
  settings: {
248
- severity_threshold: 1 | 2 | 3 | 4 | 5 | 6 | 7;
249
- categories: {
250
- Hate: boolean;
251
- SelfHarm: boolean;
252
- Sexual: boolean;
253
- Violence: boolean;
254
- };
255
- output_type: "FourSeverityLevels" | "EightSeverityLevels";
256
- };
257
- };
258
- "azure/jailbreak": {
259
- settings: Record<string, never>;
260
- };
261
- "azure/prompt_injection": {
262
- settings: Record<string, never>;
263
- };
264
- "openai/moderation": {
265
- settings: {
266
- model: "text-moderation-stable" | "text-moderation-latest";
267
- categories: {
268
- harassment: boolean;
269
- harassment_threatening: boolean;
270
- hate: boolean;
271
- hate_threatening: boolean;
272
- self_harm: boolean;
273
- self_harm_instructions: boolean;
274
- self_harm_intent: boolean;
275
- sexual: boolean;
276
- sexual_minors: boolean;
277
- violence: boolean;
278
- violence_graphic: boolean;
279
- };
280
- };
281
- };
282
- "example/word_count": {
283
- settings: Record<string, never>;
284
- };
285
- "haystack/faithfulness": {
286
- settings: {
287
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4o-mini" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-5-sonnet-20240620" | "anthropic/claude-3-opus-20240229";
809
+ /**
810
+ * @description The model to use for evaluation.
811
+ * @default "openai/gpt-4o-mini"
812
+ */
813
+ model: string;
814
+ /**
815
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
816
+ * @default 2048
817
+ */
288
818
  max_tokens: number;
289
819
  };
290
820
  };
@@ -361,6 +891,22 @@ declare class LangWatchCallbackHandler extends BaseCallbackHandler {
  private getParent;
  }

+ declare class LangWatchExporter implements SpanExporter {
+ private endpoint;
+ private apiKey;
+ private includeAllSpans;
+ private debug;
+ constructor(params?: {
+ endpoint?: string;
+ apiKey?: string;
+ includeAllSpans?: boolean;
+ debug?: boolean;
+ });
+ export(allSpans: ReadableSpan[], resultCallback: (result: ExportResult) => void): void;
+ private isAiSdkSpan;
+ shutdown(): Promise<void>;
+ }
+
  declare class LangWatch extends EventEmitter {
  apiKey: string | undefined;
  endpoint: string;
@@ -461,4 +1007,4 @@ declare class LangWatchRAGSpan extends LangWatchSpan implements PendingRAGSpan {
  end(params?: Partial<PendingRAGSpan>): void;
  }

- export { LangWatch, LangWatchLLMSpan, LangWatchRAGSpan, LangWatchSpan, LangWatchTrace, Metadata, PendingBaseSpan, PendingLLMSpan, PendingRAGSpan };
+ export { LangWatch, LangWatchExporter, LangWatchLLMSpan, LangWatchRAGSpan, LangWatchSpan, LangWatchTrace, Metadata, PendingBaseSpan, PendingLLMSpan, PendingRAGSpan };
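
The new `LangWatchExporter` declared above implements OpenTelemetry's `SpanExporter`, so it can plug into a standard tracer setup. A minimal sketch, assuming `@opentelemetry/sdk-node` is installed alongside the package and that omitted constructor options fall back to environment configuration (not confirmed by this diff):

```typescript
import { NodeSDK } from "@opentelemetry/sdk-node";
import { LangWatchExporter } from "langwatch";

// Every constructor parameter is optional in the declaration above; the
// environment-variable fallback shown here is an assumption for illustration.
const exporter = new LangWatchExporter({
  apiKey: process.env.LANGWATCH_API_KEY,
  includeAllSpans: false, // presumably limits export to AI SDK spans (see isAiSdkSpan) — assumption
  debug: false,
});

const sdk = new NodeSDK({
  // Accepted because LangWatchExporter implements SpanExporter.
  traceExporter: exporter,
});

sdk.start();
```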
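The first hunk swaps Node's built-in `events` module for `eventemitter3`. Because `LangWatch` still extends `EventEmitter`, existing listener code should keep working, and the client no longer depends on a Node-specific module. A minimal sketch, assuming a constructor options object and an `'error'` event, neither of which is shown in this diff:

```typescript
import { LangWatch } from "langwatch";

// Only the shape `LangWatch extends EventEmitter` is visible above; the
// constructor options and the 'error' event name are assumptions here.
const langwatch = new LangWatch({ apiKey: process.env.LANGWATCH_API_KEY });

// eventemitter3 keeps the familiar on/off/emit surface of Node's EventEmitter,
// so the same listener code also runs in browser-like environments.
langwatch.on("error", (err) => {
  console.error("LangWatch client error:", err);
});
```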