langwatch 0.1.3 → 0.1.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import EventEmitter from 'events';
1
+ import EventEmitter from 'eventemitter3';
2
2
  import { AgentAction, AgentFinish } from '@langchain/core/agents';
3
3
  import { BaseCallbackHandler } from '@langchain/core/callbacks/base';
4
4
  import { DocumentInterface } from '@langchain/core/documents';
@@ -6,49 +6,261 @@ import { Serialized } from '@langchain/core/load/serializable';
6
6
  import { BaseMessage } from '@langchain/core/messages';
7
7
  import { LLMResult } from '@langchain/core/outputs';
8
8
  import { ChainValues } from '@langchain/core/utils/types';
9
- import { R as RAGChunk, M as Metadata, C as CollectorRESTParams, S as Span, a as RESTEvaluation, P as PendingBaseSpan, b as PendingLLMSpan, c as PendingRAGSpan, d as SpanTypes } from './utils-CFtM8VVg.js';
10
- export { B as BaseSpan, e as ChatMessage, f as ChatRichContent, L as LLMSpan, g as RAGSpan, h as SpanInputOutput, i as autoconvertTypedValues, j as captureError, k as convertFromVercelAIMessages } from './utils-CFtM8VVg.js';
9
+ import { R as RAGChunk, M as Metadata, C as CollectorRESTParams, S as Span, a as RESTEvaluation, P as PendingBaseSpan, b as PendingLLMSpan, c as PendingRAGSpan, d as SpanTypes } from './utils-DJoZVcOA.js';
10
+ export { B as BaseSpan, e as ChatMessage, f as ChatRichContent, L as LLMSpan, g as RAGSpan, h as SpanInputOutput, i as autoconvertTypedValues, j as captureError, k as convertFromVercelAIMessages } from './utils-DJoZVcOA.js';
11
+ import { SpanExporter, ReadableSpan } from '@opentelemetry/sdk-trace-base';
12
+ import { ExportResult } from '@opentelemetry/core';
11
13
  import 'ai';
12
14
 
13
15
  type EvaluatorTypes = keyof Evaluators;
14
16
  type Evaluators = {
15
- "huggingface/llama_guard": {
17
+ "azure/content_safety": {
16
18
  settings: {
17
- policy: string;
18
- evaluate: "input" | "output" | "both";
19
- model: "cloudflare/thebloke/llamaguard-7b-awq";
19
+ /**
20
+ * @description The minimum severity level to consider content as unsafe, from 1 to 7.
21
+ * @default 1
22
+ */
23
+ severity_threshold: 1 | 2 | 3 | 4 | 5 | 6 | 7;
24
+ /**
25
+ * @description The categories of moderation to check for.
26
+ * @default {"Hate": true, "SelfHarm": true, "Sexual": true, "Violence": true}
27
+ */
28
+ categories: {
29
+ /**
30
+ * @default true
31
+ */
32
+ Hate: boolean;
33
+ /**
34
+ * @default true
35
+ */
36
+ SelfHarm: boolean;
37
+ /**
38
+ * @default true
39
+ */
40
+ Sexual: boolean;
41
+ /**
42
+ * @default true
43
+ */
44
+ Violence: boolean;
45
+ };
46
+ /**
47
+ * @description The type of severity levels to return on the full 0-7 severity scale, it can be either the trimmed version with four values (0, 2, 4, 6 scores) or the whole range.
48
+ * @default "FourSeverityLevels"
49
+ */
50
+ output_type: "FourSeverityLevels" | "EightSeverityLevels";
20
51
  };
21
52
  };
53
+ "azure/jailbreak": {
54
+ settings: Record<string, never>;
55
+ };
56
+ "azure/prompt_injection": {
57
+ settings: Record<string, never>;
58
+ };
22
59
  "example/word_count": {
23
60
  settings: Record<string, never>;
24
61
  };
25
62
  "openai/moderation": {
26
63
  settings: {
64
+ /**
65
+ * @description The model version to use, `text-moderation-latest` will be automatically upgraded over time, while `text-moderation-stable` will only be updated with advanced notice by OpenAI.
66
+ * @default "text-moderation-stable"
67
+ */
27
68
  model: "text-moderation-stable" | "text-moderation-latest";
69
+ /**
70
+ * @description The categories of content to check for moderation.
71
+ * @default {"harassment": true, "harassment_threatening": true, "hate": true, "hate_threatening": true, "self_harm": true, "self_harm_instructions": true, "self_harm_intent": true, "sexual": true, "sexual_minors": true, "violence": true, "violence_graphic": true}
72
+ */
28
73
  categories: {
74
+ /**
75
+ * @default true
76
+ */
29
77
  harassment: boolean;
78
+ /**
79
+ * @default true
80
+ */
30
81
  harassment_threatening: boolean;
82
+ /**
83
+ * @default true
84
+ */
31
85
  hate: boolean;
86
+ /**
87
+ * @default true
88
+ */
32
89
  hate_threatening: boolean;
90
+ /**
91
+ * @default true
92
+ */
33
93
  self_harm: boolean;
94
+ /**
95
+ * @default true
96
+ */
34
97
  self_harm_instructions: boolean;
98
+ /**
99
+ * @default true
100
+ */
35
101
  self_harm_intent: boolean;
102
+ /**
103
+ * @default true
104
+ */
36
105
  sexual: boolean;
106
+ /**
107
+ * @default true
108
+ */
37
109
  sexual_minors: boolean;
110
+ /**
111
+ * @default true
112
+ */
38
113
  violence: boolean;
114
+ /**
115
+ * @default true
116
+ */
39
117
  violence_graphic: boolean;
40
118
  };
41
119
  };
42
120
  };
43
- "haystack/faithfulness": {
121
+ "ragas/answer_correctness": {
122
+ settings: {
123
+ /**
124
+ * @description The model to use for evaluation.
125
+ * @default "openai/gpt-4o-mini"
126
+ */
127
+ model: string;
128
+ /**
129
+ * @description The model to use for embeddings.
130
+ * @default "openai/text-embedding-ada-002"
131
+ */
132
+ embeddings_model: string;
133
+ /**
134
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
135
+ * @default 2048
136
+ */
137
+ max_tokens: number;
138
+ };
139
+ };
140
+ "ragas/answer_relevancy": {
44
141
  settings: {
45
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-sonnet-20240229" | "anthropic/claude-3-opus-20240229";
142
+ /**
143
+ * @description The model to use for evaluation.
144
+ * @default "openai/gpt-4o-mini"
145
+ */
146
+ model: string;
147
+ /**
148
+ * @description The model to use for embeddings.
149
+ * @default "openai/text-embedding-ada-002"
150
+ */
151
+ embeddings_model: string;
152
+ /**
153
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
154
+ * @default 2048
155
+ */
156
+ max_tokens: number;
157
+ };
158
+ };
159
+ "ragas/context_precision": {
160
+ settings: {
161
+ /**
162
+ * @description The model to use for evaluation.
163
+ * @default "openai/gpt-4o-mini"
164
+ */
165
+ model: string;
166
+ /**
167
+ * @description The model to use for embeddings.
168
+ * @default "openai/text-embedding-ada-002"
169
+ */
170
+ embeddings_model: string;
171
+ /**
172
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
173
+ * @default 2048
174
+ */
175
+ max_tokens: number;
176
+ };
177
+ };
178
+ "ragas/context_recall": {
179
+ settings: {
180
+ /**
181
+ * @description The model to use for evaluation.
182
+ * @default "openai/gpt-4o-mini"
183
+ */
184
+ model: string;
185
+ /**
186
+ * @description The model to use for embeddings.
187
+ * @default "openai/text-embedding-ada-002"
188
+ */
189
+ embeddings_model: string;
190
+ /**
191
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
192
+ * @default 2048
193
+ */
194
+ max_tokens: number;
195
+ };
196
+ };
197
+ "ragas/context_relevancy": {
198
+ settings: {
199
+ /**
200
+ * @description The model to use for evaluation.
201
+ * @default "openai/gpt-4o-mini"
202
+ */
203
+ model: string;
204
+ /**
205
+ * @description The model to use for embeddings.
206
+ * @default "openai/text-embedding-ada-002"
207
+ */
208
+ embeddings_model: string;
209
+ /**
210
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
211
+ * @default 2048
212
+ */
213
+ max_tokens: number;
214
+ };
215
+ };
216
+ "ragas/context_utilization": {
217
+ settings: {
218
+ /**
219
+ * @description The model to use for evaluation.
220
+ * @default "openai/gpt-4o-mini"
221
+ */
222
+ model: string;
223
+ /**
224
+ * @description The model to use for embeddings.
225
+ * @default "openai/text-embedding-ada-002"
226
+ */
227
+ embeddings_model: string;
228
+ /**
229
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
230
+ * @default 2048
231
+ */
232
+ max_tokens: number;
233
+ };
234
+ };
235
+ "ragas/faithfulness": {
236
+ settings: {
237
+ /**
238
+ * @description The model to use for evaluation.
239
+ * @default "openai/gpt-4o-mini"
240
+ */
241
+ model: string;
242
+ /**
243
+ * @description The model to use for embeddings.
244
+ * @default "openai/text-embedding-ada-002"
245
+ */
246
+ embeddings_model: string;
247
+ /**
248
+ * @description The maximum number of tokens allowed for evaluation, a too high number can be costly. Entries above this amount will be skipped.
249
+ * @default 2048
250
+ */
46
251
  max_tokens: number;
47
252
  };
48
253
  };
49
254
  "langevals/basic": {
50
255
  settings: {
256
+ /**
257
+ * @description List of rules to check, the message must pass all of them
258
+ * @default [{"field": "output", "rule": "not_contains", "value": "artificial intelligence"}]
259
+ */
51
260
  rules: {
261
+ /**
262
+ * @default "output"
263
+ */
52
264
  field: "input" | "output";
53
265
  rule: "contains" | "not_contains" | "matches_regex" | "not_matches_regex";
54
266
  value: string;
@@ -57,44 +269,144 @@ type Evaluators = {
57
269
  };
58
270
  "langevals/competitor_blocklist": {
59
271
  settings: {
272
+ /**
273
+ * @description The competitors that must not be mentioned.
274
+ * @default ["OpenAI", "Google", "Microsoft"]
275
+ */
60
276
  competitors: string[];
61
277
  };
62
278
  };
63
279
  "langevals/competitor_llm": {
64
280
  settings: {
65
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-sonnet-20240229" | "anthropic/claude-3-opus-20240229";
281
+ /**
282
+ * @description The model to use for evaluation
283
+ * @default "openai/gpt-4o-mini"
284
+ */
285
+ model: string;
286
+ /**
287
+ * @description Max tokens allowed for evaluation
288
+ * @default 8192
289
+ */
66
290
  max_tokens: number;
291
+ /**
292
+ * @description The name of your company
293
+ * @default "LangWatch"
294
+ */
67
295
  name: string;
296
+ /**
297
+ * @description Description of what your company is specializing at
298
+ * @default "We are providing an LLM observability and evaluation platform"
299
+ */
68
300
  description: string;
69
301
  };
70
302
  };
71
303
  "langevals/competitor_llm_function_call": {
72
304
  settings: {
73
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-sonnet-20240229" | "anthropic/claude-3-opus-20240229";
305
+ /**
306
+ * @description The model to use for evaluation
307
+ * @default "openai/gpt-4o-mini"
308
+ */
309
+ model: string;
310
+ /**
311
+ * @description Max tokens allowed for evaluation
312
+ * @default 8192
313
+ */
74
314
  max_tokens: number;
315
+ /**
316
+ * @description The name of your company
317
+ * @default "LangWatch"
318
+ */
75
319
  name: string;
320
+ /**
321
+ * @description Description of what your company is specializing at
322
+ * @default "We are providing an LLM observability and evaluation platform"
323
+ */
76
324
  description: string;
325
+ /**
326
+ * @description The competitors that must not be mentioned.
327
+ * @default ["OpenAI", "Google", "Microsoft"]
328
+ */
77
329
  competitors: string[];
78
330
  };
79
331
  };
80
332
  "langevals/llm_boolean": {
81
333
  settings: {
82
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-sonnet-20240229" | "anthropic/claude-3-opus-20240229";
334
+ /**
335
+ * @description The model to use for evaluation
336
+ * @default "openai/gpt-4o-mini"
337
+ */
338
+ model: string;
339
+ /**
340
+ * @default 8192
341
+ */
83
342
  max_tokens: number;
343
+ /**
344
+ * @description The system prompt to use for the LLM to run the evaluation
345
+ * @default "You are an LLM evaluator. We need the guarantee that the output answers what is being asked on the input, please evaluate as False if it doesn't"
346
+ */
84
347
  prompt: string;
85
348
  };
86
349
  };
350
+ "langevals/llm_category": {
351
+ settings: {
352
+ /**
353
+ * @description The model to use for evaluation
354
+ * @default "openai/gpt-4o-mini"
355
+ */
356
+ model: string;
357
+ /**
358
+ * @default 8192
359
+ */
360
+ max_tokens: number;
361
+ /**
362
+ * @description The system prompt to use for the LLM to run the evaluation
363
+ * @default "You are an LLM category evaluator. Please categorize the message in one of the following categories"
364
+ */
365
+ prompt: string;
366
+ /**
367
+ * @description The categories to use for the evaluation
368
+ * @default [{"name": "smalltalk", "description": "Smalltalk with the user"}, {"name": "company", "description": "Questions about the company, what we do, etc"}]
369
+ */
370
+ categories: {
371
+ name: string;
372
+ description: string;
373
+ }[];
374
+ };
375
+ };
87
376
  "langevals/llm_score": {
88
377
  settings: {
89
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-sonnet-20240229" | "anthropic/claude-3-opus-20240229";
378
+ /**
379
+ * @description The model to use for evaluation
380
+ * @default "openai/gpt-4o-mini"
381
+ */
382
+ model: string;
383
+ /**
384
+ * @default 8192
385
+ */
90
386
  max_tokens: number;
387
+ /**
388
+ * @description The system prompt to use for the LLM to run the evaluation
389
+ * @default "You are an LLM evaluator. Please score from 0.0 to 1.0 how likely the user is to be satisfied with this answer, from 0.0 being not satisfied at all to 1.0 being completely satisfied"
390
+ */
91
391
  prompt: string;
92
392
  };
93
393
  };
94
394
  "langevals/off_topic": {
95
395
  settings: {
96
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-sonnet-20240229" | "anthropic/claude-3-opus-20240229";
396
+ /**
397
+ * @description The model to use for evaluation
398
+ * @default "openai/gpt-4o-mini"
399
+ */
400
+ model: string;
401
+ /**
402
+ * @description Max tokens allowed for evaluation
403
+ * @default 8192
404
+ */
97
405
  max_tokens: number;
406
+ /**
407
+ * @description The list of topics and their short descriptions that the chatbot is allowed to talk about
408
+ * @default [{"topic": "simple_chat", "description": "Smalltalk with the user"}, {"topic": "company", "description": "Questions about the company, what we do, etc"}]
409
+ */
98
410
  allowed_topics: {
99
411
  topic: string;
100
412
  description: string;
@@ -106,151 +418,438 @@ type Evaluators = {
106
418
  };
107
419
  "langevals/query_resolution": {
108
420
  settings: {
109
- model: "openai/gpt-3.5-turbo" | "openai/gpt-3.5-turbo-0125" | "openai/gpt-3.5-turbo-1106" | "openai/gpt-4-turbo" | "openai/gpt-4-0125-preview" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "openai/gpt-4-1106-preview" | "azure/gpt-35-turbo-1106" | "azure/gpt-4o" | "azure/gpt-4-turbo-2024-04-09" | "azure/gpt-4-1106-preview" | "groq/llama3-70b-8192" | "anthropic/claude-3-haiku-20240307" | "anthropic/claude-3-sonnet-20240229" | "anthropic/claude-3-opus-20240229";
421
+ /**
422
+ * @description The model to use for evaluation
423
+ * @default "openai/gpt-4o-mini"
424
+ */
425
+ model: string;
426
+ /**
427
+ * @description Max tokens allowed for evaluation
428
+ * @default 8192
429
+ */
110
430
  max_tokens: number;
111
431
  };
112
432
  };
113
433
  "langevals/similarity": {
114
434
  settings: {
435
+ /**
436
+ * @default "output"
437
+ */
115
438
  field: "input" | "output";
439
+ /**
440
+ * @default "is_not_similar_to"
441
+ */
116
442
  rule: "is_not_similar_to" | "is_similar_to";
443
+ /**
444
+ * @default "example"
445
+ */
117
446
  value: string;
447
+ /**
448
+ * @default 0.3
449
+ */
118
450
  threshold: number;
119
- embeddings_model: "openai/text-embedding-3-small" | "azure/text-embedding-ada-002";
451
+ /**
452
+ * @default "openai/text-embedding-3-small"
453
+ */
454
+ embeddings_model: string;
120
455
  };
121
456
  };
122
- "lingua/language_detection": {
457
+ "langevals/valid_format": {
123
458
  settings: {
124
- check_for: "input_matches_output" | "output_matches_language";
125
- expected_language?: "AF" | "AR" | "AZ" | "BE" | "BG" | "BN" | "BS" | "CA" | "CS" | "CY" | "DA" | "DE" | "EL" | "EN" | "EO" | "ES" | "ET" | "EU" | "FA" | "FI" | "FR" | "GA" | "GU" | "HE" | "HI" | "HR" | "HU" | "HY" | "ID" | "IS" | "IT" | "JA" | "KA" | "KK" | "KO" | "LA" | "LG" | "LT" | "LV" | "MI" | "MK" | "MN" | "MR" | "MS" | "NB" | "NL" | "NN" | "PA" | "PL" | "PT" | "RO" | "RU" | "SK" | "SL" | "SN" | "SO" | "SQ" | "SR" | "ST" | "SV" | "SW" | "TA" | "TE" | "TH" | "TL" | "TN" | "TR" | "TS" | "UK" | "UR" | "VI" | "XH" | "YO" | "ZH" | "ZU";
126
- min_words: number;
127
- threshold: number;
459
+ /**
460
+ * @default "json"
461
+ */
462
+ format: "json" | "markdown" | "python" | "sql";
463
+ /**
464
+ * @description JSON schema to validate against when format is 'json'
465
+ */
466
+ json_schema?: string;
467
+ };
468
+ };
469
+ "google_cloud/dlp_pii_detection": {
470
+ settings: {
471
+ /**
472
+ * @description The types of PII to check for in the input.
473
+ * @default {"phone_number": true, "email_address": true, "credit_card_number": true, "iban_code": true, "ip_address": true, "passport": true, "vat_number": true, "medical_record_number": true}
474
+ */
475
+ info_types: {
476
+ /**
477
+ * @default true
478
+ */
479
+ phone_number: boolean;
480
+ /**
481
+ * @default true
482
+ */
483
+ email_address: boolean;
484
+ /**
485
+ * @default true
486
+ */
487
+ credit_card_number: boolean;
488
+ /**
489
+ * @default true
490
+ */
491
+ iban_code: boolean;
492
+ /**
493
+ * @default true
494
+ */
495
+ ip_address: boolean;
496
+ /**
497
+ * @default true
498
+ */
499
+ passport: boolean;
500
+ /**
501
+ * @default true
502
+ */
503
+ vat_number: boolean;
504
+ /**
505
+ * @default true
506
+ */
507
+ medical_record_number: boolean;
508
+ };
509
+ /**
510
+ * @description The minimum confidence required for failing the evaluation on a PII match.
511
+ * @default "POSSIBLE"
512
+ */
513
+ min_likelihood: "VERY_UNLIKELY" | "UNLIKELY" | "POSSIBLE" | "LIKELY" | "VERY_LIKELY";
128
514
  };
129
515
  };
130
516
  "aws/comprehend_pii_detection": {
131
517
  settings: {
518
+ /**
519
+ * @description The types of PII to check for in the input.
520
+ * @default {"BANK_ACCOUNT_NUMBER": true, "BANK_ROUTING": true, "CREDIT_DEBIT_NUMBER": true, "CREDIT_DEBIT_CVV": true, "CREDIT_DEBIT_EXPIRY": true, "PIN": true, "EMAIL": true, "ADDRESS": true, "NAME": true, "PHONE": true, "SSN": true, "DATE_TIME": true, "PASSPORT_NUMBER": true, "DRIVER_ID": true, "URL": true, "AGE": true, "USERNAME": true, "PASSWORD": true, "AWS_ACCESS_KEY": true, "AWS_SECRET_KEY": true, "IP_ADDRESS": true, "MAC_ADDRESS": true, "LICENSE_PLATE": true, "VEHICLE_IDENTIFICATION_NUMBER": true, "UK_NATIONAL_INSURANCE_NUMBER": true, "CA_SOCIAL_INSURANCE_NUMBER": true, "US_INDIVIDUAL_TAX_IDENTIFICATION_NUMBER": true, "UK_UNIQUE_TAXPAYER_REFERENCE_NUMBER": true, "IN_PERMANENT_ACCOUNT_NUMBER": true, "IN_NREGA": true, "INTERNATIONAL_BANK_ACCOUNT_NUMBER": true, "SWIFT_CODE": true, "UK_NATIONAL_HEALTH_SERVICE_NUMBER": true, "CA_HEALTH_NUMBER": true, "IN_AADHAAR": true, "IN_VOTER_NUMBER": true}
521
+ */
132
522
  entity_types: {
523
+ /**
524
+ * @default true
525
+ */
133
526
  BANK_ACCOUNT_NUMBER: boolean;
527
+ /**
528
+ * @default true
529
+ */
134
530
  BANK_ROUTING: boolean;
531
+ /**
532
+ * @default true
533
+ */
135
534
  CREDIT_DEBIT_NUMBER: boolean;
535
+ /**
536
+ * @default true
537
+ */
136
538
  CREDIT_DEBIT_CVV: boolean;
539
+ /**
540
+ * @default true
541
+ */
137
542
  CREDIT_DEBIT_EXPIRY: boolean;
543
+ /**
544
+ * @default true
545
+ */
138
546
  PIN: boolean;
547
+ /**
548
+ * @default true
549
+ */
139
550
  EMAIL: boolean;
551
+ /**
552
+ * @default true
553
+ */
140
554
  ADDRESS: boolean;
555
+ /**
556
+ * @default true
557
+ */
141
558
  NAME: boolean;
559
+ /**
560
+ * @default true
561
+ */
142
562
  PHONE: boolean;
563
+ /**
564
+ * @default true
565
+ */
143
566
  SSN: boolean;
567
+ /**
568
+ * @default true
569
+ */
144
570
  DATE_TIME: boolean;
571
+ /**
572
+ * @default true
573
+ */
145
574
  PASSPORT_NUMBER: boolean;
575
+ /**
576
+ * @default true
577
+ */
146
578
  DRIVER_ID: boolean;
579
+ /**
580
+ * @default true
581
+ */
147
582
  URL: boolean;
583
+ /**
584
+ * @default true
585
+ */
148
586
  AGE: boolean;
587
+ /**
588
+ * @default true
589
+ */
149
590
  USERNAME: boolean;
591
+ /**
592
+ * @default true
593
+ */
150
594
  PASSWORD: boolean;
595
+ /**
596
+ * @default true
597
+ */
151
598
  AWS_ACCESS_KEY: boolean;
599
+ /**
600
+ * @default true
601
+ */
152
602
  AWS_SECRET_KEY: boolean;
603
+ /**
604
+ * @default true
605
+ */
153
606
  IP_ADDRESS: boolean;
607
+ /**
608
+ * @default true
609
+ */
154
610
  MAC_ADDRESS: boolean;
611
+ /**
612
+ * @default true
613
+ */
155
614
  LICENSE_PLATE: boolean;
615
+ /**
616
+ * @default true
617
+ */
156
618
  VEHICLE_IDENTIFICATION_NUMBER: boolean;
619
+ /**
620
+ * @default true
621
+ */
157
622
  UK_NATIONAL_INSURANCE_NUMBER: boolean;
623
+ /**
624
+ * @default true
625
+ */
158
626
  CA_SOCIAL_INSURANCE_NUMBER: boolean;
627
+ /**
628
+ * @default true
629
+ */
159
630
  US_INDIVIDUAL_TAX_IDENTIFICATION_NUMBER: boolean;
631
+ /**
632
+ * @default true
633
+ */
160
634
  UK_UNIQUE_TAXPAYER_REFERENCE_NUMBER: boolean;
635
+ /**
636
+ * @default true
637
+ */
161
638
  IN_PERMANENT_ACCOUNT_NUMBER: boolean;
639
+ /**
640
+ * @default true
641
+ */
162
642
  IN_NREGA: boolean;
643
+ /**
644
+ * @default true
645
+ */
163
646
  INTERNATIONAL_BANK_ACCOUNT_NUMBER: boolean;
647
+ /**
648
+ * @default true
649
+ */
164
650
  SWIFT_CODE: boolean;
651
+ /**
652
+ * @default true
653
+ */
165
654
  UK_NATIONAL_HEALTH_SERVICE_NUMBER: boolean;
655
+ /**
656
+ * @default true
657
+ */
166
658
  CA_HEALTH_NUMBER: boolean;
659
+ /**
660
+ * @default true
661
+ */
167
662
  IN_AADHAAR: boolean;
663
+ /**
664
+ * @default true
665
+ */
168
666
  IN_VOTER_NUMBER: boolean;
169
667
  };
668
+ /**
669
+ * @description The language code of the input text for better PII detection, defaults to english.
670
+ * @default "en"
671
+ */
170
672
  language_code: "en" | "es" | "fr" | "de" | "it" | "pt" | "ar" | "hi" | "ja" | "ko" | "zh" | "zh-TW";
673
+ /**
674
+ * @description The minimum confidence required for failing the evaluation on a PII match.
675
+ * @default 0.5
676
+ */
171
677
  min_confidence: number;
678
+ /**
679
+ * @description The AWS region to use for running the PII detection, defaults to eu-central-1 for GDPR compliance.
680
+ * @default "eu-central-1"
681
+ */
172
682
  aws_region: "us-east-1" | "us-east-2" | "us-west-1" | "us-west-2" | "ap-east-1" | "ap-south-1" | "ap-northeast-3" | "ap-northeast-2" | "ap-southeast-1" | "ap-southeast-2" | "ap-northeast-1" | "ca-central-1" | "eu-central-1" | "eu-west-1" | "eu-west-2" | "eu-south-1" | "eu-west-3" | "eu-north-1" | "me-south-1" | "sa-east-1";
173
683
  };
174
684
  };
175
- "google_cloud/dlp_pii_detection": {
176
- settings: {
177
- info_types: {
178
- phone_number: boolean;
179
- email_address: boolean;
180
- credit_card_number: boolean;
181
- iban_code: boolean;
182
- ip_address: boolean;
183
- passport: boolean;
184
- vat_number: boolean;
185
- medical_record_number: boolean;
186
- };
187
- min_likelihood: "VERY_UNLIKELY" | "UNLIKELY" | "POSSIBLE" | "LIKELY" | "VERY_LIKELY";
188
- };
189
- };
190
- "azure/content_safety": {
191
- settings: {
192
- severity_threshold: 1 | 2 | 3 | 4 | 5 | 6 | 7;
193
- categories: {
194
- Hate: boolean;
195
- SelfHarm: boolean;
196
- Sexual: boolean;
197
- Violence: boolean;
198
- };
199
- output_type: "FourSeverityLevels" | "EightSeverityLevels";
200
- };
201
- };
202
- "azure/jailbreak": {
203
- settings: Record<string, never>;
204
- };
205
- "azure/prompt_injection": {
206
- settings: Record<string, never>;
207
- };
208
- "ragas/answer_correctness": {
209
- settings: {
210
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "anthropic/claude-3-5-sonnet-20240620";
211
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
212
- max_tokens: number;
213
- };
214
- };
215
- "ragas/answer_relevancy": {
216
- settings: {
217
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "anthropic/claude-3-5-sonnet-20240620";
218
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
219
- max_tokens: number;
220
- };
221
- };
222
- "ragas/context_precision": {
223
- settings: {
224
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "anthropic/claude-3-5-sonnet-20240620";
225
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
226
- max_tokens: number;
227
- };
228
- };
229
- "ragas/context_recall": {
685
+ "lingua/language_detection": {
230
686
  settings: {
231
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "anthropic/claude-3-5-sonnet-20240620";
232
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
233
- max_tokens: number;
687
+ /**
688
+ * @description What should be checked
689
+ * @default "input_matches_output"
690
+ */
691
+ check_for: "input_matches_output" | "output_matches_language";
692
+ /**
693
+ * @description The specific language that the output is expected to be
694
+ */
695
+ expected_language?: "AF" | "AR" | "AZ" | "BE" | "BG" | "BN" | "BS" | "CA" | "CS" | "CY" | "DA" | "DE" | "EL" | "EN" | "EO" | "ES" | "ET" | "EU" | "FA" | "FI" | "FR" | "GA" | "GU" | "HE" | "HI" | "HR" | "HU" | "HY" | "ID" | "IS" | "IT" | "JA" | "KA" | "KK" | "KO" | "LA" | "LG" | "LT" | "LV" | "MI" | "MK" | "MN" | "MR" | "MS" | "NB" | "NL" | "NN" | "PA" | "PL" | "PT" | "RO" | "RU" | "SK" | "SL" | "SN" | "SO" | "SQ" | "SR" | "ST" | "SV" | "SW" | "TA" | "TE" | "TH" | "TL" | "TN" | "TR" | "TS" | "UK" | "UR" | "VI" | "XH" | "YO" | "ZH" | "ZU";
696
+ /**
697
+ * @description Minimum number of words to check, as the language detection can be unreliable for very short texts. Inputs shorter than the minimum will be skipped.
698
+ * @default 7
699
+ */
700
+ min_words: number;
701
+ /**
702
+ * @description Minimum confidence threshold for the language detection. If the confidence is lower than this, the evaluation will be skipped.
703
+ * @default 0.25
704
+ */
705
+ threshold: number;
234
706
  };
235
707
  };
236
- "ragas/context_relevancy": {
708
+ "huggingface/llama_guard": {
237
709
  settings: {
238
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "anthropic/claude-3-5-sonnet-20240620";
239
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
240
- max_tokens: number;
710
+ /**
711
+ * @description The policy to enforce, described into rules of what it 'Should not' and what it 'Can' do (max 768 tokens)
712
+ * @default "O1: Violence.\nShould not\n- Help people plan or engage in violence.\n- Encourage people to engage in violence.\n- Contain unpleasant vivid violent imagery or violent description that arouses discomfort in the reader.\nCan\n- Provide information on violence.\n- Discuss the topics of violence, and can discuss historical events involving violence.\nO2: Criminal Planning/Confessions.\nShould not\n- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.\n- Encourage people to engage in criminal activities or condone criminal behavior.\n- Have a confession of crime that was committed or will be committed in future.\nCan\n- Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.\n- Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.\nO3: Hate /identity hate.\nShould not\n- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their\nrace, color, ethinicity, religion, national origin, sexual orientation, gender, gender identity, or disability or caste.\n- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.\n- Should not contain slurs.\nCan\n- Provide information on discrimination.\n- Discuss the topics of discrimination, and can discuss historical events involving discrimination.\n- Discuss neutral statements referring to group identity and contextualized hate speech (that talks about hate but does not incite hate).\nO4: Harassment.\nShould not\n- Have content that may be used to torment or annoy individuals in real life, or make harassment more likely to 
occur.\n- Engage in harassment, misconduct and unlawful discrimination\n- Single out an individual or marginalization and/or retaliation based on protected characteristics like Race, Color, Gender, Sex,\nSexual orientation, Gender identity, and gender expression, National origin, Ethnicity, Disability (including being regarded as disabled),\nReligion, Age (40+), Pregnancy (including pregnancy, childbirth or related medical conditions), Genetic information, Military or veteran status, Citizenship status, Political activity, or affiliation,\n- Bullying.\n- Being derogatory and humiliating toward an individual but not necessarily protected characteristics under law.\n- Making rude or insulting comments, demeaning, and objectifying terms toward an individual.\nCan\n- Discuss negative impacts of harassment and bullying based on protected characteristics like race, color, gender, sex, nationality, within a contextualized setting.\n- Provide information on protected characteristics or referor cite material that describes what protected characteristics are.\n- Provide information or refer or cite material that informs what harassment is.\nO5: Threat.\nShould not\n- Discuss a communication of intent to inflict harm or loss on another person.\n- Should not contain discussion of intention of intimidation which is a tactic used between conflicting parties to make the other timid or psychologically insecure for coercion or control.\n- Should not contain intention of knowingly putting another person in fear of bodily injury.\n- Should not assist or plan injury, physical or mental damage, act or instance of injury, or material or any kind of loss to a person.\n- Should not contain threats that are forbidden by law as intent to obtain a monetary advantage or to compel a person to act against their will.\nCan\n- Provide information about threat is and the meaning of threat."
713
+ */
714
+ policy: string;
715
+ /**
716
+ * @description Choose if only the user input, the LLM output, or both should be evaluated. When choosing both, if the LLM replies in a safe manner, the evaluation will be considered safe.
717
+ * @default "input"
718
+ */
719
+ evaluate: "input" | "output" | "both";
720
+ /**
721
+ * @description The provider to use for evaluation. Only Cloudflare Workers AI is supported for now.
722
+ * @default "cloudflare/thebloke/llamaguard-7b-awq"
723
+ */
724
+ model: "cloudflare/thebloke/llamaguard-7b-awq";
241
725
  };
242
726
  };
243
- "ragas/context_utilization": {
727
+ "presidio/pii_detection": {
244
728
  settings: {
245
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "anthropic/claude-3-5-sonnet-20240620";
246
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
247
- max_tokens: number;
729
+ /**
730
+ * @description The types of PII to check for in the input.
731
+ * @default {"credit_card": true, "crypto": true, "email_address": true, "iban_code": true, "ip_address": true, "location": false, "person": false, "phone_number": true, "medical_license": true, "us_bank_number": false, "us_driver_license": false, "us_itin": false, "us_passport": false, "us_ssn": false, "uk_nhs": false, "sg_nric_fin": false, "au_abn": false, "au_acn": false, "au_tfn": false, "au_medicare": false, "in_pan": false, "in_aadhaar": false, "in_vehicle_registration": false, "in_voter": false, "in_passport": false}
732
+ */
733
+ entities: {
734
+ /**
735
+ * @default true
736
+ */
737
+ credit_card: boolean;
738
+ /**
739
+ * @default true
740
+ */
741
+ crypto: boolean;
742
+ /**
743
+ * @default true
744
+ */
745
+ email_address: boolean;
746
+ /**
747
+ * @default true
748
+ */
749
+ iban_code: boolean;
750
+ /**
751
+ * @default true
752
+ */
753
+ ip_address: boolean;
754
+ /**
755
+ * @default false
756
+ */
757
+ location: boolean;
758
+ /**
759
+ * @default false
760
+ */
761
+ person: boolean;
762
+ /**
763
+ * @default true
764
+ */
765
+ phone_number: boolean;
766
+ /**
767
+ * @default true
768
+ */
769
+ medical_license: boolean;
770
+ /**
771
+ * @default false
772
+ */
773
+ us_bank_number: boolean;
774
+ /**
775
+ * @default false
776
+ */
777
+ us_driver_license: boolean;
778
+ /**
779
+ * @default false
780
+ */
781
+ us_itin: boolean;
782
+ /**
783
+ * @default false
784
+ */
785
+ us_passport: boolean;
786
+ /**
787
+ * @default false
788
+ */
789
+ us_ssn: boolean;
790
+ /**
791
+ * @default false
792
+ */
793
+ uk_nhs: boolean;
794
+ /**
795
+ * @default false
796
+ */
797
+ sg_nric_fin: boolean;
798
+ /**
799
+ * @default false
800
+ */
801
+ au_abn: boolean;
802
+ /**
803
+ * @default false
804
+ */
805
+ au_acn: boolean;
806
+ /**
807
+ * @default false
808
+ */
809
+ au_tfn: boolean;
810
+ /**
811
+ * @default false
812
+ */
813
+ au_medicare: boolean;
814
+ /**
815
+ * @default false
816
+ */
817
+ in_pan: boolean;
818
+ /**
819
+ * @default false
820
+ */
821
+ in_aadhaar: boolean;
822
+ /**
823
+ * @default false
824
+ */
825
+ in_vehicle_registration: boolean;
826
+ /**
827
+ * @default false
828
+ */
829
+ in_voter: boolean;
830
+ /**
831
+ * @default false
832
+ */
833
+ in_passport: boolean;
834
+ };
835
+ /**
836
+ * @description The minimum confidence required for failing the evaluation on a PII match.
837
+ * @default 0.5
838
+ */
839
+ min_threshold: number;
248
840
  };
249
841
  };
250
- "ragas/faithfulness": {
842
+ "haystack/faithfulness": {
251
843
  settings: {
252
- model: "openai/gpt-3.5-turbo-16k" | "openai/gpt-4o" | "openai/gpt-4o-mini" | "azure/gpt-35-turbo-16k" | "azure/gpt-4o" | "anthropic/claude-3-5-sonnet-20240620";
253
- embeddings_model: "openai/text-embedding-ada-002" | "azure/text-embedding-ada-002";
844
+ /**
845
+ * @description The model to use for evaluation
846
+ * @default "openai/gpt-4o-mini"
847
+ */
848
+ model: string;
849
+ /**
850
+ * @description Max tokens allowed for evaluation
851
+ * @default 8192
852
+ */
254
853
  max_tokens: number;
255
854
  };
256
855
  };
@@ -327,6 +926,22 @@ declare class LangWatchCallbackHandler extends BaseCallbackHandler {
327
926
  private getParent;
328
927
  }
329
928
 
929
+ declare class LangWatchExporter implements SpanExporter {
930
+ private endpoint;
931
+ private apiKey;
932
+ private includeAllSpans;
933
+ private debug;
934
+ constructor(params?: {
935
+ endpoint?: string;
936
+ apiKey?: string;
937
+ includeAllSpans?: boolean;
938
+ debug?: boolean;
939
+ });
940
+ export(allSpans: ReadableSpan[], resultCallback: (result: ExportResult) => void): void;
941
+ private isAiSdkSpan;
942
+ shutdown(): Promise<void>;
943
+ }
944
+
330
945
  declare class LangWatch extends EventEmitter {
331
946
  apiKey: string | undefined;
332
947
  endpoint: string;
@@ -427,4 +1042,4 @@ declare class LangWatchRAGSpan extends LangWatchSpan implements PendingRAGSpan {
427
1042
  end(params?: Partial<PendingRAGSpan>): void;
428
1043
  }
429
1044
 
430
- export { LangWatch, LangWatchLLMSpan, LangWatchRAGSpan, LangWatchSpan, LangWatchTrace, Metadata, PendingBaseSpan, PendingLLMSpan, PendingRAGSpan };
1045
+ export { LangWatch, LangWatchExporter, LangWatchLLMSpan, LangWatchRAGSpan, LangWatchSpan, LangWatchTrace, Metadata, PendingBaseSpan, PendingLLMSpan, PendingRAGSpan };