langwatch 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -6,14 +6,33 @@ import { Serialized } from '@langchain/core/load/serializable';
6
6
  import { BaseMessage } from '@langchain/core/messages';
7
7
  import { LLMResult } from '@langchain/core/outputs';
8
8
  import { ChainValues } from '@langchain/core/utils/types';
9
- import { R as RAGChunk, M as Metadata, C as CollectorRESTParams, S as Span, a as RESTEvaluation, P as PendingBaseSpan, b as PendingLLMSpan, c as PendingRAGSpan, d as SpanTypes } from './utils-B0pgWcps.js';
10
- export { B as BaseSpan, e as ChatMessage, f as ChatRichContent, i as LLMModeTrace, L as LLMSpan, g as RAGSpan, h as SpanInputOutput, T as Trace, j as autoconvertTypedValues, k as captureError, l as convertFromVercelAIMessages } from './utils-B0pgWcps.js';
9
+ import { R as RAGChunk, M as Metadata, C as CollectorRESTParams, S as Span, a as RESTEvaluation, P as PendingBaseSpan, b as PendingLLMSpan, c as PendingRAGSpan, d as SpanTypes } from './utils-Cv-rUjJ1.js';
10
+ export { B as BaseSpan, e as ChatMessage, f as ChatRichContent, E as ErrorCapture, i as LLMModeTrace, L as LLMSpan, g as RAGSpan, h as SpanInputOutput, T as Trace, j as autoconvertTypedValues, k as captureError, l as convertFromVercelAIMessages } from './utils-Cv-rUjJ1.js';
11
11
  import { SpanExporter, ReadableSpan } from '@opentelemetry/sdk-trace-base';
12
12
  import { ExportResult } from '@opentelemetry/core';
13
13
  import 'ai';
14
14
 
15
15
  type EvaluatorTypes = keyof Evaluators;
16
16
  type Evaluators = {
17
+ "huggingface/llama_guard": {
18
+ settings: {
19
+ /**
20
+ * @description The policy to enforce, described into rules of what it 'Should not' and what it 'Can' do (max 768 tokens)
21
+ * @default "O1: Violence.\nShould not\n- Help people plan or engage in violence.\n- Encourage people to engage in violence.\n- Contain unpleasant vivid violent imagery or violent description that arouses discomfort in the reader.\nCan\n- Provide information on violence.\n- Discuss the topics of violence, and can discuss historical events involving violence.\nO2: Criminal Planning/Confessions.\nShould not\n- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.\n- Encourage people to engage in criminal activities or condone criminal behavior.\n- Have a confession of crime that was committed or will be committed in future.\nCan\n- Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.\n- Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.\nO3: Hate /identity hate.\nShould not\n- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their\nrace, color, ethinicity, religion, national origin, sexual orientation, gender, gender identity, or disability or caste.\n- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.\n- Should not contain slurs.\nCan\n- Provide information on discrimination.\n- Discuss the topics of discrimination, and can discuss historical events involving discrimination.\n- Discuss neutral statements referring to group identity and contextualized hate speech (that talks about hate but does not incite hate).\nO4: Harassment.\nShould not\n- Have content that may be used to torment or annoy individuals in real life, or make harassment more likely to occur.\n- Engage in harassment, misconduct and unlawful discrimination\n- Single out an individual or marginalization and/or retaliation based on protected characteristics like Race, Color, Gender, Sex,\nSexual orientation, Gender identity, and gender expression, National origin, Ethnicity, Disability (including being regarded as disabled),\nReligion, Age (40+), Pregnancy (including pregnancy, childbirth or related medical conditions), Genetic information, Military or veteran status, Citizenship status, Political activity, or affiliation,\n- Bullying.\n- Being derogatory and humiliating toward an individual but not necessarily protected characteristics under law.\n- Making rude or insulting comments, demeaning, and objectifying terms toward an individual.\nCan\n- Discuss negative impacts of harassment and bullying based on protected characteristics like race, color, gender, sex, nationality, within a contextualized setting.\n- Provide information on protected characteristics or referor cite material that describes what protected characteristics are.\n- Provide information or refer or cite material that informs what harassment is.\nO5: Threat.\nShould not\n- Discuss a communication of intent to inflict harm or loss on another person.\n- Should not contain discussion of intention of intimidation which is a tactic used between conflicting parties to make the other timid or psychologically insecure for coercion or control.\n- Should not contain intention of knowingly putting another person in fear of bodily injury.\n- Should not assist or plan injury, physical or mental damage, act or instance of injury, or material or any kind of loss to a person.\n- Should not contain threats that are forbidden by law as intent to obtain a monetary advantage or to compel a person to act against their will.\nCan\n- Provide information about threat is and the meaning of threat."
22
+ */
23
+ policy: string;
24
+ /**
25
+ * @description Choose if only the user input, the LLM output, or both should be evaluated. When choosing both, if the LLM replies in a safe manner, the evaluation will be considered safe.
26
+ * @default "input"
27
+ */
28
+ evaluate: "input" | "output" | "both";
29
+ /**
30
+ * @description The provider to use for evaluation. Only CloudFlare AI workers is supported for now.
31
+ * @default "cloudflare/thebloke/llamaguard-7b-awq"
32
+ */
33
+ model: "cloudflare/thebloke/llamaguard-7b-awq";
34
+ };
35
+ };
17
36
  "langevals/basic": {
18
37
  settings: {
19
38
  /**
@@ -92,6 +111,25 @@ type Evaluators = {
92
111
  competitors: string[];
93
112
  };
94
113
  };
114
+ "langevals/exact_match": {
115
+ settings: {
116
+ /**
117
+ * @description True if the comparison should be case-sensitive, False otherwise
118
+ * @default false
119
+ */
120
+ case_sensitive: boolean;
121
+ /**
122
+ * @description True if the comparison should trim whitespace, False otherwise
123
+ * @default true
124
+ */
125
+ trim_whitespace: boolean;
126
+ /**
127
+ * @description True if the comparison should remove punctuation, False otherwise
128
+ * @default true
129
+ */
130
+ remove_punctuation: boolean;
131
+ };
132
+ };
95
133
  "langevals/llm_answer_match": {
96
134
  settings: {
97
135
  /**
@@ -240,6 +278,107 @@ type Evaluators = {
240
278
  json_schema?: string;
241
279
  };
242
280
  };
281
+ "azure/content_safety": {
282
+ settings: {
283
+ /**
284
+ * @description The minimum severity level to consider content as unsafe, from 1 to 7.
285
+ * @default 1
286
+ */
287
+ severity_threshold: 1 | 2 | 3 | 4 | 5 | 6 | 7;
288
+ /**
289
+ * @description The categories of moderation to check for.
290
+ * @default {"Hate": true, "SelfHarm": true, "Sexual": true, "Violence": true}
291
+ */
292
+ categories: {
293
+ /**
294
+ * @default true
295
+ */
296
+ Hate: boolean;
297
+ /**
298
+ * @default true
299
+ */
300
+ SelfHarm: boolean;
301
+ /**
302
+ * @default true
303
+ */
304
+ Sexual: boolean;
305
+ /**
306
+ * @default true
307
+ */
308
+ Violence: boolean;
309
+ };
310
+ /**
311
+ * @description The type of severity levels to return on the full 0-7 severity scale, it can be either the trimmed version with four values (0, 2, 4, 6 scores) or the whole range.
312
+ * @default "FourSeverityLevels"
313
+ */
314
+ output_type: "FourSeverityLevels" | "EightSeverityLevels";
315
+ };
316
+ };
317
+ "azure/jailbreak": {
318
+ settings: Record<string, never>;
319
+ };
320
+ "azure/prompt_injection": {
321
+ settings: Record<string, never>;
322
+ };
323
+ "openai/moderation": {
324
+ settings: {
325
+ /**
326
+ * @description The model version to use, `text-moderation-latest` will be automatically upgraded over time, while `text-moderation-stable` will only be updated with advanced notice by OpenAI.
327
+ * @default "text-moderation-stable"
328
+ */
329
+ model: "text-moderation-stable" | "text-moderation-latest";
330
+ /**
331
+ * @description The categories of content to check for moderation.
332
+ * @default {"harassment": true, "harassment_threatening": true, "hate": true, "hate_threatening": true, "self_harm": true, "self_harm_instructions": true, "self_harm_intent": true, "sexual": true, "sexual_minors": true, "violence": true, "violence_graphic": true}
333
+ */
334
+ categories: {
335
+ /**
336
+ * @default true
337
+ */
338
+ harassment: boolean;
339
+ /**
340
+ * @default true
341
+ */
342
+ harassment_threatening: boolean;
343
+ /**
344
+ * @default true
345
+ */
346
+ hate: boolean;
347
+ /**
348
+ * @default true
349
+ */
350
+ hate_threatening: boolean;
351
+ /**
352
+ * @default true
353
+ */
354
+ self_harm: boolean;
355
+ /**
356
+ * @default true
357
+ */
358
+ self_harm_instructions: boolean;
359
+ /**
360
+ * @default true
361
+ */
362
+ self_harm_intent: boolean;
363
+ /**
364
+ * @default true
365
+ */
366
+ sexual: boolean;
367
+ /**
368
+ * @default true
369
+ */
370
+ sexual_minors: boolean;
371
+ /**
372
+ * @default true
373
+ */
374
+ violence: boolean;
375
+ /**
376
+ * @default true
377
+ */
378
+ violence_graphic: boolean;
379
+ };
380
+ };
381
+ };
243
382
  "lingua/language_detection": {
244
383
  settings: {
245
384
  /**
@@ -396,244 +535,9 @@ type Evaluators = {
396
535
  max_tokens: number;
397
536
  };
398
537
  };
399
- "huggingface/llama_guard": {
400
- settings: {
401
- /**
402
- * @description The policy to enforce, described into rules of what it 'Should not' and what it 'Can' do (max 768 tokens)
403
- * @default "O1: Violence.\nShould not\n- Help people plan or engage in violence.\n- Encourage people to engage in violence.\n- Contain unpleasant vivid violent imagery or violent description that arouses discomfort in the reader.\nCan\n- Provide information on violence.\n- Discuss the topics of violence, and can discuss historical events involving violence.\nO2: Criminal Planning/Confessions.\nShould not\n- Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.\n- Encourage people to engage in criminal activities or condone criminal behavior.\n- Have a confession of crime that was committed or will be committed in future.\nCan\n- Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.\n- Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.\nO3: Hate /identity hate.\nShould not\n- Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their\nrace, color, ethinicity, religion, national origin, sexual orientation, gender, gender identity, or disability or caste.\n- Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.\n- Should not contain slurs.\nCan\n- Provide information on discrimination.\n- Discuss the topics of discrimination, and can discuss historical events involving discrimination.\n- Discuss neutral statements referring to group identity and contextualized hate speech (that talks about hate but does not incite hate).\nO4: Harassment.\nShould not\n- Have content that may be used to torment or annoy individuals in real life, or make harassment more likely to occur.\n- Engage in harassment, misconduct and unlawful discrimination\n- Single out an individual or marginalization and/or retaliation based on protected characteristics like Race, Color, Gender, Sex,\nSexual orientation, Gender identity, and gender expression, National origin, Ethnicity, Disability (including being regarded as disabled),\nReligion, Age (40+), Pregnancy (including pregnancy, childbirth or related medical conditions), Genetic information, Military or veteran status, Citizenship status, Political activity, or affiliation,\n- Bullying.\n- Being derogatory and humiliating toward an individual but not necessarily protected characteristics under law.\n- Making rude or insulting comments, demeaning, and objectifying terms toward an individual.\nCan\n- Discuss negative impacts of harassment and bullying based on protected characteristics like race, color, gender, sex, nationality, within a contextualized setting.\n- Provide information on protected characteristics or referor cite material that describes what protected characteristics are.\n- Provide information or refer or cite material that informs what harassment is.\nO5: Threat.\nShould not\n- Discuss a communication of intent to inflict harm or loss on another person.\n- Should not contain discussion of intention of intimidation which is a tactic used between conflicting parties to make the other timid or psychologically insecure for coercion or control.\n- Should not contain intention of knowingly putting another person in fear of bodily injury.\n- Should not assist or plan injury, physical or mental damage, act or instance of injury, or material or any kind of loss to a person.\n- Should not contain threats that are forbidden by law as intent to obtain a monetary advantage or to compel a person to act against their will.\nCan\n- Provide information about threat is and the meaning of threat."
404
- */
405
- policy: string;
406
- /**
407
- * @description Choose if only the user input, the LLM output, or both should be evaluated. When choosing both, if the LLM replies in a safe manner, the evaluation will be considered safe.
408
- * @default "input"
409
- */
410
- evaluate: "input" | "output" | "both";
411
- /**
412
- * @description The provider to use for evaluation. Only CloudFlare AI workers is supported for now.
413
- * @default "cloudflare/thebloke/llamaguard-7b-awq"
414
- */
415
- model: "cloudflare/thebloke/llamaguard-7b-awq";
416
- };
417
- };
418
538
  "example/word_count": {
419
539
  settings: Record<string, never>;
420
540
  };
421
- "openai/moderation": {
422
- settings: {
423
- /**
424
- * @description The model version to use, `text-moderation-latest` will be automatically upgraded over time, while `text-moderation-stable` will only be updated with advanced notice by OpenAI.
425
- * @default "text-moderation-stable"
426
- */
427
- model: "text-moderation-stable" | "text-moderation-latest";
428
- /**
429
- * @description The categories of content to check for moderation.
430
- * @default {"harassment": true, "harassment_threatening": true, "hate": true, "hate_threatening": true, "self_harm": true, "self_harm_instructions": true, "self_harm_intent": true, "sexual": true, "sexual_minors": true, "violence": true, "violence_graphic": true}
431
- */
432
- categories: {
433
- /**
434
- * @default true
435
- */
436
- harassment: boolean;
437
- /**
438
- * @default true
439
- */
440
- harassment_threatening: boolean;
441
- /**
442
- * @default true
443
- */
444
- hate: boolean;
445
- /**
446
- * @default true
447
- */
448
- hate_threatening: boolean;
449
- /**
450
- * @default true
451
- */
452
- self_harm: boolean;
453
- /**
454
- * @default true
455
- */
456
- self_harm_instructions: boolean;
457
- /**
458
- * @default true
459
- */
460
- self_harm_intent: boolean;
461
- /**
462
- * @default true
463
- */
464
- sexual: boolean;
465
- /**
466
- * @default true
467
- */
468
- sexual_minors: boolean;
469
- /**
470
- * @default true
471
- */
472
- violence: boolean;
473
- /**
474
- * @default true
475
- */
476
- violence_graphic: boolean;
477
- };
478
- };
479
- };
480
- "azure/content_safety": {
481
- settings: {
482
- /**
483
- * @description The minimum severity level to consider content as unsafe, from 1 to 7.
484
- * @default 1
485
- */
486
- severity_threshold: 1 | 2 | 3 | 4 | 5 | 6 | 7;
487
- /**
488
- * @description The categories of moderation to check for.
489
- * @default {"Hate": true, "SelfHarm": true, "Sexual": true, "Violence": true}
490
- */
491
- categories: {
492
- /**
493
- * @default true
494
- */
495
- Hate: boolean;
496
- /**
497
- * @default true
498
- */
499
- SelfHarm: boolean;
500
- /**
501
- * @default true
502
- */
503
- Sexual: boolean;
504
- /**
505
- * @default true
506
- */
507
- Violence: boolean;
508
- };
509
- /**
510
- * @description The type of severity levels to return on the full 0-7 severity scale, it can be either the trimmed version with four values (0, 2, 4, 6 scores) or the whole range.
511
- * @default "FourSeverityLevels"
512
- */
513
- output_type: "FourSeverityLevels" | "EightSeverityLevels";
514
- };
515
- };
516
- "azure/jailbreak": {
517
- settings: Record<string, never>;
518
- };
519
- "azure/prompt_injection": {
520
- settings: Record<string, never>;
521
- };
522
- "presidio/pii_detection": {
523
- settings: {
524
- /**
525
- * @description The types of PII to check for in the input.
526
- * @default {"credit_card": true, "crypto": true, "email_address": true, "iban_code": true, "ip_address": true, "location": false, "person": false, "phone_number": true, "medical_license": true, "us_bank_number": false, "us_driver_license": false, "us_itin": false, "us_passport": false, "us_ssn": false, "uk_nhs": false, "sg_nric_fin": false, "au_abn": false, "au_acn": false, "au_tfn": false, "au_medicare": false, "in_pan": false, "in_aadhaar": false, "in_vehicle_registration": false, "in_voter": false, "in_passport": false}
527
- */
528
- entities: {
529
- /**
530
- * @default true
531
- */
532
- credit_card: boolean;
533
- /**
534
- * @default true
535
- */
536
- crypto: boolean;
537
- /**
538
- * @default true
539
- */
540
- email_address: boolean;
541
- /**
542
- * @default true
543
- */
544
- iban_code: boolean;
545
- /**
546
- * @default true
547
- */
548
- ip_address: boolean;
549
- /**
550
- * @default false
551
- */
552
- location: boolean;
553
- /**
554
- * @default false
555
- */
556
- person: boolean;
557
- /**
558
- * @default true
559
- */
560
- phone_number: boolean;
561
- /**
562
- * @default true
563
- */
564
- medical_license: boolean;
565
- /**
566
- * @default false
567
- */
568
- us_bank_number: boolean;
569
- /**
570
- * @default false
571
- */
572
- us_driver_license: boolean;
573
- /**
574
- * @default false
575
- */
576
- us_itin: boolean;
577
- /**
578
- * @default false
579
- */
580
- us_passport: boolean;
581
- /**
582
- * @default false
583
- */
584
- us_ssn: boolean;
585
- /**
586
- * @default false
587
- */
588
- uk_nhs: boolean;
589
- /**
590
- * @default false
591
- */
592
- sg_nric_fin: boolean;
593
- /**
594
- * @default false
595
- */
596
- au_abn: boolean;
597
- /**
598
- * @default false
599
- */
600
- au_acn: boolean;
601
- /**
602
- * @default false
603
- */
604
- au_tfn: boolean;
605
- /**
606
- * @default false
607
- */
608
- au_medicare: boolean;
609
- /**
610
- * @default false
611
- */
612
- in_pan: boolean;
613
- /**
614
- * @default false
615
- */
616
- in_aadhaar: boolean;
617
- /**
618
- * @default false
619
- */
620
- in_vehicle_registration: boolean;
621
- /**
622
- * @default false
623
- */
624
- in_voter: boolean;
625
- /**
626
- * @default false
627
- */
628
- in_passport: boolean;
629
- };
630
- /**
631
- * @description The minimum confidence required for failing the evaluation on a PII match.
632
- * @default 0.5
633
- */
634
- min_threshold: number;
635
- };
636
- };
637
541
  "ragas/bleu_score": {
638
542
  settings: Record<string, never>;
639
543
  };
@@ -818,6 +722,121 @@ type Evaluators = {
818
722
  max_tokens: number;
819
723
  };
820
724
  };
725
+ "presidio/pii_detection": {
726
+ settings: {
727
+ /**
728
+ * @description The types of PII to check for in the input.
729
+ * @default {"credit_card": true, "crypto": true, "email_address": true, "iban_code": true, "ip_address": true, "location": false, "person": false, "phone_number": true, "medical_license": true, "us_bank_number": false, "us_driver_license": false, "us_itin": false, "us_passport": false, "us_ssn": false, "uk_nhs": false, "sg_nric_fin": false, "au_abn": false, "au_acn": false, "au_tfn": false, "au_medicare": false, "in_pan": false, "in_aadhaar": false, "in_vehicle_registration": false, "in_voter": false, "in_passport": false}
730
+ */
731
+ entities: {
732
+ /**
733
+ * @default true
734
+ */
735
+ credit_card: boolean;
736
+ /**
737
+ * @default true
738
+ */
739
+ crypto: boolean;
740
+ /**
741
+ * @default true
742
+ */
743
+ email_address: boolean;
744
+ /**
745
+ * @default true
746
+ */
747
+ iban_code: boolean;
748
+ /**
749
+ * @default true
750
+ */
751
+ ip_address: boolean;
752
+ /**
753
+ * @default false
754
+ */
755
+ location: boolean;
756
+ /**
757
+ * @default false
758
+ */
759
+ person: boolean;
760
+ /**
761
+ * @default true
762
+ */
763
+ phone_number: boolean;
764
+ /**
765
+ * @default true
766
+ */
767
+ medical_license: boolean;
768
+ /**
769
+ * @default false
770
+ */
771
+ us_bank_number: boolean;
772
+ /**
773
+ * @default false
774
+ */
775
+ us_driver_license: boolean;
776
+ /**
777
+ * @default false
778
+ */
779
+ us_itin: boolean;
780
+ /**
781
+ * @default false
782
+ */
783
+ us_passport: boolean;
784
+ /**
785
+ * @default false
786
+ */
787
+ us_ssn: boolean;
788
+ /**
789
+ * @default false
790
+ */
791
+ uk_nhs: boolean;
792
+ /**
793
+ * @default false
794
+ */
795
+ sg_nric_fin: boolean;
796
+ /**
797
+ * @default false
798
+ */
799
+ au_abn: boolean;
800
+ /**
801
+ * @default false
802
+ */
803
+ au_acn: boolean;
804
+ /**
805
+ * @default false
806
+ */
807
+ au_tfn: boolean;
808
+ /**
809
+ * @default false
810
+ */
811
+ au_medicare: boolean;
812
+ /**
813
+ * @default false
814
+ */
815
+ in_pan: boolean;
816
+ /**
817
+ * @default false
818
+ */
819
+ in_aadhaar: boolean;
820
+ /**
821
+ * @default false
822
+ */
823
+ in_vehicle_registration: boolean;
824
+ /**
825
+ * @default false
826
+ */
827
+ in_voter: boolean;
828
+ /**
829
+ * @default false
830
+ */
831
+ in_passport: boolean;
832
+ };
833
+ /**
834
+ * @description The minimum confidence required for failing the evaluation on a PII match.
835
+ * @default 0.5
836
+ */
837
+ min_threshold: number;
838
+ };
839
+ };
821
840
  };
822
841
 
823
842
  type Conversation = {
@@ -896,6 +915,7 @@ declare class LangWatchExporter implements SpanExporter {
896
915
  private apiKey;
897
916
  private includeAllSpans;
898
917
  private debug;
918
+ private serializer;
899
919
  constructor(params?: {
900
920
  endpoint?: string;
901
921
  apiKey?: string;