arthur-common 2.1.58__py3-none-any.whl → 2.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. arthur_common/aggregations/aggregator.py +73 -9
  2. arthur_common/aggregations/functions/agentic_aggregations.py +260 -85
  3. arthur_common/aggregations/functions/categorical_count.py +15 -15
  4. arthur_common/aggregations/functions/confusion_matrix.py +24 -26
  5. arthur_common/aggregations/functions/inference_count.py +5 -9
  6. arthur_common/aggregations/functions/inference_count_by_class.py +16 -27
  7. arthur_common/aggregations/functions/inference_null_count.py +10 -13
  8. arthur_common/aggregations/functions/mean_absolute_error.py +12 -18
  9. arthur_common/aggregations/functions/mean_squared_error.py +12 -18
  10. arthur_common/aggregations/functions/multiclass_confusion_matrix.py +13 -20
  11. arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +1 -1
  12. arthur_common/aggregations/functions/numeric_stats.py +13 -15
  13. arthur_common/aggregations/functions/numeric_sum.py +12 -15
  14. arthur_common/aggregations/functions/shield_aggregations.py +457 -215
  15. arthur_common/models/common_schemas.py +214 -0
  16. arthur_common/models/connectors.py +10 -2
  17. arthur_common/models/constants.py +24 -0
  18. arthur_common/models/datasets.py +0 -9
  19. arthur_common/models/enums.py +177 -0
  20. arthur_common/models/metric_schemas.py +63 -0
  21. arthur_common/models/metrics.py +2 -9
  22. arthur_common/models/request_schemas.py +870 -0
  23. arthur_common/models/response_schemas.py +785 -0
  24. arthur_common/models/schema_definitions.py +6 -1
  25. arthur_common/models/task_job_specs.py +3 -12
  26. arthur_common/tools/duckdb_data_loader.py +34 -2
  27. arthur_common/tools/duckdb_utils.py +3 -6
  28. arthur_common/tools/schema_inferer.py +3 -6
  29. {arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/METADATA +12 -4
  30. arthur_common-2.4.13.dist-info/RECORD +49 -0
  31. arthur_common/models/shield.py +0 -642
  32. arthur_common-2.1.58.dist-info/RECORD +0 -44
  33. {arthur_common-2.1.58.dist-info → arthur_common-2.4.13.dist-info}/WHEEL +0 -0
@@ -1,642 +0,0 @@
1
- from datetime import datetime
2
- from enum import Enum
3
- from typing import Any, Dict, List, Optional, Self, Type, Union
4
-
5
- from fastapi import HTTPException
6
- from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
7
-
8
- DEFAULT_TOXICITY_RULE_THRESHOLD = 0.5
9
- DEFAULT_PII_RULE_CONFIDENCE_SCORE_THRESHOLD = 0
10
-
11
-
12
- class RuleType(str, Enum):
13
- KEYWORD = "KeywordRule"
14
- MODEL_HALLUCINATION_V2 = "ModelHallucinationRuleV2"
15
- MODEL_SENSITIVE_DATA = "ModelSensitiveDataRule"
16
- PII_DATA = "PIIDataRule"
17
- PROMPT_INJECTION = "PromptInjectionRule"
18
- REGEX = "RegexRule"
19
- TOXICITY = "ToxicityRule"
20
-
21
- def __str__(self) -> str:
22
- return self.value
23
-
24
-
25
- class RuleScope(str, Enum):
26
- DEFAULT = "default"
27
- TASK = "task"
28
-
29
-
30
- class MetricType(str, Enum):
31
- QUERY_RELEVANCE = "QueryRelevance"
32
- RESPONSE_RELEVANCE = "ResponseRelevance"
33
- TOOL_SELECTION = "ToolSelection"
34
-
35
- def __str__(self):
36
- return self.value
37
-
38
-
39
- class BaseEnum(str, Enum):
40
- @classmethod
41
- def values(cls) -> list[Any]:
42
- values: list[str] = [e for e in cls]
43
- return values
44
-
45
- def __str__(self) -> Any:
46
- return self.value
47
-
48
-
49
- # Note: These string values are not arbitrary and map to Presidio entity types: https://microsoft.github.io/presidio/supported_entities/
50
- class PIIEntityTypes(BaseEnum):
51
- CREDIT_CARD = "CREDIT_CARD"
52
- CRYPTO = "CRYPTO"
53
- DATE_TIME = "DATE_TIME"
54
- EMAIL_ADDRESS = "EMAIL_ADDRESS"
55
- IBAN_CODE = "IBAN_CODE"
56
- IP_ADDRESS = "IP_ADDRESS"
57
- NRP = "NRP"
58
- LOCATION = "LOCATION"
59
- PERSON = "PERSON"
60
- PHONE_NUMBER = "PHONE_NUMBER"
61
- MEDICAL_LICENSE = "MEDICAL_LICENSE"
62
- URL = "URL"
63
- US_BANK_NUMBER = "US_BANK_NUMBER"
64
- US_DRIVER_LICENSE = "US_DRIVER_LICENSE"
65
- US_ITIN = "US_ITIN"
66
- US_PASSPORT = "US_PASSPORT"
67
- US_SSN = "US_SSN"
68
-
69
- @classmethod
70
- def to_string(cls) -> str:
71
- return ",".join(member.value for member in cls)
72
-
73
-
74
- class KeywordsConfig(BaseModel):
75
- keywords: List[str] = Field(description="List of Keywords")
76
-
77
- model_config = ConfigDict(
78
- json_schema_extra={
79
- "example": {"keywords": ["Blocked_Keyword_1", "Blocked_Keyword_2"]},
80
- },
81
- )
82
-
83
-
84
- class RegexConfig(BaseModel):
85
- regex_patterns: List[str] = Field(
86
- description="List of Regex patterns to be used for validation. Be sure to encode requests in JSON and account for escape characters.",
87
- )
88
-
89
- model_config = ConfigDict(
90
- json_schema_extra={
91
- "example": {
92
- "regex_patterns": ["\\d{3}-\\d{2}-\\d{4}", "\\d{5}-\\d{6}-\\d{7}"],
93
- },
94
- },
95
- extra="forbid",
96
- )
97
-
98
-
99
- class ToxicityConfig(BaseModel):
100
- threshold: float = Field(
101
- default=DEFAULT_TOXICITY_RULE_THRESHOLD,
102
- description=f"Optional. Float (0, 1) indicating the level of tolerable toxicity to consider the rule passed or failed. Min: 0 (no toxic language) Max: 1 (very toxic language). Default: {DEFAULT_TOXICITY_RULE_THRESHOLD}",
103
- )
104
-
105
- model_config = ConfigDict(
106
- extra="forbid",
107
- json_schema_extra={"example": {"threshold": DEFAULT_TOXICITY_RULE_THRESHOLD}},
108
- )
109
-
110
- @field_validator("threshold")
111
- def validate_toxicity_threshold(cls, v: float) -> float:
112
- if v and ((v < 0) | (v > 1)):
113
- raise ValueError(f'"threshold" must be between 0 and 1')
114
- return v
115
-
116
-
117
- class PIIConfig(BaseModel):
118
- disabled_pii_entities: Optional[list[str]] = Field(
119
- description=f"Optional. List of PII entities to disable. Valid values are: {PIIEntityTypes.to_string()}",
120
- default=None,
121
- )
122
-
123
- confidence_threshold: Optional[float] = Field(
124
- description=f"Optional. Float (0, 1) indicating the level of tolerable PII to consider the rule passed or failed. Min: 0 (less confident) Max: 1 (very confident). Default: {DEFAULT_PII_RULE_CONFIDENCE_SCORE_THRESHOLD}",
125
- default=DEFAULT_PII_RULE_CONFIDENCE_SCORE_THRESHOLD,
126
- json_schema_extra={"deprecated": True},
127
- )
128
-
129
- allow_list: Optional[list[str]] = Field(
130
- description="Optional. List of strings to pass PII validation.",
131
- default=None,
132
- )
133
-
134
- @field_validator("disabled_pii_entities")
135
- def validate_pii_entities(cls, v: Optional[List[str]]) -> Optional[List[str]]:
136
- if v:
137
- entities_passed = set(v)
138
- entities_supported = set(PIIEntityTypes.values())
139
- invalid_entities = entities_passed - entities_supported
140
- if invalid_entities:
141
- raise ValueError(
142
- f"The following values are not valid PII entities: {invalid_entities}",
143
- )
144
-
145
- # Fail the case where they are trying to disable all PII entity types
146
- if (not invalid_entities) & (
147
- len(entities_passed) == len(entities_supported)
148
- ):
149
- raise ValueError(
150
- f"Cannot disable all supported PII entities on PIIDataRule",
151
- )
152
- return v
153
- else:
154
- return v
155
-
156
- @field_validator("confidence_threshold")
157
- def validate_confidence_threshold(cls, v: Optional[float]) -> Optional[float]:
158
- if v and ((v < 0) | (v > 1)):
159
- raise ValueError(f'"confidence_threshold" must be between 0 and 1')
160
- return v
161
-
162
- model_config = ConfigDict(
163
- json_schema_extra={
164
- "example": {
165
- "disabled_pii_entities": ["PERSON", "URL"],
166
- "confidence_threshold": "0.5",
167
- "allow_list": ["arthur.ai", "Arthur"],
168
- },
169
- },
170
- extra="forbid",
171
- )
172
-
173
-
174
- NEGATIVE_BLOOD_EXAMPLE = "John has O negative blood group"
175
-
176
-
177
- class ExampleConfig(BaseModel):
178
- example: str = Field(description="Custom example for the sensitive data")
179
- result: bool = Field(
180
- description="Boolean value representing if the example passes or fails the the sensitive "
181
- "data rule ",
182
- )
183
-
184
- model_config = ConfigDict(
185
- json_schema_extra={
186
- "example": {"example": NEGATIVE_BLOOD_EXAMPLE, "result": True},
187
- },
188
- )
189
-
190
-
191
- class ExamplesConfig(BaseModel):
192
- examples: List[ExampleConfig] = Field(
193
- description="List of all the examples for Sensitive Data Rule",
194
- )
195
-
196
- model_config = ConfigDict(
197
- json_schema_extra={
198
- "example": {
199
- "examples": [
200
- {"example": NEGATIVE_BLOOD_EXAMPLE, "result": True},
201
- {
202
- "example": "Most of the people have A positive blood group",
203
- "result": False,
204
- },
205
- ],
206
- "hint": "specific individual's blood type",
207
- },
208
- },
209
- )
210
- hint: Optional[str] = Field(
211
- description="Optional. Hint added to describe what Sensitive Data Rule should be checking for",
212
- default=None,
213
- )
214
-
215
- def to_dict(self) -> Dict[str, Any]:
216
- d = self.__dict__
217
- d["examples"] = [ex.__dict__ for ex in self.examples]
218
- d["hint"] = self.hint
219
- return d
220
-
221
-
222
- class RuleResponse(BaseModel):
223
- id: str = Field(description="ID of the Rule")
224
- name: str = Field(description="Name of the Rule")
225
- type: RuleType = Field(description="Type of Rule")
226
- apply_to_prompt: bool = Field(description="Rule applies to prompt")
227
- apply_to_response: bool = Field(description="Rule applies to response")
228
- enabled: Optional[bool] = Field(
229
- description="Rule is enabled for the task",
230
- default=None,
231
- )
232
- scope: RuleScope = Field(
233
- description="Scope of the rule. The rule can be set at default level or task level.",
234
- )
235
- # UNIX millis format
236
- created_at: int = Field(
237
- description="Time the rule was created in unix milliseconds",
238
- )
239
- updated_at: int = Field(
240
- description="Time the rule was updated in unix milliseconds",
241
- )
242
- # added a title to this to differentiate it in the generated client from the
243
- # config field on the NewRuleRequest object
244
- config: Optional[
245
- Union[KeywordsConfig, RegexConfig, ExamplesConfig, ToxicityConfig, PIIConfig]
246
- ] = Field(
247
- description="Config of the rule",
248
- default=None,
249
- title="Rule Response Config",
250
- )
251
-
252
-
253
- class MetricResponse(BaseModel):
254
- id: str = Field(description="ID of the Metric")
255
- name: str = Field(description="Name of the Metric")
256
- type: MetricType = Field(description="Type of the Metric")
257
- metric_metadata: str = Field(description="Metadata of the Metric")
258
- config: Optional[str] = Field(
259
- description="JSON-serialized configuration for the Metric",
260
- default=None,
261
- )
262
- created_at: datetime = Field(
263
- description="Time the Metric was created in unix milliseconds",
264
- )
265
- updated_at: datetime = Field(
266
- description="Time the Metric was updated in unix milliseconds",
267
- )
268
- enabled: Optional[bool] = Field(
269
- description="Whether the Metric is enabled",
270
- default=None,
271
- )
272
-
273
-
274
- class TaskResponse(BaseModel):
275
- id: str = Field(description=" ID of the task")
276
- name: str = Field(description="Name of the task")
277
- created_at: int = Field(
278
- description="Time the task was created in unix milliseconds",
279
- )
280
- updated_at: int = Field(
281
- description="Time the task was created in unix milliseconds",
282
- )
283
- is_agentic: Optional[bool] = Field(
284
- description="Whether the task is agentic or not",
285
- default=None,
286
- )
287
- rules: List[RuleResponse] = Field(description="List of all the rules for the task.")
288
- metrics: Optional[List[MetricResponse]] = Field(
289
- description="List of all the metrics for the task.",
290
- default=None,
291
- )
292
-
293
-
294
- class UpdateRuleRequest(BaseModel):
295
- enabled: bool = Field(description="Boolean value to enable or disable the rule. ")
296
-
297
-
298
- HALLUCINATION_RULE_NAME = "Hallucination Rule"
299
-
300
-
301
- class NewRuleRequest(BaseModel):
302
- name: str = Field(description="Name of the rule", examples=["SSN Regex Rule"])
303
- type: str = Field(
304
- description="Type of the rule. It can only be one of KeywordRule, RegexRule, "
305
- "ModelSensitiveDataRule, ModelHallucinationRule, ModelHallucinationRuleV2, PromptInjectionRule, PIIDataRule",
306
- examples=["RegexRule"],
307
- )
308
- apply_to_prompt: bool = Field(
309
- description="Boolean value to enable or disable the rule for llm prompt",
310
- examples=[True],
311
- )
312
- apply_to_response: bool = Field(
313
- description="Boolean value to enable or disable the rule for llm response",
314
- examples=[False],
315
- )
316
- config: Optional[
317
- Union[RegexConfig, KeywordsConfig, ToxicityConfig, PIIConfig, ExamplesConfig]
318
- ] = Field(description="Config for the rule", default=None)
319
-
320
- model_config = ConfigDict(
321
- json_schema_extra={
322
- "example1": {
323
- "summary": "Sensitive Data Example",
324
- "description": "Sensitive Data Example with its required configuration",
325
- "value": {
326
- "name": "Sensitive Data Rule",
327
- "type": "ModelSensitiveDataRule",
328
- "apply_to_prompt": True,
329
- "apply_to_response": False,
330
- "config": {
331
- "examples": [
332
- {
333
- "example": NEGATIVE_BLOOD_EXAMPLE,
334
- "result": True,
335
- },
336
- {
337
- "example": "Most of the people have A positive blood group",
338
- "result": False,
339
- },
340
- ],
341
- "hint": "specific individual's blood types",
342
- },
343
- },
344
- },
345
- "example2": {
346
- "summary": "Regex Example",
347
- "description": "Regex Example with its required configuration. Be sure to properly encode requests "
348
- "using JSON libraries. For example, the regex provided encodes to a different string "
349
- "when encoded to account for escape characters.",
350
- "value": {
351
- "name": "SSN Regex Rule",
352
- "type": "RegexRule",
353
- "apply_to_prompt": True,
354
- "apply_to_response": True,
355
- "config": {
356
- "regex_patterns": [
357
- "\\d{3}-\\d{2}-\\d{4}",
358
- "\\d{5}-\\d{6}-\\d{7}",
359
- ],
360
- },
361
- },
362
- },
363
- "example3": {
364
- "summary": "Keywords Rule Example",
365
- "description": "Keywords Rule Example with its required configuration",
366
- "value": {
367
- "name": "Blocked Keywords Rule",
368
- "type": "KeywordRule",
369
- "apply_to_prompt": True,
370
- "apply_to_response": True,
371
- "config": {"keywords": ["Blocked_Keyword_1", "Blocked_Keyword_2"]},
372
- },
373
- },
374
- "example4": {
375
- "summary": "Prompt Injection Rule Example",
376
- "description": "Prompt Injection Rule Example, no configuration required",
377
- "value": {
378
- "name": "Prompt Injection Rule",
379
- "type": "PromptInjectionRule",
380
- "apply_to_prompt": True,
381
- "apply_to_response": False,
382
- },
383
- },
384
- "example5": {
385
- "summary": "Hallucination Rule V1 Example (Deprecated)",
386
- "description": "Hallucination Rule Example, no configuration required (This rule is deprecated. Use "
387
- "ModelHallucinationRuleV2 instead.)",
388
- "value": {
389
- "name": HALLUCINATION_RULE_NAME,
390
- "type": "ModelHallucinationRule",
391
- "apply_to_prompt": False,
392
- "apply_to_response": True,
393
- },
394
- },
395
- "example6": {
396
- "summary": "Hallucination Rule V2 Example",
397
- "description": "Hallucination Rule Example, no configuration required",
398
- "value": {
399
- "name": HALLUCINATION_RULE_NAME,
400
- "type": "ModelHallucinationRuleV2",
401
- "apply_to_prompt": False,
402
- "apply_to_response": True,
403
- },
404
- },
405
- "example7": {
406
- "summary": "Hallucination Rule V3 Example (Beta)",
407
- "description": "Hallucination Rule Example, no configuration required. This rule is in beta and must "
408
- "be enabled by the system administrator.",
409
- "value": {
410
- "name": HALLUCINATION_RULE_NAME,
411
- "type": "ModelHallucinationRuleV3",
412
- "apply_to_prompt": False,
413
- "apply_to_response": True,
414
- },
415
- },
416
- "example8": {
417
- "summary": "PII Rule Example",
418
- "description": f'PII Rule Example, no configuration required. "disabled_pii_entities", '
419
- f'"confidence_threshold", and "allow_list" accepted. Valid value for '
420
- f'"confidence_threshold" is 0.0-1.0. Valid values for "disabled_pii_entities" '
421
- f"are {PIIEntityTypes.to_string()}",
422
- "value": {
423
- "name": "PII Rule",
424
- "type": "PIIDataRule",
425
- "apply_to_prompt": True,
426
- "apply_to_response": True,
427
- "config": {
428
- "disabled_pii_entities": [
429
- "EMAIL_ADDRESS",
430
- "PHONE_NUMBER",
431
- ],
432
- "confidence_threshold": "0.5",
433
- "allow_list": ["arthur.ai", "Arthur"],
434
- },
435
- },
436
- },
437
- "example9": {
438
- "summary": "Toxicity Rule Example",
439
- "description": "Toxicity Rule Example, no configuration required. Threshold accepted",
440
- "value": {
441
- "name": "Toxicity Rule",
442
- "type": "ToxicityRule",
443
- "apply_to_prompt": True,
444
- "apply_to_response": True,
445
- "config": {"threshold": 0.5},
446
- },
447
- },
448
- },
449
- )
450
-
451
- @model_validator(mode="before")
452
- def set_config_type(cls, values: Dict[str, Any]) -> Dict[str, Any]:
453
- config_type_to_class: Dict[str, Type[BaseModel]] = {
454
- RuleType.REGEX: RegexConfig,
455
- RuleType.KEYWORD: KeywordsConfig,
456
- RuleType.TOXICITY: ToxicityConfig,
457
- RuleType.PII_DATA: PIIConfig,
458
- RuleType.MODEL_SENSITIVE_DATA: ExamplesConfig,
459
- }
460
-
461
- config_type = values["type"]
462
- config_class = config_type_to_class.get(config_type)
463
-
464
- if config_class is not None:
465
- config_values = values.get("config")
466
- if config_values is None:
467
- if config_type in [RuleType.REGEX, RuleType.KEYWORD]:
468
- raise HTTPException(
469
- status_code=400,
470
- detail="This rule must be created with a config parameter",
471
- )
472
- config_values = {}
473
- if isinstance(config_values, BaseModel):
474
- config_values = config_values.model_dump()
475
- values["config"] = config_class(**config_values)
476
- return values
477
-
478
- @model_validator(mode="after")
479
- def check_prompt_or_response(self) -> Self:
480
- if (self.type == RuleType.MODEL_SENSITIVE_DATA) and (
481
- self.apply_to_response is True
482
- ):
483
- raise HTTPException(
484
- status_code=400,
485
- detail="ModelSensitiveDataRule can only be enabled for prompt. Please set the 'apply_to_response' "
486
- "field to false.",
487
- )
488
- if (self.type == RuleType.PROMPT_INJECTION) and (
489
- self.apply_to_response is True
490
- ):
491
- raise HTTPException(
492
- status_code=400,
493
- detail="PromptInjectionRule can only be enabled for prompt. Please set the 'apply_to_response' field "
494
- "to false.",
495
- )
496
- if (self.type == RuleType.MODEL_HALLUCINATION_V2) and (
497
- self.apply_to_prompt is True
498
- ):
499
- raise HTTPException(
500
- status_code=400,
501
- detail="ModelHallucinationRuleV2 can only be enabled for response. Please set the 'apply_to_prompt' "
502
- "field to false.",
503
- )
504
- if (self.apply_to_prompt is False) and (self.apply_to_response is False):
505
- raise HTTPException(
506
- status_code=400,
507
- detail="Rule must be either applied to the prompt or to the response.",
508
- )
509
-
510
- return self
511
-
512
- @model_validator(mode="after")
513
- def check_examples_non_null(self) -> Self:
514
- if self.type == RuleType.MODEL_SENSITIVE_DATA:
515
- config = self.config
516
- if (
517
- config is not None
518
- and isinstance(config, ExamplesConfig)
519
- and (config.examples is None or len(config.examples) == 0)
520
- ):
521
- raise HTTPException(
522
- status_code=400,
523
- detail="Examples must be provided to onboard a ModelSensitiveDataRule",
524
- )
525
- return self
526
-
527
-
528
- class RelevanceMetricConfig(BaseModel):
529
- """Configuration for relevance metrics including QueryRelevance and ResponseRelevance"""
530
-
531
- relevance_threshold: Optional[float] = Field(
532
- default=None,
533
- description="Threshold for determining relevance when not using LLM judge",
534
- )
535
- use_llm_judge: bool = Field(
536
- default=True,
537
- description="Whether to use LLM as a judge for relevance scoring",
538
- )
539
-
540
-
541
- class NewMetricRequest(BaseModel):
542
- type: MetricType = Field(
543
- description="Type of the metric. It can only be one of QueryRelevance, ResponseRelevance, ToolSelection",
544
- examples=["UserQueryRelevance"],
545
- )
546
- name: str = Field(
547
- description="Name of metric",
548
- examples=["My User Query Relevance"],
549
- )
550
- metric_metadata: str = Field(description="Additional metadata for the metric")
551
- config: Optional[RelevanceMetricConfig] = Field(
552
- description="Configuration for the metric. Currently only applies to UserQueryRelevance and ResponseRelevance metric types.",
553
- default=None,
554
- )
555
-
556
- model_config = ConfigDict(
557
- json_schema_extra={
558
- "example1": {
559
- "type": "QueryRelevance",
560
- "name": "My User Query Relevance",
561
- "metric_metadata": "This is a test metric metadata",
562
- },
563
- "example2": {
564
- "type": "QueryRelevance",
565
- "name": "My User Query Relevance with Config",
566
- "metric_metadata": "This is a test metric metadata",
567
- "config": {"relevance_threshold": 0.8, "use_llm_judge": False},
568
- },
569
- "example3": {
570
- "type": "ResponseRelevance",
571
- "name": "My Response Relevance",
572
- "metric_metadata": "This is a test metric metadata",
573
- "config": {"use_llm_judge": True},
574
- },
575
- },
576
- )
577
-
578
- @field_validator("type")
579
- def validate_metric_type(cls, value):
580
- if value not in MetricType:
581
- raise ValueError(
582
- f"Invalid metric type: {value}. Valid types are: {', '.join([t.value for t in MetricType])}",
583
- )
584
- return value
585
-
586
- @model_validator(mode="before")
587
- def set_config_type(cls, values):
588
- if not isinstance(values, dict):
589
- return values
590
-
591
- metric_type = values.get("type")
592
- config_values = values.get("config")
593
-
594
- # Map metric types to their corresponding config classes
595
- metric_type_to_config = {
596
- MetricType.QUERY_RELEVANCE: RelevanceMetricConfig,
597
- MetricType.RESPONSE_RELEVANCE: RelevanceMetricConfig,
598
- # Add new metric types and their configs here as needed
599
- }
600
-
601
- config_class = metric_type_to_config.get(metric_type)
602
-
603
- if config_class is not None:
604
- if config_values is None:
605
- # Default config when none is provided
606
- config_values = {"use_llm_judge": True}
607
- elif isinstance(config_values, dict):
608
- relevance_threshold = config_values.get("relevance_threshold")
609
- use_llm_judge = config_values.get("use_llm_judge")
610
-
611
- # Handle mutually exclusive parameters
612
- if relevance_threshold is not None and use_llm_judge:
613
- raise HTTPException(
614
- status_code=400,
615
- detail="relevance_threshold and use_llm_judge=true are mutually exclusive. Set use_llm_judge=false when using relevance_threshold.",
616
- headers={"full_stacktrace": "false"},
617
- )
618
-
619
- # If relevance_threshold is set but use_llm_judge isn't, set use_llm_judge to false
620
- if relevance_threshold is not None and use_llm_judge is None:
621
- config_values["use_llm_judge"] = False
622
-
623
- # If neither is set, default to use_llm_judge=True
624
- if relevance_threshold is None and (
625
- use_llm_judge is None or use_llm_judge == False
626
- ):
627
- config_values["use_llm_judge"] = True
628
-
629
- if isinstance(config_values, BaseModel):
630
- config_values = config_values.model_dump()
631
-
632
- values["config"] = config_class(**config_values)
633
- elif config_values is not None:
634
- # Provide a nice error message listing supported metric types
635
- supported_types = [t.value for t in metric_type_to_config.keys()]
636
- raise HTTPException(
637
- status_code=400,
638
- detail=f"Config is only supported for {', '.join(supported_types)} metric types",
639
- headers={"full_stacktrace": "false"},
640
- )
641
-
642
- return values
@@ -1,44 +0,0 @@
1
- arthur_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- arthur_common/aggregations/__init__.py,sha256=vISWyciQAtksa71OKeHNP-QyFGd1NzBKq_LBsG0QSG8,67
3
- arthur_common/aggregations/aggregator.py,sha256=Hbnk9Wc3asmHdW7Noaf8gEbtACVcE8s44O-7_a6igVA,7969
4
- arthur_common/aggregations/functions/README.md,sha256=MkZoTAJ94My96R5Z8GAxud7S6vyR0vgVi9gqdt9a4XY,5460
5
- arthur_common/aggregations/functions/__init__.py,sha256=HqC3UNRURX7ZQHgamTrQvfA8u_FiZGZ4I4eQW7Ooe5o,1299
6
- arthur_common/aggregations/functions/agentic_aggregations.py,sha256=AXPuIgESf-q2JG4vRc8XYARFbI8R92e7uaR7cgaTMqY,33401
7
- arthur_common/aggregations/functions/categorical_count.py,sha256=wc1ovL8JoiSeoSTk9h1fgrLj1QuQeYYZmEqgffGc2cw,5328
8
- arthur_common/aggregations/functions/confusion_matrix.py,sha256=Zac-biMeIVyLRcMXWmENgYq8X4I7Trm8gOE5NRLGKU0,22108
9
- arthur_common/aggregations/functions/inference_count.py,sha256=SrRfxQVnX-wRTZ1zbqUKupPdACvfKeUpZDidZs45ZUY,4079
10
- arthur_common/aggregations/functions/inference_count_by_class.py,sha256=aRSimiiK4F6PxLgq_njTrVCmvjljw4sNst0Qzq4oSV0,11554
11
- arthur_common/aggregations/functions/inference_null_count.py,sha256=w9sfu1QDlVBJwMW5EEkgda65nyMAABzd-FBKtj8amw4,4825
12
- arthur_common/aggregations/functions/mean_absolute_error.py,sha256=T8HfXhs9V6enP1U3dmy7zXtJjnvsV0IVe3VfKzlENMM,6834
13
- arthur_common/aggregations/functions/mean_squared_error.py,sha256=Zs_6z_agA4sTNllZTn7fLfiDH62Vynmrf44kn6vyceA,6855
14
- arthur_common/aggregations/functions/multiclass_confusion_matrix.py,sha256=YPtR1Jtrdnf50qL5-c7lDFruTDu7KyZZg2V-Us7Cd2Y,12615
15
- arthur_common/aggregations/functions/multiclass_inference_count_by_class.py,sha256=r6MMDbtJA03R38mzvMhwW-h6IkNC8VFvWg45MRGkfHM,4264
16
- arthur_common/aggregations/functions/numeric_stats.py,sha256=uHTyOAHW6xF6D-TeFLtY16iVR-Ju_6lmXSSY77mH0Qs,4921
17
- arthur_common/aggregations/functions/numeric_sum.py,sha256=kGE6Jjnjwf2E4TKE3NwPyrlEKgygfCxv1z_YGDCOcCQ,5028
18
- arthur_common/aggregations/functions/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- arthur_common/aggregations/functions/shield_aggregations.py,sha256=1dAx8s2_xgEsKeQcpCOE35UIaTah8zuWH-hoLFztaoA,35662
20
- arthur_common/aggregations/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- arthur_common/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- arthur_common/config/config.py,sha256=fcpjOYjPKu4Duk63CuTHrOWKQKAlAhVUR60kF_2_Xog,1247
23
- arthur_common/config/settings.yaml,sha256=0CrygUwJzC5mGcO5Xnvv2ttp-P7LIsx682jllYA96NQ,161
24
- arthur_common/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- arthur_common/models/connectors.py,sha256=5f5DUgOQ16P3lBPZ0zpUv9kTAqw45Agrl526F-iFJes,1862
26
- arthur_common/models/datasets.py,sha256=oO-HgZ_OZW-E9DlQYwxkw2T31jwZEqYaB3NvkbYAiYI,527
27
- arthur_common/models/metrics.py,sha256=vNgXaKNIgkLS2sjmUSsWIRLdbaP8zZUn8dLNWefrvho,11353
28
- arthur_common/models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- arthur_common/models/schema_definitions.py,sha256=4FSbL51RvOgeikNnVfCVSXmYDNzkyqtEKC2a6FjwRqI,16879
30
- arthur_common/models/shield.py,sha256=T7mZtd1tl5Ecq2DwqYgGkmAobArkTN-aTWxBxIzzJpw,24303
31
- arthur_common/models/task_job_specs.py,sha256=xYej0vtHE5zvBQ-ka9Rn4N1lQtR1XXgbGVzhzemiL64,3509
32
- arthur_common/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- arthur_common/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- arthur_common/tools/aggregation_analyzer.py,sha256=UfMtvFWXV2Dqly8S6nneGgomuvEGN-1tBz81tfkMcAE,11206
35
- arthur_common/tools/aggregation_loader.py,sha256=3CF46bNi-GdJBNOXkjYfCQ1Aung8lf65L532sdWmR_s,2351
36
- arthur_common/tools/duckdb_data_loader.py,sha256=nscmarfP5FeL8p-9e3uZhpGEV0xFqDJmR3t77HdR26U,11081
37
- arthur_common/tools/duckdb_utils.py,sha256=1i-kRXu95gh4Sf9Osl2LFUpdb0yZifOjLDtIgSfSmfs,1197
38
- arthur_common/tools/functions.py,sha256=FWL4eWO5-vLp86WudT-MGUKvf2B8f02IdoXQFKd6d8k,1093
39
- arthur_common/tools/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- arthur_common/tools/schema_inferer.py,sha256=Ur4CXGAkd6ZMSU0nMNrkOEElsBopHXq0lctTV8X92W8,5188
41
- arthur_common/tools/time_utils.py,sha256=4gfiu9NXfvPZltiVNLSIQGylX6h2W0viNi9Kv4bKyfw,1410
42
- arthur_common-2.1.58.dist-info/METADATA,sha256=MYHMGNuzewFPKvwCl97Ir9AuJL7NDD4xXzNb8c4UXLw,1609
43
- arthur_common-2.1.58.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
- arthur_common-2.1.58.dist-info/RECORD,,