arthur-common 2.1.58__tar.gz → 2.1.59__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arthur-common might be problematic; see the package registry's advisory page for details.

Files changed (44)
  1. {arthur_common-2.1.58 → arthur_common-2.1.59}/PKG-INFO +9 -1
  2. {arthur_common-2.1.58 → arthur_common-2.1.59}/README.md +8 -0
  3. {arthur_common-2.1.58 → arthur_common-2.1.59}/pyproject.toml +48 -2
  4. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/agentic_aggregations.py +36 -21
  5. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/models/connectors.py +9 -0
  6. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/models/metrics.py +16 -2
  7. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/models/shield.py +11 -11
  8. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/__init__.py +0 -0
  9. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/__init__.py +0 -0
  10. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/aggregator.py +0 -0
  11. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/README.md +0 -0
  12. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/__init__.py +0 -0
  13. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/categorical_count.py +0 -0
  14. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/confusion_matrix.py +0 -0
  15. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/inference_count.py +0 -0
  16. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/inference_count_by_class.py +0 -0
  17. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/inference_null_count.py +0 -0
  18. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/mean_absolute_error.py +0 -0
  19. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/mean_squared_error.py +0 -0
  20. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/multiclass_confusion_matrix.py +0 -0
  21. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/multiclass_inference_count_by_class.py +0 -0
  22. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/numeric_stats.py +0 -0
  23. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/numeric_sum.py +0 -0
  24. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/py.typed +0 -0
  25. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/functions/shield_aggregations.py +0 -0
  26. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/aggregations/py.typed +0 -0
  27. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/config/__init__.py +0 -0
  28. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/config/config.py +0 -0
  29. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/config/settings.yaml +0 -0
  30. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/models/__init__.py +0 -0
  31. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/models/datasets.py +0 -0
  32. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/models/py.typed +0 -0
  33. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/models/schema_definitions.py +0 -0
  34. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/models/task_job_specs.py +0 -0
  35. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/py.typed +0 -0
  36. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/__init__.py +0 -0
  37. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/aggregation_analyzer.py +0 -0
  38. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/aggregation_loader.py +0 -0
  39. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/duckdb_data_loader.py +0 -0
  40. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/duckdb_utils.py +0 -0
  41. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/functions.py +0 -0
  42. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/py.typed +0 -0
  43. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/schema_inferer.py +0 -0
  44. {arthur_common-2.1.58 → arthur_common-2.1.59}/src/arthur_common/tools/time_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arthur-common
3
- Version: 2.1.58
3
+ Version: 2.1.59
4
4
  Summary: Utility code common to Arthur platform components.
5
5
  License: MIT
6
6
  Author: Arthur
@@ -62,6 +62,14 @@ This project uses [pytest](https://pytest.org/) for testing. To run the tests, e
62
62
  poetry run pytest
63
63
  ```
64
64
 
65
+ ## Release process
66
+ 1. Merge changes into **main** branch
67
+ 2. Go to **Actions** -> **Arthur Common Version Bump**
68
+ 3. Manually trigger workflow there, it will create a PR with version bumping
69
+ 4. Go to **Pull requests** and check PR for version bump, accept it if everything is okay
70
+ 5. Version bump commit will be merged to **main** branch and it will start release process
71
+ 6. Update package version in your project (arthur-engine)
72
+
65
73
  ## License
66
74
 
67
75
  This project is licensed under the MIT License.
@@ -37,6 +37,14 @@ This project uses [pytest](https://pytest.org/) for testing. To run the tests, e
37
37
  poetry run pytest
38
38
  ```
39
39
 
40
+ ## Release process
41
+ 1. Merge changes into **main** branch
42
+ 2. Go to **Actions** -> **Arthur Common Version Bump**
43
+ 3. Manually trigger workflow there, it will create a PR with version bumping
44
+ 4. Go to **Pull requests** and check PR for version bump, accept it if everything is okay
45
+ 5. Version bump commit will be merged to **main** branch and it will start release process
46
+ 6. Update package version in your project (arthur-engine)
47
+
40
48
  ## License
41
49
 
42
50
  This project is licensed under the MIT License.
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "arthur-common"
3
- version = "2.1.58"
3
+ version = "2.1.59"
4
4
  description = "Utility code common to Arthur platform components."
5
5
  authors = ["Arthur <engineering@arthur.ai>"]
6
6
  license = "MIT"
@@ -27,7 +27,53 @@ responses = "0.25.7"
27
27
  pytest-xdist = "3.6.1"
28
28
  pytest-cov = "^6.1.1"
29
29
  pre-commit = "^4.2.0"
30
- mypy = "^1.16.1"
30
+
31
+
32
+ [tool.poetry.group.linters.dependencies]
33
+ autoflake = "^2.3.1"
34
+ isort = "^6.0.1"
35
+ black = "^25.1.0"
36
+ mypy = "^1.17.0"
37
+
38
+
39
+ [tool.autoflake]
40
+ remove-all-unused-imports = true
41
+ in-place = true
42
+ recursive = true
43
+
44
+
45
+ [tool.isort]
46
+ profile = "black"
47
+ src_paths = ["src"]
48
+
49
+
50
+ [tool.black]
51
+ target-version = ['py312', 'py313']
52
+ include = '\.pyi?$'
53
+ extend-exclude = '''
54
+ /(
55
+ # directories
56
+ \.eggs
57
+ | \.git
58
+ 06
59
+ | \.hg
60
+ | \.mypy_cache
61
+ | \.tox
62
+ | \.venv
63
+ | build
64
+ | dist
65
+ )/
66
+ '''
67
+
68
+ [tool.mypy]
69
+ ignore_missing_imports = true
70
+ implicit_reexport = true
71
+ explicit_package_bases = true
72
+ strict = true
73
+ exclude = ["clients/python", "alembic_app_db", "alembic_ts_db", "tests"]
74
+ namespace_packages = true
75
+ mypy_path = "src"
76
+
31
77
 
32
78
  [tool.pytest.ini_options]
33
79
  pythonpath = ["src"]
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  import logging
3
- from typing import Annotated
3
+ from typing import Annotated, Any
4
4
  from uuid import UUID
5
5
 
6
6
  import pandas as pd
@@ -27,7 +27,10 @@ TOOL_SCORE_NO_TOOL_VALUE = 2
27
27
  logger = logging.getLogger(__name__)
28
28
 
29
29
 
30
- def extract_spans_with_metrics_and_agents(root_spans):
30
+ # TODO: create TypedDict for span
31
+ def extract_spans_with_metrics_and_agents(
32
+ root_spans: list[str | dict[str, Any]],
33
+ ) -> list[tuple[dict[str, Any], str]]:
31
34
  """Recursively extract all spans with metrics and their associated agent names from the span tree.
32
35
 
33
36
  Returns:
@@ -35,14 +38,21 @@ def extract_spans_with_metrics_and_agents(root_spans):
35
38
  """
36
39
  spans_with_metrics_and_agents = []
37
40
 
38
- def traverse_spans(spans, current_agent_name="unknown"):
39
- for span_str in spans:
40
- span = json.loads(span_str) if type(span_str) == str else span_str
41
+ # TODO: Improve function so it won't modify variable outside of its scope
42
+ def traverse_spans(
43
+ spans: list[str | dict[str, Any]],
44
+ current_agent_name: str = "unknown",
45
+ ) -> None:
46
+ for span_to_parse in spans:
47
+ if isinstance(span_to_parse, str):
48
+ parsed_span = json.loads(span_to_parse)
49
+ else:
50
+ parsed_span = span_to_parse
41
51
 
42
52
  # Update current agent name if this span is an AGENT
43
- if span.get("span_kind") == "AGENT":
53
+ if parsed_span.get("span_kind") == "AGENT":
44
54
  try:
45
- raw_data = span.get("raw_data", {})
55
+ raw_data = parsed_span.get("raw_data", {})
46
56
  if isinstance(raw_data, str):
47
57
  raw_data = json.loads(raw_data)
48
58
 
@@ -52,29 +62,31 @@ def extract_spans_with_metrics_and_agents(root_spans):
52
62
  current_agent_name = agent_name
53
63
  except (json.JSONDecodeError, KeyError, TypeError):
54
64
  logger.error(
55
- f"Error parsing attributes from span (span_id: {span.get('span_id')}) in trace {span.get('trace_id')}",
65
+ f"Error parsing attributes from span (span_id: {parsed_span.get('span_id')}) in trace {parsed_span.get('trace_id')}",
56
66
  )
57
67
 
58
68
  # Check if this span has metrics
59
- if span.get("metric_results") and len(span.get("metric_results", [])) > 0:
60
- spans_with_metrics_and_agents.append((span, current_agent_name))
69
+ if parsed_span.get("metric_results", []):
70
+ spans_with_metrics_and_agents.append(
71
+ (parsed_span, current_agent_name),
72
+ )
61
73
 
62
74
  # Recursively traverse children with the current agent name
63
- if span.get("children", []):
64
- traverse_spans(span["children"], current_agent_name)
75
+ if children_span := parsed_span.get("children", []):
76
+ traverse_spans(children_span, current_agent_name)
65
77
 
66
78
  traverse_spans(root_spans)
67
79
  return spans_with_metrics_and_agents
68
80
 
69
81
 
70
- def determine_relevance_pass_fail(score):
82
+ def determine_relevance_pass_fail(score: float | None) -> str | None:
71
83
  """Determine pass/fail for relevance scores using global threshold"""
72
84
  if score is None:
73
85
  return None
74
86
  return "pass" if score >= RELEVANCE_SCORE_THRESHOLD else "fail"
75
87
 
76
88
 
77
- def determine_tool_pass_fail(score):
89
+ def determine_tool_pass_fail(score: int | None) -> str | None:
78
90
  """Determine pass/fail for tool scores using global threshold"""
79
91
  if score is None:
80
92
  return None
@@ -177,7 +189,7 @@ class AgenticMetricsOverTimeAggregation(SketchAggregationFunction):
177
189
 
178
190
  for metric_result in metric_results:
179
191
  metric_type = metric_result.get("metric_type")
180
- details = json.loads(metric_result.get("details", '{}'))
192
+ details = json.loads(metric_result.get("details", "{}"))
181
193
 
182
194
  if metric_type == "ToolSelection":
183
195
  tool_selection = details.get("tool_selection", {})
@@ -430,7 +442,7 @@ class AgenticRelevancePassFailCountAggregation(NumericAggregationFunction):
430
442
 
431
443
  for metric_result in metric_results:
432
444
  metric_type = metric_result.get("metric_type")
433
- details = json.loads(metric_result.get("details", '{}'))
445
+ details = json.loads(metric_result.get("details", "{}"))
434
446
 
435
447
  if metric_type in ["QueryRelevance", "ResponseRelevance"]:
436
448
  relevance_data = details.get(
@@ -555,7 +567,7 @@ class AgenticToolPassFailCountAggregation(NumericAggregationFunction):
555
567
 
556
568
  for metric_result in metric_results:
557
569
  if metric_result.get("metric_type") == "ToolSelection":
558
- details = json.loads(metric_result.get("details", '{}'))
570
+ details = json.loads(metric_result.get("details", "{}"))
559
571
  tool_selection = details.get("tool_selection", {})
560
572
 
561
573
  tool_selection_score = tool_selection.get("tool_selection")
@@ -723,10 +735,13 @@ class AgenticLLMCallCountAggregation(NumericAggregationFunction):
723
735
  root_spans = json.loads(root_spans)
724
736
 
725
737
  # Count LLM spans in the tree
726
- def count_llm_spans(spans):
738
+ def count_llm_spans(spans: list[str | dict[str, Any]]) -> int:
727
739
  count = 0
728
- for span_str in spans:
729
- span = json.loads(span_str) if type(span_str) == str else span_str
740
+ for span_to_parse in spans:
741
+ if isinstance(span_to_parse, str):
742
+ span = json.loads(span_to_parse)
743
+ else:
744
+ span = span_to_parse
730
745
 
731
746
  # Check if this span is an LLM span
732
747
  if span.get("span_kind") == "LLM":
@@ -830,7 +845,7 @@ class AgenticToolSelectionAndUsageByAgentAggregation(NumericAggregationFunction)
830
845
 
831
846
  for metric_result in metric_results:
832
847
  if metric_result.get("metric_type") == "ToolSelection":
833
- details = json.loads(metric_result.get("details", '{}'))
848
+ details = json.loads(metric_result.get("details", "{}"))
834
849
  tool_selection = details.get("tool_selection", {})
835
850
 
836
851
  tool_selection_score = tool_selection.get("tool_selection")
@@ -38,6 +38,15 @@ ODBC_CONNECTOR_DRIVER_FIELD = "driver"
38
38
  ODBC_CONNECTOR_TABLE_NAME_FIELD = "table_name"
39
39
  ODBC_CONNECTOR_DIALECT_FIELD = "dialect"
40
40
 
41
+ # Snowflake connector constants
42
+ SNOWFLAKE_CONNECTOR_ACCOUNT_FIELD = "account"
43
+ SNOWFLAKE_CONNECTOR_SCHEMA_FIELD = "schema"
44
+ SNOWFLAKE_CONNECTOR_WAREHOUSE_FIELD = "warehouse"
45
+ SNOWFLAKE_CONNECTOR_ROLE_FIELD = "role"
46
+ SNOWFLAKE_CONNECTOR_AUTHENTICATOR_FIELD = "authenticator"
47
+ SNOWFLAKE_CONNECTOR_PRIVATE_KEY_FIELD = "private_key"
48
+ SNOWFLAKE_CONNECTOR_PRIVATE_KEY_PASSPHRASE_FIELD = "private_key_passphrase"
49
+
41
50
 
42
51
  # dataset (connector type dependent) constants
43
52
  SHIELD_DATASET_TASK_ID_FIELD = "task_id"
@@ -122,6 +122,20 @@ class BaseAggregationParameterSchema(BaseModel):
122
122
  description="Description of the parameter.",
123
123
  )
124
124
 
125
+ @field_validator("parameter_key")
126
+ @classmethod
127
+ def validate_parameter_key_allowed_characters(cls, v: str) -> str:
128
+ if not v.replace("_", "").isalpha():
129
+ raise ValueError("Parameter key can only contain letters and underscores.")
130
+ return v
131
+
132
+ @field_validator("friendly_name")
133
+ @classmethod
134
+ def validate_friendly_name_allowed_characters(cls, v: str) -> str:
135
+ if not v.replace("_", "").replace(" ", "").isalpha():
136
+ raise ValueError("Friendly name can only contain letters and underscores.")
137
+ return v
138
+
125
139
 
126
140
  class MetricsParameterSchema(BaseAggregationParameterSchema):
127
141
  # specific to default metrics/Python metrics—not available to custom aggregations
@@ -195,7 +209,7 @@ class MetricsColumnParameterSchema(MetricsParameterSchema, BaseColumnParameterSc
195
209
 
196
210
  class MetricsColumnListParameterSchema(
197
211
  MetricsParameterSchema,
198
- BaseColumnParameterSchema,
212
+ BaseColumnBaseParameterSchema,
199
213
  ):
200
214
  # list column parameter schema specific to default metrics
201
215
  parameter_type: Literal["column_list"] = "column_list"
@@ -298,7 +312,7 @@ class ReportedCustomAggregation(BaseReportedAggregation):
298
312
 
299
313
  @field_validator("dimension_columns")
300
314
  @classmethod
301
- def validate_dimension_columns_length(cls, v: list[str]) -> str:
315
+ def validate_dimension_columns_length(cls, v: list[str]) -> list[str]:
302
316
  if len(v) > 1:
303
317
  raise ValueError("Only one dimension column can be specified.")
304
318
  return v
@@ -32,7 +32,7 @@ class MetricType(str, Enum):
32
32
  RESPONSE_RELEVANCE = "ResponseRelevance"
33
33
  TOOL_SELECTION = "ToolSelection"
34
34
 
35
- def __str__(self):
35
+ def __str__(self) -> str:
36
36
  return self.value
37
37
 
38
38
 
@@ -575,20 +575,20 @@ class NewMetricRequest(BaseModel):
575
575
  },
576
576
  )
577
577
 
578
- @field_validator("type")
579
- def validate_metric_type(cls, value):
580
- if value not in MetricType:
581
- raise ValueError(
582
- f"Invalid metric type: {value}. Valid types are: {', '.join([t.value for t in MetricType])}",
583
- )
584
- return value
585
-
586
578
  @model_validator(mode="before")
587
- def set_config_type(cls, values):
579
+ def set_config_type(cls, values: dict[str, Any] | None) -> dict[str, Any] | None:
588
580
  if not isinstance(values, dict):
589
581
  return values
590
582
 
591
- metric_type = values.get("type")
583
+ try:
584
+ metric_type = MetricType(values.get("type", "empty_value"))
585
+ except ValueError:
586
+ raise HTTPException(
587
+ status_code=400,
588
+ detail=f"Invalid metric type: {values.get('type', 'empty_value')}. Must be one of {[t.value for t in MetricType]}",
589
+ headers={"full_stacktrace": "false"},
590
+ )
591
+
592
592
  config_values = values.get("config")
593
593
 
594
594
  # Map metric types to their corresponding config classes