great-expectations-cloud 20240523.0.dev0__py3-none-any.whl → 20251124.0.dev1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (33)
  1. great_expectations_cloud/agent/__init__.py +3 -0
  2. great_expectations_cloud/agent/actions/__init__.py +8 -5
  3. great_expectations_cloud/agent/actions/agent_action.py +21 -6
  4. great_expectations_cloud/agent/actions/draft_datasource_config_action.py +45 -24
  5. great_expectations_cloud/agent/actions/generate_data_quality_check_expectations_action.py +557 -0
  6. great_expectations_cloud/agent/actions/list_asset_names.py +65 -0
  7. great_expectations_cloud/agent/actions/run_checkpoint.py +74 -27
  8. great_expectations_cloud/agent/actions/run_metric_list_action.py +11 -5
  9. great_expectations_cloud/agent/actions/run_scheduled_checkpoint.py +67 -0
  10. great_expectations_cloud/agent/actions/run_window_checkpoint.py +66 -0
  11. great_expectations_cloud/agent/actions/utils.py +35 -0
  12. great_expectations_cloud/agent/agent.py +444 -101
  13. great_expectations_cloud/agent/cli.py +2 -2
  14. great_expectations_cloud/agent/config.py +19 -5
  15. great_expectations_cloud/agent/event_handler.py +49 -12
  16. great_expectations_cloud/agent/exceptions.py +9 -0
  17. great_expectations_cloud/agent/message_service/asyncio_rabbit_mq_client.py +80 -14
  18. great_expectations_cloud/agent/message_service/subscriber.py +8 -5
  19. great_expectations_cloud/agent/models.py +197 -20
  20. great_expectations_cloud/agent/utils.py +84 -0
  21. great_expectations_cloud/logging/logging_cfg.py +20 -4
  22. great_expectations_cloud/py.typed +0 -0
  23. {great_expectations_cloud-20240523.0.dev0.dist-info → great_expectations_cloud-20251124.0.dev1.dist-info}/METADATA +54 -46
  24. great_expectations_cloud-20251124.0.dev1.dist-info/RECORD +34 -0
  25. {great_expectations_cloud-20240523.0.dev0.dist-info → great_expectations_cloud-20251124.0.dev1.dist-info}/WHEEL +1 -1
  26. great_expectations_cloud/agent/actions/data_assistants/__init__.py +0 -8
  27. great_expectations_cloud/agent/actions/data_assistants/run_missingness_data_assistant.py +0 -45
  28. great_expectations_cloud/agent/actions/data_assistants/run_onboarding_data_assistant.py +0 -45
  29. great_expectations_cloud/agent/actions/data_assistants/utils.py +0 -123
  30. great_expectations_cloud/agent/actions/list_table_names.py +0 -76
  31. great_expectations_cloud-20240523.0.dev0.dist-info/RECORD +0 -32
  32. {great_expectations_cloud-20240523.0.dev0.dist-info → great_expectations_cloud-20251124.0.dev1.dist-info}/entry_points.txt +0 -0
  33. {great_expectations_cloud-20240523.0.dev0.dist-info → great_expectations_cloud-20251124.0.dev1.dist-info/licenses}/LICENSE +0 -0
great_expectations_cloud/agent/actions/run_checkpoint.py
@@ -1,5 +1,8 @@
  from __future__ import annotations

+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING, Any
+
  from typing_extensions import override

  from great_expectations_cloud.agent.actions.agent_action import (
@@ -10,42 +13,86 @@ from great_expectations_cloud.agent.event_handler import register_event_action
  from great_expectations_cloud.agent.models import (
      CreatedResource,
      RunCheckpointEvent,
+     RunScheduledCheckpointEvent,
+     RunWindowCheckpointEvent,
  )

+ if TYPE_CHECKING:
+     from great_expectations.data_context import CloudDataContext
+     from great_expectations.datasource.fluent.interfaces import DataAsset, Datasource
+

  class RunCheckpointAction(AgentAction[RunCheckpointEvent]):
      @override
      def run(self, event: RunCheckpointEvent, id: str) -> ActionResult:
-         # TODO: move connection testing into OSS; there isn't really a reason it can't be done there
-         for datasource_name, data_asset_names in event.datasource_names_to_asset_names.items():
-             datasource = self._context.get_datasource(datasource_name)
-             datasource.test_connection(test_assets=False)  # raises `TestConnectionError` on failure
-             for data_asset_name in (
-                 data_asset_names
-             ):  # only test connection for assets that are validated in checkpoint
-                 asset = datasource.get_asset(data_asset_name)
-                 asset.test_connection()  # raises `TestConnectionError` on failure
-         checkpoint_run_result = self._context.run_checkpoint(
-             ge_cloud_id=event.checkpoint_id,
-             batch_request={"options": event.splitter_options} if event.splitter_options else None,
-         )
+         return run_checkpoint(self._context, event, id)
+
+
+ class MissingCheckpointNameError(ValueError):
+     """Property checkpoint_name is required but not present."""
+
+
+ # using a dataclass because we don't want the pydantic behavior of copying objects
+ @dataclass
+ class DataSourceAssets:
+     data_source: Datasource[Any, Any]
+     assets_by_name: dict[str, DataAsset[Any, Any]]

-         validation_results = checkpoint_run_result.run_results
-         created_resources = []
-         for key in validation_results.keys():
-             created_resource = CreatedResource(
-                 resource_id=validation_results[key]["actions_results"]["store_validation_result"][
-                     "id"
-                 ],
-                 type="SuiteValidationResult",
+
+ def run_checkpoint(
+     context: CloudDataContext,
+     event: RunCheckpointEvent | RunScheduledCheckpointEvent | RunWindowCheckpointEvent,
+     id: str,
+     expectation_parameters: dict[str, Any] | None = None,
+ ) -> ActionResult:
+     # the checkpoint_name property on possible events is optional for backwards compatibility,
+     # but this action requires it in order to run:
+     if not event.checkpoint_name:
+         raise MissingCheckpointNameError
+
+     checkpoint = context.checkpoints.get(name=event.checkpoint_name)
+
+     # only GX-managed Checkpoints are currently validated here and they contain only one validation definition, but
+     # the Checkpoint does allow for multiple validation definitions so we'll be defensive and ensure we only test each
+     # source/asset once
+     data_sources_assets_by_data_source_name: dict[str, DataSourceAssets] = {}
+     for vd in checkpoint.validation_definitions:
+         ds = vd.data_source
+         ds_name = ds.name
+         # create assets by name dict
+         if ds_name not in data_sources_assets_by_data_source_name:
+             data_sources_assets_by_data_source_name[ds_name] = DataSourceAssets(
+                 data_source=ds, assets_by_name={}
              )
-             created_resources.append(created_resource)
+         data_sources_assets_by_data_source_name[ds_name].assets_by_name[vd.asset.name] = vd.asset

-         return ActionResult(
-             id=id,
-             type=event.type,
-             created_resources=created_resources,
+     for data_sources_assets in data_sources_assets_by_data_source_name.values():
+         data_source = data_sources_assets.data_source
+         data_source.test_connection(test_assets=False)  # raises `TestConnectionError` on failure
+         for data_asset in data_sources_assets.assets_by_name.values():
+             data_asset.test_connection()  # raises `TestConnectionError` on failure
+
+     checkpoint_run_result = checkpoint.run(
+         batch_parameters=event.splitter_options, expectation_parameters=expectation_parameters
+     )
+
+     validation_results = checkpoint_run_result.run_results
+     created_resources = []
+     for key in validation_results.keys():
+         suite_validation_result = validation_results[key]
+         if suite_validation_result.id is None:
+             raise RuntimeError(f"SuiteValidationResult.id is None for key: {key}")  # noqa: TRY003
+         created_resource = CreatedResource(
+             resource_id=suite_validation_result.id,
+             type="SuiteValidationResult",
          )
+         created_resources.append(created_resource)
+
+     return ActionResult(
+         id=id,
+         type=event.type,
+         created_resources=created_resources,
+     )


- register_event_action("0", RunCheckpointEvent, RunCheckpointAction)
+ register_event_action("1", RunCheckpointEvent, RunCheckpointAction)
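The rewritten run_checkpoint helper above deduplicates connection tests by grouping every validation definition under its data source before calling test_connection. A minimal, self-contained sketch of that grouping pattern, using stand-in classes rather than the package's real Datasource/DataAsset interfaces:

    from dataclasses import dataclass, field

    @dataclass
    class FakeAsset:                     # stand-in for a GX DataAsset
        name: str

    @dataclass
    class FakeDataSource:                # stand-in for a GX Datasource
        name: str

    @dataclass
    class FakeValidationDefinition:      # stand-in for a Checkpoint validation definition
        data_source: FakeDataSource
        asset: FakeAsset

    @dataclass
    class DataSourceAssets:
        data_source: FakeDataSource
        assets_by_name: dict[str, FakeAsset] = field(default_factory=dict)

    def group_by_data_source(validation_definitions) -> dict[str, DataSourceAssets]:
        """Record each data source once, and each of its assets once, keyed by name."""
        grouped: dict[str, DataSourceAssets] = {}
        for vd in validation_definitions:
            entry = grouped.setdefault(vd.data_source.name, DataSourceAssets(vd.data_source))
            entry.assets_by_name[vd.asset.name] = vd.asset
        return grouped

    sales = FakeDataSource("sales_db")
    orders = FakeAsset("orders")
    # the same source/asset appearing in two validation definitions is only recorded once,
    # so its connection would only be tested once
    grouped = group_by_data_source([
        FakeValidationDefinition(sales, orders),
        FakeValidationDefinition(sales, orders),
    ])
    assert list(grouped) == ["sales_db"]
    assert list(grouped["sales_db"].assets_by_name) == ["orders"]

Using a plain dataclass (as the diff's comment notes) also preserves object identity: the stored data_source is the exact object passed in, not a validated copy.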
great_expectations_cloud/agent/actions/run_metric_list_action.py
@@ -20,6 +20,7 @@ from great_expectations_cloud.agent.actions import ActionResult, AgentAction
  from great_expectations_cloud.agent.event_handler import register_event_action
  from great_expectations_cloud.agent.models import (
      CreatedResource,
+     DomainContext,
      RunMetricsListEvent,
  )

@@ -29,13 +30,18 @@ if TYPE_CHECKING:


  class MetricListAction(AgentAction[RunMetricsListEvent]):
-     def __init__(
+     def __init__(  # noqa: PLR0913 # Refactor opportunity
          self,
          context: CloudDataContext,
+         base_url: str,
+         domain_context: DomainContext,
+         auth_key: str,
          metric_repository: MetricRepository | None = None,
          batch_inspector: BatchInspector | None = None,
      ):
-         super().__init__(context=context)
+         super().__init__(
+             context=context, base_url=base_url, domain_context=domain_context, auth_key=auth_key
+         )
          self._metric_repository = metric_repository or MetricRepository(
              data_store=CloudDataStore(self._context)
          )
@@ -45,7 +51,7 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):

      @override
      def run(self, event: RunMetricsListEvent, id: str) -> ActionResult:
-         datasource = self._context.get_datasource(event.datasource_name)
+         datasource = self._context.data_sources.get(event.datasource_name)
          data_asset = datasource.get_asset(event.data_asset_name)
          data_asset.test_connection()  # raises `TestConnectionError` on failure

@@ -54,7 +60,7 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):
          metric_run = self._batch_inspector.compute_metric_list_run(
              data_asset_id=data_asset.id,
              batch_request=batch_request,
-             metric_list=event.metric_names,
+             metric_list=list(event.metric_names),
          )

          metric_run_id = self._metric_repository.add_metric_run(metric_run)
@@ -78,4 +84,4 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):
          )


- register_event_action("0", RunMetricsListEvent, MetricListAction)
+ register_event_action("1", RunMetricsListEvent, MetricListAction)
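Both actions above also bump the first argument of register_event_action from "0" to "1". The registry's internals are not part of this diff, but purely as an illustration, a version-keyed registry of roughly this shape would let the agent map the same event class to different actions per API version (all names below are hypothetical, not the package's actual implementation):

    from collections import defaultdict

    # hypothetical registry shape: api version -> event class -> action class
    _EVENT_ACTION_MAP: dict[str, dict[type, type]] = defaultdict(dict)

    def register_event_action(version: str, event_class: type, action_class: type) -> None:
        # refuse double registration so two actions cannot claim the same (version, event) pair
        if event_class in _EVENT_ACTION_MAP[version]:
            raise ValueError(f"{event_class.__name__} already registered for version {version}")
        _EVENT_ACTION_MAP[version][event_class] = action_class

    def lookup_action(version: str, event_class: type) -> type:
        return _EVENT_ACTION_MAP[version][event_class]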
great_expectations_cloud/agent/actions/run_scheduled_checkpoint.py
@@ -0,0 +1,67 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+ from urllib.parse import urljoin
+
+ from great_expectations.core.http import create_session
+ from great_expectations.exceptions import GXCloudError
+ from typing_extensions import override
+
+ from great_expectations_cloud.agent.actions.agent_action import (
+     ActionResult,
+     AgentAction,
+ )
+ from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
+ from great_expectations_cloud.agent.event_handler import register_event_action
+ from great_expectations_cloud.agent.models import RunScheduledCheckpointEvent
+
+ if TYPE_CHECKING:
+     from great_expectations.data_context import CloudDataContext
+
+
+ class RunScheduledCheckpointAction(AgentAction[RunScheduledCheckpointEvent]):
+     @override
+     def run(self, event: RunScheduledCheckpointEvent, id: str) -> ActionResult:
+         expectation_parameters_url = urljoin(
+             base=self._base_url,
+             url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
+         )
+         return run_scheduled_checkpoint(
+             context=self._context,
+             event=event,
+             id=id,
+             auth_key=self._auth_key,
+             url=expectation_parameters_url,
+         )
+
+
+ def run_scheduled_checkpoint(
+     context: CloudDataContext, event: RunScheduledCheckpointEvent, id: str, auth_key: str, url: str
+ ) -> ActionResult:
+     with create_session(access_token=auth_key) as session:
+         response = session.get(url=url)
+
+     if not response.ok:
+         raise GXCloudError(
+             message=f"RunScheduledCheckpointAction encountered an error while connecting to GX Cloud. "
+             f"Unable to retrieve expectation_parameters for Checkpoint with ID={event.checkpoint_id}.",
+             response=response,
+         )
+     data = response.json()
+
+     try:
+         expectation_parameters = (
+             data["data"]["expectation_parameters"]
+             if len(data["data"]["expectation_parameters"]) > 0
+             else None
+         )
+     except KeyError as e:
+         raise GXCloudError(
+             message="Malformed response received from GX Cloud",
+             response=response,
+         ) from e
+
+     return run_checkpoint(context, event, id, expectation_parameters=expectation_parameters)
+
+
+ register_event_action("1", RunScheduledCheckpointEvent, RunScheduledCheckpointAction)
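The parsing in run_scheduled_checkpoint assumes a JSON body shaped like {"data": {"expectation_parameters": {...}}}, treats a missing key as a malformed response, and collapses an empty mapping to None. A small sketch of that handling against sample payloads (the payload contents are illustrative, not a documented GX Cloud response):

    sample = {"data": {"expectation_parameters": {"min_value": 10, "max_value": 250}}}
    empty = {"data": {"expectation_parameters": {}}}

    def extract_expectation_parameters(payload: dict) -> dict | None:
        # mirrors the action's behavior: KeyError bubbles up as "malformed response",
        # and an empty mapping becomes None so the checkpoint runs without parameters
        params = payload["data"]["expectation_parameters"]
        return params if len(params) > 0 else None

    assert extract_expectation_parameters(sample) == {"min_value": 10, "max_value": 250}
    assert extract_expectation_parameters(empty) is None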
great_expectations_cloud/agent/actions/run_window_checkpoint.py
@@ -0,0 +1,66 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+ from urllib.parse import urljoin
+
+ from great_expectations.core.http import create_session
+ from great_expectations.exceptions import GXCloudError
+ from typing_extensions import override
+
+ from great_expectations_cloud.agent.actions.agent_action import (
+     ActionResult,
+     AgentAction,
+ )
+ from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
+ from great_expectations_cloud.agent.event_handler import register_event_action
+ from great_expectations_cloud.agent.models import RunWindowCheckpointEvent
+
+ if TYPE_CHECKING:
+     from great_expectations.data_context import CloudDataContext
+
+
+ class RunWindowCheckpointAction(AgentAction[RunWindowCheckpointEvent]):
+     @override
+     def run(self, event: RunWindowCheckpointEvent, id: str) -> ActionResult:
+         expectation_parameters_url = urljoin(
+             base=self._base_url,
+             url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
+         )
+         return run_window_checkpoint(
+             self._context,
+             event,
+             id,
+             auth_key=self._auth_key,
+             url=expectation_parameters_url,
+         )
+
+
+ register_event_action("1", RunWindowCheckpointEvent, RunWindowCheckpointAction)
+
+
+ def run_window_checkpoint(
+     context: CloudDataContext,
+     event: RunWindowCheckpointEvent,
+     id: str,
+     auth_key: str,
+     url: str,
+ ) -> ActionResult:
+     with create_session(access_token=auth_key) as session:
+         response = session.get(url=url)
+
+     if not response.ok:
+         raise GXCloudError(
+             message=f"RunWindowCheckpointAction encountered an error while connecting to GX Cloud. "
+             f"Unable to retrieve expectation_parameters for Checkpoint with ID={event.checkpoint_id}.",
+             response=response,
+         )
+     data = response.json()
+     try:
+         expectation_parameters = data["data"]["expectation_parameters"]
+     except KeyError as e:
+         raise GXCloudError(
+             message="Malformed response received from GX Cloud",
+             response=response,
+         ) from e
+
+     return run_checkpoint(context, event, id, expectation_parameters=expectation_parameters)
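Both new checkpoint actions build the expectation-parameters URL with urljoin against the agent's base_url. Because the second argument starts with "/", urljoin keeps only the scheme and host from the base and replaces any path. A quick sketch with purely illustrative values (the real base_url and IDs come from the agent's configuration and the event):

    from urllib.parse import urljoin

    base_url = "https://api.greatexpectations.io/some/prefix"   # illustrative only
    org_id, workspace_id, checkpoint_id = "org-123", "ws-456", "chk-789"

    url = urljoin(
        base_url,
        f"/api/v1/organizations/{org_id}/workspaces/{workspace_id}"
        f"/checkpoints/{checkpoint_id}/expectation-parameters",
    )
    # Any path component of base_url is dropped because the joined path is absolute:
    # https://api.greatexpectations.io/api/v1/organizations/org-123/workspaces/ws-456/checkpoints/chk-789/expectation-parameters
    print(url)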
great_expectations_cloud/agent/actions/utils.py
@@ -0,0 +1,35 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ from great_expectations.datasource.fluent import SnowflakeDatasource, SQLDatasource
+ from sqlalchemy import inspect
+
+ if TYPE_CHECKING:
+     from sqlalchemy.engine import Inspector
+     from sqlalchemy.sql.compiler import IdentifierPreparer
+
+
+ def get_asset_names(datasource: SQLDatasource) -> list[str]:
+     inspector: Inspector = inspect(datasource.get_engine())
+     identifier_preparer: IdentifierPreparer = inspector.dialect.identifier_preparer
+
+     if isinstance(datasource, SnowflakeDatasource) and datasource.schema_:
+         # Snowflake-SQLAlchemy uses the default_schema if no schema is provided to get_table_names
+         # Or if the role does not have access to the schema (it silently fails and defaults to using default_schema)
+         # See https://github.com/snowflakedb/snowflake-sqlalchemy/blob/e78319725d4b96ea205ef1264b744c65eb37853d/src/snowflake/sqlalchemy/snowdialect.py#L731
+         # Explicitly passing the schema to the inspector to get the table and view names
+         # Also converting to list to ensure JSON serializable
+         tables = list(inspector.get_table_names(schema=datasource.schema_))
+         views = list(inspector.get_view_names(schema=datasource.schema_))
+         asset_names = tables + views
+     else:
+         tables = list(inspector.get_table_names())
+         views = list(inspector.get_view_names())
+         asset_names = tables + views
+
+     # the identifier preparer adds quotes when they are necessary
+     quoted_asset_names: list[str] = [
+         identifier_preparer.quote(asset_name) for asset_name in asset_names
+     ]
+     return quoted_asset_names
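get_asset_names defers quoting to the dialect's IdentifierPreparer, which only adds quotes when a name actually needs them (reserved words, mixed case, special characters). A quick illustration using SQLAlchemy's built-in SQLite dialect, chosen here only because it needs no extra driver; any dialect's preparer behaves analogously:

    from sqlalchemy.dialects import sqlite

    preparer = sqlite.dialect().identifier_preparer
    print(preparer.quote("orders"))       # orders          -> plain lowercase name, left unquoted
    print(preparer.quote("Order Items"))  # "Order Items"   -> mixed case and space force quoting
    print(preparer.quote("select"))       # "select"        -> reserved word gets quoted

Quoting at discovery time means the names sent back to GX Cloud can be used verbatim in later SQL without re-deriving the dialect's quoting rules.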