great-expectations-cloud 20240523.0.dev0__py3-none-any.whl → 20251124.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- great_expectations_cloud/agent/__init__.py +3 -0
- great_expectations_cloud/agent/actions/__init__.py +8 -5
- great_expectations_cloud/agent/actions/agent_action.py +21 -6
- great_expectations_cloud/agent/actions/draft_datasource_config_action.py +45 -24
- great_expectations_cloud/agent/actions/generate_data_quality_check_expectations_action.py +557 -0
- great_expectations_cloud/agent/actions/list_asset_names.py +65 -0
- great_expectations_cloud/agent/actions/run_checkpoint.py +74 -27
- great_expectations_cloud/agent/actions/run_metric_list_action.py +11 -5
- great_expectations_cloud/agent/actions/run_scheduled_checkpoint.py +67 -0
- great_expectations_cloud/agent/actions/run_window_checkpoint.py +66 -0
- great_expectations_cloud/agent/actions/utils.py +35 -0
- great_expectations_cloud/agent/agent.py +444 -101
- great_expectations_cloud/agent/cli.py +2 -2
- great_expectations_cloud/agent/config.py +19 -5
- great_expectations_cloud/agent/event_handler.py +49 -12
- great_expectations_cloud/agent/exceptions.py +9 -0
- great_expectations_cloud/agent/message_service/asyncio_rabbit_mq_client.py +80 -14
- great_expectations_cloud/agent/message_service/subscriber.py +8 -5
- great_expectations_cloud/agent/models.py +197 -20
- great_expectations_cloud/agent/utils.py +84 -0
- great_expectations_cloud/logging/logging_cfg.py +20 -4
- great_expectations_cloud/py.typed +0 -0
- {great_expectations_cloud-20240523.0.dev0.dist-info → great_expectations_cloud-20251124.0.dev1.dist-info}/METADATA +54 -46
- great_expectations_cloud-20251124.0.dev1.dist-info/RECORD +34 -0
- {great_expectations_cloud-20240523.0.dev0.dist-info → great_expectations_cloud-20251124.0.dev1.dist-info}/WHEEL +1 -1
- great_expectations_cloud/agent/actions/data_assistants/__init__.py +0 -8
- great_expectations_cloud/agent/actions/data_assistants/run_missingness_data_assistant.py +0 -45
- great_expectations_cloud/agent/actions/data_assistants/run_onboarding_data_assistant.py +0 -45
- great_expectations_cloud/agent/actions/data_assistants/utils.py +0 -123
- great_expectations_cloud/agent/actions/list_table_names.py +0 -76
- great_expectations_cloud-20240523.0.dev0.dist-info/RECORD +0 -32
- {great_expectations_cloud-20240523.0.dev0.dist-info → great_expectations_cloud-20251124.0.dev1.dist-info}/entry_points.txt +0 -0
- {great_expectations_cloud-20240523.0.dev0.dist-info → great_expectations_cloud-20251124.0.dev1.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
3
6
|
from typing_extensions import override
|
|
4
7
|
|
|
5
8
|
from great_expectations_cloud.agent.actions.agent_action import (
|
|
@@ -10,42 +13,86 @@ from great_expectations_cloud.agent.event_handler import register_event_action
|
|
|
10
13
|
from great_expectations_cloud.agent.models import (
|
|
11
14
|
CreatedResource,
|
|
12
15
|
RunCheckpointEvent,
|
|
16
|
+
RunScheduledCheckpointEvent,
|
|
17
|
+
RunWindowCheckpointEvent,
|
|
13
18
|
)
|
|
14
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from great_expectations.data_context import CloudDataContext
|
|
22
|
+
from great_expectations.datasource.fluent.interfaces import DataAsset, Datasource
|
|
23
|
+
|
|
15
24
|
|
|
16
25
|
class RunCheckpointAction(AgentAction[RunCheckpointEvent]):
    """Agent action that handles a ``RunCheckpointEvent``."""

    @override
    def run(self, event: RunCheckpointEvent, id: str) -> ActionResult:
        """Run the Checkpoint named by ``event`` against this action's data context.

        All of the work is delegated to the module-level ``run_checkpoint`` helper.

        Args:
            event: The event describing which Checkpoint to run.
            id: Identifier echoed back on the returned ``ActionResult``.

        Returns:
            The ``ActionResult`` produced by ``run_checkpoint``.
        """
        return run_checkpoint(context=self._context, event=event, id=id)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MissingCheckpointNameError(ValueError):
    """Property checkpoint_name is required but not present.

    Raised by ``run_checkpoint`` when the incoming event carries no (or an empty)
    ``checkpoint_name``.
    """
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# A plain dataclass is used on purpose: we don't want the pydantic behavior of
# copying objects, so the data source and assets stored here stay the same instances.
@dataclass
class DataSourceAssets:
    """A data source together with the assets of it that need a connection test."""

    # the data source whose connection is tested
    data_source: Datasource[Any, Any]
    # assets belonging to ``data_source``, keyed by asset name
    assets_by_name: dict[str, DataAsset[Any, Any]]
|
|
32
40
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
+
|
|
42
|
+
def run_checkpoint(
    context: CloudDataContext,
    event: RunCheckpointEvent | RunScheduledCheckpointEvent | RunWindowCheckpointEvent,
    id: str,
    expectation_parameters: dict[str, Any] | None = None,
) -> ActionResult:
    """Test connections for, then run, the Checkpoint named by ``event``.

    Args:
        context: Data context used to look up the Checkpoint by name.
        event: Event carrying ``checkpoint_name``, ``splitter_options`` and ``type``.
        id: Identifier echoed back on the returned ``ActionResult``.
        expectation_parameters: Optional parameters forwarded to ``checkpoint.run``.

    Returns:
        An ``ActionResult`` listing one ``SuiteValidationResult`` resource per run result.

    Raises:
        MissingCheckpointNameError: If ``event.checkpoint_name`` is missing or empty.
        RuntimeError: If any run result has no ``id``.
    """
    # checkpoint_name is optional on the event models for backwards compatibility,
    # but nothing can be run without it.
    checkpoint_name = event.checkpoint_name
    if not checkpoint_name:
        raise MissingCheckpointNameError

    checkpoint = context.checkpoints.get(name=checkpoint_name)

    # GX-managed Checkpoints (the only ones currently validated here) hold a single
    # validation definition, but a Checkpoint may hold several. Group by data source
    # name and asset name so that each source/asset is tested only once.
    grouped_by_source: dict[str, DataSourceAssets] = {}
    for validation_definition in checkpoint.validation_definitions:
        source = validation_definition.data_source
        source_name = source.name
        entry = grouped_by_source.get(source_name)
        if entry is None:
            entry = DataSourceAssets(data_source=source, assets_by_name={})
            grouped_by_source[source_name] = entry
        asset = validation_definition.asset
        entry.assets_by_name[asset.name] = asset

    for entry in grouped_by_source.values():
        # raises `TestConnectionError` on failure
        entry.data_source.test_connection(test_assets=False)
        for asset in entry.assets_by_name.values():
            asset.test_connection()  # raises `TestConnectionError` on failure

    run_result = checkpoint.run(
        batch_parameters=event.splitter_options,
        expectation_parameters=expectation_parameters,
    )

    created_resources = []
    for key, suite_validation_result in run_result.run_results.items():
        result_id = suite_validation_result.id
        if result_id is None:
            raise RuntimeError(f"SuiteValidationResult.id is None for key: {key}")  # noqa: TRY003
        created_resources.append(
            CreatedResource(resource_id=result_id, type="SuiteValidationResult")
        )

    return ActionResult(id=id, type=event.type, created_resources=created_resources)
|
|
49
96
|
|
|
50
97
|
|
|
51
|
-
register_event_action("
|
|
98
|
+
# Module-level side effect: associate RunCheckpointEvent with RunCheckpointAction in the
# agent's event handler. The first argument "1" is presumably a version key - confirm
# against register_event_action.
register_event_action("1", RunCheckpointEvent, RunCheckpointAction)
|
|
@@ -20,6 +20,7 @@ from great_expectations_cloud.agent.actions import ActionResult, AgentAction
|
|
|
20
20
|
from great_expectations_cloud.agent.event_handler import register_event_action
|
|
21
21
|
from great_expectations_cloud.agent.models import (
|
|
22
22
|
CreatedResource,
|
|
23
|
+
DomainContext,
|
|
23
24
|
RunMetricsListEvent,
|
|
24
25
|
)
|
|
25
26
|
|
|
@@ -29,13 +30,18 @@ if TYPE_CHECKING:
|
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
class MetricListAction(AgentAction[RunMetricsListEvent]):
|
|
32
|
-
def __init__(
|
|
33
|
+
def __init__( # noqa: PLR0913 # Refactor opportunity
|
|
33
34
|
self,
|
|
34
35
|
context: CloudDataContext,
|
|
36
|
+
base_url: str,
|
|
37
|
+
domain_context: DomainContext,
|
|
38
|
+
auth_key: str,
|
|
35
39
|
metric_repository: MetricRepository | None = None,
|
|
36
40
|
batch_inspector: BatchInspector | None = None,
|
|
37
41
|
):
|
|
38
|
-
super().__init__(
|
|
42
|
+
super().__init__(
|
|
43
|
+
context=context, base_url=base_url, domain_context=domain_context, auth_key=auth_key
|
|
44
|
+
)
|
|
39
45
|
self._metric_repository = metric_repository or MetricRepository(
|
|
40
46
|
data_store=CloudDataStore(self._context)
|
|
41
47
|
)
|
|
@@ -45,7 +51,7 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):
|
|
|
45
51
|
|
|
46
52
|
@override
|
|
47
53
|
def run(self, event: RunMetricsListEvent, id: str) -> ActionResult:
|
|
48
|
-
datasource = self._context.
|
|
54
|
+
datasource = self._context.data_sources.get(event.datasource_name)
|
|
49
55
|
data_asset = datasource.get_asset(event.data_asset_name)
|
|
50
56
|
data_asset.test_connection() # raises `TestConnectionError` on failure
|
|
51
57
|
|
|
@@ -54,7 +60,7 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):
|
|
|
54
60
|
metric_run = self._batch_inspector.compute_metric_list_run(
|
|
55
61
|
data_asset_id=data_asset.id,
|
|
56
62
|
batch_request=batch_request,
|
|
57
|
-
metric_list=event.metric_names,
|
|
63
|
+
metric_list=list(event.metric_names),
|
|
58
64
|
)
|
|
59
65
|
|
|
60
66
|
metric_run_id = self._metric_repository.add_metric_run(metric_run)
|
|
@@ -78,4 +84,4 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):
|
|
|
78
84
|
)
|
|
79
85
|
|
|
80
86
|
|
|
81
|
-
register_event_action("
|
|
87
|
+
# Module-level side effect: associate RunMetricsListEvent with MetricListAction in the
# agent's event handler. The first argument "1" is presumably a version key - confirm
# against register_event_action.
register_event_action("1", RunMetricsListEvent, MetricListAction)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
from urllib.parse import urljoin
|
|
5
|
+
|
|
6
|
+
from great_expectations.core.http import create_session
|
|
7
|
+
from great_expectations.exceptions import GXCloudError
|
|
8
|
+
from typing_extensions import override
|
|
9
|
+
|
|
10
|
+
from great_expectations_cloud.agent.actions.agent_action import (
|
|
11
|
+
ActionResult,
|
|
12
|
+
AgentAction,
|
|
13
|
+
)
|
|
14
|
+
from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
|
|
15
|
+
from great_expectations_cloud.agent.event_handler import register_event_action
|
|
16
|
+
from great_expectations_cloud.agent.models import RunScheduledCheckpointEvent
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from great_expectations.data_context import CloudDataContext
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RunScheduledCheckpointAction(AgentAction[RunScheduledCheckpointEvent]):
    """Agent action that handles a ``RunScheduledCheckpointEvent``."""

    @override
    def run(self, event: RunScheduledCheckpointEvent, id: str) -> ActionResult:
        """Build the expectation-parameters URL for the Checkpoint and run it.

        Args:
            event: The event identifying the Checkpoint (``checkpoint_id``) to run.
            id: Identifier echoed back on the returned ``ActionResult``.

        Returns:
            The ``ActionResult`` produced by ``run_scheduled_checkpoint``.
        """
        # The path is absolute, so urljoin keeps only the scheme and host of the base URL.
        endpoint_path = f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters"
        parameters_url = urljoin(base=self._base_url, url=endpoint_path)
        return run_scheduled_checkpoint(
            context=self._context,
            event=event,
            id=id,
            auth_key=self._auth_key,
            url=parameters_url,
        )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def run_scheduled_checkpoint(
    context: CloudDataContext, event: RunScheduledCheckpointEvent, id: str, auth_key: str, url: str
) -> ActionResult:
    """Fetch the Checkpoint's expectation parameters from GX Cloud, then run the Checkpoint.

    Args:
        context: Data context passed through to ``run_checkpoint``.
        event: The scheduled-checkpoint event being handled.
        id: Identifier echoed back on the returned ``ActionResult``.
        auth_key: Access token used to create the HTTP session.
        url: Endpoint that returns the Checkpoint's expectation parameters.

    Returns:
        The ``ActionResult`` produced by ``run_checkpoint``.

    Raises:
        GXCloudError: If the request is unsuccessful, or the response body is not
            JSON of the shape ``{"data": {"expectation_parameters": ...}}``.
    """
    with create_session(access_token=auth_key) as session:
        response = session.get(url=url)

    if not response.ok:
        raise GXCloudError(
            message=f"RunScheduledCheckpointAction encountered an error while connecting to GX Cloud. "
            f"Unable to retrieve expectation_parameters for Checkpoint with ID={event.checkpoint_id}.",
            response=response,
        )

    try:
        # KeyError: a level of the payload is missing.
        # TypeError: "data" (or the payload itself) is not a mapping.
        # ValueError: the body is not valid JSON (JSON decode errors subclass ValueError).
        expectation_parameters = response.json()["data"]["expectation_parameters"]
    except (KeyError, TypeError, ValueError) as e:
        raise GXCloudError(
            message="Malformed response received from GX Cloud",
            response=response,
        ) from e

    # An empty mapping means "no parameters". A JSON null is treated the same way
    # instead of crashing on len(None).
    return run_checkpoint(
        context, event, id, expectation_parameters=expectation_parameters or None
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Module-level side effect: associate RunScheduledCheckpointEvent with
# RunScheduledCheckpointAction in the agent's event handler. The first argument "1" is
# presumably a version key - confirm against register_event_action.
register_event_action("1", RunScheduledCheckpointEvent, RunScheduledCheckpointAction)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
from urllib.parse import urljoin
|
|
5
|
+
|
|
6
|
+
from great_expectations.core.http import create_session
|
|
7
|
+
from great_expectations.exceptions import GXCloudError
|
|
8
|
+
from typing_extensions import override
|
|
9
|
+
|
|
10
|
+
from great_expectations_cloud.agent.actions.agent_action import (
|
|
11
|
+
ActionResult,
|
|
12
|
+
AgentAction,
|
|
13
|
+
)
|
|
14
|
+
from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
|
|
15
|
+
from great_expectations_cloud.agent.event_handler import register_event_action
|
|
16
|
+
from great_expectations_cloud.agent.models import RunWindowCheckpointEvent
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from great_expectations.data_context import CloudDataContext
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RunWindowCheckpointAction(AgentAction[RunWindowCheckpointEvent]):
    """Agent action that handles a ``RunWindowCheckpointEvent``."""

    @override
    def run(self, event: RunWindowCheckpointEvent, id: str) -> ActionResult:
        """Build the expectation-parameters URL for the Checkpoint and run it.

        Args:
            event: The event identifying the Checkpoint (``checkpoint_id``) to run.
            id: Identifier echoed back on the returned ``ActionResult``.

        Returns:
            The ``ActionResult`` produced by ``run_window_checkpoint``.
        """
        # The path is absolute, so urljoin keeps only the scheme and host of the base URL.
        endpoint_path = f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters"
        parameters_url = urljoin(base=self._base_url, url=endpoint_path)
        return run_window_checkpoint(
            context=self._context,
            event=event,
            id=id,
            auth_key=self._auth_key,
            url=parameters_url,
        )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Module-level side effect: associate RunWindowCheckpointEvent with
# RunWindowCheckpointAction in the agent's event handler. The first argument "1" is
# presumably a version key - confirm against register_event_action.
register_event_action("1", RunWindowCheckpointEvent, RunWindowCheckpointAction)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def run_window_checkpoint(
    context: CloudDataContext,
    event: RunWindowCheckpointEvent,
    id: str,
    auth_key: str,
    url: str,
) -> ActionResult:
    """Fetch the Checkpoint's expectation parameters from GX Cloud, then run the Checkpoint.

    Args:
        context: Data context passed through to ``run_checkpoint``.
        event: The window-checkpoint event being handled.
        id: Identifier echoed back on the returned ``ActionResult``.
        auth_key: Access token used to create the HTTP session.
        url: Endpoint that returns the Checkpoint's expectation parameters.

    Returns:
        The ``ActionResult`` produced by ``run_checkpoint``.

    Raises:
        GXCloudError: If the request is unsuccessful, or the response body is not
            JSON of the shape ``{"data": {"expectation_parameters": ...}}``.
    """
    with create_session(access_token=auth_key) as session:
        response = session.get(url=url)

    if not response.ok:
        raise GXCloudError(
            message=f"RunWindowCheckpointAction encountered an error while connecting to GX Cloud. "
            f"Unable to retrieve expectation_parameters for Checkpoint with ID={event.checkpoint_id}.",
            response=response,
        )

    try:
        # KeyError: a level of the payload is missing.
        # TypeError: "data" (or the payload itself) is not a mapping.
        # ValueError: the body is not valid JSON (JSON decode errors subclass ValueError).
        expectation_parameters = response.json()["data"]["expectation_parameters"]
    except (KeyError, TypeError, ValueError) as e:
        raise GXCloudError(
            message="Malformed response received from GX Cloud",
            response=response,
        ) from e

    return run_checkpoint(context, event, id, expectation_parameters=expectation_parameters)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from great_expectations.datasource.fluent import SnowflakeDatasource, SQLDatasource
|
|
6
|
+
from sqlalchemy import inspect
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from sqlalchemy.engine import Inspector
|
|
10
|
+
from sqlalchemy.sql.compiler import IdentifierPreparer
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_asset_names(datasource: SQLDatasource) -> list[str]:
    """Return the table and view names reachable through ``datasource``.

    Tables are listed first, then views. Each name is passed through the SQL
    dialect's identifier preparer, which adds quotes when they are necessary.

    Args:
        datasource: SQL datasource whose engine is inspected.

    Returns:
        The (possibly quoted) table names followed by the view names.
    """
    sql_inspector: Inspector = inspect(datasource.get_engine())
    preparer: IdentifierPreparer = sql_inspector.dialect.identifier_preparer

    # Snowflake-SQLAlchemy uses the default_schema if no schema is provided to get_table_names
    # Or if the role does not have access to the schema (it silently fails and defaults to using default_schema)
    # See https://github.com/snowflakedb/snowflake-sqlalchemy/blob/e78319725d4b96ea205ef1264b744c65eb37853d/src/snowflake/sqlalchemy/snowdialect.py#L731
    # So for Snowflake datasources with a schema set, pass that schema explicitly;
    # every other datasource calls the inspector without arguments.
    if isinstance(datasource, SnowflakeDatasource) and datasource.schema_:
        schema_kwargs = {"schema": datasource.schema_}
    else:
        schema_kwargs = {}

    # Converting to list to ensure JSON serializable
    table_names = list(sql_inspector.get_table_names(**schema_kwargs))
    view_names = list(sql_inspector.get_view_names(**schema_kwargs))

    return [preparer.quote(name) for name in table_names + view_names]
|