great-expectations-cloud 20250703.0.dev0__tar.gz → 20260114.0.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of great-expectations-cloud might be problematic. Click here for more details.
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/PKG-INFO +5 -4
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/README.md +2 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/agent_action.py +3 -3
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/draft_datasource_config_action.py +2 -2
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/generate_data_quality_check_expectations_action.py +67 -23
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/list_asset_names.py +4 -5
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/run_checkpoint.py +64 -3
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/run_metric_list_action.py +3 -3
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/run_scheduled_checkpoint.py +28 -5
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/run_window_checkpoint.py +2 -4
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/utils.py +13 -4
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/agent.py +277 -45
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/event_handler.py +11 -8
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/message_service/asyncio_rabbit_mq_client.py +35 -8
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/message_service/subscriber.py +4 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/models.py +50 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/pyproject.toml +8 -9
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/LICENSE +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/__init__.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/__init__.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/__init__.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/actions/unknown.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/agent_warnings.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/cli.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/config.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/constants.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/exceptions.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/message_service/__init__.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/run.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/agent/utils.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/logging/README.md +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/logging/logging_cfg.py +0 -0
- {great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/great_expectations_cloud/py.typed +0 -0
{great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/PKG-INFO
RENAMED
|
@@ -1,22 +1,21 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: great_expectations_cloud
|
|
3
|
-
Version:
|
|
3
|
+
Version: 20260114.0.dev0
|
|
4
4
|
Summary: Great Expectations Cloud
|
|
5
5
|
License: Proprietary
|
|
6
6
|
Author: The Great Expectations Team
|
|
7
7
|
Author-email: team@greatexpectations.io
|
|
8
|
-
Requires-Python: >=3.11,<3.12
|
|
8
|
+
Requires-Python: >=3.11.4,<3.12
|
|
9
9
|
Classifier: Development Status :: 3 - Alpha
|
|
10
10
|
Classifier: Intended Audience :: Developers
|
|
11
11
|
Classifier: Intended Audience :: Science/Research
|
|
12
12
|
Classifier: License :: Other/Proprietary License
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
15
14
|
Classifier: Topic :: Scientific/Engineering
|
|
16
15
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
17
16
|
Classifier: Topic :: Software Development :: Quality Assurance
|
|
18
17
|
Classifier: Topic :: Software Development :: Testing
|
|
19
|
-
Requires-Dist: great-expectations[databricks,gx-redshift,postgresql,snowflake] (==1.
|
|
18
|
+
Requires-Dist: great-expectations[databricks,gx-redshift,mssql,postgresql,snowflake,trino] (==1.11.0)
|
|
20
19
|
Requires-Dist: orjson (>=3.9.7,<4.0.0,!=3.9.10)
|
|
21
20
|
Requires-Dist: packaging (>=21.3,<26.0)
|
|
22
21
|
Requires-Dist: pika (>=1.3.1,<2.0.0)
|
|
@@ -127,6 +126,8 @@ The dependencies installed in our CI and the Docker build step are determined by
|
|
|
127
126
|
poetry update great_expectations
|
|
128
127
|
```
|
|
129
128
|
|
|
129
|
+
**Note:** If `poetry update` does not find the latest version of `great_expectations`, you can manually update the version in `pyproject.toml`, and then update the lockfile using `poetry lock`.
|
|
130
|
+
|
|
130
131
|
[To resolve and update all dependencies ...](https://python-poetry.org/docs/cli/#lock)
|
|
131
132
|
|
|
132
133
|
```console
|
{great_expectations_cloud-20250703.0.dev0 → great_expectations_cloud-20260114.0.dev0}/README.md
RENAMED
|
@@ -97,6 +97,8 @@ The dependencies installed in our CI and the Docker build step are determined by
|
|
|
97
97
|
poetry update great_expectations
|
|
98
98
|
```
|
|
99
99
|
|
|
100
|
+
**Note:** If `poetry update` does not find the latest version of `great_expectations`, you can manually update the version in `pyproject.toml`, and then update the lockfile using `poetry lock`.
|
|
101
|
+
|
|
100
102
|
[To resolve and update all dependencies ...](https://python-poetry.org/docs/cli/#lock)
|
|
101
103
|
|
|
102
104
|
```console
|
|
@@ -4,7 +4,6 @@ import datetime
|
|
|
4
4
|
from abc import abstractmethod
|
|
5
5
|
from collections.abc import Sequence
|
|
6
6
|
from typing import TYPE_CHECKING, Generic, Optional, TypeVar, Union
|
|
7
|
-
from uuid import UUID
|
|
8
7
|
|
|
9
8
|
from pydantic.v1 import BaseModel
|
|
10
9
|
|
|
@@ -12,6 +11,7 @@ from great_expectations_cloud.agent.models import (
|
|
|
12
11
|
AgentBaseExtraForbid,
|
|
13
12
|
AgentBaseExtraIgnore,
|
|
14
13
|
CreatedResource,
|
|
14
|
+
DomainContext,
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
@@ -32,11 +32,11 @@ _EventT = TypeVar("_EventT", bound=Union[AgentBaseExtraForbid, AgentBaseExtraIgn
|
|
|
32
32
|
|
|
33
33
|
class AgentAction(Generic[_EventT]):
|
|
34
34
|
def __init__(
|
|
35
|
-
self, context: CloudDataContext, base_url: str,
|
|
35
|
+
self, context: CloudDataContext, base_url: str, domain_context: DomainContext, auth_key: str
|
|
36
36
|
):
|
|
37
37
|
self._context = context
|
|
38
38
|
self._base_url = base_url
|
|
39
|
-
self.
|
|
39
|
+
self._domain_context = domain_context
|
|
40
40
|
self._auth_key = auth_key
|
|
41
41
|
|
|
42
42
|
@abstractmethod
|
|
@@ -70,7 +70,7 @@ class DraftDatasourceConfigAction(AgentAction[DraftDatasourceConfigEvent]):
|
|
|
70
70
|
with create_session(access_token=self._auth_key) as session:
|
|
71
71
|
url = urljoin(
|
|
72
72
|
base=self._base_url,
|
|
73
|
-
url=f"/api/v1/organizations/{self.
|
|
73
|
+
url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/draft-table-names/{config_id}",
|
|
74
74
|
)
|
|
75
75
|
response = session.put(
|
|
76
76
|
url=url,
|
|
@@ -87,7 +87,7 @@ class DraftDatasourceConfigAction(AgentAction[DraftDatasourceConfigEvent]):
|
|
|
87
87
|
def get_draft_config(self, config_id: UUID) -> dict[str, Any]:
|
|
88
88
|
resource_url = urljoin(
|
|
89
89
|
base=self._base_url,
|
|
90
|
-
url=f"/api/v1/organizations/{self.
|
|
90
|
+
url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/draft-datasources/{config_id}",
|
|
91
91
|
)
|
|
92
92
|
with create_session(access_token=self._auth_key) as session:
|
|
93
93
|
response = session.get(resource_url)
|
|
@@ -10,9 +10,13 @@ from uuid import UUID
|
|
|
10
10
|
|
|
11
11
|
import great_expectations.expectations as gx_expectations
|
|
12
12
|
from great_expectations.core.http import create_session
|
|
13
|
-
from great_expectations.exceptions import
|
|
13
|
+
from great_expectations.exceptions import (
|
|
14
|
+
GXCloudError,
|
|
15
|
+
InvalidExpectationConfigurationError,
|
|
16
|
+
)
|
|
14
17
|
from great_expectations.expectations.metadata_types import (
|
|
15
18
|
DataQualityIssues,
|
|
19
|
+
FailureSeverity,
|
|
16
20
|
)
|
|
17
21
|
from great_expectations.expectations.window import Offset, Window
|
|
18
22
|
from great_expectations.experimental.metric_repository.batch_inspector import (
|
|
@@ -39,6 +43,7 @@ from great_expectations_cloud.agent.event_handler import register_event_action
|
|
|
39
43
|
from great_expectations_cloud.agent.exceptions import GXAgentError
|
|
40
44
|
from great_expectations_cloud.agent.models import (
|
|
41
45
|
CreatedResource,
|
|
46
|
+
DomainContext,
|
|
42
47
|
GenerateDataQualityCheckExpectationsEvent,
|
|
43
48
|
)
|
|
44
49
|
from great_expectations_cloud.agent.utils import (
|
|
@@ -48,9 +53,7 @@ from great_expectations_cloud.agent.utils import (
|
|
|
48
53
|
)
|
|
49
54
|
|
|
50
55
|
if TYPE_CHECKING:
|
|
51
|
-
from great_expectations.core.suite_parameters import
|
|
52
|
-
SuiteParameterDict,
|
|
53
|
-
)
|
|
56
|
+
from great_expectations.core.suite_parameters import SuiteParameterDict
|
|
54
57
|
from great_expectations.data_context import CloudDataContext
|
|
55
58
|
from great_expectations.datasource.fluent import DataAsset
|
|
56
59
|
|
|
@@ -81,13 +84,13 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
81
84
|
self,
|
|
82
85
|
context: CloudDataContext,
|
|
83
86
|
base_url: str,
|
|
84
|
-
|
|
87
|
+
domain_context: DomainContext,
|
|
85
88
|
auth_key: str,
|
|
86
89
|
metric_repository: MetricRepository | None = None,
|
|
87
90
|
batch_inspector: BatchInspector | None = None,
|
|
88
91
|
):
|
|
89
92
|
super().__init__(
|
|
90
|
-
context=context, base_url=base_url,
|
|
93
|
+
context=context, base_url=base_url, domain_context=domain_context, auth_key=auth_key
|
|
91
94
|
)
|
|
92
95
|
self._metric_repository = metric_repository or MetricRepository(
|
|
93
96
|
data_store=CloudDataStore(self._context)
|
|
@@ -101,6 +104,7 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
101
104
|
created_resources: list[CreatedResource] = []
|
|
102
105
|
assets_with_errors: list[str] = []
|
|
103
106
|
selected_dqis: Sequence[DataQualityIssues] = event.selected_data_quality_issues or []
|
|
107
|
+
created_via: str | None = event.created_via or None
|
|
104
108
|
for asset_name in event.data_assets:
|
|
105
109
|
try:
|
|
106
110
|
data_asset = self._retrieve_asset_from_asset_name(event, asset_name)
|
|
@@ -122,6 +126,7 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
122
126
|
volume_change_expectation_id = self._add_volume_change_expectation(
|
|
123
127
|
asset_id=data_asset.id,
|
|
124
128
|
use_forecast=event.use_forecast,
|
|
129
|
+
created_via=created_via,
|
|
125
130
|
)
|
|
126
131
|
created_resources.append(
|
|
127
132
|
CreatedResource(
|
|
@@ -134,7 +139,7 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
134
139
|
pre_existing_anomaly_detection_coverage=pre_existing_anomaly_detection_coverage,
|
|
135
140
|
):
|
|
136
141
|
schema_change_expectation_id = self._add_schema_change_expectation(
|
|
137
|
-
metric_run=metric_run, asset_id=data_asset.id
|
|
142
|
+
metric_run=metric_run, asset_id=data_asset.id, created_via=created_via
|
|
138
143
|
)
|
|
139
144
|
created_resources.append(
|
|
140
145
|
CreatedResource(
|
|
@@ -152,6 +157,8 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
152
157
|
metric_run=metric_run,
|
|
153
158
|
asset_id=data_asset.id,
|
|
154
159
|
pre_existing_completeness_change_expectations=pre_existing_completeness_change_expectations,
|
|
160
|
+
created_via=created_via,
|
|
161
|
+
use_forecast=event.use_forecast,
|
|
155
162
|
)
|
|
156
163
|
for exp_id in completeness_change_expectation_ids:
|
|
157
164
|
created_resources.append(
|
|
@@ -198,7 +205,7 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
198
205
|
metric_list=[
|
|
199
206
|
MetricTypes.TABLE_COLUMNS,
|
|
200
207
|
MetricTypes.TABLE_COLUMN_TYPES,
|
|
201
|
-
MetricTypes.
|
|
208
|
+
MetricTypes.COLUMN_NON_NULL_COUNT,
|
|
202
209
|
MetricTypes.TABLE_ROW_COUNT,
|
|
203
210
|
],
|
|
204
211
|
)
|
|
@@ -217,7 +224,7 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
217
224
|
"""
|
|
218
225
|
url = urljoin(
|
|
219
226
|
base=self._base_url,
|
|
220
|
-
url=f"/api/v1/organizations/{self.
|
|
227
|
+
url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/expectations/",
|
|
221
228
|
)
|
|
222
229
|
with create_session(access_token=self._auth_key) as session:
|
|
223
230
|
response = session.get(
|
|
@@ -265,7 +272,9 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
265
272
|
and len(pre_existing_anomaly_detection_coverage.get(DataQualityIssues.SCHEMA, [])) == 0
|
|
266
273
|
)
|
|
267
274
|
|
|
268
|
-
def _add_volume_change_expectation(
|
|
275
|
+
def _add_volume_change_expectation(
|
|
276
|
+
self, asset_id: UUID | None, use_forecast: bool, created_via: str | None
|
|
277
|
+
) -> UUID:
|
|
269
278
|
unique_id = param_safe_unique_id(16)
|
|
270
279
|
lower_bound_param_name = f"{unique_id}_min_value_min"
|
|
271
280
|
upper_bound_param_name = f"{unique_id}_max_value_max"
|
|
@@ -311,13 +320,16 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
311
320
|
strict_max=strict_max,
|
|
312
321
|
min_value=min_value,
|
|
313
322
|
max_value=max_value,
|
|
323
|
+
severity=FailureSeverity.WARNING,
|
|
314
324
|
)
|
|
315
325
|
expectation_id = self._create_expectation_for_asset(
|
|
316
|
-
expectation=expectation, asset_id=asset_id
|
|
326
|
+
expectation=expectation, asset_id=asset_id, created_via=created_via
|
|
317
327
|
)
|
|
318
328
|
return expectation_id
|
|
319
329
|
|
|
320
|
-
def _add_schema_change_expectation(
|
|
330
|
+
def _add_schema_change_expectation(
|
|
331
|
+
self, metric_run: MetricRun, asset_id: UUID | None, created_via: str | None
|
|
332
|
+
) -> UUID:
|
|
321
333
|
# Find the TABLE_COLUMNS metric by type instead of assuming it's at position 0
|
|
322
334
|
table_columns_metric = next(
|
|
323
335
|
(
|
|
@@ -331,10 +343,11 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
331
343
|
raise RuntimeError("missing TABLE_COLUMNS metric") # noqa: TRY003
|
|
332
344
|
|
|
333
345
|
expectation = gx_expectations.ExpectTableColumnsToMatchSet(
|
|
334
|
-
column_set=table_columns_metric.value
|
|
346
|
+
column_set=table_columns_metric.value,
|
|
347
|
+
severity=FailureSeverity.WARNING,
|
|
335
348
|
)
|
|
336
349
|
expectation_id = self._create_expectation_for_asset(
|
|
337
|
-
expectation=expectation, asset_id=asset_id
|
|
350
|
+
expectation=expectation, asset_id=asset_id, created_via=created_via
|
|
338
351
|
)
|
|
339
352
|
return expectation_id
|
|
340
353
|
|
|
@@ -345,6 +358,8 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
345
358
|
pre_existing_completeness_change_expectations: list[
|
|
346
359
|
dict[Any, Any]
|
|
347
360
|
], # list of ExpectationConfiguration dicts
|
|
361
|
+
created_via: str | None,
|
|
362
|
+
use_forecast: bool = False,
|
|
348
363
|
) -> list[UUID]:
|
|
349
364
|
table_row_count = next(
|
|
350
365
|
metric
|
|
@@ -359,11 +374,11 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
359
374
|
metric
|
|
360
375
|
for metric in metric_run.metrics
|
|
361
376
|
if isinstance(metric, ColumnMetric)
|
|
362
|
-
and metric.metric_name == MetricTypes.
|
|
377
|
+
and metric.metric_name == MetricTypes.COLUMN_NON_NULL_COUNT
|
|
363
378
|
]
|
|
364
379
|
|
|
365
380
|
if not column_null_values_metric or len(column_null_values_metric) == 0:
|
|
366
|
-
raise RuntimeError("missing
|
|
381
|
+
raise RuntimeError("missing COLUMN_NON_NULL_COUNT metrics") # noqa: TRY003
|
|
367
382
|
|
|
368
383
|
expectation_ids = []
|
|
369
384
|
# Single-expectation approach using ExpectColumnProportionOfNonNullValuesToBeBetween
|
|
@@ -374,7 +389,7 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
374
389
|
)
|
|
375
390
|
for column in columns_missing_completeness_coverage:
|
|
376
391
|
column_name = column.column
|
|
377
|
-
|
|
392
|
+
non_null_count = column.value
|
|
378
393
|
row_count = table_row_count.value
|
|
379
394
|
expectation: gx_expectations.Expectation
|
|
380
395
|
|
|
@@ -384,18 +399,42 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
384
399
|
max_param_name = f"{unique_id}_proportion_max"
|
|
385
400
|
|
|
386
401
|
# Calculate non-null proportion
|
|
387
|
-
non_null_count = row_count - null_count if row_count > 0 else 0
|
|
388
402
|
non_null_proportion = non_null_count / row_count if row_count > 0 else 0
|
|
389
403
|
|
|
390
|
-
if
|
|
404
|
+
if use_forecast:
|
|
405
|
+
expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
|
|
406
|
+
windows=[
|
|
407
|
+
Window(
|
|
408
|
+
constraint_fn=ExpectationConstraintFunction.FORECAST,
|
|
409
|
+
parameter_name=min_param_name,
|
|
410
|
+
range=1,
|
|
411
|
+
offset=Offset(positive=0.0, negative=0.0),
|
|
412
|
+
strict=True,
|
|
413
|
+
),
|
|
414
|
+
Window(
|
|
415
|
+
constraint_fn=ExpectationConstraintFunction.FORECAST,
|
|
416
|
+
parameter_name=max_param_name,
|
|
417
|
+
range=1,
|
|
418
|
+
offset=Offset(positive=0.0, negative=0.0),
|
|
419
|
+
strict=True,
|
|
420
|
+
),
|
|
421
|
+
],
|
|
422
|
+
column=column_name,
|
|
423
|
+
min_value={"$PARAMETER": min_param_name},
|
|
424
|
+
max_value={"$PARAMETER": max_param_name},
|
|
425
|
+
severity=FailureSeverity.WARNING,
|
|
426
|
+
)
|
|
427
|
+
elif non_null_proportion == 0:
|
|
391
428
|
expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
|
|
392
429
|
column=column_name,
|
|
393
430
|
max_value=0,
|
|
431
|
+
severity=FailureSeverity.WARNING,
|
|
394
432
|
)
|
|
395
433
|
elif non_null_proportion == 1:
|
|
396
434
|
expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
|
|
397
435
|
column=column_name,
|
|
398
436
|
min_value=1,
|
|
437
|
+
severity=FailureSeverity.WARNING,
|
|
399
438
|
)
|
|
400
439
|
else:
|
|
401
440
|
# Use triangular interpolation to compute min/max values
|
|
@@ -427,10 +466,11 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
427
466
|
column=column_name,
|
|
428
467
|
min_value={"$PARAMETER": min_param_name},
|
|
429
468
|
max_value={"$PARAMETER": max_param_name},
|
|
469
|
+
severity=FailureSeverity.WARNING,
|
|
430
470
|
)
|
|
431
471
|
|
|
432
472
|
expectation_id = self._create_expectation_for_asset(
|
|
433
|
-
expectation=expectation, asset_id=asset_id
|
|
473
|
+
expectation=expectation, asset_id=asset_id, created_via=created_via
|
|
434
474
|
)
|
|
435
475
|
expectation_ids.append(expectation_id)
|
|
436
476
|
|
|
@@ -478,16 +518,20 @@ class GenerateDataQualityCheckExpectationsAction(
|
|
|
478
518
|
return max(0.0001, round(triangular_interpolation(value, options), 5))
|
|
479
519
|
|
|
480
520
|
def _create_expectation_for_asset(
|
|
481
|
-
self,
|
|
521
|
+
self,
|
|
522
|
+
expectation: gx_expectations.Expectation,
|
|
523
|
+
asset_id: UUID | None,
|
|
524
|
+
created_via: str | None,
|
|
482
525
|
) -> UUID:
|
|
483
526
|
url = urljoin(
|
|
484
527
|
base=self._base_url,
|
|
485
|
-
url=f"/api/v1/organizations/{self.
|
|
528
|
+
url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/expectations/{asset_id}",
|
|
486
529
|
)
|
|
487
530
|
|
|
488
531
|
expectation_payload = expectation.configuration.to_json_dict()
|
|
489
532
|
expectation_payload["autogenerated"] = True
|
|
490
|
-
|
|
533
|
+
if created_via is not None:
|
|
534
|
+
expectation_payload["created_via"] = created_via
|
|
491
535
|
|
|
492
536
|
# Backend expects `expectation_type` instead of `type`:
|
|
493
537
|
expectation_type = expectation_payload.pop("type")
|
|
@@ -13,9 +13,7 @@ from great_expectations_cloud.agent.actions.agent_action import (
|
|
|
13
13
|
)
|
|
14
14
|
from great_expectations_cloud.agent.actions.utils import get_asset_names
|
|
15
15
|
from great_expectations_cloud.agent.event_handler import register_event_action
|
|
16
|
-
from great_expectations_cloud.agent.models import
|
|
17
|
-
ListAssetNamesEvent,
|
|
18
|
-
)
|
|
16
|
+
from great_expectations_cloud.agent.models import ListAssetNamesEvent
|
|
19
17
|
|
|
20
18
|
|
|
21
19
|
class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
|
|
@@ -34,7 +32,8 @@ class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
|
|
|
34
32
|
asset_names = get_asset_names(datasource)
|
|
35
33
|
|
|
36
34
|
self._add_or_update_asset_names_list(
|
|
37
|
-
datasource_id=str(datasource.id),
|
|
35
|
+
datasource_id=str(datasource.id),
|
|
36
|
+
asset_names=asset_names,
|
|
38
37
|
)
|
|
39
38
|
|
|
40
39
|
return ActionResult(
|
|
@@ -47,7 +46,7 @@ class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
|
|
|
47
46
|
with create_session(access_token=self._auth_key) as session:
|
|
48
47
|
url = urljoin(
|
|
49
48
|
base=self._base_url,
|
|
50
|
-
url=f"/api/v1/organizations/{self.
|
|
49
|
+
url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/table-names/{datasource_id}",
|
|
51
50
|
)
|
|
52
51
|
response = session.put(
|
|
53
52
|
url=url,
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import logging
|
|
4
|
+
import socket
|
|
3
5
|
from dataclasses import dataclass
|
|
4
|
-
from typing import TYPE_CHECKING, Any
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Final
|
|
5
7
|
|
|
6
8
|
from typing_extensions import override
|
|
7
9
|
|
|
@@ -21,6 +23,8 @@ if TYPE_CHECKING:
|
|
|
21
23
|
from great_expectations.data_context import CloudDataContext
|
|
22
24
|
from great_expectations.datasource.fluent.interfaces import DataAsset, Datasource
|
|
23
25
|
|
|
26
|
+
LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
|
|
27
|
+
|
|
24
28
|
|
|
25
29
|
class RunCheckpointAction(AgentAction[RunCheckpointEvent]):
|
|
26
30
|
@override
|
|
@@ -45,12 +49,28 @@ def run_checkpoint(
|
|
|
45
49
|
id: str,
|
|
46
50
|
expectation_parameters: dict[str, Any] | None = None,
|
|
47
51
|
) -> ActionResult:
|
|
52
|
+
"""Run a checkpoint and return the result."""
|
|
53
|
+
hostname = socket.gethostname()
|
|
54
|
+
log_extra = {
|
|
55
|
+
"correlation_id": id,
|
|
56
|
+
"checkpoint_name": event.checkpoint_name,
|
|
57
|
+
"hostname": hostname,
|
|
58
|
+
}
|
|
59
|
+
|
|
48
60
|
# the checkpoint_name property on possible events is optional for backwards compatibility,
|
|
49
61
|
# but this action requires it in order to run:
|
|
50
62
|
if not event.checkpoint_name:
|
|
51
63
|
raise MissingCheckpointNameError
|
|
52
64
|
|
|
65
|
+
LOGGER.debug("Fetching checkpoint from context", extra=log_extra)
|
|
53
66
|
checkpoint = context.checkpoints.get(name=event.checkpoint_name)
|
|
67
|
+
LOGGER.debug(
|
|
68
|
+
"Checkpoint fetched successfully",
|
|
69
|
+
extra={
|
|
70
|
+
**log_extra,
|
|
71
|
+
"validation_definitions_count": len(checkpoint.validation_definitions),
|
|
72
|
+
},
|
|
73
|
+
)
|
|
54
74
|
|
|
55
75
|
# only GX-managed Checkpoints are currently validated here and they contain only one validation definition, but
|
|
56
76
|
# the Checkpoint does allow for multiple validation definitions so we'll be defensive and ensure we only test each
|
|
@@ -66,15 +86,48 @@ def run_checkpoint(
|
|
|
66
86
|
)
|
|
67
87
|
data_sources_assets_by_data_source_name[ds_name].assets_by_name[vd.asset.name] = vd.asset
|
|
68
88
|
|
|
69
|
-
|
|
89
|
+
# Test connections to all datasources and assets
|
|
90
|
+
for ds_name, data_sources_assets in data_sources_assets_by_data_source_name.items():
|
|
70
91
|
data_source = data_sources_assets.data_source
|
|
92
|
+
LOGGER.debug(
|
|
93
|
+
"Testing datasource connection",
|
|
94
|
+
extra={**log_extra, "datasource_name": ds_name},
|
|
95
|
+
)
|
|
71
96
|
data_source.test_connection(test_assets=False) # raises `TestConnectionError` on failure
|
|
72
|
-
|
|
97
|
+
LOGGER.debug(
|
|
98
|
+
"Datasource connection successful",
|
|
99
|
+
extra={**log_extra, "datasource_name": ds_name},
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
for asset_name, data_asset in data_sources_assets.assets_by_name.items():
|
|
103
|
+
LOGGER.debug(
|
|
104
|
+
"Testing data asset connection",
|
|
105
|
+
extra={**log_extra, "datasource_name": ds_name, "asset_name": asset_name},
|
|
106
|
+
)
|
|
73
107
|
data_asset.test_connection() # raises `TestConnectionError` on failure
|
|
108
|
+
LOGGER.debug(
|
|
109
|
+
"Data asset connection successful",
|
|
110
|
+
extra={**log_extra, "datasource_name": ds_name, "asset_name": asset_name},
|
|
111
|
+
)
|
|
74
112
|
|
|
113
|
+
LOGGER.debug(
|
|
114
|
+
"Running checkpoint",
|
|
115
|
+
extra={
|
|
116
|
+
**log_extra,
|
|
117
|
+
"datasources_count": len(data_sources_assets_by_data_source_name),
|
|
118
|
+
"has_expectation_parameters": expectation_parameters is not None,
|
|
119
|
+
},
|
|
120
|
+
)
|
|
75
121
|
checkpoint_run_result = checkpoint.run(
|
|
76
122
|
batch_parameters=event.splitter_options, expectation_parameters=expectation_parameters
|
|
77
123
|
)
|
|
124
|
+
LOGGER.debug(
|
|
125
|
+
"Checkpoint run completed",
|
|
126
|
+
extra={
|
|
127
|
+
**log_extra,
|
|
128
|
+
"run_results_count": len(checkpoint_run_result.run_results),
|
|
129
|
+
},
|
|
130
|
+
)
|
|
78
131
|
|
|
79
132
|
validation_results = checkpoint_run_result.run_results
|
|
80
133
|
created_resources = []
|
|
@@ -88,6 +141,14 @@ def run_checkpoint(
|
|
|
88
141
|
)
|
|
89
142
|
created_resources.append(created_resource)
|
|
90
143
|
|
|
144
|
+
LOGGER.debug(
|
|
145
|
+
"Checkpoint action completed successfully",
|
|
146
|
+
extra={
|
|
147
|
+
**log_extra,
|
|
148
|
+
"created_resources_count": len(created_resources),
|
|
149
|
+
},
|
|
150
|
+
)
|
|
151
|
+
|
|
91
152
|
return ActionResult(
|
|
92
153
|
id=id,
|
|
93
154
|
type=event.type,
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
|
-
from uuid import UUID
|
|
5
4
|
|
|
6
5
|
from great_expectations.experimental.metric_repository.batch_inspector import (
|
|
7
6
|
BatchInspector,
|
|
@@ -21,6 +20,7 @@ from great_expectations_cloud.agent.actions import ActionResult, AgentAction
|
|
|
21
20
|
from great_expectations_cloud.agent.event_handler import register_event_action
|
|
22
21
|
from great_expectations_cloud.agent.models import (
|
|
23
22
|
CreatedResource,
|
|
23
|
+
DomainContext,
|
|
24
24
|
RunMetricsListEvent,
|
|
25
25
|
)
|
|
26
26
|
|
|
@@ -34,13 +34,13 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):
|
|
|
34
34
|
self,
|
|
35
35
|
context: CloudDataContext,
|
|
36
36
|
base_url: str,
|
|
37
|
-
|
|
37
|
+
domain_context: DomainContext,
|
|
38
38
|
auth_key: str,
|
|
39
39
|
metric_repository: MetricRepository | None = None,
|
|
40
40
|
batch_inspector: BatchInspector | None = None,
|
|
41
41
|
):
|
|
42
42
|
super().__init__(
|
|
43
|
-
context=context, base_url=base_url,
|
|
43
|
+
context=context, base_url=base_url, domain_context=domain_context, auth_key=auth_key
|
|
44
44
|
)
|
|
45
45
|
self._metric_repository = metric_repository or MetricRepository(
|
|
46
46
|
data_store=CloudDataStore(self._context)
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import logging
|
|
4
|
+
import socket
|
|
5
|
+
from typing import TYPE_CHECKING, Final
|
|
4
6
|
from urllib.parse import urljoin
|
|
5
7
|
|
|
6
8
|
from great_expectations.core.http import create_session
|
|
@@ -13,20 +15,20 @@ from great_expectations_cloud.agent.actions.agent_action import (
|
|
|
13
15
|
)
|
|
14
16
|
from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
|
|
15
17
|
from great_expectations_cloud.agent.event_handler import register_event_action
|
|
16
|
-
from great_expectations_cloud.agent.models import
|
|
17
|
-
RunScheduledCheckpointEvent,
|
|
18
|
-
)
|
|
18
|
+
from great_expectations_cloud.agent.models import RunScheduledCheckpointEvent
|
|
19
19
|
|
|
20
20
|
if TYPE_CHECKING:
|
|
21
21
|
from great_expectations.data_context import CloudDataContext
|
|
22
22
|
|
|
23
|
+
LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
|
|
24
|
+
|
|
23
25
|
|
|
24
26
|
class RunScheduledCheckpointAction(AgentAction[RunScheduledCheckpointEvent]):
|
|
25
27
|
@override
|
|
26
28
|
def run(self, event: RunScheduledCheckpointEvent, id: str) -> ActionResult:
|
|
27
29
|
expectation_parameters_url = urljoin(
|
|
28
30
|
base=self._base_url,
|
|
29
|
-
url=f"/api/v1/organizations/{self.
|
|
31
|
+
url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
|
|
30
32
|
)
|
|
31
33
|
return run_scheduled_checkpoint(
|
|
32
34
|
context=self._context,
|
|
@@ -40,16 +42,32 @@ class RunScheduledCheckpointAction(AgentAction[RunScheduledCheckpointEvent]):
|
|
|
40
42
|
def run_scheduled_checkpoint(
|
|
41
43
|
context: CloudDataContext, event: RunScheduledCheckpointEvent, id: str, auth_key: str, url: str
|
|
42
44
|
) -> ActionResult:
|
|
45
|
+
"""Run a scheduled checkpoint, fetching expectation parameters from GX Cloud first."""
|
|
46
|
+
hostname = socket.gethostname()
|
|
47
|
+
log_extra = {
|
|
48
|
+
"correlation_id": id,
|
|
49
|
+
"checkpoint_id": str(event.checkpoint_id),
|
|
50
|
+
"schedule_id": str(event.schedule_id),
|
|
51
|
+
"hostname": hostname,
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
LOGGER.debug("Fetching expectation parameters from GX Cloud", extra=log_extra)
|
|
43
55
|
with create_session(access_token=auth_key) as session:
|
|
44
56
|
response = session.get(url=url)
|
|
45
57
|
|
|
46
58
|
if not response.ok:
|
|
59
|
+
LOGGER.error(
|
|
60
|
+
"Failed to fetch expectation parameters",
|
|
61
|
+
extra={**log_extra, "response_status": response.status_code},
|
|
62
|
+
)
|
|
47
63
|
raise GXCloudError(
|
|
48
64
|
message=f"RunScheduledCheckpointAction encountered an error while connecting to GX Cloud. "
|
|
49
65
|
f"Unable to retrieve expectation_parameters for Checkpoint with ID={event.checkpoint_id}.",
|
|
50
66
|
response=response,
|
|
51
67
|
)
|
|
52
68
|
data = response.json()
|
|
69
|
+
LOGGER.debug("Expectation parameters fetched successfully", extra=log_extra)
|
|
70
|
+
|
|
53
71
|
try:
|
|
54
72
|
expectation_parameters = (
|
|
55
73
|
data["data"]["expectation_parameters"]
|
|
@@ -57,11 +75,16 @@ def run_scheduled_checkpoint(
|
|
|
57
75
|
else None
|
|
58
76
|
)
|
|
59
77
|
except KeyError as e:
|
|
78
|
+
LOGGER.exception("Malformed response from GX Cloud", extra=log_extra)
|
|
60
79
|
raise GXCloudError(
|
|
61
80
|
message="Malformed response received from GX Cloud",
|
|
62
81
|
response=response,
|
|
63
82
|
) from e
|
|
64
83
|
|
|
84
|
+
LOGGER.debug(
|
|
85
|
+
"Proceeding to run checkpoint",
|
|
86
|
+
extra={**log_extra, "has_expectation_parameters": expectation_parameters is not None},
|
|
87
|
+
)
|
|
65
88
|
return run_checkpoint(context, event, id, expectation_parameters=expectation_parameters)
|
|
66
89
|
|
|
67
90
|
|
|
@@ -13,9 +13,7 @@ from great_expectations_cloud.agent.actions.agent_action import (
|
|
|
13
13
|
)
|
|
14
14
|
from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
|
|
15
15
|
from great_expectations_cloud.agent.event_handler import register_event_action
|
|
16
|
-
from great_expectations_cloud.agent.models import
|
|
17
|
-
RunWindowCheckpointEvent,
|
|
18
|
-
)
|
|
16
|
+
from great_expectations_cloud.agent.models import RunWindowCheckpointEvent
|
|
19
17
|
|
|
20
18
|
if TYPE_CHECKING:
|
|
21
19
|
from great_expectations.data_context import CloudDataContext
|
|
@@ -26,7 +24,7 @@ class RunWindowCheckpointAction(AgentAction[RunWindowCheckpointEvent]):
|
|
|
26
24
|
def run(self, event: RunWindowCheckpointEvent, id: str) -> ActionResult:
|
|
27
25
|
expectation_parameters_url = urljoin(
|
|
28
26
|
base=self._base_url,
|
|
29
|
-
url=f"/api/v1/organizations/{self.
|
|
27
|
+
url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
|
|
30
28
|
)
|
|
31
29
|
return run_window_checkpoint(
|
|
32
30
|
self._context,
|
|
@@ -7,10 +7,13 @@ from sqlalchemy import inspect
|
|
|
7
7
|
|
|
8
8
|
if TYPE_CHECKING:
|
|
9
9
|
from sqlalchemy.engine import Inspector
|
|
10
|
+
from sqlalchemy.sql.compiler import IdentifierPreparer
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def get_asset_names(datasource: SQLDatasource) -> list[str]:
|
|
13
14
|
inspector: Inspector = inspect(datasource.get_engine())
|
|
15
|
+
identifier_preparer: IdentifierPreparer = inspector.dialect.identifier_preparer
|
|
16
|
+
|
|
14
17
|
if isinstance(datasource, SnowflakeDatasource) and datasource.schema_:
|
|
15
18
|
# Snowflake-SQLAlchemy uses the default_schema if no schema is provided to get_table_names
|
|
16
19
|
# Or if the role does not have access to the schema (it silently fails and defaults to using default_schema)
|
|
@@ -19,8 +22,14 @@ def get_asset_names(datasource: SQLDatasource) -> list[str]:
|
|
|
19
22
|
# Also converting to list to ensure JSON serializable
|
|
20
23
|
tables = list(inspector.get_table_names(schema=datasource.schema_))
|
|
21
24
|
views = list(inspector.get_view_names(schema=datasource.schema_))
|
|
22
|
-
|
|
25
|
+
asset_names = tables + views
|
|
26
|
+
else:
|
|
27
|
+
tables = list(inspector.get_table_names())
|
|
28
|
+
views = list(inspector.get_view_names())
|
|
29
|
+
asset_names = tables + views
|
|
23
30
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
31
|
+
# the identifier preparer adds quotes when they are necessary
|
|
32
|
+
quoted_asset_names: list[str] = [
|
|
33
|
+
identifier_preparer.quote(asset_name) for asset_name in asset_names
|
|
34
|
+
]
|
|
35
|
+
return quoted_asset_names
|