great-expectations-cloud 20250811.1.dev0__py3-none-any.whl → 20260113.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of great-expectations-cloud might be problematic.
- great_expectations_cloud/agent/actions/agent_action.py +3 -3
- great_expectations_cloud/agent/actions/draft_datasource_config_action.py +2 -2
- great_expectations_cloud/agent/actions/generate_data_quality_check_expectations_action.py +47 -24
- great_expectations_cloud/agent/actions/list_asset_names.py +4 -5
- great_expectations_cloud/agent/actions/run_checkpoint.py +64 -3
- great_expectations_cloud/agent/actions/run_metric_list_action.py +3 -3
- great_expectations_cloud/agent/actions/run_scheduled_checkpoint.py +28 -5
- great_expectations_cloud/agent/actions/run_window_checkpoint.py +2 -4
- great_expectations_cloud/agent/actions/utils.py +13 -4
- great_expectations_cloud/agent/agent.py +259 -36
- great_expectations_cloud/agent/event_handler.py +8 -7
- great_expectations_cloud/agent/message_service/asyncio_rabbit_mq_client.py +33 -8
- great_expectations_cloud/agent/message_service/subscriber.py +4 -0
- great_expectations_cloud/agent/models.py +13 -0
- {great_expectations_cloud-20250811.1.dev0.dist-info → great_expectations_cloud-20260113.0.dev1.dist-info}/METADATA +7 -5
- {great_expectations_cloud-20250811.1.dev0.dist-info → great_expectations_cloud-20260113.0.dev1.dist-info}/RECORD +19 -19
- {great_expectations_cloud-20250811.1.dev0.dist-info → great_expectations_cloud-20260113.0.dev1.dist-info}/WHEEL +1 -1
- {great_expectations_cloud-20250811.1.dev0.dist-info → great_expectations_cloud-20260113.0.dev1.dist-info}/entry_points.txt +0 -0
- {great_expectations_cloud-20250811.1.dev0.dist-info → great_expectations_cloud-20260113.0.dev1.dist-info/licenses}/LICENSE +0 -0
great_expectations_cloud/agent/actions/agent_action.py:

@@ -4,7 +4,6 @@ import datetime
 from abc import abstractmethod
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Generic, Optional, TypeVar, Union
-from uuid import UUID
 
 from pydantic.v1 import BaseModel
 
@@ -12,6 +11,7 @@ from great_expectations_cloud.agent.models import (
     AgentBaseExtraForbid,
     AgentBaseExtraIgnore,
     CreatedResource,
+    DomainContext,
 )
 
 if TYPE_CHECKING:
@@ -32,11 +32,11 @@ _EventT = TypeVar("_EventT", bound=Union[AgentBaseExtraForbid, AgentBaseExtraIgnore])
 
 class AgentAction(Generic[_EventT]):
     def __init__(
-        self, context: CloudDataContext, base_url: str, organization_id: UUID, auth_key: str
+        self, context: CloudDataContext, base_url: str, domain_context: DomainContext, auth_key: str
     ):
         self._context = context
         self._base_url = base_url
-        self._organization_id = organization_id
+        self._domain_context = domain_context
         self._auth_key = auth_key
 
     @abstractmethod
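Every action constructor now takes a DomainContext in place of a bare organization_id, which is what lets the URL builders in the files below address workspace-scoped endpoints. A minimal sketch of the new calling convention follows; DomainContext is defined in great_expectations_cloud/agent/models.py (+13 lines in this release, hunk not shown above), so the field names here are assumptions inferred from the URL templates, not the actual definition:

    from uuid import UUID, uuid4
    from pydantic.v1 import BaseModel

    class DomainContext(BaseModel):  # assumed shape, inferred from usage below
        organization_id: UUID
        workspace_id: UUID

    # An action subclass would then be constructed roughly like this:
    # action = MyAction(
    #     context=cloud_data_context,
    #     base_url="https://api.greatexpectations.io",
    #     domain_context=DomainContext(organization_id=uuid4(), workspace_id=uuid4()),
    #     auth_key=token,
    # )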
great_expectations_cloud/agent/actions/draft_datasource_config_action.py:

@@ -70,7 +70,7 @@ class DraftDatasourceConfigAction(AgentAction[DraftDatasourceConfigEvent]):
         with create_session(access_token=self._auth_key) as session:
             url = urljoin(
                 base=self._base_url,
-                url=f"/api/v1/organizations/{self._organization_id}/draft-table-names/{config_id}",
+                url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/draft-table-names/{config_id}",
             )
             response = session.put(
                 url=url,
@@ -87,7 +87,7 @@ class DraftDatasourceConfigAction(AgentAction[DraftDatasourceConfigEvent]):
     def get_draft_config(self, config_id: UUID) -> dict[str, Any]:
         resource_url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self._organization_id}/draft-datasources/{config_id}",
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/draft-datasources/{config_id}",
         )
         with create_session(access_token=self._auth_key) as session:
             response = session.get(resource_url)
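The same organization-plus-workspace path rewrite repeats in every file below. One property of urllib.parse.urljoin worth keeping in mind when reading these hunks: a url argument that starts with "/" replaces the entire path of base, so the result is independent of any path already on the base URL. A quick illustration (the IDs are placeholders):

    from urllib.parse import urljoin

    base = "https://api.greatexpectations.io/some/prefix"
    url = urljoin(base=base, url="/api/v1/organizations/org-id/workspaces/ws-id/draft-datasources/cfg-id")
    print(url)
    # https://api.greatexpectations.io/api/v1/organizations/org-id/workspaces/ws-id/draft-datasources/cfg-id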
great_expectations_cloud/agent/actions/generate_data_quality_check_expectations_action.py:

@@ -10,9 +10,13 @@ from uuid import UUID
 
 import great_expectations.expectations as gx_expectations
 from great_expectations.core.http import create_session
-from great_expectations.exceptions import GXCloudError, InvalidExpectationConfigurationError
+from great_expectations.exceptions import (
+    GXCloudError,
+    InvalidExpectationConfigurationError,
+)
 from great_expectations.expectations.metadata_types import (
     DataQualityIssues,
+    FailureSeverity,
 )
 from great_expectations.expectations.window import Offset, Window
 from great_expectations.experimental.metric_repository.batch_inspector import (
@@ -39,6 +43,7 @@ from great_expectations_cloud.agent.event_handler import register_event_action
 from great_expectations_cloud.agent.exceptions import GXAgentError
 from great_expectations_cloud.agent.models import (
     CreatedResource,
+    DomainContext,
     GenerateDataQualityCheckExpectationsEvent,
 )
 from great_expectations_cloud.agent.utils import (
@@ -48,9 +53,7 @@ from great_expectations_cloud.agent.utils import (
 )
 
 if TYPE_CHECKING:
-    from great_expectations.core.suite_parameters import (
-        SuiteParameterDict,
-    )
+    from great_expectations.core.suite_parameters import SuiteParameterDict
     from great_expectations.data_context import CloudDataContext
     from great_expectations.datasource.fluent import DataAsset
 
@@ -81,13 +84,13 @@ class GenerateDataQualityCheckExpectationsAction(
         self,
         context: CloudDataContext,
         base_url: str,
-        organization_id: UUID,
+        domain_context: DomainContext,
         auth_key: str,
         metric_repository: MetricRepository | None = None,
         batch_inspector: BatchInspector | None = None,
     ):
         super().__init__(
-            context=context, base_url=base_url, organization_id=organization_id, auth_key=auth_key
+            context=context, base_url=base_url, domain_context=domain_context, auth_key=auth_key
         )
         self._metric_repository = metric_repository or MetricRepository(
             data_store=CloudDataStore(self._context)
@@ -155,6 +158,7 @@ class GenerateDataQualityCheckExpectationsAction(
                 asset_id=data_asset.id,
                 pre_existing_completeness_change_expectations=pre_existing_completeness_change_expectations,
                 created_via=created_via,
+                use_forecast=event.use_forecast,
             )
             for exp_id in completeness_change_expectation_ids:
                 created_resources.append(
@@ -201,7 +205,7 @@ class GenerateDataQualityCheckExpectationsAction(
             metric_list=[
                 MetricTypes.TABLE_COLUMNS,
                 MetricTypes.TABLE_COLUMN_TYPES,
-                MetricTypes.COLUMN_NULL_COUNT,
+                MetricTypes.COLUMN_NON_NULL_COUNT,
                 MetricTypes.TABLE_ROW_COUNT,
             ],
         )
@@ -220,7 +224,7 @@ class GenerateDataQualityCheckExpectationsAction(
         """
         url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self._organization_id}/expectations/",
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/expectations/",
         )
         with create_session(access_token=self._auth_key) as session:
             response = session.get(
@@ -316,6 +320,7 @@ class GenerateDataQualityCheckExpectationsAction(
                 strict_max=strict_max,
                 min_value=min_value,
                 max_value=max_value,
+                severity=FailureSeverity.WARNING,
             )
             expectation_id = self._create_expectation_for_asset(
                 expectation=expectation, asset_id=asset_id, created_via=created_via
@@ -338,7 +343,8 @@ class GenerateDataQualityCheckExpectationsAction(
             raise RuntimeError("missing TABLE_COLUMNS metric")  # noqa: TRY003
 
         expectation = gx_expectations.ExpectTableColumnsToMatchSet(
-            column_set=table_columns_metric.value
+            column_set=table_columns_metric.value,
+            severity=FailureSeverity.WARNING,
         )
         expectation_id = self._create_expectation_for_asset(
             expectation=expectation, asset_id=asset_id, created_via=created_via
@@ -353,6 +359,7 @@ class GenerateDataQualityCheckExpectationsAction(
             dict[Any, Any]
         ],  # list of ExpectationConfiguration dicts
         created_via: str | None,
+        use_forecast: bool = False,
     ) -> list[UUID]:
         table_row_count = next(
             metric
@@ -367,11 +374,11 @@ class GenerateDataQualityCheckExpectationsAction(
             metric
             for metric in metric_run.metrics
             if isinstance(metric, ColumnMetric)
-            and metric.metric_name == MetricTypes.COLUMN_NULL_COUNT
+            and metric.metric_name == MetricTypes.COLUMN_NON_NULL_COUNT
         ]
 
         if not column_null_values_metric or len(column_null_values_metric) == 0:
-            raise RuntimeError("missing COLUMN_NULL_COUNT metrics")  # noqa: TRY003
+            raise RuntimeError("missing COLUMN_NON_NULL_COUNT metrics")  # noqa: TRY003
 
         expectation_ids = []
         # Single-expectation approach using ExpectColumnProportionOfNonNullValuesToBeBetween
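The completeness metric flips from a null count (the removed lines, as reconstructed here) to COLUMN_NON_NULL_COUNT, which removes one derivation step when computing the proportion the expectation is built from. A toy check of the equivalence, with illustrative numbers:

    row_count = 1_000
    non_null_count = 950  # new metric: COLUMN_NON_NULL_COUNT
    non_null_proportion = non_null_count / row_count if row_count > 0 else 0  # 0.95

    null_count = 50  # old metric; the same proportion had to be derived indirectly
    assert (row_count - null_count) / row_count == non_null_proportion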
@@ -382,7 +389,7 @@ class GenerateDataQualityCheckExpectationsAction(
         )
         for column in columns_missing_completeness_coverage:
             column_name = column.column
-            null_count = column.value
+            non_null_count = column.value
             row_count = table_row_count.value
             expectation: gx_expectations.Expectation
 
@@ -392,18 +399,42 @@ class GenerateDataQualityCheckExpectationsAction(
             max_param_name = f"{unique_id}_proportion_max"
 
             # Calculate non-null proportion
-            non_null_count = row_count - null_count if row_count > 0 else 0
             non_null_proportion = non_null_count / row_count if row_count > 0 else 0
 
-            if non_null_proportion == 0:
+            if use_forecast:
+                expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
+                    windows=[
+                        Window(
+                            constraint_fn=ExpectationConstraintFunction.FORECAST,
+                            parameter_name=min_param_name,
+                            range=1,
+                            offset=Offset(positive=0.0, negative=0.0),
+                            strict=True,
+                        ),
+                        Window(
+                            constraint_fn=ExpectationConstraintFunction.FORECAST,
+                            parameter_name=max_param_name,
+                            range=1,
+                            offset=Offset(positive=0.0, negative=0.0),
+                            strict=True,
+                        ),
+                    ],
+                    column=column_name,
+                    min_value={"$PARAMETER": min_param_name},
+                    max_value={"$PARAMETER": max_param_name},
+                    severity=FailureSeverity.WARNING,
+                )
+            elif non_null_proportion == 0:
                 expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
                     column=column_name,
                     max_value=0,
+                    severity=FailureSeverity.WARNING,
                 )
             elif non_null_proportion == 1:
                 expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
                     column=column_name,
                     min_value=1,
+                    severity=FailureSeverity.WARNING,
                 )
             else:
                 # Use triangular interpolation to compute min/max values
@@ -435,6 +466,7 @@ class GenerateDataQualityCheckExpectationsAction(
                     column=column_name,
                     min_value={"$PARAMETER": min_param_name},
                     max_value={"$PARAMETER": max_param_name},
+                    severity=FailureSeverity.WARNING,
                 )
 
             expectation_id = self._create_expectation_for_asset(
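In the forecast branch, min_value and max_value are not literals: the {"$PARAMETER": name} placeholders defer the bounds to run time, which is why run_checkpoint (further below) threads an expectation_parameters dict into checkpoint.run. A sketch of that indirection, with a made-up parameter prefix:

    import great_expectations.expectations as gx_expectations

    expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
        column="email",
        min_value={"$PARAMETER": "abc123_proportion_min"},
        max_value={"$PARAMETER": "abc123_proportion_max"},
    )
    # At run time the agent supplies concrete bounds, roughly:
    # checkpoint.run(expectation_parameters={
    #     "abc123_proportion_min": 0.93,
    #     "abc123_proportion_max": 0.97,
    # })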
@@ -493,7 +525,7 @@ class GenerateDataQualityCheckExpectationsAction(
     ) -> UUID:
         url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self._organization_id}/expectations/{asset_id}",
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/expectations/{asset_id}",
         )
 
         expectation_payload = expectation.configuration.to_json_dict()
@@ -505,15 +537,6 @@ class GenerateDataQualityCheckExpectationsAction(
         expectation_type = expectation_payload.pop("type")
         expectation_payload["expectation_type"] = expectation_type
 
-        # Add failure severity to kwargs
-        if "kwargs" not in expectation_payload:
-            expectation_payload["kwargs"] = {}
-        if not isinstance(expectation_payload["kwargs"], dict):
-            raise InvalidExpectationConfigurationError(  # noqa: TRY003 # one off error
-                "Expectation configuration kwargs must be a dict."
-            )
-        expectation_payload["kwargs"]["severity"] = "warning"
-
         with create_session(access_token=self._auth_key) as session:
             response = session.post(url=url, json=expectation_payload)
 
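The deleted block above was patching "severity" into the serialized kwargs after the fact; it became dead weight once severity is passed to the expectation constructors directly (the FailureSeverity.WARNING arguments added throughout this file). A sketch of the new flow; exactly where severity lands in the serialized dict is not shown in this diff, so treat the final comment as an assumption:

    import great_expectations.expectations as gx_expectations
    from great_expectations.expectations.metadata_types import FailureSeverity

    expectation = gx_expectations.ExpectTableColumnsToMatchSet(
        column_set=["id", "email"],
        severity=FailureSeverity.WARNING,
    )
    payload = expectation.configuration.to_json_dict()
    # severity now travels inside the serialized configuration itself,
    # rather than being injected into payload["kwargs"] post hoc.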
great_expectations_cloud/agent/actions/list_asset_names.py:

@@ -13,9 +13,7 @@ from great_expectations_cloud.agent.actions.agent_action import (
 )
 from great_expectations_cloud.agent.actions.utils import get_asset_names
 from great_expectations_cloud.agent.event_handler import register_event_action
-from great_expectations_cloud.agent.models import (
-    ListAssetNamesEvent,
-)
+from great_expectations_cloud.agent.models import ListAssetNamesEvent
 
 
 class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
@@ -34,7 +32,8 @@ class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
         asset_names = get_asset_names(datasource)
 
         self._add_or_update_asset_names_list(
-            datasource_id=str(datasource.id), asset_names=asset_names
+            datasource_id=str(datasource.id),
+            asset_names=asset_names,
         )
 
         return ActionResult(
@@ -47,7 +46,7 @@ class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
         with create_session(access_token=self._auth_key) as session:
             url = urljoin(
                 base=self._base_url,
-                url=f"/api/v1/organizations/{self._organization_id}/table-names/{datasource_id}",
+                url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/table-names/{datasource_id}",
             )
             response = session.put(
                 url=url,
great_expectations_cloud/agent/actions/run_checkpoint.py:

@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import logging
+import socket
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Final
 
 from typing_extensions import override
 
@@ -21,6 +23,8 @@ if TYPE_CHECKING:
     from great_expectations.data_context import CloudDataContext
     from great_expectations.datasource.fluent.interfaces import DataAsset, Datasource
 
+LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
+
 
 class RunCheckpointAction(AgentAction[RunCheckpointEvent]):
     @override
@@ -45,12 +49,28 @@ def run_checkpoint(
     id: str,
     expectation_parameters: dict[str, Any] | None = None,
 ) -> ActionResult:
+    """Run a checkpoint and return the result."""
+    hostname = socket.gethostname()
+    log_extra = {
+        "correlation_id": id,
+        "checkpoint_name": event.checkpoint_name,
+        "hostname": hostname,
+    }
+
     # the checkpoint_name property on possible events is optional for backwards compatibility,
     # but this action requires it in order to run:
     if not event.checkpoint_name:
         raise MissingCheckpointNameError
 
+    LOGGER.debug("Fetching checkpoint from context", extra=log_extra)
     checkpoint = context.checkpoints.get(name=event.checkpoint_name)
+    LOGGER.debug(
+        "Checkpoint fetched successfully",
+        extra={
+            **log_extra,
+            "validation_definitions_count": len(checkpoint.validation_definitions),
+        },
+    )
 
     # only GX-managed Checkpoints are currently validated here and they contain only one validation definition, but
     # the Checkpoint does allow for multiple validation definitions so we'll be defensive and ensure we only test each
@@ -66,15 +86,48 @@ def run_checkpoint(
         )
         data_sources_assets_by_data_source_name[ds_name].assets_by_name[vd.asset.name] = vd.asset
 
-    for data_sources_assets in data_sources_assets_by_data_source_name.values():
+    # Test connections to all datasources and assets
+    for ds_name, data_sources_assets in data_sources_assets_by_data_source_name.items():
         data_source = data_sources_assets.data_source
+        LOGGER.debug(
+            "Testing datasource connection",
+            extra={**log_extra, "datasource_name": ds_name},
+        )
         data_source.test_connection(test_assets=False)  # raises `TestConnectionError` on failure
-        for data_asset in data_sources_assets.assets_by_name.values():
+        LOGGER.debug(
+            "Datasource connection successful",
+            extra={**log_extra, "datasource_name": ds_name},
+        )
+
+        for asset_name, data_asset in data_sources_assets.assets_by_name.items():
+            LOGGER.debug(
+                "Testing data asset connection",
+                extra={**log_extra, "datasource_name": ds_name, "asset_name": asset_name},
+            )
             data_asset.test_connection()  # raises `TestConnectionError` on failure
+            LOGGER.debug(
+                "Data asset connection successful",
+                extra={**log_extra, "datasource_name": ds_name, "asset_name": asset_name},
+            )
 
+    LOGGER.debug(
+        "Running checkpoint",
+        extra={
+            **log_extra,
+            "datasources_count": len(data_sources_assets_by_data_source_name),
+            "has_expectation_parameters": expectation_parameters is not None,
+        },
+    )
     checkpoint_run_result = checkpoint.run(
         batch_parameters=event.splitter_options, expectation_parameters=expectation_parameters
    )
+    LOGGER.debug(
+        "Checkpoint run completed",
+        extra={
+            **log_extra,
+            "run_results_count": len(checkpoint_run_result.run_results),
+        },
+    )
 
     validation_results = checkpoint_run_result.run_results
     created_resources = []
@@ -88,6 +141,14 @@ def run_checkpoint(
         )
         created_resources.append(created_resource)
 
+    LOGGER.debug(
+        "Checkpoint action completed successfully",
+        extra={
+            **log_extra,
+            "created_resources_count": len(created_resources),
+        },
+    )
+
     return ActionResult(
         id=id,
         type=event.type,
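All of the new LOGGER.debug calls in this file use the stdlib extra= mechanism: each key becomes an attribute on the LogRecord, so fields like correlation_id only appear in output if a formatter (or a structured/JSON handler) references them. A self-contained illustration; the format string here is ours, not the agent's actual logging configuration:

    import logging
    import socket

    logging.basicConfig(
        level=logging.DEBUG,
        format="%(levelname)s %(message)s correlation_id=%(correlation_id)s hostname=%(hostname)s",
    )
    logger = logging.getLogger("demo")
    log_extra = {"correlation_id": "abc-123", "hostname": socket.gethostname()}
    logger.debug("Fetching checkpoint from context", extra=log_extra)
    # DEBUG Fetching checkpoint from context correlation_id=abc-123 hostname=<your-host>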
great_expectations_cloud/agent/actions/run_metric_list_action.py:

@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
-from uuid import UUID
 
 from great_expectations.experimental.metric_repository.batch_inspector import (
     BatchInspector,
@@ -21,6 +20,7 @@ from great_expectations_cloud.agent.actions import ActionResult, AgentAction
 from great_expectations_cloud.agent.event_handler import register_event_action
 from great_expectations_cloud.agent.models import (
     CreatedResource,
+    DomainContext,
     RunMetricsListEvent,
 )
 
@@ -34,13 +34,13 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):
         self,
         context: CloudDataContext,
         base_url: str,
-        organization_id: UUID,
+        domain_context: DomainContext,
         auth_key: str,
         metric_repository: MetricRepository | None = None,
         batch_inspector: BatchInspector | None = None,
     ):
         super().__init__(
-            context=context, base_url=base_url, organization_id=organization_id, auth_key=auth_key
+            context=context, base_url=base_url, domain_context=domain_context, auth_key=auth_key
         )
         self._metric_repository = metric_repository or MetricRepository(
             data_store=CloudDataStore(self._context)
great_expectations_cloud/agent/actions/run_scheduled_checkpoint.py:

@@ -1,6 +1,8 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+import logging
+import socket
+from typing import TYPE_CHECKING, Final
 from urllib.parse import urljoin
 
 from great_expectations.core.http import create_session
@@ -13,20 +15,20 @@ from great_expectations_cloud.agent.actions.agent_action import (
 )
 from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
 from great_expectations_cloud.agent.event_handler import register_event_action
-from great_expectations_cloud.agent.models import (
-    RunScheduledCheckpointEvent,
-)
+from great_expectations_cloud.agent.models import RunScheduledCheckpointEvent
 
 if TYPE_CHECKING:
     from great_expectations.data_context import CloudDataContext
 
+LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
+
 
 class RunScheduledCheckpointAction(AgentAction[RunScheduledCheckpointEvent]):
     @override
     def run(self, event: RunScheduledCheckpointEvent, id: str) -> ActionResult:
         expectation_parameters_url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self._organization_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
         )
         return run_scheduled_checkpoint(
             context=self._context,
@@ -40,16 +42,32 @@ class RunScheduledCheckpointAction(AgentAction[RunScheduledCheckpointEvent]):
 def run_scheduled_checkpoint(
     context: CloudDataContext, event: RunScheduledCheckpointEvent, id: str, auth_key: str, url: str
 ) -> ActionResult:
+    """Run a scheduled checkpoint, fetching expectation parameters from GX Cloud first."""
+    hostname = socket.gethostname()
+    log_extra = {
+        "correlation_id": id,
+        "checkpoint_id": str(event.checkpoint_id),
+        "schedule_id": str(event.schedule_id),
+        "hostname": hostname,
+    }
+
+    LOGGER.debug("Fetching expectation parameters from GX Cloud", extra=log_extra)
     with create_session(access_token=auth_key) as session:
         response = session.get(url=url)
 
     if not response.ok:
+        LOGGER.error(
+            "Failed to fetch expectation parameters",
+            extra={**log_extra, "response_status": response.status_code},
+        )
         raise GXCloudError(
             message=f"RunScheduledCheckpointAction encountered an error while connecting to GX Cloud. "
             f"Unable to retrieve expectation_parameters for Checkpoint with ID={event.checkpoint_id}.",
             response=response,
         )
     data = response.json()
+    LOGGER.debug("Expectation parameters fetched successfully", extra=log_extra)
+
     try:
         expectation_parameters = (
             data["data"]["expectation_parameters"]
@@ -57,11 +75,16 @@ def run_scheduled_checkpoint(
             else None
         )
     except KeyError as e:
+        LOGGER.exception("Malformed response from GX Cloud", extra=log_extra)
         raise GXCloudError(
             message="Malformed response received from GX Cloud",
             response=response,
         ) from e
 
+    LOGGER.debug(
+        "Proceeding to run checkpoint",
+        extra={**log_extra, "has_expectation_parameters": expectation_parameters is not None},
+    )
     return run_checkpoint(context, event, id, expectation_parameters=expectation_parameters)
 
 
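The try/except KeyError around the payload access is the whole contract with the endpoint: the body is expected to look like {"data": {"expectation_parameters": {...}}}. The truthiness check on old line 56 is cut off in this diff, so the condition in this sketch is an assumption:

    def extract_expectation_parameters(data: dict) -> dict | None:
        try:
            # assumed condition; the real check is truncated out of this hunk
            return data["data"]["expectation_parameters"] or None
        except KeyError:
            # run_scheduled_checkpoint wraps this in a GXCloudError
            raise ValueError("Malformed response received from GX Cloud")

    print(extract_expectation_parameters({"data": {"expectation_parameters": {"p_min": 0.95}}}))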
great_expectations_cloud/agent/actions/run_window_checkpoint.py:

@@ -13,9 +13,7 @@ from great_expectations_cloud.agent.actions.agent_action import (
 )
 from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
 from great_expectations_cloud.agent.event_handler import register_event_action
-from great_expectations_cloud.agent.models import (
-    RunWindowCheckpointEvent,
-)
+from great_expectations_cloud.agent.models import RunWindowCheckpointEvent
 
 if TYPE_CHECKING:
     from great_expectations.data_context import CloudDataContext
@@ -26,7 +24,7 @@ class RunWindowCheckpointAction(AgentAction[RunWindowCheckpointEvent]):
     def run(self, event: RunWindowCheckpointEvent, id: str) -> ActionResult:
         expectation_parameters_url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self._organization_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
         )
         return run_window_checkpoint(
             self._context,
great_expectations_cloud/agent/actions/utils.py:

@@ -7,10 +7,13 @@ from sqlalchemy import inspect
 
 if TYPE_CHECKING:
     from sqlalchemy.engine import Inspector
+    from sqlalchemy.sql.compiler import IdentifierPreparer
 
 
 def get_asset_names(datasource: SQLDatasource) -> list[str]:
     inspector: Inspector = inspect(datasource.get_engine())
+    identifier_preparer: IdentifierPreparer = inspector.dialect.identifier_preparer
+
     if isinstance(datasource, SnowflakeDatasource) and datasource.schema_:
         # Snowflake-SQLAlchemy uses the default_schema if no schema is provided to get_table_names
         # Or if the role does not have access to the schema (it silently fails and defaults to using default_schema)
@@ -19,8 +22,14 @@ def get_asset_names(datasource: SQLDatasource) -> list[str]:
         # Also converting to list to ensure JSON serializable
         tables = list(inspector.get_table_names(schema=datasource.schema_))
         views = list(inspector.get_view_names(schema=datasource.schema_))
-        return tables + views
+        asset_names = tables + views
+    else:
+        tables = list(inspector.get_table_names())
+        views = list(inspector.get_view_names())
+        asset_names = tables + views
 
-    tables = list(inspector.get_table_names())
-    views = list(inspector.get_view_names())
-    return tables + views
+    # the identifier preparer adds quotes when they are necessary
+    quoted_asset_names: list[str] = [
+        identifier_preparer.quote(asset_name) for asset_name in asset_names
+    ]
+    return quoted_asset_names
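get_asset_names now returns names pre-quoted by the dialect's IdentifierPreparer, which quotes only when the identifier actually needs it (case sensitivity, spaces, reserved words). A quick demonstration using SQLAlchemy's SQLite dialect as a stand-in for whatever dialect the datasource resolves to:

    from sqlalchemy.dialects import sqlite

    preparer = sqlite.dialect().identifier_preparer
    print(preparer.quote("events"))       # events          (left untouched)
    print(preparer.quote("Order Items"))  # "Order Items"   (space and mixed case force quoting)
    print(preparer.quote("select"))       # "select"        (reserved word)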