great-expectations-cloud 20250902.0.dev1__py3-none-any.whl → 20260120.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of great-expectations-cloud might be problematic.
- great_expectations_cloud/agent/actions/agent_action.py +3 -3
- great_expectations_cloud/agent/actions/draft_datasource_config_action.py +2 -2
- great_expectations_cloud/agent/actions/generate_data_quality_check_expectations_action.py +22 -14
- great_expectations_cloud/agent/actions/list_asset_names.py +4 -5
- great_expectations_cloud/agent/actions/run_checkpoint.py +64 -3
- great_expectations_cloud/agent/actions/run_metric_list_action.py +3 -3
- great_expectations_cloud/agent/actions/run_scheduled_checkpoint.py +28 -5
- great_expectations_cloud/agent/actions/run_window_checkpoint.py +2 -4
- great_expectations_cloud/agent/actions/utils.py +13 -4
- great_expectations_cloud/agent/agent.py +280 -43
- great_expectations_cloud/agent/event_handler.py +8 -7
- great_expectations_cloud/agent/message_service/asyncio_rabbit_mq_client.py +36 -8
- great_expectations_cloud/agent/message_service/subscriber.py +4 -0
- great_expectations_cloud/agent/models.py +23 -2
- {great_expectations_cloud-20250902.0.dev1.dist-info → great_expectations_cloud-20260120.0.dev0.dist-info}/METADATA +5 -5
- {great_expectations_cloud-20250902.0.dev1.dist-info → great_expectations_cloud-20260120.0.dev0.dist-info}/RECORD +19 -19
- {great_expectations_cloud-20250902.0.dev1.dist-info → great_expectations_cloud-20260120.0.dev0.dist-info}/WHEEL +1 -1
- {great_expectations_cloud-20250902.0.dev1.dist-info → great_expectations_cloud-20260120.0.dev0.dist-info}/entry_points.txt +0 -0
- {great_expectations_cloud-20250902.0.dev1.dist-info → great_expectations_cloud-20260120.0.dev0.dist-info/licenses}/LICENSE +0 -0
great_expectations_cloud/agent/actions/agent_action.py

```diff
@@ -4,7 +4,6 @@ import datetime
 from abc import abstractmethod
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Generic, Optional, TypeVar, Union
-from uuid import UUID
 
 from pydantic.v1 import BaseModel
 
@@ -12,6 +11,7 @@ from great_expectations_cloud.agent.models import (
     AgentBaseExtraForbid,
     AgentBaseExtraIgnore,
     CreatedResource,
+    DomainContext,
 )
 
 if TYPE_CHECKING:
@@ -32,11 +32,11 @@ _EventT = TypeVar("_EventT", bound=Union[AgentBaseExtraForbid, AgentBaseExtraIgnore])
 
 class AgentAction(Generic[_EventT]):
     def __init__(
-        self, context: CloudDataContext, base_url: str,
+        self, context: CloudDataContext, base_url: str, domain_context: DomainContext, auth_key: str
     ):
         self._context = context
         self._base_url = base_url
-        self.
+        self._domain_context = domain_context
         self._auth_key = auth_key
 
     @abstractmethod
```
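Every action constructor now takes a DomainContext in place of a bare organization ID. The diff only reveals that the model exposes organization_id and workspace_id; below is a minimal sketch of the new wiring under that assumption (the pydantic base class and the commented construction call are illustrative, not the shipped definitions):

```python
from uuid import UUID

from pydantic.v1 import BaseModel


class DomainContext(BaseModel):
    """Assumed shape: the diff only shows .organization_id and .workspace_id."""

    organization_id: UUID
    workspace_id: UUID


# Hypothetical construction of a concrete action under the new signature:
# action = ListAssetNamesAction(
#     context=cloud_data_context,
#     base_url=base_url,
#     domain_context=DomainContext(organization_id=org_id, workspace_id=ws_id),
#     auth_key=auth_key,
# )
```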
great_expectations_cloud/agent/actions/draft_datasource_config_action.py

```diff
@@ -70,7 +70,7 @@ class DraftDatasourceConfigAction(AgentAction[DraftDatasourceConfigEvent]):
         with create_session(access_token=self._auth_key) as session:
             url = urljoin(
                 base=self._base_url,
-                url=f"/api/v1/organizations/{self.
+                url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/draft-table-names/{config_id}",
             )
             response = session.put(
                 url=url,
@@ -87,7 +87,7 @@ class DraftDatasourceConfigAction(AgentAction[DraftDatasourceConfigEvent]):
     def get_draft_config(self, config_id: UUID) -> dict[str, Any]:
         resource_url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self.
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/draft-datasources/{config_id}",
         )
         with create_session(access_token=self._auth_key) as session:
             response = session.get(resource_url)
```
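The same URL change repeats in every action below: v1 endpoints move from organization-scoped to workspace-scoped paths. A runnable sketch of the resulting shape with placeholder IDs (the base URL is a stand-in, not a real endpoint):

```python
from urllib.parse import urljoin
from uuid import uuid4

org_id, workspace_id, config_id = uuid4(), uuid4(), uuid4()  # placeholder IDs

url = urljoin(
    base="https://api.example.test",  # stand-in for the agent's base_url
    url=f"/api/v1/organizations/{org_id}/workspaces/{workspace_id}/draft-datasources/{config_id}",
)
# e.g. https://api.example.test/api/v1/organizations/<org>/workspaces/<ws>/draft-datasources/<id>
print(url)
```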
great_expectations_cloud/agent/actions/generate_data_quality_check_expectations_action.py

```diff
@@ -10,9 +10,13 @@ from uuid import UUID
 
 import great_expectations.expectations as gx_expectations
 from great_expectations.core.http import create_session
-from great_expectations.exceptions import
+from great_expectations.exceptions import (
+    GXCloudError,
+    InvalidExpectationConfigurationError,
+)
 from great_expectations.expectations.metadata_types import (
     DataQualityIssues,
+    FailureSeverity,
 )
 from great_expectations.expectations.window import Offset, Window
 from great_expectations.experimental.metric_repository.batch_inspector import (
@@ -39,6 +43,7 @@ from great_expectations_cloud.agent.event_handler import register_event_action
 from great_expectations_cloud.agent.exceptions import GXAgentError
 from great_expectations_cloud.agent.models import (
     CreatedResource,
+    DomainContext,
     GenerateDataQualityCheckExpectationsEvent,
 )
 from great_expectations_cloud.agent.utils import (
@@ -48,9 +53,7 @@ from great_expectations_cloud.agent.utils import (
 )
 
 if TYPE_CHECKING:
-    from great_expectations.core.suite_parameters import
-        SuiteParameterDict,
-    )
+    from great_expectations.core.suite_parameters import SuiteParameterDict
     from great_expectations.data_context import CloudDataContext
     from great_expectations.datasource.fluent import DataAsset
 
@@ -81,13 +84,13 @@ class GenerateDataQualityCheckExpectationsAction(
         self,
         context: CloudDataContext,
         base_url: str,
-
+        domain_context: DomainContext,
         auth_key: str,
         metric_repository: MetricRepository | None = None,
         batch_inspector: BatchInspector | None = None,
     ):
         super().__init__(
-            context=context, base_url=base_url,
+            context=context, base_url=base_url, domain_context=domain_context, auth_key=auth_key
         )
         self._metric_repository = metric_repository or MetricRepository(
             data_store=CloudDataStore(self._context)
@@ -202,7 +205,7 @@ class GenerateDataQualityCheckExpectationsAction(
             metric_list=[
                 MetricTypes.TABLE_COLUMNS,
                 MetricTypes.TABLE_COLUMN_TYPES,
-                MetricTypes.
+                MetricTypes.COLUMN_NON_NULL_COUNT,
                 MetricTypes.TABLE_ROW_COUNT,
             ],
         )
@@ -221,7 +224,7 @@ class GenerateDataQualityCheckExpectationsAction(
         """
         url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self.
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/expectations/",
         )
         with create_session(access_token=self._auth_key) as session:
             response = session.get(
@@ -317,6 +320,7 @@ class GenerateDataQualityCheckExpectationsAction(
                 strict_max=strict_max,
                 min_value=min_value,
                 max_value=max_value,
+                severity=FailureSeverity.WARNING,
             )
             expectation_id = self._create_expectation_for_asset(
                 expectation=expectation, asset_id=asset_id, created_via=created_via
@@ -339,7 +343,8 @@ class GenerateDataQualityCheckExpectationsAction(
             raise RuntimeError("missing TABLE_COLUMNS metric")  # noqa: TRY003
 
         expectation = gx_expectations.ExpectTableColumnsToMatchSet(
-            column_set=table_columns_metric.value
+            column_set=table_columns_metric.value,
+            severity=FailureSeverity.WARNING,
         )
         expectation_id = self._create_expectation_for_asset(
             expectation=expectation, asset_id=asset_id, created_via=created_via
@@ -369,11 +374,11 @@ class GenerateDataQualityCheckExpectationsAction(
             metric
             for metric in metric_run.metrics
             if isinstance(metric, ColumnMetric)
-            and metric.metric_name == MetricTypes.
+            and metric.metric_name == MetricTypes.COLUMN_NON_NULL_COUNT
         ]
 
         if not column_null_values_metric or len(column_null_values_metric) == 0:
-            raise RuntimeError("missing
+            raise RuntimeError("missing COLUMN_NON_NULL_COUNT metrics")  # noqa: TRY003
 
         expectation_ids = []
         # Single-expectation approach using ExpectColumnProportionOfNonNullValuesToBeBetween
@@ -384,7 +389,7 @@ class GenerateDataQualityCheckExpectationsAction(
         )
         for column in columns_missing_completeness_coverage:
             column_name = column.column
-
+            non_null_count = column.value
             row_count = table_row_count.value
             expectation: gx_expectations.Expectation
 
@@ -394,7 +399,6 @@ class GenerateDataQualityCheckExpectationsAction(
             max_param_name = f"{unique_id}_proportion_max"
 
             # Calculate non-null proportion
-            non_null_count = row_count - null_count if row_count > 0 else 0
             non_null_proportion = non_null_count / row_count if row_count > 0 else 0
 
             if use_forecast:
@@ -418,16 +422,19 @@ class GenerateDataQualityCheckExpectationsAction(
                     column=column_name,
                     min_value={"$PARAMETER": min_param_name},
                     max_value={"$PARAMETER": max_param_name},
+                    severity=FailureSeverity.WARNING,
                 )
             elif non_null_proportion == 0:
                 expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
                     column=column_name,
                     max_value=0,
+                    severity=FailureSeverity.WARNING,
                 )
             elif non_null_proportion == 1:
                 expectation = gx_expectations.ExpectColumnProportionOfNonNullValuesToBeBetween(
                     column=column_name,
                     min_value=1,
+                    severity=FailureSeverity.WARNING,
                 )
             else:
                 # Use triangular interpolation to compute min/max values
@@ -459,6 +466,7 @@ class GenerateDataQualityCheckExpectationsAction(
                     column=column_name,
                     min_value={"$PARAMETER": min_param_name},
                     max_value={"$PARAMETER": max_param_name},
+                    severity=FailureSeverity.WARNING,
                 )
 
             expectation_id = self._create_expectation_for_asset(
@@ -517,7 +525,7 @@ class GenerateDataQualityCheckExpectationsAction(
     ) -> UUID:
         url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self.
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/expectations/{asset_id}",
         )
 
         expectation_payload = expectation.configuration.to_json_dict()
```
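Two behavioral changes stand out in this file: every generated expectation now carries severity=FailureSeverity.WARNING, and the completeness logic reads the COLUMN_NON_NULL_COUNT metric directly instead of deriving a non-null count from a null count. A worked sketch of the proportion arithmetic with made-up values:

```python
# Illustrative values only; in the action these come from the metric run.
non_null_count = 87  # COLUMN_NON_NULL_COUNT metric value for one column
row_count = 100      # TABLE_ROW_COUNT metric value

# The old code derived the count: non_null_count = row_count - null_count
non_null_proportion = non_null_count / row_count if row_count > 0 else 0
assert non_null_proportion == 0.87  # drives the min/max bounds of the expectation
```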
great_expectations_cloud/agent/actions/list_asset_names.py

```diff
@@ -13,9 +13,7 @@ from great_expectations_cloud.agent.actions.agent_action import (
 )
 from great_expectations_cloud.agent.actions.utils import get_asset_names
 from great_expectations_cloud.agent.event_handler import register_event_action
-from great_expectations_cloud.agent.models import
-    ListAssetNamesEvent,
-)
+from great_expectations_cloud.agent.models import ListAssetNamesEvent
 
 
 class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
@@ -34,7 +32,8 @@ class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
         asset_names = get_asset_names(datasource)
 
         self._add_or_update_asset_names_list(
-            datasource_id=str(datasource.id),
+            datasource_id=str(datasource.id),
+            asset_names=asset_names,
         )
 
         return ActionResult(
@@ -47,7 +46,7 @@ class ListAssetNamesAction(AgentAction[ListAssetNamesEvent]):
         with create_session(access_token=self._auth_key) as session:
             url = urljoin(
                 base=self._base_url,
-                url=f"/api/v1/organizations/{self.
+                url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/table-names/{datasource_id}",
             )
             response = session.put(
                 url=url,
```
great_expectations_cloud/agent/actions/run_checkpoint.py

```diff
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import logging
+import socket
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Final
 
 from typing_extensions import override
 
@@ -21,6 +23,8 @@ if TYPE_CHECKING:
     from great_expectations.data_context import CloudDataContext
     from great_expectations.datasource.fluent.interfaces import DataAsset, Datasource
 
+LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
+
 
 class RunCheckpointAction(AgentAction[RunCheckpointEvent]):
     @override
@@ -45,12 +49,28 @@ def run_checkpoint(
     id: str,
     expectation_parameters: dict[str, Any] | None = None,
 ) -> ActionResult:
+    """Run a checkpoint and return the result."""
+    hostname = socket.gethostname()
+    log_extra = {
+        "correlation_id": id,
+        "checkpoint_name": event.checkpoint_name,
+        "hostname": hostname,
+    }
+
     # the checkpoint_name property on possible events is optional for backwards compatibility,
     # but this action requires it in order to run:
     if not event.checkpoint_name:
         raise MissingCheckpointNameError
 
+    LOGGER.debug("Fetching checkpoint from context", extra=log_extra)
     checkpoint = context.checkpoints.get(name=event.checkpoint_name)
+    LOGGER.debug(
+        "Checkpoint fetched successfully",
+        extra={
+            **log_extra,
+            "validation_definitions_count": len(checkpoint.validation_definitions),
+        },
+    )
 
     # only GX-managed Checkpoints are currently validated here and they contain only one validation definition, but
     # the Checkpoint does allow for multiple validation definitions so we'll be defensive and ensure we only test each
@@ -66,15 +86,48 @@ def run_checkpoint(
         )
         data_sources_assets_by_data_source_name[ds_name].assets_by_name[vd.asset.name] = vd.asset
 
-
+    # Test connections to all datasources and assets
+    for ds_name, data_sources_assets in data_sources_assets_by_data_source_name.items():
         data_source = data_sources_assets.data_source
+        LOGGER.debug(
+            "Testing datasource connection",
+            extra={**log_extra, "datasource_name": ds_name},
+        )
         data_source.test_connection(test_assets=False)  # raises `TestConnectionError` on failure
-
+        LOGGER.debug(
+            "Datasource connection successful",
+            extra={**log_extra, "datasource_name": ds_name},
+        )
+
+        for asset_name, data_asset in data_sources_assets.assets_by_name.items():
+            LOGGER.debug(
+                "Testing data asset connection",
+                extra={**log_extra, "datasource_name": ds_name, "asset_name": asset_name},
+            )
             data_asset.test_connection()  # raises `TestConnectionError` on failure
+            LOGGER.debug(
+                "Data asset connection successful",
+                extra={**log_extra, "datasource_name": ds_name, "asset_name": asset_name},
+            )
 
+    LOGGER.debug(
+        "Running checkpoint",
+        extra={
+            **log_extra,
+            "datasources_count": len(data_sources_assets_by_data_source_name),
+            "has_expectation_parameters": expectation_parameters is not None,
+        },
+    )
     checkpoint_run_result = checkpoint.run(
         batch_parameters=event.splitter_options, expectation_parameters=expectation_parameters
     )
+    LOGGER.debug(
+        "Checkpoint run completed",
+        extra={
+            **log_extra,
+            "run_results_count": len(checkpoint_run_result.run_results),
+        },
+    )
 
     validation_results = checkpoint_run_result.run_results
     created_resources = []
@@ -88,6 +141,14 @@ def run_checkpoint(
         )
         created_resources.append(created_resource)
 
+    LOGGER.debug(
+        "Checkpoint action completed successfully",
+        extra={
+            **log_extra,
+            "created_resources_count": len(created_resources),
+        },
+    )
+
     return ActionResult(
         id=id,
         type=event.type,
```
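The added diagnostics use the stdlib logging extra mechanism, so the structured fields appear only if the handler's formatter or a filter references them. A self-contained sketch of the pattern (logger name and field values are illustrative):

```python
import logging
import socket

logging.basicConfig(
    level=logging.DEBUG,
    format="%(message)s correlation_id=%(correlation_id)s hostname=%(hostname)s",
)
LOGGER = logging.getLogger("gx_agent_demo")  # hypothetical logger name

log_extra = {
    "correlation_id": "abc-123",  # in the agent this is the event id
    "checkpoint_name": "nightly_checks",
    "hostname": socket.gethostname(),
}
LOGGER.debug("Fetching checkpoint from context", extra=log_extra)
LOGGER.debug("Running checkpoint", extra={**log_extra, "datasources_count": 2})
```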
great_expectations_cloud/agent/actions/run_metric_list_action.py

```diff
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING
-from uuid import UUID
 
 from great_expectations.experimental.metric_repository.batch_inspector import (
     BatchInspector,
@@ -21,6 +20,7 @@ from great_expectations_cloud.agent.actions import ActionResult, AgentAction
 from great_expectations_cloud.agent.event_handler import register_event_action
 from great_expectations_cloud.agent.models import (
     CreatedResource,
+    DomainContext,
     RunMetricsListEvent,
 )
 
@@ -34,13 +34,13 @@ class MetricListAction(AgentAction[RunMetricsListEvent]):
         self,
         context: CloudDataContext,
         base_url: str,
-
+        domain_context: DomainContext,
         auth_key: str,
         metric_repository: MetricRepository | None = None,
         batch_inspector: BatchInspector | None = None,
     ):
         super().__init__(
-            context=context, base_url=base_url,
+            context=context, base_url=base_url, domain_context=domain_context, auth_key=auth_key
         )
         self._metric_repository = metric_repository or MetricRepository(
             data_store=CloudDataStore(self._context)
```
great_expectations_cloud/agent/actions/run_scheduled_checkpoint.py

```diff
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
-
+import logging
+import socket
+from typing import TYPE_CHECKING, Final
 from urllib.parse import urljoin
 
 from great_expectations.core.http import create_session
@@ -13,20 +15,20 @@ from great_expectations_cloud.agent.actions.agent_action import (
 )
 from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
 from great_expectations_cloud.agent.event_handler import register_event_action
-from great_expectations_cloud.agent.models import
-    RunScheduledCheckpointEvent,
-)
+from great_expectations_cloud.agent.models import RunScheduledCheckpointEvent
 
 if TYPE_CHECKING:
     from great_expectations.data_context import CloudDataContext
 
+LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
+
 
 class RunScheduledCheckpointAction(AgentAction[RunScheduledCheckpointEvent]):
     @override
     def run(self, event: RunScheduledCheckpointEvent, id: str) -> ActionResult:
         expectation_parameters_url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self.
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
         )
         return run_scheduled_checkpoint(
             context=self._context,
@@ -40,16 +42,32 @@ class RunScheduledCheckpointAction(AgentAction[RunScheduledCheckpointEvent]):
 def run_scheduled_checkpoint(
     context: CloudDataContext, event: RunScheduledCheckpointEvent, id: str, auth_key: str, url: str
 ) -> ActionResult:
+    """Run a scheduled checkpoint, fetching expectation parameters from GX Cloud first."""
+    hostname = socket.gethostname()
+    log_extra = {
+        "correlation_id": id,
+        "checkpoint_id": str(event.checkpoint_id),
+        "schedule_id": str(event.schedule_id),
+        "hostname": hostname,
+    }
+
+    LOGGER.debug("Fetching expectation parameters from GX Cloud", extra=log_extra)
     with create_session(access_token=auth_key) as session:
         response = session.get(url=url)
 
     if not response.ok:
+        LOGGER.error(
+            "Failed to fetch expectation parameters",
+            extra={**log_extra, "response_status": response.status_code},
+        )
         raise GXCloudError(
             message=f"RunScheduledCheckpointAction encountered an error while connecting to GX Cloud. "
             f"Unable to retrieve expectation_parameters for Checkpoint with ID={event.checkpoint_id}.",
             response=response,
         )
     data = response.json()
+    LOGGER.debug("Expectation parameters fetched successfully", extra=log_extra)
+
     try:
         expectation_parameters = (
             data["data"]["expectation_parameters"]
@@ -57,11 +75,16 @@ def run_scheduled_checkpoint(
             else None
         )
     except KeyError as e:
+        LOGGER.exception("Malformed response from GX Cloud", extra=log_extra)
         raise GXCloudError(
             message="Malformed response received from GX Cloud",
             response=response,
         ) from e
 
+    LOGGER.debug(
+        "Proceeding to run checkpoint",
+        extra={**log_extra, "has_expectation_parameters": expectation_parameters is not None},
+    )
     return run_checkpoint(context, event, id, expectation_parameters=expectation_parameters)
```
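In outline, the scheduled action now does fetch-then-run with logging at each step. A simplified sketch of the control flow under the response envelope {"data": {"expectation_parameters": ...}} shown above (error types are simplified; the real code raises GXCloudError):

```python
# Simplified sketch: `session` behaves like a requests.Session.
def fetch_expectation_parameters(session, url):
    response = session.get(url=url)
    if not response.ok:
        raise RuntimeError(f"GX Cloud returned {response.status_code}")
    data = response.json()
    try:
        return data["data"]["expectation_parameters"]  # may be None
    except KeyError as e:
        raise RuntimeError("Malformed response received from GX Cloud") from e
```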
great_expectations_cloud/agent/actions/run_window_checkpoint.py

```diff
@@ -13,9 +13,7 @@ from great_expectations_cloud.agent.actions.agent_action import (
 )
 from great_expectations_cloud.agent.actions.run_checkpoint import run_checkpoint
 from great_expectations_cloud.agent.event_handler import register_event_action
-from great_expectations_cloud.agent.models import
-    RunWindowCheckpointEvent,
-)
+from great_expectations_cloud.agent.models import RunWindowCheckpointEvent
 
 if TYPE_CHECKING:
     from great_expectations.data_context import CloudDataContext
@@ -26,7 +24,7 @@ class RunWindowCheckpointAction(AgentAction[RunWindowCheckpointEvent]):
     def run(self, event: RunWindowCheckpointEvent, id: str) -> ActionResult:
         expectation_parameters_url = urljoin(
             base=self._base_url,
-            url=f"/api/v1/organizations/{self.
+            url=f"/api/v1/organizations/{self._domain_context.organization_id}/workspaces/{self._domain_context.workspace_id}/checkpoints/{event.checkpoint_id}/expectation-parameters",
         )
         return run_window_checkpoint(
             self._context,
```
great_expectations_cloud/agent/actions/utils.py

```diff
@@ -7,10 +7,13 @@ from sqlalchemy import inspect
 
 if TYPE_CHECKING:
     from sqlalchemy.engine import Inspector
+    from sqlalchemy.sql.compiler import IdentifierPreparer
 
 
 def get_asset_names(datasource: SQLDatasource) -> list[str]:
     inspector: Inspector = inspect(datasource.get_engine())
+    identifier_preparer: IdentifierPreparer = inspector.dialect.identifier_preparer
+
     if isinstance(datasource, SnowflakeDatasource) and datasource.schema_:
         # Snowflake-SQLAlchemy uses the default_schema if no schema is provided to get_table_names
         # Or if the role does not have access to the schema (it silently fails and defaults to using default_schema)
@@ -19,8 +22,14 @@ def get_asset_names(datasource: SQLDatasource) -> list[str]:
         # Also converting to list to ensure JSON serializable
         tables = list(inspector.get_table_names(schema=datasource.schema_))
         views = list(inspector.get_view_names(schema=datasource.schema_))
-
+        asset_names = tables + views
+    else:
+        tables = list(inspector.get_table_names())
+        views = list(inspector.get_view_names())
+        asset_names = tables + views
 
-
-
-
+    # the identifier preparer adds quotes when they are necessary
+    quoted_asset_names: list[str] = [
+        identifier_preparer.quote(asset_name) for asset_name in asset_names
+    ]
+    return quoted_asset_names
```
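The new quoting step leans on SQLAlchemy's per-dialect IdentifierPreparer, which quotes an identifier only when required (reserved words, mixed case, special characters). A runnable sketch against an in-memory SQLite engine:

```python
from sqlalchemy import create_engine, inspect

# Any dialect illustrates the point; quoting rules are dialect-specific.
engine = create_engine("sqlite://")
preparer = inspect(engine).dialect.identifier_preparer

print(preparer.quote("events"))      # events        (no quoting needed)
print(preparer.quote("Mixed Case"))  # "Mixed Case"  (space and case force quotes)
print(preparer.quote("select"))      # "select"      (reserved word)
```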