datacheck-cli 2.1.1__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/PKG-INFO +16 -8
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/README_PYPI.md +14 -7
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/airflow/operators.py +13 -48
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/cli/schema.py +7 -16
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/cli/validate.py +26 -263
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/loader.py +1 -57
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/schema.py +2 -48
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/source.py +3 -13
- datacheck_cli-2.1.2/datacheck/config/templates/basic.yaml +30 -0
- datacheck_cli-2.1.2/datacheck/config/templates/ecommerce.yaml +31 -0
- datacheck_cli-2.1.2/datacheck/config/templates/finance.yaml +37 -0
- datacheck_cli-2.1.2/datacheck/config/templates/healthcare.yaml +36 -0
- datacheck_cli-2.1.2/datacheck/config/templates/iot.yaml +30 -0
- datacheck_cli-2.1.2/datacheck/config/templates/rules-reference.yaml +32 -0
- datacheck_cli-2.1.2/datacheck/config/templates/saas.yaml +33 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/templates/sources.yaml +14 -21
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/__init__.py +11 -1
- datacheck_cli-2.1.2/datacheck/connectors/duckdb.py +162 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/factory.py +7 -33
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/engine.py +6 -190
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/sql_pushdown/builder.py +31 -1
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/sql_pushdown/dialects.py +218 -10
- datacheck_cli-2.1.2/datacheck/utils/__init__.py +1 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/pyproject.toml +11 -1
- datacheck_cli-2.1.1/datacheck/config/templates/basic.yaml +0 -116
- datacheck_cli-2.1.1/datacheck/config/templates/ecommerce.yaml +0 -189
- datacheck_cli-2.1.1/datacheck/config/templates/finance.yaml +0 -159
- datacheck_cli-2.1.1/datacheck/config/templates/healthcare.yaml +0 -183
- datacheck_cli-2.1.1/datacheck/config/templates/iot.yaml +0 -195
- datacheck_cli-2.1.1/datacheck/config/templates/rules-reference.yaml +0 -168
- datacheck_cli-2.1.1/datacheck/config/templates/saas.yaml +0 -186
- datacheck_cli-2.1.1/datacheck/parallel/__init__.py +0 -19
- datacheck_cli-2.1.1/datacheck/parallel/executor.py +0 -309
- datacheck_cli-2.1.1/datacheck/parallel/progress.py +0 -396
- datacheck_cli-2.1.1/datacheck/utils/__init__.py +0 -8
- datacheck_cli-2.1.1/datacheck/utils/connection_parser.py +0 -232
- datacheck_cli-2.1.1/datacheck/validation/__init__.py +0 -76
- datacheck_cli-2.1.1/datacheck/validation/config.py +0 -520
- datacheck_cli-2.1.1/datacheck/validation/rules.py +0 -630
- datacheck_cli-2.1.1/datacheck/validation/validator.py +0 -275
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/LICENSE +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/__main__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/airflow/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/cli/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/cli/config.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/parser.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/sample_data.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/templates/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/config/validator.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/base.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/bigquery.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/cloud_base.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/mysql.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/postgresql.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/redshift.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/s3.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/connectors/snowflake.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/exceptions.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/loader.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/logging/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/logging/config.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/logging/filters.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/logging/formatters.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/logging/utils.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/notifications/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/notifications/slack.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/output.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/reporting/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/reporting/csv_exporter.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/reporting/distribution_analyzer.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/reporting/json_reporter.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/reporting/sarif_exporter.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/reporting/suggestion_engine.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/reporting/terminal_reporter.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/results.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/rules/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/rules/base.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/rules/composite_rules.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/rules/factory.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/rules/null_rules.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/rules/numeric_rules.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/rules/string_rules.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/rules/temporal_rules.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/schema/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/schema/baseline.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/schema/comparator.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/schema/detector.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/schema/models.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/security/__init__.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/security/validators.py +0 -0
- {datacheck_cli-2.1.1 → datacheck_cli-2.1.2}/datacheck/sql_pushdown/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datacheck-cli
|
|
3
|
-
Version: 2.1.1
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: A linter for data pipelines. Enforce deterministic validation rules in CI/CD, Airflow, and beyond.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -39,6 +39,7 @@ Provides-Extra: validation
|
|
|
39
39
|
Provides-Extra: warehouses
|
|
40
40
|
Requires-Dist: boto3 (>=1.34.0,<2.0.0) ; extra == "s3" or extra == "cloud" or extra == "redshift" or extra == "warehouses" or extra == "all"
|
|
41
41
|
Requires-Dist: click (>=8.1.0,<9.0.0)
|
|
42
|
+
Requires-Dist: duckdb (>=1.0.0,<2.0.0)
|
|
42
43
|
Requires-Dist: google-auth (>=2.0.0,<3.0.0) ; extra == "bigquery" or extra == "warehouses" or extra == "all"
|
|
43
44
|
Requires-Dist: google-cloud-bigquery (>=3.0.0,<4.0.0) ; extra == "bigquery" or extra == "warehouses" or extra == "all"
|
|
44
45
|
Requires-Dist: jsonschema (>=4.17.0,<5.0.0) ; extra == "validation" or extra == "all"
|
|
@@ -111,12 +112,20 @@ datacheck config init --with-sample-data
|
|
|
111
112
|
datacheck config init --template ecommerce --with-sample-data
|
|
112
113
|
```
|
|
113
114
|
|
|
114
|
-
**Option 2 - Write manually.** Create a `.datacheck.yaml`
|
|
115
|
+
**Option 2 - Write manually.** Create a `sources.yaml` and `.datacheck.yaml` with your data source and validation rules:
|
|
115
116
|
|
|
116
117
|
```yaml
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
# sources.yaml
|
|
119
|
+
sources:
|
|
120
|
+
orders:
|
|
121
|
+
type: duckdb
|
|
122
|
+
path: ./data/orders.csv
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
```yaml
|
|
126
|
+
# .datacheck.yaml
|
|
127
|
+
sources_file: sources.yaml
|
|
128
|
+
source: orders
|
|
120
129
|
|
|
121
130
|
checks:
|
|
122
131
|
- name: id_check
|
|
@@ -137,8 +146,7 @@ Run validation:
|
|
|
137
146
|
|
|
138
147
|
```bash
|
|
139
148
|
datacheck validate # auto-discover config
|
|
140
|
-
datacheck validate
|
|
141
|
-
datacheck validate --config checks.yaml
|
|
149
|
+
datacheck validate --config checks.yaml # explicit config path
|
|
142
150
|
echo $? # 1 if any error-severity rule fails
|
|
143
151
|
```
|
|
144
152
|
|
|
@@ -252,7 +260,7 @@ datacheck schema compare --fail-on-breaking
|
|
|
252
260
|
from datacheck import ValidationEngine
|
|
253
261
|
|
|
254
262
|
engine = ValidationEngine(config_path=".datacheck.yaml")
|
|
255
|
-
summary = engine.
|
|
263
|
+
summary = engine.validate_sources()
|
|
256
264
|
|
|
257
265
|
print(f"Passed: {summary.passed_rules}/{summary.total_rules}")
|
|
258
266
|
|
|
@@ -52,12 +52,20 @@ datacheck config init --with-sample-data
|
|
|
52
52
|
datacheck config init --template ecommerce --with-sample-data
|
|
53
53
|
```
|
|
54
54
|
|
|
55
|
-
**Option 2 - Write manually.** Create a `.datacheck.yaml`
|
|
55
|
+
**Option 2 - Write manually.** Create a `sources.yaml` and `.datacheck.yaml` with your data source and validation rules:
|
|
56
56
|
|
|
57
57
|
```yaml
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
# sources.yaml
|
|
59
|
+
sources:
|
|
60
|
+
orders:
|
|
61
|
+
type: duckdb
|
|
62
|
+
path: ./data/orders.csv
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
```yaml
|
|
66
|
+
# .datacheck.yaml
|
|
67
|
+
sources_file: sources.yaml
|
|
68
|
+
source: orders
|
|
61
69
|
|
|
62
70
|
checks:
|
|
63
71
|
- name: id_check
|
|
@@ -78,8 +86,7 @@ Run validation:
|
|
|
78
86
|
|
|
79
87
|
```bash
|
|
80
88
|
datacheck validate # auto-discover config
|
|
81
|
-
datacheck validate
|
|
82
|
-
datacheck validate --config checks.yaml
|
|
89
|
+
datacheck validate --config checks.yaml # explicit config path
|
|
83
90
|
echo $? # 1 if any error-severity rule fails
|
|
84
91
|
```
|
|
85
92
|
|
|
@@ -193,7 +200,7 @@ datacheck schema compare --fail-on-breaking
|
|
|
193
200
|
from datacheck import ValidationEngine
|
|
194
201
|
|
|
195
202
|
engine = ValidationEngine(config_path=".datacheck.yaml")
|
|
196
|
-
summary = engine.
|
|
203
|
+
summary = engine.validate_sources()
|
|
197
204
|
|
|
198
205
|
print(f"Passed: {summary.passed_rules}/{summary.total_rules}")
|
|
199
206
|
|
|
@@ -13,7 +13,6 @@ from collections.abc import Sequence
|
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
15
|
import pandas as pd
|
|
16
|
-
from pathlib import Path
|
|
17
16
|
|
|
18
17
|
try:
|
|
19
18
|
from airflow.models import BaseOperator
|
|
@@ -63,31 +62,21 @@ class DataCheckOperator(BaseOperator):
|
|
|
63
62
|
"""Operator for running DataCheck validation in Airflow DAGs.
|
|
64
63
|
|
|
65
64
|
Uses the full ValidationEngine to run config-based rules against
|
|
66
|
-
data
|
|
67
|
-
|
|
65
|
+
named data sources. All data sources are defined in a sources.yaml
|
|
66
|
+
file and referenced by name.
|
|
68
67
|
|
|
69
68
|
Data source resolution (in order):
|
|
70
|
-
1. ``
|
|
71
|
-
2.
|
|
72
|
-
3. Config default — uses ``source`` or ``data_source`` from config
|
|
69
|
+
1. ``source_name`` — validate a named source from sources.yaml
|
|
70
|
+
2. Config default — uses ``source`` defined in the config
|
|
73
71
|
|
|
74
72
|
Examples:
|
|
75
|
-
Validate a
|
|
76
|
-
|
|
77
|
-
DataCheckOperator(
|
|
78
|
-
task_id="validate_orders",
|
|
79
|
-
config_path="/config/checks.yaml",
|
|
80
|
-
file_path="/data/orders_{{ ds }}.parquet",
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
Validate a database table::
|
|
73
|
+
Validate a named source::
|
|
84
74
|
|
|
85
75
|
DataCheckOperator(
|
|
86
76
|
task_id="validate_orders",
|
|
87
77
|
config_path="/config/checks.yaml",
|
|
88
78
|
sources_file="/config/sources.yaml",
|
|
89
|
-
source_name="
|
|
90
|
-
table="orders",
|
|
79
|
+
source_name="orders",
|
|
91
80
|
where="created_at >= '{{ ds }}'",
|
|
92
81
|
)
|
|
93
82
|
|
|
@@ -96,21 +85,19 @@ class DataCheckOperator(BaseOperator):
|
|
|
96
85
|
DataCheckOperator(
|
|
97
86
|
task_id="validate_orders",
|
|
98
87
|
config_path="/config/checks.yaml",
|
|
99
|
-
|
|
88
|
+
sources_file="/config/sources.yaml",
|
|
89
|
+
source_name="orders",
|
|
100
90
|
min_pass_rate=95.0,
|
|
101
91
|
fail_on_error=True,
|
|
102
92
|
)
|
|
103
93
|
|
|
104
94
|
Attributes:
|
|
105
95
|
config_path: Path to the DataCheck validation config YAML
|
|
106
|
-
file_path: Path to a data file (CSV, Parquet)
|
|
107
96
|
sources_file: Path to named sources YAML file
|
|
108
97
|
source_name: Named source to validate
|
|
109
98
|
table: Database table name override
|
|
110
99
|
where: SQL WHERE clause for filtering
|
|
111
100
|
query: Custom SQL query (alternative to table)
|
|
112
|
-
parallel: Enable multi-core validation
|
|
113
|
-
workers: Number of worker processes
|
|
114
101
|
min_pass_rate: Minimum rule pass rate to succeed (0-100)
|
|
115
102
|
fail_on_error: Whether to fail the Airflow task on validation failure
|
|
116
103
|
push_results: Whether to push results to XCom
|
|
@@ -118,7 +105,6 @@ class DataCheckOperator(BaseOperator):
|
|
|
118
105
|
|
|
119
106
|
template_fields: Sequence[str] = (
|
|
120
107
|
"config_path",
|
|
121
|
-
"file_path",
|
|
122
108
|
"sources_file",
|
|
123
109
|
"source_name",
|
|
124
110
|
"table",
|
|
@@ -133,14 +119,11 @@ class DataCheckOperator(BaseOperator):
|
|
|
133
119
|
def __init__(
|
|
134
120
|
self,
|
|
135
121
|
config_path: str,
|
|
136
|
-
file_path: str | None = None,
|
|
137
122
|
sources_file: str | None = None,
|
|
138
123
|
source_name: str | None = None,
|
|
139
124
|
table: str | None = None,
|
|
140
125
|
where: str | None = None,
|
|
141
126
|
query: str | None = None,
|
|
142
|
-
parallel: bool = False,
|
|
143
|
-
workers: int | None = None,
|
|
144
127
|
min_pass_rate: float = 0.0,
|
|
145
128
|
fail_on_error: bool = True,
|
|
146
129
|
push_results: bool = True,
|
|
@@ -150,14 +133,11 @@ class DataCheckOperator(BaseOperator):
|
|
|
150
133
|
|
|
151
134
|
Args:
|
|
152
135
|
config_path: Path to DataCheck validation config YAML (required)
|
|
153
|
-
file_path: Path to data file (CSV, Parquet)
|
|
154
136
|
sources_file: Path to sources YAML file (overrides config)
|
|
155
137
|
source_name: Named source from sources.yaml
|
|
156
138
|
table: Database table name (for database sources)
|
|
157
139
|
where: WHERE clause for filtering (for database sources)
|
|
158
140
|
query: Custom SQL query (alternative to table)
|
|
159
|
-
parallel: Enable parallel execution
|
|
160
|
-
workers: Number of worker processes (default: CPU count)
|
|
161
141
|
min_pass_rate: Minimum pass rate percentage (0-100, 0 = disabled)
|
|
162
142
|
fail_on_error: Whether to raise AirflowException on failure
|
|
163
143
|
push_results: Whether to push results to XCom
|
|
@@ -165,14 +145,11 @@ class DataCheckOperator(BaseOperator):
|
|
|
165
145
|
"""
|
|
166
146
|
super().__init__(**kwargs)
|
|
167
147
|
self.config_path = config_path
|
|
168
|
-
self.file_path = file_path
|
|
169
148
|
self.sources_file = sources_file
|
|
170
149
|
self.source_name = source_name
|
|
171
150
|
self.table = table
|
|
172
151
|
self.where = where
|
|
173
152
|
self.query = query
|
|
174
|
-
self.parallel = parallel
|
|
175
|
-
self.workers = workers
|
|
176
153
|
self.min_pass_rate = min_pass_rate
|
|
177
154
|
self.fail_on_error = fail_on_error
|
|
178
155
|
self.push_results = push_results
|
|
@@ -193,7 +170,7 @@ class DataCheckOperator(BaseOperator):
|
|
|
193
170
|
|
|
194
171
|
self.log.info(
|
|
195
172
|
f"Running DataCheck validation: config={self.config_path}, "
|
|
196
|
-
f"
|
|
173
|
+
f"source={self.source_name}"
|
|
197
174
|
)
|
|
198
175
|
|
|
199
176
|
# Initialize engine
|
|
@@ -201,34 +178,23 @@ class DataCheckOperator(BaseOperator):
|
|
|
201
178
|
engine = ValidationEngine(
|
|
202
179
|
config_path=self.config_path,
|
|
203
180
|
sources_file=self.sources_file,
|
|
204
|
-
parallel=self.parallel,
|
|
205
|
-
workers=self.workers,
|
|
206
181
|
)
|
|
207
182
|
except Exception as e:
|
|
208
183
|
raise AirflowException(f"Failed to initialize ValidationEngine: {e}")
|
|
209
184
|
|
|
210
|
-
# Run validation
|
|
185
|
+
# Run validation
|
|
211
186
|
try:
|
|
212
|
-
if self.file_path:
|
|
213
|
-
# File-based validation
|
|
214
|
-
summary = engine.validate_file(self.file_path)
|
|
215
|
-
elif self.source_name or engine.config.source:
|
|
216
|
-
# Named source validation
|
|
187
|
+
if self.source_name or engine.config.source:
|
|
217
188
|
summary = engine.validate_sources(
|
|
218
189
|
source_name=self.source_name,
|
|
219
190
|
table=self.table,
|
|
220
191
|
where=self.where,
|
|
221
192
|
query=self.query,
|
|
222
193
|
)
|
|
223
|
-
elif engine.config.data_source is not None:
|
|
224
|
-
# Inline data_source from config
|
|
225
|
-
config_dir = Path(self.config_path).parent
|
|
226
|
-
source_path = config_dir / engine.config.data_source.path
|
|
227
|
-
summary = engine.validate_file(str(source_path))
|
|
228
194
|
else:
|
|
229
195
|
raise AirflowException(
|
|
230
|
-
"No data source specified. Provide
|
|
231
|
-
"
|
|
196
|
+
"No data source specified. Provide source_name "
|
|
197
|
+
"or a config with a named source."
|
|
232
198
|
)
|
|
233
199
|
except AirflowException:
|
|
234
200
|
raise
|
|
@@ -249,7 +215,6 @@ class DataCheckOperator(BaseOperator):
|
|
|
249
215
|
# Build results
|
|
250
216
|
results = {
|
|
251
217
|
"config_path": self.config_path,
|
|
252
|
-
"file_path": self.file_path,
|
|
253
218
|
"source": self.source_name,
|
|
254
219
|
"table": self.table,
|
|
255
220
|
"passed": summary.all_passed,
|
|
@@ -84,16 +84,11 @@ def _resolve_data_source(
|
|
|
84
84
|
df = load_source_data(source_config, table=table, query=query)
|
|
85
85
|
resolved_source_name = source
|
|
86
86
|
|
|
87
|
-
# Option 2:
|
|
87
|
+
# Option 2: Default source from config
|
|
88
88
|
elif data_source is None and config:
|
|
89
89
|
config_data = ConfigLoader.load(config)
|
|
90
90
|
config_dir = Path(config).parent
|
|
91
|
-
if config_data.
|
|
92
|
-
source_path = config_dir / config_data.data_source.path
|
|
93
|
-
df = LoaderFactory.load(str(source_path), table=table, query=query)
|
|
94
|
-
resolved_source_name = str(source_path)
|
|
95
|
-
elif config_data.sources_file and config_data.source:
|
|
96
|
-
# Use default source from config
|
|
91
|
+
if config_data.sources_file and config_data.source:
|
|
97
92
|
sources_path = config_dir / config_data.sources_file
|
|
98
93
|
sources = load_sources(sources_path)
|
|
99
94
|
if config_data.source not in sources:
|
|
@@ -107,7 +102,8 @@ def _resolve_data_source(
|
|
|
107
102
|
resolved_source_name = config_data.source
|
|
108
103
|
else:
|
|
109
104
|
console.print(
|
|
110
|
-
"[red]Error:[/red] Config file has no
|
|
105
|
+
"[red]Error:[/red] Config file has no sources_file/source defined. "
|
|
106
|
+
"Use --source with --sources-file.",
|
|
111
107
|
style="red",
|
|
112
108
|
)
|
|
113
109
|
raise typer.Exit(code=2)
|
|
@@ -118,11 +114,7 @@ def _resolve_data_source(
|
|
|
118
114
|
if found_config:
|
|
119
115
|
config_data = ConfigLoader.load(found_config)
|
|
120
116
|
config_dir = found_config.parent
|
|
121
|
-
if config_data.
|
|
122
|
-
source_path = config_dir / config_data.data_source.path
|
|
123
|
-
df = LoaderFactory.load(str(source_path), table=table, query=query)
|
|
124
|
-
resolved_source_name = str(source_path)
|
|
125
|
-
elif config_data.sources_file and config_data.source:
|
|
117
|
+
if config_data.sources_file and config_data.source:
|
|
126
118
|
sources_path = config_dir / config_data.sources_file
|
|
127
119
|
sources = load_sources(sources_path)
|
|
128
120
|
if config_data.source in sources:
|
|
@@ -138,15 +130,14 @@ def _resolve_data_source(
|
|
|
138
130
|
else:
|
|
139
131
|
console.print(
|
|
140
132
|
"[red]Error:[/red] No data source specified. "
|
|
141
|
-
"Provide a file path as argument, use --
|
|
142
|
-
"or use --source with --sources-file.",
|
|
133
|
+
"Provide a file path as argument, or use --source with --sources-file.",
|
|
143
134
|
style="red",
|
|
144
135
|
)
|
|
145
136
|
raise typer.Exit(code=2)
|
|
146
137
|
else:
|
|
147
138
|
console.print(
|
|
148
139
|
"[red]Error:[/red] No data source specified. "
|
|
149
|
-
"Provide a file path as argument, use --
|
|
140
|
+
"Provide a file path as argument, or use --source with --sources-file.",
|
|
150
141
|
style="red",
|
|
151
142
|
)
|
|
152
143
|
raise typer.Exit(code=2)
|