acryl-datahub 1.0.0.3rc12__py3-none-any.whl → 1.0.0.4rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/METADATA +2529 -2527
- {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/RECORD +37 -34
- {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/emitter/request_helper.py +10 -5
- datahub/emitter/rest_emitter.py +183 -106
- datahub/ingestion/extractor/schema_util.py +17 -1
- datahub/ingestion/graph/client.py +17 -4
- datahub/ingestion/graph/links.py +53 -0
- datahub/ingestion/sink/datahub_rest.py +11 -10
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/ge_data_profiler.py +25 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +1 -12
- datahub/ingestion/source/snowflake/snowflake_connection.py +5 -17
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
- datahub/ingestion/source/sql/stored_procedures/lineage.py +1 -0
- datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +4 -2
- datahub/ingestion/source/unity/config.py +2 -1
- datahub/metadata/_internal_schema_classes.py +13 -0
- datahub/metadata/schema.avsc +17 -0
- datahub/metadata/schemas/Operation.avsc +17 -0
- datahub/sdk/main_client.py +15 -0
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/sql_parsing_aggregator.py +3 -2
- datahub/utilities/server_config_util.py +14 -75
- {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/top_level.txt +0 -0
|
@@ -254,6 +254,23 @@
|
|
|
254
254
|
"type": "long",
|
|
255
255
|
"name": "lastUpdatedTimestamp",
|
|
256
256
|
"doc": "The time at which the operation occurred. Would be better named 'operationTime'"
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
"TimeseriesFieldCollection": {
|
|
260
|
+
"key": "query"
|
|
261
|
+
},
|
|
262
|
+
"type": [
|
|
263
|
+
"null",
|
|
264
|
+
{
|
|
265
|
+
"type": "array",
|
|
266
|
+
"items": "string"
|
|
267
|
+
}
|
|
268
|
+
],
|
|
269
|
+
"name": "queries",
|
|
270
|
+
"default": null,
|
|
271
|
+
"doc": "Which queries were used in this operation.",
|
|
272
|
+
"Urn": "Urn",
|
|
273
|
+
"urn_is_array": true
|
|
257
274
|
}
|
|
258
275
|
],
|
|
259
276
|
"doc": "Operational info for an entity."
|
datahub/sdk/main_client.py
CHANGED
|
@@ -10,6 +10,13 @@ from datahub.sdk.lineage_client import LineageClient
|
|
|
10
10
|
from datahub.sdk.resolver_client import ResolverClient
|
|
11
11
|
from datahub.sdk.search_client import SearchClient
|
|
12
12
|
|
|
13
|
+
try:
|
|
14
|
+
from acryl_datahub_cloud._sdk_extras import ( # type: ignore[import-not-found]
|
|
15
|
+
AssertionClient,
|
|
16
|
+
)
|
|
17
|
+
except ImportError:
|
|
18
|
+
AssertionClient = None
|
|
19
|
+
|
|
13
20
|
|
|
14
21
|
class DataHubClient:
|
|
15
22
|
"""Main client for interacting with DataHub.
|
|
@@ -103,3 +110,11 @@ class DataHubClient:
|
|
|
103
110
|
@property
|
|
104
111
|
def lineage(self) -> LineageClient:
|
|
105
112
|
return LineageClient(self)
|
|
113
|
+
|
|
114
|
+
@property
|
|
115
|
+
def assertion(self) -> AssertionClient: # type: ignore[return-value] # Type is not available if assertion_client is not installed
|
|
116
|
+
if AssertionClient is None:
|
|
117
|
+
raise SdkUsageError(
|
|
118
|
+
"AssertionClient is not installed, please install it with `pip install acryl-datahub-cloud`"
|
|
119
|
+
)
|
|
120
|
+
return AssertionClient(self)
|
|
@@ -163,8 +163,7 @@ def _patch_lineage() -> None:
|
|
|
163
163
|
- source_columns = set(find_all_in_scope(select, exp.Column))
|
|
164
164
|
+ source_columns = list(find_all_in_scope(select, exp.Column))
|
|
165
165
|
|
|
166
|
-
|
|
167
|
-
+ # If the source is a UDTF find columns used in the UDTF to generate the table
|
|
166
|
+
# If the source is a UDTF find columns used in the UDTF to generate the table
|
|
168
167
|
+ source = scope.expression
|
|
169
168
|
if isinstance(source, exp.UDTF):
|
|
170
169
|
- source_columns |= set(source.find_all(exp.Column))
|
|
@@ -1753,8 +1753,9 @@ class SqlParsingAggregator(Closeable):
|
|
|
1753
1753
|
operationType=operation_type,
|
|
1754
1754
|
lastUpdatedTimestamp=make_ts_millis(query.latest_timestamp),
|
|
1755
1755
|
actor=query.actor.urn() if query.actor else None,
|
|
1756
|
-
|
|
1757
|
-
|
|
1756
|
+
sourceType=models.OperationSourceTypeClass.DATA_PLATFORM,
|
|
1757
|
+
queries=(
|
|
1758
|
+
[self._query_urn(query_id)]
|
|
1758
1759
|
if self.can_generate_query(query_id)
|
|
1759
1760
|
else None
|
|
1760
1761
|
),
|
|
@@ -10,11 +10,6 @@ from typing import (
|
|
|
10
10
|
Union,
|
|
11
11
|
)
|
|
12
12
|
|
|
13
|
-
import requests
|
|
14
|
-
|
|
15
|
-
from datahub.configuration.common import (
|
|
16
|
-
ConfigurationError,
|
|
17
|
-
)
|
|
18
13
|
from datahub.telemetry.telemetry import suppress_telemetry
|
|
19
14
|
|
|
20
15
|
logger = logging.getLogger(__name__)
|
|
@@ -55,66 +50,9 @@ class RestServiceConfig:
|
|
|
55
50
|
A class to represent REST service configuration with semantic version parsing capabilities.
|
|
56
51
|
"""
|
|
57
52
|
|
|
58
|
-
session: Optional[requests.Session] = None
|
|
59
|
-
url: Optional[str] = None
|
|
60
53
|
raw_config: Dict[str, Any] = field(default_factory=dict)
|
|
61
54
|
_version_cache: Optional[Tuple[int, int, int, int]] = None
|
|
62
55
|
|
|
63
|
-
def fetch_config(self) -> Dict[str, Any]:
|
|
64
|
-
"""
|
|
65
|
-
Fetch configuration from the server if not already loaded.
|
|
66
|
-
|
|
67
|
-
Returns:
|
|
68
|
-
The configuration dictionary
|
|
69
|
-
|
|
70
|
-
Raises:
|
|
71
|
-
ConfigurationError: If there's an error fetching or validating the configuration
|
|
72
|
-
"""
|
|
73
|
-
if not self.raw_config:
|
|
74
|
-
if self.session is None or self.url is None:
|
|
75
|
-
raise ConfigurationError(
|
|
76
|
-
"Session and URL are required to load configuration"
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
response = self.session.get(self.url)
|
|
80
|
-
|
|
81
|
-
if response.status_code == 200:
|
|
82
|
-
config = response.json()
|
|
83
|
-
|
|
84
|
-
# Validate that we're connected to the correct service
|
|
85
|
-
if config.get("noCode") == "true":
|
|
86
|
-
self.raw_config = config
|
|
87
|
-
else:
|
|
88
|
-
raise ConfigurationError(
|
|
89
|
-
"You seem to have connected to the frontend service instead of the GMS endpoint. "
|
|
90
|
-
"The rest emitter should connect to DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms). "
|
|
91
|
-
"For Acryl users, the endpoint should be https://<name>.acryl.io/gms"
|
|
92
|
-
)
|
|
93
|
-
else:
|
|
94
|
-
logger.debug(
|
|
95
|
-
f"Unable to connect to {self.url} with status_code: {response.status_code}. Response: {response.text}"
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
-
if response.status_code == 401:
|
|
99
|
-
message = f"Unable to connect to {self.url} - got an authentication error: {response.text}."
|
|
100
|
-
else:
|
|
101
|
-
message = f"Unable to connect to {self.url} with status_code: {response.status_code}."
|
|
102
|
-
|
|
103
|
-
message += "\nPlease check your configuration and make sure you are talking to the DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)."
|
|
104
|
-
raise ConfigurationError(message)
|
|
105
|
-
|
|
106
|
-
return self.raw_config
|
|
107
|
-
|
|
108
|
-
@property
|
|
109
|
-
def config(self) -> Dict[str, Any]:
|
|
110
|
-
"""
|
|
111
|
-
Get the full configuration dictionary, loading it if necessary.
|
|
112
|
-
|
|
113
|
-
Returns:
|
|
114
|
-
The configuration dictionary
|
|
115
|
-
"""
|
|
116
|
-
return self.fetch_config()
|
|
117
|
-
|
|
118
56
|
@property
|
|
119
57
|
def commit_hash(self) -> Optional[str]:
|
|
120
58
|
"""
|
|
@@ -123,7 +61,7 @@ class RestServiceConfig:
|
|
|
123
61
|
Returns:
|
|
124
62
|
The commit hash or None if not found
|
|
125
63
|
"""
|
|
126
|
-
versions = self.
|
|
64
|
+
versions = self.raw_config.get("versions") or {}
|
|
127
65
|
datahub_info = versions.get("acryldata/datahub") or {}
|
|
128
66
|
return datahub_info.get("commit")
|
|
129
67
|
|
|
@@ -135,7 +73,7 @@ class RestServiceConfig:
|
|
|
135
73
|
Returns:
|
|
136
74
|
The server type or "unknown" if not found
|
|
137
75
|
"""
|
|
138
|
-
datahub = self.
|
|
76
|
+
datahub = self.raw_config.get("datahub") or {}
|
|
139
77
|
return datahub.get("serverType", "unknown")
|
|
140
78
|
|
|
141
79
|
@property
|
|
@@ -146,8 +84,7 @@ class RestServiceConfig:
|
|
|
146
84
|
Returns:
|
|
147
85
|
The version string or None if not found
|
|
148
86
|
"""
|
|
149
|
-
|
|
150
|
-
versions = config.get("versions") or {}
|
|
87
|
+
versions = self.raw_config.get("versions") or {}
|
|
151
88
|
datahub_info = versions.get("acryldata/datahub") or {}
|
|
152
89
|
return datahub_info.get("version")
|
|
153
90
|
|
|
@@ -233,7 +170,7 @@ class RestServiceConfig:
|
|
|
233
170
|
Returns:
|
|
234
171
|
True if noCode is set to "true"
|
|
235
172
|
"""
|
|
236
|
-
return self.
|
|
173
|
+
return self.raw_config.get("noCode") == "true"
|
|
237
174
|
|
|
238
175
|
@property
|
|
239
176
|
def is_managed_ingestion_enabled(self) -> bool:
|
|
@@ -243,7 +180,7 @@ class RestServiceConfig:
|
|
|
243
180
|
Returns:
|
|
244
181
|
True if managedIngestion.enabled is True
|
|
245
182
|
"""
|
|
246
|
-
managed_ingestion = self.
|
|
183
|
+
managed_ingestion = self.raw_config.get("managedIngestion") or {}
|
|
247
184
|
return managed_ingestion.get("enabled", False)
|
|
248
185
|
|
|
249
186
|
@property
|
|
@@ -254,7 +191,7 @@ class RestServiceConfig:
|
|
|
254
191
|
Returns:
|
|
255
192
|
True if the server environment is not 'core'
|
|
256
193
|
"""
|
|
257
|
-
datahub_config = self.
|
|
194
|
+
datahub_config = self.raw_config.get("datahub") or {}
|
|
258
195
|
server_env = datahub_config.get("serverEnv")
|
|
259
196
|
|
|
260
197
|
# Return False if serverEnv is None or empty string
|
|
@@ -277,18 +214,20 @@ class RestServiceConfig:
|
|
|
277
214
|
# Special handling for features that rely on config flags
|
|
278
215
|
config_based_features = {
|
|
279
216
|
ServiceFeature.NO_CODE: lambda: self.is_no_code_enabled,
|
|
280
|
-
ServiceFeature.STATEFUL_INGESTION: lambda: self.
|
|
217
|
+
ServiceFeature.STATEFUL_INGESTION: lambda: self.raw_config.get(
|
|
281
218
|
"statefulIngestionCapable", False
|
|
282
219
|
)
|
|
283
220
|
is True,
|
|
284
|
-
ServiceFeature.IMPACT_ANALYSIS: lambda: self.
|
|
221
|
+
ServiceFeature.IMPACT_ANALYSIS: lambda: self.raw_config.get(
|
|
285
222
|
"supportsImpactAnalysis", False
|
|
286
223
|
)
|
|
287
224
|
is True,
|
|
288
|
-
ServiceFeature.PATCH_CAPABLE: lambda: self.
|
|
225
|
+
ServiceFeature.PATCH_CAPABLE: lambda: self.raw_config.get(
|
|
226
|
+
"patchCapable", False
|
|
227
|
+
)
|
|
289
228
|
is True,
|
|
290
229
|
ServiceFeature.CLI_TELEMETRY: lambda: (
|
|
291
|
-
self.
|
|
230
|
+
self.raw_config.get("telemetry") or {}
|
|
292
231
|
).get("enabledCli", None),
|
|
293
232
|
ServiceFeature.DATAHUB_CLOUD: lambda: self.is_datahub_cloud,
|
|
294
233
|
}
|
|
@@ -332,7 +271,7 @@ class RestServiceConfig:
|
|
|
332
271
|
Returns:
|
|
333
272
|
A string representation of the configuration dictionary
|
|
334
273
|
"""
|
|
335
|
-
return str(self.
|
|
274
|
+
return str(self.raw_config)
|
|
336
275
|
|
|
337
276
|
def __repr__(self) -> str:
|
|
338
277
|
"""
|
|
@@ -341,7 +280,7 @@ class RestServiceConfig:
|
|
|
341
280
|
Returns:
|
|
342
281
|
A string representation that can be used with pprint
|
|
343
282
|
"""
|
|
344
|
-
return str(self.
|
|
283
|
+
return str(self.raw_config)
|
|
345
284
|
|
|
346
285
|
|
|
347
286
|
def set_gms_config(config: Union[Dict[str, Any], RestServiceConfig]) -> None:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|