acryl-datahub 1.0.0.3rc11__py3-none-any.whl → 1.0.0.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/METADATA +2545 -2548
- {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/RECORD +37 -34
- {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/emitter/request_helper.py +10 -5
- datahub/emitter/rest_emitter.py +183 -106
- datahub/ingestion/extractor/schema_util.py +17 -1
- datahub/ingestion/graph/client.py +17 -4
- datahub/ingestion/graph/links.py +53 -0
- datahub/ingestion/sink/datahub_rest.py +11 -10
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
- datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/ge_data_profiler.py +25 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +1 -12
- datahub/ingestion/source/snowflake/snowflake_connection.py +5 -17
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/hive_metastore.py +5 -5
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
- datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
- datahub/ingestion/source/unity/config.py +2 -1
- datahub/metadata/_internal_schema_classes.py +503 -490
- datahub/metadata/_urns/urn_defs.py +1528 -1528
- datahub/metadata/schema.avsc +15431 -15414
- datahub/metadata/schemas/Operation.avsc +17 -0
- datahub/sdk/main_client.py +15 -0
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/sql_parsing_aggregator.py +3 -2
- datahub/utilities/server_config_util.py +37 -126
- {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/top_level.txt +0 -0
|
@@ -254,6 +254,23 @@
|
|
|
254
254
|
"type": "long",
|
|
255
255
|
"name": "lastUpdatedTimestamp",
|
|
256
256
|
"doc": "The time at which the operation occurred. Would be better named 'operationTime'"
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
"TimeseriesFieldCollection": {
|
|
260
|
+
"key": "query"
|
|
261
|
+
},
|
|
262
|
+
"type": [
|
|
263
|
+
"null",
|
|
264
|
+
{
|
|
265
|
+
"type": "array",
|
|
266
|
+
"items": "string"
|
|
267
|
+
}
|
|
268
|
+
],
|
|
269
|
+
"name": "queries",
|
|
270
|
+
"default": null,
|
|
271
|
+
"doc": "Which queries were used in this operation.",
|
|
272
|
+
"Urn": "Urn",
|
|
273
|
+
"urn_is_array": true
|
|
257
274
|
}
|
|
258
275
|
],
|
|
259
276
|
"doc": "Operational info for an entity."
|
datahub/sdk/main_client.py
CHANGED
|
@@ -10,6 +10,13 @@ from datahub.sdk.lineage_client import LineageClient
|
|
|
10
10
|
from datahub.sdk.resolver_client import ResolverClient
|
|
11
11
|
from datahub.sdk.search_client import SearchClient
|
|
12
12
|
|
|
13
|
+
try:
|
|
14
|
+
from acryl_datahub_cloud._sdk_extras import ( # type: ignore[import-not-found]
|
|
15
|
+
AssertionClient,
|
|
16
|
+
)
|
|
17
|
+
except ImportError:
|
|
18
|
+
AssertionClient = None
|
|
19
|
+
|
|
13
20
|
|
|
14
21
|
class DataHubClient:
|
|
15
22
|
"""Main client for interacting with DataHub.
|
|
@@ -103,3 +110,11 @@ class DataHubClient:
|
|
|
103
110
|
@property
|
|
104
111
|
def lineage(self) -> LineageClient:
|
|
105
112
|
return LineageClient(self)
|
|
113
|
+
|
|
114
|
+
@property
|
|
115
|
+
def assertion(self) -> AssertionClient: # type: ignore[return-value] # Type is not available if assertion_client is not installed
|
|
116
|
+
if AssertionClient is None:
|
|
117
|
+
raise SdkUsageError(
|
|
118
|
+
"AssertionClient is not installed, please install it with `pip install acryl-datahub-cloud`"
|
|
119
|
+
)
|
|
120
|
+
return AssertionClient(self)
|
|
@@ -163,8 +163,7 @@ def _patch_lineage() -> None:
|
|
|
163
163
|
- source_columns = set(find_all_in_scope(select, exp.Column))
|
|
164
164
|
+ source_columns = list(find_all_in_scope(select, exp.Column))
|
|
165
165
|
|
|
166
|
-
|
|
167
|
-
+ # If the source is a UDTF find columns used in the UDTF to generate the table
|
|
166
|
+
# If the source is a UDTF find columns used in the UDTF to generate the table
|
|
168
167
|
+ source = scope.expression
|
|
169
168
|
if isinstance(source, exp.UDTF):
|
|
170
169
|
- source_columns |= set(source.find_all(exp.Column))
|
|
@@ -1753,8 +1753,9 @@ class SqlParsingAggregator(Closeable):
|
|
|
1753
1753
|
operationType=operation_type,
|
|
1754
1754
|
lastUpdatedTimestamp=make_ts_millis(query.latest_timestamp),
|
|
1755
1755
|
actor=query.actor.urn() if query.actor else None,
|
|
1756
|
-
|
|
1757
|
-
|
|
1756
|
+
sourceType=models.OperationSourceTypeClass.DATA_PLATFORM,
|
|
1757
|
+
queries=(
|
|
1758
|
+
[self._query_urn(query_id)]
|
|
1758
1759
|
if self.can_generate_query(query_id)
|
|
1759
1760
|
else None
|
|
1760
1761
|
),
|
|
@@ -10,11 +10,6 @@ from typing import (
|
|
|
10
10
|
Union,
|
|
11
11
|
)
|
|
12
12
|
|
|
13
|
-
import requests
|
|
14
|
-
|
|
15
|
-
from datahub.configuration.common import (
|
|
16
|
-
ConfigurationError,
|
|
17
|
-
)
|
|
18
13
|
from datahub.telemetry.telemetry import suppress_telemetry
|
|
19
14
|
|
|
20
15
|
logger = logging.getLogger(__name__)
|
|
@@ -44,15 +39,8 @@ class ServiceFeature(Enum):
|
|
|
44
39
|
|
|
45
40
|
|
|
46
41
|
_REQUIRED_VERSION_OPENAPI_TRACING = {
|
|
47
|
-
"
|
|
48
|
-
|
|
49
|
-
3,
|
|
50
|
-
11,
|
|
51
|
-
0,
|
|
52
|
-
), # Requires v0.3.11.0 or higher for acryl versions
|
|
53
|
-
"cloud": (0, 3, 11, 0), # Special case for '-cloud' suffix
|
|
54
|
-
"any_suffix": (0, 3, 11, 0), # Generic requirement for any other suffix
|
|
55
|
-
"none": (1, 0, 1, 0), # Requirement for versions without suffix
|
|
42
|
+
"cloud": (0, 3, 11, 0),
|
|
43
|
+
"core": (1, 0, 1, 0),
|
|
56
44
|
}
|
|
57
45
|
|
|
58
46
|
|
|
@@ -62,66 +50,9 @@ class RestServiceConfig:
|
|
|
62
50
|
A class to represent REST service configuration with semantic version parsing capabilities.
|
|
63
51
|
"""
|
|
64
52
|
|
|
65
|
-
session: Optional[requests.Session] = None
|
|
66
|
-
url: Optional[str] = None
|
|
67
53
|
raw_config: Dict[str, Any] = field(default_factory=dict)
|
|
68
54
|
_version_cache: Optional[Tuple[int, int, int, int]] = None
|
|
69
55
|
|
|
70
|
-
def fetch_config(self) -> Dict[str, Any]:
|
|
71
|
-
"""
|
|
72
|
-
Fetch configuration from the server if not already loaded.
|
|
73
|
-
|
|
74
|
-
Returns:
|
|
75
|
-
The configuration dictionary
|
|
76
|
-
|
|
77
|
-
Raises:
|
|
78
|
-
ConfigurationError: If there's an error fetching or validating the configuration
|
|
79
|
-
"""
|
|
80
|
-
if not self.raw_config:
|
|
81
|
-
if self.session is None or self.url is None:
|
|
82
|
-
raise ConfigurationError(
|
|
83
|
-
"Session and URL are required to load configuration"
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
response = self.session.get(self.url)
|
|
87
|
-
|
|
88
|
-
if response.status_code == 200:
|
|
89
|
-
config = response.json()
|
|
90
|
-
|
|
91
|
-
# Validate that we're connected to the correct service
|
|
92
|
-
if config.get("noCode") == "true":
|
|
93
|
-
self.raw_config = config
|
|
94
|
-
else:
|
|
95
|
-
raise ConfigurationError(
|
|
96
|
-
"You seem to have connected to the frontend service instead of the GMS endpoint. "
|
|
97
|
-
"The rest emitter should connect to DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms). "
|
|
98
|
-
"For Acryl users, the endpoint should be https://<name>.acryl.io/gms"
|
|
99
|
-
)
|
|
100
|
-
else:
|
|
101
|
-
logger.debug(
|
|
102
|
-
f"Unable to connect to {self.url} with status_code: {response.status_code}. Response: {response.text}"
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
if response.status_code == 401:
|
|
106
|
-
message = f"Unable to connect to {self.url} - got an authentication error: {response.text}."
|
|
107
|
-
else:
|
|
108
|
-
message = f"Unable to connect to {self.url} with status_code: {response.status_code}."
|
|
109
|
-
|
|
110
|
-
message += "\nPlease check your configuration and make sure you are talking to the DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)."
|
|
111
|
-
raise ConfigurationError(message)
|
|
112
|
-
|
|
113
|
-
return self.raw_config
|
|
114
|
-
|
|
115
|
-
@property
|
|
116
|
-
def config(self) -> Dict[str, Any]:
|
|
117
|
-
"""
|
|
118
|
-
Get the full configuration dictionary, loading it if necessary.
|
|
119
|
-
|
|
120
|
-
Returns:
|
|
121
|
-
The configuration dictionary
|
|
122
|
-
"""
|
|
123
|
-
return self.fetch_config()
|
|
124
|
-
|
|
125
56
|
@property
|
|
126
57
|
def commit_hash(self) -> Optional[str]:
|
|
127
58
|
"""
|
|
@@ -130,7 +61,7 @@ class RestServiceConfig:
|
|
|
130
61
|
Returns:
|
|
131
62
|
The commit hash or None if not found
|
|
132
63
|
"""
|
|
133
|
-
versions = self.
|
|
64
|
+
versions = self.raw_config.get("versions") or {}
|
|
134
65
|
datahub_info = versions.get("acryldata/datahub") or {}
|
|
135
66
|
return datahub_info.get("commit")
|
|
136
67
|
|
|
@@ -142,7 +73,7 @@ class RestServiceConfig:
|
|
|
142
73
|
Returns:
|
|
143
74
|
The server type or "unknown" if not found
|
|
144
75
|
"""
|
|
145
|
-
datahub = self.
|
|
76
|
+
datahub = self.raw_config.get("datahub") or {}
|
|
146
77
|
return datahub.get("serverType", "unknown")
|
|
147
78
|
|
|
148
79
|
@property
|
|
@@ -153,8 +84,7 @@ class RestServiceConfig:
|
|
|
153
84
|
Returns:
|
|
154
85
|
The version string or None if not found
|
|
155
86
|
"""
|
|
156
|
-
|
|
157
|
-
versions = config.get("versions") or {}
|
|
87
|
+
versions = self.raw_config.get("versions") or {}
|
|
158
88
|
datahub_info = versions.get("acryldata/datahub") or {}
|
|
159
89
|
return datahub_info.get("version")
|
|
160
90
|
|
|
@@ -240,7 +170,7 @@ class RestServiceConfig:
|
|
|
240
170
|
Returns:
|
|
241
171
|
True if noCode is set to "true"
|
|
242
172
|
"""
|
|
243
|
-
return self.
|
|
173
|
+
return self.raw_config.get("noCode") == "true"
|
|
244
174
|
|
|
245
175
|
@property
|
|
246
176
|
def is_managed_ingestion_enabled(self) -> bool:
|
|
@@ -250,7 +180,7 @@ class RestServiceConfig:
|
|
|
250
180
|
Returns:
|
|
251
181
|
True if managedIngestion.enabled is True
|
|
252
182
|
"""
|
|
253
|
-
managed_ingestion = self.
|
|
183
|
+
managed_ingestion = self.raw_config.get("managedIngestion") or {}
|
|
254
184
|
return managed_ingestion.get("enabled", False)
|
|
255
185
|
|
|
256
186
|
@property
|
|
@@ -259,26 +189,21 @@ class RestServiceConfig:
|
|
|
259
189
|
Check if DataHub Cloud is enabled.
|
|
260
190
|
|
|
261
191
|
Returns:
|
|
262
|
-
True if the server environment is not '
|
|
192
|
+
True if the server environment is not 'core'
|
|
263
193
|
"""
|
|
264
|
-
datahub_config = self.
|
|
194
|
+
datahub_config = self.raw_config.get("datahub") or {}
|
|
265
195
|
server_env = datahub_config.get("serverEnv")
|
|
266
196
|
|
|
267
197
|
# Return False if serverEnv is None or empty string
|
|
268
198
|
if not server_env:
|
|
269
199
|
return False
|
|
270
200
|
|
|
271
|
-
return server_env != "
|
|
201
|
+
return server_env != "core"
|
|
272
202
|
|
|
273
203
|
def supports_feature(self, feature: ServiceFeature) -> bool:
|
|
274
204
|
"""
|
|
275
|
-
Determines whether a specific feature is supported based on service version
|
|
276
|
-
|
|
277
|
-
Version categorization follows these rules:
|
|
278
|
-
1. Has '-acryl' suffix (highest priority)
|
|
279
|
-
2. Has a specific known suffix (e.g. '-other')
|
|
280
|
-
3. Has some other suffix (catchall for any suffix)
|
|
281
|
-
4. No suffix
|
|
205
|
+
Determines whether a specific feature is supported based on service version
|
|
206
|
+
and whether this is a cloud deployment or not.
|
|
282
207
|
|
|
283
208
|
Args:
|
|
284
209
|
feature: Feature enum value to check
|
|
@@ -286,68 +211,54 @@ class RestServiceConfig:
|
|
|
286
211
|
Returns:
|
|
287
212
|
Boolean indicating whether the feature is supported
|
|
288
213
|
"""
|
|
289
|
-
|
|
290
|
-
if not version:
|
|
291
|
-
return False
|
|
292
|
-
|
|
293
|
-
# Determine the suffix category
|
|
294
|
-
suffix_category = "none" # Default: no suffix
|
|
295
|
-
|
|
296
|
-
if "-" in version:
|
|
297
|
-
suffix = version.split("-", 1)[1]
|
|
298
|
-
|
|
299
|
-
if suffix == "acryl":
|
|
300
|
-
suffix_category = "acryl"
|
|
301
|
-
elif suffix == "cloud": # Example of a specific override
|
|
302
|
-
suffix_category = "cloud"
|
|
303
|
-
else:
|
|
304
|
-
suffix_category = "any_suffix" # Catchall for any other suffix
|
|
305
|
-
|
|
306
|
-
# Define feature requirements based on version scheme
|
|
307
|
-
# This can be expanded to include more features
|
|
308
|
-
feature_requirements = {
|
|
309
|
-
ServiceFeature.OPEN_API_SDK: _REQUIRED_VERSION_OPENAPI_TRACING,
|
|
310
|
-
ServiceFeature.API_TRACING: _REQUIRED_VERSION_OPENAPI_TRACING,
|
|
311
|
-
# Additional features can be defined here
|
|
312
|
-
}
|
|
313
|
-
|
|
314
|
-
# Special handling for features that rely on config flags instead of version
|
|
214
|
+
# Special handling for features that rely on config flags
|
|
315
215
|
config_based_features = {
|
|
316
216
|
ServiceFeature.NO_CODE: lambda: self.is_no_code_enabled,
|
|
317
|
-
ServiceFeature.STATEFUL_INGESTION: lambda: self.
|
|
217
|
+
ServiceFeature.STATEFUL_INGESTION: lambda: self.raw_config.get(
|
|
318
218
|
"statefulIngestionCapable", False
|
|
319
219
|
)
|
|
320
220
|
is True,
|
|
321
|
-
ServiceFeature.IMPACT_ANALYSIS: lambda: self.
|
|
221
|
+
ServiceFeature.IMPACT_ANALYSIS: lambda: self.raw_config.get(
|
|
322
222
|
"supportsImpactAnalysis", False
|
|
323
223
|
)
|
|
324
224
|
is True,
|
|
325
|
-
ServiceFeature.PATCH_CAPABLE: lambda: self.
|
|
225
|
+
ServiceFeature.PATCH_CAPABLE: lambda: self.raw_config.get(
|
|
226
|
+
"patchCapable", False
|
|
227
|
+
)
|
|
326
228
|
is True,
|
|
327
229
|
ServiceFeature.CLI_TELEMETRY: lambda: (
|
|
328
|
-
self.
|
|
230
|
+
self.raw_config.get("telemetry") or {}
|
|
329
231
|
).get("enabledCli", None),
|
|
330
|
-
|
|
232
|
+
ServiceFeature.DATAHUB_CLOUD: lambda: self.is_datahub_cloud,
|
|
331
233
|
}
|
|
332
234
|
|
|
333
235
|
# Check if this is a config-based feature
|
|
334
236
|
if feature in config_based_features:
|
|
335
237
|
return config_based_features[feature]()
|
|
336
238
|
|
|
239
|
+
# For environment-based features, determine requirements based on cloud vs. non-cloud
|
|
240
|
+
deployment_type = "cloud" if self.is_datahub_cloud else "core"
|
|
241
|
+
|
|
242
|
+
# Define feature requirements
|
|
243
|
+
feature_requirements = {
|
|
244
|
+
ServiceFeature.OPEN_API_SDK: _REQUIRED_VERSION_OPENAPI_TRACING,
|
|
245
|
+
ServiceFeature.API_TRACING: _REQUIRED_VERSION_OPENAPI_TRACING,
|
|
246
|
+
# Additional features can be defined here
|
|
247
|
+
}
|
|
248
|
+
|
|
337
249
|
# Check if the feature exists in our requirements dictionary
|
|
338
250
|
if feature not in feature_requirements:
|
|
339
251
|
# Unknown feature, assume not supported
|
|
340
252
|
return False
|
|
341
253
|
|
|
342
|
-
# Get version requirements for this feature and
|
|
254
|
+
# Get version requirements for this feature and deployment type
|
|
343
255
|
feature_reqs = feature_requirements[feature]
|
|
344
|
-
requirements = feature_reqs.get(
|
|
256
|
+
requirements = feature_reqs.get(deployment_type)
|
|
345
257
|
|
|
346
258
|
if not requirements:
|
|
347
|
-
#
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
) # Very high version if none defined
|
|
259
|
+
# If no specific requirements defined for this deployment type,
|
|
260
|
+
# assume feature is not supported
|
|
261
|
+
return False
|
|
351
262
|
|
|
352
263
|
# Check if the current version meets the requirements
|
|
353
264
|
req_major, req_minor, req_patch, req_build = requirements
|
|
@@ -360,7 +271,7 @@ class RestServiceConfig:
|
|
|
360
271
|
Returns:
|
|
361
272
|
A string representation of the configuration dictionary
|
|
362
273
|
"""
|
|
363
|
-
return str(self.
|
|
274
|
+
return str(self.raw_config)
|
|
364
275
|
|
|
365
276
|
def __repr__(self) -> str:
|
|
366
277
|
"""
|
|
@@ -369,7 +280,7 @@ class RestServiceConfig:
|
|
|
369
280
|
Returns:
|
|
370
281
|
A string representation that can be used with pprint
|
|
371
282
|
"""
|
|
372
|
-
return str(self.
|
|
283
|
+
return str(self.raw_config)
|
|
373
284
|
|
|
374
285
|
|
|
375
286
|
def set_gms_config(config: Union[Dict[str, Any], RestServiceConfig]) -> None:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|