acryl-datahub 1.0.0.3rc11__py3-none-any.whl → 1.0.0.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries, and is provided for informational purposes only.

Files changed (37)
  1. {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/METADATA +2545 -2548
  2. {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/RECORD +37 -34
  3. {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/emitter/request_helper.py +10 -5
  6. datahub/emitter/rest_emitter.py +183 -106
  7. datahub/ingestion/extractor/schema_util.py +17 -1
  8. datahub/ingestion/graph/client.py +17 -4
  9. datahub/ingestion/graph/links.py +53 -0
  10. datahub/ingestion/sink/datahub_rest.py +11 -10
  11. datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
  12. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
  13. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
  14. datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
  15. datahub/ingestion/source/fivetran/config.py +1 -1
  16. datahub/ingestion/source/ge_data_profiler.py +25 -0
  17. datahub/ingestion/source/snowflake/snowflake_config.py +1 -12
  18. datahub/ingestion/source/snowflake/snowflake_connection.py +5 -17
  19. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  20. datahub/ingestion/source/sql/athena.py +2 -1
  21. datahub/ingestion/source/sql/hive_metastore.py +5 -5
  22. datahub/ingestion/source/sql/mssql/source.py +1 -1
  23. datahub/ingestion/source/sql/sql_config.py +1 -34
  24. datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
  25. datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
  26. datahub/ingestion/source/unity/config.py +2 -1
  27. datahub/metadata/_internal_schema_classes.py +503 -490
  28. datahub/metadata/_urns/urn_defs.py +1528 -1528
  29. datahub/metadata/schema.avsc +15431 -15414
  30. datahub/metadata/schemas/Operation.avsc +17 -0
  31. datahub/sdk/main_client.py +15 -0
  32. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  33. datahub/sql_parsing/sql_parsing_aggregator.py +3 -2
  34. datahub/utilities/server_config_util.py +37 -126
  35. {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/entry_points.txt +0 -0
  36. {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/licenses/LICENSE +0 -0
  37. {acryl_datahub-1.0.0.3rc11.dist-info → acryl_datahub-1.0.0.4rc1.dist-info}/top_level.txt +0 -0
datahub/metadata/schemas/Operation.avsc
@@ -254,6 +254,23 @@
       "type": "long",
       "name": "lastUpdatedTimestamp",
       "doc": "The time at which the operation occurred. Would be better named 'operationTime'"
+    },
+    {
+      "TimeseriesFieldCollection": {
+        "key": "query"
+      },
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": "string"
+        }
+      ],
+      "name": "queries",
+      "default": null,
+      "doc": "Which queries were used in this operation.",
+      "Urn": "Urn",
+      "urn_is_array": true
     }
   ],
   "doc": "Operational info for an entity."
datahub/sdk/main_client.py
@@ -10,6 +10,13 @@ from datahub.sdk.lineage_client import LineageClient
 from datahub.sdk.resolver_client import ResolverClient
 from datahub.sdk.search_client import SearchClient
 
+try:
+    from acryl_datahub_cloud._sdk_extras import (  # type: ignore[import-not-found]
+        AssertionClient,
+    )
+except ImportError:
+    AssertionClient = None
+
 
 class DataHubClient:
     """Main client for interacting with DataHub.
@@ -103,3 +110,11 @@ class DataHubClient:
     @property
     def lineage(self) -> LineageClient:
         return LineageClient(self)
+
+    @property
+    def assertion(self) -> AssertionClient:  # type: ignore[return-value] # Type is not available if assertion_client is not installed
+        if AssertionClient is None:
+            raise SdkUsageError(
+                "AssertionClient is not installed, please install it with `pip install acryl-datahub-cloud`"
+            )
+        return AssertionClient(self)
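
Note: the new property is strictly optional. The guarded import keeps the OSS wheel importable, and accessing the property without the cloud package raises SdkUsageError. A rough usage sketch, assuming the existing DataHubClient constructor and that SdkUsageError is importable from datahub.errors; the connection details are placeholders:

from datahub.errors import SdkUsageError
from datahub.sdk import DataHubClient

client = DataHubClient(server="http://localhost:8080", token=None)  # placeholder connection

try:
    assertion_client = client.assertion  # cloud-only surface
except SdkUsageError:
    assertion_client = None  # OSS install: acryl-datahub-cloud is not present
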
datahub/sql_parsing/_sqlglot_patch.py
(This module stores upstream sqlglot patches as embedded diff strings, so the leading + and - inside the context lines below belong to that embedded patch, not to this release diff.)
@@ -163,8 +163,7 @@ def _patch_lineage() -> None:
   - source_columns = set(find_all_in_scope(select, exp.Column))
   + source_columns = list(find_all_in_scope(select, exp.Column))
 
-  - # If the source is a UDTF find columns used in the UTDF to generate the table
-  + # If the source is a UDTF find columns used in the UDTF to generate the table
+  # If the source is a UDTF find columns used in the UDTF to generate the table
   + source = scope.expression
     if isinstance(source, exp.UDTF):
   - source_columns |= set(source.find_all(exp.Column))
datahub/sql_parsing/sql_parsing_aggregator.py
@@ -1753,8 +1753,9 @@ class SqlParsingAggregator(Closeable):
                 operationType=operation_type,
                 lastUpdatedTimestamp=make_ts_millis(query.latest_timestamp),
                 actor=query.actor.urn() if query.actor else None,
-                customProperties=(
-                    {"query_urn": self._query_urn(query_id)}
+                sourceType=models.OperationSourceTypeClass.DATA_PLATFORM,
+                queries=(
+                    [self._query_urn(query_id)]
                     if self.can_generate_query(query_id)
                     else None
                 ),
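
Note: the aggregator now records query linkage through the new typed queries field (plus an explicit sourceType) instead of a customProperties entry. A minimal emit sketch of the resulting aspect, assuming the regenerated OperationClass exposes queries as a constructor argument; the endpoint, dataset, and query URN below are placeholders:

import time

from datahub.emitter.mce_builder import make_dataset_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import OperationClass, OperationTypeClass

now_ms = int(time.time() * 1000)
operation = OperationClass(
    timestampMillis=now_ms,
    lastUpdatedTimestamp=now_ms,
    operationType=OperationTypeClass.INSERT,
    # New in this release: link the operation to the query URNs that produced it.
    queries=["urn:li:query:example-query-id"],  # placeholder query URN
)

emitter = DatahubRestEmitter("http://localhost:8080")  # placeholder GMS endpoint
emitter.emit(
    MetadataChangeProposalWrapper(
        entityUrn=make_dataset_urn("snowflake", "db.schema.table"),  # placeholder dataset
        aspect=operation,
    )
)
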
datahub/utilities/server_config_util.py
@@ -10,11 +10,6 @@ from typing import (
     Union,
 )
 
-import requests
-
-from datahub.configuration.common import (
-    ConfigurationError,
-)
 from datahub.telemetry.telemetry import suppress_telemetry
 
 logger = logging.getLogger(__name__)
@@ -44,15 +39,8 @@ class ServiceFeature(Enum):
 
 
 _REQUIRED_VERSION_OPENAPI_TRACING = {
-    "acryl": (
-        0,
-        3,
-        11,
-        0,
-    ),  # Requires v0.3.11.0 or higher for acryl versions
-    "cloud": (0, 3, 11, 0),  # Special case for '-cloud' suffix
-    "any_suffix": (0, 3, 11, 0),  # Generic requirement for any other suffix
-    "none": (1, 0, 1, 0),  # Requirement for versions without suffix
+    "cloud": (0, 3, 11, 0),
+    "core": (1, 0, 1, 0),
 }
 
 
@@ -62,66 +50,9 @@ class RestServiceConfig:
     A class to represent REST service configuration with semantic version parsing capabilities.
     """
 
-    session: Optional[requests.Session] = None
-    url: Optional[str] = None
     raw_config: Dict[str, Any] = field(default_factory=dict)
     _version_cache: Optional[Tuple[int, int, int, int]] = None
 
-    def fetch_config(self) -> Dict[str, Any]:
-        """
-        Fetch configuration from the server if not already loaded.
-
-        Returns:
-            The configuration dictionary
-
-        Raises:
-            ConfigurationError: If there's an error fetching or validating the configuration
-        """
-        if not self.raw_config:
-            if self.session is None or self.url is None:
-                raise ConfigurationError(
-                    "Session and URL are required to load configuration"
-                )
-
-            response = self.session.get(self.url)
-
-            if response.status_code == 200:
-                config = response.json()
-
-                # Validate that we're connected to the correct service
-                if config.get("noCode") == "true":
-                    self.raw_config = config
-                else:
-                    raise ConfigurationError(
-                        "You seem to have connected to the frontend service instead of the GMS endpoint. "
-                        "The rest emitter should connect to DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms). "
-                        "For Acryl users, the endpoint should be https://<name>.acryl.io/gms"
-                    )
-            else:
-                logger.debug(
-                    f"Unable to connect to {self.url} with status_code: {response.status_code}. Response: {response.text}"
-                )
-
-                if response.status_code == 401:
-                    message = f"Unable to connect to {self.url} - got an authentication error: {response.text}."
-                else:
-                    message = f"Unable to connect to {self.url} with status_code: {response.status_code}."
-
-                message += "\nPlease check your configuration and make sure you are talking to the DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)."
-                raise ConfigurationError(message)
-
-        return self.raw_config
-
-    @property
-    def config(self) -> Dict[str, Any]:
-        """
-        Get the full configuration dictionary, loading it if necessary.
-
-        Returns:
-            The configuration dictionary
-        """
-        return self.fetch_config()
-
     @property
     def commit_hash(self) -> Optional[str]:
         """
@@ -130,7 +61,7 @@ class RestServiceConfig:
         Returns:
             The commit hash or None if not found
         """
-        versions = self.config.get("versions") or {}
+        versions = self.raw_config.get("versions") or {}
         datahub_info = versions.get("acryldata/datahub") or {}
         return datahub_info.get("commit")
 
@@ -142,7 +73,7 @@ class RestServiceConfig:
         Returns:
             The server type or "unknown" if not found
         """
-        datahub = self.config.get("datahub") or {}
+        datahub = self.raw_config.get("datahub") or {}
         return datahub.get("serverType", "unknown")
 
     @property
@@ -153,8 +84,7 @@ class RestServiceConfig:
         Returns:
             The version string or None if not found
         """
-        config = self.fetch_config()
-        versions = config.get("versions") or {}
+        versions = self.raw_config.get("versions") or {}
         datahub_info = versions.get("acryldata/datahub") or {}
         return datahub_info.get("version")
 
@@ -240,7 +170,7 @@ class RestServiceConfig:
         Returns:
             True if noCode is set to "true"
         """
-        return self.config.get("noCode") == "true"
+        return self.raw_config.get("noCode") == "true"
 
     @property
     def is_managed_ingestion_enabled(self) -> bool:
@@ -250,7 +180,7 @@ class RestServiceConfig:
         Returns:
             True if managedIngestion.enabled is True
         """
-        managed_ingestion = self.config.get("managedIngestion") or {}
+        managed_ingestion = self.raw_config.get("managedIngestion") or {}
         return managed_ingestion.get("enabled", False)
 
     @property
@@ -259,26 +189,21 @@ class RestServiceConfig:
         Check if DataHub Cloud is enabled.
 
         Returns:
-            True if the server environment is not 'oss'
+            True if the server environment is not 'core'
         """
-        datahub_config = self.config.get("datahub") or {}
+        datahub_config = self.raw_config.get("datahub") or {}
         server_env = datahub_config.get("serverEnv")
 
         # Return False if serverEnv is None or empty string
         if not server_env:
             return False
 
-        return server_env != "oss"
+        return server_env != "core"
 
     def supports_feature(self, feature: ServiceFeature) -> bool:
         """
-        Determines whether a specific feature is supported based on service version.
-
-        Version categorization follows these rules:
-        1. Has '-acryl' suffix (highest priority)
-        2. Has a specific known suffix (e.g. '-other')
-        3. Has some other suffix (catchall for any suffix)
-        4. No suffix
+        Determines whether a specific feature is supported based on service version
+        and whether this is a cloud deployment or not.
 
         Args:
             feature: Feature enum value to check
@@ -286,68 +211,54 @@ class RestServiceConfig:
         Returns:
             Boolean indicating whether the feature is supported
         """
-        version = self.service_version
-        if not version:
-            return False
-
-        # Determine the suffix category
-        suffix_category = "none"  # Default: no suffix
-
-        if "-" in version:
-            suffix = version.split("-", 1)[1]
-
-            if suffix == "acryl":
-                suffix_category = "acryl"
-            elif suffix == "cloud":  # Example of a specific override
-                suffix_category = "cloud"
-            else:
-                suffix_category = "any_suffix"  # Catchall for any other suffix
-
-        # Define feature requirements based on version scheme
-        # This can be expanded to include more features
-        feature_requirements = {
-            ServiceFeature.OPEN_API_SDK: _REQUIRED_VERSION_OPENAPI_TRACING,
-            ServiceFeature.API_TRACING: _REQUIRED_VERSION_OPENAPI_TRACING,
-            # Additional features can be defined here
-        }
-
-        # Special handling for features that rely on config flags instead of version
+        # Special handling for features that rely on config flags
         config_based_features = {
             ServiceFeature.NO_CODE: lambda: self.is_no_code_enabled,
-            ServiceFeature.STATEFUL_INGESTION: lambda: self.config.get(
+            ServiceFeature.STATEFUL_INGESTION: lambda: self.raw_config.get(
                 "statefulIngestionCapable", False
             )
             is True,
-            ServiceFeature.IMPACT_ANALYSIS: lambda: self.config.get(
+            ServiceFeature.IMPACT_ANALYSIS: lambda: self.raw_config.get(
                 "supportsImpactAnalysis", False
            )
            is True,
-            ServiceFeature.PATCH_CAPABLE: lambda: self.config.get("patchCapable", False)
+            ServiceFeature.PATCH_CAPABLE: lambda: self.raw_config.get(
+                "patchCapable", False
+            )
            is True,
            ServiceFeature.CLI_TELEMETRY: lambda: (
-                self.config.get("telemetry") or {}
+                self.raw_config.get("telemetry") or {}
            ).get("enabledCli", None),
-            # Add more config-based feature checks as needed
+            ServiceFeature.DATAHUB_CLOUD: lambda: self.is_datahub_cloud,
        }
 
        # Check if this is a config-based feature
        if feature in config_based_features:
            return config_based_features[feature]()
 
+        # For environment-based features, determine requirements based on cloud vs. non-cloud
+        deployment_type = "cloud" if self.is_datahub_cloud else "core"
+
+        # Define feature requirements
+        feature_requirements = {
+            ServiceFeature.OPEN_API_SDK: _REQUIRED_VERSION_OPENAPI_TRACING,
+            ServiceFeature.API_TRACING: _REQUIRED_VERSION_OPENAPI_TRACING,
+            # Additional features can be defined here
+        }
+
        # Check if the feature exists in our requirements dictionary
        if feature not in feature_requirements:
            # Unknown feature, assume not supported
            return False
 
-        # Get version requirements for this feature and version category
+        # Get version requirements for this feature and deployment type
        feature_reqs = feature_requirements[feature]
-        requirements = feature_reqs.get(suffix_category)
+        requirements = feature_reqs.get(deployment_type)
 
        if not requirements:
-            # Fallback to the no-suffix requirements if specific requirements aren't defined
-            requirements = feature_reqs.get(
-                "none", (99, 99, 99, 99)
-            )  # Very high version if none defined
+            # If no specific requirements defined for this deployment type,
+            # assume feature is not supported
+            return False
 
        # Check if the current version meets the requirements
        req_major, req_minor, req_patch, req_build = requirements
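
Note: the requirement table is now keyed by deployment type ("cloud" vs. "core") rather than by version suffix, and the remaining gate is an ordinary tuple comparison. A standalone sketch of that idea; the helper below is illustrative and not part of the module:

# Sketch only: the requirement values mirror _REQUIRED_VERSION_OPENAPI_TRACING above,
# but this helper and its signature are illustrative, not part of the package.
REQUIREMENTS = {"cloud": (0, 3, 11, 0), "core": (1, 0, 1, 0)}

def meets_requirement(parsed_version: tuple, deployment_type: str) -> bool:
    required = REQUIREMENTS.get(deployment_type)
    if required is None:
        return False  # no requirement defined for this deployment type -> unsupported
    return parsed_version >= required  # lexicographic tuple comparison

assert meets_requirement((0, 3, 12, 0), "cloud")    # cloud server on v0.3.12
assert not meets_requirement((1, 0, 0, 5), "core")  # core server below v1.0.1
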
@@ -360,7 +271,7 @@ class RestServiceConfig:
         Returns:
             A string representation of the configuration dictionary
         """
-        return str(self.config)
+        return str(self.raw_config)
 
     def __repr__(self) -> str:
         """
@@ -369,7 +280,7 @@ class RestServiceConfig:
         Returns:
             A string representation that can be used with pprint
         """
-        return str(self.config)
+        return str(self.raw_config)
 
 
 def set_gms_config(config: Union[Dict[str, Any], RestServiceConfig]) -> None:
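
Note: with fetch_config and the lazy config property removed, RestServiceConfig is now a plain wrapper around a configuration dict that the caller (e.g. the REST emitter) has already fetched. A sketch with an illustrative payload, assuming the dataclass-style fields shown above allow keyword construction:

from datahub.utilities.server_config_util import RestServiceConfig, ServiceFeature

# Illustrative /config payload, trimmed to the keys the properties above read.
raw = {
    "noCode": "true",
    "statefulIngestionCapable": True,
    "datahub": {"serverType": "prod", "serverEnv": "cloud"},
    "versions": {"acryldata/datahub": {"version": "v0.3.12", "commit": "abc123"}},
}

config = RestServiceConfig(raw_config=raw)
print(config.service_version)   # "v0.3.12"
print(config.is_datahub_cloud)  # True: serverEnv is not "core"
print(config.supports_feature(ServiceFeature.STATEFUL_INGESTION))  # True, config-flag based
print(config.supports_feature(ServiceFeature.API_TRACING))  # version-gated: needs >= 0.3.11.0 on cloud
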