acryl-datahub 1.0.0.3rc12__py3-none-any.whl → 1.0.0.4rc2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (37):
  1. {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/METADATA +2529 -2527
  2. {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/RECORD +37 -34
  3. {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/emitter/request_helper.py +10 -5
  6. datahub/emitter/rest_emitter.py +183 -106
  7. datahub/ingestion/extractor/schema_util.py +17 -1
  8. datahub/ingestion/graph/client.py +17 -4
  9. datahub/ingestion/graph/links.py +53 -0
  10. datahub/ingestion/sink/datahub_rest.py +11 -10
  11. datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
  12. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
  13. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
  14. datahub/ingestion/source/fivetran/config.py +1 -1
  15. datahub/ingestion/source/ge_data_profiler.py +25 -0
  16. datahub/ingestion/source/snowflake/snowflake_config.py +1 -12
  17. datahub/ingestion/source/snowflake/snowflake_connection.py +5 -17
  18. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  19. datahub/ingestion/source/sql/athena.py +2 -1
  20. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  21. datahub/ingestion/source/sql/mssql/source.py +1 -1
  22. datahub/ingestion/source/sql/sql_config.py +1 -34
  23. datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
  24. datahub/ingestion/source/sql/stored_procedures/lineage.py +1 -0
  25. datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
  26. datahub/ingestion/source/tableau/tableau.py +4 -2
  27. datahub/ingestion/source/unity/config.py +2 -1
  28. datahub/metadata/_internal_schema_classes.py +13 -0
  29. datahub/metadata/schema.avsc +17 -0
  30. datahub/metadata/schemas/Operation.avsc +17 -0
  31. datahub/sdk/main_client.py +15 -0
  32. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  33. datahub/sql_parsing/sql_parsing_aggregator.py +3 -2
  34. datahub/utilities/server_config_util.py +14 -75
  35. {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/entry_points.txt +0 -0
  36. {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/licenses/LICENSE +0 -0
  37. {acryl_datahub-1.0.0.3rc12.dist-info → acryl_datahub-1.0.0.4rc2.dist-info}/top_level.txt +0 -0
@@ -254,6 +254,23 @@
254
254
  "type": "long",
255
255
  "name": "lastUpdatedTimestamp",
256
256
  "doc": "The time at which the operation occurred. Would be better named 'operationTime'"
257
+ },
258
+ {
259
+ "TimeseriesFieldCollection": {
260
+ "key": "query"
261
+ },
262
+ "type": [
263
+ "null",
264
+ {
265
+ "type": "array",
266
+ "items": "string"
267
+ }
268
+ ],
269
+ "name": "queries",
270
+ "default": null,
271
+ "doc": "Which queries were used in this operation.",
272
+ "Urn": "Urn",
273
+ "urn_is_array": true
257
274
  }
258
275
  ],
259
276
  "doc": "Operational info for an entity."
@@ -10,6 +10,13 @@ from datahub.sdk.lineage_client import LineageClient
10
10
  from datahub.sdk.resolver_client import ResolverClient
11
11
  from datahub.sdk.search_client import SearchClient
12
12
 
13
+ try:
14
+ from acryl_datahub_cloud._sdk_extras import ( # type: ignore[import-not-found]
15
+ AssertionClient,
16
+ )
17
+ except ImportError:
18
+ AssertionClient = None
19
+
13
20
 
14
21
  class DataHubClient:
15
22
  """Main client for interacting with DataHub.
@@ -103,3 +110,11 @@ class DataHubClient:
103
110
  @property
104
111
  def lineage(self) -> LineageClient:
105
112
  return LineageClient(self)
113
+
114
+ @property
115
+ def assertion(self) -> AssertionClient: # type: ignore[return-value] # Type is not available if assertion_client is not installed
116
+ if AssertionClient is None:
117
+ raise SdkUsageError(
118
+ "AssertionClient is not installed, please install it with `pip install acryl-datahub-cloud`"
119
+ )
120
+ return AssertionClient(self)
@@ -163,8 +163,7 @@ def _patch_lineage() -> None:
163
163
  - source_columns = set(find_all_in_scope(select, exp.Column))
164
164
  + source_columns = list(find_all_in_scope(select, exp.Column))
165
165
 
166
- - # If the source is a UDTF find columns used in the UTDF to generate the table
167
- + # If the source is a UDTF find columns used in the UDTF to generate the table
166
+ # If the source is a UDTF find columns used in the UDTF to generate the table
168
167
  + source = scope.expression
169
168
  if isinstance(source, exp.UDTF):
170
169
  - source_columns |= set(source.find_all(exp.Column))
@@ -1753,8 +1753,9 @@ class SqlParsingAggregator(Closeable):
1753
1753
  operationType=operation_type,
1754
1754
  lastUpdatedTimestamp=make_ts_millis(query.latest_timestamp),
1755
1755
  actor=query.actor.urn() if query.actor else None,
1756
- customProperties=(
1757
- {"query_urn": self._query_urn(query_id)}
1756
+ sourceType=models.OperationSourceTypeClass.DATA_PLATFORM,
1757
+ queries=(
1758
+ [self._query_urn(query_id)]
1758
1759
  if self.can_generate_query(query_id)
1759
1760
  else None
1760
1761
  ),
@@ -10,11 +10,6 @@ from typing import (
10
10
  Union,
11
11
  )
12
12
 
13
- import requests
14
-
15
- from datahub.configuration.common import (
16
- ConfigurationError,
17
- )
18
13
  from datahub.telemetry.telemetry import suppress_telemetry
19
14
 
20
15
  logger = logging.getLogger(__name__)
@@ -55,66 +50,9 @@ class RestServiceConfig:
55
50
  A class to represent REST service configuration with semantic version parsing capabilities.
56
51
  """
57
52
 
58
- session: Optional[requests.Session] = None
59
- url: Optional[str] = None
60
53
  raw_config: Dict[str, Any] = field(default_factory=dict)
61
54
  _version_cache: Optional[Tuple[int, int, int, int]] = None
62
55
 
63
- def fetch_config(self) -> Dict[str, Any]:
64
- """
65
- Fetch configuration from the server if not already loaded.
66
-
67
- Returns:
68
- The configuration dictionary
69
-
70
- Raises:
71
- ConfigurationError: If there's an error fetching or validating the configuration
72
- """
73
- if not self.raw_config:
74
- if self.session is None or self.url is None:
75
- raise ConfigurationError(
76
- "Session and URL are required to load configuration"
77
- )
78
-
79
- response = self.session.get(self.url)
80
-
81
- if response.status_code == 200:
82
- config = response.json()
83
-
84
- # Validate that we're connected to the correct service
85
- if config.get("noCode") == "true":
86
- self.raw_config = config
87
- else:
88
- raise ConfigurationError(
89
- "You seem to have connected to the frontend service instead of the GMS endpoint. "
90
- "The rest emitter should connect to DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms). "
91
- "For Acryl users, the endpoint should be https://<name>.acryl.io/gms"
92
- )
93
- else:
94
- logger.debug(
95
- f"Unable to connect to {self.url} with status_code: {response.status_code}. Response: {response.text}"
96
- )
97
-
98
- if response.status_code == 401:
99
- message = f"Unable to connect to {self.url} - got an authentication error: {response.text}."
100
- else:
101
- message = f"Unable to connect to {self.url} with status_code: {response.status_code}."
102
-
103
- message += "\nPlease check your configuration and make sure you are talking to the DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)."
104
- raise ConfigurationError(message)
105
-
106
- return self.raw_config
107
-
108
- @property
109
- def config(self) -> Dict[str, Any]:
110
- """
111
- Get the full configuration dictionary, loading it if necessary.
112
-
113
- Returns:
114
- The configuration dictionary
115
- """
116
- return self.fetch_config()
117
-
118
56
  @property
119
57
  def commit_hash(self) -> Optional[str]:
120
58
  """
@@ -123,7 +61,7 @@ class RestServiceConfig:
123
61
  Returns:
124
62
  The commit hash or None if not found
125
63
  """
126
- versions = self.config.get("versions") or {}
64
+ versions = self.raw_config.get("versions") or {}
127
65
  datahub_info = versions.get("acryldata/datahub") or {}
128
66
  return datahub_info.get("commit")
129
67
 
@@ -135,7 +73,7 @@ class RestServiceConfig:
135
73
  Returns:
136
74
  The server type or "unknown" if not found
137
75
  """
138
- datahub = self.config.get("datahub") or {}
76
+ datahub = self.raw_config.get("datahub") or {}
139
77
  return datahub.get("serverType", "unknown")
140
78
 
141
79
  @property
@@ -146,8 +84,7 @@ class RestServiceConfig:
146
84
  Returns:
147
85
  The version string or None if not found
148
86
  """
149
- config = self.fetch_config()
150
- versions = config.get("versions") or {}
87
+ versions = self.raw_config.get("versions") or {}
151
88
  datahub_info = versions.get("acryldata/datahub") or {}
152
89
  return datahub_info.get("version")
153
90
 
@@ -233,7 +170,7 @@ class RestServiceConfig:
233
170
  Returns:
234
171
  True if noCode is set to "true"
235
172
  """
236
- return self.config.get("noCode") == "true"
173
+ return self.raw_config.get("noCode") == "true"
237
174
 
238
175
  @property
239
176
  def is_managed_ingestion_enabled(self) -> bool:
@@ -243,7 +180,7 @@ class RestServiceConfig:
243
180
  Returns:
244
181
  True if managedIngestion.enabled is True
245
182
  """
246
- managed_ingestion = self.config.get("managedIngestion") or {}
183
+ managed_ingestion = self.raw_config.get("managedIngestion") or {}
247
184
  return managed_ingestion.get("enabled", False)
248
185
 
249
186
  @property
@@ -254,7 +191,7 @@ class RestServiceConfig:
254
191
  Returns:
255
192
  True if the server environment is not 'core'
256
193
  """
257
- datahub_config = self.config.get("datahub") or {}
194
+ datahub_config = self.raw_config.get("datahub") or {}
258
195
  server_env = datahub_config.get("serverEnv")
259
196
 
260
197
  # Return False if serverEnv is None or empty string
@@ -277,18 +214,20 @@ class RestServiceConfig:
277
214
  # Special handling for features that rely on config flags
278
215
  config_based_features = {
279
216
  ServiceFeature.NO_CODE: lambda: self.is_no_code_enabled,
280
- ServiceFeature.STATEFUL_INGESTION: lambda: self.config.get(
217
+ ServiceFeature.STATEFUL_INGESTION: lambda: self.raw_config.get(
281
218
  "statefulIngestionCapable", False
282
219
  )
283
220
  is True,
284
- ServiceFeature.IMPACT_ANALYSIS: lambda: self.config.get(
221
+ ServiceFeature.IMPACT_ANALYSIS: lambda: self.raw_config.get(
285
222
  "supportsImpactAnalysis", False
286
223
  )
287
224
  is True,
288
- ServiceFeature.PATCH_CAPABLE: lambda: self.config.get("patchCapable", False)
225
+ ServiceFeature.PATCH_CAPABLE: lambda: self.raw_config.get(
226
+ "patchCapable", False
227
+ )
289
228
  is True,
290
229
  ServiceFeature.CLI_TELEMETRY: lambda: (
291
- self.config.get("telemetry") or {}
230
+ self.raw_config.get("telemetry") or {}
292
231
  ).get("enabledCli", None),
293
232
  ServiceFeature.DATAHUB_CLOUD: lambda: self.is_datahub_cloud,
294
233
  }
@@ -332,7 +271,7 @@ class RestServiceConfig:
332
271
  Returns:
333
272
  A string representation of the configuration dictionary
334
273
  """
335
- return str(self.config)
274
+ return str(self.raw_config)
336
275
 
337
276
  def __repr__(self) -> str:
338
277
  """
@@ -341,7 +280,7 @@ class RestServiceConfig:
341
280
  Returns:
342
281
  A string representation that can be used with pprint
343
282
  """
344
- return str(self.config)
283
+ return str(self.raw_config)
345
284
 
346
285
 
347
286
  def set_gms_config(config: Union[Dict[str, Any], RestServiceConfig]) -> None: