acryl-datahub 1.1.0.5rc9__py3-none-any.whl → 1.1.0.5rc10__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (45)
  1. {acryl_datahub-1.1.0.5rc9.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/METADATA +2647 -2647
  2. {acryl_datahub-1.1.0.5rc9.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/RECORD +45 -45
  3. datahub/_version.py +1 -1
  4. datahub/cli/check_cli.py +45 -1
  5. datahub/cli/cli_utils.py +0 -10
  6. datahub/cli/container_cli.py +5 -0
  7. datahub/cli/delete_cli.py +5 -0
  8. datahub/cli/docker_cli.py +2 -0
  9. datahub/cli/exists_cli.py +2 -0
  10. datahub/cli/get_cli.py +2 -0
  11. datahub/cli/iceberg_cli.py +5 -0
  12. datahub/cli/ingest_cli.py +7 -0
  13. datahub/cli/migrate.py +2 -0
  14. datahub/cli/put_cli.py +3 -0
  15. datahub/cli/specific/assertions_cli.py +2 -0
  16. datahub/cli/specific/datacontract_cli.py +3 -0
  17. datahub/cli/specific/dataproduct_cli.py +11 -0
  18. datahub/cli/specific/dataset_cli.py +4 -0
  19. datahub/cli/specific/forms_cli.py +2 -0
  20. datahub/cli/specific/group_cli.py +2 -0
  21. datahub/cli/specific/structuredproperties_cli.py +4 -0
  22. datahub/cli/specific/user_cli.py +2 -0
  23. datahub/cli/state_cli.py +2 -0
  24. datahub/cli/timeline_cli.py +2 -0
  25. datahub/emitter/rest_emitter.py +24 -8
  26. datahub/ingestion/api/report.py +72 -12
  27. datahub/ingestion/autogenerated/capability_summary.json +19 -1
  28. datahub/ingestion/autogenerated/lineage_helper.py +101 -19
  29. datahub/ingestion/source/common/subtypes.py +2 -0
  30. datahub/ingestion/source/dremio/dremio_api.py +38 -27
  31. datahub/ingestion/source/mlflow.py +11 -1
  32. datahub/ingestion/source/snowflake/snowflake_queries.py +127 -0
  33. datahub/ingestion/source/tableau/tableau.py +11 -2
  34. datahub/ingestion/source/tableau/tableau_constant.py +0 -2
  35. datahub/metadata/_internal_schema_classes.py +528 -529
  36. datahub/metadata/_urns/urn_defs.py +1803 -1803
  37. datahub/metadata/schema.avsc +16720 -17109
  38. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +1 -3
  39. datahub/sdk/main_client.py +14 -2
  40. datahub/sdk/search_client.py +4 -3
  41. datahub/telemetry/telemetry.py +17 -11
  42. {acryl_datahub-1.1.0.5rc9.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/WHEEL +0 -0
  43. {acryl_datahub-1.1.0.5rc9.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/entry_points.txt +0 -0
  44. {acryl_datahub-1.1.0.5rc9.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/licenses/LICENSE +0 -0
  45. {acryl_datahub-1.1.0.5rc9.dist-info → acryl_datahub-1.1.0.5rc10.dist-info}/top_level.txt +0 -0
@@ -23,8 +23,7 @@
23
23
  "HIERARCHY": "A module displaying a hierarchy to navigate",
24
24
  "LINK": "Link type module",
25
25
  "OWNED_ASSETS": "Module displaying assets owned by a user",
26
- "RICH_TEXT": "Module containing rich text to be rendered",
27
- "SUBSCRIBED_ASSETS": "Module displaying assets subscribed to by a given user"
26
+ "RICH_TEXT": "Module containing rich text to be rendered"
28
27
  },
29
28
  "name": "DataHubPageModuleType",
30
29
  "namespace": "com.linkedin.pegasus2avro.module",
@@ -34,7 +33,6 @@
34
33
  "ASSET_COLLECTION",
35
34
  "HIERARCHY",
36
35
  "OWNED_ASSETS",
37
- "SUBSCRIBED_ASSETS",
38
36
  "DOMAINS"
39
37
  ],
40
38
  "doc": "Enum containing the types of page modules that there are"
@@ -66,7 +66,12 @@ class DataHubClient:
66
66
  self._graph.test_connection()
67
67
 
68
68
  @classmethod
69
- def from_env(cls) -> "DataHubClient":
69
+ def from_env(
70
+ cls,
71
+ *,
72
+ client_mode: ClientMode = ClientMode.SDK,
73
+ datahub_component: Optional[str] = None,
74
+ ) -> "DataHubClient":
70
75
  """Initialize a DataHubClient from the environment variables or ~/.datahubenv file.
71
76
 
72
77
  This will first check DATAHUB_GMS_URL and DATAHUB_GMS_TOKEN. If not present,
@@ -76,6 +81,10 @@ class DataHubClient:
76
81
  If you're looking to specify the server/token in code, use the
77
82
  DataHubClient(server=..., token=...) constructor instead.
78
83
 
84
+ Args:
85
+ client_mode: [internal] The client mode to use. Defaults to "SDK".
86
+ datahub_component: [internal] The DataHub component name to include in the user agent.
87
+
79
88
  Returns:
80
89
  A DataHubClient instance.
81
90
  """
@@ -83,7 +92,10 @@ class DataHubClient:
83
92
  # Inspired by the DockerClient.from_env() method.
84
93
  # TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
85
94
  # That file is part of the "environment", but is not a traditional "env variable".
86
- graph = get_default_graph(ClientMode.SDK)
95
+ graph = get_default_graph(
96
+ client_mode=client_mode,
97
+ datahub_component=datahub_component,
98
+ )
87
99
 
88
100
  return cls(graph=graph)
89
101
 
@@ -19,6 +19,7 @@ from datahub.sdk.search_filters import (
19
19
  _OrFilters,
20
20
  _StatusFilter,
21
21
  )
22
+ from datahub.utilities.ordered_set import OrderedSet
22
23
 
23
24
  if TYPE_CHECKING:
24
25
  from datahub.sdk.main_client import DataHubClient
@@ -80,7 +81,7 @@ def compute_entity_types(
80
81
  ) -> Optional[List[str]]:
81
82
  found_filters = False
82
83
  found_positive_filters = False
83
- entity_types: List[str] = []
84
+ entity_types: OrderedSet[str] = OrderedSet()
84
85
  for ands in filters:
85
86
  for clause in ands["and"]:
86
87
  if clause.field == _EntityTypeFilter.ENTITY_TYPE_FIELD:
@@ -88,7 +89,7 @@ def compute_entity_types(
88
89
  if not clause.negated:
89
90
  found_positive_filters = True
90
91
 
91
- entity_types.extend(clause.values)
92
+ entity_types.update(clause.values)
92
93
 
93
94
  if not found_filters:
94
95
  # If we didn't find any filters, use None so we use the default set.
@@ -100,7 +101,7 @@ def compute_entity_types(
100
101
  # still want to use the default set.
101
102
  return None
102
103
 
103
- return entity_types
104
+ return list(entity_types)
104
105
 
105
106
 
106
107
  class SearchClient:
@@ -104,7 +104,7 @@ SENTRY_DSN: Optional[str] = os.environ.get("SENTRY_DSN", None)
104
104
  SENTRY_ENVIRONMENT: str = os.environ.get("SENTRY_ENVIRONMENT", "dev")
105
105
 
106
106
 
107
- def _default_telemetry_properties() -> Dict[str, Any]:
107
+ def _default_global_properties() -> Dict[str, Any]:
108
108
  return {
109
109
  "datahub_version": nice_version_name(),
110
110
  "python_version": platform.python_version(),
@@ -122,6 +122,7 @@ class Telemetry:
122
122
  context_properties: Dict[str, Any] = {}
123
123
 
124
124
  def __init__(self):
125
+ self.global_properties = _default_global_properties()
125
126
  self.context_properties = {}
126
127
 
127
128
  if SENTRY_DSN:
@@ -247,6 +248,10 @@ class Telemetry:
247
248
 
248
249
  return False
249
250
 
251
+ def add_global_property(self, key: str, value: Any) -> None:
252
+ self.global_properties[key] = value
253
+ self._update_sentry_properties()
254
+
250
255
  def set_context(
251
256
  self,
252
257
  server: Optional["DataHubGraph"] = None,
@@ -257,16 +262,17 @@ class Telemetry:
257
262
  **(properties or {}),
258
263
  }
259
264
 
260
- if self.sentry_enabled:
261
- from sentry_sdk import set_tag
265
+ self._update_sentry_properties()
262
266
 
263
- properties = {
264
- **_default_telemetry_properties(),
265
- **self.context_properties,
266
- }
267
+ def _update_sentry_properties(self) -> None:
268
+ properties = {
269
+ **self.global_properties,
270
+ **self.context_properties,
271
+ }
272
+ if self.sentry_enabled:
273
+ import sentry_sdk
267
274
 
268
- for key in properties:
269
- set_tag(key, properties[key])
275
+ sentry_sdk.set_tags(properties)
270
276
 
271
277
  def init_capture_exception(self) -> None:
272
278
  if self.sentry_enabled:
@@ -300,7 +306,7 @@ class Telemetry:
300
306
  try:
301
307
  self.mp.people_set(
302
308
  self.client_id,
303
- _default_telemetry_properties(),
309
+ self.global_properties,
304
310
  )
305
311
  except Exception as e:
306
312
  logger.debug(f"Error initializing telemetry: {e}")
@@ -334,7 +340,7 @@ class Telemetry:
334
340
  logger.debug(f"Sending telemetry for {event_name}")
335
341
 
336
342
  properties = {
337
- **_default_telemetry_properties(),
343
+ **self.global_properties,
338
344
  **self.context_properties,
339
345
  **properties,
340
346
  }