acryl-datahub 0.15.0rc1__py3-none-any.whl → 0.15.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=MoEVuYLqJQKtHGXciQ4YfjAktrZuIswXVPeaXMasj-8,574
1
+ datahub/__init__.py,sha256=bjNaEX_th93Zf5oFOCSAaZbNy-DUb3y4CxlHrAvZOkI,574
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -162,7 +162,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
162
162
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
163
163
  datahub/ingestion/glossary/datahub_classifier.py,sha256=8VhwuLDhyOqqOr0jqAPIgorb4eAOnvTr4m13Y2Wy1-E,7515
164
164
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
- datahub/ingestion/graph/client.py,sha256=-SpQq0zWJ9hoeG9YhWUVZgPB97DD78AsgFJgpOsdAZ0,64476
165
+ datahub/ingestion/graph/client.py,sha256=oBlM6RSo0SPFJ-yit2eFFOB3rOpnjKtQ83YNiWGd334,64584
166
166
  datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4Cw8k,749
167
167
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
168
168
  datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
@@ -172,7 +172,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
172
172
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
173
173
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
174
174
  datahub/ingestion/run/connection.py,sha256=dqS9Fp8byIJNydPmVgtjjjlPJguuUWuMuvGnpNbQdSs,1474
175
- datahub/ingestion/run/pipeline.py,sha256=QV1i1TWCIH9gBDGe8Xs0JEbOqEUmWbhUhfx7gvrR7vc,30548
175
+ datahub/ingestion/run/pipeline.py,sha256=8MNUC19h7AvxjlDJj3E_FZlY56SAUlYG0heIko2XK_g,30572
176
176
  datahub/ingestion/run/pipeline_config.py,sha256=91Uvs76EGbCzZZbm819TT0L6pixf2tfI2_nHpnCoyS4,3948
177
177
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
178
178
  datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvSc7YOgY,557
@@ -300,8 +300,8 @@ datahub/ingestion/source/fivetran/fivetran.py,sha256=uKbM5czPz-6LOseoh1FwavWDIuL
300
300
  datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP_CyAT5Cian2N4a-lb8x1NKHk,12776
301
301
  datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
302
302
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
303
- datahub/ingestion/source/gc/datahub_gc.py,sha256=p1LiiZJDMaEjWuhnT5t83ALWDEHcPqmoZX64fCBGYmQ,11645
304
- datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=Y4KluNVVSdUbE85jPba8oc_EKm8WmKJrIbAuTPnSzx0,14301
303
+ datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
304
+ datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=IEEHO6UvDWWK3W5siqFrk4J1zUKbL6TrKNUaXdNiEW4,14362
305
305
  datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
306
306
  datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=qLgdr-Rrsba0z_Y-CaHT9d1zSgy2jzg6CXaCKoN2jFk,7360
307
307
  datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -872,7 +872,7 @@ datahub/sql_parsing/sqlglot_utils.py,sha256=8MYzkyekhup3ihVStRPuwneWPNu17xhBg5SG
872
872
  datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
873
873
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
874
874
  datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
875
- datahub/telemetry/telemetry.py,sha256=xXtvMVkX1YNu4Z0NUZnDAiIYP6c9mFujbGpd1wk3bgM,14763
875
+ datahub/telemetry/telemetry.py,sha256=gzla-QGNsynGg2FqFxiDDFQ0emG53MJ9lhOA2-UUg-Y,15047
876
876
  datahub/testing/__init__.py,sha256=TywIuzGQvzJsNhI_PGD1RFk11M3RtGl9jIMtAVVHIkg,272
877
877
  datahub/testing/check_imports.py,sha256=EKuJmgUA46uOrlaOy0fCvPB7j9POkpJ0ExhO_pT3YAk,1356
878
878
  datahub/testing/check_sql_parser_result.py,sha256=f7U7IUSbfV4VACdNI857wPZ9tAZ9j6mXiXmcJNT_RzM,2671
@@ -971,8 +971,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
971
971
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
972
972
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
973
973
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
974
- acryl_datahub-0.15.0rc1.dist-info/METADATA,sha256=bar0qt0g6b-U-fWNB2iUu0HtuFOWGNhm_8lrnyK7iSg,171129
975
- acryl_datahub-0.15.0rc1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
976
- acryl_datahub-0.15.0rc1.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
977
- acryl_datahub-0.15.0rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
978
- acryl_datahub-0.15.0rc1.dist-info/RECORD,,
974
+ acryl_datahub-0.15.0rc3.dist-info/METADATA,sha256=l71CWZc9fD8L_4p0NFRk9ruU6w4QEZgGJq3RObW_ljA,171129
975
+ acryl_datahub-0.15.0rc3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
976
+ acryl_datahub-0.15.0rc3.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
977
+ acryl_datahub-0.15.0rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
978
+ acryl_datahub-0.15.0rc3.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0rc1"
6
+ __version__ = "0.15.0rc3"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
datahub/ingestion/graph/client.py CHANGED
@@ -67,6 +67,7 @@ from datahub.metadata.schema_classes import (
67
67
  SystemMetadataClass,
68
68
  TelemetryClientIdClass,
69
69
  )
70
+ from datahub.telemetry.telemetry import telemetry_instance
70
71
  from datahub.utilities.perf_timer import PerfTimer
71
72
  from datahub.utilities.str_enum import StrEnum
72
73
  from datahub.utilities.urns.urn import Urn, guess_entity_type
@@ -1819,4 +1820,5 @@ def get_default_graph() -> DataHubGraph:
1819
1820
  graph_config = config_utils.load_client_config()
1820
1821
  graph = DataHubGraph(graph_config)
1821
1822
  graph.test_connection()
1823
+ telemetry_instance.set_context(server=graph)
1822
1824
  return graph
datahub/ingestion/run/pipeline.py CHANGED
@@ -44,7 +44,8 @@ from datahub.ingestion.transformer.system_metadata_transformer import (
44
44
  )
45
45
  from datahub.ingestion.transformer.transform_registry import transform_registry
46
46
  from datahub.metadata.schema_classes import MetadataChangeProposalClass
47
- from datahub.telemetry import stats, telemetry
47
+ from datahub.telemetry import stats
48
+ from datahub.telemetry.telemetry import telemetry_instance
48
49
  from datahub.utilities._custom_package_loader import model_version_name
49
50
  from datahub.utilities.global_warning_util import (
50
51
  clear_global_warnings,
@@ -273,8 +274,9 @@ class Pipeline:
273
274
  if self.graph is None and isinstance(self.sink, DatahubRestSink):
274
275
  with _add_init_error_context("setup default datahub client"):
275
276
  self.graph = self.sink.emitter.to_graph()
277
+ self.graph.test_connection()
276
278
  self.ctx.graph = self.graph
277
- telemetry.telemetry_instance.update_capture_exception_context(server=self.graph)
279
+ telemetry_instance.set_context(server=self.graph)
278
280
 
279
281
  with set_graph_context(self.graph):
280
282
  with _add_init_error_context("configure reporters"):
@@ -615,7 +617,7 @@ class Pipeline:
615
617
  sink_warnings = len(self.sink.get_report().warnings)
616
618
  global_warnings = len(get_global_warnings())
617
619
 
618
- telemetry.telemetry_instance.ping(
620
+ telemetry_instance.ping(
619
621
  "ingest_stats",
620
622
  {
621
623
  "source_type": self.source_type,
@@ -637,7 +639,6 @@ class Pipeline:
637
639
  ),
638
640
  "has_pipeline_name": bool(self.config.pipeline_name),
639
641
  },
640
- self.ctx.graph,
641
642
  )
642
643
 
643
644
  def _approx_all_vals(self, d: LossyList[Any]) -> int:
datahub/ingestion/source/gc/datahub_gc.py CHANGED
@@ -144,15 +144,32 @@ class DataHubGcSource(Source):
144
144
  self,
145
145
  ) -> Iterable[MetadataWorkUnit]:
146
146
  if self.config.cleanup_expired_tokens:
147
- self.revoke_expired_tokens()
147
+ try:
148
+ self.revoke_expired_tokens()
149
+ except Exception as e:
150
+ self.report.failure("While trying to cleanup expired token ", exc=e)
148
151
  if self.config.truncate_indices:
149
- self.truncate_indices()
152
+ try:
153
+ self.truncate_indices()
154
+ except Exception as e:
155
+ self.report.failure("While trying to truncate indices ", exc=e)
150
156
  if self.dataprocess_cleanup:
151
- yield from self.dataprocess_cleanup.get_workunits_internal()
157
+ try:
158
+ yield from self.dataprocess_cleanup.get_workunits_internal()
159
+ except Exception as e:
160
+ self.report.failure("While trying to cleanup data process ", exc=e)
152
161
  if self.soft_deleted_entities_cleanup:
153
- self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
162
+ try:
163
+ self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
164
+ except Exception as e:
165
+ self.report.failure(
166
+ "While trying to cleanup soft deleted entities ", exc=e
167
+ )
154
168
  if self.execution_request_cleanup:
155
- self.execution_request_cleanup.run()
169
+ try:
170
+ self.execution_request_cleanup.run()
171
+ except Exception as e:
172
+ self.report.failure("While trying to cleanup execution request ", exc=e)
156
173
  yield from []
157
174
 
158
175
  def truncate_indices(self) -> None:
datahub/ingestion/source/gc/dataprocess_cleanup.py CHANGED
@@ -404,7 +404,9 @@ class DataProcessCleanup:
404
404
  try:
405
405
  self.delete_dpi_from_datajobs(datajob_entity)
406
406
  except Exception as e:
407
- logger.error(f"While trying to delete {datajob_entity} got {e}")
407
+ self.report.failure(
408
+ f"While trying to delete {datajob_entity} ", exc=e
409
+ )
408
410
  if (
409
411
  datajob_entity.total_runs == 0
410
412
  and self.config.delete_empty_data_jobs
datahub/telemetry/telemetry.py CHANGED
@@ -7,7 +7,7 @@ import sys
7
7
  import uuid
8
8
  from functools import wraps
9
9
  from pathlib import Path
10
- from typing import Any, Callable, Dict, List, Optional, TypeVar
10
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypeVar
11
11
 
12
12
  from mixpanel import Consumer, Mixpanel
13
13
  from typing_extensions import ParamSpec
@@ -16,10 +16,12 @@ import datahub as datahub_package
16
16
  from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
17
17
  from datahub.cli.env_utils import get_boolean_env_variable
18
18
  from datahub.configuration.common import ExceptionWithProps
19
- from datahub.ingestion.graph.client import DataHubGraph
20
19
  from datahub.metadata.schema_classes import _custom_package_path
21
20
  from datahub.utilities.perf_timer import PerfTimer
22
21
 
22
+ if TYPE_CHECKING:
23
+ from datahub.ingestion.graph.client import DataHubGraph
24
+
23
25
  logger = logging.getLogger(__name__)
24
26
 
25
27
  DATAHUB_FOLDER = Path(DATAHUB_ROOT_FOLDER)
@@ -117,7 +119,11 @@ class Telemetry:
117
119
  tracking_init: bool = False
118
120
  sentry_enabled: bool = False
119
121
 
122
+ context_properties: Dict[str, Any] = {}
123
+
120
124
  def __init__(self):
125
+ self.context_properties = {}
126
+
121
127
  if SENTRY_DSN:
122
128
  self.sentry_enabled = True
123
129
  try:
@@ -157,6 +163,9 @@ class Telemetry:
157
163
  except Exception as e:
158
164
  logger.debug(f"Error connecting to mixpanel: {e}")
159
165
 
166
+ # Initialize the default properties for all events.
167
+ self.set_context()
168
+
160
169
  def update_config(self) -> bool:
161
170
  """
162
171
  Update the config file with the current client ID and enabled status.
@@ -238,18 +247,22 @@ class Telemetry:
238
247
 
239
248
  return False
240
249
 
241
- def update_capture_exception_context(
250
+ def set_context(
242
251
  self,
243
- server: Optional[DataHubGraph] = None,
252
+ server: Optional["DataHubGraph"] = None,
244
253
  properties: Optional[Dict[str, Any]] = None,
245
254
  ) -> None:
255
+ self.context_properties = {
256
+ **self._server_props(server),
257
+ **(properties or {}),
258
+ }
259
+
246
260
  if self.sentry_enabled:
247
261
  from sentry_sdk import set_tag
248
262
 
249
263
  properties = {
250
264
  **_default_telemetry_properties(),
251
- **self._server_props(server),
252
- **(properties or {}),
265
+ **self.context_properties,
253
266
  }
254
267
 
255
268
  for key in properties:
@@ -297,7 +310,6 @@ class Telemetry:
297
310
  self,
298
311
  event_name: str,
299
312
  properties: Optional[Dict[str, Any]] = None,
300
- server: Optional[DataHubGraph] = None,
301
313
  ) -> None:
302
314
  """
303
315
  Send a single telemetry event.
@@ -323,14 +335,15 @@ class Telemetry:
323
335
 
324
336
  properties = {
325
337
  **_default_telemetry_properties(),
326
- **self._server_props(server),
338
+ **self.context_properties,
327
339
  **properties,
328
340
  }
329
341
  self.mp.track(self.client_id, event_name, properties)
330
342
  except Exception as e:
331
343
  logger.debug(f"Error reporting telemetry: {e}")
332
344
 
333
- def _server_props(self, server: Optional[DataHubGraph]) -> Dict[str, str]:
345
+ @classmethod
346
+ def _server_props(cls, server: Optional["DataHubGraph"]) -> Dict[str, str]:
334
347
  if not server:
335
348
  return {
336
349
  "server_type": "n/a",
@@ -435,6 +448,7 @@ def with_telemetry(
435
448
  **call_props,
436
449
  "status": "error",
437
450
  **_error_props(e),
451
+ "code": e.code,
438
452
  },
439
453
  )
440
454
  telemetry_instance.capture_exception(e)