acryl-datahub 0.15.0rc2__py3-none-any.whl → 0.15.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc3.dist-info}/METADATA +2440 -2440
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc3.dist-info}/RECORD +11 -11
- datahub/__init__.py +1 -1
- datahub/ingestion/graph/client.py +2 -0
- datahub/ingestion/run/pipeline.py +5 -4
- datahub/ingestion/source/gc/datahub_gc.py +22 -5
- datahub/ingestion/source/gc/dataprocess_cleanup.py +3 -1
- datahub/telemetry/telemetry.py +23 -9
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc3.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=bjNaEX_th93Zf5oFOCSAaZbNy-DUb3y4CxlHrAvZOkI,574
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -162,7 +162,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
|
|
|
162
162
|
datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
|
|
163
163
|
datahub/ingestion/glossary/datahub_classifier.py,sha256=8VhwuLDhyOqqOr0jqAPIgorb4eAOnvTr4m13Y2Wy1-E,7515
|
|
164
164
|
datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
165
|
-
datahub/ingestion/graph/client.py,sha256
|
|
165
|
+
datahub/ingestion/graph/client.py,sha256=oBlM6RSo0SPFJ-yit2eFFOB3rOpnjKtQ83YNiWGd334,64584
|
|
166
166
|
datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4Cw8k,749
|
|
167
167
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
168
168
|
datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
|
|
@@ -172,7 +172,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
|
|
|
172
172
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
173
173
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
174
174
|
datahub/ingestion/run/connection.py,sha256=dqS9Fp8byIJNydPmVgtjjjlPJguuUWuMuvGnpNbQdSs,1474
|
|
175
|
-
datahub/ingestion/run/pipeline.py,sha256=
|
|
175
|
+
datahub/ingestion/run/pipeline.py,sha256=8MNUC19h7AvxjlDJj3E_FZlY56SAUlYG0heIko2XK_g,30572
|
|
176
176
|
datahub/ingestion/run/pipeline_config.py,sha256=91Uvs76EGbCzZZbm819TT0L6pixf2tfI2_nHpnCoyS4,3948
|
|
177
177
|
datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
178
178
|
datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvSc7YOgY,557
|
|
@@ -300,8 +300,8 @@ datahub/ingestion/source/fivetran/fivetran.py,sha256=uKbM5czPz-6LOseoh1FwavWDIuL
|
|
|
300
300
|
datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP_CyAT5Cian2N4a-lb8x1NKHk,12776
|
|
301
301
|
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
|
|
302
302
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
303
|
-
datahub/ingestion/source/gc/datahub_gc.py,sha256=
|
|
304
|
-
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=
|
|
303
|
+
datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
|
|
304
|
+
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=IEEHO6UvDWWK3W5siqFrk4J1zUKbL6TrKNUaXdNiEW4,14362
|
|
305
305
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
|
|
306
306
|
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=qLgdr-Rrsba0z_Y-CaHT9d1zSgy2jzg6CXaCKoN2jFk,7360
|
|
307
307
|
datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -872,7 +872,7 @@ datahub/sql_parsing/sqlglot_utils.py,sha256=8MYzkyekhup3ihVStRPuwneWPNu17xhBg5SG
|
|
|
872
872
|
datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
|
|
873
873
|
datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
874
874
|
datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
|
|
875
|
-
datahub/telemetry/telemetry.py,sha256=
|
|
875
|
+
datahub/telemetry/telemetry.py,sha256=gzla-QGNsynGg2FqFxiDDFQ0emG53MJ9lhOA2-UUg-Y,15047
|
|
876
876
|
datahub/testing/__init__.py,sha256=TywIuzGQvzJsNhI_PGD1RFk11M3RtGl9jIMtAVVHIkg,272
|
|
877
877
|
datahub/testing/check_imports.py,sha256=EKuJmgUA46uOrlaOy0fCvPB7j9POkpJ0ExhO_pT3YAk,1356
|
|
878
878
|
datahub/testing/check_sql_parser_result.py,sha256=f7U7IUSbfV4VACdNI857wPZ9tAZ9j6mXiXmcJNT_RzM,2671
|
|
@@ -971,8 +971,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
971
971
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
972
972
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
973
973
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
974
|
-
acryl_datahub-0.15.
|
|
975
|
-
acryl_datahub-0.15.
|
|
976
|
-
acryl_datahub-0.15.
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
974
|
+
acryl_datahub-0.15.0rc3.dist-info/METADATA,sha256=l71CWZc9fD8L_4p0NFRk9ruU6w4QEZgGJq3RObW_ljA,171129
|
|
975
|
+
acryl_datahub-0.15.0rc3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
976
|
+
acryl_datahub-0.15.0rc3.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
|
|
977
|
+
acryl_datahub-0.15.0rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
978
|
+
acryl_datahub-0.15.0rc3.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
datahub/ingestion/graph/client.py
CHANGED
|
@@ -67,6 +67,7 @@ from datahub.metadata.schema_classes import (
|
|
|
67
67
|
SystemMetadataClass,
|
|
68
68
|
TelemetryClientIdClass,
|
|
69
69
|
)
|
|
70
|
+
from datahub.telemetry.telemetry import telemetry_instance
|
|
70
71
|
from datahub.utilities.perf_timer import PerfTimer
|
|
71
72
|
from datahub.utilities.str_enum import StrEnum
|
|
72
73
|
from datahub.utilities.urns.urn import Urn, guess_entity_type
|
|
@@ -1819,4 +1820,5 @@ def get_default_graph() -> DataHubGraph:
|
|
|
1819
1820
|
graph_config = config_utils.load_client_config()
|
|
1820
1821
|
graph = DataHubGraph(graph_config)
|
|
1821
1822
|
graph.test_connection()
|
|
1823
|
+
telemetry_instance.set_context(server=graph)
|
|
1822
1824
|
return graph
|
datahub/ingestion/run/pipeline.py
CHANGED
|
@@ -44,7 +44,8 @@ from datahub.ingestion.transformer.system_metadata_transformer import (
|
|
|
44
44
|
)
|
|
45
45
|
from datahub.ingestion.transformer.transform_registry import transform_registry
|
|
46
46
|
from datahub.metadata.schema_classes import MetadataChangeProposalClass
|
|
47
|
-
from datahub.telemetry import stats
|
|
47
|
+
from datahub.telemetry import stats
|
|
48
|
+
from datahub.telemetry.telemetry import telemetry_instance
|
|
48
49
|
from datahub.utilities._custom_package_loader import model_version_name
|
|
49
50
|
from datahub.utilities.global_warning_util import (
|
|
50
51
|
clear_global_warnings,
|
|
@@ -273,8 +274,9 @@ class Pipeline:
|
|
|
273
274
|
if self.graph is None and isinstance(self.sink, DatahubRestSink):
|
|
274
275
|
with _add_init_error_context("setup default datahub client"):
|
|
275
276
|
self.graph = self.sink.emitter.to_graph()
|
|
277
|
+
self.graph.test_connection()
|
|
276
278
|
self.ctx.graph = self.graph
|
|
277
|
-
|
|
279
|
+
telemetry_instance.set_context(server=self.graph)
|
|
278
280
|
|
|
279
281
|
with set_graph_context(self.graph):
|
|
280
282
|
with _add_init_error_context("configure reporters"):
|
|
@@ -615,7 +617,7 @@ class Pipeline:
|
|
|
615
617
|
sink_warnings = len(self.sink.get_report().warnings)
|
|
616
618
|
global_warnings = len(get_global_warnings())
|
|
617
619
|
|
|
618
|
-
|
|
620
|
+
telemetry_instance.ping(
|
|
619
621
|
"ingest_stats",
|
|
620
622
|
{
|
|
621
623
|
"source_type": self.source_type,
|
|
@@ -637,7 +639,6 @@ class Pipeline:
|
|
|
637
639
|
),
|
|
638
640
|
"has_pipeline_name": bool(self.config.pipeline_name),
|
|
639
641
|
},
|
|
640
|
-
self.ctx.graph,
|
|
641
642
|
)
|
|
642
643
|
|
|
643
644
|
def _approx_all_vals(self, d: LossyList[Any]) -> int:
|
datahub/ingestion/source/gc/datahub_gc.py
CHANGED
|
@@ -144,15 +144,32 @@ class DataHubGcSource(Source):
|
|
|
144
144
|
self,
|
|
145
145
|
) -> Iterable[MetadataWorkUnit]:
|
|
146
146
|
if self.config.cleanup_expired_tokens:
|
|
147
|
-
|
|
147
|
+
try:
|
|
148
|
+
self.revoke_expired_tokens()
|
|
149
|
+
except Exception as e:
|
|
150
|
+
self.report.failure("While trying to cleanup expired token ", exc=e)
|
|
148
151
|
if self.config.truncate_indices:
|
|
149
|
-
|
|
152
|
+
try:
|
|
153
|
+
self.truncate_indices()
|
|
154
|
+
except Exception as e:
|
|
155
|
+
self.report.failure("While trying to truncate indices ", exc=e)
|
|
150
156
|
if self.dataprocess_cleanup:
|
|
151
|
-
|
|
157
|
+
try:
|
|
158
|
+
yield from self.dataprocess_cleanup.get_workunits_internal()
|
|
159
|
+
except Exception as e:
|
|
160
|
+
self.report.failure("While trying to cleanup data process ", exc=e)
|
|
152
161
|
if self.soft_deleted_entities_cleanup:
|
|
153
|
-
|
|
162
|
+
try:
|
|
163
|
+
self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
|
|
164
|
+
except Exception as e:
|
|
165
|
+
self.report.failure(
|
|
166
|
+
"While trying to cleanup soft deleted entities ", exc=e
|
|
167
|
+
)
|
|
154
168
|
if self.execution_request_cleanup:
|
|
155
|
-
|
|
169
|
+
try:
|
|
170
|
+
self.execution_request_cleanup.run()
|
|
171
|
+
except Exception as e:
|
|
172
|
+
self.report.failure("While trying to cleanup execution request ", exc=e)
|
|
156
173
|
yield from []
|
|
157
174
|
|
|
158
175
|
def truncate_indices(self) -> None:
|
datahub/ingestion/source/gc/dataprocess_cleanup.py
CHANGED
|
@@ -404,7 +404,9 @@ class DataProcessCleanup:
|
|
|
404
404
|
try:
|
|
405
405
|
self.delete_dpi_from_datajobs(datajob_entity)
|
|
406
406
|
except Exception as e:
|
|
407
|
-
|
|
407
|
+
self.report.failure(
|
|
408
|
+
f"While trying to delete {datajob_entity} ", exc=e
|
|
409
|
+
)
|
|
408
410
|
if (
|
|
409
411
|
datajob_entity.total_runs == 0
|
|
410
412
|
and self.config.delete_empty_data_jobs
|
datahub/telemetry/telemetry.py
CHANGED
|
@@ -7,7 +7,7 @@ import sys
|
|
|
7
7
|
import uuid
|
|
8
8
|
from functools import wraps
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypeVar
|
|
11
11
|
|
|
12
12
|
from mixpanel import Consumer, Mixpanel
|
|
13
13
|
from typing_extensions import ParamSpec
|
|
@@ -16,10 +16,12 @@ import datahub as datahub_package
|
|
|
16
16
|
from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
|
|
17
17
|
from datahub.cli.env_utils import get_boolean_env_variable
|
|
18
18
|
from datahub.configuration.common import ExceptionWithProps
|
|
19
|
-
from datahub.ingestion.graph.client import DataHubGraph
|
|
20
19
|
from datahub.metadata.schema_classes import _custom_package_path
|
|
21
20
|
from datahub.utilities.perf_timer import PerfTimer
|
|
22
21
|
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
24
|
+
|
|
23
25
|
logger = logging.getLogger(__name__)
|
|
24
26
|
|
|
25
27
|
DATAHUB_FOLDER = Path(DATAHUB_ROOT_FOLDER)
|
|
@@ -117,7 +119,11 @@ class Telemetry:
|
|
|
117
119
|
tracking_init: bool = False
|
|
118
120
|
sentry_enabled: bool = False
|
|
119
121
|
|
|
122
|
+
context_properties: Dict[str, Any] = {}
|
|
123
|
+
|
|
120
124
|
def __init__(self):
|
|
125
|
+
self.context_properties = {}
|
|
126
|
+
|
|
121
127
|
if SENTRY_DSN:
|
|
122
128
|
self.sentry_enabled = True
|
|
123
129
|
try:
|
|
@@ -157,6 +163,9 @@ class Telemetry:
|
|
|
157
163
|
except Exception as e:
|
|
158
164
|
logger.debug(f"Error connecting to mixpanel: {e}")
|
|
159
165
|
|
|
166
|
+
# Initialize the default properties for all events.
|
|
167
|
+
self.set_context()
|
|
168
|
+
|
|
160
169
|
def update_config(self) -> bool:
|
|
161
170
|
"""
|
|
162
171
|
Update the config file with the current client ID and enabled status.
|
|
@@ -238,18 +247,22 @@ class Telemetry:
|
|
|
238
247
|
|
|
239
248
|
return False
|
|
240
249
|
|
|
241
|
-
def
|
|
250
|
+
def set_context(
|
|
242
251
|
self,
|
|
243
|
-
server: Optional[DataHubGraph] = None,
|
|
252
|
+
server: Optional["DataHubGraph"] = None,
|
|
244
253
|
properties: Optional[Dict[str, Any]] = None,
|
|
245
254
|
) -> None:
|
|
255
|
+
self.context_properties = {
|
|
256
|
+
**self._server_props(server),
|
|
257
|
+
**(properties or {}),
|
|
258
|
+
}
|
|
259
|
+
|
|
246
260
|
if self.sentry_enabled:
|
|
247
261
|
from sentry_sdk import set_tag
|
|
248
262
|
|
|
249
263
|
properties = {
|
|
250
264
|
**_default_telemetry_properties(),
|
|
251
|
-
**self._server_props(server),
|
|
252
|
-
**(properties or {}),
|
|
265
|
+
**self.context_properties,
|
|
253
266
|
}
|
|
254
267
|
|
|
255
268
|
for key in properties:
|
|
@@ -297,7 +310,6 @@ class Telemetry:
|
|
|
297
310
|
self,
|
|
298
311
|
event_name: str,
|
|
299
312
|
properties: Optional[Dict[str, Any]] = None,
|
|
300
|
-
server: Optional[DataHubGraph] = None,
|
|
301
313
|
) -> None:
|
|
302
314
|
"""
|
|
303
315
|
Send a single telemetry event.
|
|
@@ -323,14 +335,15 @@ class Telemetry:
|
|
|
323
335
|
|
|
324
336
|
properties = {
|
|
325
337
|
**_default_telemetry_properties(),
|
|
326
|
-
**self._server_props(server),
|
|
338
|
+
**self.context_properties,
|
|
327
339
|
**properties,
|
|
328
340
|
}
|
|
329
341
|
self.mp.track(self.client_id, event_name, properties)
|
|
330
342
|
except Exception as e:
|
|
331
343
|
logger.debug(f"Error reporting telemetry: {e}")
|
|
332
344
|
|
|
333
|
-
|
|
345
|
+
@classmethod
|
|
346
|
+
def _server_props(cls, server: Optional["DataHubGraph"]) -> Dict[str, str]:
|
|
334
347
|
if not server:
|
|
335
348
|
return {
|
|
336
349
|
"server_type": "n/a",
|
|
@@ -435,6 +448,7 @@ def with_telemetry(
|
|
|
435
448
|
**call_props,
|
|
436
449
|
"status": "error",
|
|
437
450
|
**_error_props(e),
|
|
451
|
+
"code": e.code,
|
|
438
452
|
},
|
|
439
453
|
)
|
|
440
454
|
telemetry_instance.capture_exception(e)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|