acryl-datahub 0.15.0rc2__py3-none-any.whl → 0.15.0rc4__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/METADATA +2390 -2390
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/RECORD +20 -19
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/entry_points.txt +1 -1
- datahub/__init__.py +1 -1
- datahub/ingestion/graph/client.py +2 -0
- datahub/ingestion/run/pipeline.py +5 -4
- datahub/ingestion/source/feast.py +97 -6
- datahub/ingestion/source/gc/datahub_gc.py +22 -5
- datahub/ingestion/source/gc/dataprocess_cleanup.py +3 -1
- datahub/ingestion/source/powerbi/__init__.py +0 -1
- datahub/ingestion/source/powerbi/config.py +3 -3
- datahub/ingestion/source/powerbi/m_query/data_classes.py +34 -2
- datahub/ingestion/source/powerbi/m_query/parser.py +6 -3
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +920 -0
- datahub/ingestion/source/powerbi/m_query/resolver.py +16 -938
- datahub/ingestion/source/powerbi/m_query/validator.py +9 -3
- datahub/ingestion/source/powerbi/powerbi.py +12 -6
- datahub/telemetry/telemetry.py +23 -9
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc2.dist-info → acryl_datahub-0.15.0rc4.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/powerbi/m_query/validator.py
CHANGED

@@ -1,7 +1,7 @@
 import logging
 from typing import Optional, Tuple
 
-
+import datahub.ingestion.source.powerbi.m_query.data_classes
 
 logger = logging.getLogger(__name__)
 
@@ -14,12 +14,18 @@ def validate_parse_tree(
     :param native_query_enabled: Whether user want to extract lineage from native query
     :return: True or False.
     """
-    function_names = [
+    function_names = [
+        fun.value
+        for fun in datahub.ingestion.source.powerbi.m_query.data_classes.FunctionName
+    ]
     if not any(fun in expression for fun in function_names):
         return False, "DataAccess function is not present in M-Query expression."
 
     if native_query_enabled is False:
-        if
+        if (
+            datahub.ingestion.source.powerbi.m_query.data_classes.FunctionName.NATIVE_QUERY.value
+            in function_names
+        ):
             return (
                 False,
                 "Lineage extraction from native query is disabled. Enable native_query_parsing in recipe",

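For orientation, here is a minimal, self-contained sketch of the validation flow the hunk above reshapes: the list of data-access function names now comes from the FunctionName enum in data_classes rather than being hard-coded. The two-member enum and its values below are hypothetical stand-ins for the real datahub.ingestion.source.powerbi.m_query.data_classes.FunctionName, and the native-query guard is simplified to inspect the expression itself.

from enum import Enum
from typing import Optional, Tuple


class FunctionName(Enum):
    # Hypothetical members; the real enum lives in
    # datahub.ingestion.source.powerbi.m_query.data_classes.
    NATIVE_QUERY = "Value.NativeQuery"
    POSTGRES_DATA_ACCESS = "PostgreSQL.Database"


def validate_parse_tree(
    expression: str, native_query_enabled: bool = True
) -> Tuple[bool, Optional[str]]:
    # Collect every known data-access function name from the enum.
    function_names = [fun.value for fun in FunctionName]
    if not any(fun in expression for fun in function_names):
        return False, "DataAccess function is not present in M-Query expression."

    # Simplified guard: reject expressions that use the native-query function
    # when native query parsing is disabled in the recipe.
    if not native_query_enabled and FunctionName.NATIVE_QUERY.value in expression:
        return False, "Lineage extraction from native query is disabled."

    return True, None


print(validate_parse_tree('Value.NativeQuery(Snowflake.Databases(...), "select 1")', False))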
datahub/ingestion/source/powerbi/powerbi.py
CHANGED

@@ -10,6 +10,7 @@ from typing import Iterable, List, Optional, Tuple, Union
 import more_itertools
 
 import datahub.emitter.mce_builder as builder
+import datahub.ingestion.source.powerbi.m_query.data_classes
 import datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes as powerbi_data_classes
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import ContainerKey, gen_containers

@@ -42,12 +43,13 @@ from datahub.ingestion.source.powerbi.config import (
     Constant,
     PowerBiDashboardSourceConfig,
     PowerBiDashboardSourceReport,
+    SupportedDataPlatform,
 )
 from datahub.ingestion.source.powerbi.dataplatform_instance_resolver import (
     AbstractDataPlatformInstanceResolver,
     create_dataplatform_instance_resolver,
 )
-from datahub.ingestion.source.powerbi.m_query import parser
+from datahub.ingestion.source.powerbi.m_query import parser
 from datahub.ingestion.source.powerbi.rest_api_wrapper.powerbi_api import PowerBiAPI
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,

@@ -182,7 +184,9 @@ class Mapper:
         return [schema_mcp]
 
     def make_fine_grained_lineage_class(
-        self,
+        self,
+        lineage: datahub.ingestion.source.powerbi.m_query.data_classes.Lineage,
+        dataset_urn: str,
     ) -> List[FineGrainedLineage]:
         fine_grained_lineages: List[FineGrainedLineage] = []
 

@@ -234,7 +238,9 @@ class Mapper:
         upstream: List[UpstreamClass] = []
         cll_lineage: List[FineGrainedLineage] = []
 
-        upstream_lineage: List[
+        upstream_lineage: List[
+            datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
+        ] = parser.get_upstream_tables(
             table=table,
             reporter=self.__reporter,
             platform_instance_resolver=self.__dataplatform_instance_resolver,

@@ -1294,7 +1300,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     def validate_dataset_type_mapping(self):
         powerbi_data_platforms: List[str] = [
             data_platform.value.powerbi_data_platform_name
-            for data_platform in
+            for data_platform in SupportedDataPlatform
         ]
 
         for key in self.source_config.dataset_type_mapping.keys():

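The completed comprehension above iterates the SupportedDataPlatform enum directly, pulling each member's powerbi_data_platform_name from its value. A rough sketch of that shape follows; the members, field layout, and values are illustrative assumptions, not the real enum from powerbi/config.py.

from dataclasses import dataclass
from enum import Enum
from typing import List


@dataclass
class DataPlatformPair:
    # Only the attribute referenced by the hunk is modeled here.
    powerbi_data_platform_name: str


class SupportedDataPlatform(Enum):
    # Illustrative members; names and values are assumptions.
    POSTGRES_SQL = DataPlatformPair("PostgreSQL")
    SNOWFLAKE = DataPlatformPair("Snowflake")


powerbi_data_platforms: List[str] = [
    data_platform.value.powerbi_data_platform_name
    for data_platform in SupportedDataPlatform
]

print(powerbi_data_platforms)  # ['PostgreSQL', 'Snowflake']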
@@ -1481,7 +1487,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         # As modified_workspaces is not idempotent, hence workunit processors are run later for each workspace_id
-        # This will result in creating checkpoint for each workspace_id
+        # This will result in creating a checkpoint for each workspace_id
         if self.source_config.modified_since:
             return []  # Handle these in get_workunits_internal
         else:

@@ -1492,7 +1498,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         """
-        Datahub Ingestion framework
+        Datahub Ingestion framework invokes this method
         """
         logger.info("PowerBi plugin execution is started")
         # Validate dataset type mapping

datahub/telemetry/telemetry.py
CHANGED
@@ -7,7 +7,7 @@ import sys
 import uuid
 from functools import wraps
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, TypeVar
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypeVar
 
 from mixpanel import Consumer, Mixpanel
 from typing_extensions import ParamSpec

@@ -16,10 +16,12 @@ import datahub as datahub_package
 from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
 from datahub.cli.env_utils import get_boolean_env_variable
 from datahub.configuration.common import ExceptionWithProps
-from datahub.ingestion.graph.client import DataHubGraph
 from datahub.metadata.schema_classes import _custom_package_path
 from datahub.utilities.perf_timer import PerfTimer
 
+if TYPE_CHECKING:
+    from datahub.ingestion.graph.client import DataHubGraph
+
 logger = logging.getLogger(__name__)
 
 DATAHUB_FOLDER = Path(DATAHUB_ROOT_FOLDER)

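The hunk above moves the DataHubGraph import behind typing.TYPE_CHECKING, the standard way to keep a heavyweight or circular dependency out of the run-time import graph while still type-checking against it; annotations then refer to the class as a quoted forward reference, as the later hunks do. A generic sketch of the pattern, with describe_server as a made-up helper:

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Only evaluated by static type checkers, never at run time.
    from datahub.ingestion.graph.client import DataHubGraph


def describe_server(server: Optional["DataHubGraph"] = None) -> str:
    # The quoted annotation is a forward reference, so the module above
    # does not need to be imported for this function to be defined or called.
    return "n/a" if server is None else "connected"


print(describe_server())  # -> "n/a"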
@@ -117,7 +119,11 @@ class Telemetry:
     tracking_init: bool = False
     sentry_enabled: bool = False
 
+    context_properties: Dict[str, Any] = {}
+
     def __init__(self):
+        self.context_properties = {}
+
         if SENTRY_DSN:
             self.sentry_enabled = True
             try:

@@ -157,6 +163,9 @@ class Telemetry:
         except Exception as e:
             logger.debug(f"Error connecting to mixpanel: {e}")
 
+        # Initialize the default properties for all events.
+        self.set_context()
+
     def update_config(self) -> bool:
         """
         Update the config file with the current client ID and enabled status.

@@ -238,18 +247,22 @@ class Telemetry:
 
         return False
 
-    def
+    def set_context(
         self,
-        server: Optional[DataHubGraph] = None,
+        server: Optional["DataHubGraph"] = None,
         properties: Optional[Dict[str, Any]] = None,
     ) -> None:
+        self.context_properties = {
+            **self._server_props(server),
+            **(properties or {}),
+        }
+
         if self.sentry_enabled:
             from sentry_sdk import set_tag
 
             properties = {
                 **_default_telemetry_properties(),
-                **self.
-                **(properties or {}),
+                **self.context_properties,
             }
 
             for key in properties:

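The hunk above reworks set_context so that the merged server-derived and caller-supplied properties are cached on the instance, and each event later layers them between the default properties and the per-event ones. A stripped-down sketch of that merge order; apart from the set_context and context_properties names, the class, helpers, and values here are illustrative.

from typing import Any, Dict, Optional


class TelemetrySketch:
    def __init__(self) -> None:
        # Mirrors Telemetry.context_properties: empty until set_context() runs.
        self.context_properties: Dict[str, Any] = {}

    def set_context(
        self,
        server_props: Optional[Dict[str, Any]] = None,
        properties: Optional[Dict[str, Any]] = None,
    ) -> None:
        # Server-derived properties first, then caller-supplied overrides.
        self.context_properties = {**(server_props or {}), **(properties or {})}

    def event_properties(self, per_event: Dict[str, Any]) -> Dict[str, Any]:
        defaults = {"datahub_version": "0.15.0rc4"}  # stand-in for the real defaults
        # Later entries win: defaults < cached context < per-event properties.
        return {**defaults, **self.context_properties, **per_event}


t = TelemetrySketch()
t.set_context(server_props={"server_type": "n/a"})
print(t.event_properties({"status": "completed"}))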
@@ -297,7 +310,6 @@ class Telemetry:
         self,
         event_name: str,
         properties: Optional[Dict[str, Any]] = None,
-        server: Optional[DataHubGraph] = None,
     ) -> None:
         """
         Send a single telemetry event.

@@ -323,14 +335,15 @@ class Telemetry:
 
             properties = {
                 **_default_telemetry_properties(),
-                **self.
+                **self.context_properties,
                 **properties,
             }
             self.mp.track(self.client_id, event_name, properties)
         except Exception as e:
             logger.debug(f"Error reporting telemetry: {e}")
 
-
+    @classmethod
+    def _server_props(cls, server: Optional["DataHubGraph"]) -> Dict[str, str]:
         if not server:
             return {
                 "server_type": "n/a",

@@ -435,6 +448,7 @@ def with_telemetry(
                     **call_props,
                     "status": "error",
                     **_error_props(e),
+                    "code": e.code,
                 },
             )
             telemetry_instance.capture_exception(e)