acryl-datahub 0.15.0rc22__py3-none-any.whl → 0.15.0rc24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc22.dist-info → acryl_datahub-0.15.0rc24.dist-info}/METADATA +2430 -2430
- {acryl_datahub-0.15.0rc22.dist-info → acryl_datahub-0.15.0rc24.dist-info}/RECORD +16 -16
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +116 -129
- datahub/cli/specific/structuredproperties_cli.py +2 -1
- datahub/configuration/git.py +7 -1
- datahub/ingestion/api/source.py +1 -0
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/source/gc/datahub_gc.py +24 -30
- datahub/ingestion/source/gc/dataprocess_cleanup.py +36 -16
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +5 -0
- datahub/ingestion/source/pulsar.py +10 -1
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +5 -1
- {acryl_datahub-0.15.0rc22.dist-info → acryl_datahub-0.15.0rc24.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc22.dist-info → acryl_datahub-0.15.0rc24.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc22.dist-info → acryl_datahub-0.15.0rc24.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=I7rWvDl7l3VZ5DC3mtaoQKDToqQCmmprWfOtkh9E_mM,575
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -52,7 +52,7 @@ datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp6
|
|
|
52
52
|
datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
53
|
datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
|
|
54
54
|
datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
datahub/api/entities/structuredproperties/structuredproperties.py,sha256=
|
|
55
|
+
datahub/api/entities/structuredproperties/structuredproperties.py,sha256=tYEVp2oqJa9FhlrnbAf2Zw82WqicJI9lF0P5U9soY9E,7502
|
|
56
56
|
datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
|
|
57
57
|
datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0VhA,2818
|
|
58
58
|
datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
|
|
@@ -85,7 +85,7 @@ datahub/cli/specific/dataset_cli.py,sha256=AwSmIiuV3XbgprW4_1Wj-EJq1OPqFyolSNczQ
|
|
|
85
85
|
datahub/cli/specific/file_loader.py,sha256=YMyv_evdKyHSft5Tm_kOcqJ4ALpRmMm54ZJAyl7Nxqs,773
|
|
86
86
|
datahub/cli/specific/forms_cli.py,sha256=OLVeG8NtK1eDBuUKCT5Ald35np8__f8mLzbZM_zUfWU,1484
|
|
87
87
|
datahub/cli/specific/group_cli.py,sha256=xPUYk48VbVXLMj-z9VNW0RZzXOe4rQsc2jLwSOGCoec,1967
|
|
88
|
-
datahub/cli/specific/structuredproperties_cli.py,sha256=
|
|
88
|
+
datahub/cli/specific/structuredproperties_cli.py,sha256=qP7kHpN7y3cOR0IGZkD4PGlRzFqLdXqZ6yrbCKVmG8M,1937
|
|
89
89
|
datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
|
|
90
90
|
datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
|
|
91
91
|
datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
|
|
@@ -93,7 +93,7 @@ datahub/configuration/common.py,sha256=Ngj2-HKPEhCMbcx3phUqyoOHayhqWNt1t0e2hO3GQ
|
|
|
93
93
|
datahub/configuration/config_loader.py,sha256=4V8rrbKvCbfEys2Tlw2uZXb3yC9Hpoubn2O8GXhGe3A,5785
|
|
94
94
|
datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
|
|
95
95
|
datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
|
|
96
|
-
datahub/configuration/git.py,sha256=
|
|
96
|
+
datahub/configuration/git.py,sha256=q9iac6cc6oZ3RVSPTyuR2VMsmt2wr-uVaCLWohdKVV0,6461
|
|
97
97
|
datahub/configuration/import_resolver.py,sha256=b4Ie9L7knN1LALEVMxTcNFSklDD6CVE-4Ipy4ZYhNYA,369
|
|
98
98
|
datahub/configuration/json_loader.py,sha256=vIDnjwXWi9yHDO8KW64EupOzOb_sspehGCD7xGHzg84,302
|
|
99
99
|
datahub/configuration/kafka.py,sha256=MlIwpd5FFyOyjdDXW_X9JTLNk7f988sPMgevkcZYVgI,2579
|
|
@@ -138,8 +138,8 @@ datahub/ingestion/api/registry.py,sha256=LGElUdzhNQoEr-k2SN23mJaIYnA1PYfF97LQxBm
|
|
|
138
138
|
datahub/ingestion/api/report.py,sha256=CpQHqLAoYGV4bxNIpYQugLY0EUoxROlp2NUM9ONHj_I,4364
|
|
139
139
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
140
140
|
datahub/ingestion/api/sink.py,sha256=6g01wou8pv79s0leDWyK12cgl7eLtpiwSUHqOw08vx4,4503
|
|
141
|
-
datahub/ingestion/api/source.py,sha256=
|
|
142
|
-
datahub/ingestion/api/source_helpers.py,sha256=
|
|
141
|
+
datahub/ingestion/api/source.py,sha256=pHfFIBZa57ySpZWnt03mmayWLdbbBAGOhWqWZnf1KUA,18815
|
|
142
|
+
datahub/ingestion/api/source_helpers.py,sha256=k40ofHHPsfbYFJgZXWaD6ORvEa0SBmsaOacF1ttolcQ,19743
|
|
143
143
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
144
144
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
145
145
|
datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -202,7 +202,7 @@ datahub/ingestion/source/nifi.py,sha256=ttsjZ9aRUvINmewvKFIQD8Rwa4jcl35WFG-F-jPG
|
|
|
202
202
|
datahub/ingestion/source/openapi.py,sha256=3ea2ORz1cuq4e7L2hSjxG9Cw3__pVoJ5UNYTJS3EnKU,17386
|
|
203
203
|
datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
|
|
204
204
|
datahub/ingestion/source/preset.py,sha256=fByqamRLnXxsfCGdLPzWN_5LJR_s2_G2f_zwSKUc8EA,3981
|
|
205
|
-
datahub/ingestion/source/pulsar.py,sha256=
|
|
205
|
+
datahub/ingestion/source/pulsar.py,sha256=7rTOEqYmeOuRZl5DG8d5OFkb4l9H6-1bETZfa-4DfmI,20163
|
|
206
206
|
datahub/ingestion/source/redash.py,sha256=g-wBJ4e54EdA2A2D5XmoNBilCDyh5b32M_C_fY1bhmA,30055
|
|
207
207
|
datahub/ingestion/source/salesforce.py,sha256=S6LSM6mzl8-zKbrJPoINhM1SCpYfM244Xb74pbEI-J0,31792
|
|
208
208
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
@@ -301,10 +301,10 @@ datahub/ingestion/source/fivetran/fivetran.py,sha256=uKbM5czPz-6LOseoh1FwavWDIuL
|
|
|
301
301
|
datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP_CyAT5Cian2N4a-lb8x1NKHk,12776
|
|
302
302
|
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
|
|
303
303
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
304
|
-
datahub/ingestion/source/gc/datahub_gc.py,sha256=
|
|
305
|
-
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=
|
|
304
|
+
datahub/ingestion/source/gc/datahub_gc.py,sha256=AHlKGwDD-E_TEHcJIpRtwk6ikjT-KiyfTo-BXZnMSk0,12114
|
|
305
|
+
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=u90XEmW1vRFbvp4CQ8ujPxTGJUyJqO2U6ApcI6mFrjE,16588
|
|
306
306
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
|
|
307
|
-
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=
|
|
307
|
+
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=wRnRaIVUG483tY4nyDkEn6Xi2RL5MjrVvoCoZimqwSg,7514
|
|
308
308
|
datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
309
309
|
datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
|
|
310
310
|
datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
|
|
@@ -427,7 +427,7 @@ datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81
|
|
|
427
427
|
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=LZqnTELtzRNf0vsKG-xXggXyt13S9RYvHOZEZHRjgNk,18851
|
|
428
428
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
|
|
429
429
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
430
|
-
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=
|
|
430
|
+
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=suMICPFPvoV6shkjD_14JunLc8jAZBINzlFk2mYldkU,23676
|
|
431
431
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
432
432
|
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kHc6cBeMa-on9RF9qG3yqjJnS3DE,26085
|
|
433
433
|
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=yDu_1aTAG7eLEh1w1FGmn2-c6NJZURdslnI6fC_4B_0,38723
|
|
@@ -976,8 +976,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
976
976
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
977
977
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
978
978
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
979
|
-
acryl_datahub-0.15.
|
|
980
|
-
acryl_datahub-0.15.
|
|
981
|
-
acryl_datahub-0.15.
|
|
982
|
-
acryl_datahub-0.15.
|
|
983
|
-
acryl_datahub-0.15.
|
|
979
|
+
acryl_datahub-0.15.0rc24.dist-info/METADATA,sha256=z1GOrJZhoUNozAZuAKJuhaUEOtkFO6qXVGtHd5xC3mo,173559
|
|
980
|
+
acryl_datahub-0.15.0rc24.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
981
|
+
acryl_datahub-0.15.0rc24.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
|
|
982
|
+
acryl_datahub-0.15.0rc24.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
983
|
+
acryl_datahub-0.15.0rc24.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -9,27 +9,18 @@ from ruamel.yaml import YAML
|
|
|
9
9
|
|
|
10
10
|
from datahub.configuration.common import ConfigModel
|
|
11
11
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
12
|
-
from datahub.ingestion.
|
|
13
|
-
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
12
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
14
13
|
from datahub.metadata.schema_classes import (
|
|
15
14
|
PropertyValueClass,
|
|
16
15
|
StructuredPropertyDefinitionClass,
|
|
17
16
|
)
|
|
18
|
-
from datahub.
|
|
17
|
+
from datahub.metadata.urns import StructuredPropertyUrn, Urn
|
|
18
|
+
from datahub.utilities.urns._urn_base import URN_TYPES
|
|
19
19
|
|
|
20
20
|
logging.basicConfig(level=logging.INFO)
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class StructuredPropertiesConfig:
|
|
25
|
-
"""Configuration class to hold the graph client"""
|
|
26
|
-
|
|
27
|
-
@classmethod
|
|
28
|
-
def get_graph_required(cls) -> DataHubGraph:
|
|
29
|
-
"""Get the current graph, falling back to default if none set"""
|
|
30
|
-
return get_graph_context() or get_default_graph()
|
|
31
|
-
|
|
32
|
-
|
|
33
24
|
class AllowedTypes(Enum):
|
|
34
25
|
STRING = "string"
|
|
35
26
|
RICH_TEXT = "rich_text"
|
|
@@ -51,29 +42,28 @@ class AllowedValue(ConfigModel):
|
|
|
51
42
|
description: Optional[str] = None
|
|
52
43
|
|
|
53
44
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
45
|
+
VALID_ENTITY_TYPE_URNS = [
|
|
46
|
+
Urn.make_entity_type_urn(entity_type) for entity_type in URN_TYPES.keys()
|
|
47
|
+
]
|
|
48
|
+
_VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {', '.join(VALID_ENTITY_TYPE_URNS)}, etc... Ensure that the entity type is valid."
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _validate_entity_type_urn(v: str) -> str:
|
|
52
|
+
urn = Urn.make_entity_type_urn(v)
|
|
53
|
+
if urn not in VALID_ENTITY_TYPE_URNS:
|
|
54
|
+
raise ValueError(
|
|
55
|
+
f"Input {v} is not a valid entity type urn. {_VALID_ENTITY_TYPES_STRING}"
|
|
56
|
+
)
|
|
57
|
+
v = str(urn)
|
|
58
|
+
return v
|
|
61
59
|
|
|
62
60
|
|
|
63
61
|
class TypeQualifierAllowedTypes(ConfigModel):
|
|
64
62
|
allowed_types: List[str]
|
|
65
63
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
graph = StructuredPropertiesConfig.get_graph_required()
|
|
70
|
-
validated_urn = Urn.make_entity_type_urn(v)
|
|
71
|
-
if not graph.exists(validated_urn):
|
|
72
|
-
raise ValueError(
|
|
73
|
-
f"Input {v} is not a valid entity type urn. {VALID_ENTITY_TYPES_STRING}"
|
|
74
|
-
)
|
|
75
|
-
v = str(validated_urn)
|
|
76
|
-
return v
|
|
64
|
+
_check_allowed_types = validator("allowed_types", each_item=True, allow_reuse=True)(
|
|
65
|
+
_validate_entity_type_urn
|
|
66
|
+
)
|
|
77
67
|
|
|
78
68
|
|
|
79
69
|
class StructuredProperties(ConfigModel):
|
|
@@ -90,22 +80,30 @@ class StructuredProperties(ConfigModel):
|
|
|
90
80
|
type_qualifier: Optional[TypeQualifierAllowedTypes] = None
|
|
91
81
|
immutable: Optional[bool] = False
|
|
92
82
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
83
|
+
_check_entity_types = validator("entity_types", each_item=True, allow_reuse=True)(
|
|
84
|
+
_validate_entity_type_urn
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
@validator("type")
|
|
88
|
+
def validate_type(cls, v: str) -> str:
|
|
89
|
+
# Convert to lowercase if needed
|
|
90
|
+
if not v.islower():
|
|
91
|
+
logger.warning(
|
|
92
|
+
f"Structured property type should be lowercase. Updated to {v.lower()}"
|
|
93
|
+
)
|
|
94
|
+
v = v.lower()
|
|
95
|
+
|
|
96
|
+
# Check if type is allowed
|
|
97
|
+
if not AllowedTypes.check_allowed_type(v):
|
|
98
|
+
raise ValueError(
|
|
99
|
+
f"Type {v} is not allowed. Allowed types are {AllowedTypes.values()}"
|
|
100
|
+
)
|
|
103
101
|
return v
|
|
104
102
|
|
|
105
103
|
@property
|
|
106
104
|
def fqn(self) -> str:
|
|
107
105
|
assert self.urn is not None
|
|
108
|
-
id =
|
|
106
|
+
id = StructuredPropertyUrn.from_string(self.urn).id
|
|
109
107
|
if self.qualified_name is not None:
|
|
110
108
|
# ensure that qualified name and ID match
|
|
111
109
|
assert (
|
|
@@ -122,101 +120,90 @@ class StructuredProperties(ConfigModel):
|
|
|
122
120
|
return v
|
|
123
121
|
|
|
124
122
|
@staticmethod
|
|
125
|
-
def
|
|
126
|
-
with
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
valueType=Urn.make_data_type_urn(structuredproperty.type),
|
|
150
|
-
displayName=structuredproperty.display_name,
|
|
151
|
-
description=structuredproperty.description,
|
|
152
|
-
entityTypes=[
|
|
153
|
-
Urn.make_entity_type_urn(entity_type)
|
|
154
|
-
for entity_type in structuredproperty.entity_types or []
|
|
155
|
-
],
|
|
156
|
-
cardinality=structuredproperty.cardinality,
|
|
157
|
-
immutable=structuredproperty.immutable,
|
|
158
|
-
allowedValues=(
|
|
159
|
-
[
|
|
160
|
-
PropertyValueClass(
|
|
161
|
-
value=v.value, description=v.description
|
|
162
|
-
)
|
|
163
|
-
for v in structuredproperty.allowed_values
|
|
164
|
-
]
|
|
165
|
-
if structuredproperty.allowed_values
|
|
166
|
-
else None
|
|
167
|
-
),
|
|
168
|
-
typeQualifier=(
|
|
169
|
-
{
|
|
170
|
-
"allowedTypes": structuredproperty.type_qualifier.allowed_types
|
|
171
|
-
}
|
|
172
|
-
if structuredproperty.type_qualifier
|
|
173
|
-
else None
|
|
174
|
-
),
|
|
175
|
-
),
|
|
176
|
-
)
|
|
177
|
-
graph.emit_mcp(mcp)
|
|
178
|
-
|
|
179
|
-
logger.info(f"Created structured property {structuredproperty.urn}")
|
|
180
|
-
|
|
181
|
-
@classmethod
|
|
182
|
-
def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
|
|
183
|
-
with set_graph_context(graph):
|
|
184
|
-
structured_property: Optional[
|
|
185
|
-
StructuredPropertyDefinitionClass
|
|
186
|
-
] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
|
|
187
|
-
if structured_property is None:
|
|
188
|
-
raise Exception(
|
|
189
|
-
"StructuredPropertyDefinition aspect is None. Unable to create structured property."
|
|
190
|
-
)
|
|
191
|
-
return StructuredProperties(
|
|
192
|
-
urn=urn,
|
|
193
|
-
qualified_name=structured_property.qualifiedName,
|
|
194
|
-
display_name=structured_property.displayName,
|
|
195
|
-
type=structured_property.valueType,
|
|
196
|
-
description=structured_property.description,
|
|
197
|
-
entity_types=structured_property.entityTypes,
|
|
198
|
-
cardinality=structured_property.cardinality,
|
|
199
|
-
allowed_values=(
|
|
123
|
+
def from_yaml(file: str) -> List["StructuredProperties"]:
|
|
124
|
+
with open(file) as fp:
|
|
125
|
+
structuredproperties: List[dict] = yaml.safe_load(fp)
|
|
126
|
+
|
|
127
|
+
result: List[StructuredProperties] = []
|
|
128
|
+
for structuredproperty_raw in structuredproperties:
|
|
129
|
+
result.append(StructuredProperties.parse_obj(structuredproperty_raw))
|
|
130
|
+
return result
|
|
131
|
+
|
|
132
|
+
def generate_mcps(self) -> List[MetadataChangeProposalWrapper]:
|
|
133
|
+
mcp = MetadataChangeProposalWrapper(
|
|
134
|
+
entityUrn=self.urn,
|
|
135
|
+
aspect=StructuredPropertyDefinitionClass(
|
|
136
|
+
qualifiedName=self.fqn,
|
|
137
|
+
valueType=Urn.make_data_type_urn(self.type),
|
|
138
|
+
displayName=self.display_name,
|
|
139
|
+
description=self.description,
|
|
140
|
+
entityTypes=[
|
|
141
|
+
Urn.make_entity_type_urn(entity_type)
|
|
142
|
+
for entity_type in self.entity_types or []
|
|
143
|
+
],
|
|
144
|
+
cardinality=self.cardinality,
|
|
145
|
+
immutable=self.immutable,
|
|
146
|
+
allowedValues=(
|
|
200
147
|
[
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
description=av.description,
|
|
204
|
-
)
|
|
205
|
-
for av in structured_property.allowedValues or []
|
|
148
|
+
PropertyValueClass(value=v.value, description=v.description)
|
|
149
|
+
for v in self.allowed_values
|
|
206
150
|
]
|
|
207
|
-
if
|
|
151
|
+
if self.allowed_values
|
|
208
152
|
else None
|
|
209
153
|
),
|
|
210
|
-
|
|
211
|
-
{
|
|
212
|
-
|
|
213
|
-
"allowedTypes"
|
|
214
|
-
)
|
|
215
|
-
}
|
|
216
|
-
if structured_property.typeQualifier
|
|
154
|
+
typeQualifier=(
|
|
155
|
+
{"allowedTypes": self.type_qualifier.allowed_types}
|
|
156
|
+
if self.type_qualifier
|
|
217
157
|
else None
|
|
218
158
|
),
|
|
159
|
+
),
|
|
160
|
+
)
|
|
161
|
+
return [mcp]
|
|
162
|
+
|
|
163
|
+
@staticmethod
|
|
164
|
+
def create(file: str, graph: DataHubGraph) -> None:
|
|
165
|
+
# TODO: Deprecate this method.
|
|
166
|
+
structuredproperties = StructuredProperties.from_yaml(file)
|
|
167
|
+
for structuredproperty in structuredproperties:
|
|
168
|
+
for mcp in structuredproperty.generate_mcps():
|
|
169
|
+
graph.emit_mcp(mcp)
|
|
170
|
+
|
|
171
|
+
logger.info(f"Created structured property {structuredproperty.urn}")
|
|
172
|
+
|
|
173
|
+
@classmethod
|
|
174
|
+
def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
|
|
175
|
+
structured_property: Optional[
|
|
176
|
+
StructuredPropertyDefinitionClass
|
|
177
|
+
] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
|
|
178
|
+
if structured_property is None:
|
|
179
|
+
raise Exception(
|
|
180
|
+
"StructuredPropertyDefinition aspect is None. Unable to create structured property."
|
|
219
181
|
)
|
|
182
|
+
return StructuredProperties(
|
|
183
|
+
urn=urn,
|
|
184
|
+
qualified_name=structured_property.qualifiedName,
|
|
185
|
+
display_name=structured_property.displayName,
|
|
186
|
+
type=structured_property.valueType,
|
|
187
|
+
description=structured_property.description,
|
|
188
|
+
entity_types=structured_property.entityTypes,
|
|
189
|
+
cardinality=structured_property.cardinality,
|
|
190
|
+
allowed_values=(
|
|
191
|
+
[
|
|
192
|
+
AllowedValue(
|
|
193
|
+
value=av.value,
|
|
194
|
+
description=av.description,
|
|
195
|
+
)
|
|
196
|
+
for av in structured_property.allowedValues or []
|
|
197
|
+
]
|
|
198
|
+
if structured_property.allowedValues is not None
|
|
199
|
+
else None
|
|
200
|
+
),
|
|
201
|
+
type_qualifier=(
|
|
202
|
+
{"allowed_types": structured_property.typeQualifier.get("allowedTypes")}
|
|
203
|
+
if structured_property.typeQualifier
|
|
204
|
+
else None
|
|
205
|
+
),
|
|
206
|
+
)
|
|
220
207
|
|
|
221
208
|
def to_yaml(
|
|
222
209
|
self,
|
|
@@ -31,7 +31,8 @@ def properties() -> None:
|
|
|
31
31
|
def upsert(file: Path) -> None:
|
|
32
32
|
"""Upsert structured properties in DataHub."""
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
with get_default_graph() as graph:
|
|
35
|
+
StructuredProperties.create(str(file), graph)
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
@properties.command(
|
datahub/configuration/git.py
CHANGED
|
@@ -24,7 +24,11 @@ class GitReference(ConfigModel):
|
|
|
24
24
|
"main",
|
|
25
25
|
description="Branch on which your files live by default. Typically main or master. This can also be a commit hash.",
|
|
26
26
|
)
|
|
27
|
-
|
|
27
|
+
url_subdir: Optional[str] = Field(
|
|
28
|
+
default=None,
|
|
29
|
+
description="Prefix to prepend when generating URLs for files - useful when files are in a subdirectory. "
|
|
30
|
+
"Only affects URL generation, not git operations.",
|
|
31
|
+
)
|
|
28
32
|
url_template: Optional[str] = Field(
|
|
29
33
|
None,
|
|
30
34
|
description=f"Template for generating a URL to a file in the repo e.g. '{_GITHUB_URL_TEMPLATE}'. We can infer this for GitHub and GitLab repos, and it is otherwise required."
|
|
@@ -68,6 +72,8 @@ class GitReference(ConfigModel):
|
|
|
68
72
|
|
|
69
73
|
def get_url_for_file_path(self, file_path: str) -> str:
|
|
70
74
|
assert self.url_template
|
|
75
|
+
if self.url_subdir:
|
|
76
|
+
file_path = f"{self.url_subdir}/{file_path}"
|
|
71
77
|
return self.url_template.format(
|
|
72
78
|
repo_url=self.repo, branch=self.branch, file_path=file_path
|
|
73
79
|
)
|
datahub/ingestion/api/source.py
CHANGED
|
@@ -150,7 +150,7 @@ def auto_workunit_reporter(report: "SourceReport", stream: Iterable[T]) -> Itera
|
|
|
150
150
|
report.report_workunit(wu)
|
|
151
151
|
yield wu
|
|
152
152
|
|
|
153
|
-
if report.events_produced == 0:
|
|
153
|
+
if report.event_not_produced_warn and report.events_produced == 0:
|
|
154
154
|
report.warning(
|
|
155
155
|
title="No metadata was produced by the source",
|
|
156
156
|
message="Please check the source configuration, filters, and permissions.",
|
|
@@ -65,18 +65,18 @@ class DataHubGcSourceConfig(ConfigModel):
|
|
|
65
65
|
description="Sleep between truncation monitoring.",
|
|
66
66
|
)
|
|
67
67
|
|
|
68
|
-
dataprocess_cleanup:
|
|
69
|
-
|
|
68
|
+
dataprocess_cleanup: DataProcessCleanupConfig = Field(
|
|
69
|
+
default_factory=DataProcessCleanupConfig,
|
|
70
70
|
description="Configuration for data process cleanup",
|
|
71
71
|
)
|
|
72
72
|
|
|
73
|
-
soft_deleted_entities_cleanup:
|
|
74
|
-
|
|
73
|
+
soft_deleted_entities_cleanup: SoftDeletedEntitiesCleanupConfig = Field(
|
|
74
|
+
default_factory=SoftDeletedEntitiesCleanupConfig,
|
|
75
75
|
description="Configuration for soft deleted entities cleanup",
|
|
76
76
|
)
|
|
77
77
|
|
|
78
|
-
execution_request_cleanup:
|
|
79
|
-
|
|
78
|
+
execution_request_cleanup: DatahubExecutionRequestCleanupConfig = Field(
|
|
79
|
+
default_factory=DatahubExecutionRequestCleanupConfig,
|
|
80
80
|
description="Configuration for execution request cleanup",
|
|
81
81
|
)
|
|
82
82
|
|
|
@@ -108,28 +108,22 @@ class DataHubGcSource(Source):
|
|
|
108
108
|
self.ctx = ctx
|
|
109
109
|
self.config = config
|
|
110
110
|
self.report = DataHubGcSourceReport()
|
|
111
|
+
self.report.event_not_produced_warn = False
|
|
111
112
|
self.graph = ctx.require_graph("The DataHubGc source")
|
|
112
|
-
self.dataprocess_cleanup
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
self.
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
)
|
|
127
|
-
if self.config.execution_request_cleanup:
|
|
128
|
-
self.execution_request_cleanup = DatahubExecutionRequestCleanup(
|
|
129
|
-
config=self.config.execution_request_cleanup,
|
|
130
|
-
graph=self.graph,
|
|
131
|
-
report=self.report,
|
|
132
|
-
)
|
|
113
|
+
self.dataprocess_cleanup = DataProcessCleanup(
|
|
114
|
+
ctx, self.config.dataprocess_cleanup, self.report, self.config.dry_run
|
|
115
|
+
)
|
|
116
|
+
self.soft_deleted_entities_cleanup = SoftDeletedEntitiesCleanup(
|
|
117
|
+
ctx,
|
|
118
|
+
self.config.soft_deleted_entities_cleanup,
|
|
119
|
+
self.report,
|
|
120
|
+
self.config.dry_run,
|
|
121
|
+
)
|
|
122
|
+
self.execution_request_cleanup = DatahubExecutionRequestCleanup(
|
|
123
|
+
config=self.config.execution_request_cleanup,
|
|
124
|
+
graph=self.graph,
|
|
125
|
+
report=self.report,
|
|
126
|
+
)
|
|
133
127
|
|
|
134
128
|
@classmethod
|
|
135
129
|
def create(cls, config_dict, ctx):
|
|
@@ -153,19 +147,19 @@ class DataHubGcSource(Source):
|
|
|
153
147
|
self.truncate_indices()
|
|
154
148
|
except Exception as e:
|
|
155
149
|
self.report.failure("While trying to truncate indices ", exc=e)
|
|
156
|
-
if self.soft_deleted_entities_cleanup:
|
|
150
|
+
if self.config.soft_deleted_entities_cleanup.enabled:
|
|
157
151
|
try:
|
|
158
152
|
self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
|
|
159
153
|
except Exception as e:
|
|
160
154
|
self.report.failure(
|
|
161
155
|
"While trying to cleanup soft deleted entities ", exc=e
|
|
162
156
|
)
|
|
163
|
-
if self.execution_request_cleanup:
|
|
157
|
+
if self.config.execution_request_cleanup.enabled:
|
|
164
158
|
try:
|
|
165
159
|
self.execution_request_cleanup.run()
|
|
166
160
|
except Exception as e:
|
|
167
161
|
self.report.failure("While trying to cleanup execution request ", exc=e)
|
|
168
|
-
if self.dataprocess_cleanup:
|
|
162
|
+
if self.config.dataprocess_cleanup.enabled:
|
|
169
163
|
try:
|
|
170
164
|
yield from self.dataprocess_cleanup.get_workunits_internal()
|
|
171
165
|
except Exception as e:
|
|
@@ -98,6 +98,9 @@ query getDataJobRuns($dataJobUrn: String!, $start: Int!, $count: Int!) {
|
|
|
98
98
|
|
|
99
99
|
|
|
100
100
|
class DataProcessCleanupConfig(ConfigModel):
|
|
101
|
+
enabled: bool = Field(
|
|
102
|
+
default=True, description="Whether to do data process cleanup."
|
|
103
|
+
)
|
|
101
104
|
retention_days: Optional[int] = Field(
|
|
102
105
|
10,
|
|
103
106
|
description="Number of days to retain metadata in DataHub",
|
|
@@ -371,17 +374,26 @@ class DataProcessCleanup:
|
|
|
371
374
|
previous_scroll_id: Optional[str] = None
|
|
372
375
|
|
|
373
376
|
while True:
|
|
374
|
-
result =
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
377
|
+
result = None
|
|
378
|
+
try:
|
|
379
|
+
result = self.ctx.graph.execute_graphql(
|
|
380
|
+
DATAFLOW_QUERY,
|
|
381
|
+
{
|
|
382
|
+
"query": "*",
|
|
383
|
+
"scrollId": scroll_id if scroll_id else None,
|
|
384
|
+
"batchSize": self.config.batch_size,
|
|
385
|
+
},
|
|
386
|
+
)
|
|
387
|
+
except Exception as e:
|
|
388
|
+
self.report.failure(
|
|
389
|
+
f"While trying to get dataflows with {scroll_id}", exc=e
|
|
390
|
+
)
|
|
391
|
+
break
|
|
392
|
+
|
|
382
393
|
scrollAcrossEntities = result.get("scrollAcrossEntities")
|
|
383
394
|
if not scrollAcrossEntities:
|
|
384
395
|
raise ValueError("Missing scrollAcrossEntities in response")
|
|
396
|
+
logger.info(f"Got {scrollAcrossEntities.get('count')} DataFlow entities")
|
|
385
397
|
|
|
386
398
|
scroll_id = scrollAcrossEntities.get("nextScrollId")
|
|
387
399
|
for flow in scrollAcrossEntities.get("searchResults"):
|
|
@@ -398,6 +410,8 @@ class DataProcessCleanup:
|
|
|
398
410
|
previous_scroll_id = scroll_id
|
|
399
411
|
|
|
400
412
|
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
413
|
+
if not self.config.enabled:
|
|
414
|
+
return []
|
|
401
415
|
assert self.ctx.graph
|
|
402
416
|
|
|
403
417
|
dataFlows: Dict[str, DataFlowEntity] = {}
|
|
@@ -411,14 +425,20 @@ class DataProcessCleanup:
|
|
|
411
425
|
deleted_jobs: int = 0
|
|
412
426
|
|
|
413
427
|
while True:
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
428
|
+
try:
|
|
429
|
+
result = self.ctx.graph.execute_graphql(
|
|
430
|
+
DATAJOB_QUERY,
|
|
431
|
+
{
|
|
432
|
+
"query": "*",
|
|
433
|
+
"scrollId": scroll_id if scroll_id else None,
|
|
434
|
+
"batchSize": self.config.batch_size,
|
|
435
|
+
},
|
|
436
|
+
)
|
|
437
|
+
except Exception as e:
|
|
438
|
+
self.report.failure(
|
|
439
|
+
f"While trying to get data jobs with {scroll_id}", exc=e
|
|
440
|
+
)
|
|
441
|
+
break
|
|
422
442
|
scrollAcrossEntities = result.get("scrollAcrossEntities")
|
|
423
443
|
if not scrollAcrossEntities:
|
|
424
444
|
raise ValueError("Missing scrollAcrossEntities in response")
|
|
@@ -20,6 +20,9 @@ logger = logging.getLogger(__name__)
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class SoftDeletedEntitiesCleanupConfig(ConfigModel):
|
|
23
|
+
enabled: bool = Field(
|
|
24
|
+
default=True, description="Whether to do soft deletion cleanup."
|
|
25
|
+
)
|
|
23
26
|
retention_days: Optional[int] = Field(
|
|
24
27
|
10,
|
|
25
28
|
description="Number of days to retain metadata in DataHub",
|
|
@@ -156,6 +159,8 @@ class SoftDeletedEntitiesCleanup:
|
|
|
156
159
|
self.delete_entity(urn)
|
|
157
160
|
|
|
158
161
|
def cleanup_soft_deleted_entities(self) -> None:
|
|
162
|
+
if not self.config.enabled:
|
|
163
|
+
return
|
|
159
164
|
assert self.ctx.graph
|
|
160
165
|
start_time = time.time()
|
|
161
166
|
|