acryl-datahub 1.2.0__py3-none-any.whl → 1.2.0.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/METADATA +2509 -2509
- {acryl_datahub-1.2.0.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/RECORD +16 -16
- datahub/_version.py +1 -1
- datahub/configuration/pydantic_migration_helpers.py +7 -5
- datahub/ingestion/source/mock_data/datahub_mock_data.py +55 -20
- datahub/ingestion/source/mock_data/table_naming_helper.py +10 -4
- datahub/metadata/_internal_schema_classes.py +74 -8
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +2 -0
- datahub/metadata/schema.avsc +42 -5
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +42 -5
- datahub/sdk/search_filters.py +95 -27
- datahub/upgrade/upgrade.py +14 -5
- {acryl_datahub-1.2.0.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.2.0.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1
|
+
acryl_datahub-1.2.0.1rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=lFvgok6Ukyjs_TiTTBHr0Swy25dhf9vLGNMLEKQ9Szg,323
|
|
5
5
|
datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -110,7 +110,7 @@ datahub/configuration/json_loader.py,sha256=vIDnjwXWi9yHDO8KW64EupOzOb_sspehGCD7
|
|
|
110
110
|
datahub/configuration/kafka.py,sha256=qj4qNBzeXeS-mUtf441B1jj_22wPO6Eho_stErMD-GY,2586
|
|
111
111
|
datahub/configuration/kafka_consumer_config.py,sha256=LivsObTt9yC3WoGnslJbF_x4ojfNdxMIMEhb8vvJfcA,2133
|
|
112
112
|
datahub/configuration/pattern_utils.py,sha256=Q5IB9RfWOOo5FvRVBU7XkhiwHCxSQ1NTMfUlWtWI9qc,699
|
|
113
|
-
datahub/configuration/pydantic_migration_helpers.py,sha256=
|
|
113
|
+
datahub/configuration/pydantic_migration_helpers.py,sha256=7HbLtMl95ERWminslXFxaEliXEpbTzDINspkiP_jT2c,1672
|
|
114
114
|
datahub/configuration/source_common.py,sha256=t7rLiV76dKQUCqdRb6BpFnDqeNW_NvoE0ZehhrUSCOI,2605
|
|
115
115
|
datahub/configuration/time_window_config.py,sha256=c4mbrgmTobt4t_j6unDeYvmGSlbRB2hAgAst6yq4nHA,5412
|
|
116
116
|
datahub/configuration/toml.py,sha256=Ohc5sAWLPoAinPYL8njyheZ3ak81fC2Sp8IbBbESPGg,380
|
|
@@ -391,9 +391,9 @@ datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
|
391
391
|
datahub/ingestion/source/metadata/business_glossary.py,sha256=T_RJHst6iQRghJNmLLPeSBMEDsbEKf3yBldOAgMcGuo,19666
|
|
392
392
|
datahub/ingestion/source/metadata/lineage.py,sha256=PA4JwSeQ-30XFMN4O5tPwIu-hZF1e-xMZ_CnEUE2c-Q,9595
|
|
393
393
|
datahub/ingestion/source/mock_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
394
|
-
datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=
|
|
394
|
+
datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=JHR6NZnR15qoVZ9R4CZl61nmZXmpsX55PaoP0ykJ_Ns,18810
|
|
395
395
|
datahub/ingestion/source/mock_data/datahub_mock_data_report.py,sha256=sV_H7JgcuVbrpIBqtGse_BBigMdqP32ZXuanpeXmwVI,331
|
|
396
|
-
datahub/ingestion/source/mock_data/table_naming_helper.py,sha256=
|
|
396
|
+
datahub/ingestion/source/mock_data/table_naming_helper.py,sha256=zJtEBSJGDvVr-kiKjK7LbHAifK3sfE786M3yO--Bn2o,3493
|
|
397
397
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
398
398
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=JqKCwxBJfOrC8SF7CmDG0cseWxHk_7E2v4Diw3Q0-WM,14181
|
|
399
399
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -618,8 +618,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
618
618
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
619
619
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
620
620
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
621
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
622
|
-
datahub/metadata/schema.avsc,sha256=
|
|
621
|
+
datahub/metadata/_internal_schema_classes.py,sha256=V43yFMGVycJf0UV_b-ZAaOigWedg_iItFj3rOoIx3Ro,1046990
|
|
622
|
+
datahub/metadata/schema.avsc,sha256=NlFTKx_U18Je4-BV3GAKS6wlEZrBTRUt-UF9gUH-5z0,735189
|
|
623
623
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
624
624
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
625
625
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -673,7 +673,7 @@ datahub/metadata/com/linkedin/pegasus2avro/metadata/query/filter/__init__.py,sha
|
|
|
673
673
|
datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py,sha256=OPboF8SV11wGnjvWQB-rxtB0otMdCsE7Tcy7xkOUgz8,2358
|
|
674
674
|
datahub/metadata/com/linkedin/pegasus2avro/ml/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
675
675
|
datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py,sha256=qefB0n1xilQHCPla80b39wdjHOYoVtzBJT2jGc2szkM,3309
|
|
676
|
-
datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py,sha256=
|
|
676
|
+
datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py,sha256=Np5zhmoMzMEIioN7bPwPZkXBh_0aTvcEaVAdL0qrzSE,1108
|
|
677
677
|
datahub/metadata/com/linkedin/pegasus2avro/mxe/__init__.py,sha256=LqGp9QTLk_tiSsbHMGSUH7uPG00Bf_qQIMiU7vtO4Tk,973
|
|
678
678
|
datahub/metadata/com/linkedin/pegasus2avro/notebook/__init__.py,sha256=BcjOsz4YeHQbLLBb4Im4uJ7ux1hGHquQDmiIOiDXVtE,901
|
|
679
679
|
datahub/metadata/com/linkedin/pegasus2avro/ownership/__init__.py,sha256=r813MW_bkP1ZpC2NJf7uCHEOapjebl611c90vryKX4A,302
|
|
@@ -752,7 +752,7 @@ datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjO
|
|
|
752
752
|
datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
|
|
753
753
|
datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
|
|
754
754
|
datahub/metadata/schemas/DataHubPageModuleKey.avsc,sha256=NyFN8cVO6s6rtgoLGJJGfcPfpGr5PfmZlIhM6ajldfQ,460
|
|
755
|
-
datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=
|
|
755
|
+
datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=NldfplvG_NKnbu1x0A1T6oTYKoTcGf_saa9AYFrcsTs,7618
|
|
756
756
|
datahub/metadata/schemas/DataHubPageTemplateKey.avsc,sha256=0sVqwL97Rp8YHPytp2RqUP5hIW048hmT2hPNP5k6arc,472
|
|
757
757
|
datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=0ndN64UNAADL6G_GVjJLHbe_dBnWhVRjtI3MilOlHQc,5651
|
|
758
758
|
datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
|
|
@@ -954,7 +954,7 @@ datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
|
|
|
954
954
|
datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
|
|
955
955
|
datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
|
|
956
956
|
datahub/sdk/search_client.py,sha256=hlk40VnD3eT88hMgwXAUv31-ENbDe50P-gsXUnGSNeo,3512
|
|
957
|
-
datahub/sdk/search_filters.py,sha256=
|
|
957
|
+
datahub/sdk/search_filters.py,sha256=xk19K7V6Y3YflNqNXgMTn_BpaRuoFhlub7w4tLjQlc8,15619
|
|
958
958
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
959
959
|
datahub/secret/datahub_secret_store.py,sha256=xyNAZY62d6KSz_kYF9wN7RDMLvNhu2ayOzcYvubOX1E,2519
|
|
960
960
|
datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
|
|
@@ -1003,7 +1003,7 @@ datahub/testing/mcp_diff.py,sha256=1BpQ3hST46cOQi1SmKdsto3j6x6Sk6yHm0vG1w9IDL0,1
|
|
|
1003
1003
|
datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
|
|
1004
1004
|
datahub/testing/sdk_v2_helpers.py,sha256=FooqGn5PfdJJrCFm3x_uh02IMhDdLjqEf64W16WdvE0,424
|
|
1005
1005
|
datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1006
|
-
datahub/upgrade/upgrade.py,sha256
|
|
1006
|
+
datahub/upgrade/upgrade.py,sha256=bxGjfLU-hSQXS9Q1RlhWeJMETTGKqLNGFDPtcheCO4o,18474
|
|
1007
1007
|
datahub/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1008
1008
|
datahub/utilities/_custom_package_loader.py,sha256=9kgPE7Y77E-hNee8l4sKtVby-btUNum3dBfDixMzcVA,2059
|
|
1009
1009
|
datahub/utilities/_markupsafe_compat.py,sha256=QX7c9KiHs56ASl7bJlgR4FAf3CGiY94zIr0h6Ak15To,444
|
|
@@ -1093,8 +1093,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1093
1093
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1094
1094
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1095
1095
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1096
|
-
acryl_datahub-1.2.0.dist-info/METADATA,sha256=
|
|
1097
|
-
acryl_datahub-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1098
|
-
acryl_datahub-1.2.0.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
|
|
1099
|
-
acryl_datahub-1.2.0.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1100
|
-
acryl_datahub-1.2.0.dist-info/RECORD,,
|
|
1096
|
+
acryl_datahub-1.2.0.1rc1.dist-info/METADATA,sha256=Xybyu1aEfknH9PuIeVzawfwQ-xAlvMoBFASyzB83Mfo,181893
|
|
1097
|
+
acryl_datahub-1.2.0.1rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1098
|
+
acryl_datahub-1.2.0.1rc1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
|
|
1099
|
+
acryl_datahub-1.2.0.1rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1100
|
+
acryl_datahub-1.2.0.1rc1.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import pydantic.version
|
|
2
2
|
from packaging.version import Version
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
else:
|
|
8
|
-
PYDANTIC_VERSION_2 = False
|
|
4
|
+
_pydantic_version = Version(pydantic.version.VERSION)
|
|
5
|
+
|
|
6
|
+
PYDANTIC_VERSION_2 = _pydantic_version >= Version("2.0")
|
|
9
7
|
|
|
8
|
+
# The pydantic.Discriminator type was added in v2.5.0.
|
|
9
|
+
# https://docs.pydantic.dev/latest/changelog/#v250-2023-11-13
|
|
10
|
+
PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR = _pydantic_version >= Version("2.5.0")
|
|
10
11
|
|
|
11
12
|
# This can be used to silence deprecation warnings while we migrate.
|
|
12
13
|
if PYDANTIC_VERSION_2:
|
|
@@ -50,6 +51,7 @@ class v1_ConfigModel(v1_BaseModel):
|
|
|
50
51
|
|
|
51
52
|
__all__ = [
|
|
52
53
|
"PYDANTIC_VERSION_2",
|
|
54
|
+
"PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR",
|
|
53
55
|
"PydanticDeprecatedSince20",
|
|
54
56
|
"GenericModel",
|
|
55
57
|
"v1_ConfigModel",
|
|
@@ -75,6 +75,11 @@ class LineageConfigGen1(ConfigModel):
|
|
|
75
75
|
description="Whether this source is enabled",
|
|
76
76
|
)
|
|
77
77
|
|
|
78
|
+
table_name_prefix: Optional[str] = Field(
|
|
79
|
+
default=None,
|
|
80
|
+
description="Prefix to add to the table name. This is useful for testing purposes.",
|
|
81
|
+
)
|
|
82
|
+
|
|
78
83
|
emit_lineage: bool = Field(
|
|
79
84
|
default=True,
|
|
80
85
|
description="Whether to emit lineage data for testing purposes. When False, no lineage data is generated regardless of other settings.",
|
|
@@ -104,6 +109,11 @@ class LineageConfigGen1(ConfigModel):
|
|
|
104
109
|
description="Pattern for determining SubTypes. Options: 'alternating', 'all_table', 'all_view', 'level_based'",
|
|
105
110
|
)
|
|
106
111
|
|
|
112
|
+
subtype_types: List[str] = Field(
|
|
113
|
+
default=["Table", "View"],
|
|
114
|
+
description="List of types to use in alternating pattern. Defaults to ['Table', 'View'].",
|
|
115
|
+
)
|
|
116
|
+
|
|
107
117
|
level_subtypes: Dict[int, str] = Field(
|
|
108
118
|
default={0: "Table", 1: "View", 2: "Table"},
|
|
109
119
|
description="Mapping of level to subtype for level_based pattern",
|
|
@@ -240,38 +250,45 @@ class DataHubMockDataSource(Source):
|
|
|
240
250
|
return fan_out_after_first if fan_out_after_first is not None else fan_out
|
|
241
251
|
|
|
242
252
|
def _determine_subtype(
|
|
243
|
-
self,
|
|
253
|
+
self,
|
|
254
|
+
table_level: int,
|
|
255
|
+
table_index: int,
|
|
256
|
+
subtype_pattern: SubTypePattern,
|
|
257
|
+
subtype_types: List[str],
|
|
258
|
+
level_subtypes: Dict[int, str],
|
|
244
259
|
) -> str:
|
|
245
260
|
"""
|
|
246
261
|
Determine subtype based on configured pattern.
|
|
247
262
|
|
|
248
263
|
Args:
|
|
249
|
-
table_name: Name of the table
|
|
250
264
|
table_level: Level of the table in the lineage graph
|
|
251
265
|
table_index: Index of the table within its level
|
|
266
|
+
subtype_pattern: Pattern for determining subtypes
|
|
267
|
+
subtype_types: List of types to use in alternating pattern
|
|
268
|
+
level_subtypes: Mapping of level to subtype for level_based pattern
|
|
252
269
|
|
|
253
270
|
Returns:
|
|
254
|
-
The determined subtype
|
|
271
|
+
The determined subtype from the configured types
|
|
255
272
|
"""
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
return (
|
|
260
|
-
|
|
261
|
-
)
|
|
262
|
-
elif pattern == SubTypePattern.LEVEL_BASED:
|
|
263
|
-
return self.config.gen_1.level_subtypes.get(
|
|
264
|
-
table_level, DatasetSubTypes.TABLE
|
|
265
|
-
)
|
|
266
|
-
elif pattern == SubTypePattern.ALL_TABLE:
|
|
273
|
+
if subtype_pattern == SubTypePattern.ALTERNATING:
|
|
274
|
+
return subtype_types[table_index % len(subtype_types)]
|
|
275
|
+
elif subtype_pattern == SubTypePattern.LEVEL_BASED:
|
|
276
|
+
return level_subtypes.get(table_level, DatasetSubTypes.TABLE)
|
|
277
|
+
elif subtype_pattern == SubTypePattern.ALL_TABLE:
|
|
267
278
|
return DatasetSubTypes.TABLE
|
|
268
|
-
elif
|
|
279
|
+
elif subtype_pattern == SubTypePattern.ALL_VIEW:
|
|
269
280
|
return DatasetSubTypes.VIEW
|
|
270
281
|
else:
|
|
271
282
|
return DatasetSubTypes.TABLE # default
|
|
272
283
|
|
|
273
284
|
def _get_subtypes_aspect(
|
|
274
|
-
self,
|
|
285
|
+
self,
|
|
286
|
+
table_name: str,
|
|
287
|
+
table_level: int,
|
|
288
|
+
table_index: int,
|
|
289
|
+
subtype_pattern: SubTypePattern,
|
|
290
|
+
subtype_types: List[str],
|
|
291
|
+
level_subtypes: Dict[int, str],
|
|
275
292
|
) -> MetadataWorkUnit:
|
|
276
293
|
"""
|
|
277
294
|
Create a SubTypes aspect for a table based on deterministic pattern.
|
|
@@ -280,12 +297,17 @@ class DataHubMockDataSource(Source):
|
|
|
280
297
|
table_name: Name of the table
|
|
281
298
|
table_level: Level of the table in the lineage graph
|
|
282
299
|
table_index: Index of the table within its level
|
|
300
|
+
subtype_pattern: Pattern for determining subtypes
|
|
301
|
+
subtype_types: List of types to use in alternating pattern
|
|
302
|
+
level_subtypes: Mapping of level to subtype for level_based pattern
|
|
283
303
|
|
|
284
304
|
Returns:
|
|
285
305
|
MetadataWorkUnit containing the SubTypes aspect
|
|
286
306
|
"""
|
|
287
307
|
# Determine subtype based on pattern
|
|
288
|
-
subtype = self._determine_subtype(
|
|
308
|
+
subtype = self._determine_subtype(
|
|
309
|
+
table_level, table_index, subtype_pattern, subtype_types, level_subtypes
|
|
310
|
+
)
|
|
289
311
|
|
|
290
312
|
urn = make_dataset_urn(platform="fake", name=table_name)
|
|
291
313
|
mcp = MetadataChangeProposalWrapper(
|
|
@@ -316,11 +338,20 @@ class DataHubMockDataSource(Source):
|
|
|
316
338
|
tables_at_level = tables_at_levels[i]
|
|
317
339
|
|
|
318
340
|
for j in range(tables_at_level):
|
|
319
|
-
table_name = TableNamingHelper.generate_table_name(
|
|
341
|
+
table_name = TableNamingHelper.generate_table_name(
|
|
342
|
+
hops, fan_out, i, j, gen_1.table_name_prefix
|
|
343
|
+
)
|
|
320
344
|
|
|
321
345
|
yield self._get_status_aspect(table_name)
|
|
322
346
|
|
|
323
|
-
yield self._get_subtypes_aspect(
|
|
347
|
+
yield self._get_subtypes_aspect(
|
|
348
|
+
table_name,
|
|
349
|
+
i,
|
|
350
|
+
j,
|
|
351
|
+
gen_1.subtype_pattern,
|
|
352
|
+
gen_1.subtype_types,
|
|
353
|
+
gen_1.level_subtypes,
|
|
354
|
+
)
|
|
324
355
|
|
|
325
356
|
yield self._get_profile_aspect(table_name)
|
|
326
357
|
|
|
@@ -336,6 +367,7 @@ class DataHubMockDataSource(Source):
|
|
|
336
367
|
fan_out=fan_out,
|
|
337
368
|
fan_out_after_first=fan_out_after_first,
|
|
338
369
|
tables_at_levels=tables_at_levels,
|
|
370
|
+
table_name_prefix=gen_1.table_name_prefix,
|
|
339
371
|
)
|
|
340
372
|
|
|
341
373
|
def _generate_lineage_for_table(
|
|
@@ -347,6 +379,7 @@ class DataHubMockDataSource(Source):
|
|
|
347
379
|
fan_out: int,
|
|
348
380
|
fan_out_after_first: Optional[int],
|
|
349
381
|
tables_at_levels: List[int],
|
|
382
|
+
table_name_prefix: Optional[str],
|
|
350
383
|
) -> Iterable[MetadataWorkUnit]:
|
|
351
384
|
"""Generate lineage relationships for a specific table."""
|
|
352
385
|
# Only generate lineage if there are downstream levels
|
|
@@ -365,6 +398,7 @@ class DataHubMockDataSource(Source):
|
|
|
365
398
|
hops=hops,
|
|
366
399
|
fan_out=fan_out,
|
|
367
400
|
tables_at_levels=tables_at_levels,
|
|
401
|
+
table_name_prefix=table_name_prefix,
|
|
368
402
|
)
|
|
369
403
|
|
|
370
404
|
def _generate_downstream_lineage(
|
|
@@ -376,6 +410,7 @@ class DataHubMockDataSource(Source):
|
|
|
376
410
|
hops: int,
|
|
377
411
|
fan_out: int,
|
|
378
412
|
tables_at_levels: List[int],
|
|
413
|
+
table_name_prefix: Optional[str],
|
|
379
414
|
) -> Iterable[MetadataWorkUnit]:
|
|
380
415
|
"""Generate lineage relationships to downstream tables."""
|
|
381
416
|
downstream_level = upstream_table_level + 1
|
|
@@ -389,7 +424,7 @@ class DataHubMockDataSource(Source):
|
|
|
389
424
|
|
|
390
425
|
for downstream_index in range(start_downstream, end_downstream):
|
|
391
426
|
downstream_table_name = TableNamingHelper.generate_table_name(
|
|
392
|
-
hops, fan_out, downstream_level, downstream_index
|
|
427
|
+
hops, fan_out, downstream_level, downstream_index, table_name_prefix
|
|
393
428
|
)
|
|
394
429
|
yield self._get_upstream_aspect(
|
|
395
430
|
upstream_table=upstream_table_name,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Dict
|
|
1
|
+
from typing import Dict, Optional
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class TableNamingHelper:
|
|
@@ -10,7 +10,11 @@ class TableNamingHelper:
|
|
|
10
10
|
|
|
11
11
|
@staticmethod
|
|
12
12
|
def generate_table_name(
|
|
13
|
-
lineage_hops: int,
|
|
13
|
+
lineage_hops: int,
|
|
14
|
+
lineage_fan_out: int,
|
|
15
|
+
level: int,
|
|
16
|
+
table_index: int,
|
|
17
|
+
prefix: Optional[str] = None,
|
|
14
18
|
) -> str:
|
|
15
19
|
"""
|
|
16
20
|
Generate a table name following the standard naming convention.
|
|
@@ -20,11 +24,13 @@ class TableNamingHelper:
|
|
|
20
24
|
lineage_fan_out: Number of downstream tables per upstream table
|
|
21
25
|
level: Level of the table in the lineage graph (0-based)
|
|
22
26
|
table_index: Index of the table within its level (0-based)
|
|
27
|
+
prefix: Optional prefix to add to the table name
|
|
23
28
|
|
|
24
29
|
Returns:
|
|
25
|
-
Table name following the pattern: "hops_{lineage_hops}_f_{lineage_fan_out}_h{level}_t{table_index}"
|
|
30
|
+
Table name following the pattern: "{prefix}hops_{lineage_hops}_f_{lineage_fan_out}_h{level}_t{table_index}"
|
|
26
31
|
"""
|
|
27
|
-
|
|
32
|
+
base_name = f"hops_{lineage_hops}_f_{lineage_fan_out}_h{level}_t{table_index}"
|
|
33
|
+
return f"{prefix}{base_name}" if prefix else base_name
|
|
28
34
|
|
|
29
35
|
@staticmethod
|
|
30
36
|
def parse_table_name(table_name: str) -> Dict[str, int]:
|
|
@@ -19931,6 +19931,31 @@ class TrainingDataClass(_Aspect):
|
|
|
19931
19931
|
self._inner_dict['trainingData'] = value
|
|
19932
19932
|
|
|
19933
19933
|
|
|
19934
|
+
class AssetCollectionModuleParamsClass(DictWrapper):
|
|
19935
|
+
"""The params required if the module is type ASSET_COLLECTION"""
|
|
19936
|
+
|
|
19937
|
+
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.module.AssetCollectionModuleParams")
|
|
19938
|
+
def __init__(self,
|
|
19939
|
+
assetUrns: List[str],
|
|
19940
|
+
):
|
|
19941
|
+
super().__init__()
|
|
19942
|
+
|
|
19943
|
+
self.assetUrns = assetUrns
|
|
19944
|
+
|
|
19945
|
+
def _restore_defaults(self) -> None:
|
|
19946
|
+
self.assetUrns = list()
|
|
19947
|
+
|
|
19948
|
+
|
|
19949
|
+
@property
|
|
19950
|
+
def assetUrns(self) -> List[str]:
|
|
19951
|
+
# No docs available.
|
|
19952
|
+
return self._inner_dict.get('assetUrns') # type: ignore
|
|
19953
|
+
|
|
19954
|
+
@assetUrns.setter
|
|
19955
|
+
def assetUrns(self, value: List[str]) -> None:
|
|
19956
|
+
self._inner_dict['assetUrns'] = value
|
|
19957
|
+
|
|
19958
|
+
|
|
19934
19959
|
class DataHubPageModuleParamsClass(DictWrapper):
|
|
19935
19960
|
"""The specific parameters stored for a module"""
|
|
19936
19961
|
|
|
@@ -19938,15 +19963,18 @@ class DataHubPageModuleParamsClass(DictWrapper):
|
|
|
19938
19963
|
def __init__(self,
|
|
19939
19964
|
linkParams: Union[None, "LinkModuleParamsClass"]=None,
|
|
19940
19965
|
richTextParams: Union[None, "RichTextModuleParamsClass"]=None,
|
|
19966
|
+
assetCollectionParams: Union[None, "AssetCollectionModuleParamsClass"]=None,
|
|
19941
19967
|
):
|
|
19942
19968
|
super().__init__()
|
|
19943
19969
|
|
|
19944
19970
|
self.linkParams = linkParams
|
|
19945
19971
|
self.richTextParams = richTextParams
|
|
19972
|
+
self.assetCollectionParams = assetCollectionParams
|
|
19946
19973
|
|
|
19947
19974
|
def _restore_defaults(self) -> None:
|
|
19948
19975
|
self.linkParams = self.RECORD_SCHEMA.fields_dict["linkParams"].default
|
|
19949
19976
|
self.richTextParams = self.RECORD_SCHEMA.fields_dict["richTextParams"].default
|
|
19977
|
+
self.assetCollectionParams = self.RECORD_SCHEMA.fields_dict["assetCollectionParams"].default
|
|
19950
19978
|
|
|
19951
19979
|
|
|
19952
19980
|
@property
|
|
@@ -19969,6 +19997,16 @@ class DataHubPageModuleParamsClass(DictWrapper):
|
|
|
19969
19997
|
self._inner_dict['richTextParams'] = value
|
|
19970
19998
|
|
|
19971
19999
|
|
|
20000
|
+
@property
|
|
20001
|
+
def assetCollectionParams(self) -> Union[None, "AssetCollectionModuleParamsClass"]:
|
|
20002
|
+
"""The params required if the module is type ASSET_COLLECTION"""
|
|
20003
|
+
return self._inner_dict.get('assetCollectionParams') # type: ignore
|
|
20004
|
+
|
|
20005
|
+
@assetCollectionParams.setter
|
|
20006
|
+
def assetCollectionParams(self, value: Union[None, "AssetCollectionModuleParamsClass"]) -> None:
|
|
20007
|
+
self._inner_dict['assetCollectionParams'] = value
|
|
20008
|
+
|
|
20009
|
+
|
|
19972
20010
|
class DataHubPageModulePropertiesClass(_Aspect):
|
|
19973
20011
|
"""The main properties of a DataHub page module"""
|
|
19974
20012
|
|
|
@@ -20116,24 +20154,50 @@ class LinkModuleParamsClass(DictWrapper):
|
|
|
20116
20154
|
|
|
20117
20155
|
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.module.LinkModuleParams")
|
|
20118
20156
|
def __init__(self,
|
|
20119
|
-
|
|
20157
|
+
linkUrl: str,
|
|
20158
|
+
imageUrl: Union[None, str]=None,
|
|
20159
|
+
description: Union[None, str]=None,
|
|
20120
20160
|
):
|
|
20121
20161
|
super().__init__()
|
|
20122
20162
|
|
|
20123
|
-
self.
|
|
20163
|
+
self.linkUrl = linkUrl
|
|
20164
|
+
self.imageUrl = imageUrl
|
|
20165
|
+
self.description = description
|
|
20124
20166
|
|
|
20125
20167
|
def _restore_defaults(self) -> None:
|
|
20126
|
-
self.
|
|
20168
|
+
self.linkUrl = str()
|
|
20169
|
+
self.imageUrl = self.RECORD_SCHEMA.fields_dict["imageUrl"].default
|
|
20170
|
+
self.description = self.RECORD_SCHEMA.fields_dict["description"].default
|
|
20171
|
+
|
|
20172
|
+
|
|
20173
|
+
@property
|
|
20174
|
+
def linkUrl(self) -> str:
|
|
20175
|
+
# No docs available.
|
|
20176
|
+
return self._inner_dict.get('linkUrl') # type: ignore
|
|
20177
|
+
|
|
20178
|
+
@linkUrl.setter
|
|
20179
|
+
def linkUrl(self, value: str) -> None:
|
|
20180
|
+
self._inner_dict['linkUrl'] = value
|
|
20181
|
+
|
|
20182
|
+
|
|
20183
|
+
@property
|
|
20184
|
+
def imageUrl(self) -> Union[None, str]:
|
|
20185
|
+
# No docs available.
|
|
20186
|
+
return self._inner_dict.get('imageUrl') # type: ignore
|
|
20187
|
+
|
|
20188
|
+
@imageUrl.setter
|
|
20189
|
+
def imageUrl(self, value: Union[None, str]) -> None:
|
|
20190
|
+
self._inner_dict['imageUrl'] = value
|
|
20127
20191
|
|
|
20128
20192
|
|
|
20129
20193
|
@property
|
|
20130
|
-
def
|
|
20194
|
+
def description(self) -> Union[None, str]:
|
|
20131
20195
|
# No docs available.
|
|
20132
|
-
return self._inner_dict.get('
|
|
20196
|
+
return self._inner_dict.get('description') # type: ignore
|
|
20133
20197
|
|
|
20134
|
-
@
|
|
20135
|
-
def
|
|
20136
|
-
self._inner_dict['
|
|
20198
|
+
@description.setter
|
|
20199
|
+
def description(self, value: Union[None, str]) -> None:
|
|
20200
|
+
self._inner_dict['description'] = value
|
|
20137
20201
|
|
|
20138
20202
|
|
|
20139
20203
|
class PageModuleScopeClass(object):
|
|
@@ -27102,6 +27166,7 @@ __SCHEMA_TYPES = {
|
|
|
27102
27166
|
'com.linkedin.pegasus2avro.ml.metadata.SourceCodeUrl': SourceCodeUrlClass,
|
|
27103
27167
|
'com.linkedin.pegasus2avro.ml.metadata.SourceCodeUrlType': SourceCodeUrlTypeClass,
|
|
27104
27168
|
'com.linkedin.pegasus2avro.ml.metadata.TrainingData': TrainingDataClass,
|
|
27169
|
+
'com.linkedin.pegasus2avro.module.AssetCollectionModuleParams': AssetCollectionModuleParamsClass,
|
|
27105
27170
|
'com.linkedin.pegasus2avro.module.DataHubPageModuleParams': DataHubPageModuleParamsClass,
|
|
27106
27171
|
'com.linkedin.pegasus2avro.module.DataHubPageModuleProperties': DataHubPageModulePropertiesClass,
|
|
27107
27172
|
'com.linkedin.pegasus2avro.module.DataHubPageModuleType': DataHubPageModuleTypeClass,
|
|
@@ -27610,6 +27675,7 @@ __SCHEMA_TYPES = {
|
|
|
27610
27675
|
'SourceCodeUrl': SourceCodeUrlClass,
|
|
27611
27676
|
'SourceCodeUrlType': SourceCodeUrlTypeClass,
|
|
27612
27677
|
'TrainingData': TrainingDataClass,
|
|
27678
|
+
'AssetCollectionModuleParams': AssetCollectionModuleParamsClass,
|
|
27613
27679
|
'DataHubPageModuleParams': DataHubPageModuleParamsClass,
|
|
27614
27680
|
'DataHubPageModuleProperties': DataHubPageModulePropertiesClass,
|
|
27615
27681
|
'DataHubPageModuleType': DataHubPageModuleTypeClass,
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
# pylint: skip-file
|
|
8
8
|
# fmt: off
|
|
9
9
|
# isort: skip_file
|
|
10
|
+
from .....schema_classes import AssetCollectionModuleParamsClass
|
|
10
11
|
from .....schema_classes import DataHubPageModuleParamsClass
|
|
11
12
|
from .....schema_classes import DataHubPageModulePropertiesClass
|
|
12
13
|
from .....schema_classes import DataHubPageModuleTypeClass
|
|
@@ -16,6 +17,7 @@ from .....schema_classes import PageModuleScopeClass
|
|
|
16
17
|
from .....schema_classes import RichTextModuleParamsClass
|
|
17
18
|
|
|
18
19
|
|
|
20
|
+
AssetCollectionModuleParams = AssetCollectionModuleParamsClass
|
|
19
21
|
DataHubPageModuleParams = DataHubPageModuleParamsClass
|
|
20
22
|
DataHubPageModuleProperties = DataHubPageModulePropertiesClass
|
|
21
23
|
DataHubPageModuleType = DataHubPageModuleTypeClass
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -17723,12 +17723,24 @@
|
|
|
17723
17723
|
"namespace": "com.linkedin.pegasus2avro.module",
|
|
17724
17724
|
"fields": [
|
|
17725
17725
|
{
|
|
17726
|
-
"java": {
|
|
17727
|
-
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
17728
|
-
},
|
|
17729
|
-
"Urn": "Urn",
|
|
17730
17726
|
"type": "string",
|
|
17731
|
-
"name": "
|
|
17727
|
+
"name": "linkUrl"
|
|
17728
|
+
},
|
|
17729
|
+
{
|
|
17730
|
+
"type": [
|
|
17731
|
+
"null",
|
|
17732
|
+
"string"
|
|
17733
|
+
],
|
|
17734
|
+
"name": "imageUrl",
|
|
17735
|
+
"default": null
|
|
17736
|
+
},
|
|
17737
|
+
{
|
|
17738
|
+
"type": [
|
|
17739
|
+
"null",
|
|
17740
|
+
"string"
|
|
17741
|
+
],
|
|
17742
|
+
"name": "description",
|
|
17743
|
+
"default": null
|
|
17732
17744
|
}
|
|
17733
17745
|
]
|
|
17734
17746
|
}
|
|
@@ -17755,6 +17767,31 @@
|
|
|
17755
17767
|
"name": "richTextParams",
|
|
17756
17768
|
"default": null,
|
|
17757
17769
|
"doc": "The params required if the module is type RICH_TEXT"
|
|
17770
|
+
},
|
|
17771
|
+
{
|
|
17772
|
+
"type": [
|
|
17773
|
+
"null",
|
|
17774
|
+
{
|
|
17775
|
+
"type": "record",
|
|
17776
|
+
"name": "AssetCollectionModuleParams",
|
|
17777
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
17778
|
+
"fields": [
|
|
17779
|
+
{
|
|
17780
|
+
"Urn": "Urn",
|
|
17781
|
+
"urn_is_array": true,
|
|
17782
|
+
"type": {
|
|
17783
|
+
"type": "array",
|
|
17784
|
+
"items": "string"
|
|
17785
|
+
},
|
|
17786
|
+
"name": "assetUrns"
|
|
17787
|
+
}
|
|
17788
|
+
],
|
|
17789
|
+
"doc": "The params required if the module is type ASSET_COLLECTION"
|
|
17790
|
+
}
|
|
17791
|
+
],
|
|
17792
|
+
"name": "assetCollectionParams",
|
|
17793
|
+
"default": null,
|
|
17794
|
+
"doc": "The params required if the module is type ASSET_COLLECTION"
|
|
17758
17795
|
}
|
|
17759
17796
|
],
|
|
17760
17797
|
"doc": "The specific parameters stored for a module"
|
|
@@ -87,12 +87,24 @@
|
|
|
87
87
|
"namespace": "com.linkedin.pegasus2avro.module",
|
|
88
88
|
"fields": [
|
|
89
89
|
{
|
|
90
|
-
"java": {
|
|
91
|
-
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
92
|
-
},
|
|
93
90
|
"type": "string",
|
|
94
|
-
"name": "
|
|
95
|
-
|
|
91
|
+
"name": "linkUrl"
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
"type": [
|
|
95
|
+
"null",
|
|
96
|
+
"string"
|
|
97
|
+
],
|
|
98
|
+
"name": "imageUrl",
|
|
99
|
+
"default": null
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
"type": [
|
|
103
|
+
"null",
|
|
104
|
+
"string"
|
|
105
|
+
],
|
|
106
|
+
"name": "description",
|
|
107
|
+
"default": null
|
|
96
108
|
}
|
|
97
109
|
]
|
|
98
110
|
}
|
|
@@ -119,6 +131,31 @@
|
|
|
119
131
|
"name": "richTextParams",
|
|
120
132
|
"default": null,
|
|
121
133
|
"doc": "The params required if the module is type RICH_TEXT"
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"type": [
|
|
137
|
+
"null",
|
|
138
|
+
{
|
|
139
|
+
"type": "record",
|
|
140
|
+
"name": "AssetCollectionModuleParams",
|
|
141
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
142
|
+
"fields": [
|
|
143
|
+
{
|
|
144
|
+
"type": {
|
|
145
|
+
"type": "array",
|
|
146
|
+
"items": "string"
|
|
147
|
+
},
|
|
148
|
+
"name": "assetUrns",
|
|
149
|
+
"Urn": "Urn",
|
|
150
|
+
"urn_is_array": true
|
|
151
|
+
}
|
|
152
|
+
],
|
|
153
|
+
"doc": "The params required if the module is type ASSET_COLLECTION"
|
|
154
|
+
}
|
|
155
|
+
],
|
|
156
|
+
"name": "assetCollectionParams",
|
|
157
|
+
"default": null,
|
|
158
|
+
"doc": "The params required if the module is type ASSET_COLLECTION"
|
|
122
159
|
}
|
|
123
160
|
],
|
|
124
161
|
"doc": "The specific parameters stored for a module"
|