acryl-datahub 1.2.0.10rc4__py3-none-any.whl → 1.2.0.10rc5__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.10rc4.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/METADATA +2528 -2502
- {acryl_datahub-1.2.0.10rc4.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/RECORD +20 -13
- {acryl_datahub-1.2.0.10rc4.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/ingestion/autogenerated/capability_summary.json +33 -1
- datahub/ingestion/source/looker/looker_source.py +52 -3
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/tableau/tableau.py +3 -3
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/sdk/_shared.py +126 -0
- datahub/sdk/chart.py +87 -30
- datahub/sdk/dashboard.py +79 -32
- {acryl_datahub-1.2.0.10rc4.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.10rc4.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.10rc4.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.2.0.10rc4.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/RECORD
RENAMED
@@ -1,7 +1,7 @@
-acryl_datahub-1.2.0.
+acryl_datahub-1.2.0.10rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=
+datahub/_version.py,sha256=3JayCFLMEiiPnmNQMtSHZ8p5gHdHdXKS7AvrQiPCs3g,324
 datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
 datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -159,7 +159,7 @@ datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
 datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
 datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=0BwkpLhORbsiTHq0g_N_1cVVoZYdLR3qz02mNmsV9-M,4444
 datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/autogenerated/capability_summary.json,sha256=
+datahub/ingestion/autogenerated/capability_summary.json,sha256=cSA7jRLLphwkM9C9LK6HdbWRNM0s2febr-4Rh69vQss,111506
 datahub/ingestion/autogenerated/lineage.json,sha256=8BdZF-5V5kJbX4mfFav8Zg-jHjzfkAEGk-pu1atLN4I,10029
 datahub/ingestion/autogenerated/lineage_helper.py,sha256=I_k1pZSCCCjDbUVifPTfy6fkmV8jqdVhbirE8EkpmxI,4748
 datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -392,7 +392,7 @@ datahub/ingestion/source/looker/looker_file_loader.py,sha256=gb2Z97_w28MsybYe01J
 datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=6smUt_Ya7ZJMHWdGZl3TnhM7XHZVpYQ6gz2i5hHejZ4,11547
 datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=27WnOuTghayaH-HL4lLoq0IcHvNm1UybMqMnoaxN8Cs,5383
 datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
-datahub/ingestion/source/looker/looker_source.py,sha256=
+datahub/ingestion/source/looker/looker_source.py,sha256=7mRlIJq2DoM1h2y-heNdNoqok8sNl7Qmpwsx0dQsYP8,67273
 datahub/ingestion/source/looker/looker_template_language.py,sha256=5fZFPKFP3IYbJg3jLifjaji4wWg8wRy-1XDvc8Qucus,17949
 datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
 datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
@@ -482,6 +482,12 @@ datahub/ingestion/source/sigma/sigma.py,sha256=7dbkwk8_wp94XH9mhmtI_8ihR35cqYywt
 datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
 datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/slack/slack.py,sha256=JWanUfzFGynV_PWcH0YzJIbRcmL880DA6dEI9QW-QiQ,25800
+datahub/ingestion/source/snaplogic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datahub/ingestion/source/snaplogic/snaplogic.py,sha256=VoCRSBS0kE4RAsn4hOJgQy3uadFvelLIOv3I0_gne-w,13540
+datahub/ingestion/source/snaplogic/snaplogic_config.py,sha256=oxuNBfAHWMHoOvh52gifOFcBOSN8aaPpFC8QgmgXwWI,1445
+datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py,sha256=IzCYwXLN6IfRFqns7XXtZxzQjjHC-XNTGXgEMeSfW8U,3776
+datahub/ingestion/source/snaplogic/snaplogic_parser.py,sha256=q5dRfWtOpSELPZrpyLbszOD49MJBXNbKgnITLMPiyGI,5783
+datahub/ingestion/source/snaplogic/snaplogic_utils.py,sha256=SVrV9ZXVE2cKKPfoVsxjBN2fIcpYbs2PBLiyQIcJMVQ,1068
 datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/snowflake/constants.py,sha256=iDTamMozHwLYyglpRfqwTbxPxYPhb-uJGRHIgDRHUkA,2767
 datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
@@ -554,7 +560,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
 datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
 datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
 datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/tableau/tableau.py,sha256=
+datahub/ingestion/source/tableau/tableau.py,sha256=BgR_-IuzUYuaSV3EOvMcefwBZ_h0dYAvNEn5Dv6meA4,158448
 datahub/ingestion/source/tableau/tableau_common.py,sha256=2vE7DIigPvMNcTCWSou0tliaVy9MgFR1qwqnE4pilw8,27086
 datahub/ingestion/source/tableau/tableau_constant.py,sha256=2WPAHN-GAR83_c3eTTNd8cy0-zC8GIXeUdSxX_mNdas,2608
 datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=wsVD0SkGUwb-H9_g0aDclKwYkcoxugaWyAcyAMgBCAU,1136
@@ -612,6 +618,7 @@ datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py,sha256=jTURu
 datahub/ingestion/transformer/pattern_cleanup_ownership.py,sha256=YJH4lv1ztKAYwsdRz5RiUu6SX08h2GBr1S-9QlcmB18,3755
 datahub/ingestion/transformer/remove_dataset_ownership.py,sha256=kHiIcT19BDKNIuCQqAj827E1ZEvME0eGGrILEACALRc,1195
 datahub/ingestion/transformer/replace_external_url.py,sha256=Nw2V1m86fD1vMGLYGPlaoIJEORV0O9qAqWydL-1n2Ng,4058
+datahub/ingestion/transformer/set_browse_path.py,sha256=K8Y4O9vjeJQCdYGsFuNE0aClT73HKBpmj51Yy6Jm_uQ,4065
 datahub/ingestion/transformer/tags_to_terms.py,sha256=VDcd7cM5hGCxo6QP1x4RNEw5Q9v4WDxjRhQMpAl95-A,5558
 datahub/ingestion/transformer/transform_registry.py,sha256=bartmA1zEaULNy5W1Q7gRF8h5Y57BFC6XNOGfCzh1Zw,251
 datahub/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -957,11 +964,11 @@ datahub/pydantic/compat.py,sha256=TUEo4kSEeOWVAhV6LQtst1phrpVgGtK4uif4OI5vQ2M,19
 datahub/sdk/__init__.py,sha256=66OOcFi7qlnL6q72c_yUX2mWU2HudbOdRsC5CIoDxow,1922
 datahub/sdk/_all_entities.py,sha256=eQAmD_fcEHlTShe1_nHpdvHxLDN9njk9bdLnuTrYg8M,905
 datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1321
-datahub/sdk/_shared.py,sha256=
+datahub/sdk/_shared.py,sha256=tlyxyxgo7x-8sJhUG9VvUiLpxcOP-GIg4ToqPixagbw,34221
 datahub/sdk/_utils.py,sha256=oXE2BzsXE5zmSkCP3R1tObD4RHnPeH_ps83D_Dw9JaQ,1169
-datahub/sdk/chart.py,sha256=
+datahub/sdk/chart.py,sha256=gsyq_saUInGJKm0s4wwZ9pLMyWLmodrDhrwdZ_Y2h9E,13902
 datahub/sdk/container.py,sha256=IjnFVGDpSFDvgHuuMb7C3VdBxhJuIMq0q6crOs5PupE,7899
-datahub/sdk/dashboard.py,sha256=
+datahub/sdk/dashboard.py,sha256=Vsy_geQzwk3xzqpSuywbgQJ6T__nEvfK5rmrtvY2DPo,16812
 datahub/sdk/dataflow.py,sha256=gdAPVVkyKvsKtsa1AwhN_LpzidG_XzV3nhtd1cjnzDA,11128
 datahub/sdk/datajob.py,sha256=5kU0txTDcn2ce3AhNry83TazPVhoYZ2rAPPNWM1_FP8,13677
 datahub/sdk/dataset.py,sha256=-C4TCJAs1PFkLAgkUZEU1JOg3orm7AAIkqjw7oo_4PQ,31400
@@ -1114,8 +1121,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.2.0.
-acryl_datahub-1.2.0.
-acryl_datahub-1.2.0.
-acryl_datahub-1.2.0.
-acryl_datahub-1.2.0.
+acryl_datahub-1.2.0.10rc5.dist-info/METADATA,sha256=Nm6aD5BhD2bytkuh33mzJCtDW23cDHNlIw5fboab3dU,184162
+acryl_datahub-1.2.0.10rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+acryl_datahub-1.2.0.10rc5.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
+acryl_datahub-1.2.0.10rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.2.0.10rc5.dist-info/RECORD,,
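For reference when reading the RECORD diff above: each entry has the form `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with trailing `=` padding stripped (the standard wheel RECORD format). A minimal sketch of recomputing such an entry from an unpacked wheel; the local path used below is hypothetical and the sketch is not part of this diff:

# Sketch: recompute a wheel RECORD entry (urlsafe-b64 SHA-256 without padding, plus file size).
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"


# Hypothetical usage against an unpacked 1.2.0.10rc5 wheel:
# print(record_entry("datahub/_version.py"))
# The output should match the corresponding RECORD entry above.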
{acryl_datahub-1.2.0.10rc4.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/entry_points.txt
RENAMED
@@ -94,6 +94,7 @@ sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource
 salesforce = datahub.ingestion.source.salesforce:SalesforceSource
 sigma = datahub.ingestion.source.sigma.sigma:SigmaSource
 slack = datahub.ingestion.source.slack.slack:SlackSource
+snaplogic = datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource
 snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source
 snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource
 snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource
@@ -130,6 +131,7 @@ pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_clean
 pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership
 replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset
 replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer
+set_browse_path = datahub.ingestion.transformer.set_browse_path:SetBrowsePathTransformer
 set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer
 simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct
 simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain
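These two new entry points are what make the plugin names resolvable from an ingestion recipe: `snaplogic` as a source type and `set_browse_path` as a transformer type. A small sketch of checking the registration after installing the new wheel; the entry-point group names below are assumptions based on DataHub's usual packaging layout, not taken from this diff (the authoritative names are the section headers inside entry_points.txt):

# Sketch (Python 3.10+): list DataHub plugin registrations via importlib.metadata.
# The group names are assumed, not confirmed by this diff.
from importlib.metadata import entry_points

for group in (
    "datahub.ingestion.source.plugins",
    "datahub.ingestion.transformer.plugins",
):
    names = sorted(ep.name for ep in entry_points(group=group))
    print(group, "->", names)
# After installing 1.2.0.10rc5, "snaplogic" and "set_browse_path" should appear
# in the source and transformer lists respectively.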
datahub/_version.py
CHANGED
datahub/ingestion/autogenerated/capability_summary.json
CHANGED
@@ -2968,6 +2968,38 @@
       "platform_name": "Slack",
       "support_status": "TESTING"
     },
+    "snaplogic": {
+      "capabilities": [
+        {
+          "capability": "LINEAGE_FINE",
+          "description": "Enabled by default",
+          "subtype_modifier": null,
+          "supported": true
+        },
+        {
+          "capability": "DELETION_DETECTION",
+          "description": "Not supported yet",
+          "subtype_modifier": null,
+          "supported": false
+        },
+        {
+          "capability": "PLATFORM_INSTANCE",
+          "description": "Snaplogic does not support platform instances",
+          "subtype_modifier": null,
+          "supported": false
+        },
+        {
+          "capability": "LINEAGE_COARSE",
+          "description": "Enabled by default",
+          "subtype_modifier": null,
+          "supported": true
+        }
+      ],
+      "classname": "datahub.ingestion.source.snaplogic.snaplogic.SnaplogicSource",
+      "platform_id": "snaplogic",
+      "platform_name": "Snaplogic",
+      "support_status": "TESTING"
+    },
     "snowflake": {
       "capabilities": [
         {
@@ -3617,4 +3649,4 @@
       "support_status": "CERTIFIED"
     }
   }
-}
+}
datahub/ingestion/source/looker/looker_source.py
CHANGED
@@ -736,7 +736,16 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             display_name=dashboard_element.title,  # title is (deprecated) using display_name
             extra_aspects=chart_extra_aspects,
             input_datasets=dashboard_element.get_view_urns(self.source_config),
-            last_modified=self._get_last_modified_time(
+            last_modified=self._get_last_modified_time(
+                dashboard
+            ),  # Inherited from Dashboard
+            last_modified_by=self._get_last_modified_by(
+                dashboard
+            ),  # Inherited from Dashboard
+            created_at=self._get_created_at(dashboard),  # Inherited from Dashboard
+            created_by=self._get_created_by(dashboard),  # Inherited from Dashboard
+            deleted_on=self._get_deleted_on(dashboard),  # Inherited from Dashboard
+            deleted_by=self._get_deleted_by(dashboard),  # Inherited from Dashboard
             name=dashboard_element.get_urn_element_id(),
             owners=chart_ownership,
             parent_container=chart_parent_container,
@@ -803,6 +812,11 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             display_name=looker_dashboard.title,  # title is (deprecated) using display_name
             extra_aspects=dashboard_extra_aspects,
             last_modified=self._get_last_modified_time(looker_dashboard),
+            last_modified_by=self._get_last_modified_by(looker_dashboard),
+            created_at=self._get_created_at(looker_dashboard),
+            created_by=self._get_created_by(looker_dashboard),
+            deleted_on=self._get_deleted_on(looker_dashboard),
+            deleted_by=self._get_deleted_by(looker_dashboard),
             name=looker_dashboard.get_urn_dashboard_id(),
             owners=dashboard_ownership,
             parent_container=dashboard_parent_container,
@@ -988,9 +1002,44 @@
     def _get_last_modified_time(
         self, looker_dashboard: Optional[LookerDashboard]
     ) -> Optional[datetime.datetime]:
-        if looker_dashboard
+        return looker_dashboard.last_updated_at if looker_dashboard else None
+
+    def _get_last_modified_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.last_updated_by:
+            return None
+        return looker_dashboard.last_updated_by.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )
+
+    def _get_created_at(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[datetime.datetime]:
+        return looker_dashboard.created_at if looker_dashboard else None
+
+    def _get_created_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.owner:
+            return None
+        return looker_dashboard.owner.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )
+
+    def _get_deleted_on(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[datetime.datetime]:
+        return looker_dashboard.deleted_at if looker_dashboard else None
+
+    def _get_deleted_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.deleted_by:
             return None
-        return looker_dashboard.
+        return looker_dashboard.deleted_by.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )

     def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
         assert folder.id
datahub/ingestion/source/snaplogic/__init__.py
ADDED
File without changes
datahub/ingestion/source/snaplogic/snaplogic.py
ADDED
@@ -0,0 +1,355 @@
+from typing import Iterable, List, Optional
+
+from datahub.emitter.mce_builder import (
+    make_data_flow_urn,
+    make_data_job_urn,
+    make_data_platform_urn,
+    make_dataset_urn_with_platform_instance,
+    make_schema_field_urn,
+)
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import (
+    SupportStatus,
+    capability,
+    config_class,
+    platform_name,
+    support_status,
+)
+from datahub.ingestion.api.source import (
+    MetadataWorkUnitProcessor,
+    SourceCapability,
+    SourceReport,
+)
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.graph.client import DataHubGraph
+from datahub.ingestion.source.snaplogic.snaplogic_config import SnaplogicConfig
+from datahub.ingestion.source.snaplogic.snaplogic_lineage_extractor import (
+    SnaplogicLineageExtractor,
+)
+from datahub.ingestion.source.snaplogic.snaplogic_parser import (
+    ColumnMapping,
+    Dataset,
+    SnapLogicParser,
+)
+from datahub.ingestion.source.snaplogic.snaplogic_utils import SnaplogicUtils
+from datahub.ingestion.source.state.redundant_run_skip_handler import (
+    RedundantLineageRunSkipHandler,
+)
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+    StaleEntityRemovalSourceReport,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulIngestionSourceBase,
+)
+from datahub.metadata.schema_classes import (
+    DataFlowInfoClass,
+    DataJobInfoClass,
+    DataJobInputOutputClass,
+    DatasetPropertiesClass,
+    FineGrainedLineageClass,
+    FineGrainedLineageDownstreamTypeClass,
+    OtherSchemaClass,
+    SchemaFieldClass,
+    SchemaMetadataClass,
+)
+
+
+@platform_name("Snaplogic")
+@config_class(SnaplogicConfig)
+@support_status(SupportStatus.TESTING)
+@capability(
+    SourceCapability.PLATFORM_INSTANCE,
+    "Snaplogic does not support platform instances",
+    supported=False,
+)
+@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
+@capability(SourceCapability.LINEAGE_FINE, "Enabled by default")
+@capability(SourceCapability.DELETION_DETECTION, "Not supported yet", supported=False)
+class SnaplogicSource(StatefulIngestionSourceBase):
+    """
+    A source plugin for ingesting lineage and metadata from Snaplogic.
+    """
+
+    def __init__(self, config: SnaplogicConfig, ctx: PipelineContext):
+        super().__init__(config, ctx)
+        self.config = config
+        self.report = StaleEntityRemovalSourceReport()
+        self.graph: Optional[DataHubGraph] = ctx.graph
+        self.snaplogic_parser = SnapLogicParser(
+            config.case_insensitive_namespaces, self.config.namespace_mapping
+        )
+        self.redundant_lineage_run_skip_handler: Optional[
+            RedundantLineageRunSkipHandler
+        ] = None
+        if self.config.enable_stateful_lineage_ingestion:
+            self.redundant_lineage_run_skip_handler = RedundantLineageRunSkipHandler(
+                source=self,
+                config=self.config,
+                pipeline_name=ctx.pipeline_name,
+                run_id=ctx.run_id,
+            )
+        self.snaplogic_lineage_extractor = SnaplogicLineageExtractor(
+            config=config,
+            redundant_run_skip_handler=self.redundant_lineage_run_skip_handler,
+            report=self.report,
+        )
+
+    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+        try:
+            self.report.info(
+                message="Starting lineage ingestion from Snaplogic",
+                title="Lineage Ingestion",
+            )
+
+            records_processed = 0
+            for lineage in self.snaplogic_lineage_extractor.get_lineages():
+                try:
+                    for workunit in self._process_lineage_record(lineage):
+                        yield workunit
+                    records_processed += 1
+
+                    if records_processed % 20 == 0:
+                        self.report.info(
+                            message=f"Processed {records_processed} lineage records",
+                            title="Lineage Ingestion Progress",
+                        )
+                except Exception as e:
+                    self.report.report_failure(
+                        message="Failed to process lineage record",
+                        context=str(lineage),
+                        exc=e,
+                    )
+            self.report.info(
+                message=f"Completed processing {records_processed} lineage records",
+                title="Lineage Ingestion Complete",
+            )
+            self.snaplogic_lineage_extractor.report_status("lineage_ingestion", True)
+            self.snaplogic_lineage_extractor.update_stats()
+        except Exception as e:
+            self.report.report_failure(message="Failed to fetch lineages", exc=e)
+            self.snaplogic_lineage_extractor.report_status("lineage_ingestion", False)
+
+    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+        return [
+            *super().get_workunit_processors(),
+            StaleEntityRemovalHandler.create(
+                self, self.config, self.ctx
+            ).workunit_processor,
+        ]
+
+    def _process_lineage_record(self, lineage: dict) -> Iterable[MetadataWorkUnit]:
+        """Process a lineage record to create pipeline and task workunits with relationships."""
+        producer = lineage.get("producer")
+        if not producer:
+            return
+        pipeline_snode_id = producer.split("#pipe_snode=")[1]
+        if not pipeline_snode_id:
+            return
+        datasets = self.snaplogic_parser.extract_datasets_from_lineage(lineage)
+        pipeline = self.snaplogic_parser.extract_pipeline_from_lineage(lineage)
+        task = self.snaplogic_parser.extract_task_from_lineage(lineage)
+        columns_mapping = self.snaplogic_parser.extract_columns_mapping_from_lineage(
+            lineage
+        )
+
+        # Create pipeline MCP
+        for pipeline_workunit in self.create_pipeline_mcp(
+            name=pipeline.name,
+            pipeline_snode_id=pipeline.id,
+            namespace=pipeline.namespace,
+        ):
+            self.report.report_workunit(pipeline_workunit)
+            yield pipeline_workunit
+
+        # Create dataset MCP
+        for dataset in datasets:
+            for dataset_workunit in self.create_dataset_mcp(
+                dataset_name=dataset.name,
+                dataset_display_name=dataset.display_name,
+                fields=dataset.fields,
+                platform=dataset.platform,
+                platform_instance=dataset.platform_instance,
+            ):
+                self.report.report_workunit(dataset_workunit)
+                yield dataset_workunit
+
+        # Create task MCP
+        for task_workunit in self.create_task_mcp(
+            name=task.name,
+            task_id=task.id,
+            namespace=task.namespace,
+            pipeline_snode_id=pipeline_snode_id,
+            input_datasets=[dataset for dataset in datasets if dataset.type == "INPUT"],
+            output_datasets=[
+                dataset for dataset in datasets if dataset.type == "OUTPUT"
+            ],
+            columns_mapping=columns_mapping,
+        ):
+            self.report.report_workunit(task_workunit)
+            yield task_workunit
+
+    def create_task_mcp(
+        self,
+        task_id: str,
+        name: str,
+        namespace: str,
+        pipeline_snode_id: str,
+        input_datasets: list[Dataset],
+        output_datasets: list[Dataset],
+        columns_mapping: list[ColumnMapping],
+    ) -> Iterable[MetadataWorkUnit]:
+        """Create MCPs for a task (snap) including metadata and lineage."""
+        job_urn = make_data_job_urn(
+            orchestrator=namespace,
+            flow_id=pipeline_snode_id,
+            job_id=task_id,
+            cluster="PROD",
+        )
+        yield MetadataChangeProposalWrapper(
+            entityUrn=job_urn,
+            aspect=DataJobInfoClass(
+                name=name,
+                description="",
+                externalUrl=f"{self.config.base_url}/sl/designer.html?v=21818#pipe_snode={pipeline_snode_id}",
+                type="SNAPLOGIC_SNAP",
+            ),
+        ).as_workunit()
+
+        # Helper functions
+        def dataset_urn(d: Dataset) -> str:
+            return make_dataset_urn_with_platform_instance(
+                d.platform, d.name, d.platform_instance
+            )
+
+        def field_urn(d, f):
+            return make_schema_field_urn(dataset_urn(d), f["name"])
+
+        # Emit lineage
+        yield MetadataChangeProposalWrapper(
+            entityUrn=job_urn,
+            aspect=DataJobInputOutputClass(
+                inputDatasets=[dataset_urn(d) for d in input_datasets],
+                outputDatasets=[dataset_urn(d) for d in output_datasets],
+                inputDatasetFields=[
+                    field_urn(d, f) for d in input_datasets for f in d.fields
+                ],
+                outputDatasetFields=[
+                    field_urn(d, f) for d in output_datasets for f in d.fields
+                ],
+                fineGrainedLineages=[
+                    FineGrainedLineageClass(
+                        upstreamType=FineGrainedLineageDownstreamTypeClass.FIELD_SET,
+                        upstreams=[
+                            make_schema_field_urn(
+                                make_dataset_urn_with_platform_instance(
+                                    cl.input_dataset.platform,
+                                    cl.input_dataset.name,
+                                    cl.input_dataset.platform_instance,
+                                    cl.input_dataset.env,
+                                ),
+                                cl.input_field,
+                            )
+                        ],
+                        downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD_SET,
+                        downstreams=[
+                            make_schema_field_urn(
+                                make_dataset_urn_with_platform_instance(
+                                    cl.output_dataset.platform,
+                                    cl.output_dataset.name,
+                                    cl.output_dataset.platform_instance,
+                                    cl.output_dataset.env,
+                                ),
+                                cl.output_field,
+                            )
+                        ],
+                    )
+                    for cl in columns_mapping
+                ],
+            ),
+        ).as_workunit()
+
+    def create_dataset_mcp(
+        self,
+        dataset_name: str,
+        dataset_display_name: str,
+        fields: list[dict],
+        platform: str = "snaplogic",
+        env: str = "PROD",
+        platform_instance: Optional[str] = None,
+    ) -> Iterable[MetadataWorkUnit]:
+        dataset_urn = make_dataset_urn_with_platform_instance(
+            platform=platform,
+            name=dataset_name,
+            env=env,
+            platform_instance=platform_instance,
+        )
+
+        # Skip dataset creation if:
+        # 1. The platform is not "snaplogic" AND
+        # 2. Either:
+        #    a) The config `create_non_snaplogic_datasets` is disabled (False), meaning
+        #       we do not create datasets for non-snaplogic platforms, OR
+        #    b) The dataset already exists in DataHub (`self.graph.exists(dataset_urn)`).
+        if platform != "snaplogic" and (
+            not self.config.create_non_snaplogic_datasets
+            or (self.graph and self.graph.exists(dataset_urn))
+        ):
+            return
+
+        dataset_properties = DatasetPropertiesClass(
+            name=dataset_display_name,
+            qualifiedName=dataset_name,
+        )
+        schema_fields = [
+            SchemaFieldClass(
+                fieldPath=field["name"],
+                type=SnaplogicUtils.get_datahub_type(field.get("type", "Varchar")),
+                nativeDataType=field.get("type", "Varchar"),
+            )
+            for field in fields
+        ]
+        schema_metadata = SchemaMetadataClass(
+            schemaName=dataset_name,
+            platform=make_data_platform_urn(platform),
+            version=0,
+            hash="",
+            platformSchema=OtherSchemaClass(rawSchema=""),
+            fields=schema_fields,
+        )
+
+        yield MetadataChangeProposalWrapper(
+            entityUrn=dataset_urn, aspect=dataset_properties
+        ).as_workunit()
+
+        yield MetadataChangeProposalWrapper(
+            entityUrn=dataset_urn, aspect=schema_metadata
+        ).as_workunit()
+
+    def create_pipeline_mcp(
+        self, name: str, namespace: str, pipeline_snode_id: str
+    ) -> Iterable[MetadataWorkUnit]:
+        flow_urn = make_data_flow_urn(
+            orchestrator=namespace, flow_id=pipeline_snode_id, cluster="PROD"
+        )
+
+        yield MetadataChangeProposalWrapper(
+            entityUrn=flow_urn,
+            aspect=DataFlowInfoClass(
+                name=name,
+                description="",
+                externalUrl=f"{self.config.base_url}/sl/designer.html?v=21818#pipe_snode={pipeline_snode_id}",
+            ),
+        ).as_workunit()
+
+    def get_report(self) -> SourceReport:
+        return self.report
+
+    def close(self) -> None:
+        super().close()
+
+    @classmethod
+    def create(cls, config_dict: dict, ctx: PipelineContext) -> "SnaplogicSource":
+        config = SnaplogicConfig.parse_obj(config_dict)
+        return cls(config, ctx)
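The URNs this source emits come from the standard mce_builder helpers used above, with the lineage namespace acting as the orchestrator and a hard-coded "PROD" cluster. A small sketch with invented values, only to illustrate the URN shapes that create_pipeline_mcp / create_task_mcp / create_dataset_mcp would produce; the identifiers below are hypothetical:

# Sketch with hypothetical values; shows the URN shapes the builder calls produce.
from datahub.emitter.mce_builder import (
    make_data_flow_urn,
    make_data_job_urn,
    make_dataset_urn_with_platform_instance,
    make_schema_field_urn,
)

flow_urn = make_data_flow_urn(
    orchestrator="snaplogic", flow_id="pipe_snode_123", cluster="PROD"
)
job_urn = make_data_job_urn(
    orchestrator="snaplogic", flow_id="pipe_snode_123", job_id="snap_456", cluster="PROD"
)
dataset_urn = make_dataset_urn_with_platform_instance(
    "snowflake", "analytics.public.orders", platform_instance=None
)
field_urn = make_schema_field_urn(dataset_urn, "order_id")

# Expected shapes (approximate):
# flow_urn    -> urn:li:dataFlow:(snaplogic,pipe_snode_123,PROD)
# job_urn     -> urn:li:dataJob:(urn:li:dataFlow:(snaplogic,pipe_snode_123,PROD),snap_456)
# dataset_urn -> urn:li:dataset:(urn:li:dataPlatform:snowflake,analytics.public.orders,PROD)
# field_urn   -> urn:li:schemaField:(<dataset_urn>,order_id)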
datahub/ingestion/source/snaplogic/snaplogic_config.py
ADDED
@@ -0,0 +1,37 @@
+from typing import Optional
+
+from pydantic import Field, SecretStr
+
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StatefulIngestionConfigBase,
+    StatefulStaleMetadataRemovalConfig,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulLineageConfigMixin,
+    StatefulUsageConfigMixin,
+)
+
+
+class SnaplogicConfig(
+    StatefulIngestionConfigBase, StatefulLineageConfigMixin, StatefulUsageConfigMixin
+):
+    platform: str = "Snaplogic"
+    username: str = Field(description="Username")
+    password: SecretStr = Field(description="Password")
+    base_url: str = Field(
+        default="https://elastic.snaplogic.com",
+        description="Url to your Snaplogic instance: `https://elastic.snaplogic.com`, or similar. Used for making API calls to Snaplogic.",
+    )
+    org_name: str = Field(description="Organization name from Snaplogic instance")
+    namespace_mapping: dict = Field(
+        default={}, description="Mapping of namespaces to platform instances"
+    )
+    case_insensitive_namespaces: list = Field(
+        default=[],
+        description="List of namespaces that should be treated as case insensitive",
+    )
+    create_non_snaplogic_datasets: bool = Field(
+        default=False,
+        description="Whether to create datasets for non-Snaplogic datasets (e.g., databases, S3, etc.)",
+    )
+    stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None