acryl-datahub 1.2.0.10rc4__py3-none-any.whl → 1.2.0.10rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.10rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.2.0.10rc5.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=SkqxUSRQtqthxeXaTNtU22wjLSszUJ_BVZbM342CgWI,324
4
+ datahub/_version.py,sha256=3JayCFLMEiiPnmNQMtSHZ8p5gHdHdXKS7AvrQiPCs3g,324
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -159,7 +159,7 @@ datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
159
159
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
160
160
  datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=0BwkpLhORbsiTHq0g_N_1cVVoZYdLR3qz02mNmsV9-M,4444
161
161
  datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
- datahub/ingestion/autogenerated/capability_summary.json,sha256=6n3XZj4xPZ3WVfQ29QQvqX5ancSXAqvjaMfpDqFQ8LI,110531
162
+ datahub/ingestion/autogenerated/capability_summary.json,sha256=cSA7jRLLphwkM9C9LK6HdbWRNM0s2febr-4Rh69vQss,111506
163
163
  datahub/ingestion/autogenerated/lineage.json,sha256=8BdZF-5V5kJbX4mfFav8Zg-jHjzfkAEGk-pu1atLN4I,10029
164
164
  datahub/ingestion/autogenerated/lineage_helper.py,sha256=I_k1pZSCCCjDbUVifPTfy6fkmV8jqdVhbirE8EkpmxI,4748
165
165
  datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -392,7 +392,7 @@ datahub/ingestion/source/looker/looker_file_loader.py,sha256=gb2Z97_w28MsybYe01J
392
392
  datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=6smUt_Ya7ZJMHWdGZl3TnhM7XHZVpYQ6gz2i5hHejZ4,11547
393
393
  datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=27WnOuTghayaH-HL4lLoq0IcHvNm1UybMqMnoaxN8Cs,5383
394
394
  datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
395
- datahub/ingestion/source/looker/looker_source.py,sha256=a-G_73NWHD0YPDetT-Eyvq5KenJjqbmb-bV5JMOByNU,65048
395
+ datahub/ingestion/source/looker/looker_source.py,sha256=7mRlIJq2DoM1h2y-heNdNoqok8sNl7Qmpwsx0dQsYP8,67273
396
396
  datahub/ingestion/source/looker/looker_template_language.py,sha256=5fZFPKFP3IYbJg3jLifjaji4wWg8wRy-1XDvc8Qucus,17949
397
397
  datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
398
398
  datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
@@ -482,6 +482,12 @@ datahub/ingestion/source/sigma/sigma.py,sha256=7dbkwk8_wp94XH9mhmtI_8ihR35cqYywt
482
482
  datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
483
483
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
484
484
  datahub/ingestion/source/slack/slack.py,sha256=JWanUfzFGynV_PWcH0YzJIbRcmL880DA6dEI9QW-QiQ,25800
485
+ datahub/ingestion/source/snaplogic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
486
+ datahub/ingestion/source/snaplogic/snaplogic.py,sha256=VoCRSBS0kE4RAsn4hOJgQy3uadFvelLIOv3I0_gne-w,13540
487
+ datahub/ingestion/source/snaplogic/snaplogic_config.py,sha256=oxuNBfAHWMHoOvh52gifOFcBOSN8aaPpFC8QgmgXwWI,1445
488
+ datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py,sha256=IzCYwXLN6IfRFqns7XXtZxzQjjHC-XNTGXgEMeSfW8U,3776
489
+ datahub/ingestion/source/snaplogic/snaplogic_parser.py,sha256=q5dRfWtOpSELPZrpyLbszOD49MJBXNbKgnITLMPiyGI,5783
490
+ datahub/ingestion/source/snaplogic/snaplogic_utils.py,sha256=SVrV9ZXVE2cKKPfoVsxjBN2fIcpYbs2PBLiyQIcJMVQ,1068
485
491
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
486
492
  datahub/ingestion/source/snowflake/constants.py,sha256=iDTamMozHwLYyglpRfqwTbxPxYPhb-uJGRHIgDRHUkA,2767
487
493
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
@@ -554,7 +560,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
554
560
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
555
561
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
556
562
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
557
- datahub/ingestion/source/tableau/tableau.py,sha256=Ml-xSQei-o1LFsQKNP9cUY-VQypa68blFy6amZKuxEU,158343
563
+ datahub/ingestion/source/tableau/tableau.py,sha256=BgR_-IuzUYuaSV3EOvMcefwBZ_h0dYAvNEn5Dv6meA4,158448
558
564
  datahub/ingestion/source/tableau/tableau_common.py,sha256=2vE7DIigPvMNcTCWSou0tliaVy9MgFR1qwqnE4pilw8,27086
559
565
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=2WPAHN-GAR83_c3eTTNd8cy0-zC8GIXeUdSxX_mNdas,2608
560
566
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=wsVD0SkGUwb-H9_g0aDclKwYkcoxugaWyAcyAMgBCAU,1136
@@ -612,6 +618,7 @@ datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py,sha256=jTURu
612
618
  datahub/ingestion/transformer/pattern_cleanup_ownership.py,sha256=YJH4lv1ztKAYwsdRz5RiUu6SX08h2GBr1S-9QlcmB18,3755
613
619
  datahub/ingestion/transformer/remove_dataset_ownership.py,sha256=kHiIcT19BDKNIuCQqAj827E1ZEvME0eGGrILEACALRc,1195
614
620
  datahub/ingestion/transformer/replace_external_url.py,sha256=Nw2V1m86fD1vMGLYGPlaoIJEORV0O9qAqWydL-1n2Ng,4058
621
+ datahub/ingestion/transformer/set_browse_path.py,sha256=K8Y4O9vjeJQCdYGsFuNE0aClT73HKBpmj51Yy6Jm_uQ,4065
615
622
  datahub/ingestion/transformer/tags_to_terms.py,sha256=VDcd7cM5hGCxo6QP1x4RNEw5Q9v4WDxjRhQMpAl95-A,5558
616
623
  datahub/ingestion/transformer/transform_registry.py,sha256=bartmA1zEaULNy5W1Q7gRF8h5Y57BFC6XNOGfCzh1Zw,251
617
624
  datahub/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -957,11 +964,11 @@ datahub/pydantic/compat.py,sha256=TUEo4kSEeOWVAhV6LQtst1phrpVgGtK4uif4OI5vQ2M,19
957
964
  datahub/sdk/__init__.py,sha256=66OOcFi7qlnL6q72c_yUX2mWU2HudbOdRsC5CIoDxow,1922
958
965
  datahub/sdk/_all_entities.py,sha256=eQAmD_fcEHlTShe1_nHpdvHxLDN9njk9bdLnuTrYg8M,905
959
966
  datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1321
960
- datahub/sdk/_shared.py,sha256=uSLPjXfUl_0SPt-kWirkhE6u1CKOC67q5c4hJzxS2uo,28812
967
+ datahub/sdk/_shared.py,sha256=tlyxyxgo7x-8sJhUG9VvUiLpxcOP-GIg4ToqPixagbw,34221
961
968
  datahub/sdk/_utils.py,sha256=oXE2BzsXE5zmSkCP3R1tObD4RHnPeH_ps83D_Dw9JaQ,1169
962
- datahub/sdk/chart.py,sha256=_gixCcKp6kCMizWMXwNH1Ip1ZqJ05_Iu2t94dmONQFM,11774
969
+ datahub/sdk/chart.py,sha256=gsyq_saUInGJKm0s4wwZ9pLMyWLmodrDhrwdZ_Y2h9E,13902
963
970
  datahub/sdk/container.py,sha256=IjnFVGDpSFDvgHuuMb7C3VdBxhJuIMq0q6crOs5PupE,7899
964
- datahub/sdk/dashboard.py,sha256=ekdchqZ57enVFIfeEOOl0Dk4ec-MFQSncArciTgU1sk,15109
971
+ datahub/sdk/dashboard.py,sha256=Vsy_geQzwk3xzqpSuywbgQJ6T__nEvfK5rmrtvY2DPo,16812
965
972
  datahub/sdk/dataflow.py,sha256=gdAPVVkyKvsKtsa1AwhN_LpzidG_XzV3nhtd1cjnzDA,11128
966
973
  datahub/sdk/datajob.py,sha256=5kU0txTDcn2ce3AhNry83TazPVhoYZ2rAPPNWM1_FP8,13677
967
974
  datahub/sdk/dataset.py,sha256=-C4TCJAs1PFkLAgkUZEU1JOg3orm7AAIkqjw7oo_4PQ,31400
@@ -1114,8 +1121,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1114
1121
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1115
1122
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1116
1123
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1117
- acryl_datahub-1.2.0.10rc4.dist-info/METADATA,sha256=KAedvBAi1vl6TlLfQHVllq9dHQqHoTCd3D0CK5Z-MJ0,182842
1118
- acryl_datahub-1.2.0.10rc4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
- acryl_datahub-1.2.0.10rc4.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
- acryl_datahub-1.2.0.10rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
- acryl_datahub-1.2.0.10rc4.dist-info/RECORD,,
1124
+ acryl_datahub-1.2.0.10rc5.dist-info/METADATA,sha256=Nm6aD5BhD2bytkuh33mzJCtDW23cDHNlIw5fboab3dU,184162
1125
+ acryl_datahub-1.2.0.10rc5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1126
+ acryl_datahub-1.2.0.10rc5.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1127
+ acryl_datahub-1.2.0.10rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1128
+ acryl_datahub-1.2.0.10rc5.dist-info/RECORD,,
@@ -94,6 +94,7 @@ sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource
94
94
  salesforce = datahub.ingestion.source.salesforce:SalesforceSource
95
95
  sigma = datahub.ingestion.source.sigma.sigma:SigmaSource
96
96
  slack = datahub.ingestion.source.slack.slack:SlackSource
97
+ snaplogic = datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource
97
98
  snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source
98
99
  snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource
99
100
  snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource
@@ -130,6 +131,7 @@ pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_clean
130
131
  pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership
131
132
  replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset
132
133
  replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer
134
+ set_browse_path = datahub.ingestion.transformer.set_browse_path:SetBrowsePathTransformer
133
135
  set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer
134
136
  simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct
135
137
  simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.10rc4"
3
+ __version__ = "1.2.0.10rc5"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -2968,6 +2968,38 @@
2968
2968
  "platform_name": "Slack",
2969
2969
  "support_status": "TESTING"
2970
2970
  },
2971
+ "snaplogic": {
2972
+ "capabilities": [
2973
+ {
2974
+ "capability": "LINEAGE_FINE",
2975
+ "description": "Enabled by default",
2976
+ "subtype_modifier": null,
2977
+ "supported": true
2978
+ },
2979
+ {
2980
+ "capability": "DELETION_DETECTION",
2981
+ "description": "Not supported yet",
2982
+ "subtype_modifier": null,
2983
+ "supported": false
2984
+ },
2985
+ {
2986
+ "capability": "PLATFORM_INSTANCE",
2987
+ "description": "Snaplogic does not support platform instances",
2988
+ "subtype_modifier": null,
2989
+ "supported": false
2990
+ },
2991
+ {
2992
+ "capability": "LINEAGE_COARSE",
2993
+ "description": "Enabled by default",
2994
+ "subtype_modifier": null,
2995
+ "supported": true
2996
+ }
2997
+ ],
2998
+ "classname": "datahub.ingestion.source.snaplogic.snaplogic.SnaplogicSource",
2999
+ "platform_id": "snaplogic",
3000
+ "platform_name": "Snaplogic",
3001
+ "support_status": "TESTING"
3002
+ },
2971
3003
  "snowflake": {
2972
3004
  "capabilities": [
2973
3005
  {
@@ -3617,4 +3649,4 @@
3617
3649
  "support_status": "CERTIFIED"
3618
3650
  }
3619
3651
  }
3620
- }
3652
+ }
@@ -736,7 +736,16 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
736
736
  display_name=dashboard_element.title, # title is (deprecated) using display_name
737
737
  extra_aspects=chart_extra_aspects,
738
738
  input_datasets=dashboard_element.get_view_urns(self.source_config),
739
- last_modified=self._get_last_modified_time(dashboard),
739
+ last_modified=self._get_last_modified_time(
740
+ dashboard
741
+ ), # Inherited from Dashboard
742
+ last_modified_by=self._get_last_modified_by(
743
+ dashboard
744
+ ), # Inherited from Dashboard
745
+ created_at=self._get_created_at(dashboard), # Inherited from Dashboard
746
+ created_by=self._get_created_by(dashboard), # Inherited from Dashboard
747
+ deleted_on=self._get_deleted_on(dashboard), # Inherited from Dashboard
748
+ deleted_by=self._get_deleted_by(dashboard), # Inherited from Dashboard
740
749
  name=dashboard_element.get_urn_element_id(),
741
750
  owners=chart_ownership,
742
751
  parent_container=chart_parent_container,
@@ -803,6 +812,11 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
803
812
  display_name=looker_dashboard.title, # title is (deprecated) using display_name
804
813
  extra_aspects=dashboard_extra_aspects,
805
814
  last_modified=self._get_last_modified_time(looker_dashboard),
815
+ last_modified_by=self._get_last_modified_by(looker_dashboard),
816
+ created_at=self._get_created_at(looker_dashboard),
817
+ created_by=self._get_created_by(looker_dashboard),
818
+ deleted_on=self._get_deleted_on(looker_dashboard),
819
+ deleted_by=self._get_deleted_by(looker_dashboard),
806
820
  name=looker_dashboard.get_urn_dashboard_id(),
807
821
  owners=dashboard_ownership,
808
822
  parent_container=dashboard_parent_container,
@@ -988,9 +1002,44 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
988
1002
  def _get_last_modified_time(
989
1003
  self, looker_dashboard: Optional[LookerDashboard]
990
1004
  ) -> Optional[datetime.datetime]:
991
- if looker_dashboard is None:
1005
+ return looker_dashboard.last_updated_at if looker_dashboard else None
1006
+
1007
+ def _get_last_modified_by(
1008
+ self, looker_dashboard: Optional[LookerDashboard]
1009
+ ) -> Optional[str]:
1010
+ if not looker_dashboard or not looker_dashboard.last_updated_by:
1011
+ return None
1012
+ return looker_dashboard.last_updated_by.get_urn(
1013
+ self.source_config.strip_user_ids_from_email
1014
+ )
1015
+
1016
+ def _get_created_at(
1017
+ self, looker_dashboard: Optional[LookerDashboard]
1018
+ ) -> Optional[datetime.datetime]:
1019
+ return looker_dashboard.created_at if looker_dashboard else None
1020
+
1021
+ def _get_created_by(
1022
+ self, looker_dashboard: Optional[LookerDashboard]
1023
+ ) -> Optional[str]:
1024
+ if not looker_dashboard or not looker_dashboard.owner:
1025
+ return None
1026
+ return looker_dashboard.owner.get_urn(
1027
+ self.source_config.strip_user_ids_from_email
1028
+ )
1029
+
1030
+ def _get_deleted_on(
1031
+ self, looker_dashboard: Optional[LookerDashboard]
1032
+ ) -> Optional[datetime.datetime]:
1033
+ return looker_dashboard.deleted_at if looker_dashboard else None
1034
+
1035
+ def _get_deleted_by(
1036
+ self, looker_dashboard: Optional[LookerDashboard]
1037
+ ) -> Optional[str]:
1038
+ if not looker_dashboard or not looker_dashboard.deleted_by:
992
1039
  return None
993
- return looker_dashboard.last_updated_at
1040
+ return looker_dashboard.deleted_by.get_urn(
1041
+ self.source_config.strip_user_ids_from_email
1042
+ )
994
1043
 
995
1044
  def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
996
1045
  assert folder.id
File without changes
@@ -0,0 +1,355 @@
1
+ from typing import Iterable, List, Optional
2
+
3
+ from datahub.emitter.mce_builder import (
4
+ make_data_flow_urn,
5
+ make_data_job_urn,
6
+ make_data_platform_urn,
7
+ make_dataset_urn_with_platform_instance,
8
+ make_schema_field_urn,
9
+ )
10
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
11
+ from datahub.ingestion.api.common import PipelineContext
12
+ from datahub.ingestion.api.decorators import (
13
+ SupportStatus,
14
+ capability,
15
+ config_class,
16
+ platform_name,
17
+ support_status,
18
+ )
19
+ from datahub.ingestion.api.source import (
20
+ MetadataWorkUnitProcessor,
21
+ SourceCapability,
22
+ SourceReport,
23
+ )
24
+ from datahub.ingestion.api.workunit import MetadataWorkUnit
25
+ from datahub.ingestion.graph.client import DataHubGraph
26
+ from datahub.ingestion.source.snaplogic.snaplogic_config import SnaplogicConfig
27
+ from datahub.ingestion.source.snaplogic.snaplogic_lineage_extractor import (
28
+ SnaplogicLineageExtractor,
29
+ )
30
+ from datahub.ingestion.source.snaplogic.snaplogic_parser import (
31
+ ColumnMapping,
32
+ Dataset,
33
+ SnapLogicParser,
34
+ )
35
+ from datahub.ingestion.source.snaplogic.snaplogic_utils import SnaplogicUtils
36
+ from datahub.ingestion.source.state.redundant_run_skip_handler import (
37
+ RedundantLineageRunSkipHandler,
38
+ )
39
+ from datahub.ingestion.source.state.stale_entity_removal_handler import (
40
+ StaleEntityRemovalHandler,
41
+ StaleEntityRemovalSourceReport,
42
+ )
43
+ from datahub.ingestion.source.state.stateful_ingestion_base import (
44
+ StatefulIngestionSourceBase,
45
+ )
46
+ from datahub.metadata.schema_classes import (
47
+ DataFlowInfoClass,
48
+ DataJobInfoClass,
49
+ DataJobInputOutputClass,
50
+ DatasetPropertiesClass,
51
+ FineGrainedLineageClass,
52
+ FineGrainedLineageDownstreamTypeClass,
53
+ OtherSchemaClass,
54
+ SchemaFieldClass,
55
+ SchemaMetadataClass,
56
+ )
57
+
58
+
59
@platform_name("Snaplogic")
@config_class(SnaplogicConfig)
@support_status(SupportStatus.TESTING)
@capability(
    SourceCapability.PLATFORM_INSTANCE,
    "Snaplogic does not support platform instances",
    supported=False,
)
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
@capability(SourceCapability.LINEAGE_FINE, "Enabled by default")
@capability(SourceCapability.DELETION_DETECTION, "Not supported yet", supported=False)
class SnaplogicSource(StatefulIngestionSourceBase):
    """
    A source plugin for ingesting lineage and metadata from Snaplogic.
    """

    # Marker that precedes the pipeline snode id inside a lineage record's
    # "producer" value.
    _PIPE_SNODE_MARKER = "#pipe_snode="

    def __init__(self, config: SnaplogicConfig, ctx: PipelineContext):
        """Wire up the parser, the optional redundant-run handler and the extractor."""
        super().__init__(config, ctx)
        self.config = config
        self.report = StaleEntityRemovalSourceReport()
        self.graph: Optional[DataHubGraph] = ctx.graph
        self.snaplogic_parser = SnapLogicParser(
            config.case_insensitive_namespaces, self.config.namespace_mapping
        )
        # Only created when stateful lineage ingestion is enabled; the extractor
        # receives None otherwise.
        self.redundant_lineage_run_skip_handler: Optional[
            RedundantLineageRunSkipHandler
        ] = None
        if self.config.enable_stateful_lineage_ingestion:
            self.redundant_lineage_run_skip_handler = RedundantLineageRunSkipHandler(
                source=self,
                config=self.config,
                pipeline_name=ctx.pipeline_name,
                run_id=ctx.run_id,
            )
        self.snaplogic_lineage_extractor = SnaplogicLineageExtractor(
            config=config,
            redundant_run_skip_handler=self.redundant_lineage_run_skip_handler,
            report=self.report,
        )

    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
        """Yield work units for every lineage record returned by the extractor.

        A failure while processing one record is reported and the loop moves on
        to the next record. A failure outside the per-record handling (for
        example while fetching lineages) is reported once and the run status is
        recorded as unsuccessful.
        """
        try:
            self.report.info(
                message="Starting lineage ingestion from Snaplogic",
                title="Lineage Ingestion",
            )

            records_processed = 0
            for lineage in self.snaplogic_lineage_extractor.get_lineages():
                try:
                    yield from self._process_lineage_record(lineage)
                    records_processed += 1

                    # Periodic progress entry so long runs show activity.
                    if records_processed % 20 == 0:
                        self.report.info(
                            message=f"Processed {records_processed} lineage records",
                            title="Lineage Ingestion Progress",
                        )
                except Exception as e:
                    self.report.report_failure(
                        message="Failed to process lineage record",
                        context=str(lineage),
                        exc=e,
                    )
            self.report.info(
                message=f"Completed processing {records_processed} lineage records",
                title="Lineage Ingestion Complete",
            )
            self.snaplogic_lineage_extractor.report_status("lineage_ingestion", True)
            self.snaplogic_lineage_extractor.update_stats()
        except Exception as e:
            self.report.report_failure(message="Failed to fetch lineages", exc=e)
            self.snaplogic_lineage_extractor.report_status("lineage_ingestion", False)

    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
        """Return the inherited processors followed by the stale-entity-removal processor."""
        return [
            *super().get_workunit_processors(),
            StaleEntityRemovalHandler.create(
                self, self.config, self.ctx
            ).workunit_processor,
        ]

    def _designer_url(self, pipeline_snode_id: str) -> str:
        """Build the Snaplogic designer link for a pipeline snode id."""
        # Strip a trailing slash so a base_url such as "https://host/" does not
        # produce "https://host//sl/...".
        base_url = self.config.base_url.rstrip("/")
        return f"{base_url}/sl/designer.html?v=21818#pipe_snode={pipeline_snode_id}"

    def _process_lineage_record(self, lineage: dict) -> Iterable[MetadataWorkUnit]:
        """Process a lineage record to create pipeline and task workunits with relationships."""
        producer = lineage.get("producer")
        if not producer:
            return
        # Indexing split(...)[1] directly raised IndexError when the marker was
        # absent, which made the "no pipeline id" guard below unreachable for
        # that case. Check the split result first.
        producer_parts = producer.split(self._PIPE_SNODE_MARKER)
        if len(producer_parts) < 2:
            self.report.warning(
                message="Skipping lineage record without a pipeline snode id",
                context=str(producer),
            )
            return
        pipeline_snode_id = producer_parts[1]
        if not pipeline_snode_id:
            return

        parser = self.snaplogic_parser
        datasets = parser.extract_datasets_from_lineage(lineage)
        pipeline = parser.extract_pipeline_from_lineage(lineage)
        task = parser.extract_task_from_lineage(lineage)
        columns_mapping = parser.extract_columns_mapping_from_lineage(lineage)

        # Work units are not passed to self.report.report_workunit() here: the
        # processors inherited via get_workunit_processors() already report each
        # work unit, so reporting them here as well would count them twice.

        # Create pipeline MCP
        yield from self.create_pipeline_mcp(
            name=pipeline.name,
            pipeline_snode_id=pipeline.id,
            namespace=pipeline.namespace,
        )

        # Create dataset MCP
        for dataset in datasets:
            yield from self.create_dataset_mcp(
                dataset_name=dataset.name,
                dataset_display_name=dataset.display_name,
                fields=dataset.fields,
                platform=dataset.platform,
                platform_instance=dataset.platform_instance,
            )

        # Create task MCP
        # NOTE(review): the task uses the snode id parsed from "producer" while
        # the pipeline above uses pipeline.id — presumably the same value;
        # confirm against the parser.
        yield from self.create_task_mcp(
            name=task.name,
            task_id=task.id,
            namespace=task.namespace,
            pipeline_snode_id=pipeline_snode_id,
            input_datasets=[dataset for dataset in datasets if dataset.type == "INPUT"],
            output_datasets=[
                dataset for dataset in datasets if dataset.type == "OUTPUT"
            ],
            columns_mapping=columns_mapping,
        )

    def create_task_mcp(
        self,
        task_id: str,
        name: str,
        namespace: str,
        pipeline_snode_id: str,
        input_datasets: list[Dataset],
        output_datasets: list[Dataset],
        columns_mapping: list[ColumnMapping],
    ) -> Iterable[MetadataWorkUnit]:
        """Create MCPs for a task (snap) including metadata and lineage.

        Yields two work units for the same data job URN: one carrying
        DataJobInfo and one carrying DataJobInputOutput (dataset-level,
        field-level and fine-grained lineage).
        """
        job_urn = make_data_job_urn(
            orchestrator=namespace,
            flow_id=pipeline_snode_id,
            job_id=task_id,
            cluster="PROD",
        )
        yield MetadataChangeProposalWrapper(
            entityUrn=job_urn,
            aspect=DataJobInfoClass(
                name=name,
                description="",
                externalUrl=self._designer_url(pipeline_snode_id),
                type="SNAPLOGIC_SNAP",
            ),
        ).as_workunit()

        # Helper functions
        def dataset_urn(d: Dataset) -> str:
            # NOTE(review): env is not passed here, while the fine-grained
            # lineage below passes the dataset's env explicitly — confirm both
            # resolve to the same URN.
            return make_dataset_urn_with_platform_instance(
                d.platform, d.name, d.platform_instance
            )

        def field_urn(d: Dataset, f: dict) -> str:
            return make_schema_field_urn(dataset_urn(d), f["name"])

        # Emit lineage
        yield MetadataChangeProposalWrapper(
            entityUrn=job_urn,
            aspect=DataJobInputOutputClass(
                inputDatasets=[dataset_urn(d) for d in input_datasets],
                outputDatasets=[dataset_urn(d) for d in output_datasets],
                inputDatasetFields=[
                    field_urn(d, f) for d in input_datasets for f in d.fields
                ],
                outputDatasetFields=[
                    field_urn(d, f) for d in output_datasets for f in d.fields
                ],
                fineGrainedLineages=[
                    FineGrainedLineageClass(
                        # NOTE(review): the *Downstream* enum class is used for
                        # upstreamType; presumably the upstream-type enum was
                        # intended — confirm.
                        upstreamType=FineGrainedLineageDownstreamTypeClass.FIELD_SET,
                        upstreams=[
                            make_schema_field_urn(
                                make_dataset_urn_with_platform_instance(
                                    cl.input_dataset.platform,
                                    cl.input_dataset.name,
                                    cl.input_dataset.platform_instance,
                                    cl.input_dataset.env,
                                ),
                                cl.input_field,
                            )
                        ],
                        downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD_SET,
                        downstreams=[
                            make_schema_field_urn(
                                make_dataset_urn_with_platform_instance(
                                    cl.output_dataset.platform,
                                    cl.output_dataset.name,
                                    cl.output_dataset.platform_instance,
                                    cl.output_dataset.env,
                                ),
                                cl.output_field,
                            )
                        ],
                    )
                    for cl in columns_mapping
                ],
            ),
        ).as_workunit()

    def create_dataset_mcp(
        self,
        dataset_name: str,
        dataset_display_name: str,
        fields: list[dict],
        platform: str = "snaplogic",
        env: str = "PROD",
        platform_instance: Optional[str] = None,
    ) -> Iterable[MetadataWorkUnit]:
        """Create MCPs (dataset properties and schema metadata) for one dataset.

        Yields nothing when the dataset belongs to another platform and either
        creation of such datasets is disabled or the dataset already exists in
        DataHub.
        """
        dataset_urn = make_dataset_urn_with_platform_instance(
            platform=platform,
            name=dataset_name,
            env=env,
            platform_instance=platform_instance,
        )

        # Skip dataset creation if:
        # 1. The platform is not "snaplogic" AND
        # 2. Either:
        #    a) The config `create_non_snaplogic_datasets` is disabled (False), meaning
        #       we do not create datasets for non-snaplogic platforms, OR
        #    b) The dataset already exists in DataHub (`self.graph.exists(dataset_urn)`).
        if platform != "snaplogic" and (
            not self.config.create_non_snaplogic_datasets
            or (self.graph and self.graph.exists(dataset_urn))
        ):
            return

        dataset_properties = DatasetPropertiesClass(
            name=dataset_display_name,
            qualifiedName=dataset_name,
        )

        schema_fields = []
        for field in fields:
            # Fields without an explicit type are recorded as "Varchar".
            native_type = field.get("type", "Varchar")
            schema_fields.append(
                SchemaFieldClass(
                    fieldPath=field["name"],
                    type=SnaplogicUtils.get_datahub_type(native_type),
                    nativeDataType=native_type,
                )
            )

        schema_metadata = SchemaMetadataClass(
            schemaName=dataset_name,
            platform=make_data_platform_urn(platform),
            version=0,
            hash="",
            platformSchema=OtherSchemaClass(rawSchema=""),
            fields=schema_fields,
        )

        yield MetadataChangeProposalWrapper(
            entityUrn=dataset_urn, aspect=dataset_properties
        ).as_workunit()

        yield MetadataChangeProposalWrapper(
            entityUrn=dataset_urn, aspect=schema_metadata
        ).as_workunit()

    def create_pipeline_mcp(
        self, name: str, namespace: str, pipeline_snode_id: str
    ) -> Iterable[MetadataWorkUnit]:
        """Create the DataFlowInfo MCP for a pipeline."""
        flow_urn = make_data_flow_urn(
            orchestrator=namespace, flow_id=pipeline_snode_id, cluster="PROD"
        )

        yield MetadataChangeProposalWrapper(
            entityUrn=flow_urn,
            aspect=DataFlowInfoClass(
                name=name,
                description="",
                externalUrl=self._designer_url(pipeline_snode_id),
            ),
        ).as_workunit()

    def get_report(self) -> SourceReport:
        """Return the report object shared with the lineage extractor."""
        return self.report

    def close(self) -> None:
        """Release resources; delegates to the base class."""
        super().close()

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> "SnaplogicSource":
        """Build the source from a raw config dictionary."""
        config = SnaplogicConfig.parse_obj(config_dict)
        return cls(config, ctx)
@@ -0,0 +1,37 @@
1
+ from typing import Optional
2
+
3
+ from pydantic import Field, SecretStr
4
+
5
+ from datahub.ingestion.source.state.stale_entity_removal_handler import (
6
+ StatefulIngestionConfigBase,
7
+ StatefulStaleMetadataRemovalConfig,
8
+ )
9
+ from datahub.ingestion.source.state.stateful_ingestion_base import (
10
+ StatefulLineageConfigMixin,
11
+ StatefulUsageConfigMixin,
12
+ )
13
+
14
+
15
class SnaplogicConfig(
    StatefulIngestionConfigBase, StatefulLineageConfigMixin, StatefulUsageConfigMixin
):
    # Configuration model for the Snaplogic source: credentials, instance
    # location, namespace handling and dataset-creation behaviour.

    platform: str = "Snaplogic"

    # Credentials; the password is held as a SecretStr.
    username: str = Field(description="Username")
    password: SecretStr = Field(description="Password")

    # Instance location.
    base_url: str = Field(
        description="Url to your Snaplogic instance: `https://elastic.snaplogic.com`, or similar. Used for making API calls to Snaplogic.",
        default="https://elastic.snaplogic.com",
    )
    org_name: str = Field(description="Organization name from Snaplogic instance")

    # Namespace handling.
    namespace_mapping: dict = Field(
        description="Mapping of namespaces to platform instances", default={}
    )
    case_insensitive_namespaces: list = Field(
        description="List of namespaces that should be treated as case insensitive",
        default=[],
    )

    # Datasets on other platforms are not created unless this is enabled.
    create_non_snaplogic_datasets: bool = Field(
        description="Whether to create datasets for non-Snaplogic datasets (e.g., databases, S3, etc.)",
        default=False,
    )

    # Stateful ingestion settings; unset by default.
    stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None