acryl-datahub 1.2.0.10rc1__py3-none-any.whl → 1.2.0.10rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.10rc1.dist-info → acryl_datahub-1.2.0.10rc2.dist-info}/METADATA +2739 -2739
- {acryl_datahub-1.2.0.10rc1.dist-info → acryl_datahub-1.2.0.10rc2.dist-info}/RECORD +19 -19
- datahub/_version.py +1 -1
- datahub/ingestion/autogenerated/capability_summary.json +12 -0
- datahub/ingestion/source/dbt/dbt_common.py +65 -5
- datahub/ingestion/source/ge_data_profiler.py +15 -2
- datahub/ingestion/source/looker/looker_common.py +21 -0
- datahub/ingestion/source/looker/lookml_source.py +46 -88
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/superset.py +93 -11
- datahub/metadata/_internal_schema_classes.py +157 -1
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/schema.avsc +74 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +72 -0
- datahub/metadata/schemas/LogicalParent.avsc +2 -1
- {acryl_datahub-1.2.0.10rc1.dist-info → acryl_datahub-1.2.0.10rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.10rc1.dist-info → acryl_datahub-1.2.0.10rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.10rc1.dist-info → acryl_datahub-1.2.0.10rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.10rc1.dist-info → acryl_datahub-1.2.0.10rc2.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.2.0.
|
|
1
|
+
acryl_datahub-1.2.0.10rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=EmO6ZWHg0m-jKxy3tyNs5CnPt86iFlMSrEkhzdbO54M,324
|
|
5
5
|
datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -159,7 +159,7 @@ datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
|
|
|
159
159
|
datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
|
|
160
160
|
datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=0BwkpLhORbsiTHq0g_N_1cVVoZYdLR3qz02mNmsV9-M,4444
|
|
161
161
|
datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
|
-
datahub/ingestion/autogenerated/capability_summary.json,sha256=
|
|
162
|
+
datahub/ingestion/autogenerated/capability_summary.json,sha256=6n3XZj4xPZ3WVfQ29QQvqX5ancSXAqvjaMfpDqFQ8LI,110531
|
|
163
163
|
datahub/ingestion/autogenerated/lineage.json,sha256=8BdZF-5V5kJbX4mfFav8Zg-jHjzfkAEGk-pu1atLN4I,10029
|
|
164
164
|
datahub/ingestion/autogenerated/lineage_helper.py,sha256=I_k1pZSCCCjDbUVifPTfy6fkmV8jqdVhbirE8EkpmxI,4748
|
|
165
165
|
datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -211,7 +211,7 @@ datahub/ingestion/source/demo_data.py,sha256=PbtCHlZx3wrKlOPPgkWhDQuPm7ZfIx2neXJ
|
|
|
211
211
|
datahub/ingestion/source/elastic_search.py,sha256=2dwIcSbYMaq_RoSnxLGz4Q_20oJ8AGgMKunVIBIgYM8,23406
|
|
212
212
|
datahub/ingestion/source/feast.py,sha256=rAqT7huVgi4c7iRU9qSbohPbNRrxZVw4PIvnfxNsiUk,18798
|
|
213
213
|
datahub/ingestion/source/file.py,sha256=sHCWbtrQcXMMYPs_LUqofx0mk6IFN0G7Lyk9b0yRZMI,16082
|
|
214
|
-
datahub/ingestion/source/ge_data_profiler.py,sha256=
|
|
214
|
+
datahub/ingestion/source/ge_data_profiler.py,sha256=9lEQdLcMBa7znqa6Zz-QWA4Uiv8KiiCALMEERL37pgA,69318
|
|
215
215
|
datahub/ingestion/source/ge_profiling_config.py,sha256=sG_0BwPDRG3I4PnhfWGHf9AbePLDWG0kKcKEtlXHTuk,11544
|
|
216
216
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
217
217
|
datahub/ingestion/source/ldap.py,sha256=PKoA5pVjuIxFfW1TcbYNIWSm7-C7shK2FDn7Zo5mrVM,18705
|
|
@@ -224,11 +224,11 @@ datahub/ingestion/source/openapi.py,sha256=1eemGG8BM5H8T2OxNSg6kzGDminblLPgPVuPA
|
|
|
224
224
|
datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
|
|
225
225
|
datahub/ingestion/source/preset.py,sha256=fncn-fgYcITsYEHVsvV6cGTQ9_xc_R06ejrw6ZbY3QA,3966
|
|
226
226
|
datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgMCU-As,20187
|
|
227
|
-
datahub/ingestion/source/redash.py,sha256=
|
|
227
|
+
datahub/ingestion/source/redash.py,sha256=C4cDikWymbL88fDqaIPX5WA3f2sIEtH7bmhJKkmXJsM,30652
|
|
228
228
|
datahub/ingestion/source/salesforce.py,sha256=UttN3y4Ylbx_yCFCr-33wUEZFR48nTiYeUfOjGIFj2E,40872
|
|
229
229
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
230
230
|
datahub/ingestion/source/sql_queries.py,sha256=Zf6Y84WFCD-j0v4HdtcXshAPrtGq5InncXCjnLaAuS4,14657
|
|
231
|
-
datahub/ingestion/source/superset.py,sha256=
|
|
231
|
+
datahub/ingestion/source/superset.py,sha256=OWllzS0D0K9zUUQE6w-ZzemJduu6ZV003rBVe2rxkaM,56814
|
|
232
232
|
datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
233
233
|
datahub/ingestion/source/abs/config.py,sha256=WW9JWbzqAJDblAcJKtNeuBHqOeJsB57lW2PqSD65-BU,6729
|
|
234
234
|
datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
|
|
@@ -302,7 +302,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
302
302
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
303
303
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
304
304
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=ILkP6ui48VU8pClDQz6jaFR026gcF7lyUX4gt9im8Vg,18428
|
|
305
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
305
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=OGe9_tDMYuaM_9JZQ3fYtWkao09R5CB5x2SQlD1UDng,91881
|
|
306
306
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=WVI2ZYXOMxgFzJnJqsqmEGS-5xdfiVIDsCb78lvSeQ0,24930
|
|
307
307
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
|
|
308
308
|
datahub/ingestion/source/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -383,7 +383,7 @@ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=QTMY0FmOHkTxfIC
|
|
|
383
383
|
datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=A9q-u5IoV35swvoyMrzT75FVV9-SBeYGhLKDYRge-IQ,23845
|
|
384
384
|
datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
385
385
|
datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
|
|
386
|
-
datahub/ingestion/source/looker/looker_common.py,sha256=
|
|
386
|
+
datahub/ingestion/source/looker/looker_common.py,sha256=LKjGnPOKiWLD0cq-6eaFgXvIjzMdp-RPacwk_wMrXxA,68726
|
|
387
387
|
datahub/ingestion/source/looker/looker_config.py,sha256=eVKw1nn9D8hUFdRfNyT3MtzL8w-zWhFeokiwSnNKQuc,13607
|
|
388
388
|
datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
|
|
389
389
|
datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
|
|
@@ -399,7 +399,7 @@ datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z
|
|
|
399
399
|
datahub/ingestion/source/looker/lookml_concept_context.py,sha256=eDaze9S7cgO5eFP7-0azUMEJyR3EfMjmfj5pMPjpm8c,18066
|
|
400
400
|
datahub/ingestion/source/looker/lookml_config.py,sha256=lulLcjAS1d8ihQseBe4HYn6ALKmJX1vl0H5mxiBzZ74,11395
|
|
401
401
|
datahub/ingestion/source/looker/lookml_refinement.py,sha256=MkVreI0BylaCFyDHihDHaCcXyDSP84eF9p1h5d-ZHnM,9504
|
|
402
|
-
datahub/ingestion/source/looker/lookml_source.py,sha256=
|
|
402
|
+
datahub/ingestion/source/looker/lookml_source.py,sha256=aNvEoW0njzqQSoj1KeHiDOBbEGYWWDEcjX-a2RYgqOY,42735
|
|
403
403
|
datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
|
|
404
404
|
datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
|
|
405
405
|
datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
|
|
@@ -635,8 +635,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
635
635
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
636
636
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
637
637
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
638
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
639
|
-
datahub/metadata/schema.avsc,sha256=
|
|
638
|
+
datahub/metadata/_internal_schema_classes.py,sha256=gijo0J9PIU9wETlfly4ngC1ognj9LyoFSXyiQCoWdzk,1068035
|
|
639
|
+
datahub/metadata/schema.avsc,sha256=H39OyQNCy031TBDFbaa5ZGWiSYde8RZ5Q92GmhZ3F0M,708737
|
|
640
640
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
641
641
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
642
642
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -709,7 +709,7 @@ datahub/metadata/com/linkedin/pegasus2avro/schemafield/__init__.py,sha256=HTWezn
|
|
|
709
709
|
datahub/metadata/com/linkedin/pegasus2avro/secret/__init__.py,sha256=qk61EqqVZF6k1Ct6t4Uo-pLb0WtM1EwJKn1XjVy9LHE,305
|
|
710
710
|
datahub/metadata/com/linkedin/pegasus2avro/settings/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
711
711
|
datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py,sha256=ASD1mi7q19HVN9QKzbu2T3GxzdDQ_LqZNTehV3SI12c,531
|
|
712
|
-
datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py,sha256=
|
|
712
|
+
datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py,sha256=kTY87JYIsV2yvBiHE-QrzPc7PIrUFS5aWh_VFQm7tSg,1107
|
|
713
713
|
datahub/metadata/com/linkedin/pegasus2avro/step/__init__.py,sha256=HLNNbqBlyhcg09eXWx_AMD_JoOtBPYEi2kv12PE0R9E,329
|
|
714
714
|
datahub/metadata/com/linkedin/pegasus2avro/structured/__init__.py,sha256=Cry61gPw6m5MQuJpPxADRm3jhI0XVqzznyD3fVKMkvc,1013
|
|
715
715
|
datahub/metadata/com/linkedin/pegasus2avro/tag/__init__.py,sha256=Odb4mzloKJIlpoFHODEIxt_OIgFNrZExcyQtvXxjOFQ,290
|
|
@@ -857,7 +857,7 @@ datahub/metadata/schemas/Filter.avsc,sha256=PU-aGkc2-sI3ZXY7ci-Y0A7zp1jux3VW_6c8
|
|
|
857
857
|
datahub/metadata/schemas/FormInfo.avsc,sha256=tlNI-m6uWJ46yF6Ls6Q_DOEZM2bQgXTd4bSeH30N9MA,6438
|
|
858
858
|
datahub/metadata/schemas/FormKey.avsc,sha256=1-wE28B8T3WJ3JtexreNtFvP3To3n7U-jvYudCuSM9o,437
|
|
859
859
|
datahub/metadata/schemas/Forms.avsc,sha256=shmkhRoHN2gTaTsqGrGDRoNwe_z-nrFbbLjH9MtVDCs,10955
|
|
860
|
-
datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=
|
|
860
|
+
datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=xxwH79mYno0nDt-TZ1bTlEd_8Tv4t01iijYHgPl9838,14927
|
|
861
861
|
datahub/metadata/schemas/GlobalSettingsKey.avsc,sha256=Yj8s5IdM9yF7xrhJcLGCPCXBWqSsrPbufBaQjlZ3JlU,563
|
|
862
862
|
datahub/metadata/schemas/GlobalTags.avsc,sha256=-SurkodMqTDnPpkRV6qYqmpNWjQNvynUiPZX7EhL5uc,4624
|
|
863
863
|
datahub/metadata/schemas/GlossaryNodeInfo.avsc,sha256=G1Cb-w9VxIAEhNqyiEsDL_ABRO9QxyTpUANKU6DQrFw,1888
|
|
@@ -878,7 +878,7 @@ datahub/metadata/schemas/InstitutionalMemory.avsc,sha256=0dKK18Gr6eBIgUimZrm9zsB
|
|
|
878
878
|
datahub/metadata/schemas/IntendedUse.avsc,sha256=IKZSWdvc0uAyyT-FtdQOGbMC-P7RS9cO0vOVKWT6fbw,1361
|
|
879
879
|
datahub/metadata/schemas/InviteToken.avsc,sha256=8k_9MxHu9GVf7gvS0SlnQu7tJfpbXsRFdz6lQrFKPNc,737
|
|
880
880
|
datahub/metadata/schemas/InviteTokenKey.avsc,sha256=MuQUlQaeVjaBkjSshB9gsx5Fm0civYgWD8UhCiRLdOQ,434
|
|
881
|
-
datahub/metadata/schemas/LogicalParent.avsc,sha256=
|
|
881
|
+
datahub/metadata/schemas/LogicalParent.avsc,sha256=Wr58mqOVKNc2YIXZSWEjIHqfcJ_U0XH1CRMuMAD5R_Q,5477
|
|
882
882
|
datahub/metadata/schemas/MLFeatureKey.avsc,sha256=1XFF8P2T3_4mX2oHGY74UJx1LFr6WtTv2jTG9ApjQoA,1150
|
|
883
883
|
datahub/metadata/schemas/MLFeatureProperties.avsc,sha256=HpF7VcnH2FvDsqy2g5AMJPqU7upkTVADW1ps-9bjINo,6893
|
|
884
884
|
datahub/metadata/schemas/MLFeatureTableKey.avsc,sha256=hVxNEqsx4GgG11GVryn9ms16OyEzsYdwM5RuAHy_454,1424
|
|
@@ -1114,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1114
1114
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1115
1115
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1116
1116
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1117
|
-
acryl_datahub-1.2.0.
|
|
1118
|
-
acryl_datahub-1.2.0.
|
|
1119
|
-
acryl_datahub-1.2.0.
|
|
1120
|
-
acryl_datahub-1.2.0.
|
|
1121
|
-
acryl_datahub-1.2.0.
|
|
1117
|
+
acryl_datahub-1.2.0.10rc2.dist-info/METADATA,sha256=bPXVHE45T0nHRzSOr4cTIhKQVXcO7TfksCB9yifFx0k,186878
|
|
1118
|
+
acryl_datahub-1.2.0.10rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1119
|
+
acryl_datahub-1.2.0.10rc2.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
|
|
1120
|
+
acryl_datahub-1.2.0.10rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1121
|
+
acryl_datahub-1.2.0.10rc2.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -2372,6 +2372,12 @@
|
|
|
2372
2372
|
"subtype_modifier": null,
|
|
2373
2373
|
"supported": true
|
|
2374
2374
|
},
|
|
2375
|
+
{
|
|
2376
|
+
"capability": "TAGS",
|
|
2377
|
+
"description": "Supported by default",
|
|
2378
|
+
"subtype_modifier": null,
|
|
2379
|
+
"supported": true
|
|
2380
|
+
},
|
|
2375
2381
|
{
|
|
2376
2382
|
"capability": "LINEAGE_COARSE",
|
|
2377
2383
|
"description": "Supported by default",
|
|
@@ -3174,6 +3180,12 @@
|
|
|
3174
3180
|
"subtype_modifier": null,
|
|
3175
3181
|
"supported": true
|
|
3176
3182
|
},
|
|
3183
|
+
{
|
|
3184
|
+
"capability": "TAGS",
|
|
3185
|
+
"description": "Supported by default",
|
|
3186
|
+
"subtype_modifier": null,
|
|
3187
|
+
"supported": true
|
|
3188
|
+
},
|
|
3177
3189
|
{
|
|
3178
3190
|
"capability": "LINEAGE_COARSE",
|
|
3179
3191
|
"description": "Supported by default",
|
|
@@ -246,6 +246,23 @@ class DBTEntitiesEnabled(ConfigModel):
|
|
|
246
246
|
return self.model_performance == EmitDirective.YES
|
|
247
247
|
|
|
248
248
|
|
|
249
|
+
class MaterializedNodePatternConfig(ConfigModel):
|
|
250
|
+
"""Configuration for filtering materialized nodes based on their physical location"""
|
|
251
|
+
|
|
252
|
+
database_pattern: AllowDenyPattern = Field(
|
|
253
|
+
default=AllowDenyPattern.allow_all(),
|
|
254
|
+
description="Regex patterns for database names to filter materialized nodes.",
|
|
255
|
+
)
|
|
256
|
+
schema_pattern: AllowDenyPattern = Field(
|
|
257
|
+
default=AllowDenyPattern.allow_all(),
|
|
258
|
+
description="Regex patterns for schema names in format '{database}.{schema}' to filter materialized nodes.",
|
|
259
|
+
)
|
|
260
|
+
table_pattern: AllowDenyPattern = Field(
|
|
261
|
+
default=AllowDenyPattern.allow_all(),
|
|
262
|
+
description="Regex patterns for table/view names in format '{database}.{schema}.{table}' to filter materialized nodes.",
|
|
263
|
+
)
|
|
264
|
+
|
|
265
|
+
|
|
249
266
|
class DBTCommonConfig(
|
|
250
267
|
StatefulIngestionConfigBase,
|
|
251
268
|
PlatformInstanceConfigMixin,
|
|
@@ -294,6 +311,11 @@ class DBTCommonConfig(
|
|
|
294
311
|
default=AllowDenyPattern.allow_all(),
|
|
295
312
|
description="regex patterns for dbt model names to filter in ingestion.",
|
|
296
313
|
)
|
|
314
|
+
materialized_node_pattern: MaterializedNodePatternConfig = Field(
|
|
315
|
+
default=MaterializedNodePatternConfig(),
|
|
316
|
+
description="Advanced filtering for materialized nodes based on their physical database location. "
|
|
317
|
+
"Provides fine-grained control over database.schema.table patterns for catalog consistency.",
|
|
318
|
+
)
|
|
297
319
|
meta_mapping: Dict = Field(
|
|
298
320
|
default={},
|
|
299
321
|
description="mapping rules that will be executed against dbt meta properties. Refer to the section below on dbt meta automated mappings.",
|
|
@@ -1018,15 +1040,53 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
1018
1040
|
all_nodes_map,
|
|
1019
1041
|
)
|
|
1020
1042
|
|
|
1021
|
-
def _is_allowed_node(self,
|
|
1022
|
-
|
|
1043
|
+
def _is_allowed_node(self, node: DBTNode) -> bool:
|
|
1044
|
+
"""
|
|
1045
|
+
Check whether a node should be processed, using multi-layer rules. Checks for materialized nodes might need to be restricted in the future to some cases
|
|
1046
|
+
"""
|
|
1047
|
+
if not self.config.node_name_pattern.allowed(node.dbt_name):
|
|
1048
|
+
return False
|
|
1049
|
+
|
|
1050
|
+
if not self._is_allowed_materialized_node(node):
|
|
1051
|
+
return False
|
|
1052
|
+
|
|
1053
|
+
return True
|
|
1054
|
+
|
|
1055
|
+
def _is_allowed_materialized_node(self, node: DBTNode) -> bool:
|
|
1056
|
+
"""Filter nodes based on their materialized database location for catalog consistency"""
|
|
1057
|
+
|
|
1058
|
+
# Database level filtering
|
|
1059
|
+
if not node.database:
|
|
1060
|
+
return True
|
|
1061
|
+
if not self.config.materialized_node_pattern.database_pattern.allowed(
|
|
1062
|
+
node.database
|
|
1063
|
+
):
|
|
1064
|
+
return False
|
|
1065
|
+
|
|
1066
|
+
# Schema level filtering: {database}.{schema}
|
|
1067
|
+
if not node.schema:
|
|
1068
|
+
return True
|
|
1069
|
+
if not self.config.materialized_node_pattern.schema_pattern.allowed(
|
|
1070
|
+
node._join_parts([node.database, node.schema])
|
|
1071
|
+
):
|
|
1072
|
+
return False
|
|
1073
|
+
|
|
1074
|
+
# Table level filtering: {database}.{schema}.{table}
|
|
1075
|
+
if not node.name:
|
|
1076
|
+
return True
|
|
1077
|
+
if not self.config.materialized_node_pattern.table_pattern.allowed(
|
|
1078
|
+
node.get_db_fqn()
|
|
1079
|
+
):
|
|
1080
|
+
return False
|
|
1081
|
+
|
|
1082
|
+
return True
|
|
1023
1083
|
|
|
1024
1084
|
def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]:
|
|
1025
1085
|
nodes: List[DBTNode] = []
|
|
1026
1086
|
for node in all_nodes:
|
|
1027
1087
|
key = node.dbt_name
|
|
1028
1088
|
|
|
1029
|
-
if not self._is_allowed_node(
|
|
1089
|
+
if not self._is_allowed_node(node):
|
|
1030
1090
|
self.report.nodes_filtered.append(key)
|
|
1031
1091
|
continue
|
|
1032
1092
|
|
|
@@ -1118,8 +1178,8 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
1118
1178
|
cll_nodes.add(dbt_name)
|
|
1119
1179
|
schema_nodes.add(dbt_name)
|
|
1120
1180
|
|
|
1121
|
-
for dbt_name in all_nodes_map:
|
|
1122
|
-
if self._is_allowed_node(
|
|
1181
|
+
for dbt_name, dbt_node in all_nodes_map.items():
|
|
1182
|
+
if self._is_allowed_node(dbt_node):
|
|
1123
1183
|
add_node_to_cll_list(dbt_name)
|
|
1124
1184
|
|
|
1125
1185
|
return schema_nodes, cll_nodes
|
|
@@ -307,7 +307,6 @@ def _is_single_row_query_method(query: Any) -> bool:
|
|
|
307
307
|
"get_column_max",
|
|
308
308
|
"get_column_mean",
|
|
309
309
|
"get_column_stdev",
|
|
310
|
-
"get_column_nonnull_count",
|
|
311
310
|
"get_column_unique_count",
|
|
312
311
|
}
|
|
313
312
|
CONSTANT_ROW_QUERY_METHODS = {
|
|
@@ -331,6 +330,7 @@ def _is_single_row_query_method(query: Any) -> bool:
|
|
|
331
330
|
|
|
332
331
|
FIRST_PARTY_SINGLE_ROW_QUERY_METHODS = {
|
|
333
332
|
"get_column_unique_count_dh_patch",
|
|
333
|
+
"_get_column_cardinality",
|
|
334
334
|
}
|
|
335
335
|
|
|
336
336
|
# We'll do this the inefficient way since the arrays are pretty small.
|
|
@@ -497,7 +497,20 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
|
|
|
497
497
|
self, column_spec: _SingleColumnSpec, column: str
|
|
498
498
|
) -> None:
|
|
499
499
|
try:
|
|
500
|
-
|
|
500
|
+
# Don't use Great Expectations get_column_nonnull_count because it
|
|
501
|
+
# generates this SQL:
|
|
502
|
+
#
|
|
503
|
+
# sum(CASE WHEN (mycolumn IN (NULL) OR mycolumn IS NULL) THEN 1 ELSE 0 END)
|
|
504
|
+
#
|
|
505
|
+
# which fails for complex types (such as Databricks maps) that don't
|
|
506
|
+
# support the IN operator.
|
|
507
|
+
nonnull_count = convert_to_json_serializable(
|
|
508
|
+
self.dataset.engine.execute(
|
|
509
|
+
sa.select(sa.func.count(sa.column(column))).select_from(
|
|
510
|
+
self.dataset._table
|
|
511
|
+
)
|
|
512
|
+
).scalar()
|
|
513
|
+
)
|
|
501
514
|
column_spec.nonnull_count = nonnull_count
|
|
502
515
|
except Exception as e:
|
|
503
516
|
logger.debug(
|
|
@@ -255,6 +255,11 @@ class LookerViewId:
|
|
|
255
255
|
|
|
256
256
|
return generated_urn
|
|
257
257
|
|
|
258
|
+
def get_view_dataset_name(self, config: LookerCommonConfig) -> str:
|
|
259
|
+
n_mapping: ViewNamingPatternMapping = self.get_mapping(config)
|
|
260
|
+
n_mapping.file_path = self.preprocess_file_path(n_mapping.file_path)
|
|
261
|
+
return config.view_naming_pattern.replace_variables(n_mapping)
|
|
262
|
+
|
|
258
263
|
def get_browse_path(self, config: LookerCommonConfig) -> str:
|
|
259
264
|
browse_path = config.view_browse_pattern.replace_variables(
|
|
260
265
|
self.get_mapping(config)
|
|
@@ -282,6 +287,22 @@ class LookerViewId:
|
|
|
282
287
|
],
|
|
283
288
|
)
|
|
284
289
|
|
|
290
|
+
def get_view_dataset_parent_container(
|
|
291
|
+
self, config: LookerCommonConfig
|
|
292
|
+
) -> List[str]:
|
|
293
|
+
project_key = gen_project_key(config, self.project_name)
|
|
294
|
+
view_path = (
|
|
295
|
+
remove_suffix(self.file_path, ".view.lkml")
|
|
296
|
+
if "{file_path}" in config.view_browse_pattern.pattern
|
|
297
|
+
else os.path.dirname(self.file_path)
|
|
298
|
+
)
|
|
299
|
+
path_entries = view_path.split("/") if view_path else []
|
|
300
|
+
return [
|
|
301
|
+
"Develop",
|
|
302
|
+
project_key.as_urn(),
|
|
303
|
+
*path_entries,
|
|
304
|
+
]
|
|
305
|
+
|
|
285
306
|
|
|
286
307
|
class ViewFieldType(Enum):
|
|
287
308
|
DIMENSION = "Dimension"
|
|
@@ -4,7 +4,7 @@ import tempfile
|
|
|
4
4
|
from collections import OrderedDict
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from datetime import datetime, timezone
|
|
7
|
-
from typing import Dict, Iterable, List, Optional, Set, Tuple
|
|
7
|
+
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
|
|
8
8
|
|
|
9
9
|
import lkml
|
|
10
10
|
import lkml.simple
|
|
@@ -12,8 +12,7 @@ from looker_sdk.error import SDKError
|
|
|
12
12
|
|
|
13
13
|
from datahub.configuration.git import GitInfo
|
|
14
14
|
from datahub.emitter.mce_builder import make_schema_field_urn
|
|
15
|
-
from datahub.emitter.
|
|
16
|
-
from datahub.emitter.mcp_builder import gen_containers
|
|
15
|
+
from datahub.emitter.mcp_builder import mcps_from_mce
|
|
17
16
|
from datahub.ingestion.api.common import PipelineContext
|
|
18
17
|
from datahub.ingestion.api.decorators import (
|
|
19
18
|
SupportStatus,
|
|
@@ -77,7 +76,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
|
77
76
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
78
77
|
StatefulIngestionSourceBase,
|
|
79
78
|
)
|
|
80
|
-
from datahub.metadata.com.linkedin.pegasus2avro.common import
|
|
79
|
+
from datahub.metadata.com.linkedin.pegasus2avro.common import Status
|
|
81
80
|
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
82
81
|
DatasetLineageTypeClass,
|
|
83
82
|
FineGrainedLineageDownstreamType,
|
|
@@ -85,18 +84,15 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
|
85
84
|
UpstreamLineage,
|
|
86
85
|
ViewProperties,
|
|
87
86
|
)
|
|
88
|
-
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
|
|
89
|
-
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
90
87
|
from datahub.metadata.schema_classes import (
|
|
91
88
|
AuditStampClass,
|
|
92
|
-
BrowsePathEntryClass,
|
|
93
|
-
BrowsePathsV2Class,
|
|
94
|
-
ContainerClass,
|
|
95
89
|
DatasetPropertiesClass,
|
|
96
90
|
FineGrainedLineageClass,
|
|
97
91
|
FineGrainedLineageUpstreamTypeClass,
|
|
98
|
-
SubTypesClass,
|
|
99
92
|
)
|
|
93
|
+
from datahub.sdk.container import Container
|
|
94
|
+
from datahub.sdk.dataset import Dataset
|
|
95
|
+
from datahub.sdk.entity import Entity
|
|
100
96
|
from datahub.sql_parsing.sqlglot_lineage import ColumnRef
|
|
101
97
|
|
|
102
98
|
VIEW_LANGUAGE_LOOKML: str = "lookml"
|
|
@@ -428,69 +424,40 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
428
424
|
|
|
429
425
|
return dataset_props
|
|
430
426
|
|
|
431
|
-
def
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
subTypeEvent = MetadataChangeProposalWrapper(
|
|
437
|
-
entityUrn=view_urn,
|
|
438
|
-
aspect=SubTypesClass(typeNames=[DatasetSubTypes.VIEW]),
|
|
439
|
-
)
|
|
440
|
-
events = [subTypeEvent]
|
|
427
|
+
def _build_dataset_entities(self, looker_view: LookerView) -> Iterable[Dataset]:
|
|
428
|
+
dataset_extra_aspects: List[Union[ViewProperties, Status]] = [
|
|
429
|
+
Status(removed=False)
|
|
430
|
+
]
|
|
441
431
|
if looker_view.view_details is not None:
|
|
442
|
-
|
|
443
|
-
entityUrn=view_urn,
|
|
444
|
-
aspect=looker_view.view_details,
|
|
445
|
-
)
|
|
446
|
-
events.append(viewEvent)
|
|
447
|
-
|
|
448
|
-
project_key = gen_project_key(self.source_config, looker_view.id.project_name)
|
|
449
|
-
|
|
450
|
-
container = ContainerClass(container=project_key.as_urn())
|
|
451
|
-
events.append(
|
|
452
|
-
MetadataChangeProposalWrapper(entityUrn=view_urn, aspect=container)
|
|
453
|
-
)
|
|
454
|
-
|
|
455
|
-
events.append(
|
|
456
|
-
MetadataChangeProposalWrapper(
|
|
457
|
-
entityUrn=view_urn,
|
|
458
|
-
aspect=looker_view.id.get_browse_path_v2(self.source_config),
|
|
459
|
-
)
|
|
460
|
-
)
|
|
461
|
-
|
|
462
|
-
return events
|
|
463
|
-
|
|
464
|
-
def _build_dataset_mce(self, looker_view: LookerView) -> MetadataChangeEvent:
|
|
465
|
-
"""
|
|
466
|
-
Creates MetadataChangeEvent for the dataset, creating upstream lineage links
|
|
467
|
-
"""
|
|
468
|
-
logger.debug(f"looker_view = {looker_view.id}")
|
|
432
|
+
dataset_extra_aspects.append(looker_view.view_details)
|
|
469
433
|
|
|
470
|
-
dataset_snapshot = DatasetSnapshot(
|
|
471
|
-
urn=looker_view.id.get_urn(self.source_config),
|
|
472
|
-
aspects=[], # we append to this list later on
|
|
473
|
-
)
|
|
474
|
-
browse_paths = BrowsePaths(
|
|
475
|
-
paths=[looker_view.id.get_browse_path(self.source_config)]
|
|
476
|
-
)
|
|
477
|
-
|
|
478
|
-
dataset_snapshot.aspects.append(browse_paths)
|
|
479
|
-
dataset_snapshot.aspects.append(Status(removed=False))
|
|
480
|
-
upstream_lineage = self._get_upstream_lineage(looker_view)
|
|
481
|
-
if upstream_lineage is not None:
|
|
482
|
-
dataset_snapshot.aspects.append(upstream_lineage)
|
|
483
434
|
schema_metadata = LookerUtil._get_schema(
|
|
484
435
|
self.source_config.platform_name,
|
|
485
436
|
looker_view.id.view_name,
|
|
486
437
|
looker_view.fields,
|
|
487
438
|
self.reporter,
|
|
488
439
|
)
|
|
489
|
-
if schema_metadata is not None:
|
|
490
|
-
dataset_snapshot.aspects.append(schema_metadata)
|
|
491
|
-
dataset_snapshot.aspects.append(self._get_custom_properties(looker_view))
|
|
492
440
|
|
|
493
|
-
|
|
441
|
+
custom_properties: DatasetPropertiesClass = self._get_custom_properties(
|
|
442
|
+
looker_view
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
yield Dataset(
|
|
446
|
+
platform=self.source_config.platform_name,
|
|
447
|
+
name=looker_view.id.get_view_dataset_name(self.source_config),
|
|
448
|
+
display_name=looker_view.id.view_name,
|
|
449
|
+
platform_instance=self.source_config.platform_instance,
|
|
450
|
+
env=self.source_config.env,
|
|
451
|
+
subtype=DatasetSubTypes.VIEW,
|
|
452
|
+
parent_container=looker_view.id.get_view_dataset_parent_container(
|
|
453
|
+
self.source_config
|
|
454
|
+
),
|
|
455
|
+
schema=schema_metadata,
|
|
456
|
+
custom_properties=custom_properties.customProperties,
|
|
457
|
+
external_url=custom_properties.externalUrl,
|
|
458
|
+
upstreams=self._get_upstream_lineage(looker_view),
|
|
459
|
+
extra_aspects=dataset_extra_aspects,
|
|
460
|
+
)
|
|
494
461
|
|
|
495
462
|
def get_project_name(self, model_name: str) -> str:
|
|
496
463
|
if self.source_config.project_name is not None:
|
|
@@ -554,7 +521,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
554
521
|
).workunit_processor,
|
|
555
522
|
]
|
|
556
523
|
|
|
557
|
-
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
524
|
+
def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
|
|
558
525
|
with tempfile.TemporaryDirectory("lookml_tmp") as tmp_dir:
|
|
559
526
|
# Clone the base_folder if necessary.
|
|
560
527
|
if not self.source_config.base_folder:
|
|
@@ -715,7 +682,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
715
682
|
tmp_dir, project, project_visited, manifest_constants
|
|
716
683
|
)
|
|
717
684
|
|
|
718
|
-
def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
|
|
685
|
+
def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]: # noqa: C901
|
|
719
686
|
assert self.source_config.base_folder
|
|
720
687
|
viewfile_loader = LookerViewFileLoader(
|
|
721
688
|
self.source_config.project_name,
|
|
@@ -949,7 +916,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
949
916
|
maybe_looker_view.id.project_name
|
|
950
917
|
not in self.processed_projects
|
|
951
918
|
):
|
|
952
|
-
yield from self.
|
|
919
|
+
yield from self.gen_project_containers(
|
|
953
920
|
maybe_looker_view.id.project_name
|
|
954
921
|
)
|
|
955
922
|
|
|
@@ -957,15 +924,10 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
957
924
|
maybe_looker_view.id.project_name
|
|
958
925
|
)
|
|
959
926
|
|
|
960
|
-
|
|
927
|
+
yield from self._build_dataset_entities(
|
|
961
928
|
maybe_looker_view
|
|
962
|
-
):
|
|
963
|
-
yield mcp.as_workunit()
|
|
964
|
-
mce = self._build_dataset_mce(maybe_looker_view)
|
|
965
|
-
yield MetadataWorkUnit(
|
|
966
|
-
id=f"lookml-view-{maybe_looker_view.id}",
|
|
967
|
-
mce=mce,
|
|
968
929
|
)
|
|
930
|
+
|
|
969
931
|
processed_view_files.add(include.include)
|
|
970
932
|
else:
|
|
971
933
|
(
|
|
@@ -994,28 +956,24 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
994
956
|
self.source_config.tag_measures_and_dimensions
|
|
995
957
|
and self.reporter.events_produced != 0
|
|
996
958
|
):
|
|
997
|
-
# Emit tag MCEs for measures and dimensions:
|
|
959
|
+
# Emit tag MCEs for measures and dimensions if we produced any explores:
|
|
998
960
|
for tag_mce in LookerUtil.get_tag_mces():
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
961
|
+
# Convert MCE to MCPs
|
|
962
|
+
for mcp in mcps_from_mce(tag_mce):
|
|
963
|
+
yield mcp.as_workunit()
|
|
1002
964
|
|
|
1003
|
-
def
|
|
965
|
+
def gen_project_containers(self, project_name: str) -> Iterable[Container]:
|
|
1004
966
|
project_key = gen_project_key(
|
|
1005
967
|
self.source_config,
|
|
1006
968
|
project_name,
|
|
1007
969
|
)
|
|
1008
|
-
|
|
970
|
+
|
|
971
|
+
yield Container(
|
|
1009
972
|
container_key=project_key,
|
|
1010
|
-
|
|
1011
|
-
|
|
973
|
+
display_name=project_name,
|
|
974
|
+
subtype=BIContainerSubTypes.LOOKML_PROJECT,
|
|
975
|
+
parent_container=["Folders"],
|
|
1012
976
|
)
|
|
1013
|
-
yield MetadataChangeProposalWrapper(
|
|
1014
|
-
entityUrn=project_key.as_urn(),
|
|
1015
|
-
aspect=BrowsePathsV2Class(
|
|
1016
|
-
path=[BrowsePathEntryClass("Folders")],
|
|
1017
|
-
),
|
|
1018
|
-
).as_workunit()
|
|
1019
977
|
|
|
1020
978
|
def report_skipped_unreachable_views(
|
|
1021
979
|
self,
|
|
@@ -447,7 +447,7 @@ class RedashSource(StatefulIngestionSourceBase):
|
|
|
447
447
|
dataset_urns = sql_parser_in_tables.in_tables
|
|
448
448
|
if sql_parser_in_tables.debug_info.table_error:
|
|
449
449
|
self.report.queries_problem_parsing.add(str(query_id))
|
|
450
|
-
self.
|
|
450
|
+
self.warn(
|
|
451
451
|
logger,
|
|
452
452
|
"sql-parsing",
|
|
453
453
|
f"exception {sql_parser_in_tables.debug_info.table_error} in parsing query-{query_id}-datasource-{data_source_id}",
|