acryl-datahub 0.15.0.1rc6__py3-none-any.whl → 0.15.0.1rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/METADATA +2544 -2544
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/RECORD +27 -26
- datahub/__init__.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +9 -0
- datahub/ingestion/source/looker/looker_source.py +19 -3
- datahub/ingestion/source/looker/looker_usage.py +23 -17
- datahub/ingestion/source/mode.py +14 -7
- datahub/ingestion/source/snowflake/snowflake_config.py +3 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -10
- datahub/ingestion/source/snowflake/snowflake_query.py +0 -9
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +1 -5
- datahub/ingestion/source/snowflake/snowflake_shares.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_v2.py +14 -6
- datahub/ingestion/source/tableau/tableau.py +51 -20
- datahub/ingestion/source_report/ingestion_stage.py +1 -0
- datahub/metadata/_schema_classes.py +195 -2
- datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
- datahub/metadata/schema.avsc +188 -4
- datahub/metadata/schemas/DataProcessInstanceKey.avsc +5 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +82 -0
- datahub/metadata/schemas/MLModelProperties.avsc +62 -2
- datahub/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +94 -2
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc6.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=dj0h5Hq8a33nXbLNFmlqql5K3OaWumjRX8IsgKQUCfs,576
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -197,7 +197,7 @@ datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suab
|
|
|
197
197
|
datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
|
|
198
198
|
datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
|
|
199
199
|
datahub/ingestion/source/mlflow.py,sha256=-yWUuAEVBiNN-elz8Pgn0UeGsC3fVB20z1zKNIr4LXI,12309
|
|
200
|
-
datahub/ingestion/source/mode.py,sha256=
|
|
200
|
+
datahub/ingestion/source/mode.py,sha256=fuDTByENqcbxViFyYjU70B86FyAYr3Pk9usIBI0Vl1U,63384
|
|
201
201
|
datahub/ingestion/source/mongodb.py,sha256=vZue4Nz0xaBoCUsQr3_0OIRkWRxeE_IH_Y_QKZ1s7S0,21077
|
|
202
202
|
datahub/ingestion/source/nifi.py,sha256=ttsjZ9aRUvINmewvKFIQD8Rwa4jcl35WFG-F-jPGPWQ,56146
|
|
203
203
|
datahub/ingestion/source/openapi.py,sha256=3ea2ORz1cuq4e7L2hSjxG9Cw3__pVoJ5UNYTJS3EnKU,17386
|
|
@@ -330,7 +330,7 @@ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=ESuJE5SFLLvss9O
|
|
|
330
330
|
datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=_765fSMDAWAe0Cf_F4VNHfOWKNhtqBA1Ep2jL3rf-qc,21263
|
|
331
331
|
datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
332
332
|
datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
|
|
333
|
-
datahub/ingestion/source/looker/looker_common.py,sha256=
|
|
333
|
+
datahub/ingestion/source/looker/looker_common.py,sha256=KObx5ZTfldN2EO11eb1LrHI-KjWHcFoe8n_XcvzuFFU,62047
|
|
334
334
|
datahub/ingestion/source/looker/looker_config.py,sha256=87WAgdJ_QWdTq25RBwgIqfc2kq7dubSpzbEtXb2ihMw,13182
|
|
335
335
|
datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
|
|
336
336
|
datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
|
|
@@ -339,9 +339,9 @@ datahub/ingestion/source/looker/looker_file_loader.py,sha256=c1ewDrIb9VJg1o-asbw
|
|
|
339
339
|
datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=0gaYjBv4wkbbLWVgvaAV6JyWAFb0utTG6TCve2d9xss,11511
|
|
340
340
|
datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=mO4G4MNA4YZFvZaDBpdiJ2vP3irC82kY34RdaK4Pbfs,3100
|
|
341
341
|
datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
|
|
342
|
-
datahub/ingestion/source/looker/looker_source.py,sha256=
|
|
342
|
+
datahub/ingestion/source/looker/looker_source.py,sha256=hlQ9IMgnp_bzNy122SdD4PyIMjbM9Fhtk3QB80d8Iss,66375
|
|
343
343
|
datahub/ingestion/source/looker/looker_template_language.py,sha256=EG4ZfVZ0x53lgaYh2ohzL4ZCy9KsX0TA51XqCmsCd2Q,14328
|
|
344
|
-
datahub/ingestion/source/looker/looker_usage.py,sha256=
|
|
344
|
+
datahub/ingestion/source/looker/looker_usage.py,sha256=4N4R8LEocxX5TuWUgRG9meoh1boG-4SXvAu_b5-fHc0,23047
|
|
345
345
|
datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
|
|
346
346
|
datahub/ingestion/source/looker/lookml_concept_context.py,sha256=guaIfY_cP8UOboJhsf9a1b9r6xjLh8k5C9uMb4p5Neg,18066
|
|
347
347
|
datahub/ingestion/source/looker/lookml_config.py,sha256=Q0fMsu_Cvm8807R6VB14VJDLqjoLTyGF-WsiUD6xEk8,10519
|
|
@@ -429,22 +429,22 @@ datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_G
|
|
|
429
429
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
430
430
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
431
431
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
432
|
-
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
|
|
432
|
+
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=_Ew2nJRoKC9e-SyrhOqn730c4FEhQE3U4bbY6RFV004,17973
|
|
433
433
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
|
|
434
434
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
435
|
-
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=
|
|
435
|
+
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=5Lpy_irZlbOFJbvVkgsZSBjdLCT3VZNjlEvttzSQAU4,21121
|
|
436
436
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
437
437
|
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=8QEihOfivalVR9vLo6vCUL-vnZfAGgMio0uhPYX0jTo,25883
|
|
438
|
-
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=
|
|
438
|
+
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=885pyVnLf8wwTTuWkJ-Q01gKE7Xt518QPbFkrN-vd7o,38310
|
|
439
439
|
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=_-rD7Q4MzKY8fYzJHSBnGX4gurwujL3UoRzcP_TZURs,6468
|
|
440
440
|
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=z5ZPgh-TILAz0DeIwDxRCsj980CM2BbftXiFpM1dV_Y,21674
|
|
441
|
-
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=
|
|
442
|
-
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=
|
|
441
|
+
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=3AxvKfK9WV9x2f2XNuJ-Cmy4szmXKm1Ky0haRVvyC6w,42340
|
|
442
|
+
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tTKRiNp9c_1muv6YfleSd3q0umI,6341
|
|
443
443
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
444
444
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
|
|
445
445
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=PEmYNMXJRUvLQmVd8juVqjokfuSPuH9ppcM0ruXamxA,24807
|
|
446
446
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=YczNEupY89jeegjR2_1pT4bPi9wQ69EIhGpzyCe9Jdg,12600
|
|
447
|
-
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=
|
|
447
|
+
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=hIWtzlxuSQ_3w48o4AF2l9CQOcWIe6AmD07I89sH2B0,31860
|
|
448
448
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
449
449
|
datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
|
|
450
450
|
datahub/ingestion/source/sql/clickhouse.py,sha256=jzvaXP5Wr0SMhj2rtuvVE821xnfpKiXhO3cm0xblgHs,27299
|
|
@@ -491,7 +491,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
491
491
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
|
|
492
492
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
493
493
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
494
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
494
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=nu71B56vB6h5io5GcMXQPlYVCbE-UNAtdxHCm8nXr9o,139751
|
|
495
495
|
datahub/ingestion/source/tableau/tableau_common.py,sha256=9gQLq_3BlAsKll83uVlnWJRWaIDtFtREUyuimXF13Z0,26219
|
|
496
496
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
|
|
497
497
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
|
|
@@ -517,7 +517,7 @@ datahub/ingestion/source_config/csv_enricher.py,sha256=IROxxfFJA56dHkmmbjjhb7h1p
|
|
|
517
517
|
datahub/ingestion/source_config/operation_config.py,sha256=Q0NlqiEh4s4DFIII5NsAp5hxWTVyyJz-ldcQmH-B47s,3504
|
|
518
518
|
datahub/ingestion/source_config/pulsar.py,sha256=sklDkh62CrWV-i7Ifh6R3T3smYVso6gyRJG8HVc6RdA,5533
|
|
519
519
|
datahub/ingestion/source_report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
520
|
-
datahub/ingestion/source_report/ingestion_stage.py,sha256=
|
|
520
|
+
datahub/ingestion/source_report/ingestion_stage.py,sha256=w6qTnJm_-eoTiGxwS7cFnhdIfsv8omC6H5e0qw5t4Jc,1587
|
|
521
521
|
datahub/ingestion/source_report/pulsar.py,sha256=iKhzy644AjoFTV-gxyqBoXKMLwSMPxJFxU-3WDQRww0,1037
|
|
522
522
|
datahub/ingestion/source_report/time_window.py,sha256=9yI5l2S1DcF7ClvUHLeN8m62I5vlhV9k-aQqSZh2l7w,229
|
|
523
523
|
datahub/ingestion/transformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -566,8 +566,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
566
566
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
567
567
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
568
568
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
569
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
570
|
-
datahub/metadata/schema.avsc,sha256=
|
|
569
|
+
datahub/metadata/_schema_classes.py,sha256=NbZUezNRH72XQUl4i_DlV-oRT4KzEBYFYcHcwGqXq9A,962516
|
|
570
|
+
datahub/metadata/schema.avsc,sha256=Ulqzumt0EK7nD_OATi0hbCgw42ngoenja9SXWWsobIk,728543
|
|
571
571
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
572
572
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
573
573
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -614,7 +614,7 @@ datahub/metadata/com/linkedin/pegasus2avro/metadata/query/__init__.py,sha256=gsA
|
|
|
614
614
|
datahub/metadata/com/linkedin/pegasus2avro/metadata/query/filter/__init__.py,sha256=DBP_QtxkFmC5q_kuk4dGjb4uOKbB4xKgqTWXGxmNbBQ,532
|
|
615
615
|
datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py,sha256=OPboF8SV11wGnjvWQB-rxtB0otMdCsE7Tcy7xkOUgz8,2358
|
|
616
616
|
datahub/metadata/com/linkedin/pegasus2avro/ml/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
617
|
-
datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py,sha256=
|
|
617
|
+
datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py,sha256=qefB0n1xilQHCPla80b39wdjHOYoVtzBJT2jGc2szkM,3309
|
|
618
618
|
datahub/metadata/com/linkedin/pegasus2avro/mxe/__init__.py,sha256=LqGp9QTLk_tiSsbHMGSUH7uPG00Bf_qQIMiU7vtO4Tk,973
|
|
619
619
|
datahub/metadata/com/linkedin/pegasus2avro/notebook/__init__.py,sha256=BcjOsz4YeHQbLLBb4Im4uJ7ux1hGHquQDmiIOiDXVtE,901
|
|
620
620
|
datahub/metadata/com/linkedin/pegasus2avro/ownership/__init__.py,sha256=r813MW_bkP1ZpC2NJf7uCHEOapjebl611c90vryKX4A,302
|
|
@@ -713,8 +713,8 @@ datahub/metadata/schemas/DataPlatformInstanceProperties.avsc,sha256=4-UrBTtVAR0r
|
|
|
713
713
|
datahub/metadata/schemas/DataPlatformKey.avsc,sha256=5Z2adruXKzSucmgCba768UXdsGsYBH9t9DvFF9L9mxo,461
|
|
714
714
|
datahub/metadata/schemas/DataProcessInfo.avsc,sha256=n4Zuk4kpHrHI2BdINhG-OucdCefb2GEsDv5mXQtSWIw,1558
|
|
715
715
|
datahub/metadata/schemas/DataProcessInstanceInput.avsc,sha256=NDbRQULcJ9erg3X24w-LUY5AclqKemZQq0LDz1CVsFE,847
|
|
716
|
-
datahub/metadata/schemas/DataProcessInstanceKey.avsc,sha256=
|
|
717
|
-
datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=
|
|
716
|
+
datahub/metadata/schemas/DataProcessInstanceKey.avsc,sha256=YSEVtSWql1IZ9AG37HmJZ4118pgi8kVCygI_GqFf3YA,945
|
|
717
|
+
datahub/metadata/schemas/DataProcessInstanceOutput.avsc,sha256=Ra00uvvXrKVlXD1O7B-1thvxebsvCtpeQzc4bnzAflU,868
|
|
718
718
|
datahub/metadata/schemas/DataProcessInstanceProperties.avsc,sha256=2qsDFeSA2-ag5IVetgD8mW2k--F6CwmYXM3KOE6edU8,3836
|
|
719
719
|
datahub/metadata/schemas/DataProcessInstanceRelationships.avsc,sha256=VhBpnyGGvO06WEnM6zy4PmjiT0nivRQfkSdJCUgIavw,2358
|
|
720
720
|
datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkLN8NbXW9PQWFG4X6TZkZwTQ1Wb53Y,6713
|
|
@@ -796,12 +796,13 @@ datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=gmXaUYxII8BVLnXOFdlPmy
|
|
|
796
796
|
datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=88nJ4uEBeT6kGrZnhYnZF4Co8rq2SBjqszQ-owtskQ4,3133
|
|
797
797
|
datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
|
|
798
798
|
datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=eb4qdIdQLvvQ7u1e_FHIHTkWrvIwYQji0yKoGrxCiWI,2460
|
|
799
|
-
datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=
|
|
799
|
+
datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=r4ZeqDnn1wfpT6zdg8_rT8uciJWO6nMZO4kXZFALksE,3590
|
|
800
800
|
datahub/metadata/schemas/MLModelKey.avsc,sha256=deK5u7b9S9-qYUNtlflj2OUxqS_PlUYWN_NYRzZiVtI,2802
|
|
801
|
-
datahub/metadata/schemas/MLModelProperties.avsc,sha256=
|
|
801
|
+
datahub/metadata/schemas/MLModelProperties.avsc,sha256=FwhPz9P0k61LYFJLLUObFzKe24iAnsMyAPLbQCvqL7g,10079
|
|
802
802
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=mX4CQcoN3FC_VQDBCkhlmJk4pfQKDrSeuqqCTTXTmq8,1092
|
|
803
803
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=akhjegh2q_4pm4_C2mP0rWpCGVqmJ8Ta6X8lqNtbVbg,4468
|
|
804
|
-
datahub/metadata/schemas/
|
|
804
|
+
datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
|
|
805
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=2cR4D52wiIrEtWUfJIINhuTSan1495Q8lznVk72lty8,371387
|
|
805
806
|
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
|
|
806
807
|
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
|
|
807
808
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
@@ -981,8 +982,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
981
982
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
982
983
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
983
984
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
984
|
-
acryl_datahub-0.15.0.
|
|
985
|
-
acryl_datahub-0.15.0.
|
|
986
|
-
acryl_datahub-0.15.0.
|
|
987
|
-
acryl_datahub-0.15.0.
|
|
988
|
-
acryl_datahub-0.15.0.
|
|
985
|
+
acryl_datahub-0.15.0.1rc7.dist-info/METADATA,sha256=hl14lRgFU4pk8d2s_Qxx1Xtkbd2TQp6gEek2gpkea1o,173642
|
|
986
|
+
acryl_datahub-0.15.0.1rc7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
987
|
+
acryl_datahub-0.15.0.1rc7.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
988
|
+
acryl_datahub-0.15.0.1rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
989
|
+
acryl_datahub-0.15.0.1rc7.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -1408,6 +1408,15 @@ class LookerDashboardSourceReport(StaleEntityRemovalSourceReport):
|
|
|
1408
1408
|
dashboards_with_activity: LossySet[str] = dataclasses_field(
|
|
1409
1409
|
default_factory=LossySet
|
|
1410
1410
|
)
|
|
1411
|
+
|
|
1412
|
+
# Entities that don't seem to exist, so we don't emit usage aspects for them despite having usage data
|
|
1413
|
+
dashboards_skipped_for_usage: LossySet[str] = dataclasses_field(
|
|
1414
|
+
default_factory=LossySet
|
|
1415
|
+
)
|
|
1416
|
+
charts_skipped_for_usage: LossySet[str] = dataclasses_field(
|
|
1417
|
+
default_factory=LossySet
|
|
1418
|
+
)
|
|
1419
|
+
|
|
1411
1420
|
stage_latency: List[StageLatency] = dataclasses_field(default_factory=list)
|
|
1412
1421
|
_looker_explore_registry: Optional[LookerExploreRegistry] = None
|
|
1413
1422
|
total_explores: int = 0
|
|
@@ -68,6 +68,7 @@ from datahub.ingestion.source.looker.looker_common import (
|
|
|
68
68
|
ViewField,
|
|
69
69
|
ViewFieldType,
|
|
70
70
|
gen_model_key,
|
|
71
|
+
get_urn_looker_element_id,
|
|
71
72
|
)
|
|
72
73
|
from datahub.ingestion.source.looker.looker_config import LookerDashboardSourceConfig
|
|
73
74
|
from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
|
|
@@ -165,6 +166,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
165
166
|
# Required, as we do not ingest all folders but only those that have dashboards/looks
|
|
166
167
|
self.processed_folders: List[str] = []
|
|
167
168
|
|
|
169
|
+
# Keep track of ingested chart urns, to omit usage for non-ingested entities
|
|
170
|
+
self.chart_urns: Set[str] = set()
|
|
171
|
+
|
|
168
172
|
@staticmethod
|
|
169
173
|
def test_connection(config_dict: dict) -> TestConnectionReport:
|
|
170
174
|
test_report = TestConnectionReport()
|
|
@@ -642,6 +646,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
642
646
|
chart_urn = self._make_chart_urn(
|
|
643
647
|
element_id=dashboard_element.get_urn_element_id()
|
|
644
648
|
)
|
|
649
|
+
self.chart_urns.add(chart_urn)
|
|
645
650
|
chart_snapshot = ChartSnapshot(
|
|
646
651
|
urn=chart_urn,
|
|
647
652
|
aspects=[Status(removed=False)],
|
|
@@ -1380,7 +1385,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1380
1385
|
yield from self._emit_folder_as_container(folder)
|
|
1381
1386
|
|
|
1382
1387
|
def extract_usage_stat(
|
|
1383
|
-
self,
|
|
1388
|
+
self,
|
|
1389
|
+
looker_dashboards: List[looker_usage.LookerDashboardForUsage],
|
|
1390
|
+
ingested_chart_urns: Set[str],
|
|
1384
1391
|
) -> List[MetadataChangeProposalWrapper]:
|
|
1385
1392
|
looks: List[looker_usage.LookerChartForUsage] = []
|
|
1386
1393
|
# filter out look from all dashboard
|
|
@@ -1391,6 +1398,15 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1391
1398
|
|
|
1392
1399
|
# dedup looks
|
|
1393
1400
|
looks = list({str(look.id): look for look in looks}.values())
|
|
1401
|
+
filtered_looks = []
|
|
1402
|
+
for look in looks:
|
|
1403
|
+
if not look.id:
|
|
1404
|
+
continue
|
|
1405
|
+
chart_urn = self._make_chart_urn(get_urn_looker_element_id(look.id))
|
|
1406
|
+
if chart_urn in ingested_chart_urns:
|
|
1407
|
+
filtered_looks.append(look)
|
|
1408
|
+
else:
|
|
1409
|
+
self.reporter.charts_skipped_for_usage.add(look.id)
|
|
1394
1410
|
|
|
1395
1411
|
# Keep stat generators to generate entity stat aspect later
|
|
1396
1412
|
stat_generator_config: looker_usage.StatGeneratorConfig = (
|
|
@@ -1414,7 +1430,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1414
1430
|
stat_generator_config,
|
|
1415
1431
|
self.reporter,
|
|
1416
1432
|
self._make_chart_urn,
|
|
1417
|
-
|
|
1433
|
+
filtered_looks,
|
|
1418
1434
|
)
|
|
1419
1435
|
|
|
1420
1436
|
mcps: List[MetadataChangeProposalWrapper] = []
|
|
@@ -1669,7 +1685,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1669
1685
|
if self.source_config.extract_usage_history:
|
|
1670
1686
|
self.reporter.report_stage_start("usage_extraction")
|
|
1671
1687
|
usage_mcps: List[MetadataChangeProposalWrapper] = self.extract_usage_stat(
|
|
1672
|
-
looker_dashboards_for_usage
|
|
1688
|
+
looker_dashboards_for_usage, self.chart_urns
|
|
1673
1689
|
)
|
|
1674
1690
|
for usage_mcp in usage_mcps:
|
|
1675
1691
|
yield usage_mcp.as_workunit()
|
|
@@ -42,6 +42,7 @@ from datahub.metadata.schema_classes import (
|
|
|
42
42
|
TimeWindowSizeClass,
|
|
43
43
|
_Aspect as AspectAbstract,
|
|
44
44
|
)
|
|
45
|
+
from datahub.utilities.lossy_collections import LossySet
|
|
45
46
|
|
|
46
47
|
logger = logging.getLogger(__name__)
|
|
47
48
|
|
|
@@ -170,7 +171,7 @@ class BaseStatGenerator(ABC):
|
|
|
170
171
|
self.config = config
|
|
171
172
|
self.looker_models = looker_models
|
|
172
173
|
# Later it will help to find out for what are the looker entities from query result
|
|
173
|
-
self.
|
|
174
|
+
self.id_to_model: Dict[str, ModelForUsage] = {
|
|
174
175
|
self.get_id(looker_object): looker_object for looker_object in looker_models
|
|
175
176
|
}
|
|
176
177
|
self.post_filter = len(self.looker_models) > 100
|
|
@@ -225,6 +226,10 @@ class BaseStatGenerator(ABC):
|
|
|
225
226
|
def get_id_from_row(self, row: dict) -> str:
|
|
226
227
|
pass
|
|
227
228
|
|
|
229
|
+
@abstractmethod
|
|
230
|
+
def report_skip_set(self) -> LossySet[str]:
|
|
231
|
+
pass
|
|
232
|
+
|
|
228
233
|
def create_mcp(
|
|
229
234
|
self, model: ModelForUsage, aspect: Aspect
|
|
230
235
|
) -> MetadataChangeProposalWrapper:
|
|
@@ -258,20 +263,11 @@ class BaseStatGenerator(ABC):
|
|
|
258
263
|
|
|
259
264
|
return entity_stat_aspect
|
|
260
265
|
|
|
261
|
-
def _process_absolute_aspect(self) -> List[Tuple[ModelForUsage, AspectAbstract]]:
|
|
262
|
-
aspects: List[Tuple[ModelForUsage, AspectAbstract]] = []
|
|
263
|
-
for looker_object in self.looker_models:
|
|
264
|
-
aspects.append(
|
|
265
|
-
(looker_object, self.to_entity_absolute_stat_aspect(looker_object))
|
|
266
|
-
)
|
|
267
|
-
|
|
268
|
-
return aspects
|
|
269
|
-
|
|
270
266
|
def _fill_user_stat_aspect(
|
|
271
267
|
self,
|
|
272
268
|
entity_usage_stat: Dict[Tuple[str, str], Aspect],
|
|
273
269
|
user_wise_rows: List[Dict],
|
|
274
|
-
) -> Iterable[Tuple[
|
|
270
|
+
) -> Iterable[Tuple[str, Aspect]]:
|
|
275
271
|
logger.debug("Entering fill user stat aspect")
|
|
276
272
|
|
|
277
273
|
# We first resolve all the users using a threadpool to warm up the cache
|
|
@@ -300,7 +296,7 @@ class BaseStatGenerator(ABC):
|
|
|
300
296
|
|
|
301
297
|
for row in user_wise_rows:
|
|
302
298
|
# Confirm looker object was given for stat generation
|
|
303
|
-
looker_object = self.
|
|
299
|
+
looker_object = self.id_to_model.get(self.get_id_from_row(row))
|
|
304
300
|
if looker_object is None:
|
|
305
301
|
logger.warning(
|
|
306
302
|
"Looker object with id({}) was not register with stat generator".format(
|
|
@@ -338,7 +334,7 @@ class BaseStatGenerator(ABC):
|
|
|
338
334
|
logger.debug("Starting to yield answers for user-wise counts")
|
|
339
335
|
|
|
340
336
|
for (id, _), aspect in entity_usage_stat.items():
|
|
341
|
-
yield
|
|
337
|
+
yield id, aspect
|
|
342
338
|
|
|
343
339
|
def _execute_query(self, query: LookerQuery, query_name: str) -> List[Dict]:
|
|
344
340
|
rows = []
|
|
@@ -357,7 +353,7 @@ class BaseStatGenerator(ABC):
|
|
|
357
353
|
)
|
|
358
354
|
if self.post_filter:
|
|
359
355
|
logger.debug("post filtering")
|
|
360
|
-
rows = [r for r in rows if self.get_id_from_row(r) in self.
|
|
356
|
+
rows = [r for r in rows if self.get_id_from_row(r) in self.id_to_model]
|
|
361
357
|
logger.debug("Filtered down to %d rows", len(rows))
|
|
362
358
|
except Exception as e:
|
|
363
359
|
logger.warning(f"Failed to execute {query_name} query: {e}")
|
|
@@ -378,7 +374,8 @@ class BaseStatGenerator(ABC):
|
|
|
378
374
|
return
|
|
379
375
|
|
|
380
376
|
# yield absolute stat for looker entities
|
|
381
|
-
for looker_object
|
|
377
|
+
for looker_object in self.looker_models:
|
|
378
|
+
aspect = self.to_entity_absolute_stat_aspect(looker_object)
|
|
382
379
|
yield self.create_mcp(looker_object, aspect)
|
|
383
380
|
|
|
384
381
|
# Execute query and process the raw json which contains stat information
|
|
@@ -399,10 +396,13 @@ class BaseStatGenerator(ABC):
|
|
|
399
396
|
)
|
|
400
397
|
user_wise_rows = self._execute_query(user_wise_query_with_filters, "user_query")
|
|
401
398
|
# yield absolute stat for entity
|
|
402
|
-
for
|
|
399
|
+
for object_id, aspect in self._fill_user_stat_aspect(
|
|
403
400
|
entity_usage_stat, user_wise_rows
|
|
404
401
|
):
|
|
405
|
-
|
|
402
|
+
if object_id in self.id_to_model:
|
|
403
|
+
yield self.create_mcp(self.id_to_model[object_id], aspect)
|
|
404
|
+
else:
|
|
405
|
+
self.report_skip_set().add(object_id)
|
|
406
406
|
|
|
407
407
|
|
|
408
408
|
class DashboardStatGenerator(BaseStatGenerator):
|
|
@@ -425,6 +425,9 @@ class DashboardStatGenerator(BaseStatGenerator):
|
|
|
425
425
|
def get_stats_generator_name(self) -> str:
|
|
426
426
|
return "DashboardStats"
|
|
427
427
|
|
|
428
|
+
def report_skip_set(self) -> LossySet[str]:
|
|
429
|
+
return self.report.dashboards_skipped_for_usage
|
|
430
|
+
|
|
428
431
|
def get_filter(self) -> Dict[ViewField, str]:
|
|
429
432
|
return {
|
|
430
433
|
HistoryViewField.HISTORY_DASHBOARD_ID: ",".join(
|
|
@@ -541,6 +544,9 @@ class LookStatGenerator(BaseStatGenerator):
|
|
|
541
544
|
def get_stats_generator_name(self) -> str:
|
|
542
545
|
return "ChartStats"
|
|
543
546
|
|
|
547
|
+
def report_skip_set(self) -> LossySet[str]:
|
|
548
|
+
return self.report.charts_skipped_for_usage
|
|
549
|
+
|
|
544
550
|
def get_filter(self) -> Dict[ViewField, str]:
|
|
545
551
|
return {
|
|
546
552
|
LookViewField.LOOK_ID: ",".join(
|
datahub/ingestion/source/mode.py
CHANGED
|
@@ -98,6 +98,7 @@ from datahub.metadata.schema_classes import (
|
|
|
98
98
|
TagPropertiesClass,
|
|
99
99
|
UpstreamClass,
|
|
100
100
|
UpstreamLineageClass,
|
|
101
|
+
ViewPropertiesClass,
|
|
101
102
|
)
|
|
102
103
|
from datahub.metadata.urns import QueryUrn
|
|
103
104
|
from datahub.sql_parsing.sqlglot_lineage import (
|
|
@@ -930,16 +931,13 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
930
931
|
|
|
931
932
|
dataset_props = DatasetPropertiesClass(
|
|
932
933
|
name=report_info.get("name") if is_mode_dataset else query_data.get("name"),
|
|
933
|
-
description=
|
|
934
|
-
``` sql
|
|
935
|
-
{query_data.get("raw_query")}
|
|
936
|
-
```
|
|
937
|
-
""",
|
|
934
|
+
description=None,
|
|
938
935
|
externalUrl=externalUrl,
|
|
939
936
|
customProperties=self.get_custom_props_from_dict(
|
|
940
937
|
query_data,
|
|
941
938
|
[
|
|
942
|
-
"id"
|
|
939
|
+
"id",
|
|
940
|
+
"created_at",
|
|
943
941
|
"updated_at",
|
|
944
942
|
"last_run_id",
|
|
945
943
|
"data_source_id",
|
|
@@ -949,7 +947,6 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
949
947
|
],
|
|
950
948
|
),
|
|
951
949
|
)
|
|
952
|
-
|
|
953
950
|
yield (
|
|
954
951
|
MetadataChangeProposalWrapper(
|
|
955
952
|
entityUrn=query_urn,
|
|
@@ -957,6 +954,16 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
957
954
|
).as_workunit()
|
|
958
955
|
)
|
|
959
956
|
|
|
957
|
+
if raw_query := query_data.get("raw_query"):
|
|
958
|
+
yield MetadataChangeProposalWrapper(
|
|
959
|
+
entityUrn=query_urn,
|
|
960
|
+
aspect=ViewPropertiesClass(
|
|
961
|
+
viewLogic=raw_query,
|
|
962
|
+
viewLanguage=QueryLanguageClass.SQL,
|
|
963
|
+
materialized=False,
|
|
964
|
+
),
|
|
965
|
+
).as_workunit()
|
|
966
|
+
|
|
960
967
|
if is_mode_dataset:
|
|
961
968
|
space_container_key = self.gen_space_key(space_token)
|
|
962
969
|
yield from add_dataset_to_container(
|
|
@@ -163,26 +163,13 @@ class SnowflakeConfig(
|
|
|
163
163
|
default=True,
|
|
164
164
|
description="If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role and Snowflake Enterprise Edition or above.",
|
|
165
165
|
)
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
)
|
|
166
|
+
|
|
167
|
+
_include_view_lineage = pydantic_removed_field("include_view_lineage")
|
|
168
|
+
_include_view_column_lineage = pydantic_removed_field("include_view_column_lineage")
|
|
170
169
|
|
|
171
170
|
ignore_start_time_lineage: bool = False
|
|
172
171
|
upstream_lineage_in_report: bool = False
|
|
173
172
|
|
|
174
|
-
@pydantic.root_validator(skip_on_failure=True)
|
|
175
|
-
def validate_include_view_lineage(cls, values):
|
|
176
|
-
if (
|
|
177
|
-
"include_table_lineage" in values
|
|
178
|
-
and not values.get("include_table_lineage")
|
|
179
|
-
and values.get("include_view_lineage")
|
|
180
|
-
):
|
|
181
|
-
raise ValueError(
|
|
182
|
-
"include_table_lineage must be True for include_view_lineage to be set."
|
|
183
|
-
)
|
|
184
|
-
return values
|
|
185
|
-
|
|
186
173
|
|
|
187
174
|
class SnowflakeV2Config(
|
|
188
175
|
SnowflakeConfig,
|
|
@@ -222,11 +209,6 @@ class SnowflakeV2Config(
|
|
|
222
209
|
description="Populates table->table and view->table column lineage. Requires appropriate grants given to the role and the Snowflake Enterprise Edition or above.",
|
|
223
210
|
)
|
|
224
211
|
|
|
225
|
-
include_view_column_lineage: bool = Field(
|
|
226
|
-
default=True,
|
|
227
|
-
description="Populates view->view and table->view column lineage using DataHub's sql parser.",
|
|
228
|
-
)
|
|
229
|
-
|
|
230
212
|
use_queries_v2: bool = Field(
|
|
231
213
|
default=False,
|
|
232
214
|
description="If enabled, uses the new queries extractor to extract queries from snowflake.",
|
|
@@ -355,10 +337,6 @@ class SnowflakeV2Config(
|
|
|
355
337
|
self, database=database, username=username, password=password, role=role
|
|
356
338
|
)
|
|
357
339
|
|
|
358
|
-
@property
|
|
359
|
-
def parse_view_ddl(self) -> bool:
|
|
360
|
-
return self.include_view_column_lineage
|
|
361
|
-
|
|
362
340
|
@validator("shares")
|
|
363
341
|
def validate_shares(
|
|
364
342
|
cls, shares: Optional[Dict[str, SnowflakeShareConfig]], values: Dict
|
|
@@ -8,7 +8,6 @@ from pydantic import BaseModel, Field, validator
|
|
|
8
8
|
|
|
9
9
|
from datahub.configuration.datetimes import parse_absolute_time
|
|
10
10
|
from datahub.ingestion.api.closeable import Closeable
|
|
11
|
-
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
12
11
|
from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage
|
|
13
12
|
from datahub.ingestion.source.snowflake.constants import (
|
|
14
13
|
LINEAGE_PERMISSION_ERROR,
|
|
@@ -163,11 +162,11 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
|
|
|
163
162
|
self.config.end_time,
|
|
164
163
|
)
|
|
165
164
|
|
|
166
|
-
def get_workunits(
|
|
165
|
+
def add_time_based_lineage_to_aggregator(
|
|
167
166
|
self,
|
|
168
167
|
discovered_tables: List[str],
|
|
169
168
|
discovered_views: List[str],
|
|
170
|
-
) -> Iterable[MetadataWorkUnit]:
|
|
169
|
+
) -> None:
|
|
171
170
|
if not self._should_ingest_lineage():
|
|
172
171
|
return
|
|
173
172
|
|
|
@@ -177,9 +176,7 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
|
|
|
177
176
|
# snowflake view/table -> snowflake table
|
|
178
177
|
self.populate_table_upstreams(discovered_tables)
|
|
179
178
|
|
|
180
|
-
|
|
181
|
-
yield mcp.as_workunit()
|
|
182
|
-
|
|
179
|
+
def update_state(self):
|
|
183
180
|
if self.redundant_run_skip_handler:
|
|
184
181
|
# Update the checkpoint state for this run.
|
|
185
182
|
self.redundant_run_skip_handler.update_state(
|
|
@@ -337,10 +334,6 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
|
|
|
337
334
|
start_time_millis=int(self.start_time.timestamp() * 1000),
|
|
338
335
|
end_time_millis=int(self.end_time.timestamp() * 1000),
|
|
339
336
|
upstreams_deny_pattern=self.config.temporary_tables_pattern,
|
|
340
|
-
# The self.config.include_view_lineage setting is about fetching upstreams of views.
|
|
341
|
-
# We always generate lineage pointing at views from tables, even if self.config.include_view_lineage is False.
|
|
342
|
-
# TODO: Remove this `include_view_lineage` flag, since it's effectively dead code.
|
|
343
|
-
include_view_lineage=True,
|
|
344
337
|
include_column_lineage=self.config.include_column_lineage,
|
|
345
338
|
)
|
|
346
339
|
try:
|
|
@@ -376,7 +376,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
376
376
|
def table_to_table_lineage_history_v2(
|
|
377
377
|
start_time_millis: int,
|
|
378
378
|
end_time_millis: int,
|
|
379
|
-
include_view_lineage: bool = True,
|
|
380
379
|
include_column_lineage: bool = True,
|
|
381
380
|
upstreams_deny_pattern: List[str] = DEFAULT_TEMP_TABLES_PATTERNS,
|
|
382
381
|
) -> str:
|
|
@@ -385,14 +384,12 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
385
384
|
start_time_millis,
|
|
386
385
|
end_time_millis,
|
|
387
386
|
upstreams_deny_pattern,
|
|
388
|
-
include_view_lineage,
|
|
389
387
|
)
|
|
390
388
|
else:
|
|
391
389
|
return SnowflakeQuery.table_upstreams_only(
|
|
392
390
|
start_time_millis,
|
|
393
391
|
end_time_millis,
|
|
394
392
|
upstreams_deny_pattern,
|
|
395
|
-
include_view_lineage,
|
|
396
393
|
)
|
|
397
394
|
|
|
398
395
|
@staticmethod
|
|
@@ -677,12 +674,9 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
677
674
|
start_time_millis: int,
|
|
678
675
|
end_time_millis: int,
|
|
679
676
|
upstreams_deny_pattern: List[str],
|
|
680
|
-
include_view_lineage: bool = True,
|
|
681
677
|
) -> str:
|
|
682
678
|
allowed_upstream_table_domains = (
|
|
683
679
|
SnowflakeQuery.ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER
|
|
684
|
-
if include_view_lineage
|
|
685
|
-
else SnowflakeQuery.ACCESS_HISTORY_TABLE_DOMAINS_FILTER
|
|
686
680
|
)
|
|
687
681
|
|
|
688
682
|
upstream_sql_filter = create_deny_regex_sql_filter(
|
|
@@ -847,12 +841,9 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
847
841
|
start_time_millis: int,
|
|
848
842
|
end_time_millis: int,
|
|
849
843
|
upstreams_deny_pattern: List[str],
|
|
850
|
-
include_view_lineage: bool = True,
|
|
851
844
|
) -> str:
|
|
852
845
|
allowed_upstream_table_domains = (
|
|
853
846
|
SnowflakeQuery.ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER
|
|
854
|
-
if include_view_lineage
|
|
855
|
-
else SnowflakeQuery.ACCESS_HISTORY_TABLE_DOMAINS_FILTER
|
|
856
847
|
)
|
|
857
848
|
|
|
858
849
|
upstream_sql_filter = create_deny_regex_sql_filter(
|
|
@@ -435,11 +435,7 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
|
|
|
435
435
|
)
|
|
436
436
|
|
|
437
437
|
if self.config.include_views:
|
|
438
|
-
if (
|
|
439
|
-
self.aggregator
|
|
440
|
-
and self.config.include_view_lineage
|
|
441
|
-
and self.config.parse_view_ddl
|
|
442
|
-
):
|
|
438
|
+
if self.aggregator:
|
|
443
439
|
for view in views:
|
|
444
440
|
view_identifier = self.identifiers.get_dataset_identifier(
|
|
445
441
|
view.name, schema_name, db_name
|
|
@@ -72,7 +72,7 @@ class SnowflakeSharesHandler(SnowflakeCommonMixin):
|
|
|
72
72
|
assert len(sibling_dbs) == 1
|
|
73
73
|
# SnowflakeLineageExtractor is unaware of database->schema->table hierarchy
|
|
74
74
|
# hence this lineage code is not written in SnowflakeLineageExtractor
|
|
75
|
-
# also this is not governed by configs include_table_lineage and include_view_lineage
|
|
75
|
+
# also this is not governed by configs include_table_lineage
|
|
76
76
|
yield self.get_upstream_lineage_with_primary_sibling(
|
|
77
77
|
db.name, schema.name, table_name, sibling_dbs[0]
|
|
78
78
|
)
|
|
@@ -82,6 +82,7 @@ from datahub.ingestion.source_report.ingestion_stage import (
|
|
|
82
82
|
LINEAGE_EXTRACTION,
|
|
83
83
|
METADATA_EXTRACTION,
|
|
84
84
|
QUERIES_EXTRACTION,
|
|
85
|
+
VIEW_PARSING,
|
|
85
86
|
)
|
|
86
87
|
from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator
|
|
87
88
|
from datahub.utilities.registries.domain_registry import DomainRegistry
|
|
@@ -103,7 +104,7 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
103
104
|
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
|
104
105
|
@capability(
|
|
105
106
|
SourceCapability.LINEAGE_COARSE,
|
|
106
|
-
"Enabled by default, can be disabled via configuration `include_table_lineage` and `include_view_lineage`",
|
|
107
|
+
"Enabled by default, can be disabled via configuration `include_table_lineage`",
|
|
107
108
|
)
|
|
108
109
|
@capability(
|
|
109
110
|
SourceCapability.LINEAGE_FINE,
|
|
@@ -512,15 +513,14 @@ class SnowflakeV2Source(
|
|
|
512
513
|
discovered_datasets = discovered_tables + discovered_views
|
|
513
514
|
|
|
514
515
|
if self.config.use_queries_v2:
|
|
515
|
-
self.report.set_ingestion_stage("*",
|
|
516
|
-
assert self.aggregator is not None
|
|
516
|
+
self.report.set_ingestion_stage("*", VIEW_PARSING)
|
|
517
517
|
yield from auto_workunit(self.aggregator.gen_metadata())
|
|
518
518
|
|
|
519
519
|
self.report.set_ingestion_stage("*", QUERIES_EXTRACTION)
|
|
520
520
|
|
|
521
521
|
schema_resolver = self.aggregator._schema_resolver
|
|
522
522
|
|
|
523
|
-
queries_extractor
|
|
523
|
+
queries_extractor = SnowflakeQueriesExtractor(
|
|
524
524
|
connection=self.connection,
|
|
525
525
|
config=SnowflakeQueriesExtractorConfig(
|
|
526
526
|
window=self.config,
|
|
@@ -546,13 +546,21 @@ class SnowflakeV2Source(
|
|
|
546
546
|
queries_extractor.close()
|
|
547
547
|
|
|
548
548
|
else:
|
|
549
|
-
if self.
|
|
549
|
+
if self.lineage_extractor:
|
|
550
550
|
self.report.set_ingestion_stage("*", LINEAGE_EXTRACTION)
|
|
551
|
-
|
|
551
|
+
self.lineage_extractor.add_time_based_lineage_to_aggregator(
|
|
552
552
|
discovered_tables=discovered_tables,
|
|
553
553
|
discovered_views=discovered_views,
|
|
554
554
|
)
|
|
555
555
|
|
|
556
|
+
# This would emit view and external table ddl lineage
|
|
557
|
+
# as well as query lineage via lineage_extractor
|
|
558
|
+
for mcp in self.aggregator.gen_metadata():
|
|
559
|
+
yield mcp.as_workunit()
|
|
560
|
+
|
|
561
|
+
if self.lineage_extractor:
|
|
562
|
+
self.lineage_extractor.update_state()
|
|
563
|
+
|
|
556
564
|
if (
|
|
557
565
|
self.config.include_usage_stats or self.config.include_operational_stats
|
|
558
566
|
) and self.usage_extractor:
|