acryl-datahub 0.15.0.3rc1__py3-none-any.whl → 0.15.0.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
- datahub/__init__.py,sha256=2Qi1M0twhFDzi6WSn0GA-qtAgY8DJPzJGRgXBzJrkHc,576
1
+ datahub/__init__.py,sha256=xAoAiT2wj9RiduZiLAlc28hjzEtCWW6b2I4AOY-rBkc,576
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/entrypoints.py,sha256=IMtLWvGuiqoUSnNaCaFjhd86NHwuXSWXp2kUL-xDkk0,7950
3
+ datahub/entrypoints.py,sha256=vbkUx_jVIkr_V4wtoQhOpledna-pD_tco1mloRnb7QY,8029
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -61,6 +61,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
61
  datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
62
62
  datahub/cli/cli_utils.py,sha256=onbG7z9hIm0zCAm0a2ulTOsHC_NVkdIsbg__EMj02DQ,13540
63
63
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
64
+ datahub/cli/container_cli.py,sha256=t5hEKqmTEi9LMAs6DTuHtIJBSU5LAXND3JyCLAdJOO8,2828
64
65
  datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
65
66
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
66
67
  datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
@@ -219,7 +220,7 @@ datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
219
220
  datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
220
221
  datahub/ingestion/source/aws/glue.py,sha256=9KYv53loNa-keVwCRLDb-5II_AjeHVRf1Fb2HXqtZXk,57653
221
222
  datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Y54jlLV5gLcuZ4Zs57kIW5dYHD89RSFfsVNlFbRnSkQ,3901
222
- datahub/ingestion/source/aws/s3_util.py,sha256=pikTe9SuiKdN-TZ8eOhB0PYq0aUgUPDpxwtTLsVofRs,2834
223
+ datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
223
224
  datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
224
225
  datahub/ingestion/source/aws/sagemaker_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
225
226
  datahub/ingestion/source/aws/sagemaker_processors/common.py,sha256=NvYfI8LHgDvhEZE7qp6qF1NSZ0_SQKhg3ivtdjsdpFg,2172
@@ -243,7 +244,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256
243
244
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
244
245
  datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=qH8k8wyMlUVzUTVhSd3FgOMGCK1D5NYuC0KF8tez_Ys,7957
245
246
  datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
246
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=XdlTd4VxhYinbLt_UNsiZroaRqD7Fy7lzncskHzD_nc,50790
247
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=c1hlsgat7l27fQN8GwvHkdme7rQ4LqIQKFwwA8z7kqw,50824
247
248
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
248
249
  datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
249
250
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Dkig1SEfPxw6zZDeSulUYnqsu4WGCVPXypGPEUVriyU,44907
@@ -274,7 +275,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
274
275
  datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
275
276
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
276
277
  datahub/ingestion/source/dbt/dbt_cloud.py,sha256=tNpSHbPlLq-oFGbJsdkWY9kIaWmpjcZLWhj1CSewGGY,17981
277
- datahub/ingestion/source/dbt/dbt_common.py,sha256=ivB2cnm7zGSZzP36Etk54bSn-VmLjfAQK7wl4thwYrc,80499
278
+ datahub/ingestion/source/dbt/dbt_common.py,sha256=y4VINaQQ-WhEf-rICGLGi1U88nKmRdVQPmh88OJROWg,80536
278
279
  datahub/ingestion/source/dbt/dbt_core.py,sha256=m6cA9vVd4Nh2arc-T2_xeQoxvreRbMhTDIJuYsx3wHc,22722
279
280
  datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
280
281
  datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
@@ -403,7 +404,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
403
404
  datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
404
405
  datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
405
406
  datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
406
- datahub/ingestion/source/s3/source.py,sha256=VvpO1kSLllD6hugVIU5eNRGd42X6hy0GLedh_w4t9yA,47261
407
+ datahub/ingestion/source/s3/source.py,sha256=IE_K_HE_S7w8fpGPT8OptU5-VmwapntsI5PePv_wUQA,47412
407
408
  datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
408
409
  datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
409
410
  datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
@@ -452,7 +453,7 @@ datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2
452
453
  datahub/ingestion/source/sql/druid.py,sha256=lhO9CCOlHV-6LjBuAxAxtB9I1pvPtsGSdr63bz6_ilA,2837
453
454
  datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
454
455
  datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnXYZT7vZE,30306
455
- datahub/ingestion/source/sql/hive_metastore.py,sha256=n9WvJzGBYVwjSUKuAWQcYuRJttH81k2S2zjHuw8gvME,36074
456
+ datahub/ingestion/source/sql/hive_metastore.py,sha256=65DI0PeJMpGOEhTfo6cygeybgaFqi93yGnLLRy58ATo,36117
456
457
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
457
458
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
458
459
  datahub/ingestion/source/sql/oracle.py,sha256=tVP3AiZO97psM8O8UzBb9C7__s8y4fkyQbXBv3m1LU4,24503
@@ -467,7 +468,7 @@ datahub/ingestion/source/sql/sql_types.py,sha256=uuU3taVe4oCTXkqg1wSMGzTwVleRyUR
467
468
  datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
468
469
  datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
469
470
  datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
470
- datahub/ingestion/source/sql/teradata.py,sha256=ioGzrSpBsKmf2WIAxDofhyq3FU2xpa31bTZjrQhz6-M,32707
471
+ datahub/ingestion/source/sql/teradata.py,sha256=5lTNMOOOmrG71fTAyTs7iYFroeTiGIdATwXQmH6sWJg,32741
471
472
  datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhllRJFUzfU8,17895
472
473
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
473
474
  datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
@@ -491,7 +492,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
491
492
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
492
493
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
493
494
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
494
- datahub/ingestion/source/tableau/tableau.py,sha256=F2SaJVvhCSe3yvoAvdM5mPpMkdyfY3_zKQ0lLbA1g38,152539
495
+ datahub/ingestion/source/tableau/tableau.py,sha256=YjWyzYZ7hGeMxlqAUQNNJ9LJXlHrc5fHqf7lBWkr1aE,153184
495
496
  datahub/ingestion/source/tableau/tableau_common.py,sha256=3AUgXxTGOKM609xvcDrRItGXhUfuNYku2LFaj8z2Hg4,26936
496
497
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
497
498
  datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
@@ -566,12 +567,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
566
567
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
567
568
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
568
569
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
569
- datahub/metadata/_schema_classes.py,sha256=sbCVtCvb9xI_4p_Q7WOqae_HqHa1dpteT6gNn8dd3bk,975061
570
- datahub/metadata/schema.avsc,sha256=59Q_iZ204Yr-t66h0ESaY7YTnGzuc99A6g2_4cvk04k,736072
570
+ datahub/metadata/_schema_classes.py,sha256=GMLN7Ov0m39EWaXlziVgINqxhihZDzNy2BztBIR9YM8,975061
571
+ datahub/metadata/schema.avsc,sha256=sAPtgHSNJ1a126Vz7OjVIMKFjrrIG9f4cvRH6SkJ0jc,640786
571
572
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
572
573
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
573
574
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
574
- datahub/metadata/_urns/urn_defs.py,sha256=7wLzbGE-UnPZiJlCm8RcU3hROhHJ3QtwxJLGFLLjJlw,109984
575
+ datahub/metadata/_urns/urn_defs.py,sha256=MdAOrpRL4CL5VKSBk1I_DTVYz2_rYuIQ9tAEmAdIK4I,109984
575
576
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
576
577
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
577
578
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -885,7 +886,7 @@ datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn
885
886
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
886
887
  datahub/sql_parsing/schema_resolver.py,sha256=8dYz6pC3Y35pXBn41grOE2dKkSiSeLHOz-N138uWQg4,10796
887
888
  datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
888
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=tRr5o9_dcMa8sfVyo6iOS2aHT2-gxC90ZXW5-QX7998,70121
889
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=XNZWjeaRhzaT92mzsJZGJfYaxJENsyp5dSHTmL81RIc,70130
889
890
  datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
890
891
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
891
892
  datahub/sql_parsing/sqlglot_lineage.py,sha256=42n8yCmCt25bOR8fmq4n_nNubn5kLuw_Mx36SFC9Nj0,47460
@@ -918,6 +919,7 @@ datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,11
918
919
  datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
919
920
  datahub/utilities/file_backed_collections.py,sha256=B3gQS0isgbCM9cH3DEBzpA4PVixtSwr5vJoNGmEG-fg,21960
920
921
  datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
922
+ datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
921
923
  datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
922
924
  datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
923
925
  datahub/utilities/logging_manager.py,sha256=bc-x5VZGvFUHT0HD-TF3Uz_nzw3dpKdJSbz6kjpAqAQ,10073
@@ -990,8 +992,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
990
992
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
991
993
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
992
994
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
993
- acryl_datahub-0.15.0.3rc1.dist-info/METADATA,sha256=_OcIbELm2dNMPpLWj69rGVeCXvqg-Nw9OcomoonUAq0,173250
994
- acryl_datahub-0.15.0.3rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
995
- acryl_datahub-0.15.0.3rc1.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
996
- acryl_datahub-0.15.0.3rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
997
- acryl_datahub-0.15.0.3rc1.dist-info/RECORD,,
995
+ acryl_datahub-0.15.0.4rc1.dist-info/METADATA,sha256=78vUNhiirHpceObDU05VhkgjPwGVXn0TOFHunHTcnV0,173250
996
+ acryl_datahub-0.15.0.4rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
997
+ acryl_datahub-0.15.0.4rc1.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
998
+ acryl_datahub-0.15.0.4rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
999
+ acryl_datahub-0.15.0.4rc1.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0.3rc1"
6
+ __version__ = "0.15.0.4rc1"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -0,0 +1,89 @@
1
+ import logging
2
+ from typing import List
3
+
4
+ import click
5
+
6
+ from datahub.ingestion.graph.client import get_default_graph
7
+ from datahub.metadata.schema_classes import (
8
+ GlossaryTermAssociationClass,
9
+ OwnerClass,
10
+ OwnershipTypeClass,
11
+ TagAssociationClass,
12
+ )
13
+ from datahub.specific.dataset import DatasetPatchBuilder
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
# Root click group for the container commands; the subcommands in this module
# attach themselves to it through the `@container.command()` decorator.
@click.group()
def container() -> None:
    """A group of commands to interact with containers in DataHub."""
22
+
23
+
24
def apply_association_to_container(
    container_urn: str,
    association_urn: str,
    association_type: str,
) -> None:
    """
    Add a tag, glossary term, or owner to every dataset inside a container.

    Only entities of type "dataset" under the container are patched (for now).
    Owners are always attached with the TECHNICAL_OWNER ownership type.

    Args:
        container_urn: The URN of the container
        association_urn: The URN of the tag, term, or user to apply
        association_type: One of 'tag', 'term', or 'owner'

    Raises:
        ValueError: If association_type is not one of the supported values.
    """
    # Reject unknown types before the graph client is created. Previously an
    # unrecognised value left `patches` unassigned, which only surfaced as an
    # UnboundLocalError once the first dataset was being processed (and was
    # silently ignored when the container had no datasets).
    supported_types = ("tag", "term", "owner")
    if association_type not in supported_types:
        raise ValueError(
            f"Unsupported association_type {association_type!r}; "
            f"expected one of: {', '.join(supported_types)}"
        )

    graph = get_default_graph()
    logger.info("Using %s", graph)

    # Collect every matching dataset URN up front, before any patch is emitted.
    urns: List[str] = list(
        graph.get_urns_by_filter(
            container=container_urn, batch_size=1000, entity_types=["dataset"]
        )
    )

    for urn in urns:
        logger.info("Adding %s %s to %s", association_type, association_urn, urn)
        builder = DatasetPatchBuilder(urn)

        if association_type == "tag":
            patches = builder.add_tag(TagAssociationClass(association_urn)).build()
        elif association_type == "term":
            patches = builder.add_term(
                GlossaryTermAssociationClass(association_urn)
            ).build()
        else:
            # "owner" is the only value left after the validation above.
            patches = builder.add_owner(
                OwnerClass(
                    owner=association_urn,
                    type=OwnershipTypeClass.TECHNICAL_OWNER,
                )
            ).build()

        for mcp in patches:
            graph.emit(mcp)
66
+
67
+
68
@container.command()
@click.option("--container-urn", required=True, type=str)
@click.option("--tag-urn", required=True, type=str)
def tag(container_urn: str, tag_urn: str) -> None:
    """Add patch to add a tag to all datasets in a container"""
    # Delegate to the shared helper, selecting its "tag" branch.
    apply_association_to_container(
        container_urn=container_urn,
        association_urn=tag_urn,
        association_type="tag",
    )
74
+
75
+
76
@container.command()
@click.option("--container-urn", required=True, type=str)
@click.option("--term-urn", required=True, type=str)
def term(container_urn: str, term_urn: str) -> None:
    """Add patch to add a term to all datasets in a container"""
    # Delegate to the shared helper, selecting its "term" branch.
    apply_association_to_container(
        container_urn=container_urn,
        association_urn=term_urn,
        association_type="term",
    )
82
+
83
+
84
@container.command()
@click.option("--container-urn", required=True, type=str)
@click.option("--owner-id", required=True, type=str)
def owner(container_urn: str, owner_id: str) -> None:
    """Add patch to add an owner to all datasets in a container"""
    # NOTE(review): despite the option name, `owner_id` is forwarded unchanged
    # as the association URN and ends up as `OwnerClass.owner` — presumably
    # callers must pass a full owner URN rather than a bare id; confirm.
    apply_association_to_container(
        container_urn=container_urn,
        association_urn=owner_id,
        association_type="owner",
    )
datahub/entrypoints.py CHANGED
@@ -14,6 +14,7 @@ from datahub.cli.cli_utils import (
14
14
  make_shim_command,
15
15
  )
16
16
  from datahub.cli.config_utils import DATAHUB_CONFIG_PATH, write_gms_config
17
+ from datahub.cli.container_cli import container
17
18
  from datahub.cli.delete_cli import delete
18
19
  from datahub.cli.docker_cli import docker
19
20
  from datahub.cli.env_utils import get_boolean_env_variable
@@ -180,6 +181,7 @@ datahub.add_command(properties)
180
181
  datahub.add_command(forms)
181
182
  datahub.add_command(datacontract)
182
183
  datahub.add_command(assertions)
184
+ datahub.add_command(container)
183
185
 
184
186
  try:
185
187
  from datahub.cli.lite_cli import lite
@@ -1,11 +1,6 @@
1
1
  import logging
2
2
  import os
3
- from collections import defaultdict
4
- from typing import TYPE_CHECKING, Dict, Iterable, List, Optional
5
-
6
- if TYPE_CHECKING:
7
- from mypy_boto3_s3.service_resource import ObjectSummary
8
-
3
+ from typing import Optional
9
4
 
10
5
  S3_PREFIXES = ["s3://", "s3n://", "s3a://"]
11
6
 
@@ -73,21 +68,3 @@ def get_key_prefix(s3_uri: str) -> str:
73
68
  f"Not an S3 URI. Must start with one of the following prefixes: {str(S3_PREFIXES)}"
74
69
  )
75
70
  return strip_s3_prefix(s3_uri).split("/", maxsplit=1)[1]
76
-
77
-
78
- def group_s3_objects_by_dirname(
79
- s3_objects: Iterable["ObjectSummary"],
80
- ) -> Dict[str, List["ObjectSummary"]]:
81
- """
82
- Groups S3 objects by their directory name.
83
-
84
- If a s3_object in the root directory (i.e., s3://bucket/file.txt), it is grouped under '/'.
85
- """
86
- grouped_s3_objs = defaultdict(list)
87
- for obj in s3_objects:
88
- if "/" in obj.key:
89
- dirname = obj.key.rsplit("/", 1)[0]
90
- else:
91
- dirname = "/"
92
- grouped_s3_objs[dirname].append(obj)
93
- return grouped_s3_objs
@@ -2,7 +2,6 @@ import logging
2
2
  import re
3
3
  from base64 import b32decode
4
4
  from collections import defaultdict
5
- from itertools import groupby
6
5
  from typing import Dict, Iterable, List, Optional, Set, Type, Union, cast
7
6
 
8
7
  from google.cloud.bigquery.table import TableListItem
@@ -101,6 +100,7 @@ from datahub.metadata.schema_classes import (
101
100
  from datahub.metadata.urns import TagUrn
102
101
  from datahub.sql_parsing.schema_resolver import SchemaResolver
103
102
  from datahub.utilities.file_backed_collections import FileBackedDict
103
+ from datahub.utilities.groupby import groupby_unsorted
104
104
  from datahub.utilities.hive_schema_to_avro import (
105
105
  HiveColumnToAvroConverter,
106
106
  get_schema_fields_for_hive_column,
@@ -730,7 +730,7 @@ class BigQuerySchemaGenerator:
730
730
  foreign_keys: List[BigqueryTableConstraint] = list(
731
731
  filter(lambda x: x.type == "FOREIGN KEY", table.constraints)
732
732
  )
733
- for key, group in groupby(
733
+ for key, group in groupby_unsorted(
734
734
  foreign_keys,
735
735
  lambda x: f"{x.referenced_project_id}.{x.referenced_dataset}.{x.referenced_table_name}",
736
736
  ):
@@ -1,4 +1,3 @@
1
- import itertools
2
1
  import logging
3
2
  import re
4
3
  from abc import abstractmethod
@@ -111,6 +110,7 @@ from datahub.sql_parsing.sqlglot_utils import (
111
110
  parse_statements_and_pick,
112
111
  try_format_query,
113
112
  )
113
+ from datahub.utilities.groupby import groupby_unsorted
114
114
  from datahub.utilities.lossy_collections import LossyList
115
115
  from datahub.utilities.mapping import Constants, OperationProcessor
116
116
  from datahub.utilities.time import datetime_to_ts_millis
@@ -1929,7 +1929,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
1929
1929
  else None
1930
1930
  ),
1931
1931
  )
1932
- for downstream, upstreams in itertools.groupby(
1932
+ for downstream, upstreams in groupby_unsorted(
1933
1933
  node.upstream_cll, lambda x: x.downstream_col
1934
1934
  )
1935
1935
  ]
@@ -40,7 +40,6 @@ from datahub.ingestion.source.aws.s3_util import (
40
40
  get_bucket_name,
41
41
  get_bucket_relative_path,
42
42
  get_key_prefix,
43
- group_s3_objects_by_dirname,
44
43
  strip_s3_prefix,
45
44
  )
46
45
  from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
@@ -73,6 +72,7 @@ from datahub.metadata.schema_classes import (
73
72
  _Aspect,
74
73
  )
75
74
  from datahub.telemetry import stats, telemetry
75
+ from datahub.utilities.groupby import groupby_unsorted
76
76
  from datahub.utilities.perf_timer import PerfTimer
77
77
 
78
78
  if TYPE_CHECKING:
@@ -868,7 +868,11 @@ class S3Source(StatefulIngestionSourceBase):
868
868
  """
869
869
  partitions: List[Folder] = []
870
870
  s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
871
- for key, group in group_s3_objects_by_dirname(s3_objects).items():
871
+ grouped_s3_objects_by_dirname = groupby_unsorted(
872
+ s3_objects,
873
+ key=lambda obj: obj.key.rsplit("/", 1)[0],
874
+ )
875
+ for key, group in grouped_s3_objects_by_dirname:
872
876
  file_size = 0
873
877
  creation_time = None
874
878
  modification_time = None
@@ -2,7 +2,6 @@ import base64
2
2
  import json
3
3
  import logging
4
4
  from collections import namedtuple
5
- from itertools import groupby
6
5
  from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
7
6
 
8
7
  from pydantic.dataclasses import dataclass
@@ -58,6 +57,7 @@ from datahub.metadata.schema_classes import (
58
57
  SubTypesClass,
59
58
  ViewPropertiesClass,
60
59
  )
60
+ from datahub.utilities.groupby import groupby_unsorted
61
61
  from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column
62
62
  from datahub.utilities.str_enum import StrEnum
63
63
 
@@ -490,7 +490,7 @@ class HiveMetastoreSource(SQLAlchemySource):
490
490
 
491
491
  iter_res = self._alchemy_client.execute_query(statement)
492
492
 
493
- for key, group in groupby(iter_res, self._get_table_key):
493
+ for key, group in groupby_unsorted(iter_res, self._get_table_key):
494
494
  schema_name = (
495
495
  f"{db_name}.{key.schema}"
496
496
  if self.config.include_catalog_name_in_ids
@@ -647,7 +647,7 @@ class HiveMetastoreSource(SQLAlchemySource):
647
647
  )
648
648
 
649
649
  iter_res = self._alchemy_client.execute_query(statement)
650
- for key, group in groupby(iter_res, self._get_table_key):
650
+ for key, group in groupby_unsorted(iter_res, self._get_table_key):
651
651
  db_name = self.get_db_name(inspector)
652
652
 
653
653
  schema_name = (
@@ -3,7 +3,6 @@ from collections import defaultdict
3
3
  from dataclasses import dataclass
4
4
  from datetime import datetime
5
5
  from functools import lru_cache
6
- from itertools import groupby
7
6
  from typing import (
8
7
  Any,
9
8
  Dict,
@@ -59,6 +58,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
59
58
  from datahub.metadata.schema_classes import SchemaMetadataClass
60
59
  from datahub.sql_parsing.schema_resolver import SchemaResolver
61
60
  from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
61
+ from datahub.utilities.groupby import groupby_unsorted
62
62
 
63
63
  logger: logging.Logger = logging.getLogger(__name__)
64
64
 
@@ -286,7 +286,7 @@ def optimized_get_foreign_keys(self, connection, table_name, schema=None, **kw):
286
286
 
287
287
  # TODO: Check if there's a better way
288
288
  fk_dicts = list()
289
- for constraint_info, constraint_cols in groupby(res, grouper):
289
+ for constraint_info, constraint_cols in groupby_unsorted(res, grouper):
290
290
  fk_dict = {
291
291
  "name": str(constraint_info["name"]),
292
292
  "constrained_columns": list(),
@@ -1147,23 +1147,36 @@ class TableauSiteSource:
1147
1147
  )
1148
1148
  # Set parent project name
1149
1149
  for _project_id, project in all_project_map.items():
1150
- if (
1151
- project.parent_id is not None
1152
- and project.parent_id in all_project_map
1153
- ):
1150
+ if project.parent_id is None:
1151
+ continue
1152
+
1153
+ if project.parent_id in all_project_map:
1154
1154
  project.parent_name = all_project_map[project.parent_id].name
1155
+ else:
1156
+ self.report.warning(
1157
+ title="Incomplete project hierarchy",
1158
+ message="Project details missing. Child projects will be ingested without reference to their parent project. We generally need Site Administrator Explorer permissions to extract the complete project hierarchy.",
1159
+ context=f"Missing {project.parent_id}, referenced by {project.id} {project.project_name}",
1160
+ )
1161
+ project.parent_id = None
1162
+
1163
+ # Post-condition
1164
+ assert all(
1165
+ [
1166
+ ((project.parent_id is None) == (project.parent_name is None))
1167
+ and (
1168
+ project.parent_id is None
1169
+ or project.parent_id in all_project_map
1170
+ )
1171
+ for project in all_project_map.values()
1172
+ ]
1173
+ ), "Parent project id and name should be consistent"
1155
1174
 
1156
1175
  def set_project_path():
1157
1176
  def form_path(project_id: str) -> List[str]:
1158
1177
  cur_proj = all_project_map[project_id]
1159
1178
  ancestors = [cur_proj.name]
1160
1179
  while cur_proj.parent_id is not None:
1161
- if cur_proj.parent_id not in all_project_map:
1162
- self.report.warning(
1163
- "project-issue",
1164
- f"Parent project {cur_proj.parent_id} not found. We need Site Administrator Explorer permissions.",
1165
- )
1166
- break
1167
1180
  cur_proj = all_project_map[cur_proj.parent_id]
1168
1181
  ancestors = [cur_proj.name, *ancestors]
1169
1182
  return ancestors