acryl-datahub 1.2.0.11rc2__py3-none-any.whl → 1.2.0.11rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (26) hide show
  1. {acryl_datahub-1.2.0.11rc2.dist-info → acryl_datahub-1.2.0.11rc4.dist-info}/METADATA +2609 -2609
  2. {acryl_datahub-1.2.0.11rc2.dist-info → acryl_datahub-1.2.0.11rc4.dist-info}/RECORD +26 -26
  3. datahub/_version.py +1 -1
  4. datahub/cli/docker_cli.py +1 -1
  5. datahub/configuration/common.py +11 -0
  6. datahub/configuration/kafka.py +19 -1
  7. datahub/ingestion/autogenerated/capability_summary.json +2 -2
  8. datahub/ingestion/graph/client.py +7 -7
  9. datahub/ingestion/graph/filters.py +30 -11
  10. datahub/ingestion/source/aws/s3_boto_utils.py +4 -1
  11. datahub/ingestion/source/data_lake_common/path_spec.py +39 -2
  12. datahub/ingestion/source/s3/source.py +125 -164
  13. datahub/ingestion/source/snaplogic/snaplogic.py +4 -4
  14. datahub/ingestion/source/snaplogic/snaplogic_config.py +4 -4
  15. datahub/ingestion/source/snowflake/snowflake_queries.py +23 -7
  16. datahub/ingestion/source/snowflake/snowflake_utils.py +9 -9
  17. datahub/metadata/_internal_schema_classes.py +1 -1
  18. datahub/metadata/schema.avsc +1 -1
  19. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  20. datahub/metadata/schemas/MetadataChangeEvent.avsc +1 -1
  21. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  22. datahub/sql_parsing/sqlglot_lineage.py +6 -1
  23. {acryl_datahub-1.2.0.11rc2.dist-info → acryl_datahub-1.2.0.11rc4.dist-info}/WHEEL +0 -0
  24. {acryl_datahub-1.2.0.11rc2.dist-info → acryl_datahub-1.2.0.11rc4.dist-info}/entry_points.txt +0 -0
  25. {acryl_datahub-1.2.0.11rc2.dist-info → acryl_datahub-1.2.0.11rc4.dist-info}/licenses/LICENSE +0 -0
  26. {acryl_datahub-1.2.0.11rc2.dist-info → acryl_datahub-1.2.0.11rc4.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.11rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.2.0.11rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=wA3SrSgI7aUwEw5r0jMdQQHnFvzLaVy7PuHAjLJoDJI,324
4
+ datahub/_version.py,sha256=_VILFvBE67asqk9L6Hqvhg65t1Q8zAbMrsKHYLWBj4I,324
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -73,7 +73,7 @@ datahub/cli/config_utils.py,sha256=EeBGfhmf4AxYoTfnZ4GSiGIgpzJFkduNjN_FwmxZGhA,4
73
73
  datahub/cli/container_cli.py,sha256=D0zWP3_3aww8_RTkMugOoOlILz3dPJ0TE9asQDLCm6E,1697
74
74
  datahub/cli/delete_cli.py,sha256=0YJeWuXPGY0kbSn1AXK1-8SfCGBxb78ZbO53RAgyjQg,26515
75
75
  datahub/cli/docker_check.py,sha256=CE6YNdX4XsXT8GYiQOLbOLi2x_-kK2aQqmTl0ZP0Uu4,12976
76
- datahub/cli/docker_cli.py,sha256=aAZIF3oblTSXzQVSanbphd3dCvdypHCyuMt1u3_QZ9M,33067
76
+ datahub/cli/docker_cli.py,sha256=3pzoe_qbWLhG27-M2wBU5MLLJM0xPlmC-EyoueoQEL4,33091
77
77
  datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
78
78
  datahub/cli/exists_cli.py,sha256=1cUYNh3GqNgVHWTrfMRGJoo9tFZNXcLetMaDbLaig6o,1233
79
79
  datahub/cli/get_cli.py,sha256=OUnXomfpki0uWbNqXoSvQDyQJcKjNLgg77e8jVvynKY,2327
@@ -100,14 +100,14 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=rgx8fhI7WYq8QLwIttkMysX7
100
100
  datahub/cli/specific/user_cli.py,sha256=HaASGg25b82Q18hKyOn98gPoQfmj1gW0utDMs0iR5WY,1897
101
101
  datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
102
102
  datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
103
- datahub/configuration/common.py,sha256=0jhfJTlMrNPEkSWwLbbcv4F4cB_wok51ahX1wQzwvz8,11461
103
+ datahub/configuration/common.py,sha256=HvMrc-q2ZocbAHj5KgBHUaQOjdWLvk_wRGDkTUVbhtU,11742
104
104
  datahub/configuration/config_loader.py,sha256=hRzPFxkz-w9IqkpSa5vwCzSra1p49DyfeJNeyqGa8-4,6827
105
105
  datahub/configuration/connection_resolver.py,sha256=UsnV1_X8yivOykiifllkoKRn19eO6j_NTBWHC2Ob5Xg,1625
106
106
  datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
107
107
  datahub/configuration/git.py,sha256=OiqF2w6d9YWqrlHv_2jZ-cRiL-g0SeS4ecVtQwXWpnc,6415
108
108
  datahub/configuration/import_resolver.py,sha256=asGu7t6SLdasbV_rlldnXVWhU7tdOOiUCbQt7xKFraw,491
109
109
  datahub/configuration/json_loader.py,sha256=vIDnjwXWi9yHDO8KW64EupOzOb_sspehGCD7xGHzg84,302
110
- datahub/configuration/kafka.py,sha256=qj4qNBzeXeS-mUtf441B1jj_22wPO6Eho_stErMD-GY,2586
110
+ datahub/configuration/kafka.py,sha256=-Bqy0mhlUG6EyWwbaZv-sPXa-MVnOWhn96A3gsHry2w,3167
111
111
  datahub/configuration/kafka_consumer_config.py,sha256=LivsObTt9yC3WoGnslJbF_x4ojfNdxMIMEhb8vvJfcA,2133
112
112
  datahub/configuration/pattern_utils.py,sha256=Q5IB9RfWOOo5FvRVBU7XkhiwHCxSQ1NTMfUlWtWI9qc,699
113
113
  datahub/configuration/pydantic_migration_helpers.py,sha256=NKoQUS2SM4FFdBxmPqgzJdYT1X-OXn-PrlgdfIDZpX0,1397
@@ -159,7 +159,7 @@ datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
159
159
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
160
160
  datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=0BwkpLhORbsiTHq0g_N_1cVVoZYdLR3qz02mNmsV9-M,4444
161
161
  datahub/ingestion/autogenerated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
- datahub/ingestion/autogenerated/capability_summary.json,sha256=cSA7jRLLphwkM9C9LK6HdbWRNM0s2febr-4Rh69vQss,111506
162
+ datahub/ingestion/autogenerated/capability_summary.json,sha256=9Ns5gYfVq0LYogaYtb0ioDPfu8SVhftiq9R7l0irQwg,111506
163
163
  datahub/ingestion/autogenerated/lineage.json,sha256=8BdZF-5V5kJbX4mfFav8Zg-jHjzfkAEGk-pu1atLN4I,10029
164
164
  datahub/ingestion/autogenerated/lineage_helper.py,sha256=I_k1pZSCCCjDbUVifPTfy6fkmV8jqdVhbirE8EkpmxI,4748
165
165
  datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -181,11 +181,11 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
181
181
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
182
182
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
183
183
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
- datahub/ingestion/graph/client.py,sha256=Orf9BHyTeHufB_LRIC-LyJj1kii73tyWTN_I1HJ6-_k,74955
184
+ datahub/ingestion/graph/client.py,sha256=W42py2iVwAmxPw8P2BQmOVBrwRXvEtpV_PIybs2RXy4,75077
185
185
  datahub/ingestion/graph/config.py,sha256=rmkcqAL8fJoY9QyAeS0Xm8HvwHzV3pCjY-Om-50JJTI,1015
186
186
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
187
187
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
188
- datahub/ingestion/graph/filters.py,sha256=OfjKhuNRHHLvhHk6Tfwd2IbMLPbbIq4VUyHaSpcDvKk,8664
188
+ datahub/ingestion/graph/filters.py,sha256=WMZpLGjuaOWyScrEJHqOtR_nw2DR23s9sJnigVpZTDI,9461
189
189
  datahub/ingestion/graph/links.py,sha256=UwWSdx-j0dPttfJOjfTf4ZmlO7iIsRz5p3nIsqGVHUA,2169
190
190
  datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
191
191
  datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=w-OvKG4Xm6s91czFxkhtawskMOReHyso7WjPlGGY1Rw,10073
@@ -241,7 +241,7 @@ datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
241
241
  datahub/ingestion/source/aws/aws_common.py,sha256=DN49pHY-AOVHqI3smF9cVPwvPmh5UqOF4ga-wga_gPc,17928
242
242
  datahub/ingestion/source/aws/glue.py,sha256=dUaMWcI5Ed-TzbbSrF6suT4L1vcRHoHfFCdTvAINc4w,67423
243
243
  datahub/ingestion/source/aws/platform_resource_repository.py,sha256=0eUfGy1FbaBltCSNTtXyLrkrdqTc1KkTgDJB1Gd-Ydk,853
244
- datahub/ingestion/source/aws/s3_boto_utils.py,sha256=7JqDO4c013uGUbcRgZkR0lIrG_HfVZ0DTdL1WvyfkcE,6986
244
+ datahub/ingestion/source/aws/s3_boto_utils.py,sha256=rGlWAkKZpkeA1_wMvcJvSDvobvduShszowU-KcrQudg,7011
245
245
  datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
246
246
  datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
247
247
  datahub/ingestion/source/aws/tag_entities.py,sha256=Y9clf_0bAXd0lhINVcPcNZqM3TjegHTZY58t6qRO2rQ,10732
@@ -291,7 +291,7 @@ datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-
291
291
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
292
292
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=IYr5y8vy_6CtMtITqzn6OqovzH1cpe1i30M-75PouXo,7768
293
293
  datahub/ingestion/source/data_lake_common/object_store.py,sha256=i9Hgb8Ww23QD_jEjzj_2qxA8Nr56krnZfo1qyOWmH9M,23608
294
- datahub/ingestion/source/data_lake_common/path_spec.py,sha256=M2ZkLhMZsm3MrnYUjC7lG4fBQhLycYk46pyjqaKww8k,23719
294
+ datahub/ingestion/source/data_lake_common/path_spec.py,sha256=T6KwPMDdAC37wpe_KvVSf5KgOvJLuu6JBZQgKccObWQ,25139
295
295
  datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
296
296
  datahub/ingestion/source/datahub/config.py,sha256=zS-OgE6n-OnXHeCo5-OYx0i6E828H70JyHQNqokwp6A,5256
297
297
  datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
@@ -461,7 +461,7 @@ datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pL
461
461
  datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
462
462
  datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
463
463
  datahub/ingestion/source/s3/report.py,sha256=9Ej1UCChw963UpGw1-7asi5vFrOM232gfgG8bRdKPp0,667
464
- datahub/ingestion/source/s3/source.py,sha256=dADORK79xvoYvtnyO6THdRJFw97GovvimVd56GnMtKo,60481
464
+ datahub/ingestion/source/s3/source.py,sha256=rokdUHb2rPXc6JbHwq5GDYdK4COlhm6_E84jHNk-EAE,58063
465
465
  datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
466
466
  datahub/ingestion/source/sac/sac.py,sha256=0s_JxHGOhit3Wvgbg7qQi-Z9j9_TgBX_I1yOR3L6-rA,30243
467
467
  datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
@@ -483,8 +483,8 @@ datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrp
483
483
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
484
484
  datahub/ingestion/source/slack/slack.py,sha256=JWanUfzFGynV_PWcH0YzJIbRcmL880DA6dEI9QW-QiQ,25800
485
485
  datahub/ingestion/source/snaplogic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
486
- datahub/ingestion/source/snaplogic/snaplogic.py,sha256=VoCRSBS0kE4RAsn4hOJgQy3uadFvelLIOv3I0_gne-w,13540
487
- datahub/ingestion/source/snaplogic/snaplogic_config.py,sha256=oxuNBfAHWMHoOvh52gifOFcBOSN8aaPpFC8QgmgXwWI,1445
486
+ datahub/ingestion/source/snaplogic/snaplogic.py,sha256=T72zCO97xbQkC6DLq6Ka_h_kI5ZQZG6YRFJAHl_N5c0,13540
487
+ datahub/ingestion/source/snaplogic/snaplogic_config.py,sha256=lYXKeaD0lvLoZ8eJmPNHhuLlOnCPbelZMyihgUXbxD8,1445
488
488
  datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py,sha256=IzCYwXLN6IfRFqns7XXtZxzQjjHC-XNTGXgEMeSfW8U,3776
489
489
  datahub/ingestion/source/snaplogic/snaplogic_parser.py,sha256=q5dRfWtOpSELPZrpyLbszOD49MJBXNbKgnITLMPiyGI,5783
490
490
  datahub/ingestion/source/snaplogic/snaplogic_utils.py,sha256=SVrV9ZXVE2cKKPfoVsxjBN2fIcpYbs2PBLiyQIcJMVQ,1068
@@ -498,7 +498,7 @@ datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=uSHdPqigRzjeNx
498
498
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
499
499
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=nam-bYV6wL9LfR-Tt50Qe_Kea61IuWS-lLu5__aDxk8,21853
500
500
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=PmQi-qDlRhdJ-PsJ7x-EScIiswWRAxDDOKHydvN3mTY,7404
501
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=Nb7zn531KmCLgKwJgSjyst6vV-QLxFWk4Z8aHqMcCwk,44866
501
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=lAMA--X3nbWFdNs1DTHNm7crctB3RilX_pB-zy47piI,45528
502
502
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=wLDaYZrWJ0794KKn69rB_QF0_8Bzu5l_7L6mD77KVc4,40469
503
503
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=GPuQTOaR8SCqMBme6Q55vX4zzT4m3ELmp83rp_grQA0,6808
504
504
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=Dh_p0WpSaWOFMCsBgd3FpkQ_65k3QPq2VUnapp2VKuY,41431
@@ -507,7 +507,7 @@ datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=maZyFkfrbVogEFM0tT
507
507
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=5Li4H8KuS4qBKR98L2P-JZI79UXsOjcAFxZZyio9NU0,5787
508
508
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
509
509
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=mM0v9b4PHRJAT-SdRids3wdzc5O96gWCCww3e42itV8,24982
510
- datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=dmgpwApayUIevyn6l55mSWBXzYK6nPs8ItwcrvdXdIA,15007
510
+ datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=1c1YNmAxxOwAKy8IEFqVdp6x-EvCYJkN6UZ_RwUUVv0,15062
511
511
  datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=c6wg_s97Hrckqi0BgAbmnnRQRDDda1-BHFLlnRx0xuw,35753
512
512
  datahub/ingestion/source/snowflake/stored_proc_lineage.py,sha256=rOb78iHiWiK8v8WdVs1xDwVut4Y0OHmszej6IopQfCo,5341
513
513
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -642,8 +642,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
642
642
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
643
643
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
644
644
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
645
- datahub/metadata/_internal_schema_classes.py,sha256=BBxTUOoQF1h-WXtAHGQM-Rh3Mhirx5nJBPserPZKFeY,1069945
646
- datahub/metadata/schema.avsc,sha256=ykx9zsPrLioYBg84eMi4NGyev4POl6BkyAYFPXAjvMQ,771886
645
+ datahub/metadata/_internal_schema_classes.py,sha256=FYFjU83woitdWMNTk-fZhtCfuUVLv05-syASUGMVwGM,1069868
646
+ datahub/metadata/schema.avsc,sha256=AS5vIh4JGmKVeY_b0fNjUihKBmACH0voLdyxuDqqAXo,771809
647
647
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
648
648
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
649
649
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -755,7 +755,7 @@ datahub/metadata/schemas/CorpGroupEditableInfo.avsc,sha256=sItWMAGfQdYwmF5xLzMnY
755
755
  datahub/metadata/schemas/CorpGroupInfo.avsc,sha256=G83lndR7C6WPfccFg7qFE5Ely5vrdxC8x7zJB3fW3Sw,4740
756
756
  datahub/metadata/schemas/CorpGroupKey.avsc,sha256=B_RMHAFF_nd86qtO2p1slAZnxruCmBM7DUfILAU_UaI,953
757
757
  datahub/metadata/schemas/CorpUserCredentials.avsc,sha256=S7FkV9K_DGxhb4GFYbM5_lPvtPsOWKffjSOpfzicl-o,980
758
- datahub/metadata/schemas/CorpUserEditableInfo.avsc,sha256=6IrqWidbHP7mRryfVlWAQU0JS34THHTM8_aIKWqClUE,3843
758
+ datahub/metadata/schemas/CorpUserEditableInfo.avsc,sha256=giomIxglSQGL-v1b_phavL-aIdA-qlMGoLb7I3wNSZg,3766
759
759
  datahub/metadata/schemas/CorpUserInfo.avsc,sha256=oObOza-5FLjZyCjj0FN4MNV1DodgTwJSV4APduAggjk,3955
760
760
  datahub/metadata/schemas/CorpUserKey.avsc,sha256=01sbbdr8G-ZP1yc2UfY3gR-YN6b7AvDbCbNpZJ-02J4,1025
761
761
  datahub/metadata/schemas/CorpUserSettings.avsc,sha256=38_ZXslpkgJC1PlRxPnas9cHBi5dQYUcsl1X4WSn5Gc,6806
@@ -902,7 +902,7 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
902
902
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
903
903
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
904
904
  datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
905
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=bs_f_quv7oDsJ1vgkkQLaP6rLks2lW2Fq6fE-_fTlh0,381341
905
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=xEHSnOcZYlUEDbBZuNxxF9gLSmjiol1Q8bBiC9w81qA,381264
906
906
  datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=soCmgrcEBE5yS-mQIm-RIefhb74ONj9Fqayxa0-59KE,13254
907
907
  datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=pT14vUmpj7VJ8hinQ0pcCUtRKx6RAGHWh1eJixkqaE8,12647
908
908
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
@@ -1001,7 +1001,7 @@ datahub/specific/aspect_helpers/custom_properties.py,sha256=a7w4Ml3mEFX31TwZLEpP
1001
1001
  datahub/specific/aspect_helpers/fine_grained_lineage.py,sha256=gXpP26gvNaoOqJCxunaO4ZHicmqC1h-hGFq5G87bGTI,2731
1002
1002
  datahub/specific/aspect_helpers/ownership.py,sha256=rNYiJSqb_FJQhFRSIQScg4mfxgYhPvjeaYyvutY6CN0,1861
1003
1003
  datahub/specific/aspect_helpers/siblings.py,sha256=91S7ocBbppPXCqWPTrl9n-N191t9RvMrny0_Nm5tMSU,2126
1004
- datahub/specific/aspect_helpers/structured_properties.py,sha256=EVnFS025r-PG5PAC7VENVJO-JvDYif2VeYonsC3Z8m8,2255
1004
+ datahub/specific/aspect_helpers/structured_properties.py,sha256=LPFd-Vk0BlCpCgcA_0djr19kH9O-HXPh8jXtGtr9TpA,3175
1005
1005
  datahub/specific/aspect_helpers/tags.py,sha256=YHcKfRaIvv12wcmfMc8-Dk6gf6xIvJedkn451uBuz-Y,1254
1006
1006
  datahub/specific/aspect_helpers/terms.py,sha256=l8xoOLQ2RsIl3UnKhLisQNwrGTFIPrzfvP4zjH-AhwI,1352
1007
1007
  datahub/sql_parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1015,7 +1015,7 @@ datahub/sql_parsing/split_statements.py,sha256=doCACwQ_Fx6m1djo7t3BnU9ZHki4EV2KJ
1015
1015
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=kxxSVe3YNoz_T2OG6-F30ZuXNSXuBZ-E54RqObo6qTI,72323
1016
1016
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
1017
1017
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
1018
- datahub/sql_parsing/sqlglot_lineage.py,sha256=05ryqMkFK5_KDvr-ytXb1FyiC-SzaFB9_USnbL36LEg,66316
1018
+ datahub/sql_parsing/sqlglot_lineage.py,sha256=Zli78TtN8ow-uqNl_oloWT642a5bDGn22-FF0O0tqrs,66658
1019
1019
  datahub/sql_parsing/sqlglot_utils.py,sha256=zH8V9tAcSVO7Y8I3sIKPhs0D_9HzdNBlranBDmk1NB4,15454
1020
1020
  datahub/sql_parsing/tool_meta_extractor.py,sha256=5JsLPcKjuXSrPGxNIhRvX72dFPmlV33-hyvhJwlWxCY,7543
1021
1021
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1123,8 +1123,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1123
1123
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1124
1124
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1125
1125
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1126
- acryl_datahub-1.2.0.11rc2.dist-info/METADATA,sha256=7a2zx4CgdIqVcUawhQNKvcgQmDg6sxLzleaTZ1mnWkA,184162
1127
- acryl_datahub-1.2.0.11rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1128
- acryl_datahub-1.2.0.11rc2.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1129
- acryl_datahub-1.2.0.11rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1130
- acryl_datahub-1.2.0.11rc2.dist-info/RECORD,,
1126
+ acryl_datahub-1.2.0.11rc4.dist-info/METADATA,sha256=SxpOrYaLX-58eO685hRF4fMeo3zkiDS0yFIVNELypnI,184162
1127
+ acryl_datahub-1.2.0.11rc4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1128
+ acryl_datahub-1.2.0.11rc4.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1129
+ acryl_datahub-1.2.0.11rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1130
+ acryl_datahub-1.2.0.11rc4.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.11rc2"
3
+ __version__ = "1.2.0.11rc4"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/cli/docker_cli.py CHANGED
@@ -363,7 +363,7 @@ EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
363
363
  ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
364
364
  GRAPH_SERVICE_IMPL=elasticsearch
365
365
  KAFKA_BOOTSTRAP_SERVER=broker:29092
366
- KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
366
+ KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080${DATAHUB_GMS_BASE_PATH}/schema-registry/api/
367
367
  SCHEMA_REGISTRY_TYPE=INTERNAL
368
368
 
369
369
  ELASTICSEARCH_HOST=search
@@ -169,6 +169,17 @@ class PermissiveConfigModel(ConfigModel):
169
169
  extra = Extra.allow
170
170
 
171
171
 
172
+ class ConnectionModel(BaseModel):
173
+ """Represents the config associated with a connection"""
174
+
175
+ class Config:
176
+ if PYDANTIC_VERSION_2: # noqa: SIM108
177
+ extra = "allow"
178
+ else:
179
+ extra = Extra.allow
180
+ underscore_attrs_are_private = True
181
+
182
+
172
183
  class TransformerSemantics(ConfigEnum):
173
184
  """Describes semantics for aspect changes"""
174
185
 
@@ -1,3 +1,5 @@
1
+ import os
2
+
1
3
  from pydantic import Field, validator
2
4
 
3
5
  from datahub.configuration.common import ConfigModel, ConfigurationError
@@ -5,12 +7,28 @@ from datahub.configuration.kafka_consumer_config import CallableConsumerConfig
5
7
  from datahub.configuration.validate_host_port import validate_host_port
6
8
 
7
9
 
10
+ def _get_schema_registry_url() -> str:
11
+ """Get schema registry URL with proper base path handling."""
12
+ explicit_url = os.getenv("KAFKA_SCHEMAREGISTRY_URL")
13
+ if explicit_url:
14
+ return explicit_url
15
+
16
+ base_path = os.getenv("DATAHUB_GMS_BASE_PATH", "")
17
+ if base_path in ("/", ""):
18
+ base_path = ""
19
+
20
+ return f"http://localhost:8080{base_path}/schema-registry/api/"
21
+
22
+
8
23
  class _KafkaConnectionConfig(ConfigModel):
9
24
  # bootstrap servers
10
25
  bootstrap: str = "localhost:9092"
11
26
 
12
27
  # schema registry location
13
- schema_registry_url: str = "http://localhost:8080/schema-registry/api/"
28
+ schema_registry_url: str = Field(
29
+ default_factory=_get_schema_registry_url,
30
+ description="Schema registry URL. Can be overridden with KAFKA_SCHEMAREGISTRY_URL environment variable, or will use DATAHUB_GMS_BASE_PATH if not set.",
31
+ )
14
32
 
15
33
  schema_registry_config: dict = Field(
16
34
  default_factory=dict,
@@ -2984,7 +2984,7 @@
2984
2984
  },
2985
2985
  {
2986
2986
  "capability": "PLATFORM_INSTANCE",
2987
- "description": "Snaplogic does not support platform instances",
2987
+ "description": "SnapLogic does not support platform instances",
2988
2988
  "subtype_modifier": null,
2989
2989
  "supported": false
2990
2990
  },
@@ -2997,7 +2997,7 @@
2997
2997
  ],
2998
2998
  "classname": "datahub.ingestion.source.snaplogic.snaplogic.SnaplogicSource",
2999
2999
  "platform_id": "snaplogic",
3000
- "platform_name": "Snaplogic",
3000
+ "platform_name": "SnapLogic",
3001
3001
  "support_status": "TESTING"
3002
3002
  },
3003
3003
  "snowflake": {
@@ -207,7 +207,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
207
207
  Note: Only supported with DataHub Cloud.
208
208
  """
209
209
 
210
- if not self.server_config:
210
+ if not hasattr(self, "server_config") or not self.server_config:
211
211
  self.test_connection()
212
212
 
213
213
  base_url = self.server_config.raw_config.get("baseUrl")
@@ -838,11 +838,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
838
838
  def _bulk_fetch_schema_info_by_filter(
839
839
  self,
840
840
  *,
841
- platform: Optional[str] = None,
841
+ platform: Union[None, str, List[str]] = None,
842
842
  platform_instance: Optional[str] = None,
843
843
  env: Optional[str] = None,
844
844
  query: Optional[str] = None,
845
- container: Optional[str] = None,
845
+ container: Union[None, str, List[str]] = None,
846
846
  status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
847
847
  batch_size: int = 100,
848
848
  extraFilters: Optional[List[RawSearchFilterRule]] = None,
@@ -914,11 +914,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
914
914
  self,
915
915
  *,
916
916
  entity_types: Optional[Sequence[str]] = None,
917
- platform: Optional[str] = None,
917
+ platform: Union[None, str, List[str]] = None,
918
918
  platform_instance: Optional[str] = None,
919
919
  env: Optional[str] = None,
920
920
  query: Optional[str] = None,
921
- container: Optional[str] = None,
921
+ container: Union[None, str, List[str]] = None,
922
922
  status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
923
923
  batch_size: int = 5000,
924
924
  extraFilters: Optional[List[RawSearchFilterRule]] = None,
@@ -1018,11 +1018,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1018
1018
  self,
1019
1019
  *,
1020
1020
  entity_types: Optional[List[str]] = None,
1021
- platform: Optional[str] = None,
1021
+ platform: Union[None, str, List[str]] = None,
1022
1022
  platform_instance: Optional[str] = None,
1023
1023
  env: Optional[str] = None,
1024
1024
  query: Optional[str] = None,
1025
- container: Optional[str] = None,
1025
+ container: Union[None, str, List[str]] = None,
1026
1026
  status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
1027
1027
  batch_size: int = 5000,
1028
1028
  extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
@@ -76,11 +76,21 @@ class RemovedStatusFilter(enum.Enum):
76
76
  """Search only soft-deleted entities."""
77
77
 
78
78
 
79
+ def _validate_or_filter_structure(
80
+ or_filters: List[Dict[str, List[SearchFilterRule]]],
81
+ ) -> None:
82
+ for filter_list in or_filters:
83
+ if "and" not in filter_list:
84
+ raise ValueError(f"Invalid or filter: {filter_list}")
85
+ if not isinstance(filter_list["and"], list):
86
+ raise ValueError(f"Invalid or filter: {filter_list}")
87
+
88
+
79
89
  def generate_filter(
80
- platform: Optional[str],
90
+ platform: Union[None, str, List[str]],
81
91
  platform_instance: Optional[str],
82
92
  env: Optional[str],
83
- container: Optional[str],
93
+ container: Union[None, str, List[str]],
84
94
  status: Optional[RemovedStatusFilter],
85
95
  extra_filters: Optional[List[RawSearchFilterRule]],
86
96
  extra_or_filters: Optional[RawSearchFilter] = None,
@@ -93,8 +103,7 @@ def generate_filter(
93
103
  :param container: The container to filter by.
94
104
  :param status: The status to filter by.
95
105
  :param extra_filters: Extra AND filters to apply.
96
- :param extra_or_filters: Extra OR filters to apply. These are combined with
97
- the AND filters using an OR at the top level.
106
+ :param extra_or_filters: Extra OR filters to apply. These are combined with the AND filters using an OR at the top level.
98
107
  """
99
108
  and_filters: List[RawSearchFilterRule] = []
100
109
 
@@ -218,23 +227,31 @@ def _get_status_filter(status: RemovedStatusFilter) -> Optional[SearchFilterRule
218
227
  raise ValueError(f"Invalid status filter: {status}")
219
228
 
220
229
 
221
- def _get_container_filter(container: str) -> SearchFilterRule:
230
+ def _get_container_filter(container: Union[str, List[str]]) -> SearchFilterRule:
231
+ if not isinstance(container, list):
232
+ container = [container]
233
+
222
234
  # Warn if container is not a fully qualified urn.
223
235
  # TODO: Change this once we have a first-class container urn type.
224
- if guess_entity_type(container) != "container":
225
- raise ValueError(f"Invalid container urn: {container}")
236
+ for cont in container:
237
+ if guess_entity_type(cont) != "container":
238
+ raise ValueError(f"Invalid container urn: {cont}")
226
239
 
227
240
  return SearchFilterRule(
228
241
  field="browsePathV2",
229
- values=[container],
242
+ values=container,
230
243
  condition="CONTAIN",
231
244
  )
232
245
 
233
246
 
234
247
  def _get_platform_instance_filter(
235
- platform: Optional[str], platform_instance: str
248
+ platform: Union[None, str, List[str]], platform_instance: str
236
249
  ) -> SearchFilterRule:
237
250
  if platform:
251
+ if isinstance(platform, list):
252
+ raise ValueError(
253
+ "Platform instance filter cannot be combined with a multi-value platform filter."
254
+ )
238
255
  # Massage the platform instance into a fully qualified urn, if necessary.
239
256
  platform_instance = make_dataplatform_instance_urn(platform, platform_instance)
240
257
 
@@ -250,9 +267,11 @@ def _get_platform_instance_filter(
250
267
  )
251
268
 
252
269
 
253
- def _get_platform_filter(platform: str) -> SearchFilterRule:
270
+ def _get_platform_filter(platform: Union[str, List[str]]) -> SearchFilterRule:
271
+ if not isinstance(platform, list):
272
+ platform = [platform]
254
273
  return SearchFilterRule(
255
274
  field="platform.keyword",
256
275
  condition="EQUAL",
257
- values=[make_data_platform_urn(platform)],
276
+ values=[make_data_platform_urn(plt) for plt in platform],
258
277
  )
@@ -126,7 +126,10 @@ def list_folders_path(
126
126
 
127
127
 
128
128
  def list_objects_recursive_path(
129
- s3_uri: str, *, startswith: str, aws_config: Optional[AwsConnectionConfig]
129
+ s3_uri: str,
130
+ *,
131
+ startswith: str = "",
132
+ aws_config: Optional[AwsConnectionConfig] = None,
130
133
  ) -> Iterable["ObjectSummary"]:
131
134
  """
132
135
  Given an S3 URI to a folder or bucket, return all objects underneath that URI, optionally
@@ -194,6 +194,9 @@ class PathSpec(ConfigModel):
194
194
  return True
195
195
 
196
196
  def dir_allowed(self, path: str) -> bool:
197
+ if not path.endswith("/"):
198
+ path += "/"
199
+
197
200
  if self.glob_include.endswith("**"):
198
201
  return self.allowed(path, ignore_ext=True)
199
202
 
@@ -221,9 +224,8 @@ class PathSpec(ConfigModel):
221
224
  ):
222
225
  return False
223
226
 
224
- file_name_pattern = self.include.rsplit("/", 1)[1]
225
227
  table_name, _ = self.extract_table_name_and_path(
226
- os.path.join(path, file_name_pattern)
228
+ path + self.get_remaining_glob_include(path)
227
229
  )
228
230
  if not self.tables_filter_pattern.allowed(table_name):
229
231
  return False
@@ -571,3 +573,38 @@ class PathSpec(ConfigModel):
571
573
  "/".join(path.split("/")[:depth]) + "/" + parsed_vars.named["table"]
572
574
  )
573
575
  return self._extract_table_name(parsed_vars.named), table_path
576
+
577
+ def has_correct_number_of_directory_components(self, path: str) -> bool:
578
+ """
579
+ Checks that a given path has the same number of components as the path spec
580
+ has directory components. Useful for checking if a path needs to descend further
581
+ into child directories or if the source can switch into file listing mode. If the
582
+ glob form of the path spec ends in "**", this always returns False.
583
+ """
584
+ if self.glob_include.endswith("**"):
585
+ return False
586
+
587
+ if not path.endswith("/"):
588
+ path += "/"
589
+ path_slash = path.count("/")
590
+ glob_slash = self.glob_include.count("/")
591
+ if path_slash == glob_slash:
592
+ return True
593
+ return False
594
+
595
+ def get_remaining_glob_include(self, path: str) -> str:
596
+ """
597
+ Given a path, return the remaining components of the path spec (if any
598
+ exist) in glob form. If the glob form of the path spec ends in "**", this
599
+ function's return value also always ends in "**", regardless of how
600
+ many components the input path has.
601
+ """
602
+ if not path.endswith("/"):
603
+ path += "/"
604
+ path_slash = path.count("/")
605
+ remainder = "/".join(self.glob_include.split("/")[path_slash:])
606
+ if remainder:
607
+ return remainder
608
+ if self.glob_include.endswith("**"):
609
+ return "**"
610
+ return ""