acryl-datahub 1.2.0.1__py3-none-any.whl → 1.2.0.2rc1__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two published versions as they appear in the public registry.

Potentially problematic release: this version of acryl-datahub has been flagged as potentially problematic.

Files changed (27)
  1. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/METADATA +2605 -2605
  2. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/RECORD +27 -27
  3. datahub/_version.py +1 -1
  4. datahub/emitter/rest_emitter.py +3 -1
  5. datahub/ingestion/source/abs/source.py +5 -29
  6. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  7. datahub/ingestion/source/dbt/dbt_common.py +69 -2
  8. datahub/ingestion/source/looker/looker_common.py +40 -4
  9. datahub/ingestion/source/s3/source.py +5 -33
  10. datahub/ingestion/source/sql/postgres.py +190 -1
  11. datahub/ingestion/source/sql_queries.py +112 -77
  12. datahub/metadata/_internal_schema_classes.py +81 -0
  13. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +2 -0
  14. datahub/metadata/schema.avsc +60 -0
  15. datahub/metadata/schemas/CorpUserSettings.avsc +10 -1
  16. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +33 -0
  17. datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -0
  18. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  19. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  20. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  21. datahub/sdk/search_filters.py +51 -2
  22. datahub/sql_parsing/sql_parsing_aggregator.py +1 -0
  23. datahub/upgrade/upgrade.py +5 -3
  24. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/WHEEL +0 -0
  25. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/entry_points.txt +0 -0
  26. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/licenses/LICENSE +0 -0
  27. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/RECORD CHANGED
@@ -1,7 +1,7 @@
- acryl_datahub-1.2.0.1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.2.0.2rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=Fjtqaraj595j7RFLjvpnFYKEC5JdGREvvHD_hnrl5ek,320
+ datahub/_version.py,sha256=1ST8gBZ8wWcQFJrAhpY_re_rMWhON0s6EftssFUvWKw,323
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -132,7 +132,7 @@ datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxga
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
  datahub/emitter/request_helper.py,sha256=2Sij9VJqgA7xZI6I7IuxsA8ioakbz0FJ3gvazxU_z3M,5738
  datahub/emitter/response_helper.py,sha256=qGm45n43CepW7j6kP9wTXuP-U-SZnn7hQdJTdVaoqhQ,7504
- datahub/emitter/rest_emitter.py,sha256=emL16tSbG7j7EzTlK1H6gew7iAUxYwR5za_r8eWo1Qs,38723
+ datahub/emitter/rest_emitter.py,sha256=lMqjtDyPOArIrNgL47kq1cbB4xiR17CHfRRxpGYriDY,38793
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
  datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
  datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -227,14 +227,14 @@ datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgM
  datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
  datahub/ingestion/source/salesforce.py,sha256=Pa_w1XszxFd8fyhpSWOfc2nOnevHwwstIvnRrQT4R9M,40584
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
- datahub/ingestion/source/sql_queries.py,sha256=iQxiRpqbseR7pQ9rswqVjdYusCNAtkU5WAMUWPJrQsI,9505
+ datahub/ingestion/source/sql_queries.py,sha256=4n8lTmfNtcH_qkGvnmPwidWCzfnjtWiBcd4AzAJGvbo,11810
  datahub/ingestion/source/superset.py,sha256=oi7F2jlvkVr9ItJ_r1Jm4bYfXHYu4vPAFPMPaGJKB84,50608
  datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
  datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
  datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
  datahub/ingestion/source/abs/report.py,sha256=CkRjsNn0Pab-ZPllxz3IUJI_r3x0T6urJePa_hJKi5U,586
- datahub/ingestion/source/abs/source.py,sha256=FvncxJG2ZjQ4Q5JeYo4zVYuqEBsfvHOP4ZSjy_CKG90,24826
+ datahub/ingestion/source/abs/source.py,sha256=uqMpvHmje3VPJRujW7l3Zmb0t3yFl91remXvznLuMO0,23783
  datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/apply/datahub_apply.py,sha256=xTD-Iq3UHhxcz61RwNuI2kJjRrnQEfZFSgvS1X6loV4,7703
  datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -288,7 +288,7 @@ datahub/ingestion/source/common/gcp_credentials_config.py,sha256=_NapGkAqZMbXNCl
  datahub/ingestion/source/common/subtypes.py,sha256=x8dv9SN23Y8t5oq7SUuQWUPAH8KM3KfTT27ma9XgrM0,4551
  datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
- datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=55mK0nsehqGDTUOol9Oi3jZs8Pb04PIsHdC2WPP0dkg,6576
+ datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=IYr5y8vy_6CtMtITqzn6OqovzH1cpe1i30M-75PouXo,7768
  datahub/ingestion/source/data_lake_common/object_store.py,sha256=i9Hgb8Ww23QD_jEjzj_2qxA8Nr56krnZfo1qyOWmH9M,23608
  datahub/ingestion/source/data_lake_common/path_spec.py,sha256=ekJAr4-PE2RhzQnmKb2xcSs_YncC1Dz95-UrXI67Vos,23584
  datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -301,7 +301,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
  datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/dbt/dbt_cloud.py,sha256=4gWOFSX0YU8EAJgO4J47NBE4QbNtJ-5nUe66vry-oGc,18160
- datahub/ingestion/source/dbt/dbt_common.py,sha256=QDttgzZQowsiZGq9EN7zGSbitUR17F3vo-JMkF8pJbw,82264
+ datahub/ingestion/source/dbt/dbt_common.py,sha256=ByCqzjkToXgfhOyxxc6VEuD8BZbYbPsD5yrLRMMPUcI,85640
  datahub/ingestion/source/dbt/dbt_core.py,sha256=WVI2ZYXOMxgFzJnJqsqmEGS-5xdfiVIDsCb78lvSeQ0,24930
  datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
  datahub/ingestion/source/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -367,7 +367,7 @@ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=QTMY0FmOHkTxfIC
  datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=A9q-u5IoV35swvoyMrzT75FVV9-SBeYGhLKDYRge-IQ,23845
  datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
- datahub/ingestion/source/looker/looker_common.py,sha256=wwCRHyd0F799CEBboryQ4TZV057krnY2nMJOkUWc268,64765
+ datahub/ingestion/source/looker/looker_common.py,sha256=flWAl0LYY--FoCS_5bgHNThACukQ4WYVLhfiYfsF74I,66447
  datahub/ingestion/source/looker/looker_config.py,sha256=eVKw1nn9D8hUFdRfNyT3MtzL8w-zWhFeokiwSnNKQuc,13607
  datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
  datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
@@ -446,7 +446,7 @@ datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pL
  datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
  datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
  datahub/ingestion/source/s3/report.py,sha256=9Ej1UCChw963UpGw1-7asi5vFrOM232gfgG8bRdKPp0,667
- datahub/ingestion/source/s3/source.py,sha256=DuJ5G0S2ukZFRL-aO8b4ihzhTlxvbzbj2_GyS8zLb7s,60968
+ datahub/ingestion/source/s3/source.py,sha256=4_6VDBKIYTiS6fqU4BuGQ4XlJuD_1ehK7jb4TyMiv0c,59908
  datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/sac/sac.py,sha256=0s_JxHGOhit3Wvgbg7qQi-Z9j9_TgBX_I1yOR3L6-rA,30243
  datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
@@ -501,7 +501,7 @@ datahub/ingestion/source/sql/hive_metastore.py,sha256=jpAgND_n2W6VCF_KMElXV5Gnd4
  datahub/ingestion/source/sql/mariadb.py,sha256=om6QoG5UtDldt1N6AfIWp3T-HXNaaqFmpz2i0JAemfM,654
  datahub/ingestion/source/sql/mysql.py,sha256=34Vu3otULxUY0-JUEYdZw2aoyuTlc9KLcoJdQxe2yJs,3267
  datahub/ingestion/source/sql/oracle.py,sha256=BGrHln5OQ6gq7LTqY8e4ySS5-uJaJEan0TU1778ZEYs,29963
- datahub/ingestion/source/sql/postgres.py,sha256=t-28dTWLa3c_NgWlGS9ZVNFlZLxd6xTSguIKP4S4NHs,11887
+ datahub/ingestion/source/sql/postgres.py,sha256=fynj84tlNl0FO1I_2GkmeuFQ1HexL_woG0fAK3zHEzw,19019
  datahub/ingestion/source/sql/presto.py,sha256=tATa0M2q0PjUC_E9W_jSUsmKTP7cVJayLgrFMzG_eao,4223
  datahub/ingestion/source/sql/sql_common.py,sha256=qIH0Wnz8XvJDlYuS71X3XC5BfpeHkSGzpwn1aqPVY0o,52130
  datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fxk9xei_CUIAXB0,8222
@@ -618,8 +618,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
- datahub/metadata/_internal_schema_classes.py,sha256=V43yFMGVycJf0UV_b-ZAaOigWedg_iItFj3rOoIx3Ro,1046990
- datahub/metadata/schema.avsc,sha256=NlFTKx_U18Je4-BV3GAKS6wlEZrBTRUt-UF9gUH-5z0,735189
+ datahub/metadata/_internal_schema_classes.py,sha256=i3KQdgYqA-gWAIlNa63f9OL1D5e_pmDtBjGKZxPsNIM,1050332
+ datahub/metadata/schema.avsc,sha256=q7trioUW-inMl6LMviL9GqpngzbraV6q-SSQmTZ3USA,737396
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -673,7 +673,7 @@ datahub/metadata/com/linkedin/pegasus2avro/metadata/query/filter/__init__.py,sha
  datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py,sha256=OPboF8SV11wGnjvWQB-rxtB0otMdCsE7Tcy7xkOUgz8,2358
  datahub/metadata/com/linkedin/pegasus2avro/ml/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
  datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py,sha256=qefB0n1xilQHCPla80b39wdjHOYoVtzBJT2jGc2szkM,3309
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py,sha256=Np5zhmoMzMEIioN7bPwPZkXBh_0aTvcEaVAdL0qrzSE,1108
+ datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py,sha256=oJb5Lp4Rxo2fxiVqbH_m6wcfIDN49iLuL2RxJvIe-Kc,1218
  datahub/metadata/com/linkedin/pegasus2avro/mxe/__init__.py,sha256=LqGp9QTLk_tiSsbHMGSUH7uPG00Bf_qQIMiU7vtO4Tk,973
  datahub/metadata/com/linkedin/pegasus2avro/notebook/__init__.py,sha256=BcjOsz4YeHQbLLBb4Im4uJ7ux1hGHquQDmiIOiDXVtE,901
  datahub/metadata/com/linkedin/pegasus2avro/ownership/__init__.py,sha256=r813MW_bkP1ZpC2NJf7uCHEOapjebl611c90vryKX4A,302
@@ -732,7 +732,7 @@ datahub/metadata/schemas/CorpUserCredentials.avsc,sha256=S7FkV9K_DGxhb4GFYbM5_lP
  datahub/metadata/schemas/CorpUserEditableInfo.avsc,sha256=6IrqWidbHP7mRryfVlWAQU0JS34THHTM8_aIKWqClUE,3843
  datahub/metadata/schemas/CorpUserInfo.avsc,sha256=oObOza-5FLjZyCjj0FN4MNV1DodgTwJSV4APduAggjk,3955
  datahub/metadata/schemas/CorpUserKey.avsc,sha256=01sbbdr8G-ZP1yc2UfY3gR-YN6b7AvDbCbNpZJ-02J4,1025
- datahub/metadata/schemas/CorpUserSettings.avsc,sha256=ff-rvW3ihwo6s9usvStx0aXTmCNmcR1TjggDxhbCacc,6529
+ datahub/metadata/schemas/CorpUserSettings.avsc,sha256=38_ZXslpkgJC1PlRxPnas9cHBi5dQYUcsl1X4WSn5Gc,6806
  datahub/metadata/schemas/CorpUserStatus.avsc,sha256=yqojAXEQ9CjRhY58RPyTUxzmFbHSANGGaMMbqiYZZIE,2538
  datahub/metadata/schemas/Cost.avsc,sha256=o4kYZSss2uEwJ6gCA9fhBUoyD5xUqcSxz78vkIXXzGQ,1494
  datahub/metadata/schemas/DashboardInfo.avsc,sha256=li2lSV6R4V-nz6foOi-NYxt_8ShHWfoKRw6M2BG5530,12907
@@ -752,7 +752,7 @@ datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjO
  datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
  datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
  datahub/metadata/schemas/DataHubPageModuleKey.avsc,sha256=NyFN8cVO6s6rtgoLGJJGfcPfpGr5PfmZlIhM6ajldfQ,460
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=NldfplvG_NKnbu1x0A1T6oTYKoTcGf_saa9AYFrcsTs,7618
+ datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=_tbXEufCkviQxmhgB7quEq5Qm6693Z6no_9B624ryX4,8757
  datahub/metadata/schemas/DataHubPageTemplateKey.avsc,sha256=0sVqwL97Rp8YHPytp2RqUP5hIW048hmT2hPNP5k6arc,472
  datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=0ndN64UNAADL6G_GVjJLHbe_dBnWhVRjtI3MilOlHQc,5651
  datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
@@ -876,9 +876,9 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
  datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=kwEwLUSPuqdrx_7uOX2XnEZ6Olm4p2ezYt0bTQSgaTk,377034
- datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=Cf5eECeShCA_XHFr2MRhRQpPE61F6Xv-z1jjoBLJLgc,12239
- datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=tvO5cGIqZAIvUbMon1RAKgSY4E0jvBqT5VmLWAuNGkY,9770
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=l3tVuQces7sKrwWsaIJrn3nMRUiCl3MHqCJJHcw7Ylc,377705
+ datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=soCmgrcEBE5yS-mQIm-RIefhb74ONj9Fqayxa0-59KE,13254
+ datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=pT14vUmpj7VJ8hinQ0pcCUtRKx6RAGHWh1eJixkqaE8,12647
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
  datahub/metadata/schemas/NativeGroupMembership.avsc,sha256=9mh9tzyj3ErmTIhX7ERRUm78j1QtGwXUl9UuIXPndBg,588
  datahub/metadata/schemas/NotebookContent.avsc,sha256=ck3yDt0qK5Hn3-mWTNLlYnohXCs6kMUUWPXm7o1JEWE,12496
@@ -916,7 +916,7 @@ datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=a-6TaOQ4A7LDFL
  datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
  datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
  datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
- datahub/metadata/schemas/SystemMetadata.avsc,sha256=wDVdpa9LSAlMzHIiWw-fMLHTCrxcJdnDOY_n5CDNTN8,2068
+ datahub/metadata/schemas/SystemMetadata.avsc,sha256=XEU32-oZsyVwMii-DlQSVDaUTfKQ9n7K0ChMJ07KHvQ,4457
  datahub/metadata/schemas/TagKey.avsc,sha256=BfckMlx-wg_LV1_PFVgItfNBPtCQ8_erGeQM4LzOXmY,640
  datahub/metadata/schemas/TagProperties.avsc,sha256=Qzttxd7BB38JUwwl7tZzIV1Warnh-uQO-Ahw9Sd-vH4,883
  datahub/metadata/schemas/TelemetryClientId.avsc,sha256=GScej0kXFZxoBUcRVrVynzArFSYQpO_dnhY5Po5dlx0,408
@@ -954,7 +954,7 @@ datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
  datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
  datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
  datahub/sdk/search_client.py,sha256=hlk40VnD3eT88hMgwXAUv31-ENbDe50P-gsXUnGSNeo,3512
- datahub/sdk/search_filters.py,sha256=xk19K7V6Y3YflNqNXgMTn_BpaRuoFhlub7w4tLjQlc8,15619
+ datahub/sdk/search_filters.py,sha256=Z0f44KNOjmpmeaPweZUlvcDxNuyTckCHGJX8JIygiYY,17230
  datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/secret/datahub_secret_store.py,sha256=xyNAZY62d6KSz_kYF9wN7RDMLvNhu2ayOzcYvubOX1E,2519
  datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
@@ -982,7 +982,7 @@ datahub/sql_parsing/fingerprint_utils.py,sha256=3hGiexaQXnE7eZLxo-t7hlTyVQz7womb
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
  datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
  datahub/sql_parsing/split_statements.py,sha256=OIQXA9e4k3G9Z1y7rbgdtZhMWt4FPnq41cE8Jkm9cBY,9542
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=ewRHmRT8a9e0D0c-6JioNNjrgeOwfa-8eQFTMmQTl1s,71928
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=1gDSf8n6AkINZbhWdcPw7JjeIPMJgAIep1XFQMOPR6Q,71991
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
  datahub/sql_parsing/sqlglot_lineage.py,sha256=oG7Zx2aOpm1tBQQowPgSufGlMpm5DaMGKTwk7gIkhX0,61450
@@ -1003,7 +1003,7 @@ datahub/testing/mcp_diff.py,sha256=1BpQ3hST46cOQi1SmKdsto3j6x6Sk6yHm0vG1w9IDL0,1
  datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
  datahub/testing/sdk_v2_helpers.py,sha256=FooqGn5PfdJJrCFm3x_uh02IMhDdLjqEf64W16WdvE0,424
  datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/upgrade/upgrade.py,sha256=bxGjfLU-hSQXS9Q1RlhWeJMETTGKqLNGFDPtcheCO4o,18474
+ datahub/upgrade/upgrade.py,sha256=zA28UAgxVzPRiCnjbZ6PqSflozMyomTkmGetX6DXC6E,18583
  datahub/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/utilities/_custom_package_loader.py,sha256=9kgPE7Y77E-hNee8l4sKtVby-btUNum3dBfDixMzcVA,2059
  datahub/utilities/_markupsafe_compat.py,sha256=QX7c9KiHs56ASl7bJlgR4FAf3CGiY94zIr0h6Ak15To,444
@@ -1093,8 +1093,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.2.0.1.dist-info/METADATA,sha256=-gp-iQ4Mqm6pNM6VWTNdAia94Hw6PmteDamE363gWEk,181884
- acryl_datahub-1.2.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub-1.2.0.1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
- acryl_datahub-1.2.0.1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.2.0.1.dist-info/RECORD,,
+ acryl_datahub-1.2.0.2rc1.dist-info/METADATA,sha256=dvJ_JwbJKfu4voRemOG8bC2x4cXQUyNPu9cmz68Ehvg,181893
+ acryl_datahub-1.2.0.2rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub-1.2.0.2rc1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
+ acryl_datahub-1.2.0.2rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.2.0.2rc1.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.2.0.1"
+ __version__ = "1.2.0.2rc1"


  def is_dev_mode() -> bool:
datahub/emitter/rest_emitter.py CHANGED
@@ -98,7 +98,9 @@ TRACE_BACKOFF_FACTOR = 2.0  # Double the wait time each attempt
  # The limit is 16mb. We will use a max of 15mb to have some space
  # for overhead like request headers.
  # This applies to pretty much all calls to GMS.
- INGEST_MAX_PAYLOAD_BYTES = 15 * 1024 * 1024
+ INGEST_MAX_PAYLOAD_BYTES = int(
+     os.getenv("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_BYTES", 15 * 1024 * 1024)
+ )

  # This limit is somewhat arbitrary. All GMS endpoints will timeout
  # and return a 500 if processing takes too long. To avoid sending
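
Note: the hunk above makes the 15 MiB payload cap overridable via an environment variable. A minimal usage sketch, assuming (as the hunk's context and the RECORD entry suggest) that INGEST_MAX_PAYLOAD_BYTES lives in datahub.emitter.rest_emitter and is evaluated once at import time:

    import os

    # Set the override before the emitter module is first imported.
    os.environ["DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_BYTES"] = str(8 * 1024 * 1024)

    from datahub.emitter.rest_emitter import INGEST_MAX_PAYLOAD_BYTES

    print(INGEST_MAX_PAYLOAD_BYTES)  # 8388608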
datahub/ingestion/source/abs/source.py CHANGED
@@ -44,7 +44,10 @@ from datahub.ingestion.source.azure.abs_utils import (
      get_key_prefix,
      strip_abs_prefix,
  )
- from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
+ from datahub.ingestion.source.data_lake_common.data_lake_utils import (
+     ContainerWUCreator,
+     add_partition_columns_to_schema,
+ )
  from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
      StaleEntityRemovalHandler,
@@ -53,10 +56,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
      StatefulIngestionSourceBase,
  )
  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
-     SchemaField,
-     SchemaFieldDataType,
      SchemaMetadata,
-     StringTypeClass,
  )
  from datahub.metadata.schema_classes import (
      DataPlatformInstanceClass,
@@ -223,36 +223,12 @@ class ABSSource(StatefulIngestionSourceBase):
          fields = sorted(fields, key=lambda f: f.fieldPath)

          if self.source_config.add_partition_columns_to_schema:
-             self.add_partition_columns_to_schema(
+             add_partition_columns_to_schema(
                  fields=fields, path_spec=path_spec, full_path=table_data.full_path
              )

          return fields

-     def add_partition_columns_to_schema(
-         self, path_spec: PathSpec, full_path: str, fields: List[SchemaField]
-     ) -> None:
-         vars = path_spec.get_named_vars(full_path)
-         if vars is not None and "partition" in vars:
-             for partition in vars["partition"].values():
-                 partition_arr = partition.split("=")
-                 if len(partition_arr) != 2:
-                     logger.debug(
-                         f"Could not derive partition key from partition field {partition}"
-                     )
-                     continue
-                 partition_key = partition_arr[0]
-                 fields.append(
-                     SchemaField(
-                         fieldPath=f"{partition_key}",
-                         nativeDataType="string",
-                         type=SchemaFieldDataType(StringTypeClass()),
-                         isPartitioningKey=True,
-                         nullable=True,
-                         recursive=False,
-                     )
-                 )
-
      def _create_table_operation_aspect(self, table_data: TableData) -> OperationClass:
          reported_time = int(time.time() * 1000)

datahub/ingestion/source/data_lake_common/data_lake_utils.py CHANGED
@@ -25,10 +25,16 @@ from datahub.ingestion.source.data_lake_common.object_store import (
      get_object_store_bucket_name,
      get_object_store_for_uri,
  )
+ from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
  from datahub.ingestion.source.gcs.gcs_utils import (
      get_gcs_prefix,
      is_gcs_uri,
  )
+ from datahub.metadata.schema_classes import (
+     SchemaFieldClass,
+     SchemaFieldDataTypeClass,
+     StringTypeClass,
+ )

  # hide annoying debug errors from py4j
  logging.getLogger("py4j").setLevel(logging.ERROR)
@@ -39,6 +45,37 @@ PLATFORM_GCS = "gcs"
  PLATFORM_ABS = "abs"


+ def add_partition_columns_to_schema(
+     path_spec: PathSpec, full_path: str, fields: List[SchemaFieldClass]
+ ) -> None:
+     # Check if using fieldPath v2 format
+     is_fieldpath_v2 = any(
+         field.fieldPath.startswith("[version=2.0]") for field in fields
+     )
+
+     # Extract partition information from path
+     partition_keys = path_spec.get_partition_from_path(full_path)
+     if not partition_keys:
+         return
+
+     # Add partition fields to schema
+     for partition_key in partition_keys:
+         fields.append(
+             SchemaFieldClass(
+                 fieldPath=(
+                     f"{partition_key[0]}"
+                     if not is_fieldpath_v2
+                     else f"[version=2.0].[type=string].{partition_key[0]}"
+                 ),
+                 nativeDataType="string",
+                 type=SchemaFieldDataTypeClass(StringTypeClass()),
+                 isPartitioningKey=True,
+                 nullable=False,
+                 recursive=False,
+             )
+         )
+
+
  class ContainerWUCreator:
      processed_containers: List[str]

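
Note: add_partition_columns_to_schema is now a single module-level helper shared by the S3 and ABS sources, which previously carried near-duplicate private methods; the ABS copy parsed partitions via get_named_vars, and both now go through get_partition_from_path. Partition columns are also now emitted with nullable=False (both old copies used nullable=True). A minimal sketch of the helper's effect, using a hypothetical stub in place of the real PathSpec from datahub.ingestion.source.data_lake_common.path_spec:

    from datahub.metadata.schema_classes import (
        SchemaFieldClass,
        SchemaFieldDataTypeClass,
        StringTypeClass,
    )

    class _StubPathSpec:
        # Stand-in for PathSpec: returns (key, value) pairs parsed from the path.
        def get_partition_from_path(self, full_path):
            return [("year", "2024"), ("month", "06")]

    fields = [
        SchemaFieldClass(
            fieldPath="id",
            nativeDataType="string",
            type=SchemaFieldDataTypeClass(StringTypeClass()),
        )
    ]
    add_partition_columns_to_schema(
        path_spec=_StubPathSpec(),
        full_path="s3://bucket/table/year=2024/month=06/part-0.parquet",
        fields=fields,
    )
    # fields now also contains "year" and "month" as string fields appended
    # after "id", marked isPartitioningKey=True and nullable=False.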
datahub/ingestion/source/dbt/dbt_common.py CHANGED
@@ -145,6 +145,9 @@ class DBTSourceReport(StaleEntityRemovalSourceReport):

      nodes_filtered: LossyList[str] = field(default_factory=LossyList)

+     duplicate_sources_dropped: Optional[int] = None
+     duplicate_sources_references_updated: Optional[int] = None
+

  class EmitDirective(ConfigEnum):
      """A holder for directives for emission for specific types of entities"""
@@ -370,6 +373,12 @@ class DBTCommonConfig(
          "Set to False to skip it for engines like AWS Athena where it's not required.",
      )

+     drop_duplicate_sources: bool = Field(
+         default=True,
+         description="When enabled, drops sources that have the same name in the target platform as a model. "
+         "This ensures that lineage is generated reliably, but will lose any documentation associated only with the source.",
+     )
+
      @validator("target_platform")
      def validate_target_platform_value(cls, target_platform: str) -> str:
          if target_platform.lower() == DBT_PLATFORM:
@@ -509,7 +518,7 @@ class DBTNode:
      raw_code: Optional[str]

      dbt_adapter: str
-     dbt_name: str
+     dbt_name: str  # dbt unique identifier
      dbt_file_path: Optional[str]
      dbt_package_name: Optional[str]  # this is pretty much always present

@@ -975,6 +984,8 @@ class DBTSourceBase(StatefulIngestionSourceBase):
          self._infer_schemas_and_update_cll(all_nodes_map)

          nodes = self._filter_nodes(all_nodes)
+         nodes = self._drop_duplicate_sources(nodes)
+
          non_test_nodes = [
              dataset_node for dataset_node in nodes if dataset_node.node_type != "test"
          ]
@@ -1000,7 +1011,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
          return self.config.node_name_pattern.allowed(key)

      def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]:
-         nodes = []
+         nodes: List[DBTNode] = []
          for node in all_nodes:
              key = node.dbt_name

@@ -1012,6 +1023,62 @@ class DBTSourceBase(StatefulIngestionSourceBase):

          return nodes

+     def _drop_duplicate_sources(self, original_nodes: List[DBTNode]) -> List[DBTNode]:
+         """Detect and correct cases where a model and source have the same name.
+
+         In these cases, we don't want to generate both because they'll have the same
+         urn and hence overwrite each other. Instead, we drop the source and update
+         references to it to point at the model.
+
+         The risk here is that the source might have documentation that'd be lost,
+         which is why we maintain optionality with a config flag.
+         """
+         if not self.config.drop_duplicate_sources:
+             return original_nodes
+
+         self.report.duplicate_sources_dropped = 0
+         self.report.duplicate_sources_references_updated = 0
+
+         # Pass 1 - find all model names in the warehouse.
+         warehouse_model_names: Dict[str, str] = {}  # warehouse name -> model unique id
+         for node in original_nodes:
+             if node.node_type == "model" and node.exists_in_target_platform:
+                 warehouse_model_names[node.get_db_fqn()] = node.dbt_name
+
+         # Pass 2 - identify + drop duplicate sources.
+         source_references_to_update: Dict[
+             str, str
+         ] = {}  # source unique id -> model unique id
+         nodes: List[DBTNode] = []
+         for node in original_nodes:
+             if (
+                 node.node_type == "source"
+                 and node.exists_in_target_platform
+                 and (model_name := warehouse_model_names.get(node.get_db_fqn()))
+             ):
+                 self.report.warning(
+                     title="Duplicate model and source names detected",
+                     message="We found a dbt model and dbt source with the same name. To ensure reliable lineage generation, the source node was ignored. "
+                     "If you associated documentation/tags/other metadata with the source, it will be lost. "
+                     "To avoid this, you should remove the source node from your dbt project and replace any `source(<source_name>)` calls with `ref(<model_name>)`.",
+                     context=f"{node.dbt_name} (called {node.get_db_fqn()} in {self.config.target_platform}) duplicates {model_name}",
+                 )
+                 self.report.duplicate_sources_dropped += 1
+                 source_references_to_update[node.dbt_name] = model_name
+             else:
+                 nodes.append(node)
+
+         # Pass 3 - update references to the dropped sources.
+         for node in nodes:
+             for i, current_upstream in enumerate(node.upstream_nodes):
+                 if current_upstream in source_references_to_update:
+                     node.upstream_nodes[i] = source_references_to_update[
+                         current_upstream
+                     ]
+                     self.report.duplicate_sources_references_updated += 1
+
+         return nodes
+
      @staticmethod
      def _to_schema_info(schema_fields: List[SchemaField]) -> SchemaInfo:
          return {column.fieldPath: column.nativeDataType for column in schema_fields}
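
Note: drop_duplicate_sources defaults to True, so duplicate sources are dropped and upstream references are rewritten automatically; the counts are surfaced on the report as duplicate_sources_dropped and duplicate_sources_references_updated. A hedged sketch of opting out in a dbt ingestion recipe (the paths and platform are placeholders):

    recipe = {
        "source": {
            "type": "dbt",
            "config": {
                "manifest_path": "./target/manifest.json",
                "catalog_path": "./target/catalog.json",
                "target_platform": "snowflake",
                # Keep duplicate sources (and their documentation) instead of
                # dropping them; lineage for the affected tables may be unreliable.
                "drop_duplicate_sources": False,
            },
        }
    }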
datahub/ingestion/source/looker/looker_common.py CHANGED
@@ -242,13 +242,19 @@ class LookerViewId:

          dataset_name = config.view_naming_pattern.replace_variables(n_mapping)

-         return builder.make_dataset_urn_with_platform_instance(
+         generated_urn = builder.make_dataset_urn_with_platform_instance(
              platform=config.platform_name,
              name=dataset_name,
              platform_instance=config.platform_instance,
              env=config.env,
          )

+         logger.debug(
+             f"LookerViewId.get_urn for view '{self.view_name}': project='{self.project_name}', model='{self.model_name}', file_path='{self.file_path}', dataset_name='{dataset_name}', generated_urn='{generated_urn}'"
+         )
+
+         return generated_urn
+
      def get_browse_path(self, config: LookerCommonConfig) -> str:
          browse_path = config.view_browse_pattern.replace_variables(
              self.get_mapping(config)
@@ -452,15 +458,36 @@ class ExploreUpstreamViewField:
          )


- def create_view_project_map(view_fields: List[ViewField]) -> Dict[str, str]:
+ def create_view_project_map(
+     view_fields: List[ViewField],
+     explore_primary_view: Optional[str] = None,
+     explore_project_name: Optional[str] = None,
+ ) -> Dict[str, str]:
      """
      Each view in a model has unique name.
      Use this function in scope of a model.
+
+     Args:
+         view_fields: List of ViewField objects
+         explore_primary_view: The primary view name of the explore (explore.view_name)
+         explore_project_name: The project name of the explore (explore.project_name)
      """
      view_project_map: Dict[str, str] = {}
      for view_field in view_fields:
          if view_field.view_name is not None and view_field.project_name is not None:
-             view_project_map[view_field.view_name] = view_field.project_name
+             # Override field-level project assignment for the primary view when different
+             if (
+                 view_field.view_name == explore_primary_view
+                 and explore_project_name is not None
+                 and explore_project_name != view_field.project_name
+             ):
+                 logger.debug(
+                     f"Overriding project assignment for primary view '{view_field.view_name}': "
+                     f"field-level project '{view_field.project_name}' → explore-level project '{explore_project_name}'"
+                 )
+                 view_project_map[view_field.view_name] = explore_project_name
+             else:
+                 view_project_map[view_field.view_name] = view_field.project_name

      return view_project_map

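
Note: explore-level project metadata now wins over field-level attribution for the explore's primary view. An illustrative sketch using SimpleNamespace stubs in place of real ViewField objects (the function only reads .view_name and .project_name):

    from types import SimpleNamespace

    view_fields = [
        SimpleNamespace(view_name="orders", project_name="marts"),
        SimpleNamespace(view_name="users", project_name="core"),
    ]

    mapping = create_view_project_map(
        view_fields,
        explore_primary_view="orders",
        explore_project_name="analytics",
    )
    # {"orders": "analytics", "users": "core"}: the primary view is reassigned
    # to the explore's project; every other view keeps its field-level project.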
@@ -953,6 +980,9 @@ class LookerExplore:
                  f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
              )
          else:
+             logger.debug(
+                 f"LookerExplore.from_dict adding upstream view for explore '{dict['name']}' (model='{model_name}'): view_name='{view_name}', info[0].project='{info[0].project}'"
+             )
              upstream_views.append(
                  ProjectInclude(project=info[0].project, include=view_name)
              )
@@ -981,6 +1011,7 @@ class LookerExplore:
      ) -> Optional["LookerExplore"]:
          try:
              explore = client.lookml_model_explore(model, explore_name)
+
              views: Set[str] = set()
              lkml_fields: List[LookmlModelExploreField] = (
                  explore_field_set_to_lkml_fields(explore)
@@ -1117,7 +1148,11 @@ class LookerExplore:
                  )
              )

-             view_project_map: Dict[str, str] = create_view_project_map(view_fields)
+             view_project_map: Dict[str, str] = create_view_project_map(
+                 view_fields,
+                 explore_primary_view=explore.view_name,
+                 explore_project_name=explore.project_name,
+             )
              if view_project_map:
                  logger.debug(f"views and their projects: {view_project_map}")

@@ -1289,6 +1324,7 @@ class LookerExplore:
                  if self.upstream_views_file_path[view_ref.include] is not None
                  else ViewFieldValue.NOT_AVAILABLE.value
              )
+
              view_urn = LookerViewId(
                  project_name=(
                      view_ref.project
datahub/ingestion/source/s3/source.py CHANGED
@@ -41,7 +41,10 @@ from datahub.ingestion.source.aws.s3_util import (
      get_key_prefix,
      strip_s3_prefix,
  )
- from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
+ from datahub.ingestion.source.data_lake_common.data_lake_utils import (
+     ContainerWUCreator,
+     add_partition_columns_to_schema,
+ )
  from datahub.ingestion.source.data_lake_common.object_store import (
      create_object_store_adapter,
  )
@@ -58,9 +61,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
  )
  from datahub.metadata.com.linkedin.pegasus2avro.common import TimeStamp
  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
-     SchemaField,
      SchemaMetadata,
-     StringTypeClass,
  )
  from datahub.metadata.schema_classes import (
      DataPlatformInstanceClass,
@@ -70,7 +71,6 @@ from datahub.metadata.schema_classes import (
      OtherSchemaClass,
      PartitionsSummaryClass,
      PartitionSummaryClass,
-     SchemaFieldDataTypeClass,
      _Aspect,
  )
  from datahub.telemetry import stats, telemetry
@@ -474,7 +474,7 @@ class S3Source(StatefulIngestionSourceBase):
          fields = sorted(fields, key=lambda f: f.fieldPath)

          if self.source_config.add_partition_columns_to_schema and table_data.partitions:
-             self.add_partition_columns_to_schema(
+             add_partition_columns_to_schema(
                  fields=fields, path_spec=path_spec, full_path=table_data.full_path
              )

@@ -510,34 +510,6 @@ class S3Source(StatefulIngestionSourceBase):
          else:
              return None

-     def add_partition_columns_to_schema(
-         self, path_spec: PathSpec, full_path: str, fields: List[SchemaField]
-     ) -> None:
-         is_fieldpath_v2 = False
-         for field in fields:
-             if field.fieldPath.startswith("[version=2.0]"):
-                 is_fieldpath_v2 = True
-                 break
-         partition_keys = path_spec.get_partition_from_path(full_path)
-         if not partition_keys:
-             return None
-
-         for partition_key in partition_keys:
-             fields.append(
-                 SchemaField(
-                     fieldPath=(
-                         f"{partition_key[0]}"
-                         if not is_fieldpath_v2
-                         else f"[version=2.0].[type=string].{partition_key[0]}"
-                     ),
-                     nativeDataType="string",
-                     type=SchemaFieldDataTypeClass(StringTypeClass()),
-                     isPartitioningKey=True,
-                     nullable=True,
-                     recursive=False,
-                 )
-             )
-
      def get_table_profile(
          self, table_data: TableData, dataset_urn: str
      ) -> Iterable[MetadataWorkUnit]: