acryl-datahub 1.2.0.1rc1__py3-none-any.whl → 1.2.0.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/METADATA +2670 -2670
- {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/RECORD +27 -27
- datahub/_version.py +1 -1
- datahub/emitter/rest_emitter.py +3 -1
- datahub/ingestion/source/abs/source.py +5 -29
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/dbt/dbt_common.py +69 -2
- datahub/ingestion/source/looker/looker_common.py +40 -4
- datahub/ingestion/source/s3/source.py +5 -33
- datahub/ingestion/source/sql/postgres.py +190 -1
- datahub/ingestion/source/sql_queries.py +112 -77
- datahub/metadata/_internal_schema_classes.py +81 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +2 -0
- datahub/metadata/schema.avsc +60 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +10 -1
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +33 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/sdk/search_filters.py +51 -2
- datahub/sql_parsing/sql_parsing_aggregator.py +1 -0
- datahub/upgrade/upgrade.py +5 -3
- {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.2.0.
|
|
1
|
+
acryl_datahub-1.2.0.2rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=1ST8gBZ8wWcQFJrAhpY_re_rMWhON0s6EftssFUvWKw,323
|
|
5
5
|
datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -132,7 +132,7 @@ datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxga
|
|
|
132
132
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
133
133
|
datahub/emitter/request_helper.py,sha256=2Sij9VJqgA7xZI6I7IuxsA8ioakbz0FJ3gvazxU_z3M,5738
|
|
134
134
|
datahub/emitter/response_helper.py,sha256=qGm45n43CepW7j6kP9wTXuP-U-SZnn7hQdJTdVaoqhQ,7504
|
|
135
|
-
datahub/emitter/rest_emitter.py,sha256=
|
|
135
|
+
datahub/emitter/rest_emitter.py,sha256=lMqjtDyPOArIrNgL47kq1cbB4xiR17CHfRRxpGYriDY,38793
|
|
136
136
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
137
137
|
datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
|
|
138
138
|
datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
|
|
@@ -227,14 +227,14 @@ datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgM
|
|
|
227
227
|
datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
|
|
228
228
|
datahub/ingestion/source/salesforce.py,sha256=Pa_w1XszxFd8fyhpSWOfc2nOnevHwwstIvnRrQT4R9M,40584
|
|
229
229
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
230
|
-
datahub/ingestion/source/sql_queries.py,sha256=
|
|
230
|
+
datahub/ingestion/source/sql_queries.py,sha256=4n8lTmfNtcH_qkGvnmPwidWCzfnjtWiBcd4AzAJGvbo,11810
|
|
231
231
|
datahub/ingestion/source/superset.py,sha256=oi7F2jlvkVr9ItJ_r1Jm4bYfXHYu4vPAFPMPaGJKB84,50608
|
|
232
232
|
datahub/ingestion/source/abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
233
233
|
datahub/ingestion/source/abs/config.py,sha256=mBQe0JTaP-Rcv4HnMUUySoYbSr4r3jDEMioxaXHnxXU,6709
|
|
234
234
|
datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4VK8QAjzBiJFu85tOGMmJ0lJZ2Og,3600
|
|
235
235
|
datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
236
236
|
datahub/ingestion/source/abs/report.py,sha256=CkRjsNn0Pab-ZPllxz3IUJI_r3x0T6urJePa_hJKi5U,586
|
|
237
|
-
datahub/ingestion/source/abs/source.py,sha256=
|
|
237
|
+
datahub/ingestion/source/abs/source.py,sha256=uqMpvHmje3VPJRujW7l3Zmb0t3yFl91remXvznLuMO0,23783
|
|
238
238
|
datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
239
239
|
datahub/ingestion/source/apply/datahub_apply.py,sha256=xTD-Iq3UHhxcz61RwNuI2kJjRrnQEfZFSgvS1X6loV4,7703
|
|
240
240
|
datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -288,7 +288,7 @@ datahub/ingestion/source/common/gcp_credentials_config.py,sha256=_NapGkAqZMbXNCl
|
|
|
288
288
|
datahub/ingestion/source/common/subtypes.py,sha256=x8dv9SN23Y8t5oq7SUuQWUPAH8KM3KfTT27ma9XgrM0,4551
|
|
289
289
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
290
290
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
291
|
-
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=
|
|
291
|
+
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=IYr5y8vy_6CtMtITqzn6OqovzH1cpe1i30M-75PouXo,7768
|
|
292
292
|
datahub/ingestion/source/data_lake_common/object_store.py,sha256=i9Hgb8Ww23QD_jEjzj_2qxA8Nr56krnZfo1qyOWmH9M,23608
|
|
293
293
|
datahub/ingestion/source/data_lake_common/path_spec.py,sha256=ekJAr4-PE2RhzQnmKb2xcSs_YncC1Dz95-UrXI67Vos,23584
|
|
294
294
|
datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -301,7 +301,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
301
301
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
302
302
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
303
303
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=4gWOFSX0YU8EAJgO4J47NBE4QbNtJ-5nUe66vry-oGc,18160
|
|
304
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
304
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=ByCqzjkToXgfhOyxxc6VEuD8BZbYbPsD5yrLRMMPUcI,85640
|
|
305
305
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=WVI2ZYXOMxgFzJnJqsqmEGS-5xdfiVIDsCb78lvSeQ0,24930
|
|
306
306
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
|
|
307
307
|
datahub/ingestion/source/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -367,7 +367,7 @@ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=QTMY0FmOHkTxfIC
|
|
|
367
367
|
datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=A9q-u5IoV35swvoyMrzT75FVV9-SBeYGhLKDYRge-IQ,23845
|
|
368
368
|
datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
369
369
|
datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
|
|
370
|
-
datahub/ingestion/source/looker/looker_common.py,sha256=
|
|
370
|
+
datahub/ingestion/source/looker/looker_common.py,sha256=flWAl0LYY--FoCS_5bgHNThACukQ4WYVLhfiYfsF74I,66447
|
|
371
371
|
datahub/ingestion/source/looker/looker_config.py,sha256=eVKw1nn9D8hUFdRfNyT3MtzL8w-zWhFeokiwSnNKQuc,13607
|
|
372
372
|
datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
|
|
373
373
|
datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
|
|
@@ -446,7 +446,7 @@ datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pL
|
|
|
446
446
|
datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
|
|
447
447
|
datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
448
448
|
datahub/ingestion/source/s3/report.py,sha256=9Ej1UCChw963UpGw1-7asi5vFrOM232gfgG8bRdKPp0,667
|
|
449
|
-
datahub/ingestion/source/s3/source.py,sha256=
|
|
449
|
+
datahub/ingestion/source/s3/source.py,sha256=4_6VDBKIYTiS6fqU4BuGQ4XlJuD_1ehK7jb4TyMiv0c,59908
|
|
450
450
|
datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
451
451
|
datahub/ingestion/source/sac/sac.py,sha256=0s_JxHGOhit3Wvgbg7qQi-Z9j9_TgBX_I1yOR3L6-rA,30243
|
|
452
452
|
datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
|
|
@@ -501,7 +501,7 @@ datahub/ingestion/source/sql/hive_metastore.py,sha256=jpAgND_n2W6VCF_KMElXV5Gnd4
|
|
|
501
501
|
datahub/ingestion/source/sql/mariadb.py,sha256=om6QoG5UtDldt1N6AfIWp3T-HXNaaqFmpz2i0JAemfM,654
|
|
502
502
|
datahub/ingestion/source/sql/mysql.py,sha256=34Vu3otULxUY0-JUEYdZw2aoyuTlc9KLcoJdQxe2yJs,3267
|
|
503
503
|
datahub/ingestion/source/sql/oracle.py,sha256=BGrHln5OQ6gq7LTqY8e4ySS5-uJaJEan0TU1778ZEYs,29963
|
|
504
|
-
datahub/ingestion/source/sql/postgres.py,sha256=
|
|
504
|
+
datahub/ingestion/source/sql/postgres.py,sha256=fynj84tlNl0FO1I_2GkmeuFQ1HexL_woG0fAK3zHEzw,19019
|
|
505
505
|
datahub/ingestion/source/sql/presto.py,sha256=tATa0M2q0PjUC_E9W_jSUsmKTP7cVJayLgrFMzG_eao,4223
|
|
506
506
|
datahub/ingestion/source/sql/sql_common.py,sha256=qIH0Wnz8XvJDlYuS71X3XC5BfpeHkSGzpwn1aqPVY0o,52130
|
|
507
507
|
datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fxk9xei_CUIAXB0,8222
|
|
@@ -618,8 +618,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
618
618
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
619
619
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
620
620
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
621
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
622
|
-
datahub/metadata/schema.avsc,sha256=
|
|
621
|
+
datahub/metadata/_internal_schema_classes.py,sha256=i3KQdgYqA-gWAIlNa63f9OL1D5e_pmDtBjGKZxPsNIM,1050332
|
|
622
|
+
datahub/metadata/schema.avsc,sha256=q7trioUW-inMl6LMviL9GqpngzbraV6q-SSQmTZ3USA,737396
|
|
623
623
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
624
624
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
625
625
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -673,7 +673,7 @@ datahub/metadata/com/linkedin/pegasus2avro/metadata/query/filter/__init__.py,sha
|
|
|
673
673
|
datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py,sha256=OPboF8SV11wGnjvWQB-rxtB0otMdCsE7Tcy7xkOUgz8,2358
|
|
674
674
|
datahub/metadata/com/linkedin/pegasus2avro/ml/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
675
675
|
datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py,sha256=qefB0n1xilQHCPla80b39wdjHOYoVtzBJT2jGc2szkM,3309
|
|
676
|
-
datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py,sha256=
|
|
676
|
+
datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py,sha256=oJb5Lp4Rxo2fxiVqbH_m6wcfIDN49iLuL2RxJvIe-Kc,1218
|
|
677
677
|
datahub/metadata/com/linkedin/pegasus2avro/mxe/__init__.py,sha256=LqGp9QTLk_tiSsbHMGSUH7uPG00Bf_qQIMiU7vtO4Tk,973
|
|
678
678
|
datahub/metadata/com/linkedin/pegasus2avro/notebook/__init__.py,sha256=BcjOsz4YeHQbLLBb4Im4uJ7ux1hGHquQDmiIOiDXVtE,901
|
|
679
679
|
datahub/metadata/com/linkedin/pegasus2avro/ownership/__init__.py,sha256=r813MW_bkP1ZpC2NJf7uCHEOapjebl611c90vryKX4A,302
|
|
@@ -732,7 +732,7 @@ datahub/metadata/schemas/CorpUserCredentials.avsc,sha256=S7FkV9K_DGxhb4GFYbM5_lP
|
|
|
732
732
|
datahub/metadata/schemas/CorpUserEditableInfo.avsc,sha256=6IrqWidbHP7mRryfVlWAQU0JS34THHTM8_aIKWqClUE,3843
|
|
733
733
|
datahub/metadata/schemas/CorpUserInfo.avsc,sha256=oObOza-5FLjZyCjj0FN4MNV1DodgTwJSV4APduAggjk,3955
|
|
734
734
|
datahub/metadata/schemas/CorpUserKey.avsc,sha256=01sbbdr8G-ZP1yc2UfY3gR-YN6b7AvDbCbNpZJ-02J4,1025
|
|
735
|
-
datahub/metadata/schemas/CorpUserSettings.avsc,sha256=
|
|
735
|
+
datahub/metadata/schemas/CorpUserSettings.avsc,sha256=38_ZXslpkgJC1PlRxPnas9cHBi5dQYUcsl1X4WSn5Gc,6806
|
|
736
736
|
datahub/metadata/schemas/CorpUserStatus.avsc,sha256=yqojAXEQ9CjRhY58RPyTUxzmFbHSANGGaMMbqiYZZIE,2538
|
|
737
737
|
datahub/metadata/schemas/Cost.avsc,sha256=o4kYZSss2uEwJ6gCA9fhBUoyD5xUqcSxz78vkIXXzGQ,1494
|
|
738
738
|
datahub/metadata/schemas/DashboardInfo.avsc,sha256=li2lSV6R4V-nz6foOi-NYxt_8ShHWfoKRw6M2BG5530,12907
|
|
@@ -752,7 +752,7 @@ datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjO
|
|
|
752
752
|
datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
|
|
753
753
|
datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
|
|
754
754
|
datahub/metadata/schemas/DataHubPageModuleKey.avsc,sha256=NyFN8cVO6s6rtgoLGJJGfcPfpGr5PfmZlIhM6ajldfQ,460
|
|
755
|
-
datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=
|
|
755
|
+
datahub/metadata/schemas/DataHubPageModuleProperties.avsc,sha256=_tbXEufCkviQxmhgB7quEq5Qm6693Z6no_9B624ryX4,8757
|
|
756
756
|
datahub/metadata/schemas/DataHubPageTemplateKey.avsc,sha256=0sVqwL97Rp8YHPytp2RqUP5hIW048hmT2hPNP5k6arc,472
|
|
757
757
|
datahub/metadata/schemas/DataHubPageTemplateProperties.avsc,sha256=0ndN64UNAADL6G_GVjJLHbe_dBnWhVRjtI3MilOlHQc,5651
|
|
758
758
|
datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
|
|
@@ -876,9 +876,9 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
|
|
|
876
876
|
datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=F3lgpMnHBhXsqGncHE9x06P-0RiNCrzbUUWlMkPJxFI,1132
|
|
877
877
|
datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
|
|
878
878
|
datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
|
|
879
|
-
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=
|
|
880
|
-
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=
|
|
881
|
-
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=
|
|
879
|
+
datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=l3tVuQces7sKrwWsaIJrn3nMRUiCl3MHqCJJHcw7Ylc,377705
|
|
880
|
+
datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=soCmgrcEBE5yS-mQIm-RIefhb74ONj9Fqayxa0-59KE,13254
|
|
881
|
+
datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=pT14vUmpj7VJ8hinQ0pcCUtRKx6RAGHWh1eJixkqaE8,12647
|
|
882
882
|
datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
|
|
883
883
|
datahub/metadata/schemas/NativeGroupMembership.avsc,sha256=9mh9tzyj3ErmTIhX7ERRUm78j1QtGwXUl9UuIXPndBg,588
|
|
884
884
|
datahub/metadata/schemas/NotebookContent.avsc,sha256=ck3yDt0qK5Hn3-mWTNLlYnohXCs6kMUUWPXm7o1JEWE,12496
|
|
@@ -916,7 +916,7 @@ datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=a-6TaOQ4A7LDFL
|
|
|
916
916
|
datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
|
|
917
917
|
datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
|
|
918
918
|
datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
|
|
919
|
-
datahub/metadata/schemas/SystemMetadata.avsc,sha256=
|
|
919
|
+
datahub/metadata/schemas/SystemMetadata.avsc,sha256=XEU32-oZsyVwMii-DlQSVDaUTfKQ9n7K0ChMJ07KHvQ,4457
|
|
920
920
|
datahub/metadata/schemas/TagKey.avsc,sha256=BfckMlx-wg_LV1_PFVgItfNBPtCQ8_erGeQM4LzOXmY,640
|
|
921
921
|
datahub/metadata/schemas/TagProperties.avsc,sha256=Qzttxd7BB38JUwwl7tZzIV1Warnh-uQO-Ahw9Sd-vH4,883
|
|
922
922
|
datahub/metadata/schemas/TelemetryClientId.avsc,sha256=GScej0kXFZxoBUcRVrVynzArFSYQpO_dnhY5Po5dlx0,408
|
|
@@ -954,7 +954,7 @@ datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
|
|
|
954
954
|
datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
|
|
955
955
|
datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
|
|
956
956
|
datahub/sdk/search_client.py,sha256=hlk40VnD3eT88hMgwXAUv31-ENbDe50P-gsXUnGSNeo,3512
|
|
957
|
-
datahub/sdk/search_filters.py,sha256=
|
|
957
|
+
datahub/sdk/search_filters.py,sha256=Z0f44KNOjmpmeaPweZUlvcDxNuyTckCHGJX8JIygiYY,17230
|
|
958
958
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
959
959
|
datahub/secret/datahub_secret_store.py,sha256=xyNAZY62d6KSz_kYF9wN7RDMLvNhu2ayOzcYvubOX1E,2519
|
|
960
960
|
datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
|
|
@@ -982,7 +982,7 @@ datahub/sql_parsing/fingerprint_utils.py,sha256=3hGiexaQXnE7eZLxo-t7hlTyVQz7womb
|
|
|
982
982
|
datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
|
|
983
983
|
datahub/sql_parsing/schema_resolver.py,sha256=ISuingLcQnOJZkNXBkc73uPwYUbbOtERAjgGhJajDiQ,10782
|
|
984
984
|
datahub/sql_parsing/split_statements.py,sha256=OIQXA9e4k3G9Z1y7rbgdtZhMWt4FPnq41cE8Jkm9cBY,9542
|
|
985
|
-
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=
|
|
985
|
+
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=1gDSf8n6AkINZbhWdcPw7JjeIPMJgAIep1XFQMOPR6Q,71991
|
|
986
986
|
datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
|
|
987
987
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
988
988
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=oG7Zx2aOpm1tBQQowPgSufGlMpm5DaMGKTwk7gIkhX0,61450
|
|
@@ -1003,7 +1003,7 @@ datahub/testing/mcp_diff.py,sha256=1BpQ3hST46cOQi1SmKdsto3j6x6Sk6yHm0vG1w9IDL0,1
|
|
|
1003
1003
|
datahub/testing/pytest_hooks.py,sha256=eifmj0M68AIfjTn_-0vtaBkKl75vNKMjsbYX-pJqmGY,1417
|
|
1004
1004
|
datahub/testing/sdk_v2_helpers.py,sha256=FooqGn5PfdJJrCFm3x_uh02IMhDdLjqEf64W16WdvE0,424
|
|
1005
1005
|
datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1006
|
-
datahub/upgrade/upgrade.py,sha256=
|
|
1006
|
+
datahub/upgrade/upgrade.py,sha256=zA28UAgxVzPRiCnjbZ6PqSflozMyomTkmGetX6DXC6E,18583
|
|
1007
1007
|
datahub/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1008
1008
|
datahub/utilities/_custom_package_loader.py,sha256=9kgPE7Y77E-hNee8l4sKtVby-btUNum3dBfDixMzcVA,2059
|
|
1009
1009
|
datahub/utilities/_markupsafe_compat.py,sha256=QX7c9KiHs56ASl7bJlgR4FAf3CGiY94zIr0h6Ak15To,444
|
|
@@ -1093,8 +1093,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1093
1093
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1094
1094
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1095
1095
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1096
|
-
acryl_datahub-1.2.0.
|
|
1097
|
-
acryl_datahub-1.2.0.
|
|
1098
|
-
acryl_datahub-1.2.0.
|
|
1099
|
-
acryl_datahub-1.2.0.
|
|
1100
|
-
acryl_datahub-1.2.0.
|
|
1096
|
+
acryl_datahub-1.2.0.2rc1.dist-info/METADATA,sha256=dvJ_JwbJKfu4voRemOG8bC2x4cXQUyNPu9cmz68Ehvg,181893
|
|
1097
|
+
acryl_datahub-1.2.0.2rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1098
|
+
acryl_datahub-1.2.0.2rc1.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
|
|
1099
|
+
acryl_datahub-1.2.0.2rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1100
|
+
acryl_datahub-1.2.0.2rc1.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/emitter/rest_emitter.py
CHANGED
|
@@ -98,7 +98,9 @@ TRACE_BACKOFF_FACTOR = 2.0 # Double the wait time each attempt
|
|
|
98
98
|
# The limit is 16mb. We will use a max of 15mb to have some space
|
|
99
99
|
# for overhead like request headers.
|
|
100
100
|
# This applies to pretty much all calls to GMS.
|
|
101
|
-
INGEST_MAX_PAYLOAD_BYTES =
|
|
101
|
+
INGEST_MAX_PAYLOAD_BYTES = int(
|
|
102
|
+
os.getenv("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_BYTES", 15 * 1024 * 1024)
|
|
103
|
+
)
|
|
102
104
|
|
|
103
105
|
# This limit is somewhat arbitrary. All GMS endpoints will timeout
|
|
104
106
|
# and return a 500 if processing takes too long. To avoid sending
|
|
@@ -44,7 +44,10 @@ from datahub.ingestion.source.azure.abs_utils import (
|
|
|
44
44
|
get_key_prefix,
|
|
45
45
|
strip_abs_prefix,
|
|
46
46
|
)
|
|
47
|
-
from datahub.ingestion.source.data_lake_common.data_lake_utils import
|
|
47
|
+
from datahub.ingestion.source.data_lake_common.data_lake_utils import (
|
|
48
|
+
ContainerWUCreator,
|
|
49
|
+
add_partition_columns_to_schema,
|
|
50
|
+
)
|
|
48
51
|
from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet
|
|
49
52
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
50
53
|
StaleEntityRemovalHandler,
|
|
@@ -53,10 +56,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
53
56
|
StatefulIngestionSourceBase,
|
|
54
57
|
)
|
|
55
58
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
56
|
-
SchemaField,
|
|
57
|
-
SchemaFieldDataType,
|
|
58
59
|
SchemaMetadata,
|
|
59
|
-
StringTypeClass,
|
|
60
60
|
)
|
|
61
61
|
from datahub.metadata.schema_classes import (
|
|
62
62
|
DataPlatformInstanceClass,
|
|
@@ -223,36 +223,12 @@ class ABSSource(StatefulIngestionSourceBase):
|
|
|
223
223
|
fields = sorted(fields, key=lambda f: f.fieldPath)
|
|
224
224
|
|
|
225
225
|
if self.source_config.add_partition_columns_to_schema:
|
|
226
|
-
|
|
226
|
+
add_partition_columns_to_schema(
|
|
227
227
|
fields=fields, path_spec=path_spec, full_path=table_data.full_path
|
|
228
228
|
)
|
|
229
229
|
|
|
230
230
|
return fields
|
|
231
231
|
|
|
232
|
-
def add_partition_columns_to_schema(
|
|
233
|
-
self, path_spec: PathSpec, full_path: str, fields: List[SchemaField]
|
|
234
|
-
) -> None:
|
|
235
|
-
vars = path_spec.get_named_vars(full_path)
|
|
236
|
-
if vars is not None and "partition" in vars:
|
|
237
|
-
for partition in vars["partition"].values():
|
|
238
|
-
partition_arr = partition.split("=")
|
|
239
|
-
if len(partition_arr) != 2:
|
|
240
|
-
logger.debug(
|
|
241
|
-
f"Could not derive partition key from partition field {partition}"
|
|
242
|
-
)
|
|
243
|
-
continue
|
|
244
|
-
partition_key = partition_arr[0]
|
|
245
|
-
fields.append(
|
|
246
|
-
SchemaField(
|
|
247
|
-
fieldPath=f"{partition_key}",
|
|
248
|
-
nativeDataType="string",
|
|
249
|
-
type=SchemaFieldDataType(StringTypeClass()),
|
|
250
|
-
isPartitioningKey=True,
|
|
251
|
-
nullable=True,
|
|
252
|
-
recursive=False,
|
|
253
|
-
)
|
|
254
|
-
)
|
|
255
|
-
|
|
256
232
|
def _create_table_operation_aspect(self, table_data: TableData) -> OperationClass:
|
|
257
233
|
reported_time = int(time.time() * 1000)
|
|
258
234
|
|
|
@@ -25,10 +25,16 @@ from datahub.ingestion.source.data_lake_common.object_store import (
|
|
|
25
25
|
get_object_store_bucket_name,
|
|
26
26
|
get_object_store_for_uri,
|
|
27
27
|
)
|
|
28
|
+
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
|
|
28
29
|
from datahub.ingestion.source.gcs.gcs_utils import (
|
|
29
30
|
get_gcs_prefix,
|
|
30
31
|
is_gcs_uri,
|
|
31
32
|
)
|
|
33
|
+
from datahub.metadata.schema_classes import (
|
|
34
|
+
SchemaFieldClass,
|
|
35
|
+
SchemaFieldDataTypeClass,
|
|
36
|
+
StringTypeClass,
|
|
37
|
+
)
|
|
32
38
|
|
|
33
39
|
# hide annoying debug errors from py4j
|
|
34
40
|
logging.getLogger("py4j").setLevel(logging.ERROR)
|
|
@@ -39,6 +45,37 @@ PLATFORM_GCS = "gcs"
|
|
|
39
45
|
PLATFORM_ABS = "abs"
|
|
40
46
|
|
|
41
47
|
|
|
48
|
+
def add_partition_columns_to_schema(
|
|
49
|
+
path_spec: PathSpec, full_path: str, fields: List[SchemaFieldClass]
|
|
50
|
+
) -> None:
|
|
51
|
+
# Check if using fieldPath v2 format
|
|
52
|
+
is_fieldpath_v2 = any(
|
|
53
|
+
field.fieldPath.startswith("[version=2.0]") for field in fields
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Extract partition information from path
|
|
57
|
+
partition_keys = path_spec.get_partition_from_path(full_path)
|
|
58
|
+
if not partition_keys:
|
|
59
|
+
return
|
|
60
|
+
|
|
61
|
+
# Add partition fields to schema
|
|
62
|
+
for partition_key in partition_keys:
|
|
63
|
+
fields.append(
|
|
64
|
+
SchemaFieldClass(
|
|
65
|
+
fieldPath=(
|
|
66
|
+
f"{partition_key[0]}"
|
|
67
|
+
if not is_fieldpath_v2
|
|
68
|
+
else f"[version=2.0].[type=string].{partition_key[0]}"
|
|
69
|
+
),
|
|
70
|
+
nativeDataType="string",
|
|
71
|
+
type=SchemaFieldDataTypeClass(StringTypeClass()),
|
|
72
|
+
isPartitioningKey=True,
|
|
73
|
+
nullable=False,
|
|
74
|
+
recursive=False,
|
|
75
|
+
)
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
42
79
|
class ContainerWUCreator:
|
|
43
80
|
processed_containers: List[str]
|
|
44
81
|
|
|
@@ -145,6 +145,9 @@ class DBTSourceReport(StaleEntityRemovalSourceReport):
|
|
|
145
145
|
|
|
146
146
|
nodes_filtered: LossyList[str] = field(default_factory=LossyList)
|
|
147
147
|
|
|
148
|
+
duplicate_sources_dropped: Optional[int] = None
|
|
149
|
+
duplicate_sources_references_updated: Optional[int] = None
|
|
150
|
+
|
|
148
151
|
|
|
149
152
|
class EmitDirective(ConfigEnum):
|
|
150
153
|
"""A holder for directives for emission for specific types of entities"""
|
|
@@ -370,6 +373,12 @@ class DBTCommonConfig(
|
|
|
370
373
|
"Set to False to skip it for engines like AWS Athena where it's not required.",
|
|
371
374
|
)
|
|
372
375
|
|
|
376
|
+
drop_duplicate_sources: bool = Field(
|
|
377
|
+
default=True,
|
|
378
|
+
description="When enabled, drops sources that have the same name in the target platform as a model. "
|
|
379
|
+
"This ensures that lineage is generated reliably, but will lose any documentation associated only with the source.",
|
|
380
|
+
)
|
|
381
|
+
|
|
373
382
|
@validator("target_platform")
|
|
374
383
|
def validate_target_platform_value(cls, target_platform: str) -> str:
|
|
375
384
|
if target_platform.lower() == DBT_PLATFORM:
|
|
@@ -509,7 +518,7 @@ class DBTNode:
|
|
|
509
518
|
raw_code: Optional[str]
|
|
510
519
|
|
|
511
520
|
dbt_adapter: str
|
|
512
|
-
dbt_name: str
|
|
521
|
+
dbt_name: str # dbt unique identifier
|
|
513
522
|
dbt_file_path: Optional[str]
|
|
514
523
|
dbt_package_name: Optional[str] # this is pretty much always present
|
|
515
524
|
|
|
@@ -975,6 +984,8 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
975
984
|
self._infer_schemas_and_update_cll(all_nodes_map)
|
|
976
985
|
|
|
977
986
|
nodes = self._filter_nodes(all_nodes)
|
|
987
|
+
nodes = self._drop_duplicate_sources(nodes)
|
|
988
|
+
|
|
978
989
|
non_test_nodes = [
|
|
979
990
|
dataset_node for dataset_node in nodes if dataset_node.node_type != "test"
|
|
980
991
|
]
|
|
@@ -1000,7 +1011,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
1000
1011
|
return self.config.node_name_pattern.allowed(key)
|
|
1001
1012
|
|
|
1002
1013
|
def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]:
|
|
1003
|
-
nodes = []
|
|
1014
|
+
nodes: List[DBTNode] = []
|
|
1004
1015
|
for node in all_nodes:
|
|
1005
1016
|
key = node.dbt_name
|
|
1006
1017
|
|
|
@@ -1012,6 +1023,62 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
1012
1023
|
|
|
1013
1024
|
return nodes
|
|
1014
1025
|
|
|
1026
|
+
def _drop_duplicate_sources(self, original_nodes: List[DBTNode]) -> List[DBTNode]:
|
|
1027
|
+
"""Detect and correct cases where a model and source have the same name.
|
|
1028
|
+
|
|
1029
|
+
In these cases, we don't want to generate both because they'll have the same
|
|
1030
|
+
urn and hence overwrite each other. Instead, we drop the source and update
|
|
1031
|
+
references to it to point at the model.
|
|
1032
|
+
|
|
1033
|
+
The risk here is that the source might have documentation that'd be lost,
|
|
1034
|
+
which is why we maintain optionality with a config flag.
|
|
1035
|
+
"""
|
|
1036
|
+
if not self.config.drop_duplicate_sources:
|
|
1037
|
+
return original_nodes
|
|
1038
|
+
|
|
1039
|
+
self.report.duplicate_sources_dropped = 0
|
|
1040
|
+
self.report.duplicate_sources_references_updated = 0
|
|
1041
|
+
|
|
1042
|
+
# Pass 1 - find all model names in the warehouse.
|
|
1043
|
+
warehouse_model_names: Dict[str, str] = {} # warehouse name -> model unique id
|
|
1044
|
+
for node in original_nodes:
|
|
1045
|
+
if node.node_type == "model" and node.exists_in_target_platform:
|
|
1046
|
+
warehouse_model_names[node.get_db_fqn()] = node.dbt_name
|
|
1047
|
+
|
|
1048
|
+
# Pass 2 - identify + drop duplicate sources.
|
|
1049
|
+
source_references_to_update: Dict[
|
|
1050
|
+
str, str
|
|
1051
|
+
] = {} # source unique id -> model unique id
|
|
1052
|
+
nodes: List[DBTNode] = []
|
|
1053
|
+
for node in original_nodes:
|
|
1054
|
+
if (
|
|
1055
|
+
node.node_type == "source"
|
|
1056
|
+
and node.exists_in_target_platform
|
|
1057
|
+
and (model_name := warehouse_model_names.get(node.get_db_fqn()))
|
|
1058
|
+
):
|
|
1059
|
+
self.report.warning(
|
|
1060
|
+
title="Duplicate model and source names detected",
|
|
1061
|
+
message="We found a dbt model and dbt source with the same name. To ensure reliable lineage generation, the source node was ignored. "
|
|
1062
|
+
"If you associated documentation/tags/other metadata with the source, it will be lost. "
|
|
1063
|
+
"To avoid this, you should remove the source node from your dbt project and replace any `source(<source_name>)` calls with `ref(<model_name>)`.",
|
|
1064
|
+
context=f"{node.dbt_name} (called {node.get_db_fqn()} in {self.config.target_platform}) duplicates {model_name}",
|
|
1065
|
+
)
|
|
1066
|
+
self.report.duplicate_sources_dropped += 1
|
|
1067
|
+
source_references_to_update[node.dbt_name] = model_name
|
|
1068
|
+
else:
|
|
1069
|
+
nodes.append(node)
|
|
1070
|
+
|
|
1071
|
+
# Pass 3 - update references to the dropped sources.
|
|
1072
|
+
for node in nodes:
|
|
1073
|
+
for i, current_upstream in enumerate(node.upstream_nodes):
|
|
1074
|
+
if current_upstream in source_references_to_update:
|
|
1075
|
+
node.upstream_nodes[i] = source_references_to_update[
|
|
1076
|
+
current_upstream
|
|
1077
|
+
]
|
|
1078
|
+
self.report.duplicate_sources_references_updated += 1
|
|
1079
|
+
|
|
1080
|
+
return nodes
|
|
1081
|
+
|
|
1015
1082
|
@staticmethod
|
|
1016
1083
|
def _to_schema_info(schema_fields: List[SchemaField]) -> SchemaInfo:
|
|
1017
1084
|
return {column.fieldPath: column.nativeDataType for column in schema_fields}
|
|
@@ -242,13 +242,19 @@ class LookerViewId:
|
|
|
242
242
|
|
|
243
243
|
dataset_name = config.view_naming_pattern.replace_variables(n_mapping)
|
|
244
244
|
|
|
245
|
-
return builder.make_dataset_urn_with_platform_instance(
|
|
245
|
+
generated_urn = builder.make_dataset_urn_with_platform_instance(
|
|
246
246
|
platform=config.platform_name,
|
|
247
247
|
name=dataset_name,
|
|
248
248
|
platform_instance=config.platform_instance,
|
|
249
249
|
env=config.env,
|
|
250
250
|
)
|
|
251
251
|
|
|
252
|
+
logger.debug(
|
|
253
|
+
f"LookerViewId.get_urn for view '{self.view_name}': project='{self.project_name}', model='{self.model_name}', file_path='{self.file_path}', dataset_name='{dataset_name}', generated_urn='{generated_urn}'"
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
return generated_urn
|
|
257
|
+
|
|
252
258
|
def get_browse_path(self, config: LookerCommonConfig) -> str:
|
|
253
259
|
browse_path = config.view_browse_pattern.replace_variables(
|
|
254
260
|
self.get_mapping(config)
|
|
@@ -452,15 +458,36 @@ class ExploreUpstreamViewField:
|
|
|
452
458
|
)
|
|
453
459
|
|
|
454
460
|
|
|
455
|
-
def create_view_project_map(view_fields: List[ViewField]) -> Dict[str, str]:
|
|
461
|
+
def create_view_project_map(
|
|
462
|
+
view_fields: List[ViewField],
|
|
463
|
+
explore_primary_view: Optional[str] = None,
|
|
464
|
+
explore_project_name: Optional[str] = None,
|
|
465
|
+
) -> Dict[str, str]:
|
|
456
466
|
"""
|
|
457
467
|
Each view in a model has unique name.
|
|
458
468
|
Use this function in scope of a model.
|
|
469
|
+
|
|
470
|
+
Args:
|
|
471
|
+
view_fields: List of ViewField objects
|
|
472
|
+
explore_primary_view: The primary view name of the explore (explore.view_name)
|
|
473
|
+
explore_project_name: The project name of the explore (explore.project_name)
|
|
459
474
|
"""
|
|
460
475
|
view_project_map: Dict[str, str] = {}
|
|
461
476
|
for view_field in view_fields:
|
|
462
477
|
if view_field.view_name is not None and view_field.project_name is not None:
|
|
463
|
-
view_project_map[view_field.view_name] = view_field.project_name
|
|
478
|
+
# Override field-level project assignment for the primary view when different
|
|
479
|
+
if (
|
|
480
|
+
view_field.view_name == explore_primary_view
|
|
481
|
+
and explore_project_name is not None
|
|
482
|
+
and explore_project_name != view_field.project_name
|
|
483
|
+
):
|
|
484
|
+
logger.debug(
|
|
485
|
+
f"Overriding project assignment for primary view '{view_field.view_name}': "
|
|
486
|
+
f"field-level project '{view_field.project_name}' → explore-level project '{explore_project_name}'"
|
|
487
|
+
)
|
|
488
|
+
view_project_map[view_field.view_name] = explore_project_name
|
|
489
|
+
else:
|
|
490
|
+
view_project_map[view_field.view_name] = view_field.project_name
|
|
464
491
|
|
|
465
492
|
return view_project_map
|
|
466
493
|
|
|
@@ -953,6 +980,9 @@ class LookerExplore:
|
|
|
953
980
|
f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
|
|
954
981
|
)
|
|
955
982
|
else:
|
|
983
|
+
logger.debug(
|
|
984
|
+
f"LookerExplore.from_dict adding upstream view for explore '{dict['name']}' (model='{model_name}'): view_name='{view_name}', info[0].project='{info[0].project}'"
|
|
985
|
+
)
|
|
956
986
|
upstream_views.append(
|
|
957
987
|
ProjectInclude(project=info[0].project, include=view_name)
|
|
958
988
|
)
|
|
@@ -981,6 +1011,7 @@ class LookerExplore:
|
|
|
981
1011
|
) -> Optional["LookerExplore"]:
|
|
982
1012
|
try:
|
|
983
1013
|
explore = client.lookml_model_explore(model, explore_name)
|
|
1014
|
+
|
|
984
1015
|
views: Set[str] = set()
|
|
985
1016
|
lkml_fields: List[LookmlModelExploreField] = (
|
|
986
1017
|
explore_field_set_to_lkml_fields(explore)
|
|
@@ -1117,7 +1148,11 @@ class LookerExplore:
|
|
|
1117
1148
|
)
|
|
1118
1149
|
)
|
|
1119
1150
|
|
|
1120
|
-
view_project_map: Dict[str, str] = create_view_project_map(view_fields)
|
|
1151
|
+
view_project_map: Dict[str, str] = create_view_project_map(
|
|
1152
|
+
view_fields,
|
|
1153
|
+
explore_primary_view=explore.view_name,
|
|
1154
|
+
explore_project_name=explore.project_name,
|
|
1155
|
+
)
|
|
1121
1156
|
if view_project_map:
|
|
1122
1157
|
logger.debug(f"views and their projects: {view_project_map}")
|
|
1123
1158
|
|
|
@@ -1289,6 +1324,7 @@ class LookerExplore:
|
|
|
1289
1324
|
if self.upstream_views_file_path[view_ref.include] is not None
|
|
1290
1325
|
else ViewFieldValue.NOT_AVAILABLE.value
|
|
1291
1326
|
)
|
|
1327
|
+
|
|
1292
1328
|
view_urn = LookerViewId(
|
|
1293
1329
|
project_name=(
|
|
1294
1330
|
view_ref.project
|
|
@@ -41,7 +41,10 @@ from datahub.ingestion.source.aws.s3_util import (
|
|
|
41
41
|
get_key_prefix,
|
|
42
42
|
strip_s3_prefix,
|
|
43
43
|
)
|
|
44
|
-
from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
|
|
44
|
+
from datahub.ingestion.source.data_lake_common.data_lake_utils import (
|
|
45
|
+
ContainerWUCreator,
|
|
46
|
+
add_partition_columns_to_schema,
|
|
47
|
+
)
|
|
45
48
|
from datahub.ingestion.source.data_lake_common.object_store import (
|
|
46
49
|
create_object_store_adapter,
|
|
47
50
|
)
|
|
@@ -58,9 +61,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
58
61
|
)
|
|
59
62
|
from datahub.metadata.com.linkedin.pegasus2avro.common import TimeStamp
|
|
60
63
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
61
|
-
SchemaField,
|
|
62
64
|
SchemaMetadata,
|
|
63
|
-
StringTypeClass,
|
|
64
65
|
)
|
|
65
66
|
from datahub.metadata.schema_classes import (
|
|
66
67
|
DataPlatformInstanceClass,
|
|
@@ -70,7 +71,6 @@ from datahub.metadata.schema_classes import (
|
|
|
70
71
|
OtherSchemaClass,
|
|
71
72
|
PartitionsSummaryClass,
|
|
72
73
|
PartitionSummaryClass,
|
|
73
|
-
SchemaFieldDataTypeClass,
|
|
74
74
|
_Aspect,
|
|
75
75
|
)
|
|
76
76
|
from datahub.telemetry import stats, telemetry
|
|
@@ -474,7 +474,7 @@ class S3Source(StatefulIngestionSourceBase):
|
|
|
474
474
|
fields = sorted(fields, key=lambda f: f.fieldPath)
|
|
475
475
|
|
|
476
476
|
if self.source_config.add_partition_columns_to_schema and table_data.partitions:
|
|
477
|
-
self.add_partition_columns_to_schema(
|
|
477
|
+
add_partition_columns_to_schema(
|
|
478
478
|
fields=fields, path_spec=path_spec, full_path=table_data.full_path
|
|
479
479
|
)
|
|
480
480
|
|
|
@@ -510,34 +510,6 @@ class S3Source(StatefulIngestionSourceBase):
|
|
|
510
510
|
else:
|
|
511
511
|
return None
|
|
512
512
|
|
|
513
|
-
def add_partition_columns_to_schema(
|
|
514
|
-
self, path_spec: PathSpec, full_path: str, fields: List[SchemaField]
|
|
515
|
-
) -> None:
|
|
516
|
-
is_fieldpath_v2 = False
|
|
517
|
-
for field in fields:
|
|
518
|
-
if field.fieldPath.startswith("[version=2.0]"):
|
|
519
|
-
is_fieldpath_v2 = True
|
|
520
|
-
break
|
|
521
|
-
partition_keys = path_spec.get_partition_from_path(full_path)
|
|
522
|
-
if not partition_keys:
|
|
523
|
-
return None
|
|
524
|
-
|
|
525
|
-
for partition_key in partition_keys:
|
|
526
|
-
fields.append(
|
|
527
|
-
SchemaField(
|
|
528
|
-
fieldPath=(
|
|
529
|
-
f"{partition_key[0]}"
|
|
530
|
-
if not is_fieldpath_v2
|
|
531
|
-
else f"[version=2.0].[type=string].{partition_key[0]}"
|
|
532
|
-
),
|
|
533
|
-
nativeDataType="string",
|
|
534
|
-
type=SchemaFieldDataTypeClass(StringTypeClass()),
|
|
535
|
-
isPartitioningKey=True,
|
|
536
|
-
nullable=True,
|
|
537
|
-
recursive=False,
|
|
538
|
-
)
|
|
539
|
-
)
|
|
540
|
-
|
|
541
513
|
def get_table_profile(
|
|
542
514
|
self, table_data: TableData, dataset_urn: str
|
|
543
515
|
) -> Iterable[MetadataWorkUnit]:
|