acryl-datahub 1.0.0.1rc4__py3-none-any.whl → 1.0.0.1rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (29)
  1. {acryl_datahub-1.0.0.1rc4.dist-info → acryl_datahub-1.0.0.1rc6.dist-info}/METADATA +2563 -2563
  2. {acryl_datahub-1.0.0.1rc4.dist-info → acryl_datahub-1.0.0.1rc6.dist-info}/RECORD +29 -29
  3. datahub/_version.py +1 -1
  4. datahub/cli/specific/dataset_cli.py +26 -10
  5. datahub/emitter/mce_builder.py +1 -3
  6. datahub/emitter/mcp_builder.py +8 -0
  7. datahub/emitter/response_helper.py +25 -18
  8. datahub/emitter/rest_emitter.py +21 -5
  9. datahub/errors.py +4 -0
  10. datahub/ingestion/graph/client.py +2 -2
  11. datahub/ingestion/sink/datahub_rest.py +2 -2
  12. datahub/ingestion/source/common/subtypes.py +1 -0
  13. datahub/ingestion/source/iceberg/iceberg.py +159 -102
  14. datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
  15. datahub/ingestion/source/powerbi/config.py +31 -4
  16. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  17. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
  18. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  19. datahub/ingestion/source/powerbi/powerbi.py +12 -1
  20. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -11
  21. datahub/ingestion/source/redshift/lineage_v2.py +2 -1
  22. datahub/ingestion/source/sigma/config.py +3 -4
  23. datahub/ingestion/source/sigma/sigma.py +10 -6
  24. datahub/ingestion/source/sql/oracle.py +51 -4
  25. datahub/ingestion/source/usage/usage_common.py +0 -65
  26. {acryl_datahub-1.0.0.1rc4.dist-info → acryl_datahub-1.0.0.1rc6.dist-info}/WHEEL +0 -0
  27. {acryl_datahub-1.0.0.1rc4.dist-info → acryl_datahub-1.0.0.1rc6.dist-info}/entry_points.txt +0 -0
  28. {acryl_datahub-1.0.0.1rc4.dist-info → acryl_datahub-1.0.0.1rc6.dist-info}/licenses/LICENSE +0 -0
  29. {acryl_datahub-1.0.0.1rc4.dist-info → acryl_datahub-1.0.0.1rc6.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
- acryl_datahub-1.0.0.1rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.0.0.1rc6.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=C5PxZfTY1_MHATsJ5uiJ0n1KBC0rumbfeq67GwRBzYQ,323
4
+ datahub/_version.py,sha256=7kPZA7tlu74CuyNJP7xcQUj0ju3f89-XcI4tNSrX7u0,323
5
5
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
6
- datahub/errors.py,sha256=w6h8b27j9XlmPbTwqpu7-wgiTrXlHzcnUOnJ_iOrwzo,520
6
+ datahub/errors.py,sha256=bwtiNzFdVFze0IVKDEXQutkwk5j7cZkfXCUYCZIDSYg,565
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
@@ -86,7 +86,7 @@ datahub/cli/specific/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
86
86
  datahub/cli/specific/assertions_cli.py,sha256=q0ODpyWS3LVR8UbF3NM6KRisabodJ0UUwyPty9a8AIk,5375
87
87
  datahub/cli/specific/datacontract_cli.py,sha256=IkBovwuPT5jNB8X-8AQJRO4C9cFSNm1at8v4YctLFgQ,2531
88
88
  datahub/cli/specific/dataproduct_cli.py,sha256=wSksU4xjGvAZJiiI7rSyjSItTu72oBPiXZ0-UL81zn0,15091
89
- datahub/cli/specific/dataset_cli.py,sha256=mR5YIDos4McjqlaqufNNVPx6YSUsbOm21eyNKqAFAJA,7839
89
+ datahub/cli/specific/dataset_cli.py,sha256=SYxhGLzv8ZClTiE3rZE99blxh15ZMyQCQqpJWx9SRKc,8570
90
90
  datahub/cli/specific/file_loader.py,sha256=YMyv_evdKyHSft5Tm_kOcqJ4ALpRmMm54ZJAyl7Nxqs,773
91
91
  datahub/cli/specific/forms_cli.py,sha256=OLVeG8NtK1eDBuUKCT5Ald35np8__f8mLzbZM_zUfWU,1484
92
92
  datahub/cli/specific/group_cli.py,sha256=xPUYk48VbVXLMj-z9VNW0RZzXOe4rQsc2jLwSOGCoec,1967
@@ -120,13 +120,13 @@ datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP
120
120
  datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
121
121
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
122
122
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
123
- datahub/emitter/mce_builder.py,sha256=8UiG2VsYgC7n29h_y4qL6F9faGwwMZF3zGscl_CBT9s,16808
123
+ datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
124
124
  datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
125
- datahub/emitter/mcp_builder.py,sha256=Q1bX2BthNvZ7ae71XYF6ICoiN8IOqaAd_h3zOct57Q0,11752
125
+ datahub/emitter/mcp_builder.py,sha256=JyAC8obvkf6ZpINJ8I2p-Ofr52-tuoQBDbxp-bhjyrM,11871
126
126
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
127
127
  datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
128
- datahub/emitter/response_helper.py,sha256=lRMvzF-RPHNkN_ONl-N2uJjKh5XtRFrofrdGibVGn2U,4509
129
- datahub/emitter/rest_emitter.py,sha256=4l3_vnOmS8GKTj_HUejg5gJb28QCK0XH_nPVPE5AAp4,29841
128
+ datahub/emitter/response_helper.py,sha256=h2hrZYiv4xfauD_lHPW_fN_AV8KhWNM4CVd-Lat2vT0,4608
129
+ datahub/emitter/rest_emitter.py,sha256=PzZkt0JlCnexJgqzYQxSQgMcHj-qDg8lIsmhUn4GPfU,30631
130
130
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
131
131
  datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
132
132
  datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -171,7 +171,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
171
171
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
172
172
  datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
173
173
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
174
- datahub/ingestion/graph/client.py,sha256=rdX2DXqTXyLyS1_qiUzc3zzIE8CFheP2pYi1I68r6Dc,65567
174
+ datahub/ingestion/graph/client.py,sha256=791U-QMJXG3_RuNiQ4ennQ6NsOPQToSeKELHbncwzIQ,65573
175
175
  datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
176
176
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
177
177
  datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
@@ -190,7 +190,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
190
190
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
191
191
  datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
192
192
  datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
193
- datahub/ingestion/sink/datahub_rest.py,sha256=4hvMDUxHMJXGgk3Iy7fcYGKixjvVd9DHD03X-F3kOg0,12976
193
+ datahub/ingestion/sink/datahub_rest.py,sha256=0te9kxDXAJU8A7wfEhDb4R9VDBYYDpy-YMPJZWEEKCM,12982
194
194
  datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
195
195
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
196
196
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -272,7 +272,7 @@ datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
272
272
  datahub/ingestion/source/common/data_platforms.py,sha256=HhuP3YIEi2WpyKDjUU8RiM0a2qjHWQcvc8kcqub0cVo,548
273
273
  datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
274
274
  datahub/ingestion/source/common/gcp_credentials_config.py,sha256=_NapGkAqZMbXNClLlmOfANS7U6rChhdthRX9s9iUv9k,2411
275
- datahub/ingestion/source/common/subtypes.py,sha256=nSGKiCD491Bz9MRWzJK3sb10z8vLWP2wwSuSq-L8ELk,3068
275
+ datahub/ingestion/source/common/subtypes.py,sha256=UZca0ZQUQdoXr5Z-3AIUT9gIlPt-XwbMNjj7WEEiR_4,3107
276
276
  datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
277
277
  datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
278
278
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
@@ -333,9 +333,9 @@ datahub/ingestion/source/hex/hex.py,sha256=DPpsi5e-sdUgbS0Okyvx1mvc00Adu47zA65oF
333
333
  datahub/ingestion/source/hex/mapper.py,sha256=6dsGvvhPAOAbAG1ayxLwipgJGt1q7YanWYfMX3rZeiM,12603
334
334
  datahub/ingestion/source/hex/model.py,sha256=hmMfOLEGZcKjwy2DW29OPf_9_Q_TesgnUTCen2br_fA,1471
335
335
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
- datahub/ingestion/source/iceberg/iceberg.py,sha256=eVeDXn0YZnnG4qkSbTCNi-Pcod9wEh-LnUkjPP66RtM,28753
336
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=i9o0ia2vQUGqoagN7GgsoaUlhjj9xKBNP-3ia2cMgHY,30762
337
337
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
338
- datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=CkBB5fryMVoqqCM6eLSIeb4yP85ABHONNRm0QqZKrnw,9977
338
+ datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
339
339
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
340
340
  datahub/ingestion/source/identity/azure_ad.py,sha256=9Hrvm4CSfc02yjnPUsCYSY4Qw9fXPnDFWLexab0mcpc,28559
341
341
  datahub/ingestion/source/identity/okta.py,sha256=jC21myJuMRTaPgj0OD9heaC-mz8ECjqpy2hSJwlUSwM,31943
@@ -375,22 +375,22 @@ datahub/ingestion/source/metadata/lineage.py,sha256=2iK-hsORWm7NSvMZcG4D5hb8_PH5
375
375
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
376
376
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=O3jjdnsx7IyYPBLbxowL85Qo4zs4H-maMOH4-6ZNCk4,13063
377
377
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
378
- datahub/ingestion/source/powerbi/config.py,sha256=1WFK-JxcgBEIZ2XTwuH1PvNXYcwqEJR-IYTUTv3Z4o8,22820
378
+ datahub/ingestion/source/powerbi/config.py,sha256=5rG62dspGF9jIo8l6HLpB6ECv5n-t1un2ZyGiisD784,24219
379
379
  datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
380
380
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
381
- datahub/ingestion/source/powerbi/powerbi.py,sha256=VlXgaImfUYjdXkJ1cCrJawkzl-pE1R-XCVr27eVbZ-E,55512
381
+ datahub/ingestion/source/powerbi/powerbi.py,sha256=a5itVuGmg-0xAQK5a-cXB5UxpR3rLJx0o2x_lz-8ox8,55955
382
382
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
383
- datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
383
+ datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
384
384
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
385
385
  datahub/ingestion/source/powerbi/m_query/parser.py,sha256=5KqhUwj9H9yL9ZMPP9oSeVGiZjvXjw6Iu_HrGr95E5M,5876
386
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=CHGlpZ4ahNksZ6bUk3rrTF4__rLGhUgWZTa_Ivt_zaI,32565
387
- datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=t0n1dDYjlzElSJo5zteabdSHQuHlMug23f4RodUgmIk,16959
386
+ datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=aOhAb8U4OEZnO4ufnb-Cm3KMpdy-JF6r9YMK3RNZs5A,35906
387
+ datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=ISH8Xjx51q2S81fn2v5RhCCU-kRAW3juxM0rMFs4TDo,17413
388
388
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=NIKNNHAE4kTJefTM1WR-StJi9NuingaRYn_mS_kV6A8,6180
389
389
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
390
390
  datahub/ingestion/source/powerbi/rest_api_wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
391
391
  datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py,sha256=4Kr7cLXpsWGtg-M18aXyhij9k9Ll5dGv3EaCS3d2DRk,8590
392
392
  datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py,sha256=FHBFSkf5tf8_o5Sjfuvo1pLVTlkSyxI5HpI8ZthPuhE,38569
393
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=NrhgwREmkWTvlhpEs7dAEEJfOxQRalA02ArKr2LLjeY,27666
393
+ datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py,sha256=wFAcldAYpDwW-9ll50LB7ac938-tn4AMnTJSNDoOO6Y,27651
394
394
  datahub/ingestion/source/powerbi/rest_api_wrapper/profiling_utils.py,sha256=bgcPheyqOj6KdRjDyANDK5yggItglcBIjbGFIwAxSds,1392
395
395
  datahub/ingestion/source/powerbi/rest_api_wrapper/query.py,sha256=VNw1Uvli6g0pnu9FpigYmnCdEPbVEipz7vdZU_WmHf4,616
396
396
  datahub/ingestion/source/powerbi_report_server/__init__.py,sha256=N9fGcrHXBbuPmx9rpGjd_jkMC3smXmfiwISDP1QZapk,324
@@ -410,7 +410,7 @@ datahub/ingestion/source/redshift/config.py,sha256=l_hlgsCjvlcgcFQpd5WMKlW8nqQUh
410
410
  datahub/ingestion/source/redshift/datashares.py,sha256=kH3YkoenOa59XZU12XeUf283lOOAITYD9jOXpy8R06E,9227
411
411
  datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
412
412
  datahub/ingestion/source/redshift/lineage.py,sha256=IPF8vHy2MFyhK-hu2-lxV2-kcnNAEzltPLnnIvwIBMY,44100
413
- datahub/ingestion/source/redshift/lineage_v2.py,sha256=vQ2LBa04hqYqIRK0CP3VDYRlvMLAqodzdieDl6LipiQ,17909
413
+ datahub/ingestion/source/redshift/lineage_v2.py,sha256=dbTvuaJBV5yvCWM_oEAqZIA1JOlGxLJOexbEB47A_xE,17962
414
414
  datahub/ingestion/source/redshift/profile.py,sha256=dq7m9YG3TvEMbplwVIutUpzbXLPH8KIj9SuWNo7PWWE,4323
415
415
  datahub/ingestion/source/redshift/query.py,sha256=vVIuNUaU4a7AfMFJZlgLuqi0cGVl0gVz8xZUSnPhWvs,47845
416
416
  datahub/ingestion/source/redshift/redshift.py,sha256=whMujnJxwNT2ZXnOVRrZQiy317hlsvbARzabKmI3oN8,43536
@@ -438,9 +438,9 @@ datahub/ingestion/source/schema_inference/json.py,sha256=p5S-3idn65V2uad5T8txs1U
438
438
  datahub/ingestion/source/schema_inference/object.py,sha256=dhSOtxVJHbTDY0hWeHwdLYHnOsW07Omk7Y4DPeztie0,5847
439
439
  datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
440
440
  datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
441
- datahub/ingestion/source/sigma/config.py,sha256=zGh0ZU2Ty5NHfNXAVwFxVkK4NlsNSxtAyfCgMJJvzdc,3795
441
+ datahub/ingestion/source/sigma/config.py,sha256=yfdKQYvI5hKVl8gNAKIcJe-VW3klvdDqYbUP76gJQDI,3812
442
442
  datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
443
- datahub/ingestion/source/sigma/sigma.py,sha256=dgaIiiOGTu2trL_OVMOOEANA2UlB-M7LQ4TcDVBqiJA,24086
443
+ datahub/ingestion/source/sigma/sigma.py,sha256=ApVtToI1cnpscvdEMcC-3EuTYnczW9CH-s912z-pDpk,24262
444
444
  datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpApVI192P7EZzPcI,17870
445
445
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
446
446
  datahub/ingestion/source/slack/slack.py,sha256=3N7Yp-u9DvBmo536Z6-pQTrJgSJ3i742GePSgjlBOUU,27616
@@ -475,7 +475,7 @@ datahub/ingestion/source/sql/hive.py,sha256=n0XCGkNkVAe-TEyXbxlefvohbmtALbWaC1a0
475
475
  datahub/ingestion/source/sql/hive_metastore.py,sha256=HW0zoHKarBYb8oVCy5fHvPOn-pTo25LctW_AusmH0hQ,36252
476
476
  datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
477
477
  datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
478
- datahub/ingestion/source/sql/oracle.py,sha256=it9qhUkGRHTq_F5DoEsCBLYnB02divzxDlBvXACH4Pk,27712
478
+ datahub/ingestion/source/sql/oracle.py,sha256=PhQZGiZbf_rRtNMlNV-MXSoN3geDBb9zXsGJRVvaKbo,29831
479
479
  datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
480
480
  datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
481
481
  datahub/ingestion/source/sql/sql_common.py,sha256=jsweel_-vesNtcPonnfS11OUrlcZnS3wGt5r0dYTPnM,48637
@@ -533,7 +533,7 @@ datahub/ingestion/source/unity/usage.py,sha256=0wETBAaZvHI_EGgBlxX3bKsVHEAdnUV8_
533
533
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
534
534
  datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e36wmYzs8xtpD632z6pLiw,9974
535
535
  datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
536
- datahub/ingestion/source/usage/usage_common.py,sha256=YGszLjmESiUXnpcPfnyQHtoM57HyWsIiLOQd5_sxECg,12238
536
+ datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
537
537
  datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
538
538
  datahub/ingestion/source/vertexai/vertexai.py,sha256=TXTa-Cm1C7xkbTNXNmKr_hi2FDJ9VG-ahrxk9yuxaTg,43635
539
539
  datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
@@ -1043,8 +1043,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1043
1043
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1044
1044
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1045
1045
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1046
- acryl_datahub-1.0.0.1rc4.dist-info/METADATA,sha256=0QZSNfWv2u7u7GcupcTXvYmmBOqeB7vfGNKHUyQEoNs,176849
1047
- acryl_datahub-1.0.0.1rc4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1048
- acryl_datahub-1.0.0.1rc4.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1049
- acryl_datahub-1.0.0.1rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1050
- acryl_datahub-1.0.0.1rc4.dist-info/RECORD,,
1046
+ acryl_datahub-1.0.0.1rc6.dist-info/METADATA,sha256=8vR0C58PC28apLdQmFrOElrjYK-ArD0warcqjSc-fD8,176849
1047
+ acryl_datahub-1.0.0.1rc6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1048
+ acryl_datahub-1.0.0.1rc6.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1049
+ acryl_datahub-1.0.0.1rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1050
+ acryl_datahub-1.0.0.1rc6.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0.1rc4"
3
+ __version__ = "1.0.0.1rc6"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -29,13 +29,16 @@ def dataset() -> None:
29
29
  name="upsert",
30
30
  )
31
31
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
32
+ @click.option(
33
+ "-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
34
+ )
32
35
  @upgrade.check_upgrade
33
36
  @telemetry.with_telemetry()
34
- def upsert(file: Path) -> None:
37
+ def upsert(file: Path, dry_run: bool) -> None:
35
38
  """Upsert attributes to a Dataset in DataHub."""
36
39
  # Call the sync command with to_datahub=True to perform the upsert operation
37
40
  ctx = click.get_current_context()
38
- ctx.invoke(sync, file=str(file), to_datahub=True)
41
+ ctx.invoke(sync, file=str(file), dry_run=dry_run, to_datahub=True)
39
42
 
40
43
 
41
44
  @dataset.command(
@@ -167,11 +170,16 @@ def file(lintcheck: bool, lintfix: bool, file: str) -> None:
167
170
  )
168
171
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
169
172
  @click.option("--to-datahub/--from-datahub", required=True, is_flag=True)
173
+ @click.option(
174
+ "-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
175
+ )
170
176
  @upgrade.check_upgrade
171
177
  @telemetry.with_telemetry()
172
- def sync(file: str, to_datahub: bool) -> None:
178
+ def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
173
179
  """Sync a Dataset file to/from DataHub"""
174
180
 
181
+ dry_run_prefix = "[dry-run]: " if dry_run else "" # prefix to use in messages
182
+
175
183
  failures: List[str] = []
176
184
  with get_default_graph() as graph:
177
185
  datasets = Dataset.from_yaml(file)
@@ -189,7 +197,7 @@ def sync(file: str, to_datahub: bool) -> None:
189
197
  click.secho(
190
198
  "\n\t- ".join(
191
199
  [
192
- f"Skipping Dataset {dataset.urn} due to missing entity references: "
200
+ f"{dry_run_prefix}Skipping Dataset {dataset.urn} due to missing entity references: "
193
201
  ]
194
202
  + missing_entity_references
195
203
  ),
@@ -199,13 +207,18 @@ def sync(file: str, to_datahub: bool) -> None:
199
207
  continue
200
208
  try:
201
209
  for mcp in dataset.generate_mcp():
202
- graph.emit(mcp)
203
- click.secho(f"Update succeeded for urn {dataset.urn}.", fg="green")
210
+ if not dry_run:
211
+ graph.emit(mcp)
212
+ click.secho(
213
+ f"{dry_run_prefix}Update succeeded for urn {dataset.urn}.",
214
+ fg="green",
215
+ )
204
216
  except Exception as e:
205
217
  click.secho(
206
- f"Update failed for id {id}. due to {e}",
218
+ f"{dry_run_prefix}Update failed for id {id}. due to {e}",
207
219
  fg="red",
208
220
  )
221
+ failures.append(dataset.urn)
209
222
  else:
210
223
  # Sync from DataHub
211
224
  if graph.exists(dataset.urn):
@@ -215,13 +228,16 @@ def sync(file: str, to_datahub: bool) -> None:
215
228
  existing_dataset: Dataset = Dataset.from_datahub(
216
229
  graph=graph, urn=dataset.urn, config=dataset_get_config
217
230
  )
218
- existing_dataset.to_yaml(Path(file))
231
+ if not dry_run:
232
+ existing_dataset.to_yaml(Path(file))
233
+ else:
234
+ click.secho(f"{dry_run_prefix}Will update file {file}")
219
235
  else:
220
- click.secho(f"Dataset {dataset.urn} does not exist")
236
+ click.secho(f"{dry_run_prefix}Dataset {dataset.urn} does not exist")
221
237
  failures.append(dataset.urn)
222
238
  if failures:
223
239
  click.secho(
224
- f"\nFailed to sync the following Datasets: {', '.join(failures)}",
240
+ f"\n{dry_run_prefix}Failed to sync the following Datasets: {', '.join(failures)}",
225
241
  fg="red",
226
242
  )
227
243
  raise click.Abort()
@@ -125,9 +125,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
125
125
 
126
126
 
127
127
  def make_data_platform_urn(platform: str) -> str:
128
- if platform.startswith("urn:li:dataPlatform:"):
129
- return platform
130
- return DataPlatformUrn.create_from_id(platform).urn()
128
+ return DataPlatformUrn(platform).urn()
131
129
 
132
130
 
133
131
  def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
@@ -117,6 +117,14 @@ class ContainerKey(DatahubKey):
117
117
  PlatformKey = ContainerKey
118
118
 
119
119
 
120
+ class NamespaceKey(ContainerKey):
121
+ """
122
+ For Iceberg namespaces (databases/schemas)
123
+ """
124
+
125
+ namespace: str
126
+
127
+
120
128
  class DatabaseKey(ContainerKey):
121
129
  database: str
122
130
 
@@ -1,17 +1,21 @@
1
1
  import json
2
2
  import logging
3
+ import warnings
3
4
  from dataclasses import dataclass
4
5
  from typing import Dict, List, Optional, Sequence, Union
5
6
 
6
7
  from requests import Response
7
8
 
8
9
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
10
+ from datahub.errors import APITracingWarning
9
11
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
10
12
  MetadataChangeProposal,
11
13
  )
12
14
 
13
15
  logger = logging.getLogger(__name__)
14
16
 
17
+ _TRACE_HEADER_NAME = "traceparent"
18
+
15
19
 
16
20
  @dataclass
17
21
  class TraceData:
@@ -25,14 +29,11 @@ class TraceData:
25
29
  raise TypeError("data must be a dictionary")
26
30
 
27
31
 
28
- def _extract_trace_id(
29
- response: Response, trace_header: str = "traceparent"
30
- ) -> Optional[str]:
32
+ def _extract_trace_id(response: Response) -> Optional[str]:
31
33
  """
32
34
  Extract trace ID from response headers.
33
35
  Args:
34
36
  response: HTTP response object
35
- trace_header: Name of the trace header to use
36
37
  Returns:
37
38
  Trace ID if found and response is valid, None otherwise
38
39
  """
@@ -40,9 +41,17 @@ def _extract_trace_id(
40
41
  logger.debug(f"Invalid status code: {response.status_code}")
41
42
  return None
42
43
 
43
- trace_id = response.headers.get(trace_header)
44
+ trace_id = response.headers.get(_TRACE_HEADER_NAME)
44
45
  if not trace_id:
45
- logger.debug(f"Missing trace header: {trace_header}")
46
+ # This will only be printed if
47
+ # 1. we're in async mode (checked by the caller)
48
+ # 2. the server did not return a trace ID
49
+ logger.debug(f"Missing trace header: {_TRACE_HEADER_NAME}")
50
+ warnings.warn(
51
+ "No trace ID found in response headers. API tracing is not active - likely due to an outdated server version.",
52
+ APITracingWarning,
53
+ stacklevel=3,
54
+ )
46
55
  return None
47
56
 
48
57
  return trace_id
@@ -51,20 +60,19 @@ def _extract_trace_id(
51
60
  def extract_trace_data(
52
61
  response: Response,
53
62
  aspects_to_trace: Optional[List[str]] = None,
54
- trace_header: str = "traceparent",
55
63
  ) -> Optional[TraceData]:
56
- """
57
- Extract trace data from a response object.
64
+ """Extract trace data from a response object.
65
+
66
+ If we run into a JSONDecodeError, we'll log an error and return None.
67
+
58
68
  Args:
59
69
  response: HTTP response object
60
70
  aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
61
- trace_header: Name of the trace header to use (default: "traceparent")
71
+
62
72
  Returns:
63
73
  TraceData object if successful, None otherwise
64
- Raises:
65
- JSONDecodeError: If response body cannot be decoded as JSON
66
74
  """
67
- trace_id = _extract_trace_id(response, trace_header)
75
+ trace_id = _extract_trace_id(response)
68
76
  if not trace_id:
69
77
  return None
70
78
 
@@ -104,19 +112,18 @@ def extract_trace_data_from_mcps(
104
112
  response: Response,
105
113
  mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
106
114
  aspects_to_trace: Optional[List[str]] = None,
107
- trace_header: str = "traceparent",
108
115
  ) -> Optional[TraceData]:
109
- """
110
- Extract trace data from a response object and populate data from provided MCPs.
116
+ """Extract trace data from a response object and populate data from provided MCPs.
117
+
111
118
  Args:
112
119
  response: HTTP response object used only for trace_id extraction
113
120
  mcps: List of MCP URN and aspect data
114
121
  aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
115
- trace_header: Name of the trace header to use (default: "traceparent")
122
+
116
123
  Returns:
117
124
  TraceData object if successful, None otherwise
118
125
  """
119
- trace_id = _extract_trace_id(response, trace_header)
126
+ trace_id = _extract_trace_id(response)
120
127
  if not trace_id:
121
128
  return None
122
129
 
@@ -5,6 +5,7 @@ import json
5
5
  import logging
6
6
  import os
7
7
  import time
8
+ import warnings
8
9
  from collections import defaultdict
9
10
  from dataclasses import dataclass
10
11
  from datetime import datetime, timedelta
@@ -40,7 +41,7 @@ from datahub.configuration.common import (
40
41
  TraceTimeoutError,
41
42
  TraceValidationError,
42
43
  )
43
- from datahub.emitter.aspect import JSON_CONTENT_TYPE
44
+ from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
44
45
  from datahub.emitter.generic_emitter import Emitter
45
46
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
46
47
  from datahub.emitter.request_helper import make_curl_command
@@ -50,6 +51,7 @@ from datahub.emitter.response_helper import (
50
51
  extract_trace_data_from_mcps,
51
52
  )
52
53
  from datahub.emitter.serialization_helper import pre_json_transform
54
+ from datahub.errors import APITracingWarning
53
55
  from datahub.ingestion.api.closeable import Closeable
54
56
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
55
57
  MetadataChangeEvent,
@@ -107,9 +109,9 @@ class RestSinkEndpoint(ConfigEnum):
107
109
  OPENAPI = auto()
108
110
 
109
111
 
110
- DEFAULT_REST_SINK_ENDPOINT = pydantic.parse_obj_as(
112
+ DEFAULT_REST_EMITTER_ENDPOINT = pydantic.parse_obj_as(
111
113
  RestSinkEndpoint,
112
- os.getenv("DATAHUB_REST_SINK_DEFAULT_ENDPOINT", RestSinkEndpoint.RESTLI),
114
+ os.getenv("DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT", RestSinkEndpoint.RESTLI),
113
115
  )
114
116
 
115
117
 
@@ -227,7 +229,9 @@ class DataHubRestEmitter(Closeable, Emitter):
227
229
  ca_certificate_path: Optional[str] = None,
228
230
  client_certificate_path: Optional[str] = None,
229
231
  disable_ssl_verification: bool = False,
230
- openapi_ingestion: bool = False,
232
+ openapi_ingestion: bool = (
233
+ DEFAULT_REST_EMITTER_ENDPOINT == RestSinkEndpoint.OPENAPI
234
+ ),
231
235
  default_trace_mode: bool = False,
232
236
  ):
233
237
  if not gms_server:
@@ -357,8 +361,14 @@ class DataHubRestEmitter(Closeable, Emitter):
357
361
  )["aspect"]["json"]
358
362
  else:
359
363
  obj = mcp.aspect.to_obj()
360
- if obj.get("value") and obj.get("contentType") == JSON_CONTENT_TYPE:
364
+ content_type = obj.get("contentType")
365
+ if obj.get("value") and content_type == JSON_CONTENT_TYPE:
366
+ # Undo double serialization.
361
367
  obj = json.loads(obj["value"])
368
+ elif content_type == JSON_PATCH_CONTENT_TYPE:
369
+ raise NotImplementedError(
370
+ "Patches are not supported for OpenAPI ingestion. Set the endpoint to RESTLI."
371
+ )
362
372
  aspect_value = pre_json_transform(obj)
363
373
  return (
364
374
  url,
@@ -749,6 +759,12 @@ class DataHubRestEmitter(Closeable, Emitter):
749
759
  trace_flag if trace_flag is not None else self._default_trace_mode
750
760
  )
751
761
  resolved_async_flag = async_flag if async_flag is not None else async_default
762
+ if resolved_trace_flag and not resolved_async_flag:
763
+ warnings.warn(
764
+ "API tracing is only available with async ingestion. For sync mode, API errors will be surfaced as exceptions.",
765
+ APITracingWarning,
766
+ stacklevel=3,
767
+ )
752
768
  return resolved_trace_flag and resolved_async_flag
753
769
 
754
770
  def __repr__(self) -> str:
datahub/errors.py CHANGED
@@ -33,3 +33,7 @@ class MultipleSubtypesWarning(Warning):
33
33
 
34
34
  class ExperimentalWarning(Warning):
35
35
  pass
36
+
37
+
38
+ class APITracingWarning(Warning):
39
+ pass
@@ -33,7 +33,7 @@ from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
33
33
  from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
34
34
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
35
35
  from datahub.emitter.rest_emitter import (
36
- DEFAULT_REST_SINK_ENDPOINT,
36
+ DEFAULT_REST_EMITTER_ENDPOINT,
37
37
  DEFAULT_REST_TRACE_MODE,
38
38
  DatahubRestEmitter,
39
39
  RestSinkEndpoint,
@@ -147,7 +147,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
147
147
  ca_certificate_path=self.config.ca_certificate_path,
148
148
  client_certificate_path=self.config.client_certificate_path,
149
149
  disable_ssl_verification=self.config.disable_ssl_verification,
150
- openapi_ingestion=DEFAULT_REST_SINK_ENDPOINT == RestSinkEndpoint.OPENAPI,
150
+ openapi_ingestion=DEFAULT_REST_EMITTER_ENDPOINT == RestSinkEndpoint.OPENAPI,
151
151
  default_trace_mode=DEFAULT_REST_TRACE_MODE == RestTraceMode.ENABLED,
152
152
  )
153
153
 
@@ -20,7 +20,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper
20
20
  from datahub.emitter.mcp_builder import mcps_from_mce
21
21
  from datahub.emitter.rest_emitter import (
22
22
  BATCH_INGEST_MAX_PAYLOAD_LENGTH,
23
- DEFAULT_REST_SINK_ENDPOINT,
23
+ DEFAULT_REST_EMITTER_ENDPOINT,
24
24
  DEFAULT_REST_TRACE_MODE,
25
25
  DataHubRestEmitter,
26
26
  RestSinkEndpoint,
@@ -70,7 +70,7 @@ _DEFAULT_REST_SINK_MODE = pydantic.parse_obj_as(
70
70
 
71
71
  class DatahubRestSinkConfig(DatahubClientConfig):
72
72
  mode: RestSinkMode = _DEFAULT_REST_SINK_MODE
73
- endpoint: RestSinkEndpoint = DEFAULT_REST_SINK_ENDPOINT
73
+ endpoint: RestSinkEndpoint = DEFAULT_REST_EMITTER_ENDPOINT
74
74
  default_trace_mode: RestTraceMode = DEFAULT_REST_TRACE_MODE
75
75
 
76
76
  # These only apply in async modes.
@@ -45,6 +45,7 @@ class DatasetContainerSubTypes(StrEnum):
45
45
  GCS_BUCKET = "GCS bucket"
46
46
  ABS_CONTAINER = "ABS container"
47
47
  KEYSPACE = "Keyspace" # Cassandra
48
+ NAMESPACE = "Namespace" # Iceberg
48
49
 
49
50
 
50
51
  class BIContainerSubTypes(StrEnum):