acryl-datahub 0.15.0.1rc16__py3-none-any.whl → 0.15.0.2rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (25)
  1. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2rc1.dist-info}/METADATA +2324 -2324
  2. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2rc1.dist-info}/RECORD +25 -25
  3. datahub/__init__.py +1 -1
  4. datahub/cli/cli_utils.py +12 -1
  5. datahub/emitter/rest_emitter.py +140 -92
  6. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +5 -3
  7. datahub/ingestion/api/source.py +4 -0
  8. datahub/ingestion/glossary/classifier.py +2 -3
  9. datahub/ingestion/graph/client.py +14 -11
  10. datahub/ingestion/graph/config.py +1 -1
  11. datahub/ingestion/source/aws/glue.py +52 -35
  12. datahub/ingestion/source/bigquery_v2/bigquery.py +2 -0
  13. datahub/ingestion/source/bigquery_v2/bigquery_config.py +8 -0
  14. datahub/ingestion/source/datahub/config.py +10 -0
  15. datahub/ingestion/source/datahub/datahub_database_reader.py +3 -17
  16. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  17. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +11 -7
  18. datahub/ingestion/source/snowflake/snowflake_config.py +8 -0
  19. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +4 -0
  20. datahub/ingestion/source/snowflake/snowflake_v2.py +2 -0
  21. datahub/ingestion/source/unity/source.py +0 -4
  22. datahub/sql_parsing/sql_parsing_aggregator.py +8 -5
  23. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2rc1.dist-info}/WHEEL +0 -0
  24. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2rc1.dist-info}/entry_points.txt +0 -0
  25. {acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2rc1.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.15.0.1rc16.dist-info → acryl_datahub-0.15.0.2rc1.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=0dgSJoggO_qJtX-oEnxH20rGzNGGCstuwsxqUKzbKUA,577
1
+ datahub/__init__.py,sha256=ubEB5EHYmuiGuDtVdhhKbNxGtfw-kzV0eBW81uVifQU,576
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -59,7 +59,7 @@ datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1
59
59
  datahub/api/graphql/operation.py,sha256=h7OXbVRrpJgoth1X4cgeIFhD5JY1MGKg2KjVlQK1gqE,5116
60
60
  datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
61
  datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
62
- datahub/cli/cli_utils.py,sha256=gFmcOGAT6IdrTwmpRFSwaqzGmoqS4dbWrxILB1uvlGk,13214
62
+ datahub/cli/cli_utils.py,sha256=d_Q9vPZTPxO7XyyghD-i1Nkr4DX0M8cs2IWrMUQAu0c,13539
63
63
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
64
64
  datahub/cli/delete_cli.py,sha256=VLeHi7MLFCtTk7MI4y8r_k_7aLcCUZIglU2MNLsXU6M,23051
65
65
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
@@ -119,7 +119,7 @@ datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
119
119
  datahub/emitter/mcp_builder.py,sha256=eOcuz41c4a3oTkNk39yYl9bTxpksxqATPHLcqyhPGT0,9856
120
120
  datahub/emitter/mcp_patch_builder.py,sha256=oonC8iGOvDzqj890CxOjWlBdDEF1RnwvbSZy1sivlTY,4572
121
121
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
122
- datahub/emitter/rest_emitter.py,sha256=oqyRuXG1o1dYjiEIH5TFMb1q0xhRbpxPIA5qkyz0iQ8,16407
122
+ datahub/emitter/rest_emitter.py,sha256=O9IJ7r-AXL4Pi892pEFOygvUKTbD8V6ey8KObuqHqgk,17876
123
123
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
124
124
  datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
125
125
  datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -138,13 +138,13 @@ datahub/ingestion/api/registry.py,sha256=LGElUdzhNQoEr-k2SN23mJaIYnA1PYfF97LQxBm
138
138
  datahub/ingestion/api/report.py,sha256=zb5Y_9ogmWm00KqX7_64sIMT24Wfpk7txRwEfKacw5I,4652
139
139
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
140
140
  datahub/ingestion/api/sink.py,sha256=3jw7-x9gXGreOPwn49wG5fT3C8pYhaNMQITdMN6kbag,4478
141
- datahub/ingestion/api/source.py,sha256=pHfFIBZa57ySpZWnt03mmayWLdbbBAGOhWqWZnf1KUA,18815
141
+ datahub/ingestion/api/source.py,sha256=kSQ6AKDvLdFOIxaz9nPCmCSUsIMDdXHiOxzFiMdYN14,19001
142
142
  datahub/ingestion/api/source_helpers.py,sha256=AVO0ogiCKgYmX1ubJaSs6L30TCCgOIalp6awXPF5XM0,19643
143
143
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
144
144
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
145
145
  datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
146
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
147
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=9mzg3vhCFPI9zHSi_j3FvJtK8kUvb8PmRSz-TM3tt6k,4323
147
+ datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=5jrl7cEyonce-YdWe1Iw6y3Okw5smJosqwOm5e-nvqM,4363
148
148
  datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
149
  datahub/ingestion/extractor/extractor_registry.py,sha256=f7CLfW3pr29QZkXSHbp7HjUrsdw7ejQJmot-tiSPcqc,342
150
150
  datahub/ingestion/extractor/json_ref_patch.py,sha256=4g3ZWHn7rwS74jUvSXJiGpi-UKHhiSYKKgBeU4E5ukE,1448
@@ -160,12 +160,12 @@ datahub/ingestion/fs/local_fs.py,sha256=oWf-PZsl5sI-9eHWGeKlfKYagbQaSZ9fGfNbxcFj
160
160
  datahub/ingestion/fs/s3_fs.py,sha256=FM6UK9A48UdOjkAO-gh1rAa4N7FTXz0Wutmp8TeX7kY,3199
161
161
  datahub/ingestion/glossary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
162
  datahub/ingestion/glossary/classification_mixin.py,sha256=pkb0Rv2SQH7VwAV5DPLoJLJwkDwTjIhOhg4mbXiz9CI,13332
163
- datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGot6z9Cir5Vuc,2981
163
+ datahub/ingestion/glossary/classifier.py,sha256=zp8Fe3he80H5Zz1EwymKjThUPkTpw6PgEJQvlmqrJmQ,3006
164
164
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
165
165
  datahub/ingestion/glossary/datahub_classifier.py,sha256=8VhwuLDhyOqqOr0jqAPIgorb4eAOnvTr4m13Y2Wy1-E,7515
166
166
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
- datahub/ingestion/graph/client.py,sha256=AYDFwP9a_M-fCZv-PcWMSr5tc53XWJl372SWKwdu37E,64651
168
- datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4Cw8k,749
167
+ datahub/ingestion/graph/client.py,sha256=R50K7NmE3TYgVXvdLnvLZn7N0fkiCXOK0MoJz9ueglA,64963
168
+ datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
169
169
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
170
170
  datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
171
171
  datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -217,7 +217,7 @@ datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm
217
217
  datahub/ingestion/source/abs/source.py,sha256=pzxW-R_cWGKPneEhX8JWdTZiX2k1kAZOPKgMxp9mAEI,24533
218
218
  datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
219
219
  datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
220
- datahub/ingestion/source/aws/glue.py,sha256=r7y1MPDK__BKX_mrJjVa_CEmSXM3Pa02gt19o0sSLE8,56815
220
+ datahub/ingestion/source/aws/glue.py,sha256=lJW3QHHz1_SWqLEB-vUSTxSuL0EgUQ0ptdQns_NLNds,57343
221
221
  datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Wyp9k9tapsCuw9dyH4FCXJr_wmeLaYFoCtKvrV6SEDk,3892
222
222
  datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
223
223
  datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
@@ -233,10 +233,10 @@ datahub/ingestion/source/azure/abs_folder_utils.py,sha256=7skXus-4fSIoKpqCeU-GG0
233
233
  datahub/ingestion/source/azure/abs_utils.py,sha256=KdAlCK-PMrn35kFHxz5vrsjajyx2PD5GRgoBKdoRvcg,2075
234
234
  datahub/ingestion/source/azure/azure_common.py,sha256=Zl0pPuE6L3QcM5B1P0LsPthZmD0h7fUUS0kg2okl6IY,4053
235
235
  datahub/ingestion/source/bigquery_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
236
- datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=YMsyj6s7fggzisWfDdbT4w1MKJ3eRdNERsCShnu0Zqo,13681
236
+ datahub/ingestion/source/bigquery_v2/bigquery.py,sha256=c7g8sWuDIMhCSAX0D76P2arxZgTmzd-e0qlO7yt_zJY,13841
237
237
  datahub/ingestion/source/bigquery_v2/bigquery_audit.py,sha256=IlbHA8a-gNJvnubgBfxVHpUk8rFNIG80gk5HWXa2lyE,25108
238
238
  datahub/ingestion/source/bigquery_v2/bigquery_audit_log_api.py,sha256=LuGJ6LgPViLIfDQfylxlQ3CA7fZYM5MDt8M-7sfzm84,5096
239
- datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=PqbYNqM4-KukCU1meuvsk0qbiWa7UFh5hqHrHsvOSWQ,25889
239
+ datahub/ingestion/source/bigquery_v2/bigquery_config.py,sha256=sjCW997-Su14cVgWd1ZVx1E67yqfTIV5Wjp9Me0hfOw,26289
240
240
  datahub/ingestion/source/bigquery_v2/bigquery_data_reader.py,sha256=DeT3v_Z82__8En0FcZ0kavBAWQoRvSZ5Rppm9eeDAb8,2393
241
241
  datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7cBso7ZzrWl_vO5PYSa6CpvqNx8,1554
242
242
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
@@ -265,11 +265,11 @@ datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmX
265
265
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
266
266
  datahub/ingestion/source/data_lake_common/path_spec.py,sha256=u3u2eMe70V5vur-j8mYtupZdoeA2hSeK262Whdsc2YU,23506
267
267
  datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
268
- datahub/ingestion/source/datahub/config.py,sha256=rqZFvEmjxjBcW2cTEPYDVTAk3OLzuGIjEFghXPNeZNY,3955
268
+ datahub/ingestion/source/datahub/config.py,sha256=xBAZJpcw25aMI2zHi2wXi21sAfdy1rlmbBq9tY3adV0,4304
269
269
  datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
270
- datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=F8JrOjSrmJ2B6m1MWh83A1EYFDcGMla749HUeQWMnL0,9464
270
+ datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=Rd61iHFhvrNmgzIk0jDDYxjxQUnEckbn1SKedoR5qic,8972
271
271
  datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=gnxhhlK-jrfnHqD_4eVmfcdtBNW6pi1N_qkDZ7uSb3o,4187
272
- datahub/ingestion/source/datahub/datahub_source.py,sha256=2jDnsHEzpGhr00qQI9unSUJYD6Cb1McYFKOVbA-Zcm4,8487
272
+ datahub/ingestion/source/datahub/datahub_source.py,sha256=5qGg_T0KJaO5WcvrsM0KM8_eTOjy0NvlMI4DUdIAiDo,8482
273
273
  datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vBCU0XxGcZR6Xxs,940
274
274
  datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
275
275
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -305,7 +305,7 @@ datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
305
305
  datahub/ingestion/source/gc/datahub_gc.py,sha256=W6uoeV7B4WIXdxT4tOEdDksdJm656WwwvkH79L7f_8Q,12969
306
306
  datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=86Tm3NNWMf0xM4TklNIEeNOjEingKpYy-XvCPeaAb4k,17125
307
307
  datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=9jsyCIspWSSYSAVPHjKHr05885rXxM6FCH7KzTBceic,10139
308
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=2JpESfsqoJRdLskV3AHYU8nRj_NvNtIaLZ4_RRNIod4,11229
308
+ datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=zRtgC_AcZui4qGf9jBASI3R-CrYZxNe3Pm-gNSLT3rw,11420
309
309
  datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
310
310
  datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
311
311
  datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
@@ -429,10 +429,10 @@ datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_G
429
429
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
430
430
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
431
431
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
432
- datahub/ingestion/source/snowflake/snowflake_config.py,sha256=jQGSa7ZQs3EsXB9ANShZ4xv9RqrhRfVHRSLeFiDwwxc,17974
432
+ datahub/ingestion/source/snowflake/snowflake_config.py,sha256=UehWUvqTXRsWmE5bBS53IoLjUL06-wJq6K4O2MTT2R8,18374
433
433
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
434
434
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
435
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=5Lpy_irZlbOFJbvVkgsZSBjdLCT3VZNjlEvttzSQAU4,21121
435
+ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
436
436
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
437
437
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=jTpnFWRqqFId6DKJvvAbNuFPxyNi1oQxxDUyMvh1iu4,26968
438
438
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=5po2FWz41UVowykJYbTFGxsltbmlHBCPcHG20VOhdOE,38469
@@ -444,7 +444,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYh
444
444
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
445
445
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=0rXgz8bvRiI9SYVMa0UGLeg_DcjqBy6kQsdq0Uq0HVk,24685
446
446
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=MoI8-DR9tuMuHMBQcpDo4GFjvcoQZWLNkdFZsTkgK-M,12786
447
- datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=hIWtzlxuSQ_3w48o4AF2l9CQOcWIe6AmD07I89sH2B0,31860
447
+ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=0doZaPPMO64Qi9uN4w8ZYe3gKkkieGJKI5xntF7vS6w,32020
448
448
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
449
449
  datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
450
450
  datahub/ingestion/source/sql/clickhouse.py,sha256=jzvaXP5Wr0SMhj2rtuvVE821xnfpKiXhO3cm0xblgHs,27299
@@ -506,7 +506,7 @@ datahub/ingestion/source/unity/proxy.py,sha256=_6kCI7M4-26pZ9ZMGJUh6LwYmbGAZlnvc
506
506
  datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
507
507
  datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
508
508
  datahub/ingestion/source/unity/report.py,sha256=0Y-ciHVTI6ZKNCJ5zWoQh3Ze1c_GMqmTMKFwzXDuuOg,2788
509
- datahub/ingestion/source/unity/source.py,sha256=ydjqJ91q-Mir_aJSmEdo1umgPvQ5la67rBhC04IyV78,41938
509
+ datahub/ingestion/source/unity/source.py,sha256=YdUPCMJtpmvYVnRNnpqb4BVowFobkLvSJ_K2gHwrvCI,41752
510
510
  datahub/ingestion/source/unity/usage.py,sha256=igRxYg8usukTAA229uJWi-0y-Zd0yOq9dEBi2k9f15o,11436
511
511
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
512
512
  datahub/ingestion/source/usage/clickhouse_usage.py,sha256=8nQqNAPKqivywjzsvqH0-HWFwjd4gECpw_xahLXk5ek,9970
@@ -881,7 +881,7 @@ datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn
881
881
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
882
882
  datahub/sql_parsing/schema_resolver.py,sha256=9INZWdxA2dMSLK6RXaVqjbjyLY_VKMhCkQv_Xd6Ln3I,10848
883
883
  datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
884
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=jVF6TbyM71XdJ34K0Setz3LgJALvJrJs1mVKdxU_6d4,69830
884
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=ULvLZygN_LtZQg_DKLQ2lDzz3YsEhZBvZUx3wmYeP_Q,69976
885
885
  datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
886
886
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
887
887
  datahub/sql_parsing/sqlglot_lineage.py,sha256=gUVq3NwZUzQByJs43JZXz8lZf0ZVzVt0FzaW5wZOwK4,47460
@@ -986,8 +986,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
986
986
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
987
987
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
988
988
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
989
- acryl_datahub-0.15.0.1rc16.dist-info/METADATA,sha256=hMvfZy8EYOj5eb7yygEhb_kZJbHtpVx-bWNE6H6eu_c,173444
990
- acryl_datahub-0.15.0.1rc16.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
991
- acryl_datahub-0.15.0.1rc16.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
992
- acryl_datahub-0.15.0.1rc16.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
993
- acryl_datahub-0.15.0.1rc16.dist-info/RECORD,,
989
+ acryl_datahub-0.15.0.2rc1.dist-info/METADATA,sha256=cLoMIIavfob5z2bJbsj69AoG_J7tp489qvVvBf3W_Yo,173441
990
+ acryl_datahub-0.15.0.2rc1.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
991
+ acryl_datahub-0.15.0.2rc1.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
992
+ acryl_datahub-0.15.0.2rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
993
+ acryl_datahub-0.15.0.2rc1.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0.1rc16"
6
+ __version__ = "0.15.0.2rc1"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
datahub/cli/cli_utils.py CHANGED
@@ -3,7 +3,7 @@ import logging
3
3
  import time
4
4
  import typing
5
5
  from datetime import datetime
6
- from typing import Any, Dict, List, Optional, Tuple, Type, Union
6
+ from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
7
7
 
8
8
  import click
9
9
  import requests
@@ -33,6 +33,15 @@ def first_non_null(ls: List[Optional[str]]) -> Optional[str]:
33
33
  return next((el for el in ls if el is not None and el.strip() != ""), None)
34
34
 
35
35
 
36
+ _T = TypeVar("_T")
37
+
38
+
39
+ def get_or_else(value: Optional[_T], default: _T) -> _T:
40
+ # Normally we'd use `value or default`. However, that runs into issues
41
+ # when value is falsey but not None.
42
+ return value if value is not None else default
43
+
44
+
36
45
  def parse_run_restli_response(response: requests.Response) -> dict:
37
46
  response_json = response.json()
38
47
  if response.status_code != 200:
@@ -321,6 +330,8 @@ def get_frontend_session_login_as(
321
330
  def _ensure_valid_gms_url_acryl_cloud(url: str) -> str:
322
331
  if "acryl.io" not in url:
323
332
  return url
333
+ if url.endswith(":8080"):
334
+ url = url.replace(":8080", "")
324
335
  if url.startswith("http://"):
325
336
  url = url.replace("http://", "https://")
326
337
  if url.endswith("acryl.io"):
datahub/emitter/rest_emitter.py CHANGED
@@ -1,9 +1,21 @@
1
+ from __future__ import annotations
2
+
1
3
  import functools
2
4
  import json
3
5
  import logging
4
6
  import os
5
7
  from json.decoder import JSONDecodeError
6
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union
8
+ from typing import (
9
+ TYPE_CHECKING,
10
+ Any,
11
+ Callable,
12
+ Dict,
13
+ List,
14
+ Optional,
15
+ Sequence,
16
+ Tuple,
17
+ Union,
18
+ )
7
19
 
8
20
  import requests
9
21
  from deprecated import deprecated
@@ -12,8 +24,13 @@ from requests.exceptions import HTTPError, RequestException
12
24
 
13
25
  from datahub import nice_version_name
14
26
  from datahub.cli import config_utils
15
- from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url
16
- from datahub.configuration.common import ConfigurationError, OperationalError
27
+ from datahub.cli.cli_utils import ensure_has_system_metadata, fixup_gms_url, get_or_else
28
+ from datahub.cli.env_utils import get_boolean_env_variable
29
+ from datahub.configuration.common import (
30
+ ConfigModel,
31
+ ConfigurationError,
32
+ OperationalError,
33
+ )
17
34
  from datahub.emitter.generic_emitter import Emitter
18
35
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
19
36
  from datahub.emitter.request_helper import make_curl_command
@@ -30,10 +47,8 @@ if TYPE_CHECKING:
30
47
 
31
48
  logger = logging.getLogger(__name__)
32
49
 
33
- _DEFAULT_CONNECT_TIMEOUT_SEC = 30 # 30 seconds should be plenty to connect
34
- _DEFAULT_READ_TIMEOUT_SEC = (
35
- 30 # Any ingest call taking longer than 30 seconds should be abandoned
36
- )
50
+ _DEFAULT_TIMEOUT_SEC = 30 # 30 seconds should be plenty to connect
51
+ _TIMEOUT_LOWER_BOUND_SEC = 1 # if below this, we log a warning
37
52
  _DEFAULT_RETRY_STATUS_CODES = [ # Additional status codes to retry on
38
53
  429,
39
54
  500,
@@ -46,6 +61,8 @@ _DEFAULT_RETRY_MAX_TIMES = int(
46
61
  os.getenv("DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES", "4")
47
62
  )
48
63
 
64
+ _DATAHUB_EMITTER_TRACE = get_boolean_env_variable("DATAHUB_EMITTER_TRACE", False)
65
+
49
66
  # The limit is 16mb. We will use a max of 15mb to have some space
50
67
  # for overhead like request headers.
51
68
  # This applies to pretty much all calls to GMS.
@@ -60,15 +77,76 @@ BATCH_INGEST_MAX_PAYLOAD_LENGTH = int(
60
77
  )
61
78
 
62
79
 
80
+ class RequestsSessionConfig(ConfigModel):
81
+ timeout: Union[float, Tuple[float, float], None] = _DEFAULT_TIMEOUT_SEC
82
+
83
+ retry_status_codes: List[int] = _DEFAULT_RETRY_STATUS_CODES
84
+ retry_methods: List[str] = _DEFAULT_RETRY_METHODS
85
+ retry_max_times: int = _DEFAULT_RETRY_MAX_TIMES
86
+
87
+ extra_headers: Dict[str, str] = {}
88
+
89
+ ca_certificate_path: Optional[str] = None
90
+ client_certificate_path: Optional[str] = None
91
+ disable_ssl_verification: bool = False
92
+
93
+ def build_session(self) -> requests.Session:
94
+ session = requests.Session()
95
+
96
+ if self.extra_headers:
97
+ session.headers.update(self.extra_headers)
98
+
99
+ if self.client_certificate_path:
100
+ session.cert = self.client_certificate_path
101
+
102
+ if self.ca_certificate_path:
103
+ session.verify = self.ca_certificate_path
104
+
105
+ if self.disable_ssl_verification:
106
+ session.verify = False
107
+
108
+ try:
109
+ # Set raise_on_status to False to propagate errors:
110
+ # https://stackoverflow.com/questions/70189330/determine-status-code-from-python-retry-exception
111
+ # Must call `raise_for_status` after making a request, which we do
112
+ retry_strategy = Retry(
113
+ total=self.retry_max_times,
114
+ status_forcelist=self.retry_status_codes,
115
+ backoff_factor=2,
116
+ allowed_methods=self.retry_methods,
117
+ raise_on_status=False,
118
+ )
119
+ except TypeError:
120
+ # Prior to urllib3 1.26, the Retry class used `method_whitelist` instead of `allowed_methods`.
121
+ retry_strategy = Retry(
122
+ total=self.retry_max_times,
123
+ status_forcelist=self.retry_status_codes,
124
+ backoff_factor=2,
125
+ method_whitelist=self.retry_methods,
126
+ raise_on_status=False,
127
+ )
128
+
129
+ adapter = HTTPAdapter(
130
+ pool_connections=100, pool_maxsize=100, max_retries=retry_strategy
131
+ )
132
+ session.mount("http://", adapter)
133
+ session.mount("https://", adapter)
134
+
135
+ if self.timeout is not None:
136
+ # Shim session.request to apply default timeout values.
137
+ # Via https://stackoverflow.com/a/59317604.
138
+ session.request = functools.partial( # type: ignore
139
+ session.request,
140
+ timeout=self.timeout,
141
+ )
142
+
143
+ return session
144
+
145
+
63
146
  class DataHubRestEmitter(Closeable, Emitter):
64
147
  _gms_server: str
65
148
  _token: Optional[str]
66
149
  _session: requests.Session
67
- _connect_timeout_sec: float = _DEFAULT_CONNECT_TIMEOUT_SEC
68
- _read_timeout_sec: float = _DEFAULT_READ_TIMEOUT_SEC
69
- _retry_status_codes: List[int] = _DEFAULT_RETRY_STATUS_CODES
70
- _retry_methods: List[str] = _DEFAULT_RETRY_METHODS
71
- _retry_max_times: int = _DEFAULT_RETRY_MAX_TIMES
72
150
 
73
151
  def __init__(
74
152
  self,
@@ -99,15 +177,13 @@ class DataHubRestEmitter(Closeable, Emitter):
99
177
 
100
178
  self._session = requests.Session()
101
179
 
102
- self._session.headers.update(
103
- {
104
- "X-RestLi-Protocol-Version": "2.0.0",
105
- "X-DataHub-Py-Cli-Version": nice_version_name(),
106
- "Content-Type": "application/json",
107
- }
108
- )
180
+ headers = {
181
+ "X-RestLi-Protocol-Version": "2.0.0",
182
+ "X-DataHub-Py-Cli-Version": nice_version_name(),
183
+ "Content-Type": "application/json",
184
+ }
109
185
  if token:
110
- self._session.headers.update({"Authorization": f"Bearer {token}"})
186
+ headers["Authorization"] = f"Bearer {token}"
111
187
  else:
112
188
  # HACK: When no token is provided but system auth env variables are set, we use them.
113
189
  # Ideally this should simply get passed in as config, instead of being sneakily injected
@@ -116,75 +192,43 @@ class DataHubRestEmitter(Closeable, Emitter):
116
192
  # rest emitter, and the rest sink uses the rest emitter under the hood.
117
193
  system_auth = config_utils.get_system_auth()
118
194
  if system_auth is not None:
119
- self._session.headers.update({"Authorization": system_auth})
120
-
121
- if extra_headers:
122
- self._session.headers.update(extra_headers)
123
-
124
- if client_certificate_path:
125
- self._session.cert = client_certificate_path
195
+ headers["Authorization"] = system_auth
126
196
 
127
- if ca_certificate_path:
128
- self._session.verify = ca_certificate_path
129
-
130
- if disable_ssl_verification:
131
- self._session.verify = False
132
-
133
- self._connect_timeout_sec = (
134
- connect_timeout_sec or timeout_sec or _DEFAULT_CONNECT_TIMEOUT_SEC
135
- )
136
- self._read_timeout_sec = (
137
- read_timeout_sec or timeout_sec or _DEFAULT_READ_TIMEOUT_SEC
138
- )
139
-
140
- if self._connect_timeout_sec < 1 or self._read_timeout_sec < 1:
141
- logger.warning(
142
- f"Setting timeout values lower than 1 second is not recommended. Your configuration is connect_timeout:{self._connect_timeout_sec}s, read_timeout:{self._read_timeout_sec}s"
143
- )
144
-
145
- if retry_status_codes is not None: # Only if missing. Empty list is allowed
146
- self._retry_status_codes = retry_status_codes
147
-
148
- if retry_methods is not None:
149
- self._retry_methods = retry_methods
150
-
151
- if retry_max_times:
152
- self._retry_max_times = retry_max_times
153
-
154
- try:
155
- # Set raise_on_status to False to propagate errors:
156
- # https://stackoverflow.com/questions/70189330/determine-status-code-from-python-retry-exception
157
- # Must call `raise_for_status` after making a request, which we do
158
- retry_strategy = Retry(
159
- total=self._retry_max_times,
160
- status_forcelist=self._retry_status_codes,
161
- backoff_factor=2,
162
- allowed_methods=self._retry_methods,
163
- raise_on_status=False,
164
- )
165
- except TypeError:
166
- # Prior to urllib3 1.26, the Retry class used `method_whitelist` instead of `allowed_methods`.
167
- retry_strategy = Retry(
168
- total=self._retry_max_times,
169
- status_forcelist=self._retry_status_codes,
170
- backoff_factor=2,
171
- method_whitelist=self._retry_methods,
172
- raise_on_status=False,
197
+ timeout: float | tuple[float, float]
198
+ if connect_timeout_sec is not None or read_timeout_sec is not None:
199
+ timeout = (
200
+ connect_timeout_sec or timeout_sec or _DEFAULT_TIMEOUT_SEC,
201
+ read_timeout_sec or timeout_sec or _DEFAULT_TIMEOUT_SEC,
173
202
  )
203
+ if (
204
+ timeout[0] < _TIMEOUT_LOWER_BOUND_SEC
205
+ or timeout[1] < _TIMEOUT_LOWER_BOUND_SEC
206
+ ):
207
+ logger.warning(
208
+ f"Setting timeout values lower than {_TIMEOUT_LOWER_BOUND_SEC} second is not recommended. Your configuration is (connect_timeout, read_timeout) = {timeout} seconds"
209
+ )
210
+ else:
211
+ timeout = get_or_else(timeout_sec, _DEFAULT_TIMEOUT_SEC)
212
+ if timeout < _TIMEOUT_LOWER_BOUND_SEC:
213
+ logger.warning(
214
+ f"Setting timeout values lower than {_TIMEOUT_LOWER_BOUND_SEC} second is not recommended. Your configuration is timeout = {timeout} seconds"
215
+ )
174
216
 
175
- adapter = HTTPAdapter(
176
- pool_connections=100, pool_maxsize=100, max_retries=retry_strategy
177
- )
178
- self._session.mount("http://", adapter)
179
- self._session.mount("https://", adapter)
180
-
181
- # Shim session.request to apply default timeout values.
182
- # Via https://stackoverflow.com/a/59317604.
183
- self._session.request = functools.partial( # type: ignore
184
- self._session.request,
185
- timeout=(self._connect_timeout_sec, self._read_timeout_sec),
217
+ self._session_config = RequestsSessionConfig(
218
+ timeout=timeout,
219
+ retry_status_codes=get_or_else(
220
+ retry_status_codes, _DEFAULT_RETRY_STATUS_CODES
221
+ ),
222
+ retry_methods=get_or_else(retry_methods, _DEFAULT_RETRY_METHODS),
223
+ retry_max_times=get_or_else(retry_max_times, _DEFAULT_RETRY_MAX_TIMES),
224
+ extra_headers={**headers, **(extra_headers or {})},
225
+ ca_certificate_path=ca_certificate_path,
226
+ client_certificate_path=client_certificate_path,
227
+ disable_ssl_verification=disable_ssl_verification,
186
228
  )
187
229
 
230
+ self._session = self._session_config.build_session()
231
+
188
232
  def test_connection(self) -> None:
189
233
  url = f"{self._gms_server}/config"
190
234
  response = self._session.get(url)
@@ -291,7 +335,8 @@ class DataHubRestEmitter(Closeable, Emitter):
291
335
  mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
292
336
  async_flag: Optional[bool] = None,
293
337
  ) -> int:
294
- logger.debug("Attempting to emit batch mcps")
338
+ if _DATAHUB_EMITTER_TRACE:
339
+ logger.debug(f"Attempting to emit MCP batch of size {len(mcps)}")
295
340
  url = f"{self._gms_server}/aspects?action=ingestProposalBatch"
296
341
  for mcp in mcps:
297
342
  ensure_has_system_metadata(mcp)
@@ -304,22 +349,25 @@ class DataHubRestEmitter(Closeable, Emitter):
304
349
  current_chunk_size = INGEST_MAX_PAYLOAD_BYTES
305
350
  for mcp_obj in mcp_objs:
306
351
  mcp_obj_size = len(json.dumps(mcp_obj))
307
- logger.debug(
308
- f"Iterating through object with size {mcp_obj_size} (type: {mcp_obj.get('aspectName')}"
309
- )
352
+ if _DATAHUB_EMITTER_TRACE:
353
+ logger.debug(
354
+ f"Iterating through object with size {mcp_obj_size} (type: {mcp_obj.get('aspectName')}"
355
+ )
310
356
 
311
357
  if (
312
358
  mcp_obj_size + current_chunk_size > INGEST_MAX_PAYLOAD_BYTES
313
359
  or len(mcp_obj_chunks[-1]) >= BATCH_INGEST_MAX_PAYLOAD_LENGTH
314
360
  ):
315
- logger.debug("Decided to create new chunk")
361
+ if _DATAHUB_EMITTER_TRACE:
362
+ logger.debug("Decided to create new chunk")
316
363
  mcp_obj_chunks.append([])
317
364
  current_chunk_size = 0
318
365
  mcp_obj_chunks[-1].append(mcp_obj)
319
366
  current_chunk_size += mcp_obj_size
320
- logger.debug(
321
- f"Decided to send {len(mcps)} mcps in {len(mcp_obj_chunks)} chunks"
322
- )
367
+ if len(mcp_obj_chunks) > 0:
368
+ logger.debug(
369
+ f"Decided to send {len(mcps)} MCP batch in {len(mcp_obj_chunks)} chunks"
370
+ )
323
371
 
324
372
  for mcp_obj_chunk in mcp_obj_chunks:
325
373
  # TODO: We're calling json.dumps on each MCP object twice, once to estimate
@@ -1,10 +1,9 @@
1
1
  import json
2
2
  import logging
3
- from typing import Iterable, List
3
+ from typing import TYPE_CHECKING, Iterable, List
4
4
 
5
5
  from datahub.emitter.rest_emitter import INGEST_MAX_PAYLOAD_BYTES
6
6
  from datahub.emitter.serialization_helper import pre_json_transform
7
- from datahub.ingestion.api.source import SourceReport
8
7
  from datahub.ingestion.api.workunit import MetadataWorkUnit
9
8
  from datahub.metadata.schema_classes import (
10
9
  DatasetProfileClass,
@@ -12,12 +11,15 @@ from datahub.metadata.schema_classes import (
12
11
  SchemaMetadataClass,
13
12
  )
14
13
 
14
+ if TYPE_CHECKING:
15
+ from datahub.ingestion.api.source import SourceReport
16
+
15
17
  logger = logging.getLogger(__name__)
16
18
 
17
19
 
18
20
  class EnsureAspectSizeProcessor:
19
21
  def __init__(
20
- self, report: SourceReport, payload_constraint: int = INGEST_MAX_PAYLOAD_BYTES
22
+ self, report: "SourceReport", payload_constraint: int = INGEST_MAX_PAYLOAD_BYTES
21
23
  ):
22
24
  self.report = report
23
25
  self.payload_constraint = payload_constraint
@@ -31,6 +31,9 @@ from datahub.emitter.mcp_builder import mcps_from_mce
31
31
  from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import (
32
32
  auto_patch_last_modified,
33
33
  )
34
+ from datahub.ingestion.api.auto_work_units.auto_ensure_aspect_size import (
35
+ EnsureAspectSizeProcessor,
36
+ )
34
37
  from datahub.ingestion.api.closeable import Closeable
35
38
  from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit
36
39
  from datahub.ingestion.api.report import Report
@@ -450,6 +453,7 @@ class Source(Closeable, metaclass=ABCMeta):
450
453
  browse_path_processor,
451
454
  partial(auto_workunit_reporter, self.get_report()),
452
455
  auto_patch_last_modified,
456
+ EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size,
453
457
  ]
454
458
 
455
459
  @staticmethod
@@ -1,4 +1,3 @@
1
- import os
2
1
  from abc import ABCMeta, abstractmethod
3
2
  from dataclasses import dataclass
4
3
  from typing import Any, Dict, List, Optional
@@ -38,8 +37,8 @@ class ClassificationConfig(ConfigModel):
38
37
  )
39
38
 
40
39
  max_workers: int = Field(
41
- default=(os.cpu_count() or 4),
42
- description="Number of worker processes to use for classification. Set to 1 to disable.",
40
+ default=1,
41
+ description="Number of worker processes to use for classification. Note that any number above 1 might lead to a deadlock. Set to 1 to disable.",
43
42
  )
44
43
 
45
44
  table_pattern: AllowDenyPattern = Field(
@@ -179,21 +179,24 @@ class DataHubGraph(DatahubRestEmitter):
179
179
 
180
180
  @classmethod
181
181
  def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph":
182
+ session_config = emitter._session_config
183
+ if isinstance(session_config.timeout, tuple):
184
+ # TODO: This is slightly lossy. Eventually, we want to modify the emitter
185
+ # to accept a tuple for timeout_sec, and then we'll be able to remove this.
186
+ timeout_sec: Optional[float] = session_config.timeout[0]
187
+ else:
188
+ timeout_sec = session_config.timeout
182
189
  return cls(
183
190
  DatahubClientConfig(
184
191
  server=emitter._gms_server,
185
192
  token=emitter._token,
186
- timeout_sec=emitter._read_timeout_sec,
187
- retry_status_codes=emitter._retry_status_codes,
188
- retry_max_times=emitter._retry_max_times,
189
- extra_headers=emitter._session.headers,
190
- disable_ssl_verification=emitter._session.verify is False,
191
- ca_certificate_path=(
192
- emitter._session.verify
193
- if isinstance(emitter._session.verify, str)
194
- else None
195
- ),
196
- client_certificate_path=emitter._session.cert,
193
+ timeout_sec=timeout_sec,
194
+ retry_status_codes=session_config.retry_status_codes,
195
+ retry_max_times=session_config.retry_max_times,
196
+ extra_headers=session_config.extra_headers,
197
+ disable_ssl_verification=session_config.disable_ssl_verification,
198
+ ca_certificate_path=session_config.ca_certificate_path,
199
+ client_certificate_path=session_config.client_certificate_path,
197
200
  )
198
201
  )
199
202
 
@@ -10,7 +10,7 @@ class DatahubClientConfig(ConfigModel):
10
10
  # by callers / the CLI, but the actual client should not have any magic.
11
11
  server: str
12
12
  token: Optional[str] = None
13
- timeout_sec: Optional[int] = None
13
+ timeout_sec: Optional[float] = None
14
14
  retry_status_codes: Optional[List[int]] = None
15
15
  retry_max_times: Optional[int] = None
16
16
  extra_headers: Optional[Dict[str, str]] = None