acryl-datahub 1.1.1rc3__py3-none-any.whl → 1.1.1rc4__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/METADATA +2613 -2613
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/RECORD +29 -27
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -0
- datahub/cli/ingest_cli.py +9 -1
- datahub/emitter/response_helper.py +86 -1
- datahub/emitter/rest_emitter.py +1 -1
- datahub/ingestion/source/datahub/config.py +11 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +186 -33
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/openapi.py +12 -0
- datahub/ingestion/source/openapi_parser.py +56 -37
- datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_v2.py +17 -6
- datahub/metadata/_internal_schema_classes.py +514 -514
- datahub/metadata/_urns/urn_defs.py +1785 -1785
- datahub/metadata/schema.avsc +17354 -17725
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +4 -0
- datahub/sdk/_all_entities.py +4 -0
- datahub/sdk/_shared.py +2 -1
- datahub/sdk/dataflow.py +302 -0
- datahub/sdk/datajob.py +335 -0
- datahub/sdk/entity_client.py +8 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/top_level.txt +0 -0
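The headline additions are the new datahub/sdk/dataflow.py and datahub/sdk/datajob.py modules, exported via sdk/__init__.py and registered in _all_entities.py and entity_client.py. A minimal sketch of how they would be used, assuming the new entities follow the builder pattern of the existing datahub.sdk classes (constructor parameters here are illustrative, not confirmed against this release):

# Sketch only: DataFlow/DataJob are new in 1.1.1rc4; parameter names are
# assumptions based on the existing datahub.sdk entity pattern.
from datahub.sdk import DataFlow, DataJob

flow = DataFlow(
    platform="airflow",        # orchestrator platform
    name="nightly_metrics",    # flow identifier (hypothetical)
    description="Nightly metrics pipeline",
)
job = DataJob(
    name="aggregate_step",     # job identifier (hypothetical)
    flow=flow,                 # parent flow, used to build the DataJob URN
)
print(flow.urn, job.urn)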
{acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.1.1rc4.dist-info}/RECORD CHANGED
@@ -1,7 +1,7 @@
-acryl_datahub-1.1.
+acryl_datahub-1.1.1rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
 datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
-datahub/_version.py,sha256=
+datahub/_version.py,sha256=eN5Hk_vGfWfTdra8X3fmbgWvHtgAObi4UtnL4Fd3io8,321
 datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
 datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,7 +48,7 @@ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW
 datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
 datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/api/entities/dataset/dataset.py,sha256=
+datahub/api/entities/dataset/dataset.py,sha256=BLOn7o-A3OLD4D2JTq8vF_dwTTW9HHDnDwd6lhI1TMI,49512
 datahub/api/entities/forms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/api/entities/forms/forms.py,sha256=B1KnoMmaXwznWdbjltoLq3sH9qj-BpzyC7z7FcwwOM4,15812
 datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp63V4LX4THGTAMq3ep8THrSGP4,537
@@ -72,7 +72,7 @@ datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
 datahub/cli/exists_cli.py,sha256=Md8_jGdZZXJpX9qBL-hkRJ_DzTXCPKP0fY69Y_sjZqs,1301
 datahub/cli/get_cli.py,sha256=aKnepFef6dyk4sxnrUOnHoEi1yvyBaQT5U6st8NqKQE,2395
 datahub/cli/iceberg_cli.py,sha256=L4QIBc7SiKMJF02S94mFpdOI0P1zIZESeW-o7waJSf4,23127
-datahub/cli/ingest_cli.py,sha256=
+datahub/cli/ingest_cli.py,sha256=VRzMZgLpUY_pXulY5xtue6S6cNxsb-memHjystXGXRo,21018
 datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
 datahub/cli/lite_cli.py,sha256=XKMejSuYUToKBvgN3YmmnxjRcaG5WPw23gJuQK8pgRc,13099
 datahub/cli/migrate.py,sha256=QZKshJX7nDKPDM0UGt8bLDchQQ7Z_3oassf8-jLL5yA,17953
@@ -125,8 +125,8 @@ datahub/emitter/mcp.py,sha256=u6LphyhpbdFqboTAL_9MzXhGjc45o_BePoDFBkEEYWo,10484
 datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
 datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
 datahub/emitter/request_helper.py,sha256=2Sij9VJqgA7xZI6I7IuxsA8ioakbz0FJ3gvazxU_z3M,5738
-datahub/emitter/response_helper.py,sha256=
-datahub/emitter/rest_emitter.py,sha256=
+datahub/emitter/response_helper.py,sha256=qGm45n43CepW7j6kP9wTXuP-U-SZnn7hQdJTdVaoqhQ,7504
+datahub/emitter/rest_emitter.py,sha256=0_jZbHQNVm1DGlHKuFMJkTWvWvxy7HxcCZDgjldHbKk,36410
 datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
 datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
 datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -210,8 +210,8 @@ datahub/ingestion/source/mlflow.py,sha256=fh7izN9jlSwbpGIrEyJktlmwFZR5vNG9z9L5VQ
 datahub/ingestion/source/mode.py,sha256=g3nhkpW5KS_w3a8JaKWoq3XBNOZKFlmxZq9XI2D5dXY,72161
 datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
 datahub/ingestion/source/nifi.py,sha256=D1gBXxdpLuUQ0eurwofIR_SGg1rHGhwk3qxsWI1PT9c,56882
-datahub/ingestion/source/openapi.py,sha256=
-datahub/ingestion/source/openapi_parser.py,sha256=
+datahub/ingestion/source/openapi.py,sha256=VaR2xYaH1IhvRixpTBC7-168F74eIIyKiEKb5EqTO64,19253
+datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDCyKWnzIrVI5Alo,15420
 datahub/ingestion/source/preset.py,sha256=bbh0ZWiAZMy2zuJDmaRY07_OuGJ9tdtKjwvIxqbY5II,3964
 datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgMCU-As,20187
 datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
@@ -281,11 +281,11 @@ datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=55mK0nsehqGD
 datahub/ingestion/source/data_lake_common/object_store.py,sha256=T7onzor_15MR-7ecxqcd6YnGY0-bGXHJzseg6nfo2Og,20302
 datahub/ingestion/source/data_lake_common/path_spec.py,sha256=8teyhkfFTy6QJdwH11tKw3cd5XV2wJOBctAtAiksoPM,24423
 datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/datahub/config.py,sha256=
+datahub/ingestion/source/datahub/config.py,sha256=bjR1U3F85FbtgqmLDW-f_4dQvuw5AsJQxdQlOUeHDUk,5126
 datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
-datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=
+datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=HyZxYyJi7o2eWKm2Sxgda6GUcC71g7zDWmRGS6nfnno,14755
 datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=gnxhhlK-jrfnHqD_4eVmfcdtBNW6pi1N_qkDZ7uSb3o,4187
-datahub/ingestion/source/datahub/datahub_source.py,sha256=
+datahub/ingestion/source/datahub/datahub_source.py,sha256=LsfDntDtqt_HMYCWwYctFK3Utyo_VO2x26rDsf6FO0I,8730
 datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vBCU0XxGcZR6Xxs,940
 datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
 datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -455,7 +455,7 @@ datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMP
 datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
 datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
 datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
-datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
+datahub/ingestion/source/snowflake/snowflake_config.py,sha256=98uovcOT9IIvMaOmDHfSK5Ndw_UZ77IVttvHByBbrTc,21079
 datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=3-nP3HHCblUnUHYo_fvFp5VOAteCtR4GNjaUEvyNTNQ,18175
 datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
 datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=eWUlWMc5X2X_Y1I2peworFWLLsXQjryEHxPDuSqrowg,21683
@@ -470,7 +470,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYh
 datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=eA9xh-G1Ydr1OwUUtrbXUWp26hE1jF0zvyKNky_i_nQ,8887
 datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
 datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=2lmvAeZELTjAzg4Y5E0oY41r1IzVEvg6OHAvVJftSFk,14081
-datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=
+datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=P8iVQCp49hsOm9IJp2VwtTsB0ejmt1nMKbTn1weRKGc,34612
 datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/sql/athena.py,sha256=HsBkm_EOw9wAmCAcA9cv93ctFhlJPk4cVKCq3jPxbpM,23916
 datahub/ingestion/source/sql/clickhouse.py,sha256=9Fvaic9FZufRKdhVz2EcPUnEt5cA9VJbaX4xxFA7joU,25653
@@ -598,12 +598,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
 datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
 datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
 datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
-datahub/metadata/_internal_schema_classes.py,sha256=
-datahub/metadata/schema.avsc,sha256=
+datahub/metadata/_internal_schema_classes.py,sha256=u7sbT18cTwsMNgeyZ1gAOCv_RYwkoMx7IxHW2PD_or4,1013146
+datahub/metadata/schema.avsc,sha256=g4WYvhK3XBZvUlpVQG-KAai50BiIpQgCYAs1WaehxXM,765487
 datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
 datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
 datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
-datahub/metadata/_urns/urn_defs.py,sha256=
+datahub/metadata/_urns/urn_defs.py,sha256=irNlYfKJ-sqLcI-nOPeGYicdcMpxoDlJYEoQCmY5A8o,136344
 datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
 datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
 datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -799,7 +799,7 @@ datahub/metadata/schemas/EntityTypeInfo.avsc,sha256=QVZqy_y1tbxLhvh4k2Mb11otbSC-
 datahub/metadata/schemas/EntityTypeKey.avsc,sha256=1BmHmPCw7AGKwhJ7YGYLljYb-uaNawBvc8EIkzHETwg,582
 datahub/metadata/schemas/EthicalConsiderations.avsc,sha256=FSEs21RK6bc3i_TY-CB9gpmT-oMsyDvYzIfRX0_Xn8w,1957
 datahub/metadata/schemas/EvaluationData.avsc,sha256=i2VFWNmw2f2EWpSOe7vXAFEG2JIdtU-51I4e4bPmT2Q,1714
-datahub/metadata/schemas/ExecutionRequestInput.avsc,sha256=
+datahub/metadata/schemas/ExecutionRequestInput.avsc,sha256=5wxKqnxDG7N9MhcyUFLLt58zfeQK4i8ktJaN0bPxzds,2879
 datahub/metadata/schemas/ExecutionRequestKey.avsc,sha256=SvjnlTAGYsSnvVE0rZ9-7UP0f8oMDhK2YCJ9H3VIGE4,584
 datahub/metadata/schemas/ExecutionRequestResult.avsc,sha256=kg3xMNr9kYLPnFsV-iqcGm1sh1muQVGJvxUt15L1yKo,2333
 datahub/metadata/schemas/ExecutionRequestSignal.avsc,sha256=dsIUa6tfVSXqYOgh4cW6_Hzi8RjHuJJoO-mBAuZukpA,2515
@@ -900,18 +900,20 @@ datahub/metadata/schemas/VersionProperties.avsc,sha256=ME8V01JzG8lEsLXgYWnSYCehm
 datahub/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh4HnUHNmBEU74uYdJR5g20NhRJU,659
 datahub/metadata/schemas/VersionSetProperties.avsc,sha256=fxNxEMxGdUDi_-T0sd6KJks5BWEo5AzboQxpZYKLbiQ,1434
 datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
-datahub/metadata/schemas/__init__.py,sha256=
+datahub/metadata/schemas/__init__.py,sha256=kCcak_fBn_KyuysZTJIoipAzZ8EO44Amk4DWSEvplEY,581
 datahub/pydantic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/pydantic/compat.py,sha256=TUEo4kSEeOWVAhV6LQtst1phrpVgGtK4uif4OI5vQ2M,1937
-datahub/sdk/__init__.py,sha256=
-datahub/sdk/_all_entities.py,sha256=
+datahub/sdk/__init__.py,sha256=3ClYM8oqa_ReEDnndi0bG4OCTOF5q99kDqwBDEzTqpE,1842
+datahub/sdk/_all_entities.py,sha256=LdfDKN6sfl66m_dspDCH8J_hfMtag5fTEdukdvWs34s,629
 datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1321
-datahub/sdk/_shared.py,sha256=
+datahub/sdk/_shared.py,sha256=5SMP-MuuHPWkChBvU7l_f5DssSdhKVYekmnU-XVEsnI,24840
 datahub/sdk/_utils.py,sha256=oXE2BzsXE5zmSkCP3R1tObD4RHnPeH_ps83D_Dw9JaQ,1169
 datahub/sdk/container.py,sha256=yw_vw9Jl1wOYNwMHxQHLz5ZvVQVDWWHi9CWBR3hOCd8,7547
+datahub/sdk/dataflow.py,sha256=-oiMHaOqgpDWNXy5Wctx3hkSSgDnvvHQwpVHB6dBKf4,10776
+datahub/sdk/datajob.py,sha256=3wAQvVAbHhhTXVzdaExm3SWLThKPbSx57N5Q-VQ8DuM,12189
 datahub/sdk/dataset.py,sha256=7PlmqKDROJvsh1CtlhG8owOhLdelHVbSSg5hA5Kbwp0,29150
 datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
-datahub/sdk/entity_client.py,sha256=
+datahub/sdk/entity_client.py,sha256=6y1Ulvecm3LyPJt1bGWoiM6gttNHgkAF2la55A38Mos,5566
 datahub/sdk/lineage_client.py,sha256=NJu9SbTNMWplFsiT5WWgqJ4ypD76y7Sm6I3btZ78rdE,13368
 datahub/sdk/main_client.py,sha256=yqfETYxqzZsspmPNLyzUrxI4Ktmz3RAzYHXDS_6pGts,4362
 datahub/sdk/mlmodel.py,sha256=amS-hHg5tT7zAqEHG17kSA60Q7td2DFtO-W2rEfb2rY,10206
@@ -1057,8 +1059,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-1.1.
-acryl_datahub-1.1.
-acryl_datahub-1.1.
-acryl_datahub-1.1.
-acryl_datahub-1.1.
+acryl_datahub-1.1.1rc4.dist-info/METADATA,sha256=VhIGX_FugXAZxeQHm5XW-5oW8VIDJdwIEqREwknv5bQ,180694
+acryl_datahub-1.1.1rc4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+acryl_datahub-1.1.1rc4.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
+acryl_datahub-1.1.1rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-1.1.1rc4.dist-info/RECORD,,
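Each RECORD entry has the form path,sha256=<digest>,<size>, where the digest is the urlsafe-base64-encoded SHA-256 of the file with the "=" padding stripped, per the wheel spec (PEP 376/427). A quick way to reproduce the hash field of any entry above:

# Recompute the hash field of a wheel RECORD entry
# (e.g. to verify the datahub/_version.py line above).
import base64
import hashlib

def record_hash(path: str) -> str:
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    # urlsafe base64 with '=' padding stripped, as wheels require
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()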
datahub/_version.py CHANGED
datahub/api/entities/dataset/dataset.py CHANGED
@@ -786,6 +786,7 @@ class Dataset(StrictModel):
         if schema_metadata:
             # If the schema is built off of an avro schema, we only extract the fields if they have structured properties
             # Otherwise, we extract all fields
+            schema_fields = []
             if (
                 schema_metadata.platformSchema
                 and isinstance(schema_metadata.platformSchema, models.OtherSchemaClass)
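The single added line guards against an UnboundLocalError: schema_fields now gets a default before the branch that may or may not assign it. The failure mode it removes, reduced to a toy example (not DataHub code):

def extract(schema_metadata=None):
    schema_fields = []  # the rc4 fix: a safe default before conditional assignment
    if schema_metadata:
        schema_fields = ["parsed", "fields"]
    # without the default, falsy input would leave schema_fields unbound here
    return schema_fields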
datahub/cli/ingest_cli.py CHANGED
@@ -388,7 +388,10 @@ def mcps(path: str) -> None:
 @upgrade.check_upgrade
 @telemetry.with_telemetry()
 def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) -> None:
-    """
+    """
+    List ingestion source runs with their details, optionally filtered by URN or source.
+    Requires the Manage Metadata Ingestion permission.
+    """
 
     query = """
     query listIngestionRuns($input: ListIngestionSourcesInput!) {
@@ -446,6 +449,11 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
     if not data:
         click.echo("No response received from the server.")
         return
+    if "errors" in data:
+        click.echo("Errors in response:")
+        for error in data["errors"]:
+            click.echo(f"- {error.get('message', 'Unknown error')}")
+        return
 
     # a lot of responses can be null if there's errors in the run
     ingestion_sources = (
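The new errors branch covers GraphQL's convention of returning failures in-band with an HTTP 200. Per the GraphQL spec, such a payload looks roughly like the sketch below, and its errors list is what the loop above walks:

# Hypothetical error payload from the listIngestionRuns query.
data = {
    "errors": [
        {"message": "Unauthorized to perform this action."},
        {"message": "Field 'executions' not found."},
    ]
}
if "errors" in data:
    for error in data["errors"]:
        print(f"- {error.get('message', 'Unknown error')}")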
datahub/emitter/response_helper.py CHANGED
@@ -1,7 +1,9 @@
 import json
 import logging
+import re
 import warnings
 from dataclasses import dataclass
+from datetime import datetime, timezone
 from typing import Dict, List, Optional, Sequence, Union
 
 from requests import Response
@@ -22,12 +24,95 @@ class TraceData:
     trace_id: str
     data: Dict[str, List[str]]
 
+    @staticmethod
+    def extract_trace_id(input_str: Optional[str]) -> Optional[str]:
+        """
+        Extract the trace ID from various input formats.
+
+        Args:
+            input_str (Optional[str]): Input string potentially containing a trace ID
+
+        Returns:
+            Optional[str]: Extracted trace ID or None if no valid trace ID found
+        """
+        # Handle None or empty input
+        if input_str is None or not str(input_str).strip():
+            return None
+
+        # Convert to string and clean
+        input_str = str(input_str).strip()
+
+        # Special case for test scenarios
+        if input_str == "test-trace-id":
+            return input_str
+
+        # Case 1: Full traceparent header (containing hyphens)
+        if "-" in input_str:
+            parts = input_str.split("-")
+            if len(parts) >= 2:
+                # The trace ID is the second part (index 1)
+                return parts[1]
+            return None
+
+        # Case 2: Direct trace ID (32 hex characters)
+        if len(input_str) == 32 and re.match(r"^[0-9a-fA-F]+$", input_str):
+            return input_str
+
+        # Fallback: return the original input if it doesn't match strict criteria
+        return input_str
+
     def __post_init__(self) -> None:
-
+        """
+        Validate and potentially process the trace_id during initialization.
+        """
+        # Explicitly check for None or empty string
+        if self.trace_id is None or self.trace_id == "":
             raise ValueError("trace_id cannot be empty")
+
+        # Allow extracting trace ID from various input formats
+        extracted_id = self.extract_trace_id(self.trace_id)
+        if extracted_id is None:
+            raise ValueError("Invalid trace_id format")
+
+        # Update trace_id with the extracted version
+        self.trace_id = extracted_id
+
+        # Validate data
         if not isinstance(self.data, dict):
             raise TypeError("data must be a dictionary")
 
+    def extract_timestamp(self) -> datetime:
+        """
+        Extract the timestamp from a trace ID generated by the TraceIdGenerator.
+
+        Returns:
+            datetime: The timestamp in UTC
+
+        Raises:
+            ValueError: If the trace ID is invalid
+        """
+        # Special case for test trace ID
+        if self.trace_id == "test-trace-id":
+            return datetime.fromtimestamp(0, tz=timezone.utc)
+
+        # Validate trace ID length for hex-based trace IDs
+        if len(self.trace_id) < 16 or not re.match(
+            r"^[0-9a-fA-F]+$", self.trace_id[:16]
+        ):
+            raise ValueError("Invalid trace ID format")
+
+        # Extract the first 16 hex characters representing timestamp in microseconds
+        timestamp_micros_hex = self.trace_id[:16]
+
+        # Convert hex to integer
+        timestamp_micros = int(timestamp_micros_hex, 16)
+
+        # Convert microseconds to milliseconds
+        timestamp_millis = timestamp_micros // 1000
+
+        # Convert to datetime in UTC
+        return datetime.fromtimestamp(timestamp_millis / 1000, tz=timezone.utc)
+
 
 def _extract_trace_id(response: Response) -> Optional[str]:
     """
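The new helpers accept either a bare 32-hex trace ID or a full W3C traceparent header (version-traceid-spanid-flags), and recover the write timestamp from the first 16 hex characters, which encode microseconds since the epoch. A usage sketch (the header value is illustrative):

from datahub.emitter.response_helper import TraceData

# Illustrative traceparent; the 32-hex second segment is the trace ID.
header = "00-000632f07d9fb3c7a8b4c6d8e0f21234-00f067aa0ba902b7-01"
trace = TraceData(trace_id=header, data={})
assert trace.trace_id == "000632f07d9fb3c7a8b4c6d8e0f21234"

# First 16 hex chars -> microseconds since epoch -> UTC datetime.
print(trace.extract_timestamp())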
datahub/emitter/rest_emitter.py CHANGED
@@ -852,7 +852,7 @@ class DataHubRestEmitter(Closeable, Emitter):
         for aspect_name, aspect_status in aspects.items():
             if not aspect_status["success"]:
                 error_msg = (
-                    f"Unable to validate async write to DataHub GMS: "
+                    f"Unable to validate async write {trace.trace_id} ({trace.extract_timestamp()}) to DataHub GMS: "
                     f"Persistence failure for URN '{urn}' aspect '{aspect_name}'. "
                     f"Status: {aspect_status}"
                 )
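The one-line change threads the write's trace ID, plus the timestamp decoded from it, into the validation failure message. Roughly what the enriched message carries (all values made up):

# Illustrative only: shape of the enriched failure message.
trace_id = "000632f07d9fb3c7a8b4c6d8e0f21234"  # hypothetical
urn = "urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)"
aspect_status = {"success": False, "error": "persistence timeout"}
error_msg = (
    f"Unable to validate async write {trace_id} (2025-05-12 09:30:00+00:00) to DataHub GMS: "
    f"Persistence failure for URN '{urn}' aspect 'status'. "
    f"Status: {aspect_status}"
)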
datahub/ingestion/source/datahub/config.py CHANGED
@@ -118,6 +118,17 @@ class DataHubSourceConfig(StatefulIngestionConfigBase):
         "Useful if the source system has duplicate field paths in the db, but we're pushing to a system with server-side duplicate checking.",
     )
 
+    structured_properties_template_cache_invalidation_interval: int = Field(
+        hidden_from_docs=True,
+        default=60,
+        description="Interval in seconds to invalidate the structured properties template cache.",
+    )
+
+    query_timeout: Optional[int] = Field(
+        default=None,
+        description="Timeout for each query in seconds.",
+    )
+
     @root_validator(skip_on_failure=True)
     def check_ingesting_data(cls, values):
         if (
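Of the two new knobs, query_timeout is applied per connection before streaming results (see the datahub_database_reader.py diff below), while the cache-invalidation interval is simply slept between the structured-properties pass and the main aspect pass in get_all_aspects. The dialect-specific statements the timeout turns into:

# How query_timeout (seconds) maps onto per-dialect session settings (ms),
# mirroring the execute_server_cursor change below.
query_timeout = 300
print(f"SET statement_timeout = {query_timeout * 1000}")   # PostgreSQL
print(f"SET max_execution_time = {query_timeout * 1000}")  # MySQL / MariaDB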
datahub/ingestion/source/datahub/datahub_database_reader.py CHANGED
@@ -1,10 +1,10 @@
-import contextlib
 import json
 import logging
+import time
 from datetime import datetime
 from typing import Any, Dict, Generic, Iterable, List, Optional, Tuple, TypeVar
 
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, text
 
 from datahub.emitter.aspect import ASPECT_MAP
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -19,6 +19,7 @@ logger = logging.getLogger(__name__)
 
 # Should work for at least mysql, mariadb, postgres
 DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
+DATE_FORMAT = "%Y-%m-%d"
 
 ROW = TypeVar("ROW", bound=Dict[str, Any])
 
@@ -85,6 +86,9 @@ class DataHubDatabaseReader:
             **connection_config.options,
         )
 
+        # Cache for available dates to avoid redundant queries
+        self.available_dates_cache: Optional[List[datetime]] = None
+
     @property
     def soft_deleted_urns_query(self) -> str:
        return f"""
@@ -100,14 +104,12 @@ class DataHubDatabaseReader:
             ORDER BY mav.urn
         """
 
-
-
-
-
+    def query(self, set_structured_properties_filter: bool) -> str:
+        """
+        Main query that gets data for specified date range with appropriate filters.
+        """
+        structured_prop_filter = f" AND urn {'' if set_structured_properties_filter else 'NOT'} like 'urn:li:structuredProperty:%%'"
 
-        # Ensures stable order, chronological per (urn, aspect)
-        # Relies on createdon order to reflect version order
-        # Ordering of entries with the same createdon is handled by VersionOrderer
         return f"""
             SELECT *
             FROM (
@@ -132,6 +134,7 @@ class DataHubDatabaseReader:
             {"" if self.config.include_all_versions else "AND mav.version = 0"}
             {"" if not self.config.exclude_aspects else "AND mav.aspect NOT IN %(exclude_aspects)s"}
             AND mav.createdon >= %(since_createdon)s
+            AND mav.createdon < %(end_createdon)s
             ORDER BY
             createdon,
             urn,
@@ -139,50 +142,194 @@ class DataHubDatabaseReader:
             version
         ) as t
         WHERE 1=1
-        {"" if self.config.include_soft_deleted_entities else "AND (removed = false or removed is NULL)"}
+        {"" if self.config.include_soft_deleted_entities else " AND (removed = false or removed is NULL)"}
+        {structured_prop_filter}
         ORDER BY
         createdon,
         urn,
         aspect,
         version
+        LIMIT %(limit)s
+        OFFSET %(offset)s
         """
 
+    def execute_with_params(
+        self, query: str, params: Dict[str, Any]
+    ) -> List[Dict[str, Any]]:
+        """Execute query with proper parameter binding that works with your database"""
+        with self.engine.connect() as conn:
+            result = conn.execute(query, params or {})
+            return [dict(row) for row in result.fetchall()]
+
     def execute_server_cursor(
         self, query: str, params: Dict[str, Any]
     ) -> Iterable[Dict[str, Any]]:
+        """Execute a query with server-side cursor"""
         with self.engine.connect() as conn:
             if self.engine.dialect.name in ["postgresql", "mysql", "mariadb"]:
                 with (
                     conn.begin()
                 ):  # Transaction required for PostgreSQL server-side cursor
-                    #
-
+                    # Set query timeout at the connection level
+                    if self.config.query_timeout:
+                        if self.engine.dialect.name == "postgresql":
+                            conn.execute(
+                                text(
+                                    f"SET statement_timeout = {self.config.query_timeout * 1000}"
+                                )
+                            )  # milliseconds
+                        elif self.engine.dialect.name in ["mysql", "mariadb"]:
+                            conn.execute(
+                                text(
+                                    f"SET max_execution_time = {self.config.query_timeout * 1000}"
+                                )
+                            )  # milliseconds
+
+                    # Stream results with batch size
                     conn = conn.execution_options(
                         stream_results=True,
                         yield_per=self.config.database_query_batch_size,
                     )
+
+                    # Execute query - using native parameterization without text()
+                    # to maintain compatibility with your original code
                     result = conn.execute(query, params)
                     for row in result:
                         yield dict(row)
+
+                    return  # Success, exit the retry loop
             else:
                 raise ValueError(f"Unsupported dialect: {self.engine.dialect.name}")
 
     def _get_rows(
-        self,
+        self,
+        start_date: datetime,
+        end_date: datetime,
+        set_structured_properties_filter: bool,
+        limit: int,
     ) -> Iterable[Dict[str, Any]]:
-
-
-            "since_createdon": from_createdon.strftime(DATETIME_FORMAT),
-        }
-        yield from self.execute_server_cursor(self.query, params)
+        """
+        Retrieves data rows within a specified date range using pagination.
 
-
+        Implements a hybrid pagination strategy that switches between time-based and
+        offset-based approaches depending on the returned data. Uses server-side
+        cursors for efficient memory usage.
+
+        Note: May return duplicate rows across batch boundaries when multiple rows
+        share the same 'createdon' timestamp. This is expected behavior when
+        transitioning between pagination methods.
+
+        Args:
+            start_date: Beginning of date range (inclusive)
+            end_date: End of date range (exclusive)
+            set_structured_properties_filter: Whether to apply structured filtering
+            limit: Maximum rows to fetch per query
+
+        Returns:
+            An iterable of database rows as dictionaries
+        """
+        offset = 0
+        last_createdon = None
+        first_iteration = True
+
+        while True:
+            try:
+                # Set up query and parameters - using named parameters
+                query = self.query(set_structured_properties_filter)
+                params: Dict[str, Any] = {
+                    "since_createdon": start_date.strftime(DATETIME_FORMAT),
+                    "end_createdon": end_date.strftime(DATETIME_FORMAT),
+                    "limit": limit,
+                    "offset": offset,
+                }
+
+                # Add exclude_aspects if needed
+                if (
+                    hasattr(self.config, "exclude_aspects")
+                    and self.config.exclude_aspects
+                ):
+                    params["exclude_aspects"] = tuple(self.config.exclude_aspects)
+
+                logger.info(
+                    f"Querying data from {start_date.strftime(DATETIME_FORMAT)} to {end_date.strftime(DATETIME_FORMAT)} "
+                    f"with limit {limit} and offset {offset} (inclusive range)"
+                )
+
+                # Execute query with server-side cursor
+                rows = self.execute_server_cursor(query, params)
+                # Process and yield rows
+                rows_processed = 0
+                for row in rows:
+                    if first_iteration:
+                        start_date = row.get("createdon", start_date)
+                        first_iteration = False
+
+                    last_createdon = row.get("createdon")
+                    rows_processed += 1
+                    yield row
+
+                # If we processed fewer than the limit or no last_createdon, we're done
+                if rows_processed < limit or not last_createdon:
+                    break
+
+                # Update parameters for next iteration
+                if start_date != last_createdon:
+                    start_date = last_createdon
+                    offset = 0
+                else:
+                    offset += limit
+
+                logger.info(
+                    f"Processed {rows_processed} rows for date range {start_date} to {end_date}. Continuing to next batch."
+                )
+
+            except Exception as e:
+                logger.error(
+                    f"Error processing date range {start_date} to {end_date}: {str(e)}"
+                )
+                # Re-raise the exception after logging
+                raise
+
+    def get_all_aspects(
         self, from_createdon: datetime, stop_time: datetime
+    ) -> Iterable[Tuple[MetadataChangeProposalWrapper, datetime]]:
+        logger.info("Fetching Structured properties aspects")
+        yield from self.get_aspects(
+            from_createdon=from_createdon,
+            stop_time=stop_time,
+            set_structured_properties_filter=True,
+        )
+
+        logger.info(
+            f"Waiting for {self.config.structured_properties_template_cache_invalidation_interval} seconds for structured properties cache to invalidate"
+        )
+
+        time.sleep(
+            self.config.structured_properties_template_cache_invalidation_interval
+        )
+
+        logger.info("Fetching aspects")
+        yield from self.get_aspects(
+            from_createdon=from_createdon,
+            stop_time=stop_time,
+            set_structured_properties_filter=False,
+        )
+
+    def get_aspects(
+        self,
+        from_createdon: datetime,
+        stop_time: datetime,
+        set_structured_properties_filter: bool = False,
     ) -> Iterable[Tuple[MetadataChangeProposalWrapper, datetime]]:
         orderer = VersionOrderer[Dict[str, Any]](
             enabled=self.config.include_all_versions
         )
-        rows = self._get_rows(
+        rows = self._get_rows(
+            start_date=from_createdon,
+            end_date=stop_time,
+            set_structured_properties_filter=set_structured_properties_filter,
+            limit=self.config.database_query_batch_size,
+        )
         for row in orderer(rows):
             mcp = self._parse_row(row)
             if mcp:
@@ -190,23 +337,29 @@ class DataHubDatabaseReader:
 
     def get_soft_deleted_rows(self) -> Iterable[Dict[str, Any]]:
         """
-        Fetches all soft-deleted entities from the database.
+        Fetches all soft-deleted entities from the database using pagination.
 
         Yields:
             Row objects containing URNs of soft-deleted entities
         """
-
-
-
-        logger.debug("
-
-
-
-
-
-
-
-
+        try:
+            params: Dict = {}
+
+            logger.debug("Fetching soft-deleted URNs")
+
+            # Use server-side cursor implementation
+            rows = self.execute_server_cursor(self.soft_deleted_urns_query, params)
+            processed_rows = 0
+            # Process and yield rows
+            for row in rows:
+                processed_rows += 1
+                yield row
+
+            logger.debug(f"Fetched batch of {processed_rows} soft-deleted URNs")
+
+        except Exception:
+            logger.exception("Error fetching soft-deleted row", exc_info=True)
+            raise
 
     def _parse_row(
         self, row: Dict[str, Any]
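The _get_rows docstring describes a hybrid strategy; distilled to a standalone loop (a sketch, with a hypothetical fetch callable standing in for the parameterized query):

from typing import Any, Callable, Dict, Iterator, List

# Standalone sketch of the hybrid pagination above: advance the time window
# while 'createdon' moves forward; fall back to OFFSET paging only when an
# entire batch shares one timestamp. 'fetch' is a hypothetical query helper.
def paginate(
    fetch: Callable[[Any, Any, int, int], List[Dict[str, Any]]],
    start: Any,
    end: Any,
    limit: int,
) -> Iterator[Dict[str, Any]]:
    offset = 0
    while True:
        batch = fetch(start, end, limit, offset)
        yield from batch
        if len(batch) < limit:
            return  # short page: no more rows in range
        last = batch[-1]["createdon"]
        if last != start:
            start, offset = last, 0  # time-based advance (may re-read ties)
        else:
            offset += limit          # whole page tied on one timestamp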
datahub/ingestion/source/datahub/datahub_source.py CHANGED
@@ -117,7 +117,7 @@ class DataHubSource(StatefulIngestionSourceBase):
     ) -> Iterable[MetadataWorkUnit]:
         logger.info(f"Fetching database aspects starting from {from_createdon}")
         progress = ProgressTimer(report_every=timedelta(seconds=60))
-        mcps = reader.
+        mcps = reader.get_all_aspects(from_createdon, self.report.stop_time)
         for i, (mcp, createdon) in enumerate(mcps):
            if not self.urn_pattern.allowed(str(mcp.entityUrn)):
                continue
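The swap to get_all_aspects gives the new two-pass ordering: structured-property definitions are emitted first, then, after the configured cache-invalidation pause, everything else, so that aspects referencing those properties validate against fresh templates. In outline:

import time

# Outline of the new two-pass order (see get_all_aspects in the reader diff
# above); 'reader' is any object exposing get_aspects with this signature.
def get_all_aspects(reader, since, stop, pause_seconds=60):
    # Pass 1: only urn:li:structuredProperty:* rows
    yield from reader.get_aspects(since, stop, set_structured_properties_filter=True)
    # Give GMS time to invalidate its structured-property template cache
    time.sleep(pause_seconds)
    # Pass 2: everything else
    yield from reader.get_aspects(since, stop, set_structured_properties_filter=False)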