acryl-datahub 1.2.0.8rc3__py3-none-any.whl → 1.2.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.8rc3.dist-info → acryl_datahub-1.2.0.9.dist-info}/METADATA +2534 -2534
- {acryl_datahub-1.2.0.8rc3.dist-info → acryl_datahub-1.2.0.9.dist-info}/RECORD +28 -28
- datahub/_version.py +1 -1
- datahub/ingestion/api/sink.py +26 -4
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/run/pipeline.py +17 -12
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +4 -12
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -0
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/fivetran/config.py +2 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/metadata/_internal_schema_classes.py +41 -1
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/schema.avsc +38 -1
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/sdk/_shared.py +7 -5
- datahub/sdk/chart.py +3 -3
- datahub/sdk/dashboard.py +7 -7
- datahub/sdk/dataset.py +4 -0
- {acryl_datahub-1.2.0.8rc3.dist-info → acryl_datahub-1.2.0.9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.8rc3.dist-info → acryl_datahub-1.2.0.9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.8rc3.dist-info → acryl_datahub-1.2.0.9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.8rc3.dist-info → acryl_datahub-1.2.0.9.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.2.0.
|
|
1
|
+
acryl_datahub-1.2.0.9.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=81_11temFHpsBQrXUSspmrmh5GosGpfx5hEmej97X64,320
|
|
5
5
|
datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -150,7 +150,7 @@ datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINq
|
|
|
150
150
|
datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
|
|
151
151
|
datahub/ingestion/api/report.py,sha256=1w63Y2yN49IaDLZaIvXEjRU3yVb_9t3wzymSI-fumZM,18959
|
|
152
152
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
153
|
-
datahub/ingestion/api/sink.py,sha256=
|
|
153
|
+
datahub/ingestion/api/sink.py,sha256=bureB3_sFXNISCM4yZSqhxMHW-ctDkAQqA0lJgQhJQ4,6047
|
|
154
154
|
datahub/ingestion/api/source.py,sha256=JASs7WygVB6g-tcwtchaftzv3lNtlVM31lEa242pn44,21853
|
|
155
155
|
datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
|
|
156
156
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
@@ -188,21 +188,21 @@ datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KP
|
|
|
188
188
|
datahub/ingestion/graph/filters.py,sha256=OfjKhuNRHHLvhHk6Tfwd2IbMLPbbIq4VUyHaSpcDvKk,8664
|
|
189
189
|
datahub/ingestion/graph/links.py,sha256=UwWSdx-j0dPttfJOjfTf4ZmlO7iIsRz5p3nIsqGVHUA,2169
|
|
190
190
|
datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
191
|
-
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=
|
|
191
|
+
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=w-OvKG4Xm6s91czFxkhtawskMOReHyso7WjPlGGY1Rw,10073
|
|
192
192
|
datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
|
|
193
193
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
194
194
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
195
195
|
datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
|
|
196
|
-
datahub/ingestion/run/pipeline.py,sha256=
|
|
196
|
+
datahub/ingestion/run/pipeline.py,sha256=bQf-aeuj41FepLAgCZikCgfso6PIcU1PT7KatEgyOnc,32602
|
|
197
197
|
datahub/ingestion/run/pipeline_config.py,sha256=joG1j9OlwJhb8zqv4TY6_FSzOaKOx6xsBu255A5lP8g,4101
|
|
198
198
|
datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
|
|
199
199
|
datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
200
200
|
datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvSc7YOgY,557
|
|
201
201
|
datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
|
|
202
|
-
datahub/ingestion/sink/datahub_kafka.py,sha256=
|
|
202
|
+
datahub/ingestion/sink/datahub_kafka.py,sha256=bRBTmvXK8mqNwnI08q846rJCNfwq33xmkm5LwTCbz58,2602
|
|
203
203
|
datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
|
|
204
|
-
datahub/ingestion/sink/datahub_rest.py,sha256=
|
|
205
|
-
datahub/ingestion/sink/file.py,sha256=
|
|
204
|
+
datahub/ingestion/sink/datahub_rest.py,sha256=Esh5bzvXVpfSlYsrmB90fuFByyYmwst-ZigolqTQtnk,13333
|
|
205
|
+
datahub/ingestion/sink/file.py,sha256=YA6fdnvMLiLlWlPM4LJ4m6BK-BRXVtBeqhGInvD9lzw,3295
|
|
206
206
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
207
207
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
208
208
|
datahub/ingestion/source/confluent_schema_registry.py,sha256=WednrFENtANY7bWvrmMKoxEfFK9lnrMDLB0C-hXdJDQ,18808
|
|
@@ -267,8 +267,8 @@ datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7
|
|
|
267
267
|
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=9_sfX8BE2vt9RjBMyq27UxCxBaSlD5o3L4gQxrwlPvA,4961
|
|
268
268
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=2syDMaRpYEbtGUVejVAK5d6g8HqM54ZyEM908uLJ55o,3393
|
|
269
269
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
|
|
270
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=
|
|
271
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
|
|
270
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=zbYb1EYnCJxgvsU8oT_76l0q_BW1exVjMWM1GAgd1nc,32600
|
|
271
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=c9a-SlZDOYNiS__vC5ezVVNM0UHasXWxWNRZkkP_aOo,51552
|
|
272
272
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
273
273
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
|
|
274
274
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
|
|
@@ -278,8 +278,8 @@ datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=_5cAXVU8b8T_nAP
|
|
|
278
278
|
datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
|
|
279
279
|
datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
280
280
|
datahub/ingestion/source/cassandra/cassandra.py,sha256=pNy61Z4kTqL_wGcWIYee5fnZiuJDseDcRcQwsxeAssk,14487
|
|
281
|
-
datahub/ingestion/source/cassandra/cassandra_api.py,sha256=
|
|
282
|
-
datahub/ingestion/source/cassandra/cassandra_config.py,sha256=
|
|
281
|
+
datahub/ingestion/source/cassandra/cassandra_api.py,sha256=wCJx-1ZByGMgPkORBO420sGucKkxXXE4pOLWXxdpMIw,14222
|
|
282
|
+
datahub/ingestion/source/cassandra/cassandra_config.py,sha256=w9LBiT8XrGvXlrvpcAU_xm82GiE4nUfEg-VKIX6MRMY,4446
|
|
283
283
|
datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=FdgPK_8s8otTOJDqNM4rpF6Mn4lFWbnjTaKEChzn2iE,11011
|
|
284
284
|
datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
|
|
285
285
|
datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -333,7 +333,7 @@ datahub/ingestion/source/excel/report.py,sha256=oEkeI8J6is7zB9iz4RqASu_-Q5xl36lA
|
|
|
333
333
|
datahub/ingestion/source/excel/source.py,sha256=w_vOz4UD7BcXBBDKoo81_6-QFeOPITuXqkfjIMHCQj4,23827
|
|
334
334
|
datahub/ingestion/source/excel/util.py,sha256=YYmadYuCiT-4_MfQM0YSE7wuDcE0k8o2KrlOKM9Z6eI,406
|
|
335
335
|
datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
336
|
-
datahub/ingestion/source/fivetran/config.py,sha256=
|
|
336
|
+
datahub/ingestion/source/fivetran/config.py,sha256=MkeYBdxDJWH3QRhMIZZ_sfKLC2vgswasi-qlniQ6P8g,9075
|
|
337
337
|
datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
|
|
338
338
|
datahub/ingestion/source/fivetran/fivetran.py,sha256=s8wcECtmuugUoZ0Zdthq0SIPpTLvziZXuhhUX9bJ5N4,14492
|
|
339
339
|
datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=PNzuykiiFTU8FhBIfUbW6udURZpz_35aq7rfffbpIfA,13010
|
|
@@ -411,7 +411,7 @@ datahub/ingestion/source/mock_data/datahub_mock_data.py,sha256=3i3SdBp267cZRszhm
|
|
|
411
411
|
datahub/ingestion/source/mock_data/datahub_mock_data_report.py,sha256=sV_H7JgcuVbrpIBqtGse_BBigMdqP32ZXuanpeXmwVI,331
|
|
412
412
|
datahub/ingestion/source/mock_data/table_naming_helper.py,sha256=zJtEBSJGDvVr-kiKjK7LbHAifK3sfE786M3yO--Bn2o,3493
|
|
413
413
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
414
|
-
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=
|
|
414
|
+
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=lxPVGL6bIZVbPtMRVcktZfuCosSnhNZH_z9GKbdQbfo,11914
|
|
415
415
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
416
416
|
datahub/ingestion/source/powerbi/config.py,sha256=KJmg8f0wNuhLAGFwVhVCGVqaOrXuLTbHGQNOW8LnlSM,26467
|
|
417
417
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
|
|
@@ -523,7 +523,7 @@ datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fx
|
|
|
523
523
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
524
524
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=Zr39j4SI1fPTx1JdopVJyBslFnyp3lZCeb1th9eEB5c,11723
|
|
525
525
|
datahub/ingestion/source/sql/sql_report.py,sha256=gw-OPHSExp_b6DRjvwqE1U6BpkwekxGrsvNMGYSGDio,2671
|
|
526
|
-
datahub/ingestion/source/sql/sql_types.py,sha256=
|
|
526
|
+
datahub/ingestion/source/sql/sql_types.py,sha256=AVeBBXw8aKB1_jw6Wtg58miu-YUfN_-7ZcXwSF-ESgA,16021
|
|
527
527
|
datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
|
|
528
528
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
529
529
|
datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=u0ZvgdJjXZdo_vl7YIQfYuuWbGwpnH6OSozI2e8ZV4I,858
|
|
@@ -635,8 +635,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
635
635
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
636
636
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
637
637
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
638
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
639
|
-
datahub/metadata/schema.avsc,sha256=
|
|
638
|
+
datahub/metadata/_internal_schema_classes.py,sha256=C_RqtHu-dWkZUJWo85PNu9gs4evsVb4vMIbdAyRUM2o,1062642
|
|
639
|
+
datahub/metadata/schema.avsc,sha256=Z5bziVmjWxly-mPLjCWic2eXMCkVIYTbWqRFErSK5uk,705606
|
|
640
640
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
641
641
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
642
642
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -703,7 +703,7 @@ datahub/metadata/com/linkedin/pegasus2avro/policy/__init__.py,sha256=NOszM_xottE
|
|
|
703
703
|
datahub/metadata/com/linkedin/pegasus2avro/post/__init__.py,sha256=FFiWH2BSo057ZwsnFKjpCoAMo9szvC7Vn_RzGTSLnY0,518
|
|
704
704
|
datahub/metadata/com/linkedin/pegasus2avro/query/__init__.py,sha256=fRO1cOfPrSmM24FBO15eyXnh6DZn41w3kO8E5V3W8Dc,827
|
|
705
705
|
datahub/metadata/com/linkedin/pegasus2avro/retention/__init__.py,sha256=Yc7z_RnA-RtPuoDgHxqL_ZT0nJ3AccCTqrXiIXwqtfo,602
|
|
706
|
-
datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py,sha256=
|
|
706
|
+
datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py,sha256=Vm4oUNq8l_51ywOi3vgIzqnGipgc5Ls4jUppF2DPiVk,503
|
|
707
707
|
datahub/metadata/com/linkedin/pegasus2avro/schema/__init__.py,sha256=7JpzLs6S_EySr4orc6b3AvwcvELQHSahpU0DGVxnoHc,2955
|
|
708
708
|
datahub/metadata/com/linkedin/pegasus2avro/schemafield/__init__.py,sha256=HTWeznycKnHBfPEGcCHXPEz83Iq9ypjNaoSfeQeDU9g,397
|
|
709
709
|
datahub/metadata/com/linkedin/pegasus2avro/secret/__init__.py,sha256=qk61EqqVZF6k1Ct6t4Uo-pLb0WtM1EwJKn1XjVy9LHE,305
|
|
@@ -722,7 +722,7 @@ datahub/metadata/com/linkedin/pegasus2avro/usage/__init__.py,sha256=vhCBrCM6hTXc
|
|
|
722
722
|
datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py,sha256=TJ9PXc6rctPWNTY1yYW5lwIh91jD5EDoKex2WYxOLXM,406
|
|
723
723
|
datahub/metadata/com/linkedin/pegasus2avro/view/__init__.py,sha256=-Le-jOqUJKv3ppwMhJHFFV3WwKTKjj1ETTkl9r7fY0o,498
|
|
724
724
|
datahub/metadata/schemas/Access.avsc,sha256=gdEfWJLkvjIz-jzlceK4Dl5pBDdCHG423Ba_EYGQgUk,1562
|
|
725
|
-
datahub/metadata/schemas/Actors.avsc,sha256=
|
|
725
|
+
datahub/metadata/schemas/Actors.avsc,sha256=OUFUYD2YnX_75awkX63e58UDBrPLfkda-SN3as8rvBQ,2206
|
|
726
726
|
datahub/metadata/schemas/ApplicationKey.avsc,sha256=WUKo-n1XiCy-09a2Q76md5VhzChj3nK7TENJJN2hW78,638
|
|
727
727
|
datahub/metadata/schemas/ApplicationProperties.avsc,sha256=ZVjgnPEfi4SEvlyyIruK2sDj8xEjJeSeU3pbPINGum4,1560
|
|
728
728
|
datahub/metadata/schemas/Applications.avsc,sha256=zL6yIzsS3w2463cZvTfdsTqBT7Kii8d58qc28QR_OOs,854
|
|
@@ -957,14 +957,14 @@ datahub/pydantic/compat.py,sha256=TUEo4kSEeOWVAhV6LQtst1phrpVgGtK4uif4OI5vQ2M,19
|
|
|
957
957
|
datahub/sdk/__init__.py,sha256=66OOcFi7qlnL6q72c_yUX2mWU2HudbOdRsC5CIoDxow,1922
|
|
958
958
|
datahub/sdk/_all_entities.py,sha256=eQAmD_fcEHlTShe1_nHpdvHxLDN9njk9bdLnuTrYg8M,905
|
|
959
959
|
datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1321
|
|
960
|
-
datahub/sdk/_shared.py,sha256=
|
|
960
|
+
datahub/sdk/_shared.py,sha256=uSLPjXfUl_0SPt-kWirkhE6u1CKOC67q5c4hJzxS2uo,28812
|
|
961
961
|
datahub/sdk/_utils.py,sha256=oXE2BzsXE5zmSkCP3R1tObD4RHnPeH_ps83D_Dw9JaQ,1169
|
|
962
|
-
datahub/sdk/chart.py,sha256=
|
|
962
|
+
datahub/sdk/chart.py,sha256=_gixCcKp6kCMizWMXwNH1Ip1ZqJ05_Iu2t94dmONQFM,11774
|
|
963
963
|
datahub/sdk/container.py,sha256=IjnFVGDpSFDvgHuuMb7C3VdBxhJuIMq0q6crOs5PupE,7899
|
|
964
|
-
datahub/sdk/dashboard.py,sha256=
|
|
964
|
+
datahub/sdk/dashboard.py,sha256=BtOslP85IfthdnfWzDTSXJKXsTjXP17nIHlYvK7Zqhg,15173
|
|
965
965
|
datahub/sdk/dataflow.py,sha256=gdAPVVkyKvsKtsa1AwhN_LpzidG_XzV3nhtd1cjnzDA,11128
|
|
966
966
|
datahub/sdk/datajob.py,sha256=5kU0txTDcn2ce3AhNry83TazPVhoYZ2rAPPNWM1_FP8,13677
|
|
967
|
-
datahub/sdk/dataset.py,sha256
|
|
967
|
+
datahub/sdk/dataset.py,sha256=-C4TCJAs1PFkLAgkUZEU1JOg3orm7AAIkqjw7oo_4PQ,31400
|
|
968
968
|
datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
|
|
969
969
|
datahub/sdk/entity_client.py,sha256=NGVA2CwLqK16EgOPrPiIFodjPD6sM7eQ5E3w5Yl89cM,9428
|
|
970
970
|
datahub/sdk/lineage_client.py,sha256=DRwUCyi-dDCrH5r_ktqvrryCJNPZ5Tr91y6G7dGaKBk,33754
|
|
@@ -1114,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1114
1114
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1115
1115
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1116
1116
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1117
|
-
acryl_datahub-1.2.0.
|
|
1118
|
-
acryl_datahub-1.2.0.
|
|
1119
|
-
acryl_datahub-1.2.0.
|
|
1120
|
-
acryl_datahub-1.2.0.
|
|
1121
|
-
acryl_datahub-1.2.0.
|
|
1117
|
+
acryl_datahub-1.2.0.9.dist-info/METADATA,sha256=k4545MzPRLqg4GECfo5TVFEB-R9pAinjGnZmW6MBGTk,186866
|
|
1118
|
+
acryl_datahub-1.2.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1119
|
+
acryl_datahub-1.2.0.9.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
|
|
1120
|
+
acryl_datahub-1.2.0.9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1121
|
+
acryl_datahub-1.2.0.9.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/ingestion/api/sink.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import datetime
|
|
2
|
+
import logging
|
|
2
3
|
from abc import ABCMeta, abstractmethod
|
|
3
4
|
from dataclasses import dataclass, field
|
|
4
|
-
from typing import Any, Generic, Optional, Type, TypeVar, cast
|
|
5
|
+
from typing import Any, Callable, Generic, List, Optional, Type, TypeVar, cast
|
|
5
6
|
|
|
6
7
|
from typing_extensions import Self
|
|
7
8
|
|
|
@@ -12,6 +13,8 @@ from datahub.ingestion.api.report import Report
|
|
|
12
13
|
from datahub.utilities.lossy_collections import LossyList
|
|
13
14
|
from datahub.utilities.type_annotations import get_class_from_annotation
|
|
14
15
|
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
15
18
|
|
|
16
19
|
@dataclass
|
|
17
20
|
class SinkReport(Report):
|
|
@@ -89,6 +92,7 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
89
92
|
ctx: PipelineContext
|
|
90
93
|
config: SinkConfig
|
|
91
94
|
report: SinkReportType
|
|
95
|
+
_pre_shutdown_callbacks: List[Callable[[], None]]
|
|
92
96
|
|
|
93
97
|
@classmethod
|
|
94
98
|
def get_config_class(cls) -> Type[SinkConfig]:
|
|
@@ -106,6 +110,7 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
106
110
|
self.ctx = ctx
|
|
107
111
|
self.config = config
|
|
108
112
|
self.report = self.get_report_class()()
|
|
113
|
+
self._pre_shutdown_callbacks = []
|
|
109
114
|
|
|
110
115
|
self.__post_init__()
|
|
111
116
|
|
|
@@ -144,11 +149,28 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
144
149
|
def get_report(self) -> SinkReportType:
|
|
145
150
|
return self.report
|
|
146
151
|
|
|
152
|
+
def register_pre_shutdown_callback(self, callback: Callable[[], None]) -> None:
|
|
153
|
+
"""Register a callback to be executed before the sink shuts down.
|
|
154
|
+
|
|
155
|
+
This is useful for components that need to send final reports or cleanup
|
|
156
|
+
operations before the sink's resources are released.
|
|
157
|
+
"""
|
|
158
|
+
self._pre_shutdown_callbacks.append(callback)
|
|
159
|
+
|
|
147
160
|
def close(self) -> None:
|
|
148
|
-
|
|
161
|
+
"""Close the sink and clean up resources.
|
|
149
162
|
|
|
150
|
-
|
|
151
|
-
|
|
163
|
+
This method executes any registered pre-shutdown callbacks before
|
|
164
|
+
performing the actual shutdown. Subclasses should override this method
|
|
165
|
+
to provide sink-specific cleanup logic while calling super().close()
|
|
166
|
+
to ensure callbacks are executed.
|
|
167
|
+
"""
|
|
168
|
+
# Execute pre-shutdown callbacks before shutdown
|
|
169
|
+
for callback in self._pre_shutdown_callbacks:
|
|
170
|
+
try:
|
|
171
|
+
callback()
|
|
172
|
+
except Exception as e:
|
|
173
|
+
logger.warning(f"Pre-shutdown callback failed: {e}", exc_info=True)
|
|
152
174
|
|
|
153
175
|
def configured(self) -> str:
|
|
154
176
|
"""Override this method to output a human-readable and scrubbed version of the configured sink"""
|
|
@@ -265,6 +265,11 @@ class Pipeline:
|
|
|
265
265
|
with _add_init_error_context("configure transformers"):
|
|
266
266
|
self._configure_transforms()
|
|
267
267
|
|
|
268
|
+
# Register completion callback with sink to handle final reporting
|
|
269
|
+
self.sink.register_pre_shutdown_callback(
|
|
270
|
+
self._notify_reporters_on_ingestion_completion
|
|
271
|
+
)
|
|
272
|
+
|
|
268
273
|
# If all of the initialization succeeds, we can preserve the exit stack until the pipeline run.
|
|
269
274
|
# We need to use an exit stack so that if we have an exception during initialization,
|
|
270
275
|
# things that were already initialized are still cleaned up.
|
|
@@ -344,8 +349,8 @@ class Pipeline:
|
|
|
344
349
|
for reporter in self.reporters:
|
|
345
350
|
try:
|
|
346
351
|
reporter.on_start(ctx=self.ctx)
|
|
347
|
-
except Exception
|
|
348
|
-
logger.warning("Reporting failed on start", exc_info=
|
|
352
|
+
except Exception:
|
|
353
|
+
logger.warning("Reporting failed on start", exc_info=True)
|
|
349
354
|
|
|
350
355
|
def _warn_old_cli_version(self) -> None:
|
|
351
356
|
"""
|
|
@@ -373,11 +378,13 @@ class Pipeline:
|
|
|
373
378
|
)
|
|
374
379
|
current_version = version_stats.client.current.version
|
|
375
380
|
|
|
376
|
-
logger.debug(
|
|
381
|
+
logger.debug(
|
|
382
|
+
f"""
|
|
377
383
|
client_version: {current_version}
|
|
378
384
|
server_default_version: {server_default_version}
|
|
379
385
|
server_default_cli_ahead: True
|
|
380
|
-
"""
|
|
386
|
+
"""
|
|
387
|
+
)
|
|
381
388
|
|
|
382
389
|
self.source.get_report().warning(
|
|
383
390
|
title="Server default CLI version is ahead of CLI version",
|
|
@@ -405,8 +412,8 @@ class Pipeline:
|
|
|
405
412
|
report=self._get_structured_report(),
|
|
406
413
|
ctx=self.ctx,
|
|
407
414
|
)
|
|
408
|
-
except Exception
|
|
409
|
-
logger.warning("Reporting failed on completion", exc_info=
|
|
415
|
+
except Exception:
|
|
416
|
+
logger.warning("Reporting failed on completion", exc_info=True)
|
|
410
417
|
|
|
411
418
|
@classmethod
|
|
412
419
|
def create(
|
|
@@ -519,10 +526,10 @@ class Pipeline:
|
|
|
519
526
|
|
|
520
527
|
except (RuntimeError, SystemExit):
|
|
521
528
|
raise
|
|
522
|
-
except Exception
|
|
529
|
+
except Exception:
|
|
523
530
|
logger.error(
|
|
524
531
|
"Failed to process some records. Continuing.",
|
|
525
|
-
exc_info=
|
|
532
|
+
exc_info=True,
|
|
526
533
|
)
|
|
527
534
|
# TODO: Transformer errors should be reported more loudly / as part of the pipeline report.
|
|
528
535
|
|
|
@@ -551,17 +558,15 @@ class Pipeline:
|
|
|
551
558
|
|
|
552
559
|
self.process_commits()
|
|
553
560
|
self.final_status = PipelineStatus.COMPLETED
|
|
554
|
-
except (SystemExit, KeyboardInterrupt)
|
|
561
|
+
except (SystemExit, KeyboardInterrupt):
|
|
555
562
|
self.final_status = PipelineStatus.CANCELLED
|
|
556
|
-
logger.error("Caught error", exc_info=
|
|
563
|
+
logger.error("Caught error", exc_info=True)
|
|
557
564
|
raise
|
|
558
565
|
except Exception as exc:
|
|
559
566
|
self.final_status = PipelineStatus.ERROR
|
|
560
567
|
self._handle_uncaught_pipeline_exception(exc)
|
|
561
568
|
finally:
|
|
562
569
|
clear_global_warnings()
|
|
563
|
-
self.sink.flush()
|
|
564
|
-
self._notify_reporters_on_ingestion_completion()
|
|
565
570
|
|
|
566
571
|
def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]:
|
|
567
572
|
"""
|
|
@@ -5,7 +5,6 @@ import functools
|
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
7
|
import threading
|
|
8
|
-
import time
|
|
9
8
|
import uuid
|
|
10
9
|
from enum import auto
|
|
11
10
|
from typing import List, Optional, Tuple, Union
|
|
@@ -349,18 +348,11 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
|
|
|
349
348
|
RecordEnvelope(item, metadata={}), NoopWriteCallback()
|
|
350
349
|
)
|
|
351
350
|
|
|
352
|
-
def flush(self) -> None:
|
|
353
|
-
"""Wait for all pending records to be written."""
|
|
354
|
-
i = 0
|
|
355
|
-
while self.report.pending_requests > 0:
|
|
356
|
-
time.sleep(0.1)
|
|
357
|
-
i += 1
|
|
358
|
-
if i % 1000 == 0:
|
|
359
|
-
logger.info(
|
|
360
|
-
f"Waiting for {self.report.pending_requests} records to be written"
|
|
361
|
-
)
|
|
362
|
-
|
|
363
351
|
def close(self):
|
|
352
|
+
# Execute pre-shutdown callbacks first (handled by parent class)
|
|
353
|
+
super().close()
|
|
354
|
+
|
|
355
|
+
# Then perform sink-specific shutdown
|
|
364
356
|
with self.report.main_thread_blocking_timer:
|
|
365
357
|
self.executor.shutdown()
|
|
366
358
|
|
datahub/ingestion/sink/file.py
CHANGED
|
@@ -283,23 +283,30 @@ class BigQuerySchemaApi:
|
|
|
283
283
|
with self.report.list_datasets_timer:
|
|
284
284
|
self.report.num_list_datasets_api_requests += 1
|
|
285
285
|
datasets = self.bq_client.list_datasets(project_id, max_results=maxResults)
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
)
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
286
|
+
result = []
|
|
287
|
+
for d in datasets:
|
|
288
|
+
# TODO: Fetch dataset description individually impacts overall performance if the number of datasets is high (hundreds); instead we should fetch in batch for all datasets.
|
|
289
|
+
# https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
|
|
290
|
+
# https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
|
|
291
|
+
dataset = self.bq_client.get_dataset(d.reference)
|
|
292
|
+
|
|
293
|
+
location = (
|
|
294
|
+
d._properties.get("location")
|
|
295
|
+
if hasattr(d, "_properties") and isinstance(d._properties, dict)
|
|
296
|
+
else None
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
result.append(
|
|
300
|
+
BigqueryDataset(
|
|
301
|
+
name=d.dataset_id,
|
|
302
|
+
labels=d.labels,
|
|
303
|
+
location=location,
|
|
304
|
+
comment=dataset.description,
|
|
305
|
+
created=dataset.created,
|
|
306
|
+
last_altered=dataset.modified,
|
|
307
|
+
)
|
|
300
308
|
)
|
|
301
|
-
|
|
302
|
-
]
|
|
309
|
+
return result
|
|
303
310
|
|
|
304
311
|
# This is not used anywhere
|
|
305
312
|
def get_datasets_for_project_id_with_information_schema(
|
|
@@ -12,6 +12,7 @@ from datahub.emitter.mce_builder import (
|
|
|
12
12
|
make_dataset_urn_with_platform_instance,
|
|
13
13
|
make_schema_field_urn,
|
|
14
14
|
make_tag_urn,
|
|
15
|
+
make_ts_millis,
|
|
15
16
|
)
|
|
16
17
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
17
18
|
from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey
|
|
@@ -300,6 +301,8 @@ class BigQuerySchemaGenerator:
|
|
|
300
301
|
description: Optional[str] = None,
|
|
301
302
|
tags: Optional[Dict[str, str]] = None,
|
|
302
303
|
extra_properties: Optional[Dict[str, str]] = None,
|
|
304
|
+
created: Optional[int] = None,
|
|
305
|
+
last_modified: Optional[int] = None,
|
|
303
306
|
) -> Iterable[MetadataWorkUnit]:
|
|
304
307
|
schema_container_key = self.gen_dataset_key(project_id, dataset)
|
|
305
308
|
|
|
@@ -349,6 +352,8 @@ class BigQuerySchemaGenerator:
|
|
|
349
352
|
),
|
|
350
353
|
tags=tags_joined,
|
|
351
354
|
extra_properties=extra_properties,
|
|
355
|
+
created=created,
|
|
356
|
+
last_modified=last_modified,
|
|
352
357
|
)
|
|
353
358
|
|
|
354
359
|
def _process_project(
|
|
@@ -484,6 +489,12 @@ class BigQuerySchemaGenerator:
|
|
|
484
489
|
else None
|
|
485
490
|
),
|
|
486
491
|
description=bigquery_dataset.comment,
|
|
492
|
+
created=make_ts_millis(bigquery_dataset.created)
|
|
493
|
+
if bigquery_dataset.created
|
|
494
|
+
else None,
|
|
495
|
+
last_modified=make_ts_millis(bigquery_dataset.last_altered)
|
|
496
|
+
if bigquery_dataset.last_altered
|
|
497
|
+
else None,
|
|
487
498
|
)
|
|
488
499
|
|
|
489
500
|
columns = None
|
|
@@ -132,7 +132,23 @@ class CassandraAPI:
|
|
|
132
132
|
|
|
133
133
|
ssl_context = None
|
|
134
134
|
if self.config.ssl_ca_certs:
|
|
135
|
-
|
|
135
|
+
# Map SSL version string to ssl module constant
|
|
136
|
+
ssl_version_map = {
|
|
137
|
+
"TLS_CLIENT": ssl.PROTOCOL_TLS_CLIENT,
|
|
138
|
+
"TLSv1": ssl.PROTOCOL_TLSv1,
|
|
139
|
+
"TLSv1_1": ssl.PROTOCOL_TLSv1_1,
|
|
140
|
+
"TLSv1_2": ssl.PROTOCOL_TLSv1_2,
|
|
141
|
+
"TLSv1_3": ssl.PROTOCOL_TLSv1_2, # Python's ssl module uses TLSv1_2 for TLS 1.3
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
ssl_protocol = (
|
|
145
|
+
ssl_version_map.get(
|
|
146
|
+
self.config.ssl_version, ssl.PROTOCOL_TLS_CLIENT
|
|
147
|
+
)
|
|
148
|
+
if self.config.ssl_version
|
|
149
|
+
else ssl.PROTOCOL_TLS_CLIENT
|
|
150
|
+
)
|
|
151
|
+
ssl_context = ssl.SSLContext(ssl_protocol)
|
|
136
152
|
ssl_context.load_verify_locations(self.config.ssl_ca_certs)
|
|
137
153
|
if self.config.ssl_certfile and self.config.ssl_keyfile:
|
|
138
154
|
ssl_context.load_cert_chain(
|
|
@@ -94,6 +94,11 @@ class CassandraSourceConfig(
|
|
|
94
94
|
description="Path to the SSL key file for SSL connections.",
|
|
95
95
|
)
|
|
96
96
|
|
|
97
|
+
ssl_version: Optional[str] = Field(
|
|
98
|
+
default="TLS_CLIENT",
|
|
99
|
+
description="SSL protocol version to use for connections. Options: TLS_CLIENT, TLSv1, TLSv1_1, TLSv1_2, TLSv1_3. Defaults to TLS_CLIENT.",
|
|
100
|
+
)
|
|
101
|
+
|
|
97
102
|
keyspace_pattern: AllowDenyPattern = Field(
|
|
98
103
|
default=AllowDenyPattern.allow_all(),
|
|
99
104
|
description="Regex patterns to filter keyspaces for ingestion.",
|
|
@@ -70,6 +70,7 @@ class Constant:
|
|
|
70
70
|
|
|
71
71
|
|
|
72
72
|
KNOWN_DATA_PLATFORM_MAPPING = {
|
|
73
|
+
"google_cloud_postgresql": "postgres",
|
|
73
74
|
"postgres": "postgres",
|
|
74
75
|
"snowflake": "snowflake",
|
|
75
76
|
}
|
|
@@ -194,7 +195,7 @@ class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin
|
|
|
194
195
|
|
|
195
196
|
# Configuration for stateful ingestion
|
|
196
197
|
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = pydantic.Field(
|
|
197
|
-
default=None, description="
|
|
198
|
+
default=None, description="Fivetran Stateful Ingestion Config."
|
|
198
199
|
)
|
|
199
200
|
|
|
200
201
|
# Fivetran connector all sources to platform instance mapping
|