acryl-datahub 0.15.0.1rc11__py3-none-any.whl → 0.15.0.1rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/METADATA +2320 -2324
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/RECORD +40 -39
- datahub/__init__.py +1 -1
- datahub/api/circuit_breaker/assertion_circuit_breaker.py +5 -4
- datahub/configuration/common.py +2 -5
- datahub/emitter/mce_builder.py +17 -1
- datahub/emitter/mcp_builder.py +2 -7
- datahub/emitter/mcp_patch_builder.py +2 -2
- datahub/emitter/rest_emitter.py +2 -2
- datahub/ingestion/api/closeable.py +3 -3
- datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +4 -7
- datahub/ingestion/api/report.py +4 -1
- datahub/ingestion/api/sink.py +4 -3
- datahub/ingestion/api/source_helpers.py +2 -6
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +44 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +5 -20
- datahub/ingestion/source/datahub/datahub_kafka_reader.py +2 -1
- datahub/ingestion/source/gc/dataprocess_cleanup.py +23 -10
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +159 -71
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/sql/hive.py +15 -0
- datahub/ingestion/source/sql/hive_metastore.py +7 -0
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/sql_common.py +41 -102
- datahub/ingestion/source/sql/sql_generic_profiler.py +5 -6
- datahub/ingestion/source/sql/sql_report.py +2 -0
- datahub/ingestion/source/state/checkpoint.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +1 -4
- datahub/ingestion/source/unity/proxy.py +8 -27
- datahub/metadata/_schema_classes.py +61 -1
- datahub/metadata/_urns/urn_defs.py +168 -168
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +4 -0
- datahub/metadata/schema.avsc +64 -29
- datahub/metadata/schemas/DataJobKey.avsc +2 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +63 -0
- datahub/utilities/time.py +8 -3
- datahub/utilities/urns/_urn_base.py +5 -7
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc11.dist-info → acryl_datahub-0.15.0.1rc13.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=kKM5imQ7UziWDSMvn1Ic5ZENvcshwalM2y2qGjZxUHY,577
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -6,7 +6,7 @@ datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
|
6
6
|
datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
|
|
7
7
|
datahub/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
datahub/api/circuit_breaker/__init__.py,sha256=-DEDzzFNdX9OCYYj5QYinyv3KORT5TZ_H5h4K8eWG3c,537
|
|
9
|
-
datahub/api/circuit_breaker/assertion_circuit_breaker.py,sha256=
|
|
9
|
+
datahub/api/circuit_breaker/assertion_circuit_breaker.py,sha256=TfebJJHwi8oeoCXLzfBCgUBLXXGlyUnRWzth5CLhyn4,5378
|
|
10
10
|
datahub/api/circuit_breaker/circuit_breaker.py,sha256=diowuNvlpG7rBypiqz1ZFoL0MGpbsLO_quaxDdrZkzw,1518
|
|
11
11
|
datahub/api/circuit_breaker/operation_circuit_breaker.py,sha256=6HctXeYCfxmzZnQDV1kxWkdjZYReKoS3qGtVe54ZgNE,2908
|
|
12
12
|
datahub/api/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -89,7 +89,7 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=qP7kHpN7y3cOR0IGZkD4PGlR
|
|
|
89
89
|
datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
|
|
90
90
|
datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
|
|
91
91
|
datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
|
|
92
|
-
datahub/configuration/common.py,sha256=
|
|
92
|
+
datahub/configuration/common.py,sha256=QN256dCSCZSxbl6JtS_rjcs1i-T3veTDxlZhMhYhkys,10416
|
|
93
93
|
datahub/configuration/config_loader.py,sha256=4V8rrbKvCbfEys2Tlw2uZXb3yC9Hpoubn2O8GXhGe3A,5785
|
|
94
94
|
datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
|
|
95
95
|
datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
|
|
@@ -114,32 +114,32 @@ datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
|
|
|
114
114
|
datahub/emitter/enum_helpers.py,sha256=ZeALUAPi10Q4Z6VM0_WiU9Y60_d0ugZHcUoVmuOCEec,321
|
|
115
115
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
116
116
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
117
|
-
datahub/emitter/mce_builder.py,sha256=
|
|
117
|
+
datahub/emitter/mce_builder.py,sha256=5oZHXs85GGwfL8tY72IPnicyYrRXraN4LgtVQQcZyq8,16417
|
|
118
118
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
119
|
-
datahub/emitter/mcp_builder.py,sha256=
|
|
120
|
-
datahub/emitter/mcp_patch_builder.py,sha256=
|
|
119
|
+
datahub/emitter/mcp_builder.py,sha256=eOcuz41c4a3oTkNk39yYl9bTxpksxqATPHLcqyhPGT0,9856
|
|
120
|
+
datahub/emitter/mcp_patch_builder.py,sha256=ykQFJshFrVF6DjkjcHQ8ZhDEws3ki0gmNjkHNfQtHwQ,4277
|
|
121
121
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
122
|
-
datahub/emitter/rest_emitter.py,sha256=
|
|
122
|
+
datahub/emitter/rest_emitter.py,sha256=oqyRuXG1o1dYjiEIH5TFMb1q0xhRbpxPIA5qkyz0iQ8,16407
|
|
123
123
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
124
124
|
datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
|
|
125
125
|
datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
|
|
126
126
|
datahub/ingestion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
127
127
|
datahub/ingestion/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
|
-
datahub/ingestion/api/closeable.py,sha256=
|
|
128
|
+
datahub/ingestion/api/closeable.py,sha256=k12AT--s4GDtZ-po_rVm5QKgvGIDteeRPByZPIOfecA,599
|
|
129
129
|
datahub/ingestion/api/committable.py,sha256=4S6GuBzvX2vb1A8P506NbspOKfZ1621sBG8t0lvRb8o,886
|
|
130
130
|
datahub/ingestion/api/common.py,sha256=nJVL8YdvokYFajOjmVpSNlLbZJ5iVOFS4KJDlGtJ_jc,2735
|
|
131
131
|
datahub/ingestion/api/decorators.py,sha256=KTNdf2B20L-wlEPF8UsL89a8zwvRSOfA7gOOZnnYalY,3933
|
|
132
132
|
datahub/ingestion/api/global_context.py,sha256=OdSJg4a_RKE52nu8MSiEkK2UqRRDhDTyOleHEAzPKho,575
|
|
133
133
|
datahub/ingestion/api/incremental_lineage_helper.py,sha256=Qke8T4Yba0c-ZzNfSzzXnFP0WjuqUQ1fuN4V3KK4gv4,5913
|
|
134
134
|
datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPsCtRa7ffDGPA1w1hgPUjeenZBU,2514
|
|
135
|
-
datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=
|
|
135
|
+
datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
|
|
136
136
|
datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
|
|
137
137
|
datahub/ingestion/api/registry.py,sha256=LGElUdzhNQoEr-k2SN23mJaIYnA1PYfF97LQxBmWmD8,7262
|
|
138
|
-
datahub/ingestion/api/report.py,sha256=
|
|
138
|
+
datahub/ingestion/api/report.py,sha256=zb5Y_9ogmWm00KqX7_64sIMT24Wfpk7txRwEfKacw5I,4652
|
|
139
139
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
140
|
-
datahub/ingestion/api/sink.py,sha256=
|
|
140
|
+
datahub/ingestion/api/sink.py,sha256=3jw7-x9gXGreOPwn49wG5fT3C8pYhaNMQITdMN6kbag,4478
|
|
141
141
|
datahub/ingestion/api/source.py,sha256=pHfFIBZa57ySpZWnt03mmayWLdbbBAGOhWqWZnf1KUA,18815
|
|
142
|
-
datahub/ingestion/api/source_helpers.py,sha256=
|
|
142
|
+
datahub/ingestion/api/source_helpers.py,sha256=AVO0ogiCKgYmX1ubJaSs6L30TCCgOIalp6awXPF5XM0,19643
|
|
143
143
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
144
144
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
145
145
|
datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -169,7 +169,7 @@ datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4
|
|
|
169
169
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
170
170
|
datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
|
|
171
171
|
datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
172
|
-
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=
|
|
172
|
+
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=O2SGDU2_qMtyr_1BH9-WkNOojFWig2z4O3M21nTRo70,9908
|
|
173
173
|
datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
|
|
174
174
|
datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
|
|
175
175
|
datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -242,7 +242,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7
|
|
|
242
242
|
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
|
|
243
243
|
datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
|
|
244
244
|
datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
|
|
245
|
-
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=
|
|
245
|
+
datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=E5GOx4NWjyZM0xzdpBlNXbvDdKNfW9UtS64XtCYFpzI,31809
|
|
246
246
|
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=Sv6BrK62nu3xpgjYGE-x1xdSTouvvnKDJtazPobhiKQ,50813
|
|
247
247
|
datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
|
|
248
248
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
|
|
@@ -268,7 +268,7 @@ datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
268
268
|
datahub/ingestion/source/datahub/config.py,sha256=rqZFvEmjxjBcW2cTEPYDVTAk3OLzuGIjEFghXPNeZNY,3955
|
|
269
269
|
datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
|
|
270
270
|
datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=F8JrOjSrmJ2B6m1MWh83A1EYFDcGMla749HUeQWMnL0,9464
|
|
271
|
-
datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=
|
|
271
|
+
datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=gnxhhlK-jrfnHqD_4eVmfcdtBNW6pi1N_qkDZ7uSb3o,4187
|
|
272
272
|
datahub/ingestion/source/datahub/datahub_source.py,sha256=2jDnsHEzpGhr00qQI9unSUJYD6Cb1McYFKOVbA-Zcm4,8487
|
|
273
273
|
datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vBCU0XxGcZR6Xxs,940
|
|
274
274
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
@@ -303,9 +303,9 @@ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP
|
|
|
303
303
|
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
|
|
304
304
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
305
305
|
datahub/ingestion/source/gc/datahub_gc.py,sha256=WOg3yIaNmwdbSTwytKeSfIUihsM7FMYBip9u2Dnwk3c,12849
|
|
306
|
-
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=
|
|
306
|
+
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=86Tm3NNWMf0xM4TklNIEeNOjEingKpYy-XvCPeaAb4k,17125
|
|
307
307
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=sZbdkg3MuPVGf8eeeRg_2khGMZ01QoH4dgJiTxf7Srg,9813
|
|
308
|
-
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=
|
|
308
|
+
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=LvDGTaAaI-T0OZ3fkaFwipLdzPePunuSVWoEuSBsfEM,11099
|
|
309
309
|
datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
310
310
|
datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
|
|
311
311
|
datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
|
|
@@ -403,7 +403,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
|
|
|
403
403
|
datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=k7S9Xcmgr3-CvWrd5NEX-V8JSrcAwkm7vbHPTVZicow,3620
|
|
404
404
|
datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
405
405
|
datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
|
|
406
|
-
datahub/ingestion/source/s3/source.py,sha256=
|
|
406
|
+
datahub/ingestion/source/s3/source.py,sha256=USjq86vUU7hKYKi8bhplBhHOjvoQTgguy91uFE24rUI,47336
|
|
407
407
|
datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
408
408
|
datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
|
|
409
409
|
datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
|
|
@@ -451,18 +451,18 @@ datahub/ingestion/source/sql/clickhouse.py,sha256=jzvaXP5Wr0SMhj2rtuvVE821xnfpKi
|
|
|
451
451
|
datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2qkzjH6CpTOgkx4,1443
|
|
452
452
|
datahub/ingestion/source/sql/druid.py,sha256=lhO9CCOlHV-6LjBuAxAxtB9I1pvPtsGSdr63bz6_ilA,2837
|
|
453
453
|
datahub/ingestion/source/sql/hana.py,sha256=0PIvcX0Rz59NyR7Ag5Bv1MBV_UbJwxl9UAopo_xe_CA,1342
|
|
454
|
-
datahub/ingestion/source/sql/hive.py,sha256=
|
|
455
|
-
datahub/ingestion/source/sql/hive_metastore.py,sha256=
|
|
454
|
+
datahub/ingestion/source/sql/hive.py,sha256=NRUrEWnR1JN5U0q4CHlRacdKzxJhS4unFXnXYZT7vZE,30306
|
|
455
|
+
datahub/ingestion/source/sql/hive_metastore.py,sha256=n9WvJzGBYVwjSUKuAWQcYuRJttH81k2S2zjHuw8gvME,36074
|
|
456
456
|
datahub/ingestion/source/sql/mariadb.py,sha256=Hm102kmfs_1rd4lsTYhzVMZq5S3B6cyfvpHSzJjqvMw,737
|
|
457
457
|
datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_k94o7g-0,3350
|
|
458
458
|
datahub/ingestion/source/sql/oracle.py,sha256=ibBtjaneCFto-Rw3k2OxsbT3YHgux1aCtPtv5oA8St4,24533
|
|
459
459
|
datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
|
|
460
460
|
datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
|
|
461
|
-
datahub/ingestion/source/sql/sql_common.py,sha256=
|
|
461
|
+
datahub/ingestion/source/sql/sql_common.py,sha256=E1QmJ35ZuDLiZj-s1niHvIdNMyEsZrwvq_Wuy2EoYMQ,48586
|
|
462
462
|
datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxvJ6cLbCIea7Mww,9449
|
|
463
463
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
464
|
-
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=
|
|
465
|
-
datahub/ingestion/source/sql/sql_report.py,sha256=
|
|
464
|
+
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=oLjqgsxVKGerj5dZnCCRMremrxjp-kr5_P45gFOM4Pg,11602
|
|
465
|
+
datahub/ingestion/source/sql/sql_report.py,sha256=gw-OPHSExp_b6DRjvwqE1U6BpkwekxGrsvNMGYSGDio,2671
|
|
466
466
|
datahub/ingestion/source/sql/sql_types.py,sha256=lrJpavRTE7aDVAKOrKZcrp4CsKydiiaza1wt2ieqWzs,15041
|
|
467
467
|
datahub/ingestion/source/sql/sql_utils.py,sha256=w9YFNm_qJNjOcWAWBI_lUoFMbd0wT8q0LoT7Ia71tIE,8100
|
|
468
468
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
@@ -473,10 +473,10 @@ datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5f
|
|
|
473
473
|
datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
|
|
474
474
|
datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
|
|
475
475
|
datahub/ingestion/source/sql/mssql/job_models.py,sha256=ztXDrD4anhzwWvACIm9fucE2WhMDMKkJ4alMYOQOqWA,7083
|
|
476
|
-
datahub/ingestion/source/sql/mssql/source.py,sha256=
|
|
476
|
+
datahub/ingestion/source/sql/mssql/source.py,sha256=WV2rU_sN5pqd4MEu6p4kwQRpADFjG0qh27tx7qP5AOw,30931
|
|
477
477
|
datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
|
|
478
478
|
datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
479
|
-
datahub/ingestion/source/state/checkpoint.py,sha256
|
|
479
|
+
datahub/ingestion/source/state/checkpoint.py,sha256=-fTUZKkY4nHTFqSWZ0jJkkdIu_tWlOjRNhm4FTr4ul4,8860
|
|
480
480
|
datahub/ingestion/source/state/entity_removal_state.py,sha256=zvIsmYg7oiIu2FhecU0VfLBNToUqvKoKyDeiFfkOcyc,6611
|
|
481
481
|
datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-LjS4XF0p6BxSAcLY-xKRzM,512
|
|
482
482
|
datahub/ingestion/source/state/profiling_state_handler.py,sha256=IgvmTszxjR53JX-uTNjFx7ZMWK34DlAWjA3QKSLa644,4293
|
|
@@ -491,7 +491,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
491
491
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
|
|
492
492
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
493
493
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
494
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
494
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=SWEJi0LoIhb8rVVmmhVxngENo53QtXFvJE02aOIzG6Q,140034
|
|
495
495
|
datahub/ingestion/source/tableau/tableau_common.py,sha256=9gQLq_3BlAsKll83uVlnWJRWaIDtFtREUyuimXF13Z0,26219
|
|
496
496
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
|
|
497
497
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
|
|
@@ -502,7 +502,7 @@ datahub/ingestion/source/unity/config.py,sha256=m4-n7mYz4Ct4L1QdfJFklwHyj8boKCbV
|
|
|
502
502
|
datahub/ingestion/source/unity/connection_test.py,sha256=B143Wb28fS0V4GhygU9hzKqiArWBjsQO54IUCPf23dc,2586
|
|
503
503
|
datahub/ingestion/source/unity/ge_profiler.py,sha256=DFQKOqryMWFg-NqwfFGPklNH2hHSmZGKs8ij8QmXd7w,6402
|
|
504
504
|
datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
|
|
505
|
-
datahub/ingestion/source/unity/proxy.py,sha256=
|
|
505
|
+
datahub/ingestion/source/unity/proxy.py,sha256=_6kCI7M4-26pZ9ZMGJUh6LwYmbGAZlnvc8GY4yd6QAs,18403
|
|
506
506
|
datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
|
|
507
507
|
datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
|
|
508
508
|
datahub/ingestion/source/unity/report.py,sha256=0Y-ciHVTI6ZKNCJ5zWoQh3Ze1c_GMqmTMKFwzXDuuOg,2788
|
|
@@ -566,12 +566,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
566
566
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
567
567
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
568
568
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
569
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
570
|
-
datahub/metadata/schema.avsc,sha256=
|
|
569
|
+
datahub/metadata/_schema_classes.py,sha256=IAWpWPxOeGmvmc96dapE0CySk1Rikbh-YieT-K9YTMY,964636
|
|
570
|
+
datahub/metadata/schema.avsc,sha256=CeVb_Z7k0e5kmeqDUXUW7JDL6KSKBCdfAZzqRI_mLZo,729869
|
|
571
571
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
572
572
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
573
573
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
574
|
-
datahub/metadata/_urns/urn_defs.py,sha256=
|
|
574
|
+
datahub/metadata/_urns/urn_defs.py,sha256=gcUHCVwelD5aSuPgE1vmao242tQQiHe2m9kH8Fs1y1E,107287
|
|
575
575
|
datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
576
576
|
datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
577
577
|
datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
|
|
@@ -581,7 +581,7 @@ datahub/metadata/com/linkedin/pegasus2avro/access/token/__init__.py,sha256=P9M7N
|
|
|
581
581
|
datahub/metadata/com/linkedin/pegasus2avro/assertion/__init__.py,sha256=PgK5O-6pVRaEcvmwXAsSkwRLe8NjGiLH8AVBXeArqK8,5751
|
|
582
582
|
datahub/metadata/com/linkedin/pegasus2avro/businessattribute/__init__.py,sha256=N8kO-eUi0_Rt7weizIExxlnJ2_kZRtPrZLWCC1xtDMA,653
|
|
583
583
|
datahub/metadata/com/linkedin/pegasus2avro/chart/__init__.py,sha256=RNyyHLBNp_fxgFcBOLWO2UsXR1ofD_JczcBdPEQSusg,848
|
|
584
|
-
datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=
|
|
584
|
+
datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py,sha256=ukX0VnveTrMx9G6uDaTkuk4Z2kxXr2hUK8srZuRPxj0,5520
|
|
585
585
|
datahub/metadata/com/linkedin/pegasus2avro/common/fieldtransformer/__init__.py,sha256=FN63vLiB3FCmIRqBjTA-0Xt7M6i7h5NhaVzbA1ysv18,396
|
|
586
586
|
datahub/metadata/com/linkedin/pegasus2avro/connection/__init__.py,sha256=qRtw-dB14pzVzgQ0pDK8kyBplNdpRxVKNj4D70e_FqI,564
|
|
587
587
|
datahub/metadata/com/linkedin/pegasus2avro/container/__init__.py,sha256=3yWt36KqDKFhRc9pzvt0AMnbMTlhKurGvT3BUvc25QU,510
|
|
@@ -705,7 +705,7 @@ datahub/metadata/schemas/DataHubViewInfo.avsc,sha256=U3fBIoG9ietLUpOknfQGNekqBdP
|
|
|
705
705
|
datahub/metadata/schemas/DataHubViewKey.avsc,sha256=p53axIdSVbubo3r23Vpsed7NqRcQBMGveVikEHAVAok,424
|
|
706
706
|
datahub/metadata/schemas/DataJobInfo.avsc,sha256=--obUbt_4X2paB39EeRKP13sBSiK-r0nq070EamoV1w,7212
|
|
707
707
|
datahub/metadata/schemas/DataJobInputOutput.avsc,sha256=H1O8eAzZV34tvULdu67iBSWkdn08rt7wS208b8Nisbk,15268
|
|
708
|
-
datahub/metadata/schemas/DataJobKey.avsc,sha256=
|
|
708
|
+
datahub/metadata/schemas/DataJobKey.avsc,sha256=4F3myS-O6n7AlUqTvCkMSFvsYAjVhUq6uaQVbqLoYdM,1583
|
|
709
709
|
datahub/metadata/schemas/DataPlatformInfo.avsc,sha256=WGPFumBNHbR75vsLrivnRCbBc8vSCuxDw2UlylMieh4,2686
|
|
710
710
|
datahub/metadata/schemas/DataPlatformInstance.avsc,sha256=SNd3v_YyyLaDflv8Rd5cQR9GrVuky_cDTkYM6FqJiM8,1058
|
|
711
711
|
datahub/metadata/schemas/DataPlatformInstanceKey.avsc,sha256=sXUV5EMT6N-x8d6s8ebcJ5JdFIOsJCtiiU5Jtm-ncIk,800
|
|
@@ -721,6 +721,7 @@ datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkL
|
|
|
721
721
|
datahub/metadata/schemas/DataProcessKey.avsc,sha256=mY1BDiEYo8RchI9DckQEz9Vks5Ibt2RdWZU8OYGnrHA,2240
|
|
722
722
|
datahub/metadata/schemas/DataProductKey.avsc,sha256=tcdQNWk3pLA3xZzOnHvZuq2u4SQuk2YcAlsxE8CcEeU,621
|
|
723
723
|
datahub/metadata/schemas/DataProductProperties.avsc,sha256=nYEK6JgpTprU0iZaqWLZsBGYJLkh6HCi1qCu-wbYhvM,6925
|
|
724
|
+
datahub/metadata/schemas/DataTransformLogic.avsc,sha256=wDng1GK9znVoK0INHGiSCSa-AH5MrDkVdMzz4wOWmrY,2011
|
|
724
725
|
datahub/metadata/schemas/DataTypeInfo.avsc,sha256=MCjzal71P8uIXZg161LrU8rZTJocZeizK-YxYA0Det0,704
|
|
725
726
|
datahub/metadata/schemas/DataTypeKey.avsc,sha256=Gs5uc_azwg10e36ZbwDTFQMevr0IfiFvJoEGHRzEilw,546
|
|
726
727
|
datahub/metadata/schemas/DatahubIngestionCheckpoint.avsc,sha256=m2Zyrx3ZWDc5gHuwbmBSRJ3JN4NFkpUhDEKM2Yeuqrw,5681
|
|
@@ -937,7 +938,7 @@ datahub/utilities/str_enum.py,sha256=EsqCLPbrqyQ2YU_wt7QP-a6P5fnpIshXJ3AI8gLBlVA
|
|
|
937
938
|
datahub/utilities/tee_io.py,sha256=jBrsUfTPTk9IICntfGOG0HR-Fjp8BQMde-FPQ4r3kuI,601
|
|
938
939
|
datahub/utilities/threaded_iterator_executor.py,sha256=WC4tvJ4TQRkH0VO_FD91GbedcKUqx0lc4tHDNOiF6ps,1770
|
|
939
940
|
datahub/utilities/threading_timeout.py,sha256=hOzDI55E3onXblHNwGsePJUWMXo5zqaWCnoYdL2-KPM,1316
|
|
940
|
-
datahub/utilities/time.py,sha256=
|
|
941
|
+
datahub/utilities/time.py,sha256=Q7S_Zyom8C2zcl2xFbjNw6K8nZsCub5XGAB4OEmIS34,1847
|
|
941
942
|
datahub/utilities/topological_sort.py,sha256=kcK5zPSR393fgItr-KSLV3bDqfJfBRS8E5kkCpPBgUY,1358
|
|
942
943
|
datahub/utilities/type_annotations.py,sha256=FvcB__a6X0CLoz-sBXwqpdceqSqTHgkLXGQ6wSmiV8w,970
|
|
943
944
|
datahub/utilities/unified_diff.py,sha256=8uRvM_kN-sdAzR4Ym6CgmpjrmO4CrcKtzZ4P-Cn6aEA,8422
|
|
@@ -947,7 +948,7 @@ datahub/utilities/yaml_sync_utils.py,sha256=65IEe8quW3_zHCR8CyoDkZyopeZJazU-IyMr
|
|
|
947
948
|
datahub/utilities/registries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
948
949
|
datahub/utilities/registries/domain_registry.py,sha256=0SfcZNop-PXBbl-AWw92vAyb28i0YXTr-TKdBwixmOw,2452
|
|
949
950
|
datahub/utilities/urns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
950
|
-
datahub/utilities/urns/_urn_base.py,sha256=
|
|
951
|
+
datahub/utilities/urns/_urn_base.py,sha256=oly7C6EWmEA0kAfy9TxidH8U97ouRFo2CyJuXZgfLi8,9211
|
|
951
952
|
datahub/utilities/urns/corp_group_urn.py,sha256=6H5Q6nZvAXu80IZBDCeM8xo_9ap9pgwtyi60QXx3hzY,75
|
|
952
953
|
datahub/utilities/urns/corpuser_urn.py,sha256=h-Yh-9QRbtQOhxxzxEBc7skoavpGaKDKVNrsxSXZ1yQ,88
|
|
953
954
|
datahub/utilities/urns/data_flow_urn.py,sha256=w1Z7ET1L1OtYD1w-xiUYtyCczsxZZ1l3LRyTRv5NdpE,73
|
|
@@ -982,8 +983,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
982
983
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
983
984
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
984
985
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
985
|
-
acryl_datahub-0.15.0.
|
|
986
|
-
acryl_datahub-0.15.0.
|
|
987
|
-
acryl_datahub-0.15.0.
|
|
988
|
-
acryl_datahub-0.15.0.
|
|
989
|
-
acryl_datahub-0.15.0.
|
|
986
|
+
acryl_datahub-0.15.0.1rc13.dist-info/METADATA,sha256=KnCOYV5Kg855hgL3B3zmYHzPnXVeMoZYf_3ScEj1cyA,173444
|
|
987
|
+
acryl_datahub-0.15.0.1rc13.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
988
|
+
acryl_datahub-0.15.0.1rc13.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
989
|
+
acryl_datahub-0.15.0.1rc13.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
990
|
+
acryl_datahub-0.15.0.1rc13.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from datetime import datetime, timedelta
|
|
3
|
+
from datetime import datetime, timedelta, timezone
|
|
4
4
|
from typing import Any, Dict, List, Optional
|
|
5
5
|
|
|
6
6
|
from pydantic import Field
|
|
@@ -10,6 +10,7 @@ from datahub.api.circuit_breaker.circuit_breaker import (
|
|
|
10
10
|
CircuitBreakerConfig,
|
|
11
11
|
)
|
|
12
12
|
from datahub.api.graphql import Assertion, Operation
|
|
13
|
+
from datahub.emitter.mce_builder import parse_ts_millis
|
|
13
14
|
|
|
14
15
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
15
16
|
|
|
@@ -49,7 +50,7 @@ class AssertionCircuitBreaker(AbstractCircuitBreaker):
|
|
|
49
50
|
if not operations:
|
|
50
51
|
return None
|
|
51
52
|
else:
|
|
52
|
-
return
|
|
53
|
+
return parse_ts_millis(operations[0]["lastUpdatedTimestamp"])
|
|
53
54
|
|
|
54
55
|
def _check_if_assertion_failed(
|
|
55
56
|
self, assertions: List[Dict[str, Any]], last_updated: Optional[datetime] = None
|
|
@@ -93,7 +94,7 @@ class AssertionCircuitBreaker(AbstractCircuitBreaker):
|
|
|
93
94
|
logger.info(f"Found successful assertion: {assertion_urn}")
|
|
94
95
|
result = False
|
|
95
96
|
if last_updated is not None:
|
|
96
|
-
last_run =
|
|
97
|
+
last_run = parse_ts_millis(last_assertion.time)
|
|
97
98
|
if last_updated > last_run:
|
|
98
99
|
logger.error(
|
|
99
100
|
f"Missing assertion run for {assertion_urn}. The dataset was updated on {last_updated} but the last assertion run was at {last_run}"
|
|
@@ -117,7 +118,7 @@ class AssertionCircuitBreaker(AbstractCircuitBreaker):
|
|
|
117
118
|
)
|
|
118
119
|
|
|
119
120
|
if not last_updated:
|
|
120
|
-
last_updated = datetime.now() - self.config.time_delta
|
|
121
|
+
last_updated = datetime.now(tz=timezone.utc) - self.config.time_delta
|
|
121
122
|
logger.info(
|
|
122
123
|
f"Dataset {urn} doesn't have last updated or check_last_assertion_time is false, using calculated min assertion date {last_updated}"
|
|
123
124
|
)
|
datahub/configuration/common.py
CHANGED
|
@@ -10,7 +10,6 @@ from typing import (
|
|
|
10
10
|
List,
|
|
11
11
|
Optional,
|
|
12
12
|
Type,
|
|
13
|
-
TypeVar,
|
|
14
13
|
Union,
|
|
15
14
|
runtime_checkable,
|
|
16
15
|
)
|
|
@@ -19,14 +18,12 @@ import pydantic
|
|
|
19
18
|
from cached_property import cached_property
|
|
20
19
|
from pydantic import BaseModel, Extra, ValidationError
|
|
21
20
|
from pydantic.fields import Field
|
|
22
|
-
from typing_extensions import Protocol
|
|
21
|
+
from typing_extensions import Protocol, Self
|
|
23
22
|
|
|
24
23
|
from datahub.configuration._config_enum import ConfigEnum as ConfigEnum # noqa: I250
|
|
25
24
|
from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
|
|
26
25
|
from datahub.utilities.dedup_list import deduplicate_list
|
|
27
26
|
|
|
28
|
-
_ConfigSelf = TypeVar("_ConfigSelf", bound="ConfigModel")
|
|
29
|
-
|
|
30
27
|
REDACT_KEYS = {
|
|
31
28
|
"password",
|
|
32
29
|
"token",
|
|
@@ -109,7 +106,7 @@ class ConfigModel(BaseModel):
|
|
|
109
106
|
schema_extra = _schema_extra
|
|
110
107
|
|
|
111
108
|
@classmethod
|
|
112
|
-
def parse_obj_allow_extras(cls
|
|
109
|
+
def parse_obj_allow_extras(cls, obj: Any) -> Self:
|
|
113
110
|
if PYDANTIC_VERSION_2:
|
|
114
111
|
try:
|
|
115
112
|
with unittest.mock.patch.dict(
|
datahub/emitter/mce_builder.py
CHANGED
|
@@ -6,7 +6,7 @@ import logging
|
|
|
6
6
|
import os
|
|
7
7
|
import re
|
|
8
8
|
import time
|
|
9
|
-
from datetime import datetime
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
10
|
from enum import Enum
|
|
11
11
|
from typing import (
|
|
12
12
|
TYPE_CHECKING,
|
|
@@ -103,6 +103,22 @@ def make_ts_millis(ts: Optional[datetime]) -> Optional[int]:
|
|
|
103
103
|
return int(ts.timestamp() * 1000)
|
|
104
104
|
|
|
105
105
|
|
|
106
|
+
@overload
|
|
107
|
+
def parse_ts_millis(ts: float) -> datetime:
|
|
108
|
+
...
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@overload
|
|
112
|
+
def parse_ts_millis(ts: None) -> None:
|
|
113
|
+
...
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
|
|
117
|
+
if ts is None:
|
|
118
|
+
return None
|
|
119
|
+
return datetime.fromtimestamp(ts / 1000, tz=timezone.utc)
|
|
120
|
+
|
|
121
|
+
|
|
106
122
|
def make_data_platform_urn(platform: str) -> str:
|
|
107
123
|
if platform.startswith("urn:li:dataPlatform:"):
|
|
108
124
|
return platform
|
datahub/emitter/mcp_builder.py
CHANGED
|
@@ -4,8 +4,8 @@ from pydantic.fields import Field
|
|
|
4
4
|
from pydantic.main import BaseModel
|
|
5
5
|
|
|
6
6
|
from datahub.cli.env_utils import get_boolean_env_variable
|
|
7
|
-
from datahub.emitter.enum_helpers import get_enum_options
|
|
8
7
|
from datahub.emitter.mce_builder import (
|
|
8
|
+
ALL_ENV_TYPES,
|
|
9
9
|
Aspect,
|
|
10
10
|
datahub_guid,
|
|
11
11
|
make_container_urn,
|
|
@@ -25,7 +25,6 @@ from datahub.metadata.schema_classes import (
|
|
|
25
25
|
ContainerClass,
|
|
26
26
|
DomainsClass,
|
|
27
27
|
EmbedClass,
|
|
28
|
-
FabricTypeClass,
|
|
29
28
|
GlobalTagsClass,
|
|
30
29
|
MetadataChangeEventClass,
|
|
31
30
|
OwnerClass,
|
|
@@ -206,11 +205,7 @@ def gen_containers(
|
|
|
206
205
|
# Extra validation on the env field.
|
|
207
206
|
# In certain cases (mainly for backwards compatibility), the env field will actually
|
|
208
207
|
# have a platform instance name.
|
|
209
|
-
env =
|
|
210
|
-
container_key.env
|
|
211
|
-
if container_key.env in get_enum_options(FabricTypeClass)
|
|
212
|
-
else None
|
|
213
|
-
)
|
|
208
|
+
env = container_key.env if container_key.env in ALL_ENV_TYPES else None
|
|
214
209
|
|
|
215
210
|
container_urn = container_key.as_urn()
|
|
216
211
|
|
|
@@ -2,7 +2,7 @@ import json
|
|
|
2
2
|
import time
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
-
from typing import Any, Dict,
|
|
5
|
+
from typing import Any, Dict, List, Optional, Sequence, Union
|
|
6
6
|
|
|
7
7
|
from datahub.emitter.aspect import JSON_PATCH_CONTENT_TYPE
|
|
8
8
|
from datahub.emitter.serialization_helper import pre_json_transform
|
|
@@ -75,7 +75,7 @@ class MetadataPatchProposal:
|
|
|
75
75
|
# TODO: Validate that aspectName is a valid aspect for this entityType
|
|
76
76
|
self.patches[aspect_name].append(_Patch(op, path, value))
|
|
77
77
|
|
|
78
|
-
def build(self) ->
|
|
78
|
+
def build(self) -> List[MetadataChangeProposalClass]:
|
|
79
79
|
return [
|
|
80
80
|
MetadataChangeProposalClass(
|
|
81
81
|
entityUrn=self.urn,
|
datahub/emitter/rest_emitter.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
from json.decoder import JSONDecodeError
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Union
|
|
7
7
|
|
|
8
8
|
import requests
|
|
9
9
|
from deprecated import deprecated
|
|
@@ -288,7 +288,7 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
288
288
|
|
|
289
289
|
def emit_mcps(
|
|
290
290
|
self,
|
|
291
|
-
mcps:
|
|
291
|
+
mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
|
|
292
292
|
async_flag: Optional[bool] = None,
|
|
293
293
|
) -> int:
|
|
294
294
|
logger.debug("Attempting to emit batch mcps")
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
2
|
from contextlib import AbstractContextManager
|
|
3
3
|
from types import TracebackType
|
|
4
|
-
from typing import Optional, Type
|
|
4
|
+
from typing import Optional, Type
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
from typing_extensions import Self
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class Closeable(AbstractContextManager):
|
|
@@ -11,7 +11,7 @@ class Closeable(AbstractContextManager):
|
|
|
11
11
|
def close(self) -> None:
|
|
12
12
|
pass
|
|
13
13
|
|
|
14
|
-
def __enter__(self
|
|
14
|
+
def __enter__(self) -> Self:
|
|
15
15
|
# This method is mainly required for type checking.
|
|
16
16
|
return self
|
|
17
17
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from typing import Any, Dict, NewType, Optional
|
|
3
|
+
from typing import Any, Dict, NewType, Optional
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
4
6
|
|
|
5
7
|
import datahub.emitter.mce_builder as builder
|
|
6
8
|
from datahub.configuration.common import ConfigModel
|
|
@@ -17,9 +19,6 @@ class IngestionCheckpointingProviderConfig(ConfigModel):
|
|
|
17
19
|
pass
|
|
18
20
|
|
|
19
21
|
|
|
20
|
-
_Self = TypeVar("_Self", bound="IngestionCheckpointingProviderBase")
|
|
21
|
-
|
|
22
|
-
|
|
23
22
|
@dataclass()
|
|
24
23
|
class IngestionCheckpointingProviderBase(StatefulCommittable[CheckpointJobStatesMap]):
|
|
25
24
|
"""
|
|
@@ -32,9 +31,7 @@ class IngestionCheckpointingProviderBase(StatefulCommittable[CheckpointJobStates
|
|
|
32
31
|
|
|
33
32
|
@classmethod
|
|
34
33
|
@abstractmethod
|
|
35
|
-
def create(
|
|
36
|
-
cls: Type[_Self], config_dict: Dict[str, Any], ctx: PipelineContext
|
|
37
|
-
) -> "_Self":
|
|
34
|
+
def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> Self:
|
|
38
35
|
pass
|
|
39
36
|
|
|
40
37
|
@abstractmethod
|
datahub/ingestion/api/report.py
CHANGED
|
@@ -42,7 +42,10 @@ class Report(SupportsAsObj):
|
|
|
42
42
|
return some_val.as_obj()
|
|
43
43
|
elif isinstance(some_val, pydantic.BaseModel):
|
|
44
44
|
return Report.to_pure_python_obj(some_val.dict())
|
|
45
|
-
elif dataclasses.is_dataclass(some_val):
|
|
45
|
+
elif dataclasses.is_dataclass(some_val) and not isinstance(some_val, type):
|
|
46
|
+
# The `is_dataclass` function returns `True` for both instances and classes.
|
|
47
|
+
# We need an extra check to ensure an instance was passed in.
|
|
48
|
+
# https://docs.python.org/3/library/dataclasses.html#dataclasses.is_dataclass
|
|
46
49
|
return dataclasses.asdict(some_val)
|
|
47
50
|
elif isinstance(some_val, list):
|
|
48
51
|
return [Report.to_pure_python_obj(v) for v in some_val if v is not None]
|
datahub/ingestion/api/sink.py
CHANGED
|
@@ -3,6 +3,8 @@ from abc import ABCMeta, abstractmethod
|
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
4
|
from typing import Any, Generic, Optional, Type, TypeVar, cast
|
|
5
5
|
|
|
6
|
+
from typing_extensions import Self
|
|
7
|
+
|
|
6
8
|
from datahub.configuration.common import ConfigModel
|
|
7
9
|
from datahub.ingestion.api.closeable import Closeable
|
|
8
10
|
from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit
|
|
@@ -79,7 +81,6 @@ class NoopWriteCallback(WriteCallback):
|
|
|
79
81
|
|
|
80
82
|
SinkReportType = TypeVar("SinkReportType", bound=SinkReport, covariant=True)
|
|
81
83
|
SinkConfig = TypeVar("SinkConfig", bound=ConfigModel, covariant=True)
|
|
82
|
-
Self = TypeVar("Self", bound="Sink")
|
|
83
84
|
|
|
84
85
|
|
|
85
86
|
class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
@@ -90,7 +91,7 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
90
91
|
report: SinkReportType
|
|
91
92
|
|
|
92
93
|
@classmethod
|
|
93
|
-
def get_config_class(cls
|
|
94
|
+
def get_config_class(cls) -> Type[SinkConfig]:
|
|
94
95
|
config_class = get_class_from_annotation(cls, Sink, ConfigModel)
|
|
95
96
|
assert config_class, "Sink subclasses must define a config class"
|
|
96
97
|
return cast(Type[SinkConfig], config_class)
|
|
@@ -112,7 +113,7 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
|
|
|
112
113
|
pass
|
|
113
114
|
|
|
114
115
|
@classmethod
|
|
115
|
-
def create(cls
|
|
116
|
+
def create(cls, config_dict: dict, ctx: PipelineContext) -> "Self":
|
|
116
117
|
return cls(ctx, cls.get_config_class().parse_obj(config_dict))
|
|
117
118
|
|
|
118
119
|
def handle_work_unit_start(self, workunit: WorkUnit) -> None:
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from datetime import datetime, timezone
|
|
3
2
|
from typing import (
|
|
4
3
|
TYPE_CHECKING,
|
|
5
4
|
Dict,
|
|
@@ -14,7 +13,7 @@ from typing import (
|
|
|
14
13
|
)
|
|
15
14
|
|
|
16
15
|
from datahub.configuration.time_window_config import BaseTimeWindowConfig
|
|
17
|
-
from datahub.emitter.mce_builder import make_dataplatform_instance_urn
|
|
16
|
+
from datahub.emitter.mce_builder import make_dataplatform_instance_urn, parse_ts_millis
|
|
18
17
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
19
18
|
from datahub.emitter.mcp_builder import entity_supports_aspect
|
|
20
19
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
@@ -479,10 +478,7 @@ def auto_empty_dataset_usage_statistics(
|
|
|
479
478
|
if invalid_timestamps:
|
|
480
479
|
logger.warning(
|
|
481
480
|
f"Usage statistics with unexpected timestamps, bucket_duration={config.bucket_duration}:\n"
|
|
482
|
-
", ".join(
|
|
483
|
-
str(datetime.fromtimestamp(ts / 1000, tz=timezone.utc))
|
|
484
|
-
for ts in invalid_timestamps
|
|
485
|
-
)
|
|
481
|
+
", ".join(str(parse_ts_millis(ts)) for ts in invalid_timestamps)
|
|
486
482
|
)
|
|
487
483
|
|
|
488
484
|
for bucket in bucket_timestamps:
|
|
@@ -146,12 +146,55 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
|
|
|
146
146
|
aspect_value=source_info_aspect,
|
|
147
147
|
)
|
|
148
148
|
|
|
149
|
+
@staticmethod
|
|
150
|
+
def _convert_sets_to_lists(obj: Any) -> Any:
|
|
151
|
+
"""
|
|
152
|
+
Recursively converts all sets to lists in a Python object.
|
|
153
|
+
Works with nested dictionaries, lists, and sets.
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
obj: Any Python object that might contain sets
|
|
157
|
+
|
|
158
|
+
Returns:
|
|
159
|
+
The object with all sets converted to lists
|
|
160
|
+
"""
|
|
161
|
+
if isinstance(obj, dict):
|
|
162
|
+
return {
|
|
163
|
+
key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
|
|
164
|
+
for key, value in obj.items()
|
|
165
|
+
}
|
|
166
|
+
elif isinstance(obj, list):
|
|
167
|
+
return [
|
|
168
|
+
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
169
|
+
for element in obj
|
|
170
|
+
]
|
|
171
|
+
elif isinstance(obj, set):
|
|
172
|
+
return [
|
|
173
|
+
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
174
|
+
for element in obj
|
|
175
|
+
]
|
|
176
|
+
elif isinstance(obj, tuple):
|
|
177
|
+
return tuple(
|
|
178
|
+
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
179
|
+
for element in obj
|
|
180
|
+
)
|
|
181
|
+
else:
|
|
182
|
+
return obj
|
|
183
|
+
|
|
149
184
|
def _get_recipe_to_report(self, ctx: PipelineContext) -> str:
|
|
150
185
|
assert ctx.pipeline_config
|
|
151
186
|
if not self.report_recipe or not ctx.pipeline_config.get_raw_dict():
|
|
152
187
|
return ""
|
|
153
188
|
else:
|
|
154
|
-
|
|
189
|
+
redacted_recipe = redact_raw_config(ctx.pipeline_config.get_raw_dict())
|
|
190
|
+
# This is required otherwise json dumps will fail
|
|
191
|
+
# with a TypeError: Object of type set is not JSON serializable
|
|
192
|
+
converted_recipe = (
|
|
193
|
+
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(
|
|
194
|
+
redacted_recipe
|
|
195
|
+
)
|
|
196
|
+
)
|
|
197
|
+
return json.dumps(converted_recipe)
|
|
155
198
|
|
|
156
199
|
def _emit_aspect(self, entity_urn: Urn, aspect_value: _Aspect) -> None:
|
|
157
200
|
self.sink.write_record_async(
|