acryl-datahub 1.0.0.1rc6__py3-none-any.whl → 1.0.0.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/METADATA +2518 -2518
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/RECORD +25 -25
- datahub/_version.py +1 -1
- datahub/cli/ingest_cli.py +4 -4
- datahub/emitter/mcp_builder.py +4 -0
- datahub/errors.py +4 -0
- datahub/ingestion/api/source.py +2 -1
- datahub/ingestion/api/source_helpers.py +9 -1
- datahub/ingestion/graph/client.py +122 -7
- datahub/ingestion/graph/filters.py +41 -16
- datahub/ingestion/source/cassandra/cassandra.py +1 -10
- datahub/ingestion/source/iceberg/iceberg.py +9 -9
- datahub/ingestion/source/mlflow.py +3 -7
- datahub/ingestion/source/powerbi/powerbi.py +14 -1
- datahub/ingestion/source/sql/trino.py +4 -3
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/vertexai/vertexai.py +7 -7
- datahub/sdk/search_client.py +81 -8
- datahub/sdk/search_filters.py +73 -11
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/threaded_iterator_executor.py +16 -3
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.1rc6.dist-info → acryl_datahub-1.0.0.2rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
acryl_datahub-1.0.0.
|
|
1
|
+
acryl_datahub-1.0.0.2rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=vzyBMegu61oWM-Gce9R3y5zLfMrINPSGDEFO-MHhthA,323
|
|
5
5
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
6
|
-
datahub/errors.py,sha256=
|
|
6
|
+
datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
datahub/_codegen/aspect.py,sha256=PJRa-Z4ouXHq3OkulfyWhwZn-fFUBDK_UPvmqaWdbWk,1063
|
|
@@ -72,7 +72,7 @@ datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
|
|
|
72
72
|
datahub/cli/exists_cli.py,sha256=IsuU86R-g7BJjAl1vULH6d-BWJHAKa4XHLZl5WxGUEM,1233
|
|
73
73
|
datahub/cli/get_cli.py,sha256=VV80BCXfZ0-C8fr2k43SIuN9DB-fOYP9StWsTHnXwFw,2327
|
|
74
74
|
datahub/cli/iceberg_cli.py,sha256=-XT3wpkr8b-HFMafYk7lSon3Lys6XjTQA8U1b698ByM,23003
|
|
75
|
-
datahub/cli/ingest_cli.py,sha256=
|
|
75
|
+
datahub/cli/ingest_cli.py,sha256=Welutg0LOjuEiBOnQdNTnEZFDyKZiiWxqyQK-Go8dL8,20540
|
|
76
76
|
datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
|
|
77
77
|
datahub/cli/lite_cli.py,sha256=XKMejSuYUToKBvgN3YmmnxjRcaG5WPw23gJuQK8pgRc,13099
|
|
78
78
|
datahub/cli/migrate.py,sha256=3orGfLNsdh1Q7gkPaCaf2bBWM5b3Ih4fGFw3poe0wiA,17937
|
|
@@ -122,7 +122,7 @@ datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvVi
|
|
|
122
122
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
123
123
|
datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
|
|
124
124
|
datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
125
|
-
datahub/emitter/mcp_builder.py,sha256=
|
|
125
|
+
datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
|
|
126
126
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
127
127
|
datahub/emitter/request_helper.py,sha256=HpI9a9W0TzoVbrs584rF8P8w-IT_iKLmvYmO_6IHhXs,1008
|
|
128
128
|
datahub/emitter/response_helper.py,sha256=h2hrZYiv4xfauD_lHPW_fN_AV8KhWNM4CVd-Lat2vT0,4608
|
|
@@ -145,8 +145,8 @@ datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX
|
|
|
145
145
|
datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwBhY,4644
|
|
146
146
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
147
147
|
datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
|
|
148
|
-
datahub/ingestion/api/source.py,sha256=
|
|
149
|
-
datahub/ingestion/api/source_helpers.py,sha256=
|
|
148
|
+
datahub/ingestion/api/source.py,sha256=HrQahSEBeapMDnW8S6wSEyNLLE9RCs2R6eUrVaibuuc,19349
|
|
149
|
+
datahub/ingestion/api/source_helpers.py,sha256=OhgBLdpUIuqF_gl4uV8Y2csp-z97zzXeFj2I5aUypCI,20158
|
|
150
150
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
151
151
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
152
152
|
datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -171,11 +171,11 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
|
|
|
171
171
|
datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
|
|
172
172
|
datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
|
|
173
173
|
datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
174
|
-
datahub/ingestion/graph/client.py,sha256=
|
|
174
|
+
datahub/ingestion/graph/client.py,sha256=DUOy3fzwUMT0wJ2GAcLiYa5bzelwfZjBmzhbm95IIys,69918
|
|
175
175
|
datahub/ingestion/graph/config.py,sha256=_oha8Je7P80ZmrkZUAaRHyYbdMmTkMI5JkYjEP2Ri1Q,751
|
|
176
176
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
177
177
|
datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
|
|
178
|
-
datahub/ingestion/graph/filters.py,sha256=
|
|
178
|
+
datahub/ingestion/graph/filters.py,sha256=hZ8YOQRxC0_mbAx_SLkgqyYXr0Fw3O4U2wo2UMuDHJY,8653
|
|
179
179
|
datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
180
|
datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=iEulcZMLBQuUfe9MAYyobMekvMcNm4dqVcS_C_2KfrI,9736
|
|
181
181
|
datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
|
|
@@ -205,7 +205,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=FlWfXoVoayabVXNMB9qETEU0G
|
|
|
205
205
|
datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
|
|
206
206
|
datahub/ingestion/source/ldap.py,sha256=CNr3foofIpoCXu_GGqfcajlQE2qkHr5isYwVcDutdkk,18695
|
|
207
207
|
datahub/ingestion/source/metabase.py,sha256=j8DRV2GvisezidL1JZ5HJLF_hdFdtvaoyDoEdEyh0Ks,32603
|
|
208
|
-
datahub/ingestion/source/mlflow.py,sha256=
|
|
208
|
+
datahub/ingestion/source/mlflow.py,sha256=6uN1fjyubs9rjAsdtkSRMKf7h3_89UvFfWDqkgdvPdY,32422
|
|
209
209
|
datahub/ingestion/source/mode.py,sha256=20vWflnRIMWqK8q2Mt2PorMBLnzRAA4bMjcWEyqhTio,64506
|
|
210
210
|
datahub/ingestion/source/mongodb.py,sha256=2C2Cxn8DXL53IbNiywIuKt8UT_EMcPg9f8su-OPSNGU,21237
|
|
211
211
|
datahub/ingestion/source/nifi.py,sha256=w5TPnqPmpotvzSsJROi6nUiHWPUVC6u1g0CzXIE6FNs,56903
|
|
@@ -263,7 +263,7 @@ datahub/ingestion/source/bigquery_v2/queries.py,sha256=c1BpeQP8p8y-FOhmiQkkY2IqG
|
|
|
263
263
|
datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=_5cAXVU8b8T_nAPDsvN2JRd2dmM1t1J1mRylfKiPen4,19530
|
|
264
264
|
datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
|
|
265
265
|
datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
266
|
-
datahub/ingestion/source/cassandra/cassandra.py,sha256=
|
|
266
|
+
datahub/ingestion/source/cassandra/cassandra.py,sha256=lKvPP0Uahi9xw_yh9cArPPtwvAauXolaEk-6f-jhpz4,14558
|
|
267
267
|
datahub/ingestion/source/cassandra/cassandra_api.py,sha256=UVGQTsk6O57Q6wrWo54bQPLtStTWhw_Fq6fgW3Bjgk8,12515
|
|
268
268
|
datahub/ingestion/source/cassandra/cassandra_config.py,sha256=vIMUOzazWTGi03B51vI0-YMxaMJHUGmCxJJgd8pKhC8,3791
|
|
269
269
|
datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=DkSIryZNwLei5PaKuu9fNEKxEbhIrPI-T9gaVoM87NQ,11063
|
|
@@ -333,7 +333,7 @@ datahub/ingestion/source/hex/hex.py,sha256=DPpsi5e-sdUgbS0Okyvx1mvc00Adu47zA65oF
|
|
|
333
333
|
datahub/ingestion/source/hex/mapper.py,sha256=6dsGvvhPAOAbAG1ayxLwipgJGt1q7YanWYfMX3rZeiM,12603
|
|
334
334
|
datahub/ingestion/source/hex/model.py,sha256=hmMfOLEGZcKjwy2DW29OPf_9_Q_TesgnUTCen2br_fA,1471
|
|
335
335
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
336
|
-
datahub/ingestion/source/iceberg/iceberg.py,sha256=
|
|
336
|
+
datahub/ingestion/source/iceberg/iceberg.py,sha256=PhLLXWgBdfZ3hL7LgLvDr6aTK-QKmiZCFNz5jD-mxZM,30773
|
|
337
337
|
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
|
|
338
338
|
datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
|
|
339
339
|
datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -378,7 +378,7 @@ datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
378
378
|
datahub/ingestion/source/powerbi/config.py,sha256=5rG62dspGF9jIo8l6HLpB6ECv5n-t1un2ZyGiisD784,24219
|
|
379
379
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
|
|
380
380
|
datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=5df3qvalCS9hZ46DPXs6XDcw9-IofGf8Eol_rUC7LHI,20329
|
|
381
|
-
datahub/ingestion/source/powerbi/powerbi.py,sha256=
|
|
381
|
+
datahub/ingestion/source/powerbi/powerbi.py,sha256=f0vwzVdKAU7Qp7gRHgTOPq45ThUmsbFXCwcIDaS1S34,56464
|
|
382
382
|
datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
383
383
|
datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=yDi0C13ko2dVxdLJBYvUuGbT4Q2hxQRse3sL7Ul1ZU0,2050
|
|
384
384
|
datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
|
|
@@ -488,7 +488,7 @@ datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F
|
|
|
488
488
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
489
489
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
490
490
|
datahub/ingestion/source/sql/teradata.py,sha256=9WdrxDy02lRJi9IZgsAATFsmxcQnIw5Gr6yCqHJQy5k,33507
|
|
491
|
-
datahub/ingestion/source/sql/trino.py,sha256=
|
|
491
|
+
datahub/ingestion/source/sql/trino.py,sha256=gSLDyETKavSVR8l9wdebrfoc41cqAWz6ApqIicW0BF8,17892
|
|
492
492
|
datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
|
|
493
493
|
datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
|
|
494
494
|
datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
|
|
@@ -504,7 +504,7 @@ datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-Lj
|
|
|
504
504
|
datahub/ingestion/source/state/profiling_state_handler.py,sha256=jDMiIrAq8k4GrYoh9Ymh0ZAmzejYFk8E1W7-kuw6lXg,4295
|
|
505
505
|
datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=h28twxcsMNvI74bUjAKleRYid8kfIyWS7Y11aBldDlY,9435
|
|
506
506
|
datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
|
|
507
|
-
datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256
|
|
507
|
+
datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=Lr2HYGx_b2FQ8A36s7s11tl-4-mGIM13bfy5JbQ3LtM,14890
|
|
508
508
|
datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=N0Qzp2t2qAf27WndhsvGbCYCd29dSrLY3TSfbO0hoKA,17369
|
|
509
509
|
datahub/ingestion/source/state/usage_common_state.py,sha256=TJyb0CpwibsduJYI854EFdtrwWnz7JC-IkzKUXVGDx0,983
|
|
510
510
|
datahub/ingestion/source/state/use_case_handler.py,sha256=3g8ddTvGXHe0dCiyTkyFeNmR8a3bhwywtIt8EpK5oQs,1271
|
|
@@ -535,7 +535,7 @@ datahub/ingestion/source/usage/clickhouse_usage.py,sha256=jJ-EUJdS7t4d9RVjLWQQ2e
|
|
|
535
535
|
datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=O3EDEZsXPNdsKGD-jStREA8e4-iTlnqd3ocqtAYFKNA,10544
|
|
536
536
|
datahub/ingestion/source/usage/usage_common.py,sha256=uuCgIduhlRL2zIAN8rymZ5cZn1WF6akZ-ZbbaVYo9_w,9813
|
|
537
537
|
datahub/ingestion/source/vertexai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
538
|
-
datahub/ingestion/source/vertexai/vertexai.py,sha256=
|
|
538
|
+
datahub/ingestion/source/vertexai/vertexai.py,sha256=ipq2Zb2lXTZkUg9r78kvuIIhX7mc-5hr-o83395IWpo,43589
|
|
539
539
|
datahub/ingestion/source/vertexai/vertexai_config.py,sha256=uMnsv3b6TsPRH26u_JE_v1u0db7ANEAFlVxU5A6ELRM,989
|
|
540
540
|
datahub/ingestion/source/vertexai/vertexai_result_type_utils.py,sha256=fE2l_xXvKONqb4jabl4LtKRBZDnP3koMLJV520wEAMg,2555
|
|
541
541
|
datahub/ingestion/source_config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -906,8 +906,8 @@ datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
|
|
|
906
906
|
datahub/sdk/entity_client.py,sha256=Sxe6H6Vr_tqLJu5KW7MJfLWJ6mgh4mbsx7u7MOBpM64,5052
|
|
907
907
|
datahub/sdk/main_client.py,sha256=h2MKRhR-BO0zGCMhF7z2bTncX4hagKrAYwR3wTNTtzA,3666
|
|
908
908
|
datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
|
|
909
|
-
datahub/sdk/search_client.py,sha256=
|
|
910
|
-
datahub/sdk/search_filters.py,sha256=
|
|
909
|
+
datahub/sdk/search_client.py,sha256=BJR5t7Ff2oDNOGLcSCp9YHzrGKbgOQr7T8XQKGEpucw,3437
|
|
910
|
+
datahub/sdk/search_filters.py,sha256=BcMhvG5hGYAATtLPLz4WLRjKApX2oLYrrcGn-CG__ek,12901
|
|
911
911
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
912
912
|
datahub/secret/datahub_secret_store.py,sha256=9u9S87-15jwhj4h0EsAVIMdQLgvstKc8voQux2slxgU,2477
|
|
913
913
|
datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
|
|
@@ -970,7 +970,7 @@ datahub/utilities/file_backed_collections.py,sha256=zW-Xy6zO1E6R-FRJKjlimMKr-emW
|
|
|
970
970
|
datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
|
|
971
971
|
datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
|
|
972
972
|
datahub/utilities/hive_schema_to_avro.py,sha256=1MP0a6FFVEYxLg_4lKF7hPxbHJJy0uRQYkML5zRwV3Q,11622
|
|
973
|
-
datahub/utilities/ingest_utils.py,sha256=
|
|
973
|
+
datahub/utilities/ingest_utils.py,sha256=OgETzX_9g6DcYlxGH0_xgAmAlWLSROr25ydDL-mBhKM,3137
|
|
974
974
|
datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
|
|
975
975
|
datahub/utilities/logging_manager.py,sha256=bc-x5VZGvFUHT0HD-TF3Uz_nzw3dpKdJSbz6kjpAqAQ,10073
|
|
976
976
|
datahub/utilities/lossy_collections.py,sha256=5rdtfK2pjwvOrrzLf_KGFOMiVvLLmoXj5EVQXTFSR3E,5704
|
|
@@ -996,7 +996,7 @@ datahub/utilities/sqllineage_patch.py,sha256=0Buh50bmEqJFg1HFRCknCnePo1cecI4JmGx
|
|
|
996
996
|
datahub/utilities/stats_collections.py,sha256=CxaTcrF7J6am7iX5jPhFKne535UcyDk_oreVwR013fU,1625
|
|
997
997
|
datahub/utilities/str_enum.py,sha256=EsqCLPbrqyQ2YU_wt7QP-a6P5fnpIshXJ3AI8gLBlVA,474
|
|
998
998
|
datahub/utilities/tee_io.py,sha256=jBrsUfTPTk9IICntfGOG0HR-Fjp8BQMde-FPQ4r3kuI,601
|
|
999
|
-
datahub/utilities/threaded_iterator_executor.py,sha256=
|
|
999
|
+
datahub/utilities/threaded_iterator_executor.py,sha256=6BpCE0os3d-uMYxHBilPQC-JvEBkU6JQY4bGs06JKYI,2004
|
|
1000
1000
|
datahub/utilities/threading_timeout.py,sha256=hOzDI55E3onXblHNwGsePJUWMXo5zqaWCnoYdL2-KPM,1316
|
|
1001
1001
|
datahub/utilities/time.py,sha256=Q7S_Zyom8C2zcl2xFbjNw6K8nZsCub5XGAB4OEmIS34,1847
|
|
1002
1002
|
datahub/utilities/topological_sort.py,sha256=kcK5zPSR393fgItr-KSLV3bDqfJfBRS8E5kkCpPBgUY,1358
|
|
@@ -1043,8 +1043,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1043
1043
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1044
1044
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1045
1045
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1046
|
-
acryl_datahub-1.0.0.
|
|
1047
|
-
acryl_datahub-1.0.0.
|
|
1048
|
-
acryl_datahub-1.0.0.
|
|
1049
|
-
acryl_datahub-1.0.0.
|
|
1050
|
-
acryl_datahub-1.0.0.
|
|
1046
|
+
acryl_datahub-1.0.0.2rc1.dist-info/METADATA,sha256=IE26ZK9HREmhmiMf2zQds-JatSIyAh9gcaVjGyOAGLE,176849
|
|
1047
|
+
acryl_datahub-1.0.0.2rc1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
1048
|
+
acryl_datahub-1.0.0.2rc1.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
|
|
1049
|
+
acryl_datahub-1.0.0.2rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1050
|
+
acryl_datahub-1.0.0.2rc1.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/cli/ingest_cli.py
CHANGED
|
@@ -216,9 +216,9 @@ def run(
|
|
|
216
216
|
@click.option(
|
|
217
217
|
"--executor-id",
|
|
218
218
|
type=str,
|
|
219
|
-
default="default",
|
|
220
219
|
help="Executor id to route execution requests to. Do not use this unless you have configured a custom executor.",
|
|
221
220
|
required=False,
|
|
221
|
+
default=None,
|
|
222
222
|
)
|
|
223
223
|
@click.option(
|
|
224
224
|
"--cli-version",
|
|
@@ -239,7 +239,7 @@ def run(
|
|
|
239
239
|
type=str,
|
|
240
240
|
help="Timezone for the schedule in 'America/New_York' format. Uses UTC by default.",
|
|
241
241
|
required=False,
|
|
242
|
-
default=
|
|
242
|
+
default=None,
|
|
243
243
|
)
|
|
244
244
|
@click.option(
|
|
245
245
|
"--debug", type=bool, help="Should we debug.", required=False, default=False
|
|
@@ -255,10 +255,10 @@ def deploy(
|
|
|
255
255
|
name: Optional[str],
|
|
256
256
|
config: str,
|
|
257
257
|
urn: Optional[str],
|
|
258
|
-
executor_id: str,
|
|
258
|
+
executor_id: Optional[str],
|
|
259
259
|
cli_version: Optional[str],
|
|
260
260
|
schedule: Optional[str],
|
|
261
|
-
time_zone: str,
|
|
261
|
+
time_zone: Optional[str],
|
|
262
262
|
extra_pip: Optional[str],
|
|
263
263
|
debug: bool = False,
|
|
264
264
|
) -> None:
|
datahub/emitter/mcp_builder.py
CHANGED
datahub/errors.py
CHANGED
datahub/ingestion/api/source.py
CHANGED
|
@@ -51,6 +51,7 @@ from datahub.ingestion.api.source_helpers import (
|
|
|
51
51
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
52
52
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
53
53
|
from datahub.metadata.schema_classes import UpstreamLineageClass
|
|
54
|
+
from datahub.sdk.entity import Entity
|
|
54
55
|
from datahub.utilities.lossy_collections import LossyDict, LossyList
|
|
55
56
|
from datahub.utilities.type_annotations import get_class_from_annotation
|
|
56
57
|
|
|
@@ -480,7 +481,7 @@ class Source(Closeable, metaclass=ABCMeta):
|
|
|
480
481
|
|
|
481
482
|
def get_workunits_internal(
|
|
482
483
|
self,
|
|
483
|
-
) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper]]:
|
|
484
|
+
) -> Iterable[Union[MetadataWorkUnit, MetadataChangeProposalWrapper, Entity]]:
|
|
484
485
|
raise NotImplementedError(
|
|
485
486
|
"get_workunits_internal must be implemented if get_workunits is not overriden."
|
|
486
487
|
)
|
|
@@ -35,6 +35,7 @@ from datahub.metadata.schema_classes import (
|
|
|
35
35
|
TimeWindowSizeClass,
|
|
36
36
|
)
|
|
37
37
|
from datahub.metadata.urns import DatasetUrn, GlossaryTermUrn, TagUrn, Urn
|
|
38
|
+
from datahub.sdk.entity import Entity
|
|
38
39
|
from datahub.specific.dataset import DatasetPatchBuilder
|
|
39
40
|
from datahub.telemetry import telemetry
|
|
40
41
|
from datahub.utilities.urns.error import InvalidUrnError
|
|
@@ -49,7 +50,12 @@ logger = logging.getLogger(__name__)
|
|
|
49
50
|
|
|
50
51
|
def auto_workunit(
|
|
51
52
|
stream: Iterable[
|
|
52
|
-
Union[
|
|
53
|
+
Union[
|
|
54
|
+
MetadataChangeEventClass,
|
|
55
|
+
MetadataChangeProposalWrapper,
|
|
56
|
+
MetadataWorkUnit,
|
|
57
|
+
Entity,
|
|
58
|
+
]
|
|
53
59
|
],
|
|
54
60
|
) -> Iterable[MetadataWorkUnit]:
|
|
55
61
|
"""Convert a stream of MCEs and MCPs to a stream of :class:`MetadataWorkUnit`s."""
|
|
@@ -62,6 +68,8 @@ def auto_workunit(
|
|
|
62
68
|
)
|
|
63
69
|
elif isinstance(item, MetadataChangeProposalWrapper):
|
|
64
70
|
yield item.as_workunit()
|
|
71
|
+
elif isinstance(item, Entity):
|
|
72
|
+
yield from item.as_workunits()
|
|
65
73
|
else:
|
|
66
74
|
yield item
|
|
67
75
|
|
|
@@ -27,6 +27,7 @@ from pydantic import BaseModel
|
|
|
27
27
|
from requests.models import HTTPError
|
|
28
28
|
from typing_extensions import deprecated
|
|
29
29
|
|
|
30
|
+
from datahub._codegen.aspect import _Aspect
|
|
30
31
|
from datahub.cli import config_utils
|
|
31
32
|
from datahub.configuration.common import ConfigModel, GraphError, OperationalError
|
|
32
33
|
from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
|
|
@@ -49,6 +50,7 @@ from datahub.ingestion.graph.connections import (
|
|
|
49
50
|
)
|
|
50
51
|
from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
|
|
51
52
|
from datahub.ingestion.graph.filters import (
|
|
53
|
+
RawSearchFilter,
|
|
52
54
|
RawSearchFilterRule,
|
|
53
55
|
RemovedStatusFilter,
|
|
54
56
|
generate_filter,
|
|
@@ -75,10 +77,11 @@ from datahub.metadata.schema_classes import (
|
|
|
75
77
|
SystemMetadataClass,
|
|
76
78
|
TelemetryClientIdClass,
|
|
77
79
|
)
|
|
80
|
+
from datahub.metadata.urns import CorpUserUrn, Urn
|
|
78
81
|
from datahub.telemetry.telemetry import telemetry_instance
|
|
79
82
|
from datahub.utilities.perf_timer import PerfTimer
|
|
80
83
|
from datahub.utilities.str_enum import StrEnum
|
|
81
|
-
from datahub.utilities.urns.urn import
|
|
84
|
+
from datahub.utilities.urns.urn import guess_entity_type
|
|
82
85
|
|
|
83
86
|
if TYPE_CHECKING:
|
|
84
87
|
from datahub.ingestion.sink.datahub_rest import (
|
|
@@ -116,7 +119,7 @@ def entity_type_to_graphql(entity_type: str) -> str:
|
|
|
116
119
|
"""Convert the entity types into GraphQL "EntityType" enum values."""
|
|
117
120
|
|
|
118
121
|
# Hard-coded special cases.
|
|
119
|
-
if entity_type ==
|
|
122
|
+
if entity_type == CorpUserUrn.ENTITY_TYPE:
|
|
120
123
|
return "CORP_USER"
|
|
121
124
|
|
|
122
125
|
# Convert camelCase to UPPER_UNDERSCORE.
|
|
@@ -133,6 +136,14 @@ def entity_type_to_graphql(entity_type: str) -> str:
|
|
|
133
136
|
return entity_type
|
|
134
137
|
|
|
135
138
|
|
|
139
|
+
def flexible_entity_type_to_graphql(entity_type: str) -> str:
|
|
140
|
+
if entity_type.upper() == entity_type:
|
|
141
|
+
# Assume that we were passed a graphql EntityType enum value,
|
|
142
|
+
# so no conversion is needed.
|
|
143
|
+
return entity_type
|
|
144
|
+
return entity_type_to_graphql(entity_type)
|
|
145
|
+
|
|
146
|
+
|
|
136
147
|
class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
137
148
|
def __init__(self, config: DatahubClientConfig) -> None:
|
|
138
149
|
self.config = config
|
|
@@ -805,7 +816,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
805
816
|
|
|
806
817
|
:return: An iterable of (urn, schema info) tuple that match the filters.
|
|
807
818
|
"""
|
|
808
|
-
types = [
|
|
819
|
+
types = self._get_types(["dataset"])
|
|
809
820
|
|
|
810
821
|
# Add the query default of * if no query is specified.
|
|
811
822
|
query = query or "*"
|
|
@@ -873,10 +884,10 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
873
884
|
env: Optional[str] = None,
|
|
874
885
|
query: Optional[str] = None,
|
|
875
886
|
container: Optional[str] = None,
|
|
876
|
-
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
887
|
+
status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
877
888
|
batch_size: int = 10000,
|
|
878
889
|
extraFilters: Optional[List[RawSearchFilterRule]] = None,
|
|
879
|
-
extra_or_filters: Optional[
|
|
890
|
+
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
880
891
|
) -> Iterable[str]:
|
|
881
892
|
"""Fetch all urns that match all of the given filters.
|
|
882
893
|
|
|
@@ -968,7 +979,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
968
979
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
969
980
|
batch_size: int = 10000,
|
|
970
981
|
extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
|
|
971
|
-
extra_or_filters: Optional[
|
|
982
|
+
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
972
983
|
extra_source_fields: Optional[List[str]] = None,
|
|
973
984
|
skip_cache: bool = False,
|
|
974
985
|
) -> Iterable[dict]:
|
|
@@ -1121,7 +1132,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1121
1132
|
)
|
|
1122
1133
|
|
|
1123
1134
|
types = [
|
|
1124
|
-
|
|
1135
|
+
flexible_entity_type_to_graphql(entity_type)
|
|
1136
|
+
for entity_type in entity_types
|
|
1125
1137
|
]
|
|
1126
1138
|
return types
|
|
1127
1139
|
|
|
@@ -1686,6 +1698,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1686
1698
|
|
|
1687
1699
|
return res["runAssertionsForAsset"]
|
|
1688
1700
|
|
|
1701
|
+
@deprecated("Use get_entities instead which returns typed aspects")
|
|
1689
1702
|
def get_entities_v2(
|
|
1690
1703
|
self,
|
|
1691
1704
|
entity_name: str,
|
|
@@ -1725,6 +1738,108 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1725
1738
|
retval[entity_urn][aspect_key] = aspect_value
|
|
1726
1739
|
return retval
|
|
1727
1740
|
|
|
1741
|
+
def get_entities(
|
|
1742
|
+
self,
|
|
1743
|
+
entity_name: str,
|
|
1744
|
+
urns: List[str],
|
|
1745
|
+
aspects: Optional[List[str]] = None,
|
|
1746
|
+
with_system_metadata: bool = False,
|
|
1747
|
+
) -> Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]]:
|
|
1748
|
+
"""
|
|
1749
|
+
Get entities using the OpenAPI v3 endpoint, deserializing aspects into typed objects.
|
|
1750
|
+
|
|
1751
|
+
Args:
|
|
1752
|
+
entity_name: The entity type name
|
|
1753
|
+
urns: List of entity URNs to fetch
|
|
1754
|
+
aspects: Optional list of aspect names to fetch. If None, all aspects will be fetched.
|
|
1755
|
+
with_system_metadata: If True, return system metadata along with each aspect.
|
|
1756
|
+
|
|
1757
|
+
Returns:
|
|
1758
|
+
A dictionary mapping URNs to a dictionary of aspect name to tuples of
|
|
1759
|
+
(typed aspect object, system metadata). If with_system_metadata is False,
|
|
1760
|
+
the system metadata in the tuple will be None.
|
|
1761
|
+
"""
|
|
1762
|
+
aspects = aspects or []
|
|
1763
|
+
|
|
1764
|
+
request_payload = []
|
|
1765
|
+
for urn in urns:
|
|
1766
|
+
entity_request: Dict[str, Any] = {"urn": urn}
|
|
1767
|
+
for aspect_name in aspects:
|
|
1768
|
+
entity_request[aspect_name] = {}
|
|
1769
|
+
request_payload.append(entity_request)
|
|
1770
|
+
|
|
1771
|
+
headers: Dict[str, Any] = {
|
|
1772
|
+
"Accept": "application/json",
|
|
1773
|
+
"Content-Type": "application/json",
|
|
1774
|
+
}
|
|
1775
|
+
|
|
1776
|
+
url = f"{self.config.server}/openapi/v3/entity/{entity_name}/batchGet"
|
|
1777
|
+
if with_system_metadata:
|
|
1778
|
+
url += "?systemMetadata=true"
|
|
1779
|
+
|
|
1780
|
+
response = self._session.post(
|
|
1781
|
+
url, data=json.dumps(request_payload), headers=headers
|
|
1782
|
+
)
|
|
1783
|
+
response.raise_for_status()
|
|
1784
|
+
entities = response.json()
|
|
1785
|
+
|
|
1786
|
+
result: Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]] = {}
|
|
1787
|
+
|
|
1788
|
+
for entity in entities:
|
|
1789
|
+
entity_urn = entity.get("urn")
|
|
1790
|
+
if entity_urn is None:
|
|
1791
|
+
logger.warning(
|
|
1792
|
+
f"Missing URN in entity response: {entity}, skipping deserialization"
|
|
1793
|
+
)
|
|
1794
|
+
continue
|
|
1795
|
+
|
|
1796
|
+
entity_aspects: Dict[
|
|
1797
|
+
str, Tuple[_Aspect, Optional[SystemMetadataClass]]
|
|
1798
|
+
] = {}
|
|
1799
|
+
|
|
1800
|
+
for aspect_name, aspect_obj in entity.items():
|
|
1801
|
+
if aspect_name == "urn":
|
|
1802
|
+
continue
|
|
1803
|
+
|
|
1804
|
+
aspect_class = ASPECT_NAME_MAP.get(aspect_name)
|
|
1805
|
+
if aspect_class is None:
|
|
1806
|
+
logger.warning(
|
|
1807
|
+
f"Unknown aspect type {aspect_name}, skipping deserialization"
|
|
1808
|
+
)
|
|
1809
|
+
continue
|
|
1810
|
+
|
|
1811
|
+
aspect_value = aspect_obj.get("value")
|
|
1812
|
+
if aspect_value is None:
|
|
1813
|
+
logger.warning(
|
|
1814
|
+
f"Unknown aspect value for aspect {aspect_name}, skipping deserialization"
|
|
1815
|
+
)
|
|
1816
|
+
continue
|
|
1817
|
+
|
|
1818
|
+
try:
|
|
1819
|
+
post_json_obj = post_json_transform(aspect_value)
|
|
1820
|
+
typed_aspect = aspect_class.from_obj(post_json_obj)
|
|
1821
|
+
assert isinstance(typed_aspect, aspect_class) and isinstance(
|
|
1822
|
+
typed_aspect, _Aspect
|
|
1823
|
+
)
|
|
1824
|
+
|
|
1825
|
+
system_metadata = None
|
|
1826
|
+
if with_system_metadata:
|
|
1827
|
+
system_metadata_obj = aspect_obj.get("systemMetadata")
|
|
1828
|
+
if system_metadata_obj:
|
|
1829
|
+
system_metadata = SystemMetadataClass.from_obj(
|
|
1830
|
+
system_metadata_obj
|
|
1831
|
+
)
|
|
1832
|
+
|
|
1833
|
+
entity_aspects[aspect_name] = (typed_aspect, system_metadata)
|
|
1834
|
+
except Exception as e:
|
|
1835
|
+
logger.error(f"Error deserializing aspect {aspect_name}: {e}")
|
|
1836
|
+
raise
|
|
1837
|
+
|
|
1838
|
+
if entity_aspects:
|
|
1839
|
+
result[entity_urn] = entity_aspects
|
|
1840
|
+
|
|
1841
|
+
return result
|
|
1842
|
+
|
|
1728
1843
|
def upsert_custom_assertion(
|
|
1729
1844
|
self,
|
|
1730
1845
|
urn: Optional[str],
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import enum
|
|
3
|
-
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import Dict, List, Literal, Optional, Union
|
|
4
5
|
|
|
5
6
|
from typing_extensions import TypeAlias
|
|
6
7
|
|
|
@@ -8,9 +9,14 @@ from datahub.emitter.mce_builder import (
|
|
|
8
9
|
make_data_platform_urn,
|
|
9
10
|
make_dataplatform_instance_urn,
|
|
10
11
|
)
|
|
12
|
+
from datahub.errors import SearchFilterWarning
|
|
11
13
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
12
14
|
|
|
13
|
-
RawSearchFilterRule = Dict[str,
|
|
15
|
+
RawSearchFilterRule: TypeAlias = Dict[str, Union[str, bool, List[str]]]
|
|
16
|
+
|
|
17
|
+
# This is a list of OR filters, each of which is a list of AND filters.
|
|
18
|
+
# This can be put directly into the orFilters parameter in GraphQL.
|
|
19
|
+
RawSearchFilter: TypeAlias = List[Dict[Literal["and"], List[RawSearchFilterRule]]]
|
|
14
20
|
|
|
15
21
|
# Mirrors our GraphQL enum: https://datahubproject.io/docs/graphql/enums#filteroperator
|
|
16
22
|
FilterOperator: TypeAlias = Literal[
|
|
@@ -39,12 +45,14 @@ class SearchFilterRule:
|
|
|
39
45
|
negated: bool = False
|
|
40
46
|
|
|
41
47
|
def to_raw(self) -> RawSearchFilterRule:
|
|
42
|
-
|
|
48
|
+
rule: RawSearchFilterRule = {
|
|
43
49
|
"field": self.field,
|
|
44
50
|
"condition": self.condition,
|
|
45
51
|
"values": self.values,
|
|
46
|
-
"negated": self.negated,
|
|
47
52
|
}
|
|
53
|
+
if self.negated:
|
|
54
|
+
rule["negated"] = True
|
|
55
|
+
return rule
|
|
48
56
|
|
|
49
57
|
def negate(self) -> "SearchFilterRule":
|
|
50
58
|
return SearchFilterRule(
|
|
@@ -73,10 +81,10 @@ def generate_filter(
|
|
|
73
81
|
platform_instance: Optional[str],
|
|
74
82
|
env: Optional[str],
|
|
75
83
|
container: Optional[str],
|
|
76
|
-
status: RemovedStatusFilter,
|
|
84
|
+
status: Optional[RemovedStatusFilter],
|
|
77
85
|
extra_filters: Optional[List[RawSearchFilterRule]],
|
|
78
|
-
extra_or_filters: Optional[
|
|
79
|
-
) ->
|
|
86
|
+
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
87
|
+
) -> RawSearchFilter:
|
|
80
88
|
"""
|
|
81
89
|
Generate a search filter based on the provided parameters.
|
|
82
90
|
:param platform: The platform to filter by.
|
|
@@ -105,15 +113,16 @@ def generate_filter(
|
|
|
105
113
|
and_filters.append(_get_container_filter(container).to_raw())
|
|
106
114
|
|
|
107
115
|
# Status filter.
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
116
|
+
if status:
|
|
117
|
+
status_filter = _get_status_filter(status)
|
|
118
|
+
if status_filter:
|
|
119
|
+
and_filters.append(status_filter.to_raw())
|
|
111
120
|
|
|
112
121
|
# Extra filters.
|
|
113
122
|
if extra_filters:
|
|
114
123
|
and_filters += extra_filters
|
|
115
124
|
|
|
116
|
-
or_filters:
|
|
125
|
+
or_filters: RawSearchFilter = [{"and": and_filters}]
|
|
117
126
|
|
|
118
127
|
# Env filter
|
|
119
128
|
if env:
|
|
@@ -127,11 +136,27 @@ def generate_filter(
|
|
|
127
136
|
|
|
128
137
|
# Extra OR filters are distributed across the top level and lists.
|
|
129
138
|
if extra_or_filters:
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
for extra_or_filter in extra_or_filters
|
|
133
|
-
|
|
134
|
-
|
|
139
|
+
new_or_filters: RawSearchFilter = []
|
|
140
|
+
for and_filter in or_filters:
|
|
141
|
+
for extra_or_filter in extra_or_filters:
|
|
142
|
+
if isinstance(extra_or_filter, dict) and "and" in extra_or_filter:
|
|
143
|
+
new_or_filters.append(
|
|
144
|
+
{"and": and_filter["and"] + extra_or_filter["and"]}
|
|
145
|
+
)
|
|
146
|
+
else:
|
|
147
|
+
# Hack for backwards compatibility.
|
|
148
|
+
# We have some code that erroneously passed a List[RawSearchFilterRule]
|
|
149
|
+
# instead of a List[Dict["and", List[RawSearchFilterRule]]].
|
|
150
|
+
warnings.warn(
|
|
151
|
+
"Passing a List[RawSearchFilterRule] to extra_or_filters is deprecated. "
|
|
152
|
+
"Please pass a List[Dict[str, List[RawSearchFilterRule]]] instead.",
|
|
153
|
+
SearchFilterWarning,
|
|
154
|
+
stacklevel=3,
|
|
155
|
+
)
|
|
156
|
+
new_or_filters.append(
|
|
157
|
+
{"and": and_filter["and"] + [extra_or_filter]} # type: ignore
|
|
158
|
+
)
|
|
159
|
+
or_filters = new_or_filters
|
|
135
160
|
|
|
136
161
|
return or_filters
|
|
137
162
|
|
|
@@ -123,16 +123,7 @@ class CassandraSource(StatefulIngestionSourceBase):
|
|
|
123
123
|
).workunit_processor,
|
|
124
124
|
]
|
|
125
125
|
|
|
126
|
-
def get_workunits_internal(
|
|
127
|
-
self,
|
|
128
|
-
) -> Iterable[MetadataWorkUnit]:
|
|
129
|
-
for metadata in self._get_metadata():
|
|
130
|
-
if isinstance(metadata, MetadataWorkUnit):
|
|
131
|
-
yield metadata
|
|
132
|
-
else:
|
|
133
|
-
yield from metadata.as_workunits()
|
|
134
|
-
|
|
135
|
-
def _get_metadata(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
|
|
126
|
+
def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
|
|
136
127
|
if not self.cassandra_api.authenticate():
|
|
137
128
|
return
|
|
138
129
|
keyspaces: List[CassandraKeyspace] = self.cassandra_api.get_keyspaces()
|
|
@@ -425,23 +425,21 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
425
425
|
def _get_dataset_properties_aspect(
|
|
426
426
|
self, dataset_name: str, table: Table
|
|
427
427
|
) -> DatasetPropertiesClass:
|
|
428
|
-
|
|
428
|
+
created: Optional[TimeStampClass] = None
|
|
429
429
|
custom_properties = table.metadata.properties.copy()
|
|
430
430
|
custom_properties["location"] = table.metadata.location
|
|
431
431
|
custom_properties["format-version"] = str(table.metadata.format_version)
|
|
432
432
|
custom_properties["partition-spec"] = str(self._get_partition_aspect(table))
|
|
433
|
+
last_modified: Optional[int] = table.metadata.last_updated_ms
|
|
433
434
|
if table.current_snapshot():
|
|
434
435
|
custom_properties["snapshot-id"] = str(table.current_snapshot().snapshot_id)
|
|
435
436
|
custom_properties["manifest-list"] = table.current_snapshot().manifest_list
|
|
436
|
-
|
|
437
|
-
int(table.current_snapshot().timestamp_ms)
|
|
438
|
-
)
|
|
437
|
+
if not last_modified:
|
|
438
|
+
last_modified = int(table.current_snapshot().timestamp_ms)
|
|
439
439
|
if "created-at" in custom_properties:
|
|
440
440
|
try:
|
|
441
441
|
dt = dateutil_parser.isoparse(custom_properties["created-at"])
|
|
442
|
-
|
|
443
|
-
int(dt.timestamp() * 1000)
|
|
444
|
-
)
|
|
442
|
+
created = TimeStampClass(int(dt.timestamp() * 1000))
|
|
445
443
|
except Exception as ex:
|
|
446
444
|
LOGGER.warning(
|
|
447
445
|
f"Exception while trying to parse creation date {custom_properties['created-at']}, ignoring: {ex}"
|
|
@@ -451,8 +449,10 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
451
449
|
name=table.name()[-1],
|
|
452
450
|
description=table.metadata.properties.get("comment", None),
|
|
453
451
|
customProperties=custom_properties,
|
|
454
|
-
lastModified=
|
|
455
|
-
|
|
452
|
+
lastModified=TimeStampClass(last_modified)
|
|
453
|
+
if last_modified is not None
|
|
454
|
+
else None,
|
|
455
|
+
created=created,
|
|
456
456
|
qualifiedName=dataset_name,
|
|
457
457
|
)
|
|
458
458
|
|