acryl-datahub 0.15.0rc14__py3-none-any.whl → 0.15.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/METADATA +2414 -2430
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/RECORD +24 -26
- datahub/__init__.py +1 -1
- datahub/cli/cli_utils.py +2 -0
- datahub/ingestion/api/incremental_properties_helper.py +69 -0
- datahub/ingestion/api/source_helpers.py +3 -1
- datahub/ingestion/sink/datahub_rest.py +3 -3
- datahub/ingestion/source/abs/source.py +4 -0
- datahub/ingestion/source/gc/datahub_gc.py +5 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
- datahub/ingestion/source/mode.py +0 -23
- datahub/ingestion/source/redash.py +13 -63
- datahub/ingestion/source/redshift/config.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +2 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_v2.py +6 -0
- datahub/ingestion/source/tableau/tableau.py +107 -30
- datahub/ingestion/source/unity/source.py +2 -0
- datahub/ingestion/source/unity/usage.py +20 -11
- datahub/specific/datajob.py +4 -10
- datahub/utilities/partition_executor.py +1 -1
- datahub/utilities/sql_lineage_parser_impl.py +0 -160
- datahub/utilities/sql_parser.py +0 -94
- datahub/utilities/sql_parser_base.py +0 -21
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=rPlQTQrjyFLBRsOQhvB6o82Gor0VfK7UUrRTXzbe8kw,575
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -59,7 +59,7 @@ datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1
|
|
|
59
59
|
datahub/api/graphql/operation.py,sha256=h7OXbVRrpJgoth1X4cgeIFhD5JY1MGKg2KjVlQK1gqE,5116
|
|
60
60
|
datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
61
|
datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
|
|
62
|
-
datahub/cli/cli_utils.py,sha256=
|
|
62
|
+
datahub/cli/cli_utils.py,sha256=gFmcOGAT6IdrTwmpRFSwaqzGmoqS4dbWrxILB1uvlGk,13214
|
|
63
63
|
datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
|
|
64
64
|
datahub/cli/delete_cli.py,sha256=Z7iXNr4ZMmghCnldU8laK4SwTNrhQEEnnUH_TeaBKog,21838
|
|
65
65
|
datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
|
|
@@ -131,6 +131,7 @@ datahub/ingestion/api/common.py,sha256=nJVL8YdvokYFajOjmVpSNlLbZJ5iVOFS4KJDlGtJ_
|
|
|
131
131
|
datahub/ingestion/api/decorators.py,sha256=KTNdf2B20L-wlEPF8UsL89a8zwvRSOfA7gOOZnnYalY,3933
|
|
132
132
|
datahub/ingestion/api/global_context.py,sha256=OdSJg4a_RKE52nu8MSiEkK2UqRRDhDTyOleHEAzPKho,575
|
|
133
133
|
datahub/ingestion/api/incremental_lineage_helper.py,sha256=Qke8T4Yba0c-ZzNfSzzXnFP0WjuqUQ1fuN4V3KK4gv4,5913
|
|
134
|
+
datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPsCtRa7ffDGPA1w1hgPUjeenZBU,2514
|
|
134
135
|
datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=g5jfu3rEdwFRdWx0K5VoqYhXu1cVtwADCZHP7SdBLf0,1839
|
|
135
136
|
datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
|
|
136
137
|
datahub/ingestion/api/registry.py,sha256=LGElUdzhNQoEr-k2SN23mJaIYnA1PYfF97LQxBmWmD8,7262
|
|
@@ -138,7 +139,7 @@ datahub/ingestion/api/report.py,sha256=CpQHqLAoYGV4bxNIpYQugLY0EUoxROlp2NUM9ONHj
|
|
|
138
139
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
139
140
|
datahub/ingestion/api/sink.py,sha256=6g01wou8pv79s0leDWyK12cgl7eLtpiwSUHqOw08vx4,4503
|
|
140
141
|
datahub/ingestion/api/source.py,sha256=LktIC5KuRr_5ncLtGnU2bBSqE7Vy0yp9kBF5n9lq-4Y,18627
|
|
141
|
-
datahub/ingestion/api/source_helpers.py,sha256=
|
|
142
|
+
datahub/ingestion/api/source_helpers.py,sha256=ninruzG4MwJuEmkOzpqLONzVi4OOi2x3RLWoogoELY4,19708
|
|
142
143
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
143
144
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
144
145
|
datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -179,7 +180,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
|
|
|
179
180
|
datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
|
|
180
181
|
datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
|
|
181
182
|
datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
|
|
182
|
-
datahub/ingestion/sink/datahub_rest.py,sha256=
|
|
183
|
+
datahub/ingestion/sink/datahub_rest.py,sha256=pU9z-vR-R7kGogqxkC7-9AZNctR9oUfAmfhhoD0-hwQ,12245
|
|
183
184
|
datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
|
|
184
185
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
185
186
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -195,14 +196,14 @@ datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suab
|
|
|
195
196
|
datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
|
|
196
197
|
datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
|
|
197
198
|
datahub/ingestion/source/mlflow.py,sha256=SxCt4jtxQcpPWEI2rRNagCiE_6TWr2RroqmxRd_td1Y,11565
|
|
198
|
-
datahub/ingestion/source/mode.py,sha256=
|
|
199
|
+
datahub/ingestion/source/mode.py,sha256=n_5em3jADCr5gWTLDOP4O4bRS0Zt_TCZtW8uFPxn-DI,63043
|
|
199
200
|
datahub/ingestion/source/mongodb.py,sha256=vZue4Nz0xaBoCUsQr3_0OIRkWRxeE_IH_Y_QKZ1s7S0,21077
|
|
200
201
|
datahub/ingestion/source/nifi.py,sha256=ttsjZ9aRUvINmewvKFIQD8Rwa4jcl35WFG-F-jPGPWQ,56146
|
|
201
202
|
datahub/ingestion/source/openapi.py,sha256=3ea2ORz1cuq4e7L2hSjxG9Cw3__pVoJ5UNYTJS3EnKU,17386
|
|
202
203
|
datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
|
|
203
204
|
datahub/ingestion/source/preset.py,sha256=fByqamRLnXxsfCGdLPzWN_5LJR_s2_G2f_zwSKUc8EA,3981
|
|
204
205
|
datahub/ingestion/source/pulsar.py,sha256=H8XJC7xIX8Kdkd7006PxllAGVO_Pjza8Xx9VUBOvpPc,19827
|
|
205
|
-
datahub/ingestion/source/redash.py,sha256=
|
|
206
|
+
datahub/ingestion/source/redash.py,sha256=g-wBJ4e54EdA2A2D5XmoNBilCDyh5b32M_C_fY1bhmA,30055
|
|
206
207
|
datahub/ingestion/source/salesforce.py,sha256=S6LSM6mzl8-zKbrJPoINhM1SCpYfM244Xb74pbEI-J0,31792
|
|
207
208
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
208
209
|
datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
|
|
@@ -212,7 +213,7 @@ datahub/ingestion/source/abs/config.py,sha256=Doecl1mA6JshJTNar7oTVR7wnWl4gMu64M
|
|
|
212
213
|
datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=qh38q-Zw8TUTZD5RF0_hSoEfR6BilNGXyKPRsq1KQKE,3600
|
|
213
214
|
datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
214
215
|
datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
|
|
215
|
-
datahub/ingestion/source/abs/source.py,sha256=
|
|
216
|
+
datahub/ingestion/source/abs/source.py,sha256=pzxW-R_cWGKPneEhX8JWdTZiX2k1kAZOPKgMxp9mAEI,24533
|
|
216
217
|
datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
217
218
|
datahub/ingestion/source/aws/aws_common.py,sha256=BqDe19yqHdwj6_sjIryGZ9_5lsAJ0PZhfPfGqLZrCcE,10649
|
|
218
219
|
datahub/ingestion/source/aws/glue.py,sha256=fX0dtaVVq174ZS0aBJvZFYK8ligfZX5EU3pdS3j1KQs,56215
|
|
@@ -300,10 +301,10 @@ datahub/ingestion/source/fivetran/fivetran.py,sha256=uKbM5czPz-6LOseoh1FwavWDIuL
|
|
|
300
301
|
datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP_CyAT5Cian2N4a-lb8x1NKHk,12776
|
|
301
302
|
datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
|
|
302
303
|
datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
303
|
-
datahub/ingestion/source/gc/datahub_gc.py,sha256=
|
|
304
|
+
datahub/ingestion/source/gc/datahub_gc.py,sha256=oWeaIGBDolz-Rf6qgGJ5VlQ9H1IY4hJFPAetoUWFyL4,12394
|
|
304
305
|
datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=ficLiRb9DEx4YFXZqWO8o-6ndVIrNW_yR-Yn2SXfDxc,15836
|
|
305
306
|
datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
|
|
306
|
-
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=
|
|
307
|
+
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=lEqZEfNMoC7FoUKsZJ91x3WHo14cH8sCaG7PZRuYCQU,7353
|
|
307
308
|
datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
308
309
|
datahub/ingestion/source/gcs/gcs_source.py,sha256=iwvj4JwjyVWRP1Vq106sUtQhh0GuOYVSu9zCa1wCZN0,6189
|
|
309
310
|
datahub/ingestion/source/gcs/gcs_utils.py,sha256=_78KM863XXgkVLmZLtYGF5PJNnZas1go-XRtOq-79lo,1047
|
|
@@ -381,13 +382,13 @@ datahub/ingestion/source/qlik_sense/qlik_api.py,sha256=fXJAo4ctDIx08ZRK1uEwFJta6
|
|
|
381
382
|
datahub/ingestion/source/qlik_sense/qlik_sense.py,sha256=bmhmOgSXzC6g-uqO1ljFLRNz2oo6Xjn400UQnWdMA1Y,22530
|
|
382
383
|
datahub/ingestion/source/qlik_sense/websocket_connection.py,sha256=CsWRFAOaRKJ7SDJKh6qT3sd5EaIFA_4JsEWSGG-6tHc,1856
|
|
383
384
|
datahub/ingestion/source/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
384
|
-
datahub/ingestion/source/redshift/config.py,sha256=
|
|
385
|
+
datahub/ingestion/source/redshift/config.py,sha256=nfyArJGBVIK_u4Q4ui8MYCQeMIgVlmi01VGmLN5HNn8,8853
|
|
385
386
|
datahub/ingestion/source/redshift/exception.py,sha256=dxzYUIv5B_FAWhOuzG2u5We7FX-ar4jhOXPXAlEIvgM,2055
|
|
386
387
|
datahub/ingestion/source/redshift/lineage.py,sha256=bUy0uJowrqSc33Z50fIxFlJkyhe-OPM_qgPh-smSTgM,43983
|
|
387
388
|
datahub/ingestion/source/redshift/lineage_v2.py,sha256=OcVW_27sSaZOYZPTd2j-LS9SzFQ1kXz6cMzM2ZDWhJQ,16751
|
|
388
389
|
datahub/ingestion/source/redshift/profile.py,sha256=T4H79ycq2tPobLM1tTLRtu581Qa8LlKxEok49m0AirU,4294
|
|
389
390
|
datahub/ingestion/source/redshift/query.py,sha256=bY1D9RoOHaw89LgcXal7GYlJN0RG7PxXRRC-YKIdC8E,43105
|
|
390
|
-
datahub/ingestion/source/redshift/redshift.py,sha256=
|
|
391
|
+
datahub/ingestion/source/redshift/redshift.py,sha256=1SZmexnJIc7deGMsduZ0CB4N3Y59cHpxVY0ThHGLKcw,44428
|
|
391
392
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
392
393
|
datahub/ingestion/source/redshift/redshift_schema.py,sha256=9IYeUsnISenq3eVB3k-s7zK8nInWDAYViFnDrNjtkb0,19149
|
|
393
394
|
datahub/ingestion/source/redshift/report.py,sha256=M19aUHBkd9n-BVBX4fRhyRNdVkN2b9Es6ZqInRx5ZGI,2958
|
|
@@ -423,7 +424,7 @@ datahub/ingestion/source/snowflake/constants.py,sha256=22n-0r04nuy-ImxWFFpmbrt_G
|
|
|
423
424
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
424
425
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
425
426
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
426
|
-
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=
|
|
427
|
+
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=LZqnTELtzRNf0vsKG-xXggXyt13S9RYvHOZEZHRjgNk,18851
|
|
427
428
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
|
|
428
429
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
429
430
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=w2CPm5XEU-KMUSIpb58aKOaxTDHfM5NvghutCVRicy4,23247
|
|
@@ -438,7 +439,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYh
|
|
|
438
439
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
|
|
439
440
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=PEmYNMXJRUvLQmVd8juVqjokfuSPuH9ppcM0ruXamxA,24807
|
|
440
441
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=Ux4sieWe79KZztquvrPkpJoOegLfTAWVv1A73UUlbGs,11365
|
|
441
|
-
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=
|
|
442
|
+
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=lo_3asTuIZbF-LuEUcYL-9NIZ720n7oB9mYA6WVTWA4,31960
|
|
442
443
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
443
444
|
datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
|
|
444
445
|
datahub/ingestion/source/sql/clickhouse.py,sha256=jzvaXP5Wr0SMhj2rtuvVE821xnfpKiXhO3cm0xblgHs,27299
|
|
@@ -485,7 +486,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
485
486
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
|
|
486
487
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
487
488
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
488
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
489
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=khC6lPXMz-t7Oqbxw0GH-O47NTthJe38clIP1XXLzsg,135453
|
|
489
490
|
datahub/ingestion/source/tableau/tableau_common.py,sha256=Dy_2pvkPucZJsG_LvQZLlxNEkjh-yOXHlZ4jurq9opM,26069
|
|
490
491
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=nWElhtDo5kj5mWivZFmtVF_4Ugw0-EatBYWyDVzu5hE,2501
|
|
491
492
|
datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -498,8 +499,8 @@ datahub/ingestion/source/unity/proxy.py,sha256=2-pYQ-3B9UVUwO1yB9iTdi3DqgqZ2JrpQ
|
|
|
498
499
|
datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
|
|
499
500
|
datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
|
|
500
501
|
datahub/ingestion/source/unity/report.py,sha256=0Y-ciHVTI6ZKNCJ5zWoQh3Ze1c_GMqmTMKFwzXDuuOg,2788
|
|
501
|
-
datahub/ingestion/source/unity/source.py,sha256=
|
|
502
|
-
datahub/ingestion/source/unity/usage.py,sha256=
|
|
502
|
+
datahub/ingestion/source/unity/source.py,sha256=YdUPCMJtpmvYVnRNnpqb4BVowFobkLvSJ_K2gHwrvCI,41752
|
|
503
|
+
datahub/ingestion/source/unity/usage.py,sha256=igRxYg8usukTAA229uJWi-0y-Zd0yOq9dEBi2k9f15o,11436
|
|
503
504
|
datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
504
505
|
datahub/ingestion/source/usage/clickhouse_usage.py,sha256=8nQqNAPKqivywjzsvqH0-HWFwjd4gECpw_xahLXk5ek,9970
|
|
505
506
|
datahub/ingestion/source/usage/starburst_trino_usage.py,sha256=R1DDs98tYn2WW0_tGXQhk7lqEU0ru2SgrvMBtV305ps,10542
|
|
@@ -853,7 +854,7 @@ datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,
|
|
|
853
854
|
datahub/specific/chart.py,sha256=DsLA5qHBIMNc1pIZ1AC5kLvwpRDd79Q56N4SANOofps,11324
|
|
854
855
|
datahub/specific/custom_properties.py,sha256=Ob8L9b9QIbUvHfzWo4L-SNY1QSRhgRy30kLRDdenGEs,1024
|
|
855
856
|
datahub/specific/dashboard.py,sha256=kRfyJsm7piugxBg0IfIbLmvv6Smk3D44IGVw8THLqPE,15100
|
|
856
|
-
datahub/specific/datajob.py,sha256=
|
|
857
|
+
datahub/specific/datajob.py,sha256=5pEBrN6llpgS7jWYEfrvqpbT2vMVVpepH71jIUJUo4U,18480
|
|
857
858
|
datahub/specific/dataproduct.py,sha256=Mt-QlndY4Die87XwakYTAcvyDzaB5fmyn1NpQGGcZyI,5235
|
|
858
859
|
datahub/specific/dataset.py,sha256=TAI8SRhhhsv1zEi3lGv24NX6PTJDrEyt5v0Sdg-uFY8,13568
|
|
859
860
|
datahub/specific/form.py,sha256=jVI0JD-o2-XkD1suW_ITnTZUF0GNbGjaNb9-PXdfdkA,4549
|
|
@@ -908,7 +909,7 @@ datahub/utilities/memory_footprint.py,sha256=6tHu0Tvm0axMXKqSsx0FneXJlXwhZdnK969
|
|
|
908
909
|
datahub/utilities/openapi_utils.py,sha256=VNiNo1Pjvtn2MLShQ1vCjc27id7LmI-pnLdjlUatNk4,2233
|
|
909
910
|
datahub/utilities/ordered_set.py,sha256=p2DdvbD98ELTLCxgdZdOKQ50VnnMDKr3l9fmqCzu5g4,1135
|
|
910
911
|
datahub/utilities/parsing_util.py,sha256=FrMJRgkJWoyPXmjtIfPT3fc886uA2i5_PqhplRDN3NI,598
|
|
911
|
-
datahub/utilities/partition_executor.py,sha256=
|
|
912
|
+
datahub/utilities/partition_executor.py,sha256=Fz2lH3vrUagLE63ZZSiQ4-WzO1bm_2SUPlS0cA0nkTc,20988
|
|
912
913
|
datahub/utilities/perf_timer.py,sha256=HUGdu4JuI4mmRo5IlKiksvNHbEXri03TW7uGlWDBTSE,2896
|
|
913
914
|
datahub/utilities/prefix_batch_builder.py,sha256=ZAxXNh1QcrvdPUgRnjFRMFPJLznfJqsydXG7xueCS_c,2847
|
|
914
915
|
datahub/utilities/progress_timer.py,sha256=w0b3wIEGL8fQy2BKYVPiKDcO5ATUlt1kQr74aG1TlxI,1152
|
|
@@ -918,9 +919,6 @@ datahub/utilities/search_utils.py,sha256=BdZfixXrB6lcD_ec6pCotGtVKQ4ekTtmaZDFGye
|
|
|
918
919
|
datahub/utilities/serialized_lru_cache.py,sha256=EgjDXvtpCe9Hjzb4neOEuIZZVhX4NDsekoc3-vn_JGw,3217
|
|
919
920
|
datahub/utilities/server_config_util.py,sha256=ibCl4FedJWa9oc7_ZWS3vrHRNlsmKLB-eCDVCbUs25A,698
|
|
920
921
|
datahub/utilities/sql_formatter.py,sha256=tYXIsKjKmpKh0JXGxeAPrHkUWYd1SwJNLjUZsfQP2h0,1016
|
|
921
|
-
datahub/utilities/sql_lineage_parser_impl.py,sha256=jNNz-UaSbMV1nzZIoBZoJl0zoPDOWcOjbQ7UJppnPvI,6570
|
|
922
|
-
datahub/utilities/sql_parser.py,sha256=n8JD1sBH4tRrRe9jUlfi4WEvLPhD4t-09VjvkCYXDkM,3599
|
|
923
|
-
datahub/utilities/sql_parser_base.py,sha256=IqLpOcB4FGucshx4v8uTpdS29SnsTPBtMlSJdwmeY88,456
|
|
924
922
|
datahub/utilities/sqlalchemy_query_combiner.py,sha256=cPD8OUxsoplOqxI6OxfYbjDuG2i6BZLt6qwRiOs9lwI,15007
|
|
925
923
|
datahub/utilities/sqlalchemy_type_converter.py,sha256=H4S4xnnyPozDBHFhBh4rjjoXa5novFzYIUBJy2KSrVc,9805
|
|
926
924
|
datahub/utilities/sqllineage_patch.py,sha256=lxlOV8b8UblBc0F8ZMHU3dfSO_nUJv2k94CQLWlW8aA,1990
|
|
@@ -974,8 +972,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
974
972
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
975
973
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
976
974
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
979
|
-
acryl_datahub-0.15.
|
|
980
|
-
acryl_datahub-0.15.
|
|
981
|
-
acryl_datahub-0.15.
|
|
975
|
+
acryl_datahub-0.15.0rc16.dist-info/METADATA,sha256=ztX_Sh23KkAYHOFM12dj1iw70zfmILqPc65b0oSQg4w,173559
|
|
976
|
+
acryl_datahub-0.15.0rc16.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
977
|
+
acryl_datahub-0.15.0rc16.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
|
|
978
|
+
acryl_datahub-0.15.0rc16.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
979
|
+
acryl_datahub-0.15.0rc16.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
datahub/cli/cli_utils.py
CHANGED
|
@@ -327,6 +327,8 @@ def _ensure_valid_gms_url_acryl_cloud(url: str) -> str:
|
|
|
327
327
|
url = f"{url}/gms"
|
|
328
328
|
elif url.endswith("acryl.io/"):
|
|
329
329
|
url = f"{url}gms"
|
|
330
|
+
if url.endswith("acryl.io/api/gms"):
|
|
331
|
+
url = url.replace("acryl.io/api/gms", "acryl.io/gms")
|
|
330
332
|
|
|
331
333
|
return url
|
|
332
334
|
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Iterable, Optional
|
|
3
|
+
|
|
4
|
+
from pydantic.fields import Field
|
|
5
|
+
|
|
6
|
+
from datahub.configuration.common import ConfigModel
|
|
7
|
+
from datahub.emitter.mce_builder import set_aspect
|
|
8
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
9
|
+
from datahub.ingestion.api.source_helpers import create_dataset_props_patch_builder
|
|
10
|
+
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
11
|
+
from datahub.metadata.schema_classes import (
|
|
12
|
+
DatasetPropertiesClass,
|
|
13
|
+
MetadataChangeEventClass,
|
|
14
|
+
SystemMetadataClass,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def convert_dataset_properties_to_patch(
|
|
21
|
+
urn: str,
|
|
22
|
+
aspect: DatasetPropertiesClass,
|
|
23
|
+
system_metadata: Optional[SystemMetadataClass],
|
|
24
|
+
) -> MetadataWorkUnit:
|
|
25
|
+
patch_builder = create_dataset_props_patch_builder(urn, aspect, system_metadata)
|
|
26
|
+
mcp = next(iter(patch_builder.build()))
|
|
27
|
+
return MetadataWorkUnit(id=MetadataWorkUnit.generate_workunit_id(mcp), mcp_raw=mcp)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def auto_incremental_properties(
|
|
31
|
+
incremental_properties: bool,
|
|
32
|
+
stream: Iterable[MetadataWorkUnit],
|
|
33
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
34
|
+
if not incremental_properties:
|
|
35
|
+
yield from stream
|
|
36
|
+
return # early exit
|
|
37
|
+
|
|
38
|
+
for wu in stream:
|
|
39
|
+
urn = wu.get_urn()
|
|
40
|
+
|
|
41
|
+
if isinstance(wu.metadata, MetadataChangeEventClass):
|
|
42
|
+
properties_aspect = wu.get_aspect_of_type(DatasetPropertiesClass)
|
|
43
|
+
set_aspect(wu.metadata, None, DatasetPropertiesClass)
|
|
44
|
+
if len(wu.metadata.proposedSnapshot.aspects) > 0:
|
|
45
|
+
yield wu
|
|
46
|
+
|
|
47
|
+
if properties_aspect:
|
|
48
|
+
yield convert_dataset_properties_to_patch(
|
|
49
|
+
urn, properties_aspect, wu.metadata.systemMetadata
|
|
50
|
+
)
|
|
51
|
+
elif isinstance(wu.metadata, MetadataChangeProposalWrapper) and isinstance(
|
|
52
|
+
wu.metadata.aspect, DatasetPropertiesClass
|
|
53
|
+
):
|
|
54
|
+
properties_aspect = wu.metadata.aspect
|
|
55
|
+
if properties_aspect:
|
|
56
|
+
yield convert_dataset_properties_to_patch(
|
|
57
|
+
urn, properties_aspect, wu.metadata.systemMetadata
|
|
58
|
+
)
|
|
59
|
+
else:
|
|
60
|
+
yield wu
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# TODO: Use this in SQLCommonConfig. Currently only used in snowflake
|
|
64
|
+
class IncrementalPropertiesConfigMixin(ConfigModel):
|
|
65
|
+
incremental_properties: bool = Field(
|
|
66
|
+
default=False,
|
|
67
|
+
description="When enabled, emits dataset properties as incremental to existing dataset properties "
|
|
68
|
+
"in DataHub. When disabled, re-states dataset properties on each run.",
|
|
69
|
+
)
|
|
@@ -32,6 +32,7 @@ from datahub.metadata.schema_classes import (
|
|
|
32
32
|
SchemaFieldClass,
|
|
33
33
|
SchemaMetadataClass,
|
|
34
34
|
StatusClass,
|
|
35
|
+
SystemMetadataClass,
|
|
35
36
|
TimeWindowSizeClass,
|
|
36
37
|
)
|
|
37
38
|
from datahub.metadata.urns import DatasetUrn, GlossaryTermUrn, TagUrn, Urn
|
|
@@ -65,9 +66,10 @@ def auto_workunit(
|
|
|
65
66
|
def create_dataset_props_patch_builder(
|
|
66
67
|
dataset_urn: str,
|
|
67
68
|
dataset_properties: DatasetPropertiesClass,
|
|
69
|
+
system_metadata: Optional[SystemMetadataClass] = None,
|
|
68
70
|
) -> DatasetPatchBuilder:
|
|
69
71
|
"""Creates a patch builder with a table's or view's attributes and dataset properties"""
|
|
70
|
-
patch_builder = DatasetPatchBuilder(dataset_urn)
|
|
72
|
+
patch_builder = DatasetPatchBuilder(dataset_urn, system_metadata)
|
|
71
73
|
patch_builder.set_display_name(dataset_properties.name)
|
|
72
74
|
patch_builder.set_description(dataset_properties.description)
|
|
73
75
|
patch_builder.set_created(dataset_properties.created)
|
|
@@ -65,11 +65,11 @@ class DatahubRestSinkConfig(DatahubClientConfig):
|
|
|
65
65
|
mode: RestSinkMode = _DEFAULT_REST_SINK_MODE
|
|
66
66
|
|
|
67
67
|
# These only apply in async modes.
|
|
68
|
-
max_threads:
|
|
69
|
-
max_pending_requests:
|
|
68
|
+
max_threads: pydantic.PositiveInt = _DEFAULT_REST_SINK_MAX_THREADS
|
|
69
|
+
max_pending_requests: pydantic.PositiveInt = 2000
|
|
70
70
|
|
|
71
71
|
# Only applies in async batch mode.
|
|
72
|
-
max_per_batch:
|
|
72
|
+
max_per_batch: pydantic.PositiveInt = 100
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
@dataclasses.dataclass
|
|
@@ -201,6 +201,10 @@ class ABSSource(StatefulIngestionSourceBase):
|
|
|
201
201
|
).infer_schema(file)
|
|
202
202
|
elif extension == ".json":
|
|
203
203
|
fields = json.JsonInferrer().infer_schema(file)
|
|
204
|
+
elif extension == ".jsonl":
|
|
205
|
+
fields = json.JsonInferrer(
|
|
206
|
+
max_rows=self.source_config.max_rows, format="jsonl"
|
|
207
|
+
).infer_schema(file)
|
|
204
208
|
elif extension == ".avro":
|
|
205
209
|
fields = avro.AvroInferrer().infer_schema(file)
|
|
206
210
|
else:
|
|
@@ -153,11 +153,6 @@ class DataHubGcSource(Source):
|
|
|
153
153
|
self.truncate_indices()
|
|
154
154
|
except Exception as e:
|
|
155
155
|
self.report.failure("While trying to truncate indices ", exc=e)
|
|
156
|
-
if self.dataprocess_cleanup:
|
|
157
|
-
try:
|
|
158
|
-
yield from self.dataprocess_cleanup.get_workunits_internal()
|
|
159
|
-
except Exception as e:
|
|
160
|
-
self.report.failure("While trying to cleanup data process ", exc=e)
|
|
161
156
|
if self.soft_deleted_entities_cleanup:
|
|
162
157
|
try:
|
|
163
158
|
self.soft_deleted_entities_cleanup.cleanup_soft_deleted_entities()
|
|
@@ -170,6 +165,11 @@ class DataHubGcSource(Source):
|
|
|
170
165
|
self.execution_request_cleanup.run()
|
|
171
166
|
except Exception as e:
|
|
172
167
|
self.report.failure("While trying to cleanup execution request ", exc=e)
|
|
168
|
+
if self.dataprocess_cleanup:
|
|
169
|
+
try:
|
|
170
|
+
yield from self.dataprocess_cleanup.get_workunits_internal()
|
|
171
|
+
except Exception as e:
|
|
172
|
+
self.report.failure("While trying to cleanup data process ", exc=e)
|
|
173
173
|
yield from []
|
|
174
174
|
|
|
175
175
|
def truncate_indices(self) -> None:
|
|
@@ -60,7 +60,7 @@ class SoftDeletedEntitiesCleanupConfig(ConfigModel):
|
|
|
60
60
|
description="Query to filter entities",
|
|
61
61
|
)
|
|
62
62
|
limit_entities_delete: Optional[int] = Field(
|
|
63
|
-
|
|
63
|
+
25000, description="Max number of entities to delete."
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
runtime_limit_seconds: Optional[int] = Field(
|
datahub/ingestion/source/mode.py
CHANGED
|
@@ -18,7 +18,6 @@ from pydantic import Field, validator
|
|
|
18
18
|
from requests.adapters import HTTPAdapter, Retry
|
|
19
19
|
from requests.exceptions import ConnectionError
|
|
20
20
|
from requests.models import HTTPBasicAuth, HTTPError
|
|
21
|
-
from sqllineage.runner import LineageRunner
|
|
22
21
|
from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential
|
|
23
22
|
|
|
24
23
|
import datahub.emitter.mce_builder as builder
|
|
@@ -820,28 +819,6 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
820
819
|
)
|
|
821
820
|
return None
|
|
822
821
|
|
|
823
|
-
@lru_cache(maxsize=None)
|
|
824
|
-
def _get_source_from_query(self, raw_query: str) -> set:
|
|
825
|
-
query = self._replace_definitions(raw_query)
|
|
826
|
-
parser = LineageRunner(query)
|
|
827
|
-
source_paths = set()
|
|
828
|
-
try:
|
|
829
|
-
for table in parser.source_tables:
|
|
830
|
-
sources = str(table).split(".")
|
|
831
|
-
source_schema, source_table = sources[-2], sources[-1]
|
|
832
|
-
if source_schema == "<default>":
|
|
833
|
-
source_schema = str(self.config.default_schema)
|
|
834
|
-
|
|
835
|
-
source_paths.add(f"{source_schema}.{source_table}")
|
|
836
|
-
except Exception as e:
|
|
837
|
-
self.report.report_failure(
|
|
838
|
-
title="Failed to Extract Lineage From Query",
|
|
839
|
-
message="Unable to retrieve lineage from Mode query.",
|
|
840
|
-
context=f"Query: {raw_query}, Error: {str(e)}",
|
|
841
|
-
)
|
|
842
|
-
|
|
843
|
-
return source_paths
|
|
844
|
-
|
|
845
822
|
def _get_datasource_urn(
|
|
846
823
|
self,
|
|
847
824
|
platform: str,
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
import math
|
|
3
3
|
import sys
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
|
-
from typing import Dict, Iterable, List, Optional, Set, Type
|
|
5
|
+
from typing import Dict, Iterable, List, Optional, Set
|
|
6
6
|
|
|
7
7
|
import dateutil.parser as dp
|
|
8
8
|
from packaging import version
|
|
@@ -22,7 +22,6 @@ from datahub.ingestion.api.decorators import ( # SourceCapability,; capability,
|
|
|
22
22
|
platform_name,
|
|
23
23
|
support_status,
|
|
24
24
|
)
|
|
25
|
-
from datahub.ingestion.api.registry import import_path
|
|
26
25
|
from datahub.ingestion.api.source import Source, SourceCapability, SourceReport
|
|
27
26
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
28
27
|
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
|
@@ -39,9 +38,9 @@ from datahub.metadata.schema_classes import (
|
|
|
39
38
|
ChartTypeClass,
|
|
40
39
|
DashboardInfoClass,
|
|
41
40
|
)
|
|
41
|
+
from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result
|
|
42
42
|
from datahub.utilities.lossy_collections import LossyDict, LossyList
|
|
43
43
|
from datahub.utilities.perf_timer import PerfTimer
|
|
44
|
-
from datahub.utilities.sql_parser_base import SQLParser
|
|
45
44
|
from datahub.utilities.threaded_iterator_executor import ThreadedIteratorExecutor
|
|
46
45
|
|
|
47
46
|
logger = logging.getLogger(__name__)
|
|
@@ -270,10 +269,6 @@ class RedashConfig(ConfigModel):
|
|
|
270
269
|
parse_table_names_from_sql: bool = Field(
|
|
271
270
|
default=False, description="See note below."
|
|
272
271
|
)
|
|
273
|
-
sql_parser: str = Field(
|
|
274
|
-
default="datahub.utilities.sql_parser.DefaultSQLParser",
|
|
275
|
-
description="custom SQL parser. See note below for details.",
|
|
276
|
-
)
|
|
277
272
|
|
|
278
273
|
env: str = Field(
|
|
279
274
|
default=DEFAULT_ENV,
|
|
@@ -354,7 +349,6 @@ class RedashSource(Source):
|
|
|
354
349
|
self.api_page_limit = self.config.api_page_limit or math.inf
|
|
355
350
|
|
|
356
351
|
self.parse_table_names_from_sql = self.config.parse_table_names_from_sql
|
|
357
|
-
self.sql_parser_path = self.config.sql_parser
|
|
358
352
|
|
|
359
353
|
logger.info(
|
|
360
354
|
f"Running Redash ingestion with parse_table_names_from_sql={self.parse_table_names_from_sql}"
|
|
@@ -380,31 +374,6 @@ class RedashSource(Source):
|
|
|
380
374
|
config = RedashConfig.parse_obj(config_dict)
|
|
381
375
|
return cls(ctx, config)
|
|
382
376
|
|
|
383
|
-
@classmethod
|
|
384
|
-
def _import_sql_parser_cls(cls, sql_parser_path: str) -> Type[SQLParser]:
|
|
385
|
-
assert "." in sql_parser_path, "sql_parser-path must contain a ."
|
|
386
|
-
parser_cls = import_path(sql_parser_path)
|
|
387
|
-
|
|
388
|
-
if not issubclass(parser_cls, SQLParser):
|
|
389
|
-
raise ValueError(f"must be derived from {SQLParser}; got {parser_cls}")
|
|
390
|
-
return parser_cls
|
|
391
|
-
|
|
392
|
-
@classmethod
|
|
393
|
-
def _get_sql_table_names(cls, sql: str, sql_parser_path: str) -> List[str]:
|
|
394
|
-
parser_cls = cls._import_sql_parser_cls(sql_parser_path)
|
|
395
|
-
|
|
396
|
-
try:
|
|
397
|
-
sql_table_names: List[str] = parser_cls(sql).get_tables()
|
|
398
|
-
except Exception as e:
|
|
399
|
-
logger.warning(f"Sql parser failed on {sql} with {e}")
|
|
400
|
-
return []
|
|
401
|
-
|
|
402
|
-
# Remove quotes from table names
|
|
403
|
-
sql_table_names = [t.replace('"', "") for t in sql_table_names]
|
|
404
|
-
sql_table_names = [t.replace("`", "") for t in sql_table_names]
|
|
405
|
-
|
|
406
|
-
return sql_table_names
|
|
407
|
-
|
|
408
377
|
def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
|
|
409
378
|
url = f"/api/data_sources/{data_source_id}"
|
|
410
379
|
resp = self.client._get(url).json()
|
|
@@ -441,14 +410,6 @@ class RedashSource(Source):
|
|
|
441
410
|
|
|
442
411
|
return database_name
|
|
443
412
|
|
|
444
|
-
def _construct_datalineage_urn(
|
|
445
|
-
self, platform: str, database_name: str, sql_table_name: str
|
|
446
|
-
) -> str:
|
|
447
|
-
full_dataset_name = get_full_qualified_name(
|
|
448
|
-
platform, database_name, sql_table_name
|
|
449
|
-
)
|
|
450
|
-
return builder.make_dataset_urn(platform, full_dataset_name, self.config.env)
|
|
451
|
-
|
|
452
413
|
def _get_datasource_urns(
|
|
453
414
|
self, data_source: Dict, sql_query_data: Dict = {}
|
|
454
415
|
) -> Optional[List[str]]:
|
|
@@ -464,34 +425,23 @@ class RedashSource(Source):
|
|
|
464
425
|
# Getting table lineage from SQL parsing
|
|
465
426
|
if self.parse_table_names_from_sql and data_source_syntax == "sql":
|
|
466
427
|
dataset_urns = list()
|
|
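467
|
-
try:
|
|
468
|
-
sql_table_names = self._get_sql_table_names(
|
|
469
|
-
query, self.sql_parser_path
|
|
470
|
-
)
|
|
471
|
-
except Exception as e:
|
|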
428
|
+
sql_parser_in_tables = create_lineage_sql_parsed_result(
|
|
429
|
+
query=query,
|
|
430
|
+
platform=platform,
|
|
431
|
+
env=self.config.env,
|
|
432
|
+
platform_instance=None,
|
|
433
|
+
default_db=database_name,
|
|
434
|
+
)
|
|
435
|
+
# make sure dataset_urns is not empty list
|
|
436
|
+
dataset_urns = sql_parser_in_tables.in_tables
|
|
437
|
+
if sql_parser_in_tables.debug_info.table_error:
|
|
472
438
|
self.report.queries_problem_parsing.add(str(query_id))
|
|
473
439
|
self.error(
|
|
474
440
|
logger,
|
|
475
441
|
"sql-parsing",
|
|
476
|
-
f"exception {e} in parsing query-{query_id}-datasource-{data_source_id}",
|
|
442
|
+
f"exception {sql_parser_in_tables.debug_info.table_error} in parsing query-{query_id}-datasource-{data_source_id}",
|
|
477
443
|
)
|
|
478
|
-
sql_table_names = []
|
|
479
|
-
for sql_table_name in sql_table_names:
|
|
480
|
-
try:
|
|
481
|
-
dataset_urns.append(
|
|
482
|
-
self._construct_datalineage_urn(
|
|
483
|
-
platform, database_name, sql_table_name
|
|
484
|
-
)
|
|
485
|
-
)
|
|
486
|
-
except Exception:
|
|
487
|
-
self.report.queries_problem_parsing.add(str(query_id))
|
|
488
|
-
self.warn(
|
|
489
|
-
logger,
|
|
490
|
-
"data-urn-invalid",
|
|
491
|
-
f"Problem making URN for {sql_table_name} parsed from query {query_id}",
|
|
492
|
-
)
|
|
493
444
|
|
|
494
|
-
# make sure dataset_urns is not empty list
|
|
495
445
|
return dataset_urns if len(dataset_urns) > 0 else None
|
|
496
446
|
|
|
497
447
|
else:
|
|
@@ -159,6 +159,7 @@ class RedshiftConfig(
|
|
|
159
159
|
description="Whether to extract column level lineage. This config works with rest-sink only.",
|
|
160
160
|
)
|
|
161
161
|
|
|
162
|
+
# TODO - use DatasetPropertiesConfigMixin instead
|
|
162
163
|
patch_custom_properties: bool = Field(
|
|
163
164
|
default=True,
|
|
164
165
|
description="Whether to patch custom properties on existing datasets rather than replace.",
|
|
@@ -831,6 +831,8 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
831
831
|
customProperties=custom_properties,
|
|
832
832
|
)
|
|
833
833
|
if self.config.patch_custom_properties:
|
|
834
|
+
# TODO: use auto_incremental_properties workunit processor instead
|
|
835
|
+
# Deprecate use of patch_custom_properties
|
|
834
836
|
patch_builder = create_dataset_props_patch_builder(
|
|
835
837
|
dataset_urn, dataset_properties
|
|
836
838
|
)
|
|
@@ -16,6 +16,9 @@ from datahub.configuration.source_common import (
|
|
|
16
16
|
from datahub.configuration.time_window_config import BaseTimeWindowConfig
|
|
17
17
|
from datahub.configuration.validate_field_removal import pydantic_removed_field
|
|
18
18
|
from datahub.configuration.validate_field_rename import pydantic_renamed_field
|
|
19
|
+
from datahub.ingestion.api.incremental_properties_helper import (
|
|
20
|
+
IncrementalPropertiesConfigMixin,
|
|
21
|
+
)
|
|
19
22
|
from datahub.ingestion.glossary.classification_mixin import (
|
|
20
23
|
ClassificationSourceConfigMixin,
|
|
21
24
|
)
|
|
@@ -188,6 +191,7 @@ class SnowflakeV2Config(
|
|
|
188
191
|
StatefulUsageConfigMixin,
|
|
189
192
|
StatefulProfilingConfigMixin,
|
|
190
193
|
ClassificationSourceConfigMixin,
|
|
194
|
+
IncrementalPropertiesConfigMixin,
|
|
191
195
|
):
|
|
192
196
|
include_usage_stats: bool = Field(
|
|
193
197
|
default=True,
|
|
@@ -17,6 +17,9 @@ from datahub.ingestion.api.decorators import (
|
|
|
17
17
|
support_status,
|
|
18
18
|
)
|
|
19
19
|
from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage
|
|
20
|
+
from datahub.ingestion.api.incremental_properties_helper import (
|
|
21
|
+
auto_incremental_properties,
|
|
22
|
+
)
|
|
20
23
|
from datahub.ingestion.api.source import (
|
|
21
24
|
CapabilityReport,
|
|
22
25
|
MetadataWorkUnitProcessor,
|
|
@@ -446,6 +449,9 @@ class SnowflakeV2Source(
|
|
|
446
449
|
functools.partial(
|
|
447
450
|
auto_incremental_lineage, self.config.incremental_lineage
|
|
448
451
|
),
|
|
452
|
+
functools.partial(
|
|
453
|
+
auto_incremental_properties, self.config.incremental_properties
|
|
454
|
+
),
|
|
449
455
|
StaleEntityRemovalHandler.create(
|
|
450
456
|
self, self.config, self.ctx
|
|
451
457
|
).workunit_processor,
|