acryl-datahub 1.1.0.5rc4__py3-none-any.whl → 1.1.0.5rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.5rc4.dist-info → acryl_datahub-1.1.0.5rc6.dist-info}/METADATA +2495 -2495
- {acryl_datahub-1.1.0.5rc4.dist-info → acryl_datahub-1.1.0.5rc6.dist-info}/RECORD +25 -24
- datahub/_version.py +1 -1
- datahub/cli/check_cli.py +21 -4
- datahub/ingestion/api/decorators.py +14 -3
- datahub/ingestion/graph/client.py +71 -28
- datahub/ingestion/source/aws/glue.py +1 -1
- datahub/ingestion/source/bigquery_v2/queries.py +2 -2
- datahub/ingestion/source/common/subtypes.py +41 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/dbt/dbt_common.py +1 -1
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +348 -112
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sql/athena.py +110 -13
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/mssql/source.py +9 -0
- datahub/ingestion/source/sql/sql_common.py +3 -0
- datahub/ingestion/source/sql/teradata.py +4 -1
- datahub/ingestion/source/sql/vertica.py +8 -1
- datahub/ingestion/source/tableau/tableau.py +6 -1
- {acryl_datahub-1.1.0.5rc4.dist-info → acryl_datahub-1.1.0.5rc6.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.5rc4.dist-info → acryl_datahub-1.1.0.5rc6.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.5rc4.dist-info → acryl_datahub-1.1.0.5rc6.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.5rc4.dist-info → acryl_datahub-1.1.0.5rc6.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.1.0.
|
|
1
|
+
acryl_datahub-1.1.0.5rc6.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=P6EdFT0jTjQYZMQLbjiR8hZ8LYv0hNCOD3SH085iCVw,323
|
|
5
5
|
datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -67,7 +67,7 @@ datahub/api/graphql/assertion.py,sha256=o_q6SV7N1rJTVMNKSUBGJnZPk6TcVYoVShgDmPw6
|
|
|
67
67
|
datahub/api/graphql/base.py,sha256=zk724_oYSJ0nK7X7Z80MijnA6ry9JqpxnBsJeYuONKA,1737
|
|
68
68
|
datahub/api/graphql/operation.py,sha256=7E80HyE-5JLfLbFkQbgJeNwIaKngjBCrWES8eJO4OYc,5112
|
|
69
69
|
datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
-
datahub/cli/check_cli.py,sha256=
|
|
70
|
+
datahub/cli/check_cli.py,sha256=1R_OwSx47GPtWMP0e9Ljef7mBr-xxDG1O8ls7YG8khQ,15767
|
|
71
71
|
datahub/cli/cli_utils.py,sha256=2uvPv6WqxbRdH7UteHwhRash4E0ncU5P6XebrFLeECo,13584
|
|
72
72
|
datahub/cli/config_utils.py,sha256=EeBGfhmf4AxYoTfnZ4GSiGIgpzJFkduNjN_FwmxZGhA,4889
|
|
73
73
|
datahub/cli/container_cli.py,sha256=uDOwewGEPYHQt-ppYEb8ESXhZjPNIZG0Rt3cm2FzPqc,1569
|
|
@@ -141,7 +141,7 @@ datahub/ingestion/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
141
141
|
datahub/ingestion/api/closeable.py,sha256=k12AT--s4GDtZ-po_rVm5QKgvGIDteeRPByZPIOfecA,599
|
|
142
142
|
datahub/ingestion/api/committable.py,sha256=4S6GuBzvX2vb1A8P506NbspOKfZ1621sBG8t0lvRb8o,886
|
|
143
143
|
datahub/ingestion/api/common.py,sha256=26lXJiM4YfdnVH1xfe2bpZNp2VKCdJcJ8ynK7rhh0FY,3029
|
|
144
|
-
datahub/ingestion/api/decorators.py,sha256=
|
|
144
|
+
datahub/ingestion/api/decorators.py,sha256=okTDYtOQygyC9-Mu3jiTHuW0Wwt9Td25utxc5yMMSDY,4420
|
|
145
145
|
datahub/ingestion/api/global_context.py,sha256=OdSJg4a_RKE52nu8MSiEkK2UqRRDhDTyOleHEAzPKho,575
|
|
146
146
|
datahub/ingestion/api/incremental_lineage_helper.py,sha256=7a6FTJ_uz4EEJS1vPtbYB2KvNlcZB3py28_FKxmRiSk,5993
|
|
147
147
|
datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPsCtRa7ffDGPA1w1hgPUjeenZBU,2514
|
|
@@ -179,7 +179,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
|
|
|
179
179
|
datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
|
|
180
180
|
datahub/ingestion/glossary/datahub_classifier.py,sha256=O7wm6gQT1Jf2QSKdWjJQbS5oSzJwplXzfza26Gdq5Mg,7555
|
|
181
181
|
datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
|
-
datahub/ingestion/graph/client.py,sha256=
|
|
182
|
+
datahub/ingestion/graph/client.py,sha256=kuxNcwW6O3o9i_AHoRLabeMSrYBMOSCExiZYnzOBYV0,74221
|
|
183
183
|
datahub/ingestion/graph/config.py,sha256=rmkcqAL8fJoY9QyAeS0Xm8HvwHzV3pCjY-Om-50JJTI,1015
|
|
184
184
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
185
185
|
datahub/ingestion/graph/entity_versioning.py,sha256=nrcNz0Qm6kpE6oTu_mrYUQDx14KPspBTc6R9SyFUY6c,6901
|
|
@@ -223,7 +223,7 @@ datahub/ingestion/source/openapi_parser.py,sha256=T87e2r-oPGgQl_FDMHnSGFZzApvWDC
|
|
|
223
223
|
datahub/ingestion/source/preset.py,sha256=fncn-fgYcITsYEHVsvV6cGTQ9_xc_R06ejrw6ZbY3QA,3966
|
|
224
224
|
datahub/ingestion/source/pulsar.py,sha256=u5F8QnCLJsht5-7XCiUTsnfhCPIpKVB_l32CgMCU-As,20187
|
|
225
225
|
datahub/ingestion/source/redash.py,sha256=YxjSad-X_wPmxYH8dJmFz_VCFhiLTCTSlK99WdvcYiA,30653
|
|
226
|
-
datahub/ingestion/source/salesforce.py,sha256=
|
|
226
|
+
datahub/ingestion/source/salesforce.py,sha256=Pa_w1XszxFd8fyhpSWOfc2nOnevHwwstIvnRrQT4R9M,40584
|
|
227
227
|
datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
|
|
228
228
|
datahub/ingestion/source/sql_queries.py,sha256=Ip7UZub7fgMh7P5jL_zJPY7lSkc9GGTy8GJ8lqZrcsE,9502
|
|
229
229
|
datahub/ingestion/source/superset.py,sha256=4wEjhBj_Zf__1EhXDE9ltbaR9NTMZVdXFviHLf5VFL4,48387
|
|
@@ -237,7 +237,7 @@ datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
237
237
|
datahub/ingestion/source/apply/datahub_apply.py,sha256=xTD-Iq3UHhxcz61RwNuI2kJjRrnQEfZFSgvS1X6loV4,7703
|
|
238
238
|
datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
239
239
|
datahub/ingestion/source/aws/aws_common.py,sha256=DN49pHY-AOVHqI3smF9cVPwvPmh5UqOF4ga-wga_gPc,17928
|
|
240
|
-
datahub/ingestion/source/aws/glue.py,sha256=
|
|
240
|
+
datahub/ingestion/source/aws/glue.py,sha256=Pa5QmQf8ZTQ9KAA6frp5LGrls3gFhRATEvhjaGTJnB8,66533
|
|
241
241
|
datahub/ingestion/source/aws/s3_boto_utils.py,sha256=Y54jlLV5gLcuZ4Zs57kIW5dYHD89RSFfsVNlFbRnSkQ,3901
|
|
242
242
|
datahub/ingestion/source/aws/s3_util.py,sha256=OFypcgmVC6jnZM90-gjcPpAMtTV1lbnreCaMhCzNlzs,2149
|
|
243
243
|
datahub/ingestion/source/aws/sagemaker.py,sha256=Bl2tkBYnrindgx61VHYgNovUF_Kp_fXNcivQn28vC2w,5254
|
|
@@ -270,7 +270,7 @@ datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5I
|
|
|
270
270
|
datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
|
|
271
271
|
datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
|
|
272
272
|
datahub/ingestion/source/bigquery_v2/profiler.py,sha256=oLf5jMjJf-ShNny9Dll2tCsOoPMF1DxAh7e7etpeLq4,10821
|
|
273
|
-
datahub/ingestion/source/bigquery_v2/queries.py,sha256=
|
|
273
|
+
datahub/ingestion/source/bigquery_v2/queries.py,sha256=gDvvgajptmNn5AiBglmDhGAC9LBh8fzw56_d8ewLbxA,20222
|
|
274
274
|
datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=_5cAXVU8b8T_nAPDsvN2JRd2dmM1t1J1mRylfKiPen4,19530
|
|
275
275
|
datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
|
|
276
276
|
datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -283,12 +283,12 @@ datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
283
283
|
datahub/ingestion/source/common/data_platforms.py,sha256=HhuP3YIEi2WpyKDjUU8RiM0a2qjHWQcvc8kcqub0cVo,548
|
|
284
284
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
285
285
|
datahub/ingestion/source/common/gcp_credentials_config.py,sha256=_NapGkAqZMbXNClLlmOfANS7U6rChhdthRX9s9iUv9k,2411
|
|
286
|
-
datahub/ingestion/source/common/subtypes.py,sha256=
|
|
286
|
+
datahub/ingestion/source/common/subtypes.py,sha256=SvsaT2PM5VG7iH3c0iyvAK3oyWS_EpB3LhFOoX-e5Ws,4493
|
|
287
287
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
288
288
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
289
289
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=55mK0nsehqGDTUOol9Oi3jZs8Pb04PIsHdC2WPP0dkg,6576
|
|
290
290
|
datahub/ingestion/source/data_lake_common/object_store.py,sha256=T7onzor_15MR-7ecxqcd6YnGY0-bGXHJzseg6nfo2Og,20302
|
|
291
|
-
datahub/ingestion/source/data_lake_common/path_spec.py,sha256=
|
|
291
|
+
datahub/ingestion/source/data_lake_common/path_spec.py,sha256=ekJAr4-PE2RhzQnmKb2xcSs_YncC1Dz95-UrXI67Vos,23584
|
|
292
292
|
datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
293
293
|
datahub/ingestion/source/datahub/config.py,sha256=bjR1U3F85FbtgqmLDW-f_4dQvuw5AsJQxdQlOUeHDUk,5126
|
|
294
294
|
datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
|
|
@@ -299,7 +299,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
299
299
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
300
300
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
301
301
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=435blM28-RIE7ZfQReh69zBXppv8tjbku10oFcyJY8Q,17891
|
|
302
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
302
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=5iqlHbH1eaRcysxvFmDfgNrfVpygnmmeHbLq_sLg2P4,82190
|
|
303
303
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=WVI2ZYXOMxgFzJnJqsqmEGS-5xdfiVIDsCb78lvSeQ0,24930
|
|
304
304
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
|
|
305
305
|
datahub/ingestion/source/debug/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -443,8 +443,8 @@ datahub/ingestion/source/s3/__init__.py,sha256=HjqFPj11WtNFZM3kcVshlDb7kOsc19-l_
|
|
|
443
443
|
datahub/ingestion/source/s3/config.py,sha256=lElFXgEpKDT9SVoiXvtx98wV6Gp880qP4pLQaOGJGOo,7828
|
|
444
444
|
datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=FfrcgK-JEF94vw-l3q6pN6FENXb-wZzW2w1VUZVkwW8,3620
|
|
445
445
|
datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
446
|
-
datahub/ingestion/source/s3/report.py,sha256=
|
|
447
|
-
datahub/ingestion/source/s3/source.py,sha256=
|
|
446
|
+
datahub/ingestion/source/s3/report.py,sha256=9Ej1UCChw963UpGw1-7asi5vFrOM232gfgG8bRdKPp0,667
|
|
447
|
+
datahub/ingestion/source/s3/source.py,sha256=ISN4OoLEXRx54pD3seoUUQbPOV54uIrC-QJx5n-CiBI,60246
|
|
448
448
|
datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
449
449
|
datahub/ingestion/source/sac/sac.py,sha256=0s_JxHGOhit3Wvgbg7qQi-Z9j9_TgBX_I1yOR3L6-rA,30243
|
|
450
450
|
datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
|
|
@@ -487,7 +487,8 @@ datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCn
|
|
|
487
487
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=w-SftpjHSOf_6S2T2srHJyJta3MHS0usmn4Z9jgx4QE,13858
|
|
488
488
|
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=eqOVRYmFiWQPAIDWUcOo5QZkG05xanlEbxz4aDTvSNM,34706
|
|
489
489
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
490
|
-
datahub/ingestion/source/sql/athena.py,sha256=
|
|
490
|
+
datahub/ingestion/source/sql/athena.py,sha256=lQ_zniHtmtSFCnSmzM2dG7ND6zOZmhu6t-I4ZOeQKho,27652
|
|
491
|
+
datahub/ingestion/source/sql/athena_properties_extractor.py,sha256=n2SvqeUbNWxiWWdkDs8VYlUPlLwfZzZy9AIa-V4D7AY,28531
|
|
491
492
|
datahub/ingestion/source/sql/clickhouse.py,sha256=_uf-6fQYxI1-kMw0dNUvJf1f75XX0Qps4HMVlgzdpmo,25670
|
|
492
493
|
datahub/ingestion/source/sql/cockroachdb.py,sha256=WoOKCq7YjsuzSPm1SmKIYZ9CrvlSF8zWmP1fNHn4G3Q,1360
|
|
493
494
|
datahub/ingestion/source/sql/druid.py,sha256=_tzgTa5jhPUXk6WCmS7p10feCwJm6yUFcOgMZA-OcE8,2922
|
|
@@ -499,7 +500,7 @@ datahub/ingestion/source/sql/mysql.py,sha256=34Vu3otULxUY0-JUEYdZw2aoyuTlc9KLcoJ
|
|
|
499
500
|
datahub/ingestion/source/sql/oracle.py,sha256=ftnrk3iiEelyv9PBHPYbairuP1WgxZbi1gu6YdqY69E,29965
|
|
500
501
|
datahub/ingestion/source/sql/postgres.py,sha256=t-28dTWLa3c_NgWlGS9ZVNFlZLxd6xTSguIKP4S4NHs,11887
|
|
501
502
|
datahub/ingestion/source/sql/presto.py,sha256=tATa0M2q0PjUC_E9W_jSUsmKTP7cVJayLgrFMzG_eao,4223
|
|
502
|
-
datahub/ingestion/source/sql/sql_common.py,sha256=
|
|
503
|
+
datahub/ingestion/source/sql/sql_common.py,sha256=wSCfcqEONichvl4D9_tQ1HQa8puYF-5BpNnES93f7-Y,51786
|
|
503
504
|
datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fxk9xei_CUIAXB0,8222
|
|
504
505
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
505
506
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=Zr39j4SI1fPTx1JdopVJyBslFnyp3lZCeb1th9eEB5c,11723
|
|
@@ -509,13 +510,13 @@ datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F
|
|
|
509
510
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
510
511
|
datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=u0ZvgdJjXZdo_vl7YIQfYuuWbGwpnH6OSozI2e8ZV4I,858
|
|
511
512
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
512
|
-
datahub/ingestion/source/sql/teradata.py,sha256=
|
|
513
|
+
datahub/ingestion/source/sql/teradata.py,sha256=yRU_F3dp2rH0nMC0iiL4X2DWXLyNn7kSWZ_ewSIgYEA,33537
|
|
513
514
|
datahub/ingestion/source/sql/trino.py,sha256=zIfQ6GvW8Sbw4sxqsTcnibT51STka_nzNYvmld6HfHw,18947
|
|
514
515
|
datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=AB3Gtx4omAy_08zadHQpmUGmIGufkZ6o_ihWNnfvzYc,5783
|
|
515
|
-
datahub/ingestion/source/sql/vertica.py,sha256=
|
|
516
|
+
datahub/ingestion/source/sql/vertica.py,sha256=i1HLBW_cjAYbSCxB5MI-_7ZWD7Wmn4m6kRc_IZMD__o,33595
|
|
516
517
|
datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
|
|
517
518
|
datahub/ingestion/source/sql/mssql/job_models.py,sha256=nAo3rciu-w2-dXCz6_ekDEbGMEjCMEfh8WvSfXoF2l0,9359
|
|
518
|
-
datahub/ingestion/source/sql/mssql/source.py,sha256=
|
|
519
|
+
datahub/ingestion/source/sql/mssql/source.py,sha256=Uise_u6yXKU__9B_U3D3yObWNIVDzrz2AgEDZOlk6bQ,43101
|
|
519
520
|
datahub/ingestion/source/sql/stored_procedures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
520
521
|
datahub/ingestion/source/sql/stored_procedures/base.py,sha256=n0l5OaTuW-m3TRvkxs3TqvgMeWF6BagzW3tjyWUcC1A,8631
|
|
521
522
|
datahub/ingestion/source/sql/stored_procedures/lineage.py,sha256=fryLhuAlsjr9SHIjHJ-PmtCMx89bjzWVnJZ3f1bwQVU,1905
|
|
@@ -535,7 +536,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
|
|
|
535
536
|
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=DziD57PbHn2Tcy51tYXCG-GQgyTGMUxnkuzVS_xihFY,4079
|
|
536
537
|
datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
|
|
537
538
|
datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
538
|
-
datahub/ingestion/source/tableau/tableau.py,sha256=
|
|
539
|
+
datahub/ingestion/source/tableau/tableau.py,sha256=ryLxfWwCRXhz7-PhPl1y3OwX08KOFT5agBc9XWZRrGQ,155291
|
|
539
540
|
datahub/ingestion/source/tableau/tableau_common.py,sha256=MIkHKZg_v2IVCRk-YdPlLZl3m0LcWZm5Indwb3IV2ZQ,26931
|
|
540
541
|
datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
|
|
541
542
|
datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
|
|
@@ -1087,8 +1088,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1087
1088
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1088
1089
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1089
1090
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1090
|
-
acryl_datahub-1.1.0.
|
|
1091
|
-
acryl_datahub-1.1.0.
|
|
1092
|
-
acryl_datahub-1.1.0.
|
|
1093
|
-
acryl_datahub-1.1.0.
|
|
1094
|
-
acryl_datahub-1.1.0.
|
|
1091
|
+
acryl_datahub-1.1.0.5rc6.dist-info/METADATA,sha256=pK9PlvvTCMioPebxscqz_snMjStbpOTeZEkqOXiSW-s,182347
|
|
1092
|
+
acryl_datahub-1.1.0.5rc6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1093
|
+
acryl_datahub-1.1.0.5rc6.dist-info/entry_points.txt,sha256=bnGf6eX9UhiW8yVHtt6MJCVcmLErvrVQxTJAayA-PKc,9885
|
|
1094
|
+
acryl_datahub-1.1.0.5rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1095
|
+
acryl_datahub-1.1.0.5rc6.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
datahub/cli/check_cli.py
CHANGED
|
@@ -482,26 +482,43 @@ def server_config() -> None:
|
|
|
482
482
|
|
|
483
483
|
@check.command()
@click.option(
    "--urn", required=False, help="The urn or urn pattern (supports % for wildcard)"
)
@click.option("--aspect", default=None, help="Filter to a specific aspect name.")
@click.option(
    "--start", type=int, default=None, help="Row number of sql store to restore from."
)
@click.option("--batch-size", type=int, default=None, help="How many rows to restore.")
@click.option(
    "--file",
    required=False,
    # The path is opened and read line by line by the graph client, so a
    # directory must be rejected at argument-parsing time instead of failing
    # later with IsADirectoryError.
    type=click.Path(exists=True, dir_okay=False, readable=True),
    help="File absolute path containing URNs (one per line) to restore indices",
)
def restore_indices(
    urn: Optional[str],
    aspect: Optional[str],
    start: Optional[int],
    batch_size: Optional[int],
    file: Optional[str],
) -> None:
    """Resync metadata changes into the search and graph indices."""
    # NOTE: the docstring above is rendered by click as the command help text,
    # so it is intentionally kept short.
    #
    # At least one selector is required. When both are given, the graph client
    # processes the file and does not use --urn / --start / --batch-size.
    if urn is None and file is None:
        raise click.UsageError("Either --urn or --file must be provided")
    graph = get_default_graph(ClientMode.CLI)

    graph.restore_indices(
        urn_pattern=urn,
        aspect=aspect,
        start=start,
        batch_size=batch_size,
        file=file,
    )
|
|
507
|
-
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
@check.command()
def get_kafka_consumer_offsets() -> None:
    """Get Kafka consumer offsets from the DataHub API."""
    # Fetch the offsets through the default CLI graph client and pretty-print
    # the resulting mapping to stdout.
    offsets = get_default_graph(ClientMode.CLI).get_kafka_consumer_offsets()
    pprint.pprint(offsets)
|
|
@@ -1,12 +1,16 @@
|
|
|
1
|
+
# So that SourceCapabilityModifier can be resolved at runtime
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
1
4
|
from dataclasses import dataclass
|
|
2
5
|
from enum import Enum, auto
|
|
3
|
-
from typing import Callable, Dict, Optional, Type
|
|
6
|
+
from typing import Callable, Dict, List, Optional, Type
|
|
4
7
|
|
|
5
8
|
from datahub.ingestion.api.common import PipelineContext
|
|
6
9
|
from datahub.ingestion.api.source import (
|
|
7
10
|
Source,
|
|
8
11
|
SourceCapability as SourceCapability,
|
|
9
12
|
)
|
|
13
|
+
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
10
14
|
|
|
11
15
|
|
|
12
16
|
def config_class(config_cls: Type) -> Callable[[Type], Type]:
|
|
@@ -88,10 +92,14 @@ class CapabilitySetting:
|
|
|
88
92
|
capability: SourceCapability
|
|
89
93
|
description: str
|
|
90
94
|
supported: bool
|
|
95
|
+
subtype_modifier: Optional[List[SourceCapabilityModifier]] = None
|
|
91
96
|
|
|
92
97
|
|
|
93
98
|
def capability(
|
|
94
|
-
capability_name: SourceCapability,
|
|
99
|
+
capability_name: SourceCapability,
|
|
100
|
+
description: str,
|
|
101
|
+
supported: bool = True,
|
|
102
|
+
subtype_modifier: Optional[List[SourceCapabilityModifier]] = None,
|
|
95
103
|
) -> Callable[[Type], Type]:
|
|
96
104
|
"""
|
|
97
105
|
A decorator to mark a source as having a certain capability
|
|
@@ -114,7 +122,10 @@ def capability(
|
|
|
114
122
|
cls.__capabilities.update(base_caps)
|
|
115
123
|
|
|
116
124
|
cls.__capabilities[capability_name] = CapabilitySetting(
|
|
117
|
-
capability=capability_name,
|
|
125
|
+
capability=capability_name,
|
|
126
|
+
description=description,
|
|
127
|
+
supported=supported,
|
|
128
|
+
subtype_modifier=subtype_modifier,
|
|
118
129
|
)
|
|
119
130
|
return cls
|
|
120
131
|
|
|
@@ -22,6 +22,7 @@ from typing import (
|
|
|
22
22
|
Union,
|
|
23
23
|
)
|
|
24
24
|
|
|
25
|
+
import progressbar
|
|
25
26
|
from avro.schema import RecordSchema
|
|
26
27
|
from pydantic import BaseModel
|
|
27
28
|
from requests.models import HTTPError
|
|
@@ -504,7 +505,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
504
505
|
"limit": limit,
|
|
505
506
|
"filter": filter,
|
|
506
507
|
}
|
|
507
|
-
end_point = f"{self.
|
|
508
|
+
end_point = f"{self._gms_server}/aspects?action=getTimeseriesAspectValues"
|
|
508
509
|
resp: Dict = self._post_generic(end_point, query_body)
|
|
509
510
|
|
|
510
511
|
values: Optional[List] = resp.get("value", {}).get("values")
|
|
@@ -524,7 +525,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
524
525
|
def get_entity_raw(
|
|
525
526
|
self, entity_urn: str, aspects: Optional[List[str]] = None
|
|
526
527
|
) -> Dict:
|
|
527
|
-
endpoint: str = f"{self.
|
|
528
|
+
endpoint: str = f"{self._gms_server}/entitiesV2/{Urn.url_encode(entity_urn)}"
|
|
528
529
|
if aspects is not None:
|
|
529
530
|
assert aspects, "if provided, aspects must be a non-empty list"
|
|
530
531
|
endpoint = f"{endpoint}?aspects=List(" + ",".join(aspects) + ")"
|
|
@@ -654,15 +655,15 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
654
655
|
|
|
655
656
|
@property
|
|
656
657
|
def _search_endpoint(self):
|
|
657
|
-
return f"{self.
|
|
658
|
+
return f"{self._gms_server}/entities?action=search"
|
|
658
659
|
|
|
659
660
|
@property
|
|
660
661
|
def _relationships_endpoint(self):
|
|
661
|
-
return f"{self.
|
|
662
|
+
return f"{self._gms_server}/openapi/relationships/v1/"
|
|
662
663
|
|
|
663
664
|
@property
|
|
664
665
|
def _aspect_count_endpoint(self):
|
|
665
|
-
return f"{self.
|
|
666
|
+
return f"{self._gms_server}/aspects?action=getCount"
|
|
666
667
|
|
|
667
668
|
def get_domain_urn_by_name(self, domain_name: str) -> Optional[str]:
|
|
668
669
|
"""Retrieve a domain urn based on its name. Returns None if there is no match found"""
|
|
@@ -1209,7 +1210,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1209
1210
|
operation_name: Optional[str] = None,
|
|
1210
1211
|
format_exception: bool = True,
|
|
1211
1212
|
) -> Dict:
|
|
1212
|
-
url = f"{self.
|
|
1213
|
+
url = f"{self._gms_server}/api/graphql"
|
|
1213
1214
|
|
|
1214
1215
|
body: Dict = {
|
|
1215
1216
|
"query": query,
|
|
@@ -1434,40 +1435,82 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1434
1435
|
related_aspects = response.get("relatedAspects", [])
|
|
1435
1436
|
return reference_count, related_aspects
|
|
1436
1437
|
|
|
1438
|
+
def get_kafka_consumer_offsets(
    self,
) -> dict:
    """
    Get Kafka consumer offsets from the DataHub API.

    Queries the ``mcp``, ``mcl`` and ``mcl-timeseries`` consumer-offset
    operations endpoints, asking for detailed, uncached results.

    Returns:
        A dict keyed by ``"mcp"``, ``"mcl"`` and ``"mcl-timeseries"`` whose
        values are the responses returned for each endpoint. A response that
        contains an ``"errors"`` entry is logged but still included.
    """
    # Build the URLs from the same base attribute as every other endpoint of
    # this client, so all requests target the same server address.
    base_url = f"{self._gms_server}/openapi/operations/kafka"
    params = {"skipCache": "true", "detailed": "true"}

    results = {}
    for key in ("mcp", "mcl", "mcl-timeseries"):
        response = self._get_generic(
            url=f"{base_url}/{key}/consumer/offsets", params=params
        )
        results[key] = response
        if "errors" in response:
            logger.error(f"Error: {response['errors']}")
    return results
|
|
1462
|
+
|
|
1463
|
+
def _restore_index_call(self, payload_obj: dict) -> None:
    """POST ``payload_obj`` to the ``restoreIndices`` operation and debug-log the response."""
    restore_url = f"{self._gms_server}/operations?action=restoreIndices"
    result = self._post_generic(restore_url, payload_obj)
    logger.debug(f"Restore indices result: {result}")
|
|
1468
|
+
|
|
1437
1469
|
def restore_indices(
    self,
    urn_pattern: Optional[str] = None,
    aspect: Optional[str] = None,
    start: Optional[int] = None,
    batch_size: Optional[int] = None,
    file: Optional[str] = None,
) -> None:
    """Restore the indices for a given urn or urn-like pattern.

    Args:
        urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs. If not provided, will restore indices from the file.
        aspect: Optional aspect string to restore indices for a specific aspect.
        start: Optional integer to decide which row number of sql store to restore from. Default: 0. Ignored in case file is provided.
        batch_size: Optional integer to decide how many rows to restore. Default: 10. Ignored in case file is provided.
        file: Optional file path to a file containing URNs (one per line) to restore indices for. Takes precedence over urn_pattern.

    Returns:
        None. One restore request is issued per URN; the response of each
        request is only logged at debug level.
    """
    if file is not None:
        with open(file) as f:
            # Skip blank lines (e.g. a trailing newline) so an empty urn is
            # never submitted.
            urns = [line.strip() for line in f if line.strip()]

        for urn in progressbar.progressbar(urns):
            # Build a fresh payload for every line: reusing one dict would
            # leak a "urnLike" key from a wildcard line into the request for
            # a later exact urn (and vice versa).
            payload_obj: dict = {"urnLike" if "%" in urn else "urn": urn}
            if aspect is not None:
                payload_obj["aspect"] = aspect
            self._restore_index_call(payload_obj)
    else:
        payload_obj = {}
        if urn_pattern is not None:
            if "%" in urn_pattern:
                payload_obj["urnLike"] = urn_pattern
            else:
                payload_obj["urn"] = urn_pattern
        if aspect is not None:
            payload_obj["aspect"] = aspect
        if start is not None:
            payload_obj["start"] = start
        if batch_size is not None:
            payload_obj["batchSize"] = batch_size
        self._restore_index_call(payload_obj)
|
|
1471
1514
|
|
|
1472
1515
|
@functools.lru_cache
|
|
1473
1516
|
def _make_schema_resolver(
|
|
@@ -1774,7 +1817,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1774
1817
|
"Accept": "application/json",
|
|
1775
1818
|
"Content-Type": "application/json",
|
|
1776
1819
|
}
|
|
1777
|
-
url = f"{self.
|
|
1820
|
+
url = f"{self._gms_server}/openapi/v2/entity/batch/{entity_name}"
|
|
1778
1821
|
response = self._session.post(url, data=json.dumps(payload), headers=headers)
|
|
1779
1822
|
response.raise_for_status()
|
|
1780
1823
|
|
|
@@ -1831,7 +1874,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1831
1874
|
"Content-Type": "application/json",
|
|
1832
1875
|
}
|
|
1833
1876
|
|
|
1834
|
-
url = f"{self.
|
|
1877
|
+
url = f"{self._gms_server}/openapi/v3/entity/{entity_name}/batchGet"
|
|
1835
1878
|
if with_system_metadata:
|
|
1836
1879
|
url += "?systemMetadata=true"
|
|
1837
1880
|
|
|
@@ -269,7 +269,7 @@ class GlueSourceReport(StaleEntityRemovalSourceReport):
|
|
|
269
269
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
270
270
|
@capability(
|
|
271
271
|
SourceCapability.DELETION_DETECTION,
|
|
272
|
-
"Enabled by default
|
|
272
|
+
"Enabled by default via stateful ingestion.",
|
|
273
273
|
)
|
|
274
274
|
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
|
|
275
275
|
@capability(
|
|
@@ -46,11 +46,11 @@ SELECT
|
|
|
46
46
|
t.is_insertable_into,
|
|
47
47
|
t.ddl,
|
|
48
48
|
ts.row_count as row_count,
|
|
49
|
-
ts.size_bytes as
|
|
49
|
+
ts.size_bytes as bytes,
|
|
50
50
|
p.num_partitions,
|
|
51
51
|
p.max_partition_id,
|
|
52
52
|
p.active_billable_bytes as active_billable_bytes,
|
|
53
|
-
|
|
53
|
+
IFNULL(p.long_term_billable_bytes, 0) as long_term_billable_bytes,
|
|
54
54
|
REGEXP_EXTRACT(t.table_name, r"(?:(?:.+\\D)[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$") as table_suffix,
|
|
55
55
|
REGEXP_REPLACE(t.table_name, r"(?:[_$]?)(\\d\\d\\d\\d(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01]))$", "") as table_base
|
|
56
56
|
|
|
@@ -1,5 +1,10 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Dict
|
|
3
|
+
|
|
1
4
|
from datahub.utilities.str_enum import StrEnum
|
|
2
5
|
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
3
8
|
|
|
4
9
|
class DatasetSubTypes(StrEnum):
|
|
5
10
|
# Generic SubTypes
|
|
@@ -76,6 +81,9 @@ class JobContainerSubTypes(StrEnum):
|
|
|
76
81
|
|
|
77
82
|
|
|
78
83
|
class BIAssetSubTypes(StrEnum):
|
|
84
|
+
DASHBOARD = "Dashboard"
|
|
85
|
+
CHART = "Chart"
|
|
86
|
+
|
|
79
87
|
# Generic SubTypes
|
|
80
88
|
REPORT = "Report"
|
|
81
89
|
|
|
@@ -118,3 +126,36 @@ class MLAssetSubTypes(StrEnum):
|
|
|
118
126
|
VERTEX_PIPELINE = "Pipeline Job"
|
|
119
127
|
VERTEX_PIPELINE_TASK = "Pipeline Task"
|
|
120
128
|
VERTEX_PIPELINE_TASK_RUN = "Pipeline Task Run"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def create_source_capability_modifier_enum():
    """Build the ``SourceCapabilityModifier`` enum from the subtype enums.

    The subtype enums listed below are walked in order and their members are
    merged into a single ``StrEnum``. When a member name appears in more than
    one enum, the first definition wins and later ones are skipped; a warning
    is logged only if the skipped definition carries a different value.

    Returns:
        A newly created ``StrEnum`` subclass named ``SourceCapabilityModifier``.
    """
    source_enums = [
        DatasetSubTypes,
        DatasetContainerSubTypes,
        BIContainerSubTypes,
        FlowContainerSubTypes,
        JobContainerSubTypes,
        BIAssetSubTypes,
        MLAssetSubTypes,
    ]

    all_values: Dict[str, Any] = {}
    for enum_class in source_enums:
        for member in enum_class:  # type: ignore[var-annotated]
            # setdefault keeps the first value registered under a given name.
            existing = all_values.setdefault(member.name, member.value)
            if existing != member.value:
                # Same name, different value: a real conflict worth surfacing.
                # Identical duplicates are harmless and stay silent.
                logger.warning(
                    "%s already exists with value %r, skipping %r from %s",
                    member.name,
                    existing,
                    member.value,
                    enum_class.__name__,
                )

    # Use the Enum functional API rather than exec() on generated source:
    # values need no escaping and module globals are not mutated behind the
    # caller's back. `module` keeps __module__ pointing at this module so the
    # resulting enum stays picklable.
    return StrEnum("SourceCapabilityModifier", all_values, module=__name__)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Combined enum of the members of all the enums above (on a name clash, the first definition is kept)
|
|
161
|
+
SourceCapabilityModifier = create_source_capability_modifier_enum()
|
|
@@ -166,7 +166,6 @@ class PathSpec(ConfigModel):
|
|
|
166
166
|
return False
|
|
167
167
|
|
|
168
168
|
def allowed(self, path: str, ignore_ext: bool = False) -> bool:
|
|
169
|
-
logger.debug(f"Checking file to inclusion: {path}")
|
|
170
169
|
if self.is_path_hidden(path) and not self.include_hidden_folders:
|
|
171
170
|
return False
|
|
172
171
|
|
|
@@ -174,19 +173,17 @@ class PathSpec(ConfigModel):
|
|
|
174
173
|
self.glob_include, flags=pathlib.GLOBSTAR
|
|
175
174
|
):
|
|
176
175
|
return False
|
|
177
|
-
|
|
176
|
+
|
|
178
177
|
if self.exclude:
|
|
179
178
|
for exclude_path in self.exclude:
|
|
180
179
|
if pathlib.PurePath(path).globmatch(
|
|
181
180
|
exclude_path, flags=pathlib.GLOBSTAR
|
|
182
181
|
):
|
|
183
182
|
return False
|
|
184
|
-
logger.debug(f"{path} is not excluded")
|
|
185
183
|
|
|
186
184
|
table_name, _ = self.extract_table_name_and_path(path)
|
|
187
185
|
if not self.tables_filter_pattern.allowed(table_name):
|
|
188
186
|
return False
|
|
189
|
-
logger.debug(f"{path} is passed table name check")
|
|
190
187
|
|
|
191
188
|
ext = os.path.splitext(path)[1].strip(".")
|
|
192
189
|
|
|
@@ -196,8 +193,6 @@ class PathSpec(ConfigModel):
|
|
|
196
193
|
):
|
|
197
194
|
return False
|
|
198
195
|
|
|
199
|
-
logger.debug(f"{path} had selected extension {ext}")
|
|
200
|
-
logger.debug(f"{path} allowed for dataset creation")
|
|
201
196
|
return True
|
|
202
197
|
|
|
203
198
|
def dir_allowed(self, path: str) -> bool:
|
|
@@ -219,10 +214,8 @@ class PathSpec(ConfigModel):
|
|
|
219
214
|
for _ in range(slash_to_remove_from_glob):
|
|
220
215
|
glob_include = glob_include.rsplit("/", 1)[0]
|
|
221
216
|
|
|
222
|
-
logger.debug(f"Checking dir to inclusion: {path}")
|
|
223
217
|
if not pathlib.PurePath(path).globmatch(glob_include, flags=pathlib.GLOBSTAR):
|
|
224
218
|
return False
|
|
225
|
-
logger.debug(f"{path} matched include ")
|
|
226
219
|
if self.exclude:
|
|
227
220
|
for exclude_path in self.exclude:
|
|
228
221
|
if pathlib.PurePath(path.rstrip("/")).globmatch(
|
|
@@ -236,7 +229,7 @@ class PathSpec(ConfigModel):
|
|
|
236
229
|
)
|
|
237
230
|
if not self.tables_filter_pattern.allowed(table_name):
|
|
238
231
|
return False
|
|
239
|
-
logger.debug(f"{path} is passed table name check")
|
|
232
|
+
# logger.debug(f"{path} is passed table name check")
|
|
240
233
|
|
|
241
234
|
return True
|
|
242
235
|
|
|
@@ -246,10 +239,10 @@ class PathSpec(ConfigModel):
|
|
|
246
239
|
if parsable_include.endswith("/{table}/**"):
|
|
247
240
|
# Remove the last two characters to make it parsable if it ends with {table}/** which marks autodetect partition
|
|
248
241
|
parsable_include = parsable_include[:-2]
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
242
|
+
|
|
243
|
+
# Replace all * with {folder[i]} to make it parsable
|
|
244
|
+
for i in range(parsable_include.count("*")):
|
|
245
|
+
parsable_include = parsable_include.replace("*", f"{{folder[{i}]}}", 1)
|
|
253
246
|
return parsable_include
|
|
254
247
|
|
|
255
248
|
def get_named_vars(self, path: str) -> Union[None, parse.Result, parse.Match]:
|
|
@@ -330,8 +323,6 @@ class PathSpec(ConfigModel):
|
|
|
330
323
|
if "{table}" in values["include"]:
|
|
331
324
|
v = "{table}"
|
|
332
325
|
else:
|
|
333
|
-
logger.debug(f"include fields: {compiled_include.named_fields}")
|
|
334
|
-
logger.debug(f"table_name fields: {parse.compile(v).named_fields}")
|
|
335
326
|
if not all(
|
|
336
327
|
x in compiled_include.named_fields
|
|
337
328
|
for x in parse.compile(v).named_fields
|
|
@@ -356,9 +347,7 @@ class PathSpec(ConfigModel):
|
|
|
356
347
|
@cached_property
|
|
357
348
|
def compiled_include(self):
|
|
358
349
|
parsable_include = PathSpec.get_parsable_include(self.include)
|
|
359
|
-
logger.debug(f"parsable_include: {parsable_include}")
|
|
360
350
|
compiled_include = parse.compile(parsable_include)
|
|
361
|
-
logger.debug(f"Setting compiled_include: {compiled_include}")
|
|
362
351
|
return compiled_include
|
|
363
352
|
|
|
364
353
|
@cached_property
|
|
@@ -366,9 +355,8 @@ class PathSpec(ConfigModel):
|
|
|
366
355
|
parsable_folder_include = PathSpec.get_parsable_include(self.include).rsplit(
|
|
367
356
|
"/", 1
|
|
368
357
|
)[0]
|
|
369
|
-
logger.debug(f"parsable_folder_include: {parsable_folder_include}")
|
|
370
358
|
compiled_folder_include = parse.compile(parsable_folder_include)
|
|
371
|
-
|
|
359
|
+
|
|
372
360
|
return compiled_folder_include
|
|
373
361
|
|
|
374
362
|
@cached_property
|
|
@@ -376,7 +364,8 @@ class PathSpec(ConfigModel):
|
|
|
376
364
|
# Regular expression to find all substrings enclosed in {}
|
|
377
365
|
pattern = r"\{(.*?)\}"
|
|
378
366
|
# Find all matches
|
|
379
|
-
|
|
367
|
+
split_parts = self.include.split("{table}/")
|
|
368
|
+
matches = re.findall(pattern, split_parts[1]) if len(split_parts) > 1 else []
|
|
380
369
|
return matches
|
|
381
370
|
|
|
382
371
|
def get_partition_from_path(self, path: str) -> Optional[List[Tuple[str, str]]]:
|
|
@@ -563,7 +552,7 @@ class PathSpec(ConfigModel):
|
|
|
563
552
|
f"{{{template_key}}}", var[key]
|
|
564
553
|
)
|
|
565
554
|
else:
|
|
566
|
-
partition_format.replace(f"{{{var_key}}}", var)
|
|
555
|
+
partition_format = partition_format.replace(f"{{{var_key}}}", var)
|
|
567
556
|
return datetime.datetime.strptime(partition_format, datetime_format).replace(
|
|
568
557
|
tzinfo=datetime.timezone.utc
|
|
569
558
|
)
|
|
@@ -355,7 +355,7 @@ class DBTCommonConfig(
|
|
|
355
355
|
# override default value to True.
|
|
356
356
|
incremental_lineage: bool = Field(
|
|
357
357
|
default=True,
|
|
358
|
-
description="When enabled, emits incremental/patch lineage for non-dbt entities. When disabled, re-states lineage on each run.",
|
|
358
|
+
description="When enabled, emits incremental/patch lineage for non-dbt entities. When disabled, re-states lineage on each run. This would also require enabling 'incremental_lineage' in the counterpart warehouse ingestion (_e.g._ BigQuery, Redshift, etc).",
|
|
359
359
|
)
|
|
360
360
|
|
|
361
361
|
_remove_use_compiled_code = pydantic_removed_field("use_compiled_code")
|