acryl-datahub 0.15.0rc4__py3-none-any.whl → 0.15.0rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc6.dist-info}/METADATA +2504 -2474
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc6.dist-info}/RECORD +24 -22
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc6.dist-info}/entry_points.txt +1 -0
- datahub/__init__.py +1 -1
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/dbt/dbt_common.py +7 -61
- datahub/ingestion/source/dremio/dremio_api.py +11 -0
- datahub/ingestion/source/dremio/dremio_aspects.py +19 -15
- datahub/ingestion/source/dremio/dremio_config.py +5 -0
- datahub/ingestion/source/dremio/dremio_entities.py +4 -0
- datahub/ingestion/source/dremio/dremio_source.py +3 -0
- datahub/ingestion/source/iceberg/iceberg.py +12 -5
- datahub/ingestion/source/kafka/kafka.py +21 -8
- datahub/ingestion/source/neo4j/__init__.py +0 -0
- datahub/ingestion/source/neo4j/neo4j_source.py +331 -0
- datahub/ingestion/source/qlik_sense/data_classes.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +1 -0
- datahub/ingestion/source/sql/athena.py +46 -22
- datahub/ingestion/source/sql/sql_types.py +72 -7
- datahub/ingestion/source/unity/proxy_types.py +1 -0
- datahub/utilities/urn_encoder.py +2 -1
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc6.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc6.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=n6y65QeCMsW4IG81drsaM-il8e5WBN_r5cUcIbgDNm8,574
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -257,7 +257,7 @@ datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=Y1Gtb8QcXtdQaq1
|
|
|
257
257
|
datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=alZp0Pn-03nWKlFC121FmDrXPJG6TyPM4xMJKlhRRco,5232
|
|
258
258
|
datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
259
259
|
datahub/ingestion/source/common/data_reader.py,sha256=XbSxiRTYrk6seOz0ZjVjzSpGvP8lEjmqXrNI4cdYYmQ,1819
|
|
260
|
-
datahub/ingestion/source/common/subtypes.py,sha256=
|
|
260
|
+
datahub/ingestion/source/common/subtypes.py,sha256=zxBQkRxsG_XMMz6Pmw_yMQiuFOhapOFVUOtXw8yHz7Q,2287
|
|
261
261
|
datahub/ingestion/source/data_lake_common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
262
262
|
datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmXnVCy5eFj-2-2QLEOrAdbgk,359
|
|
263
263
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
|
|
@@ -272,7 +272,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
272
272
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
273
273
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
274
274
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=3bfcCi7xBvlCTGjnDCnyOShsxgVRn7wUYJOid_WT_Vk,17643
|
|
275
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
275
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=0ddiqNx9sUAGZYDQ8tSr5Qh5ti-kgC4saW1yRRNJXgg,80493
|
|
276
276
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=m6cA9vVd4Nh2arc-T2_xeQoxvreRbMhTDIJuYsx3wHc,22722
|
|
277
277
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=Q5KISW_AOOWqyxmyOgJQquyX7xlfOqKu9WhrHoLKC0M,9881
|
|
278
278
|
datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
|
|
@@ -281,14 +281,14 @@ datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWM
|
|
|
281
281
|
datahub/ingestion/source/delta_lake/report.py,sha256=uqWWivPltlZ7dwpOOluTvHOKKsSusqihn67clCAwxoM,467
|
|
282
282
|
datahub/ingestion/source/delta_lake/source.py,sha256=jLCN6SeAv3bCD4w4ZDw15eIbFF3yVWcxVtBklovFEBg,13548
|
|
283
283
|
datahub/ingestion/source/dremio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
284
|
-
datahub/ingestion/source/dremio/dremio_api.py,sha256=
|
|
285
|
-
datahub/ingestion/source/dremio/dremio_aspects.py,sha256=
|
|
286
|
-
datahub/ingestion/source/dremio/dremio_config.py,sha256=
|
|
284
|
+
datahub/ingestion/source/dremio/dremio_api.py,sha256=R7HLqAg845SdX4zWhl2Tm8AtxaFpUIX_zxRRvap2uCQ,28998
|
|
285
|
+
datahub/ingestion/source/dremio/dremio_aspects.py,sha256=3VeHzCw9q1ytngmsq_K4Ll9tWD2V8EDFySBImHdhPAw,18287
|
|
286
|
+
datahub/ingestion/source/dremio/dremio_config.py,sha256=5SP66ewGYN0OnyWgpU33EZOmtICsclTtBX5DSYLwl3c,5782
|
|
287
287
|
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=YkYC3-TB-Jn65z2GN_NMErQDovwU7krQ9b92DBh4uvY,3021
|
|
288
|
-
datahub/ingestion/source/dremio/dremio_entities.py,sha256=
|
|
288
|
+
datahub/ingestion/source/dremio/dremio_entities.py,sha256=3H3vIvj5ab4d8gmB9-rbZfwRgW87gT1DdjWiMjNgqJ4,15069
|
|
289
289
|
datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
|
|
290
290
|
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=IPgv7lOnhK6mQeqwRsPscKnXhzgVZG8Id3yNcsmG7nw,1273
|
|
291
|
-
datahub/ingestion/source/dremio/dremio_source.py,sha256=
|
|
291
|
+
datahub/ingestion/source/dremio/dremio_source.py,sha256=DMztf08dZ3jt1AKMsWVMgj8qpp2dkB-hh5yncKDBW_k,26210
|
|
292
292
|
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
|
|
293
293
|
datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
294
294
|
datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
|
|
@@ -312,14 +312,14 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
|
|
|
312
312
|
datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
313
313
|
datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
|
|
314
314
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
315
|
-
datahub/ingestion/source/iceberg/iceberg.py,sha256=
|
|
315
|
+
datahub/ingestion/source/iceberg/iceberg.py,sha256=fjqp3VBW5W5-54X_-ubkRZiAmdHvuMbxRbC4UYzEr4U,25900
|
|
316
316
|
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=TS3_ZYZ47Fe02CmzEo1z0pvy7yjXuG1VlwqNxa0U6pc,8506
|
|
317
317
|
datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=hLT1Le_TEUoFXvsJSlrRB1qbTiTe-YVGCof5TFHMyd8,9908
|
|
318
318
|
datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
319
319
|
datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
|
|
320
320
|
datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
|
|
321
321
|
datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
322
|
-
datahub/ingestion/source/kafka/kafka.py,sha256=
|
|
322
|
+
datahub/ingestion/source/kafka/kafka.py,sha256=QUw8VCmqIhZJvUiFJmFmekFmy4nXCLD4EKJNC6jk6Y4,26092
|
|
323
323
|
datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
|
|
324
324
|
datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
|
|
325
325
|
datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -347,6 +347,8 @@ datahub/ingestion/source/looker/view_upstream.py,sha256=k278-uwh8uspdREpjE_uqks4
|
|
|
347
347
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
348
348
|
datahub/ingestion/source/metadata/business_glossary.py,sha256=eRVRpQI0ZX5OofS1BUhNihFOfWih70TIAkJM7zaMH80,17577
|
|
349
349
|
datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwrvEhapVLdRlDxCuc,9507
|
|
350
|
+
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
351
|
+
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=L9WiZ5yZrIDMrgj3gYU9j6zz3TRMXYpcWxeTegD7sFg,12409
|
|
350
352
|
datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
351
353
|
datahub/ingestion/source/powerbi/config.py,sha256=LV8BOm2zzF9t0RMwQVVUNB0bStzBPo8A6JkaW0xlgsQ,23241
|
|
352
354
|
datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=AIU89lVPoCWlzc_RfUjDJwRQ11akPtnGpBTluBMCKio,2242
|
|
@@ -374,7 +376,7 @@ datahub/ingestion/source/profiling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
|
|
|
374
376
|
datahub/ingestion/source/profiling/common.py,sha256=4sZ58AeBV64KRfKAgjkg-UyNjAc3YERahQMmW4algAw,1426
|
|
375
377
|
datahub/ingestion/source/qlik_sense/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
376
378
|
datahub/ingestion/source/qlik_sense/config.py,sha256=oyCqkGrY9tmFJY9cPD9B7DdkmR7eQ30Awt-iqgY-HUs,3892
|
|
377
|
-
datahub/ingestion/source/qlik_sense/data_classes.py,sha256=
|
|
379
|
+
datahub/ingestion/source/qlik_sense/data_classes.py,sha256=3JBELAeadKTjDyfrhx6qhHwPChXRGOL95gRAUyPhKQU,6555
|
|
378
380
|
datahub/ingestion/source/qlik_sense/qlik_api.py,sha256=fXJAo4ctDIx08ZRK1uEwFJta6nNgTqrqKAYy6h6QC8M,13185
|
|
379
381
|
datahub/ingestion/source/qlik_sense/qlik_sense.py,sha256=bmhmOgSXzC6g-uqO1ljFLRNz2oo6Xjn400UQnWdMA1Y,22530
|
|
380
382
|
datahub/ingestion/source/qlik_sense/websocket_connection.py,sha256=CsWRFAOaRKJ7SDJKh6qT3sd5EaIFA_4JsEWSGG-6tHc,1856
|
|
@@ -385,7 +387,7 @@ datahub/ingestion/source/redshift/lineage.py,sha256=bUy0uJowrqSc33Z50fIxFlJkyhe-
|
|
|
385
387
|
datahub/ingestion/source/redshift/lineage_v2.py,sha256=OcVW_27sSaZOYZPTd2j-LS9SzFQ1kXz6cMzM2ZDWhJQ,16751
|
|
386
388
|
datahub/ingestion/source/redshift/profile.py,sha256=T4H79ycq2tPobLM1tTLRtu581Qa8LlKxEok49m0AirU,4294
|
|
387
389
|
datahub/ingestion/source/redshift/query.py,sha256=bY1D9RoOHaw89LgcXal7GYlJN0RG7PxXRRC-YKIdC8E,43105
|
|
388
|
-
datahub/ingestion/source/redshift/redshift.py,sha256=
|
|
390
|
+
datahub/ingestion/source/redshift/redshift.py,sha256=doGZowVJBThrEoyt25NdowfgytYuNlN4Ca1776onSPQ,44294
|
|
389
391
|
datahub/ingestion/source/redshift/redshift_data_reader.py,sha256=zc69jwXHdF-w8J4Hq-ZQ6BjHQ75Ij2iNDMpoRJlcmlU,1724
|
|
390
392
|
datahub/ingestion/source/redshift/redshift_schema.py,sha256=9IYeUsnISenq3eVB3k-s7zK8nInWDAYViFnDrNjtkb0,19149
|
|
391
393
|
datahub/ingestion/source/redshift/report.py,sha256=M19aUHBkd9n-BVBX4fRhyRNdVkN2b9Es6ZqInRx5ZGI,2958
|
|
@@ -430,7 +432,7 @@ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kH
|
|
|
430
432
|
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=oNmtg-ZVcZ3-w1X5t-JGv2qTH64Z0qzEnaZaRxbRquo,38035
|
|
431
433
|
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=KjNvYufQMVkFP7F5sEFumKorkiFAmFVCQ1jYqXr0ev0,6419
|
|
432
434
|
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fatrKpBUY9CnzXhLJcFlHkHGt0QWFhkYH9ZXwWoQCLA,20392
|
|
433
|
-
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=
|
|
435
|
+
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=JjzhhyEN9QBUv-64sHhkq-4Vq1XhDtz9npLMiqlSICo,38893
|
|
434
436
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=ud3Ah4qHrmSfpD8Od-gPdzwtON9dJa0eqHt-8Yr5h2Q,6366
|
|
435
437
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
436
438
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
|
|
@@ -438,7 +440,7 @@ datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=PEmYNMXJRUvLQmVd
|
|
|
438
440
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=Ux4sieWe79KZztquvrPkpJoOegLfTAWVv1A73UUlbGs,11365
|
|
439
441
|
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=gO7egHNYnbpQ-xQb1SWgr4K0GQSL2VTVSTMdfwfgl-A,31733
|
|
440
442
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
441
|
-
datahub/ingestion/source/sql/athena.py,sha256=
|
|
443
|
+
datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
|
|
442
444
|
datahub/ingestion/source/sql/clickhouse.py,sha256=jzvaXP5Wr0SMhj2rtuvVE821xnfpKiXhO3cm0xblgHs,27299
|
|
443
445
|
datahub/ingestion/source/sql/cockroachdb.py,sha256=XaD7eae34plU9ISRC6PzYX9q6RdT2qkzjH6CpTOgkx4,1443
|
|
444
446
|
datahub/ingestion/source/sql/druid.py,sha256=lhO9CCOlHV-6LjBuAxAxtB9I1pvPtsGSdr63bz6_ilA,2837
|
|
@@ -455,7 +457,7 @@ datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxv
|
|
|
455
457
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
456
458
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=6QbhkQH_F13GV1HsavVTq3BE9F7Pr_vfGOjCX2o2c60,11675
|
|
457
459
|
datahub/ingestion/source/sql/sql_report.py,sha256=19YVvatcCZsBP533HWn0X9Y30jo4TUxSkQ9rYpMQpT4,2487
|
|
458
|
-
datahub/ingestion/source/sql/sql_types.py,sha256=
|
|
460
|
+
datahub/ingestion/source/sql/sql_types.py,sha256=2GqYrW2sJyX_QU5goIUCyafxF2S07JEVydAgMFygNg4,14638
|
|
459
461
|
datahub/ingestion/source/sql/sql_utils.py,sha256=w9YFNm_qJNjOcWAWBI_lUoFMbd0wT8q0LoT7Ia71tIE,8100
|
|
460
462
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
461
463
|
datahub/ingestion/source/sql/sqlalchemy_uri_mapper.py,sha256=KOpbmDIE2h1hyYEsbVHJi2B7FlsyUMTXZx4diyzltQg,1826
|
|
@@ -494,7 +496,7 @@ datahub/ingestion/source/unity/ge_profiler.py,sha256=DFQKOqryMWFg-NqwfFGPklNH2hH
|
|
|
494
496
|
datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
|
|
495
497
|
datahub/ingestion/source/unity/proxy.py,sha256=2-pYQ-3B9UVUwO1yB9iTdi3DqgqZ2JrpQknLodI7UjM,18976
|
|
496
498
|
datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
|
|
497
|
-
datahub/ingestion/source/unity/proxy_types.py,sha256=
|
|
499
|
+
datahub/ingestion/source/unity/proxy_types.py,sha256=qrvHiwPzl5cPX-KRvcIGGeJVdr0I8XUQmoAI6ErZ-v8,9371
|
|
498
500
|
datahub/ingestion/source/unity/report.py,sha256=0Y-ciHVTI6ZKNCJ5zWoQh3Ze1c_GMqmTMKFwzXDuuOg,2788
|
|
499
501
|
datahub/ingestion/source/unity/source.py,sha256=i2WU0H6Gvce51I3qWVOoEp6lZ1FAxEm_9u2qS6zmsL8,41482
|
|
500
502
|
datahub/ingestion/source/unity/usage.py,sha256=r91-ishhv9QTNLevVhQ9HPZ47CRvVeeAMBtWuRsONxk,11089
|
|
@@ -932,7 +934,7 @@ datahub/utilities/topological_sort.py,sha256=kcK5zPSR393fgItr-KSLV3bDqfJfBRS8E5k
|
|
|
932
934
|
datahub/utilities/type_annotations.py,sha256=FvcB__a6X0CLoz-sBXwqpdceqSqTHgkLXGQ6wSmiV8w,970
|
|
933
935
|
datahub/utilities/unified_diff.py,sha256=8uRvM_kN-sdAzR4Ym6CgmpjrmO4CrcKtzZ4P-Cn6aEA,8422
|
|
934
936
|
datahub/utilities/url_util.py,sha256=CJ_mddw76p0RM7FqVjX-g8QgQ3Dq0IjkcP75sUaamJE,172
|
|
935
|
-
datahub/utilities/urn_encoder.py,sha256=
|
|
937
|
+
datahub/utilities/urn_encoder.py,sha256=YPSP23PYM1mrFJBqotjz3qA79xWxjD6gh8SJeqSQrHQ,1495
|
|
936
938
|
datahub/utilities/yaml_sync_utils.py,sha256=65IEe8quW3_zHCR8CyoDkZyopeZJazU-IyMrK9_0nj0,1054
|
|
937
939
|
datahub/utilities/registries/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
938
940
|
datahub/utilities/registries/domain_registry.py,sha256=0SfcZNop-PXBbl-AWw92vAyb28i0YXTr-TKdBwixmOw,2452
|
|
@@ -972,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
972
974
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
973
975
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
974
976
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
975
|
-
acryl_datahub-0.15.
|
|
976
|
-
acryl_datahub-0.15.
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
979
|
-
acryl_datahub-0.15.
|
|
977
|
+
acryl_datahub-0.15.0rc6.dist-info/METADATA,sha256=cr6WxunP6sUsI91SLZaMuuQDrseSlU3gn-UPTsadHS4,172484
|
|
978
|
+
acryl_datahub-0.15.0rc6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
979
|
+
acryl_datahub-0.15.0rc6.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
|
|
980
|
+
acryl_datahub-0.15.0rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
981
|
+
acryl_datahub-0.15.0rc6.dist-info/RECORD,,
|
|
@@ -68,6 +68,7 @@ mode = datahub.ingestion.source.mode:ModeSource
|
|
|
68
68
|
mongodb = datahub.ingestion.source.mongodb:MongoDBSource
|
|
69
69
|
mssql = datahub.ingestion.source.sql.mssql:SQLServerSource
|
|
70
70
|
mysql = datahub.ingestion.source.sql.mysql:MySQLSource
|
|
71
|
+
neo4j = datahub.ingestion.source.neo4j.neo4j_source:Neo4jSource
|
|
71
72
|
nifi = datahub.ingestion.source.nifi:NifiSource
|
|
72
73
|
okta = datahub.ingestion.source.identity.okta:OktaSource
|
|
73
74
|
openapi = datahub.ingestion.source.openapi:OpenApiSource
|
datahub/__init__.py
CHANGED
|
@@ -22,6 +22,8 @@ class DatasetSubTypes(StrEnum):
|
|
|
22
22
|
SAC_MODEL = "Model"
|
|
23
23
|
SAC_IMPORT_DATA_MODEL = "Import Data Model"
|
|
24
24
|
SAC_LIVE_DATA_MODEL = "Live Data Model"
|
|
25
|
+
NEO4J_NODE = "Neo4j Node"
|
|
26
|
+
NEO4J_RELATIONSHIP = "Neo4j Relationship"
|
|
25
27
|
|
|
26
28
|
# TODO: Create separate entity...
|
|
27
29
|
NOTEBOOK = "Notebook"
|
|
@@ -53,19 +53,7 @@ from datahub.ingestion.source.dbt.dbt_tests import (
|
|
|
53
53
|
make_assertion_from_test,
|
|
54
54
|
make_assertion_result_from_test,
|
|
55
55
|
)
|
|
56
|
-
from datahub.ingestion.source.sql.sql_types import
|
|
57
|
-
ATHENA_SQL_TYPES_MAP,
|
|
58
|
-
BIGQUERY_TYPES_MAP,
|
|
59
|
-
POSTGRES_TYPES_MAP,
|
|
60
|
-
SNOWFLAKE_TYPES_MAP,
|
|
61
|
-
SPARK_SQL_TYPES_MAP,
|
|
62
|
-
TRINO_SQL_TYPES_MAP,
|
|
63
|
-
VERTICA_SQL_TYPES_MAP,
|
|
64
|
-
resolve_athena_modified_type,
|
|
65
|
-
resolve_postgres_modified_type,
|
|
66
|
-
resolve_trino_modified_type,
|
|
67
|
-
resolve_vertica_modified_type,
|
|
68
|
-
)
|
|
56
|
+
from datahub.ingestion.source.sql.sql_types import resolve_sql_type
|
|
69
57
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
70
58
|
StaleEntityRemovalHandler,
|
|
71
59
|
StaleEntityRemovalSourceReport,
|
|
@@ -89,17 +77,11 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
|
89
77
|
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
|
|
90
78
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
91
79
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
92
|
-
BooleanTypeClass,
|
|
93
|
-
DateTypeClass,
|
|
94
80
|
MySqlDDL,
|
|
95
81
|
NullTypeClass,
|
|
96
|
-
NumberTypeClass,
|
|
97
|
-
RecordType,
|
|
98
82
|
SchemaField,
|
|
99
83
|
SchemaFieldDataType,
|
|
100
84
|
SchemaMetadata,
|
|
101
|
-
StringTypeClass,
|
|
102
|
-
TimeTypeClass,
|
|
103
85
|
)
|
|
104
86
|
from datahub.metadata.schema_classes import (
|
|
105
87
|
DataPlatformInstanceClass,
|
|
@@ -804,28 +786,6 @@ def make_mapping_upstream_lineage(
|
|
|
804
786
|
)
|
|
805
787
|
|
|
806
788
|
|
|
807
|
-
# See https://github.com/fishtown-analytics/dbt/blob/master/core/dbt/adapters/sql/impl.py
|
|
808
|
-
_field_type_mapping = {
|
|
809
|
-
"boolean": BooleanTypeClass,
|
|
810
|
-
"date": DateTypeClass,
|
|
811
|
-
"time": TimeTypeClass,
|
|
812
|
-
"numeric": NumberTypeClass,
|
|
813
|
-
"text": StringTypeClass,
|
|
814
|
-
"timestamp with time zone": DateTypeClass,
|
|
815
|
-
"timestamp without time zone": DateTypeClass,
|
|
816
|
-
"integer": NumberTypeClass,
|
|
817
|
-
"float8": NumberTypeClass,
|
|
818
|
-
"struct": RecordType,
|
|
819
|
-
**POSTGRES_TYPES_MAP,
|
|
820
|
-
**SNOWFLAKE_TYPES_MAP,
|
|
821
|
-
**BIGQUERY_TYPES_MAP,
|
|
822
|
-
**SPARK_SQL_TYPES_MAP,
|
|
823
|
-
**TRINO_SQL_TYPES_MAP,
|
|
824
|
-
**ATHENA_SQL_TYPES_MAP,
|
|
825
|
-
**VERTICA_SQL_TYPES_MAP,
|
|
826
|
-
}
|
|
827
|
-
|
|
828
|
-
|
|
829
789
|
def get_column_type(
|
|
830
790
|
report: DBTSourceReport,
|
|
831
791
|
dataset_name: str,
|
|
@@ -835,24 +795,10 @@ def get_column_type(
|
|
|
835
795
|
"""
|
|
836
796
|
Maps known DBT types to datahub types
|
|
837
797
|
"""
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
if dbt_adapter == "trino":
|
|
843
|
-
TypeClass = resolve_trino_modified_type(column_type)
|
|
844
|
-
elif dbt_adapter == "athena":
|
|
845
|
-
TypeClass = resolve_athena_modified_type(column_type)
|
|
846
|
-
elif dbt_adapter == "postgres" or dbt_adapter == "redshift":
|
|
847
|
-
# Redshift uses a variant of Postgres, so we can use the same logic.
|
|
848
|
-
TypeClass = resolve_postgres_modified_type(column_type)
|
|
849
|
-
elif dbt_adapter == "vertica":
|
|
850
|
-
TypeClass = resolve_vertica_modified_type(column_type)
|
|
851
|
-
elif dbt_adapter == "snowflake":
|
|
852
|
-
# Snowflake types are uppercase, so we check that.
|
|
853
|
-
TypeClass = _field_type_mapping.get(column_type.upper())
|
|
854
|
-
|
|
855
|
-
# if still not found, report the warning
|
|
798
|
+
|
|
799
|
+
TypeClass = resolve_sql_type(column_type, dbt_adapter)
|
|
800
|
+
|
|
801
|
+
# if still not found, report a warning
|
|
856
802
|
if TypeClass is None:
|
|
857
803
|
if column_type:
|
|
858
804
|
report.info(
|
|
@@ -861,9 +807,9 @@ def get_column_type(
|
|
|
861
807
|
context=f"{dataset_name} - {column_type}",
|
|
862
808
|
log=False,
|
|
863
809
|
)
|
|
864
|
-
TypeClass = NullTypeClass
|
|
810
|
+
TypeClass = NullTypeClass()
|
|
865
811
|
|
|
866
|
-
return SchemaFieldDataType(type=TypeClass
|
|
812
|
+
return SchemaFieldDataType(type=TypeClass)
|
|
867
813
|
|
|
868
814
|
|
|
869
815
|
@platform_name("dbt")
|
|
@@ -774,3 +774,14 @@ class DremioAPIOperations:
|
|
|
774
774
|
containers.extend(future.result())
|
|
775
775
|
|
|
776
776
|
return containers
|
|
777
|
+
|
|
778
|
+
def get_context_for_vds(self, resource_id: str) -> str:
|
|
779
|
+
context_array = self.get(
|
|
780
|
+
url=f"/catalog/{resource_id}",
|
|
781
|
+
).get("sqlContext")
|
|
782
|
+
if context_array:
|
|
783
|
+
return ".".join(
|
|
784
|
+
f'"{part}"' if "." in part else f"{part}" for part in context_array
|
|
785
|
+
)
|
|
786
|
+
else:
|
|
787
|
+
return ""
|
|
@@ -142,6 +142,7 @@ class DremioAspects:
|
|
|
142
142
|
platform: str,
|
|
143
143
|
ui_url: str,
|
|
144
144
|
env: str,
|
|
145
|
+
ingest_owner: bool,
|
|
145
146
|
domain: Optional[str] = None,
|
|
146
147
|
platform_instance: Optional[str] = None,
|
|
147
148
|
):
|
|
@@ -150,6 +151,7 @@ class DremioAspects:
|
|
|
150
151
|
self.env = env
|
|
151
152
|
self.domain = domain
|
|
152
153
|
self.ui_url = ui_url
|
|
154
|
+
self.ingest_owner = ingest_owner
|
|
153
155
|
|
|
154
156
|
def get_container_key(
|
|
155
157
|
self, name: Optional[str], path: Optional[List[str]]
|
|
@@ -426,21 +428,23 @@ class DremioAspects:
|
|
|
426
428
|
return f'{self.ui_url}/{container_type}/{dataset_url_path}"{dataset.resource_name}"'
|
|
427
429
|
|
|
428
430
|
def _create_ownership(self, dataset: DremioDataset) -> Optional[OwnershipClass]:
|
|
429
|
-
if
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
431
|
+
if self.ingest_owner and dataset.owner:
|
|
432
|
+
owner_urn = (
|
|
433
|
+
make_user_urn(dataset.owner)
|
|
434
|
+
if dataset.owner_type == "USER"
|
|
435
|
+
else make_group_urn(dataset.owner)
|
|
436
|
+
)
|
|
437
|
+
ownership: OwnershipClass = OwnershipClass(
|
|
438
|
+
owners=[
|
|
439
|
+
OwnerClass(
|
|
440
|
+
owner=owner_urn,
|
|
441
|
+
type=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
442
|
+
)
|
|
443
|
+
]
|
|
444
|
+
)
|
|
445
|
+
return ownership
|
|
446
|
+
|
|
447
|
+
return None
|
|
444
448
|
|
|
445
449
|
def _create_glossary_terms(self, entity: DremioDataset) -> GlossaryTermsClass:
|
|
446
450
|
return GlossaryTermsClass(
|
|
@@ -174,3 +174,8 @@ class DremioSourceConfig(
|
|
|
174
174
|
default=False,
|
|
175
175
|
description="Whether to include query-based lineage information.",
|
|
176
176
|
)
|
|
177
|
+
|
|
178
|
+
ingest_owner: bool = Field(
|
|
179
|
+
default=True,
|
|
180
|
+
description="Ingest Owner from source. This will override Owner info entered from UI",
|
|
181
|
+
)
|
|
@@ -200,6 +200,7 @@ class DremioDataset:
|
|
|
200
200
|
columns: List[DremioDatasetColumn]
|
|
201
201
|
sql_definition: Optional[str]
|
|
202
202
|
dataset_type: DremioDatasetType
|
|
203
|
+
default_schema: Optional[str]
|
|
203
204
|
owner: Optional[str]
|
|
204
205
|
owner_type: Optional[str]
|
|
205
206
|
created: str
|
|
@@ -235,6 +236,9 @@ class DremioDataset:
|
|
|
235
236
|
|
|
236
237
|
if self.sql_definition:
|
|
237
238
|
self.dataset_type = DremioDatasetType.VIEW
|
|
239
|
+
self.default_schema = api_operations.get_context_for_vds(
|
|
240
|
+
resource_id=self.resource_id
|
|
241
|
+
)
|
|
238
242
|
else:
|
|
239
243
|
self.dataset_type = DremioDatasetType.TABLE
|
|
240
244
|
|
|
@@ -97,6 +97,7 @@ class DremioSource(StatefulIngestionSourceBase):
|
|
|
97
97
|
- Ownership and Glossary Terms:
|
|
98
98
|
- Metadata related to ownership of datasets, extracted from Dremio’s ownership model.
|
|
99
99
|
- Glossary terms and business metadata associated with datasets, providing additional context to the data.
|
|
100
|
+
- Note: Ownership information will only be available for the Cloud and Enterprise editions, it will not be available for the Community edition.
|
|
100
101
|
|
|
101
102
|
- Optional SQL Profiling (if enabled):
|
|
102
103
|
- Table, row, and column statistics can be profiled and ingested via optional SQL queries.
|
|
@@ -123,6 +124,7 @@ class DremioSource(StatefulIngestionSourceBase):
|
|
|
123
124
|
self.dremio_aspects = DremioAspects(
|
|
124
125
|
platform=self.get_platform(),
|
|
125
126
|
domain=self.config.domain,
|
|
127
|
+
ingest_owner=self.config.ingest_owner,
|
|
126
128
|
platform_instance=self.config.platform_instance,
|
|
127
129
|
env=self.config.env,
|
|
128
130
|
ui_url=dremio_api.ui_url,
|
|
@@ -415,6 +417,7 @@ class DremioSource(StatefulIngestionSourceBase):
|
|
|
415
417
|
view_urn=dataset_urn,
|
|
416
418
|
view_definition=dataset_info.sql_definition,
|
|
417
419
|
default_db=self.default_db,
|
|
420
|
+
default_schema=dataset_info.default_schema,
|
|
418
421
|
)
|
|
419
422
|
|
|
420
423
|
elif dataset_info.dataset_type == DremioDatasetType.TABLE:
|
|
@@ -9,6 +9,7 @@ from pyiceberg.exceptions import (
|
|
|
9
9
|
NoSuchIcebergTableError,
|
|
10
10
|
NoSuchNamespaceError,
|
|
11
11
|
NoSuchPropertyException,
|
|
12
|
+
NoSuchTableError,
|
|
12
13
|
)
|
|
13
14
|
from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
|
|
14
15
|
from pyiceberg.table import Table
|
|
@@ -104,7 +105,7 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
|
|
|
104
105
|
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default.")
|
|
105
106
|
@capability(
|
|
106
107
|
SourceCapability.OWNERSHIP,
|
|
107
|
-
"
|
|
108
|
+
"Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
|
|
108
109
|
)
|
|
109
110
|
@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
|
|
110
111
|
class IcebergSource(StatefulIngestionSourceBase):
|
|
@@ -192,9 +193,7 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
192
193
|
table = thread_local.local_catalog.load_table(dataset_path)
|
|
193
194
|
time_taken = timer.elapsed_seconds()
|
|
194
195
|
self.report.report_table_load_time(time_taken)
|
|
195
|
-
LOGGER.debug(
|
|
196
|
-
f"Loaded table: {table.identifier}, time taken: {time_taken}"
|
|
197
|
-
)
|
|
196
|
+
LOGGER.debug(f"Loaded table: {table.name()}, time taken: {time_taken}")
|
|
198
197
|
yield from self._create_iceberg_workunit(dataset_name, table)
|
|
199
198
|
except NoSuchPropertyException as e:
|
|
200
199
|
self.report.report_warning(
|
|
@@ -206,12 +205,20 @@ class IcebergSource(StatefulIngestionSourceBase):
|
|
|
206
205
|
)
|
|
207
206
|
except NoSuchIcebergTableError as e:
|
|
208
207
|
self.report.report_warning(
|
|
209
|
-
"
|
|
208
|
+
"not-an-iceberg-table",
|
|
210
209
|
f"Failed to create workunit for {dataset_name}. {e}",
|
|
211
210
|
)
|
|
212
211
|
LOGGER.warning(
|
|
213
212
|
f"NoSuchIcebergTableError while processing table {dataset_path}, skipping it.",
|
|
214
213
|
)
|
|
214
|
+
except NoSuchTableError as e:
|
|
215
|
+
self.report.report_warning(
|
|
216
|
+
"no-such-table",
|
|
217
|
+
f"Failed to create workunit for {dataset_name}. {e}",
|
|
218
|
+
)
|
|
219
|
+
LOGGER.warning(
|
|
220
|
+
f"NoSuchTableError while processing table {dataset_path}, skipping it.",
|
|
221
|
+
)
|
|
215
222
|
except Exception as e:
|
|
216
223
|
self.report.report_failure("general", f"Failed to create workunit: {e}")
|
|
217
224
|
LOGGER.exception(
|
|
@@ -148,7 +148,7 @@ def get_kafka_consumer(
|
|
|
148
148
|
) -> confluent_kafka.Consumer:
|
|
149
149
|
consumer = confluent_kafka.Consumer(
|
|
150
150
|
{
|
|
151
|
-
"group.id": "
|
|
151
|
+
"group.id": "datahub-kafka-ingestion",
|
|
152
152
|
"bootstrap.servers": connection.bootstrap,
|
|
153
153
|
**connection.consumer_config,
|
|
154
154
|
}
|
|
@@ -164,6 +164,25 @@ def get_kafka_consumer(
|
|
|
164
164
|
return consumer
|
|
165
165
|
|
|
166
166
|
|
|
167
|
+
def get_kafka_admin_client(
|
|
168
|
+
connection: KafkaConsumerConnectionConfig,
|
|
169
|
+
) -> AdminClient:
|
|
170
|
+
client = AdminClient(
|
|
171
|
+
{
|
|
172
|
+
"group.id": "datahub-kafka-ingestion",
|
|
173
|
+
"bootstrap.servers": connection.bootstrap,
|
|
174
|
+
**connection.consumer_config,
|
|
175
|
+
}
|
|
176
|
+
)
|
|
177
|
+
if CallableConsumerConfig.is_callable_config(connection.consumer_config):
|
|
178
|
+
# As per documentation, we need to explicitly call the poll method to make sure OAuth callback gets executed
|
|
179
|
+
# https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration
|
|
180
|
+
logger.debug("Initiating polling for kafka admin client")
|
|
181
|
+
client.poll(timeout=30)
|
|
182
|
+
logger.debug("Initiated polling for kafka admin client")
|
|
183
|
+
return client
|
|
184
|
+
|
|
185
|
+
|
|
167
186
|
@dataclass
|
|
168
187
|
class KafkaSourceReport(StaleEntityRemovalSourceReport):
|
|
169
188
|
topics_scanned: int = 0
|
|
@@ -278,13 +297,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
278
297
|
def init_kafka_admin_client(self) -> None:
|
|
279
298
|
try:
|
|
280
299
|
# TODO: Do we require separate config than existing consumer_config ?
|
|
281
|
-
self.admin_client =
|
|
282
|
-
{
|
|
283
|
-
"group.id": "test",
|
|
284
|
-
"bootstrap.servers": self.source_config.connection.bootstrap,
|
|
285
|
-
**self.source_config.connection.consumer_config,
|
|
286
|
-
}
|
|
287
|
-
)
|
|
300
|
+
self.admin_client = get_kafka_admin_client(self.source_config.connection)
|
|
288
301
|
except Exception as e:
|
|
289
302
|
logger.debug(e, exc_info=e)
|
|
290
303
|
self.report.report_warning(
|
|
File without changes
|