acryl-datahub 0.15.0rc24__py3-none-any.whl → 0.15.0.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. More details were linked from the original page.

Files changed (32):
  1. {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/METADATA +2456 -2456
  2. {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/RECORD +31 -27
  3. {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/entry_points.txt +1 -1
  4. datahub/__init__.py +1 -1
  5. datahub/api/entities/structuredproperties/structuredproperties.py +20 -8
  6. datahub/configuration/source_common.py +13 -0
  7. datahub/ingestion/source/iceberg/iceberg.py +27 -1
  8. datahub/ingestion/source/iceberg/iceberg_common.py +4 -0
  9. datahub/ingestion/source/kafka_connect/__init__.py +0 -0
  10. datahub/ingestion/source/kafka_connect/common.py +202 -0
  11. datahub/ingestion/source/kafka_connect/kafka_connect.py +367 -0
  12. datahub/ingestion/source/kafka_connect/sink_connectors.py +341 -0
  13. datahub/ingestion/source/kafka_connect/source_connectors.py +570 -0
  14. datahub/ingestion/source/looker/looker_common.py +54 -2
  15. datahub/ingestion/source/looker/looker_lib_wrapper.py +13 -1
  16. datahub/ingestion/source/looker/looker_source.py +12 -1
  17. datahub/ingestion/source/mlflow.py +30 -5
  18. datahub/ingestion/source/powerbi/config.py +1 -14
  19. datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +1 -1
  20. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +1 -1
  21. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -0
  22. datahub/ingestion/source/sql/mssql/job_models.py +30 -1
  23. datahub/ingestion/source/sql/mssql/source.py +14 -0
  24. datahub/ingestion/source/tableau/tableau.py +4 -5
  25. datahub/ingestion/source/tableau/tableau_constant.py +3 -1
  26. datahub/ingestion/source/tableau/tableau_server_wrapper.py +6 -2
  27. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  28. datahub/sql_parsing/sql_parsing_aggregator.py +1 -1
  29. datahub/sql_parsing/tool_meta_extractor.py +116 -5
  30. datahub/ingestion/source/kafka/kafka_connect.py +0 -1468
  31. {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/WHEEL +0 -0
  32. {acryl_datahub-0.15.0rc24.dist-info → acryl_datahub-0.15.0.1rc1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=I7rWvDl7l3VZ5DC3mtaoQKDToqQCmmprWfOtkh9E_mM,575
1
+ datahub/__init__.py,sha256=gK5aLEGMHMZfg-QUDI5T7mr1ej_5OFVKhCrqqoj_QGk,576
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -52,7 +52,7 @@ datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp6
52
52
  datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
54
54
  datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- datahub/api/entities/structuredproperties/structuredproperties.py,sha256=tYEVp2oqJa9FhlrnbAf2Zw82WqicJI9lF0P5U9soY9E,7502
55
+ datahub/api/entities/structuredproperties/structuredproperties.py,sha256=wtIt-zqW8_PamvSPpRR28tKkiOAZ7ME3lufoaTeGnlU,8116
56
56
  datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
57
57
  datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0VhA,2818
58
58
  datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
@@ -100,7 +100,7 @@ datahub/configuration/kafka.py,sha256=MlIwpd5FFyOyjdDXW_X9JTLNk7f988sPMgevkcZYVg
100
100
  datahub/configuration/kafka_consumer_config.py,sha256=LivsObTt9yC3WoGnslJbF_x4ojfNdxMIMEhb8vvJfcA,2133
101
101
  datahub/configuration/pattern_utils.py,sha256=Q5IB9RfWOOo5FvRVBU7XkhiwHCxSQ1NTMfUlWtWI9qc,699
102
102
  datahub/configuration/pydantic_migration_helpers.py,sha256=4C_COAVZ5iJ8yxcWNgXZNWsY7ULogICNZ368oNF7zWg,1462
103
- datahub/configuration/source_common.py,sha256=68LZOuB23zSEcfgQJE1wZQnyYQHVVnEZK3Sniv_nEQs,2107
103
+ datahub/configuration/source_common.py,sha256=CXs5xJzeFpnrKojVZbHZOvoBDnrkEogN-e8hqMERzfo,2606
104
104
  datahub/configuration/time_window_config.py,sha256=xuwoftrHWC069qfumErCV3BCuE-RWVsDLAefNWn_RFc,5347
105
105
  datahub/configuration/toml.py,sha256=Ohc5sAWLPoAinPYL8njyheZ3ak81fC2Sp8IbBbESPGg,380
106
106
  datahub/configuration/validate_field_deprecation.py,sha256=byBKOlGhPkRO_aCXlJ9BQk7-v3RqCnwI5_sho72ZDMQ,1268
@@ -195,7 +195,7 @@ datahub/ingestion/source/ge_profiling_config.py,sha256=P-9pd20koFvpxeEL_pqFvKWWz
195
195
  datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
196
196
  datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
197
197
  datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
198
- datahub/ingestion/source/mlflow.py,sha256=SxCt4jtxQcpPWEI2rRNagCiE_6TWr2RroqmxRd_td1Y,11565
198
+ datahub/ingestion/source/mlflow.py,sha256=-yWUuAEVBiNN-elz8Pgn0UeGsC3fVB20z1zKNIr4LXI,12309
199
199
  datahub/ingestion/source/mode.py,sha256=n_5em3jADCr5gWTLDOP4O4bRS0Zt_TCZtW8uFPxn-DI,63043
200
200
  datahub/ingestion/source/mongodb.py,sha256=vZue4Nz0xaBoCUsQr3_0OIRkWRxeE_IH_Y_QKZ1s7S0,21077
201
201
  datahub/ingestion/source/nifi.py,sha256=ttsjZ9aRUvINmewvKFIQD8Rwa4jcl35WFG-F-jPGPWQ,56146
@@ -313,28 +313,32 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
313
313
  datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
314
314
  datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
315
315
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
316
- datahub/ingestion/source/iceberg/iceberg.py,sha256=fjqp3VBW5W5-54X_-ubkRZiAmdHvuMbxRbC4UYzEr4U,25900
317
- datahub/ingestion/source/iceberg/iceberg_common.py,sha256=TS3_ZYZ47Fe02CmzEo1z0pvy7yjXuG1VlwqNxa0U6pc,8506
316
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=Pi2QD8v0HOpqr8M9la78Nlm3Be9iy3G4pCZqu2NitZM,27253
317
+ datahub/ingestion/source/iceberg/iceberg_common.py,sha256=4efWbnj8iWWNcO6_lFXFZRIzaKVPWhd1MmmxdJafemw,8684
318
318
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=hLT1Le_TEUoFXvsJSlrRB1qbTiTe-YVGCof5TFHMyd8,9908
319
319
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
320
320
  datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
321
321
  datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
322
322
  datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
323
323
  datahub/ingestion/source/kafka/kafka.py,sha256=9SR7bqp9J0rPYde5IClhnAuVNy9ItsB8-ZeXtTc_mEY,26442
324
- datahub/ingestion/source/kafka/kafka_connect.py,sha256=Jm1MYky_OPIwvVHuEjgOjK0e6-jA-dYnsLZ7r-Y_9mA,56208
325
324
  datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
325
+ datahub/ingestion/source/kafka_connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
326
+ datahub/ingestion/source/kafka_connect/common.py,sha256=Ekb1K_J1eTgiH7LSP1AbEIf7NQh_2Vyu1lYX_Ggcqk4,7049
327
+ datahub/ingestion/source/kafka_connect/kafka_connect.py,sha256=8bjVFDkwjgs3gP7Y3itzABGfBcY_WbMQ5PWjrm-g93A,14249
328
+ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=ESuJE5SFLLvss9OwDEIB8SAko4rhzaWZ-4dKY0Dh0N8,12900
329
+ datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=_765fSMDAWAe0Cf_F4VNHfOWKNhtqBA1Ep2jL3rf-qc,21263
326
330
  datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
327
331
  datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
328
- datahub/ingestion/source/looker/looker_common.py,sha256=cIrsc0nDEsODxx9tjvSm2A3-OlCkPKCu4W2L8PxzLWs,59926
332
+ datahub/ingestion/source/looker/looker_common.py,sha256=0-5xhq7vE9YFj8tzbvRI7RnP1cD_oxnN4NpSXjKzxjE,61726
329
333
  datahub/ingestion/source/looker/looker_config.py,sha256=87WAgdJ_QWdTq25RBwgIqfc2kq7dubSpzbEtXb2ihMw,13182
330
334
  datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
331
335
  datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
332
336
  datahub/ingestion/source/looker/looker_dataclasses.py,sha256=ULWLFWsV2cKmTuOFavD8QjEBmnXmvjyr8RbUB62DwJQ,12178
333
337
  datahub/ingestion/source/looker/looker_file_loader.py,sha256=c1ewDrIb9VJg1o-asbwX9gL83kgL01vIETzzbmZIhmw,4267
334
- datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=QTTCW-rPNUoazQG_sTJbCARXJzQ7NKS-XKURp2AAWls,11106
338
+ datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=0gaYjBv4wkbbLWVgvaAV6JyWAFb0utTG6TCve2d9xss,11511
335
339
  datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=mO4G4MNA4YZFvZaDBpdiJ2vP3irC82kY34RdaK4Pbfs,3100
336
340
  datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
337
- datahub/ingestion/source/looker/looker_source.py,sha256=AByQxWVfOBqOtZPaR_cw9SB-tFZtfppiKRkFSbcK1GA,65346
341
+ datahub/ingestion/source/looker/looker_source.py,sha256=IUI2FpYG2_bhVnP_LZeQymjc5D0F7lqYTADktzV3yr8,65735
338
342
  datahub/ingestion/source/looker/looker_template_language.py,sha256=EG4ZfVZ0x53lgaYh2ohzL4ZCy9KsX0TA51XqCmsCd2Q,14328
339
343
  datahub/ingestion/source/looker/looker_usage.py,sha256=qegMr-Rnqz3xNGSBfsuD3S_BPXf7UEMhwFN7DPQeLNo,22914
340
344
  datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
@@ -351,15 +355,15 @@ datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwr
351
355
  datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
352
356
  datahub/ingestion/source/neo4j/neo4j_source.py,sha256=L9WiZ5yZrIDMrgj3gYU9j6zz3TRMXYpcWxeTegD7sFg,12409
353
357
  datahub/ingestion/source/powerbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
354
- datahub/ingestion/source/powerbi/config.py,sha256=LV8BOm2zzF9t0RMwQVVUNB0bStzBPo8A6JkaW0xlgsQ,23241
355
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=AIU89lVPoCWlzc_RfUjDJwRQ11akPtnGpBTluBMCKio,2242
358
+ datahub/ingestion/source/powerbi/config.py,sha256=9cgMrwp3DQqv14iTqulAONkbVe5nhycJ1ElkA_275go,22732
359
+ datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py,sha256=-njW1kJOy-LY5JFwJLhVQ0bMBj9NQz5TZhQqsSi_KsM,2285
356
360
  datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule,sha256=PRWzuZMMhKdOVoAaE8csHvUFbZHxYe5meJHgrqlgiuw,19795
357
361
  datahub/ingestion/source/powerbi/powerbi.py,sha256=7UsAEqaFlkWONcXJdQ2hotUYYn46ks6Fe71KXEMh7lI,54495
358
362
  datahub/ingestion/source/powerbi/m_query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
359
363
  datahub/ingestion/source/powerbi/m_query/data_classes.py,sha256=EbaEasEOGZ73jz0cQofH9ez65wSvRBof0R6GQaIVLnM,2009
360
364
  datahub/ingestion/source/powerbi/m_query/native_sql_parser.py,sha256=zzKVDGeUM3Yv3-zNah4D6mSnr6jXsstNuLmzczcPQEE,3683
361
365
  datahub/ingestion/source/powerbi/m_query/parser.py,sha256=pB1LGdb02Ryf8Pr8JPSgiOmLE6mEAgDbKodtKOOY6LU,5782
362
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=h1gBh9Gz1zF3m-hA0WMQaBdEtcZUW9vScNKEvqIWCfk,32442
366
+ datahub/ingestion/source/powerbi/m_query/pattern_handler.py,sha256=yDNdzPREzegarux_I9F_PsLe1hlSFa-uwoORrEgbO-c,32485
363
367
  datahub/ingestion/source/powerbi/m_query/resolver.py,sha256=TMt84_JaCazpP7vcneW0O3cUjtbIuh8Yid78JWfDxsI,16953
364
368
  datahub/ingestion/source/powerbi/m_query/tree_function.py,sha256=h77DunhlgOP0fAg8UXDXxxInOi7Pay85_d1Ca4YqyKs,6134
365
369
  datahub/ingestion/source/powerbi/m_query/validator.py,sha256=crG-VZy2XPieiDliP9yVMgiFcc8b2xbZyDFEATXqEAQ,1155
@@ -439,7 +443,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYh
439
443
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
440
444
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=PEmYNMXJRUvLQmVd8juVqjokfuSPuH9ppcM0ruXamxA,24807
441
445
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=YczNEupY89jeegjR2_1pT4bPi9wQ69EIhGpzyCe9Jdg,12600
442
- datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=lo_3asTuIZbF-LuEUcYL-9NIZ720n7oB9mYA6WVTWA4,31960
446
+ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=TlS5d1lpEN74ZP0c8UzUhJZIeBMO3ZIUxRler1p7lnA,31998
443
447
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
444
448
  datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
445
449
  datahub/ingestion/source/sql/clickhouse.py,sha256=jzvaXP5Wr0SMhj2rtuvVE821xnfpKiXhO3cm0xblgHs,27299
@@ -467,8 +471,8 @@ datahub/ingestion/source/sql/trino.py,sha256=FEn_BQ3pm23hKx94ek5kk5IXGNYcBqZEhll
467
471
  datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5foKrf71SydzEltc3WsVAhQc,5732
468
472
  datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
469
473
  datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
470
- datahub/ingestion/source/sql/mssql/job_models.py,sha256=eMyR0Efl5kvi7QNgNXzd5_6PdDKYly_552Y8OGSj9PY,6012
471
- datahub/ingestion/source/sql/mssql/source.py,sha256=-B0bnFKEReciYzQ4p_2xJJzdn-H8vYz2MQ_h-1B0ibs,30329
474
+ datahub/ingestion/source/sql/mssql/job_models.py,sha256=ztXDrD4anhzwWvACIm9fucE2WhMDMKkJ4alMYOQOqWA,7083
475
+ datahub/ingestion/source/sql/mssql/source.py,sha256=ODdsOIbDA3X0E7En6GT15mD49W6RW9sXLwRoUgw2a8I,30925
472
476
  datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
473
477
  datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
474
478
  datahub/ingestion/source/state/checkpoint.py,sha256=x9Xww-MIFXSKjeg1tOZXE72LehCm5OfKy3HfucgIRWM,8833
@@ -486,11 +490,11 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
486
490
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
487
491
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
488
492
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
489
- datahub/ingestion/source/tableau/tableau.py,sha256=P_DUuUvXk5u2ihA0JghtRkYc_KI_yQR2ZiQVe9IUvsU,138197
493
+ datahub/ingestion/source/tableau/tableau.py,sha256=80m5a8g2q5jnm5pFtL4s_SmSeGPQIuZGqGe9RGn6OSI,138248
490
494
  datahub/ingestion/source/tableau/tableau_common.py,sha256=9gQLq_3BlAsKll83uVlnWJRWaIDtFtREUyuimXF13Z0,26219
491
- datahub/ingestion/source/tableau/tableau_constant.py,sha256=jVQMgLXND5aPL6XLETKp81BehRkvyLTU_Vhhe_1NOkI,2576
492
- datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=PEGfcoUcBdsnOa5EzCqy1IiuQ3OZ9fZVEMzDqhhHOto,922
493
- datahub/ingestion/source/tableau/tableau_validation.py,sha256=l0DuXUuxJwEXMzo61xLx-KLc5u6tiz2n0e9EepJdWEM,1808
495
+ datahub/ingestion/source/tableau/tableau_constant.py,sha256=ZcAeHsQUXVVL26ORly0ByZk_GJAFbxaKuJAlX_sYMac,2686
496
+ datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=nSyx9RzC6TCQDm-cTVJ657qT8iDwzk_8JMKpohhmOc4,1046
497
+ datahub/ingestion/source/tableau/tableau_validation.py,sha256=pd--LcTLTfrFsouhCOvGC_2IjeMfKbJV81EEo3ibMwE,1820
494
498
  datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
495
499
  datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
496
500
  datahub/ingestion/source/unity/config.py,sha256=m4-n7mYz4Ct4L1QdfJFklwHyj8boKCbV7Sb3Ou6AT3Q,14756
@@ -871,12 +875,12 @@ datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn
871
875
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
872
876
  datahub/sql_parsing/schema_resolver.py,sha256=9INZWdxA2dMSLK6RXaVqjbjyLY_VKMhCkQv_Xd6Ln3I,10848
873
877
  datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
874
- datahub/sql_parsing/sql_parsing_aggregator.py,sha256=F-aj7yqOwbo7FpxduFO5a7cLWkojL_Npv3_dlfHPNGY,69877
878
+ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=LBs1RjRqh3natrx4WfgRQGNpI56o12jtbABO5ipEBWA,69889
875
879
  datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
876
880
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
877
881
  datahub/sql_parsing/sqlglot_lineage.py,sha256=CLDOc0HNqL_539eahOP3QOoldIYC6CF29id4Xe3TlEM,47018
878
882
  datahub/sql_parsing/sqlglot_utils.py,sha256=n6yufzEGwSlFeCSU540hEldIuab0q8KGqm9x0vSawkc,14699
879
- datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
883
+ datahub/sql_parsing/tool_meta_extractor.py,sha256=7tY4FAClhFcqwc23lGVlnT6Dequ_5Xcpbt0hDvnlLzM,6670
880
884
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
881
885
  datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
882
886
  datahub/telemetry/telemetry.py,sha256=gzla-QGNsynGg2FqFxiDDFQ0emG53MJ9lhOA2-UUg-Y,15047
@@ -976,8 +980,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
976
980
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
977
981
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
978
982
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
979
- acryl_datahub-0.15.0rc24.dist-info/METADATA,sha256=z1GOrJZhoUNozAZuAKJuhaUEOtkFO6qXVGtHd5xC3mo,173559
980
- acryl_datahub-0.15.0rc24.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
981
- acryl_datahub-0.15.0rc24.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
982
- acryl_datahub-0.15.0rc24.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
983
- acryl_datahub-0.15.0rc24.dist-info/RECORD,,
983
+ acryl_datahub-0.15.0.1rc1.dist-info/METADATA,sha256=Ll6D3fw03bz2dVmCp9Mcez5IbFKIfXzUnMeSe6Ej4eM,173642
984
+ acryl_datahub-0.15.0.1rc1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
985
+ acryl_datahub-0.15.0.1rc1.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
986
+ acryl_datahub-0.15.0.1rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
987
+ acryl_datahub-0.15.0.1rc1.dist-info/RECORD,,
@@ -57,7 +57,7 @@ hive-metastore = datahub.ingestion.source.sql.hive_metastore:HiveMetastoreSource
57
57
  iceberg = datahub.ingestion.source.iceberg.iceberg:IcebergSource
58
58
  json-schema = datahub.ingestion.source.schema.json_schema:JsonSchemaSource
59
59
  kafka = datahub.ingestion.source.kafka.kafka:KafkaSource
60
- kafka-connect = datahub.ingestion.source.kafka.kafka_connect:KafkaConnectSource
60
+ kafka-connect = datahub.ingestion.source.kafka_connect.kafka_connect:KafkaConnectSource
61
61
  ldap = datahub.ingestion.source.ldap:LDAPSource
62
62
  looker = datahub.ingestion.source.looker.looker_source:LookerDashboardSource
63
63
  lookml = datahub.ingestion.source.looker.lookml_source:LookMLSource
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0rc24"
6
+ __version__ = "0.15.0.1rc1"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -14,7 +14,7 @@ from datahub.metadata.schema_classes import (
14
14
  PropertyValueClass,
15
15
  StructuredPropertyDefinitionClass,
16
16
  )
17
- from datahub.metadata.urns import StructuredPropertyUrn, Urn
17
+ from datahub.metadata.urns import DataTypeUrn, StructuredPropertyUrn, Urn
18
18
  from datahub.utilities.urns._urn_base import URN_TYPES
19
19
 
20
20
  logging.basicConfig(level=logging.INFO)
@@ -86,19 +86,31 @@ class StructuredProperties(ConfigModel):
86
86
 
87
87
  @validator("type")
88
88
  def validate_type(cls, v: str) -> str:
89
- # Convert to lowercase if needed
90
- if not v.islower():
89
+ # This logic is somewhat hacky, since we need to deal with
90
+ # 1. fully qualified urns
91
+ # 2. raw data types, that need to get the datahub namespace prefix
92
+ # While keeping the user-facing interface and error messages clean.
93
+
94
+ if not v.startswith("urn:li:") and not v.islower():
95
+ # Convert to lowercase if needed
96
+ v = v.lower()
91
97
  logger.warning(
92
- f"Structured property type should be lowercase. Updated to {v.lower()}"
98
+ f"Structured property type should be lowercase. Updated to {v}"
93
99
  )
94
- v = v.lower()
100
+
101
+ urn = Urn.make_data_type_urn(v)
95
102
 
96
103
  # Check if type is allowed
97
- if not AllowedTypes.check_allowed_type(v):
104
+ data_type_urn = DataTypeUrn.from_string(urn)
105
+ unqualified_data_type = data_type_urn.id
106
+ if unqualified_data_type.startswith("datahub."):
107
+ unqualified_data_type = unqualified_data_type[len("datahub.") :]
108
+ if not AllowedTypes.check_allowed_type(unqualified_data_type):
98
109
  raise ValueError(
99
- f"Type {v} is not allowed. Allowed types are {AllowedTypes.values()}"
110
+ f"Type {unqualified_data_type} is not allowed. Allowed types are {AllowedTypes.values()}"
100
111
  )
101
- return v
112
+
113
+ return urn
102
114
 
103
115
  @property
104
116
  def fqn(self) -> str:
@@ -63,3 +63,16 @@ class DatasetLineageProviderConfigBase(EnvConfigMixin):
63
63
  default=None,
64
64
  description="A holder for platform -> platform_instance mappings to generate correct dataset urns",
65
65
  )
66
+
67
+
68
+ class PlatformDetail(ConfigModel):
69
+ platform_instance: Optional[str] = Field(
70
+ default=None,
71
+ description="DataHub platform instance name. To generate correct urn for upstream dataset, this should match "
72
+ "with platform instance name used in ingestion "
73
+ "recipe of other datahub sources.",
74
+ )
75
+ env: str = Field(
76
+ default=DEFAULT_ENV,
77
+ description="The environment that all assets produced by DataHub platform ingestion source belong to",
78
+ )
@@ -10,6 +10,7 @@ from pyiceberg.exceptions import (
10
10
  NoSuchNamespaceError,
11
11
  NoSuchPropertyException,
12
12
  NoSuchTableError,
13
+ ServerError,
13
14
  )
14
15
  from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
15
16
  from pyiceberg.table import Table
@@ -145,6 +146,13 @@ class IcebergSource(StatefulIngestionSourceBase):
145
146
  self.report.report_no_listed_namespaces(len(namespaces))
146
147
  tables_count = 0
147
148
  for namespace in namespaces:
149
+ namespace_repr = ".".join(namespace)
150
+ if not self.config.namespace_pattern.allowed(namespace_repr):
151
+ LOGGER.info(
152
+ f"Namespace {namespace_repr} is not allowed by config pattern, skipping"
153
+ )
154
+ self.report.report_dropped(f"{namespace_repr}.*")
155
+ continue
148
156
  try:
149
157
  tables = catalog.list_tables(namespace)
150
158
  tables_count += len(tables)
@@ -181,6 +189,9 @@ class IcebergSource(StatefulIngestionSourceBase):
181
189
  if not self.config.table_pattern.allowed(dataset_name):
182
190
  # Dataset name is rejected by pattern, report as dropped.
183
191
  self.report.report_dropped(dataset_name)
192
+ LOGGER.debug(
193
+ f"Skipping table {dataset_name} due to not being allowed by the config pattern"
194
+ )
184
195
  return
185
196
  try:
186
197
  if not hasattr(thread_local, "local_catalog"):
@@ -219,6 +230,22 @@ class IcebergSource(StatefulIngestionSourceBase):
219
230
  LOGGER.warning(
220
231
  f"NoSuchTableError while processing table {dataset_path}, skipping it.",
221
232
  )
233
+ except FileNotFoundError as e:
234
+ self.report.report_warning(
235
+ "file-not-found",
236
+ f"Encountered FileNotFoundError when trying to read manifest file for {dataset_name}. {e}",
237
+ )
238
+ LOGGER.warning(
239
+ f"FileNotFoundError while processing table {dataset_path}, skipping it."
240
+ )
241
+ except ServerError as e:
242
+ self.report.report_warning(
243
+ "iceberg-rest-server-error",
244
+ f"Iceberg Rest Catalog returned 500 status due to an unhandled exception for {dataset_name}. Exception: {e}",
245
+ )
246
+ LOGGER.warning(
247
+ f"Iceberg Rest Catalog server error (500 status) encountered when processing table {dataset_path}, skipping it."
248
+ )
222
249
  except Exception as e:
223
250
  self.report.report_failure("general", f"Failed to create workunit: {e}")
224
251
  LOGGER.exception(
@@ -269,7 +296,6 @@ class IcebergSource(StatefulIngestionSourceBase):
269
296
  ] = table.current_snapshot().manifest_list
270
297
  dataset_properties = DatasetPropertiesClass(
271
298
  name=table.name()[-1],
272
- tags=[],
273
299
  description=table.metadata.properties.get("comment", None),
274
300
  customProperties=custom_properties,
275
301
  )
@@ -68,6 +68,10 @@ class IcebergSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin)
68
68
  default=AllowDenyPattern.allow_all(),
69
69
  description="Regex patterns for tables to filter in ingestion.",
70
70
  )
71
+ namespace_pattern: AllowDenyPattern = Field(
72
+ default=AllowDenyPattern.allow_all(),
73
+ description="Regex patterns for namespaces to filter in ingestion.",
74
+ )
71
75
  user_ownership_property: Optional[str] = Field(
72
76
  default="owner",
73
77
  description="Iceberg table property to look for a `CorpUser` owner. Can only hold a single user value. If property has no value, no owner information will be emitted.",
File without changes
@@ -0,0 +1,202 @@
1
+ import logging
2
+ from dataclasses import dataclass, field
3
+ from typing import Dict, Iterable, List, Optional
4
+
5
+ from pydantic.fields import Field
6
+
7
+ from datahub.configuration.common import AllowDenyPattern, ConfigModel
8
+ from datahub.configuration.source_common import (
9
+ DatasetLineageProviderConfigBase,
10
+ PlatformInstanceConfigMixin,
11
+ )
12
+ from datahub.ingestion.source.state.stale_entity_removal_handler import (
13
+ StaleEntityRemovalSourceReport,
14
+ StatefulStaleMetadataRemovalConfig,
15
+ )
16
+ from datahub.ingestion.source.state.stateful_ingestion_base import (
17
+ StatefulIngestionConfigBase,
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ KAFKA = "kafka"
23
+ SOURCE = "source"
24
+ SINK = "sink"
25
+ CONNECTOR_CLASS = "connector.class"
26
+
27
+
28
+ class ProvidedConfig(ConfigModel):
29
+ provider: str
30
+ path_key: str
31
+ value: str
32
+
33
+
34
+ class GenericConnectorConfig(ConfigModel):
35
+ connector_name: str
36
+ source_dataset: str
37
+ source_platform: str
38
+
39
+
40
+ class KafkaConnectSourceConfig(
41
+ PlatformInstanceConfigMixin,
42
+ DatasetLineageProviderConfigBase,
43
+ StatefulIngestionConfigBase,
44
+ ):
45
+ # See the Connect REST Interface for details
46
+ # https://docs.confluent.io/platform/current/connect/references/restapi.html#
47
+ connect_uri: str = Field(
48
+ default="http://localhost:8083/", description="URI to connect to."
49
+ )
50
+ username: Optional[str] = Field(default=None, description="Kafka Connect username.")
51
+ password: Optional[str] = Field(default=None, description="Kafka Connect password.")
52
+ cluster_name: Optional[str] = Field(
53
+ default="connect-cluster", description="Cluster to ingest from."
54
+ )
55
+ # convert lineage dataset's urns to lowercase
56
+ convert_lineage_urns_to_lowercase: bool = Field(
57
+ default=False,
58
+ description="Whether to convert the urns of ingested lineage dataset to lowercase",
59
+ )
60
+ connector_patterns: AllowDenyPattern = Field(
61
+ default=AllowDenyPattern.allow_all(),
62
+ description="regex patterns for connectors to filter for ingestion.",
63
+ )
64
+ provided_configs: Optional[List[ProvidedConfig]] = Field(
65
+ default=None, description="Provided Configurations"
66
+ )
67
+ connect_to_platform_map: Optional[Dict[str, Dict[str, str]]] = Field(
68
+ default=None,
69
+ description='Platform instance mapping when multiple instances for a platform is available. Entry for a platform should be in either `platform_instance_map` or `connect_to_platform_map`. e.g.`connect_to_platform_map: { "postgres-connector-finance-db": "postgres": "core_finance_instance" }`',
70
+ )
71
+ platform_instance_map: Optional[Dict[str, str]] = Field(
72
+ default=None,
73
+ description='Platform instance mapping to use when constructing URNs. e.g.`platform_instance_map: { "hive": "warehouse" }`',
74
+ )
75
+ generic_connectors: List[GenericConnectorConfig] = Field(
76
+ default=[],
77
+ description="Provide lineage graph for sources connectors other than Confluent JDBC Source Connector, Debezium Source Connector, and Mongo Source Connector",
78
+ )
79
+
80
+ stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None
81
+
82
+
83
+ @dataclass
84
+ class KafkaConnectSourceReport(StaleEntityRemovalSourceReport):
85
+ connectors_scanned: int = 0
86
+ filtered: List[str] = field(default_factory=list)
87
+
88
+ def report_connector_scanned(self, connector: str) -> None:
89
+ self.connectors_scanned += 1
90
+
91
+ def report_dropped(self, connector: str) -> None:
92
+ self.filtered.append(connector)
93
+
94
+
95
+ @dataclass
96
+ class KafkaConnectLineage:
97
+ """Class to store Kafka Connect lineage mapping, Each instance is potential DataJob"""
98
+
99
+ source_platform: str
100
+ target_dataset: str
101
+ target_platform: str
102
+ job_property_bag: Optional[Dict[str, str]] = None
103
+ source_dataset: Optional[str] = None
104
+
105
+
106
+ @dataclass
107
+ class ConnectorManifest:
108
+ """Each instance is potential DataFlow"""
109
+
110
+ name: str
111
+ type: str
112
+ config: Dict
113
+ tasks: Dict
114
+ url: Optional[str] = None
115
+ flow_property_bag: Optional[Dict[str, str]] = None
116
+ lineages: List[KafkaConnectLineage] = field(default_factory=list)
117
+ topic_names: Iterable[str] = field(default_factory=list)
118
+
119
+
120
+ def remove_prefix(text: str, prefix: str) -> str:
121
+ if text.startswith(prefix):
122
+ index = len(prefix)
123
+ return text[index:]
124
+ return text
125
+
126
+
127
+ def unquote(
128
+ string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None
129
+ ) -> str:
130
+ """
131
+ If string starts and ends with a quote, unquote it
132
+ """
133
+ trailing_quote = trailing_quote if trailing_quote else leading_quote
134
+ if string.startswith(leading_quote) and string.endswith(trailing_quote):
135
+ string = string[1:-1]
136
+ return string
137
+
138
+
139
+ def get_dataset_name(
140
+ database_name: Optional[str],
141
+ source_table: str,
142
+ ) -> str:
143
+ if database_name:
144
+ dataset_name = database_name + "." + source_table
145
+ else:
146
+ dataset_name = source_table
147
+
148
+ return dataset_name
149
+
150
+
151
+ def get_platform_instance(
152
+ config: KafkaConnectSourceConfig, connector_name: str, platform: str
153
+ ) -> Optional[str]:
154
+ instance_name = None
155
+ if (
156
+ config.connect_to_platform_map
157
+ and config.connect_to_platform_map.get(connector_name)
158
+ and config.connect_to_platform_map[connector_name].get(platform)
159
+ ):
160
+ instance_name = config.connect_to_platform_map[connector_name][platform]
161
+ if config.platform_instance_map and config.platform_instance_map.get(platform):
162
+ logger.warning(
163
+ f"Same source platform {platform} configured in both platform_instance_map and connect_to_platform_map."
164
+ "Will prefer connector specific platform instance from connect_to_platform_map."
165
+ )
166
+ elif config.platform_instance_map and config.platform_instance_map.get(platform):
167
+ instance_name = config.platform_instance_map[platform]
168
+ logger.info(
169
+ f"Instance name assigned is: {instance_name} for Connector Name {connector_name} and platform {platform}"
170
+ )
171
+ return instance_name
172
+
173
+
174
+ def transform_connector_config(
175
+ connector_config: Dict, provided_configs: List[ProvidedConfig]
176
+ ) -> None:
177
+ """This method will update provided configs in connector config values, if any"""
178
+ lookupsByProvider = {}
179
+ for pconfig in provided_configs:
180
+ lookupsByProvider[f"${{{pconfig.provider}:{pconfig.path_key}}}"] = pconfig.value
181
+ for k, v in connector_config.items():
182
+ for key, value in lookupsByProvider.items():
183
+ if key in v:
184
+ connector_config[k] = connector_config[k].replace(key, value)
185
+
186
+
187
+ # TODO: Find a more automated way to discover new platforms with 3 level naming hierarchy.
188
+ def has_three_level_hierarchy(platform: str) -> bool:
189
+ return platform in ["postgres", "trino", "redshift", "snowflake"]
190
+
191
+
192
+ @dataclass
193
+ class BaseConnector:
194
+ connector_manifest: ConnectorManifest
195
+ config: KafkaConnectSourceConfig
196
+ report: KafkaConnectSourceReport
197
+
198
+ def extract_lineages(self) -> List[KafkaConnectLineage]:
199
+ return []
200
+
201
+ def extract_flow_property_bag(self) -> Optional[Dict[str, str]]:
202
+ return None