acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414) hide show
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ from pyiceberg.exceptions import (
12
12
  NoSuchNamespaceError,
13
13
  NoSuchPropertyException,
14
14
  NoSuchTableError,
15
- ServerError,
15
+ RESTError,
16
16
  )
17
17
  from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
18
18
  from pyiceberg.table import Table
@@ -118,7 +118,7 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
118
118
 
119
119
 
120
120
  @platform_name("Iceberg")
121
- @support_status(SupportStatus.TESTING)
121
+ @support_status(SupportStatus.INCUBATING)
122
122
  @config_class(IcebergSourceConfig)
123
123
  @capability(
124
124
  SourceCapability.PLATFORM_INSTANCE,
@@ -134,7 +134,9 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
134
134
  SourceCapability.OWNERSHIP,
135
135
  "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
136
136
  )
137
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
137
+ @capability(
138
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
139
+ )
138
140
  class IcebergSource(StatefulIngestionSourceBase):
139
141
  """
140
142
  ## Integration Details
@@ -152,6 +154,10 @@ class IcebergSource(StatefulIngestionSourceBase):
152
154
  self.report: IcebergSourceReport = IcebergSourceReport()
153
155
  self.config: IcebergSourceConfig = config
154
156
  self.ctx: PipelineContext = ctx
157
+ self.stamping_processor = AutoSystemMetadata(
158
+ self.ctx
159
+ ) # single instance used only when processing namespaces
160
+ self.namespaces: List[Tuple[Identifier, str]] = []
155
161
 
156
162
  @classmethod
157
163
  def create(cls, config_dict: Dict, ctx: PipelineContext) -> "IcebergSource":
@@ -194,9 +200,9 @@ class IcebergSource(StatefulIngestionSourceBase):
194
200
  auto_lowercase_dataset_urns,
195
201
  auto_materialize_referenced_tags_terms,
196
202
  partial(
197
- auto_fix_duplicate_schema_field_paths, platform=self._infer_platform()
203
+ auto_fix_duplicate_schema_field_paths, platform=self.infer_platform()
198
204
  ),
199
- partial(auto_fix_empty_field_paths, platform=self._infer_platform()),
205
+ partial(auto_fix_empty_field_paths, platform=self.infer_platform()),
200
206
  partial(auto_workunit_reporter, self.get_report()),
201
207
  auto_patch_last_modified,
202
208
  EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size,
@@ -244,6 +250,13 @@ class IcebergSource(StatefulIngestionSourceBase):
244
250
  context=str(namespace),
245
251
  exc=e,
246
252
  )
253
+ except RESTError as e:
254
+ self.report.warning(
255
+ title="Iceberg REST Server Error",
256
+ message="Iceberg REST Server returned error status when trying to list tables for a namespace, skipping it.",
257
+ context=str(namespace),
258
+ exc=e,
259
+ )
247
260
  except Exception as e:
248
261
  self.report.report_failure(
249
262
  title="Error when processing a namespace",
@@ -320,10 +333,10 @@ class IcebergSource(StatefulIngestionSourceBase):
320
333
  context=dataset_name,
321
334
  exc=e,
322
335
  )
323
- except ServerError as e:
336
+ except RESTError as e:
324
337
  self.report.warning(
325
338
  title="Iceberg REST Server Error",
326
- message="Iceberg returned 500 HTTP status when trying to process a table, skipping it.",
339
+ message="Iceberg REST Server returned error status when trying to process a table, skipping it.",
327
340
  context=dataset_name,
328
341
  exc=e,
329
342
  )
@@ -363,7 +376,7 @@ class IcebergSource(StatefulIngestionSourceBase):
363
376
  )
364
377
 
365
378
  try:
366
- catalog = self.config.get_catalog()
379
+ self.catalog = self.config.get_catalog()
367
380
  except Exception as e:
368
381
  self.report.report_failure(
369
382
  title="Failed to initialize catalog object",
@@ -373,33 +386,7 @@ class IcebergSource(StatefulIngestionSourceBase):
373
386
  return
374
387
 
375
388
  try:
376
- stamping_processor = AutoSystemMetadata(self.ctx)
377
- namespace_ids = self._get_namespaces(catalog)
378
- namespaces: List[Tuple[Identifier, str]] = []
379
- for namespace in namespace_ids:
380
- namespace_repr = ".".join(namespace)
381
- LOGGER.debug(f"Processing namespace {namespace_repr}")
382
- namespace_urn = make_container_urn(
383
- NamespaceKey(
384
- namespace=namespace_repr,
385
- platform=self.platform,
386
- instance=self.config.platform_instance,
387
- env=self.config.env,
388
- )
389
- )
390
- namespace_properties: Properties = catalog.load_namespace_properties(
391
- namespace
392
- )
393
- namespaces.append((namespace, namespace_urn))
394
- for aspect in self._create_iceberg_namespace_aspects(
395
- namespace, namespace_properties
396
- ):
397
- yield stamping_processor.stamp_wu(
398
- MetadataChangeProposalWrapper(
399
- entityUrn=namespace_urn, aspect=aspect
400
- ).as_workunit()
401
- )
402
- LOGGER.debug("Namespaces ingestion completed")
389
+ yield from self._process_namespaces()
403
390
  except Exception as e:
404
391
  self.report.report_failure(
405
392
  title="Failed to list namespaces",
@@ -413,13 +400,70 @@ class IcebergSource(StatefulIngestionSourceBase):
413
400
  args_list=[
414
401
  (dataset_path, namespace_urn)
415
402
  for dataset_path, namespace_urn in self._get_datasets(
416
- catalog, namespaces
403
+ self.catalog, self.namespaces
417
404
  )
418
405
  ],
419
406
  max_workers=self.config.processing_threads,
420
407
  ):
421
408
  yield wu
422
409
 
410
+ def _try_processing_namespace(
411
+ self, namespace: Identifier
412
+ ) -> Iterable[MetadataWorkUnit]:
413
+ namespace_repr = ".".join(namespace)
414
+ try:
415
+ LOGGER.debug(f"Processing namespace {namespace_repr}")
416
+ namespace_urn = make_container_urn(
417
+ NamespaceKey(
418
+ namespace=namespace_repr,
419
+ platform=self.platform,
420
+ instance=self.config.platform_instance,
421
+ env=self.config.env,
422
+ )
423
+ )
424
+
425
+ namespace_properties: Properties = self.catalog.load_namespace_properties(
426
+ namespace
427
+ )
428
+ for aspect in self._create_iceberg_namespace_aspects(
429
+ namespace, namespace_properties
430
+ ):
431
+ yield self.stamping_processor.stamp_wu(
432
+ MetadataChangeProposalWrapper(
433
+ entityUrn=namespace_urn, aspect=aspect
434
+ ).as_workunit()
435
+ )
436
+ self.namespaces.append((namespace, namespace_urn))
437
+ except NoSuchNamespaceError as e:
438
+ self.report.report_warning(
439
+ title="Failed to retrieve namespace properties",
440
+ message="Couldn't find the namespace, was it deleted during the ingestion?",
441
+ context=namespace_repr,
442
+ exc=e,
443
+ )
444
+ return
445
+ except RESTError as e:
446
+ self.report.warning(
447
+ title="Iceberg REST Server Error",
448
+ message="Iceberg REST Server returned error status when trying to retrieve namespace properties, skipping it.",
449
+ context=str(namespace),
450
+ exc=e,
451
+ )
452
+ except Exception as e:
453
+ self.report.report_failure(
454
+ title="Failed to process namespace",
455
+ message="Unhandled exception happened during processing of the namespace",
456
+ context=namespace_repr,
457
+ exc=e,
458
+ )
459
+
460
+ def _process_namespaces(self) -> Iterable[MetadataWorkUnit]:
461
+ namespace_ids = self._get_namespaces(self.catalog)
462
+ for namespace in namespace_ids:
463
+ yield from self._try_processing_namespace(namespace)
464
+
465
+ LOGGER.debug("Namespaces ingestion completed")
466
+
423
467
  def _create_iceberg_table_aspects(
424
468
  self, dataset_name: str, table: Table, namespace_urn: str
425
469
  ) -> Iterable[_Aspect]:
@@ -522,11 +566,11 @@ class IcebergSource(StatefulIngestionSourceBase):
522
566
  custom_properties["format-version"] = str(table.metadata.format_version)
523
567
  custom_properties["partition-spec"] = str(self._get_partition_aspect(table))
524
568
  last_modified: Optional[int] = table.metadata.last_updated_ms
525
- if table.current_snapshot():
526
- custom_properties["snapshot-id"] = str(table.current_snapshot().snapshot_id)
527
- custom_properties["manifest-list"] = table.current_snapshot().manifest_list
569
+ if current_snapshot := table.current_snapshot():
570
+ custom_properties["snapshot-id"] = str(current_snapshot.snapshot_id)
571
+ custom_properties["manifest-list"] = current_snapshot.manifest_list
528
572
  if not last_modified:
529
- last_modified = int(table.current_snapshot().timestamp_ms)
573
+ last_modified = int(current_snapshot.timestamp_ms)
530
574
  if "created-at" in custom_properties:
531
575
  try:
532
576
  dt = dateutil_parser.isoparse(custom_properties["created-at"])
@@ -792,9 +836,6 @@ class ToAvroSchemaIcebergVisitor(SchemaVisitorPerPrimitiveType[Dict[str, Any]]):
792
836
  "native_data_type": str(timestamp_type),
793
837
  }
794
838
 
795
- # visit_timestamptz() is required when using pyiceberg >= 0.5.0, which is essentially a duplicate
796
- # of visit_timestampz(). The function has been renamed from visit_timestampz().
797
- # Once Datahub can upgrade its pyiceberg dependency to >=0.5.0, the visit_timestampz() function can be safely removed.
798
839
  def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> Dict[str, Any]:
799
840
  # Avro supports 2 types of timestamp:
800
841
  # - Timestamp: independent of a particular timezone or calendar (TZ information is lost)
@@ -811,22 +852,6 @@ class ToAvroSchemaIcebergVisitor(SchemaVisitorPerPrimitiveType[Dict[str, Any]]):
811
852
  "native_data_type": str(timestamptz_type),
812
853
  }
813
854
 
814
- def visit_timestampz(self, timestamptz_type: TimestamptzType) -> Dict[str, Any]:
815
- # Avro supports 2 types of timestamp:
816
- # - Timestamp: independent of a particular timezone or calendar (TZ information is lost)
817
- # - Local Timestamp: represents a timestamp in a local timezone, regardless of what specific time zone is considered local
818
- # utcAdjustment: bool = True
819
- return {
820
- "type": "long",
821
- "logicalType": "timestamp-micros",
822
- # Commented out since Avro's Python implementation (1.11.0) does not support local-timestamp-micros, even though it exists in the spec.
823
- # See bug report: https://issues.apache.org/jira/browse/AVRO-3476 and PR https://github.com/apache/avro/pull/1634
824
- # "logicalType": "timestamp-micros"
825
- # if timestamp_type.adjust_to_utc
826
- # else "local-timestamp-micros",
827
- "native_data_type": str(timestamptz_type),
828
- }
829
-
830
855
  def visit_string(self, string_type: StringType) -> Dict[str, Any]:
831
856
  return {
832
857
  "type": "string",
@@ -845,3 +870,42 @@ class ToAvroSchemaIcebergVisitor(SchemaVisitorPerPrimitiveType[Dict[str, Any]]):
845
870
  "type": "bytes",
846
871
  "native_data_type": str(binary_type),
847
872
  }
873
+
874
+ def visit_timestamp_ns(self, timestamp_ns_type: Any) -> Dict[str, Any]:
875
+ # Handle nanosecond precision timestamps
876
+ # Avro supports 2 types of timestamp:
877
+ # - Timestamp: independent of a particular timezone or calendar (TZ information is lost)
878
+ # - Local Timestamp: represents a timestamp in a local timezone, regardless of what specific time zone is considered local
879
+ return {
880
+ "type": "long",
881
+ "logicalType": "timestamp-micros",
882
+ # Commented out since Avro's Python implementation (1.11.0) does not support local-timestamp-micros, even though it exists in the spec.
883
+ # See bug report: https://issues.apache.org/jira/browse/AVRO-3476 and PR https://github.com/apache/avro/pull/1634
884
+ # "logicalType": "timestamp-micros"
885
+ # if timestamp_ns_type.adjust_to_utc
886
+ # else "local-timestamp-micros",
887
+ "native_data_type": str(timestamp_ns_type),
888
+ }
889
+
890
+ def visit_timestamptz_ns(self, timestamptz_ns_type: Any) -> Dict[str, Any]:
891
+ # Handle nanosecond precision timestamps with timezone
892
+ # Avro supports 2 types of timestamp:
893
+ # - Timestamp: independent of a particular timezone or calendar (TZ information is lost)
894
+ # - Local Timestamp: represents a timestamp in a local timezone, regardless of what specific time zone is considered local
895
+ return {
896
+ "type": "long",
897
+ "logicalType": "timestamp-micros",
898
+ # Commented out since Avro's Python implementation (1.11.0) does not support local-timestamp-micros, even though it exists in the spec.
899
+ # See bug report: https://issues.apache.org/jira/browse/AVRO-3476 and PR https://github.com/apache/avro/pull/1634
900
+ # "logicalType": "timestamp-micros"
901
+ # if timestamptz_ns_type.adjust_to_utc
902
+ # else "local-timestamp-micros",
903
+ "native_data_type": str(timestamptz_ns_type),
904
+ }
905
+
906
+ def visit_unknown(self, unknown_type: Any) -> Dict[str, Any]:
907
+ # Handle unknown types
908
+ return {
909
+ "type": "string",
910
+ "native_data_type": str(unknown_type),
911
+ }
@@ -12,6 +12,7 @@ from pyiceberg.types import (
12
12
  IcebergType,
13
13
  IntegerType,
14
14
  LongType,
15
+ PrimitiveType,
15
16
  TimestampType,
16
17
  TimestamptzType,
17
18
  TimeType,
@@ -22,6 +23,7 @@ from pyiceberg.utils.datetime import (
22
23
  to_human_timestamp,
23
24
  to_human_timestamptz,
24
25
  )
26
+ from typing_extensions import TypeGuard
25
27
 
26
28
  from datahub.emitter.mce_builder import get_sys_time
27
29
  from datahub.ingestion.source.iceberg.iceberg_common import (
@@ -65,7 +67,7 @@ class IcebergProfiler:
65
67
  aggregated_values: Dict[int, Any],
66
68
  manifest_values: Dict[int, bytes],
67
69
  ) -> None:
68
- for field_id, value_encoded in manifest_values.items(): # type: int, Any
70
+ for field_id, value_encoded in manifest_values.items():
69
71
  try:
70
72
  field = schema.find_field(field_id)
71
73
  except ValueError:
@@ -240,7 +242,7 @@ class IcebergProfiler:
240
242
  return None
241
243
 
242
244
  @staticmethod
243
- def _is_numeric_type(type: IcebergType) -> bool:
245
+ def _is_numeric_type(type: IcebergType) -> TypeGuard[PrimitiveType]:
244
246
  return isinstance(
245
247
  type,
246
248
  (
@@ -167,7 +167,7 @@ class AzureADSourceReport(StaleEntityRemovalSourceReport):
167
167
  @config_class(AzureADConfig)
168
168
  @support_status(SupportStatus.CERTIFIED)
169
169
  @capability(
170
- SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
170
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
171
171
  )
172
172
  class AzureADSource(StatefulIngestionSourceBase):
173
173
  """
@@ -41,7 +41,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
41
41
  )
42
42
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
43
43
  from datahub.metadata.schema_classes import (
44
- ChangeTypeClass,
45
44
  CorpGroupInfoClass,
46
45
  CorpUserInfoClass,
47
46
  GroupMembershipClass,
@@ -202,7 +201,7 @@ class OktaSourceReport(StaleEntityRemovalSourceReport):
202
201
  @support_status(SupportStatus.CERTIFIED)
203
202
  @capability(SourceCapability.DESCRIPTIONS, "Optionally enabled via configuration")
204
203
  @capability(
205
- SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
204
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
206
205
  )
207
206
  class OktaSource(StatefulIngestionSourceBase):
208
207
  """
@@ -332,18 +331,12 @@ class OktaSource(StatefulIngestionSourceBase):
332
331
  yield MetadataWorkUnit(id=wu_id, mce=mce)
333
332
 
334
333
  yield MetadataChangeProposalWrapper(
335
- entityType="corpGroup",
336
334
  entityUrn=datahub_corp_group_snapshot.urn,
337
- changeType=ChangeTypeClass.UPSERT,
338
- aspectName="origin",
339
335
  aspect=OriginClass(OriginTypeClass.EXTERNAL, "OKTA"),
340
336
  ).as_workunit()
341
337
 
342
338
  yield MetadataChangeProposalWrapper(
343
- entityType="corpGroup",
344
339
  entityUrn=datahub_corp_group_snapshot.urn,
345
- changeType=ChangeTypeClass.UPSERT,
346
- aspectName="status",
347
340
  aspect=StatusClass(removed=False),
348
341
  ).as_workunit()
349
342
 
@@ -418,18 +411,12 @@ class OktaSource(StatefulIngestionSourceBase):
418
411
  yield MetadataWorkUnit(id=wu_id, mce=mce)
419
412
 
420
413
  yield MetadataChangeProposalWrapper(
421
- entityType="corpuser",
422
414
  entityUrn=datahub_corp_user_snapshot.urn,
423
- changeType=ChangeTypeClass.UPSERT,
424
- aspectName="origin",
425
415
  aspect=OriginClass(OriginTypeClass.EXTERNAL, "OKTA"),
426
416
  ).as_workunit()
427
417
 
428
418
  yield MetadataChangeProposalWrapper(
429
- entityType="corpuser",
430
419
  entityUrn=datahub_corp_user_snapshot.urn,
431
- changeType=ChangeTypeClass.UPSERT,
432
- aspectName="status",
433
420
  aspect=StatusClass(removed=False),
434
421
  ).as_workunit()
435
422
 
@@ -189,6 +189,22 @@ class KafkaConnectionTest:
189
189
  SourceCapability.SCHEMA_METADATA,
190
190
  "Schemas associated with each topic are extracted from the schema registry. Avro and Protobuf (certified), JSON (incubating). Schema references are supported.",
191
191
  )
192
+ @capability(
193
+ SourceCapability.DATA_PROFILING,
194
+ "Not supported",
195
+ supported=False,
196
+ )
197
+ @capability(
198
+ SourceCapability.LINEAGE_COARSE,
199
+ "Not supported. If you use Kafka Connect, the kafka-connect source can generate lineage.",
200
+ supported=False,
201
+ )
202
+ @capability(
203
+ SourceCapability.LINEAGE_FINE,
204
+ "Not supported",
205
+ supported=False,
206
+ )
207
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
192
208
  class KafkaSource(StatefulIngestionSourceBase, TestableSource):
193
209
  """
194
210
  This plugin extracts the following:
@@ -4,7 +4,7 @@ from typing import Dict, Iterable, List, Optional
4
4
 
5
5
  from pydantic.fields import Field
6
6
 
7
- from datahub.configuration.common import AllowDenyPattern, ConfigModel
7
+ from datahub.configuration.common import AllowDenyPattern, ConfigModel, LaxStr
8
8
  from datahub.configuration.source_common import (
9
9
  DatasetLineageProviderConfigBase,
10
10
  PlatformInstanceConfigMixin,
@@ -29,7 +29,7 @@ CONNECTOR_CLASS = "connector.class"
29
29
  class ProvidedConfig(ConfigModel):
30
30
  provider: str
31
31
  path_key: str
32
- value: str
32
+ value: LaxStr
33
33
 
34
34
 
35
35
  class GenericConnectorConfig(ConfigModel):