acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -178,7 +178,9 @@ class SACSourceReport(StaleEntityRemovalSourceReport):
178
178
  SourceCapability.LINEAGE_COARSE,
179
179
  "Enabled by default (only for Live Data Models)",
180
180
  )
181
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
181
+ @capability(
182
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
183
+ )
182
184
  @capability(
183
185
  SourceCapability.SCHEMA_METADATA,
184
186
  "Enabled by default (only for Import Data Models)",
@@ -33,7 +33,10 @@ from datahub.ingestion.api.decorators import (
33
33
  )
34
34
  from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
35
35
  from datahub.ingestion.api.workunit import MetadataWorkUnit
36
- from datahub.ingestion.source.common.subtypes import DatasetSubTypes
36
+ from datahub.ingestion.source.common.subtypes import (
37
+ DatasetSubTypes,
38
+ SourceCapabilityModifier,
39
+ )
37
40
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
38
41
  StaleEntityRemovalHandler,
39
42
  StaleEntityRemovalSourceReport,
@@ -107,30 +110,33 @@ class SalesforceConfig(
107
110
  auth: SalesforceAuthType = SalesforceAuthType.USERNAME_PASSWORD
108
111
 
109
112
  # Username, Password Auth
110
- username: Optional[str] = Field(description="Salesforce username")
111
- password: Optional[str] = Field(description="Password for Salesforce user")
113
+ username: Optional[str] = Field(None, description="Salesforce username")
114
+ password: Optional[str] = Field(None, description="Password for Salesforce user")
112
115
  consumer_key: Optional[str] = Field(
113
- description="Consumer key for Salesforce JSON web token access"
116
+ None, description="Consumer key for Salesforce JSON web token access"
114
117
  )
115
118
  private_key: Optional[str] = Field(
116
- description="Private key as a string for Salesforce JSON web token access"
119
+ None, description="Private key as a string for Salesforce JSON web token access"
117
120
  )
118
121
  security_token: Optional[str] = Field(
119
- description="Security token for Salesforce username"
122
+ None, description="Security token for Salesforce username"
120
123
  )
121
124
  # client_id, client_secret not required
122
125
 
123
126
  # Direct - Instance URL, Access Token Auth
124
127
  instance_url: Optional[str] = Field(
125
- description="Salesforce instance url. e.g. https://MyDomainName.my.salesforce.com"
128
+ None,
129
+ description="Salesforce instance url. e.g. https://MyDomainName.my.salesforce.com",
126
130
  )
127
131
  # Flag to indicate whether the instance is production or sandbox
128
132
  is_sandbox: bool = Field(
129
133
  default=False, description="Connect to Sandbox instance of your Salesforce"
130
134
  )
131
- access_token: Optional[str] = Field(description="Access token for instance url")
135
+ access_token: Optional[str] = Field(
136
+ None, description="Access token for instance url"
137
+ )
132
138
 
133
- ingest_tags: Optional[bool] = Field(
139
+ ingest_tags: bool = Field(
134
140
  default=False,
135
141
  description="Ingest Tags from source. This will override Tags entered from UI",
136
142
  )
@@ -144,7 +150,8 @@ class SalesforceConfig(
144
150
  description='Regex patterns for tables/schemas to describe domain_key domain key (domain_key can be any string like "sales".) There can be multiple domain keys specified.',
145
151
  )
146
152
  api_version: Optional[str] = Field(
147
- description="If specified, overrides default version used by the Salesforce package. Example value: '59.0'"
153
+ None,
154
+ description="If specified, overrides default version used by the Salesforce package. Example value: '59.0'",
148
155
  )
149
156
 
150
157
  profiling: SalesforceProfilingConfig = SalesforceProfilingConfig()
@@ -520,7 +527,7 @@ class SalesforceApi:
520
527
 
521
528
  @platform_name("Salesforce")
522
529
  @config_class(SalesforceConfig)
523
- @support_status(SupportStatus.INCUBATING)
530
+ @support_status(SupportStatus.CERTIFIED)
524
531
  @capability(
525
532
  capability_name=SourceCapability.PLATFORM_INSTANCE,
526
533
  description="Can be equivalent to Salesforce organization",
@@ -532,11 +539,11 @@ class SalesforceApi:
532
539
  @capability(
533
540
  capability_name=SourceCapability.DATA_PROFILING,
534
541
  description="Only table level profiling is supported via `profiling.enabled` config field",
542
+ subtype_modifier=[SourceCapabilityModifier.TABLE],
535
543
  )
536
544
  @capability(
537
545
  capability_name=SourceCapability.DELETION_DETECTION,
538
- description="Not supported yet",
539
- supported=False,
546
+ description="Enabled by default via stateful ingestion",
540
547
  )
541
548
  @capability(
542
549
  capability_name=SourceCapability.SCHEMA_METADATA,
@@ -546,6 +553,14 @@ class SalesforceApi:
546
553
  capability_name=SourceCapability.TAGS,
547
554
  description="Enabled by default",
548
555
  )
556
+ @capability(
557
+ capability_name=SourceCapability.LINEAGE_COARSE,
558
+ description="Extract table-level lineage for Salesforce objects",
559
+ subtype_modifier=[
560
+ SourceCapabilityModifier.SALESFORCE_CUSTOM_OBJECT,
561
+ SourceCapabilityModifier.SALESFORCE_STANDARD_OBJECT,
562
+ ],
563
+ )
549
564
  class SalesforceSource(StatefulIngestionSourceBase):
550
565
  def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
551
566
  super().__init__(config, ctx)
@@ -4,7 +4,6 @@ import logging
4
4
  import os
5
5
  import tempfile
6
6
  import unittest
7
- import urllib.request
8
7
  from dataclasses import dataclass
9
8
  from os.path import basename, dirname
10
9
  from pathlib import Path
@@ -12,6 +11,7 @@ from typing import Any, Iterable, List, Optional, Union
12
11
  from urllib.parse import urlparse
13
12
 
14
13
  import jsonref
14
+ import requests
15
15
  from pydantic import AnyHttpUrl, DirectoryPath, FilePath, validator
16
16
  from pydantic.fields import Field
17
17
 
@@ -91,19 +91,18 @@ class JsonSchemaSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMix
91
91
  )
92
92
 
93
93
  @validator("path")
94
- def download_http_url_to_temp_file(v):
94
+ def download_http_url_to_temp_file(cls, v):
95
95
  if isinstance(v, AnyHttpUrl):
96
96
  try:
97
- with urllib.request.urlopen(v) as response:
98
- schema_dict = json.load(response)
99
- if not JsonSchemaTranslator._get_id_from_any_schema(schema_dict):
100
- schema_dict["$id"] = str(v)
101
- with tempfile.NamedTemporaryFile(
102
- mode="w", delete=False
103
- ) as tmp_file:
104
- tmp_file.write(json.dumps(schema_dict))
105
- tmp_file.flush()
106
- return tmp_file.name
97
+ response = requests.get(str(v))
98
+ response.raise_for_status()
99
+ schema_dict = response.json()
100
+ if not JsonSchemaTranslator._get_id_from_any_schema(schema_dict):
101
+ schema_dict["$id"] = str(v)
102
+ with tempfile.NamedTemporaryFile(mode="w", delete=False) as tmp_file:
103
+ tmp_file.write(json.dumps(schema_dict))
104
+ tmp_file.flush()
105
+ return tmp_file.name
107
106
  except Exception as e:
108
107
  logger.error(
109
108
  f"Failed to localize url {v} due to {e}. Run with --debug to get full stacktrace"
@@ -353,7 +352,7 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
353
352
  if self.config.platform_instance:
354
353
  browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}"
355
354
 
356
- if os.path.isdir(self.config.path):
355
+ if isinstance(self.config.path, Path) and self.config.path.is_dir():
357
356
  for root, _, files in os.walk(self.config.path, topdown=False):
358
357
  for file_name in [f for f in files if f.endswith(".json")]:
359
358
  try:
@@ -373,10 +372,11 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
373
372
 
374
373
  else:
375
374
  try:
375
+ assert isinstance(self.config.path, Path)
376
376
  yield from self._load_one_file(
377
377
  ref_loader,
378
378
  browse_prefix=browse_prefix,
379
- root_dir=Path(os.path.dirname(Path(self.config.path))),
379
+ root_dir=self.config.path.parent,
380
380
  file_name=str(self.config.path),
381
381
  )
382
382
  except Exception as e:
@@ -1,4 +1,4 @@
1
- from collections import Counter
1
+ from collections import Counter, defaultdict
2
2
  from typing import Any, Counter as CounterType, Dict, Sequence, Tuple, Union
3
3
 
4
4
  from typing_extensions import TypedDict
@@ -84,7 +84,7 @@ def is_nullable_collection(
84
84
 
85
85
 
86
86
  def construct_schema(
87
- collection: Sequence[Dict[str, Any]], delimiter: str
87
+ collection: Sequence[Dict[str, Any]], delimiter: str = "."
88
88
  ) -> Dict[Tuple[str, ...], SchemaDescription]:
89
89
  """
90
90
  Construct (infer) a schema from a collection of documents.
@@ -104,9 +104,11 @@ def construct_schema(
104
104
  string to concatenate field names by
105
105
  """
106
106
 
107
- schema: Dict[Tuple[str, ...], BasicSchemaDescription] = {}
107
+ schema: Dict[Tuple[str, ...], BasicSchemaDescription] = defaultdict(
108
+ lambda: {"types": Counter(), "count": 0}
109
+ )
108
110
 
109
- def append_to_schema(doc: Dict[str, Any], parent_prefix: Tuple[str, ...]) -> None:
111
+ def append_to_schema(doc: Dict[str, Any], parent_prefix: Tuple[str, ...]) -> int:
110
112
  """
111
113
  Recursively update the schema with a document, which may/may not contain nested fields.
112
114
 
@@ -118,18 +120,24 @@ def construct_schema(
118
120
  prefix of fields that the document is under, pass an empty tuple when initializing
119
121
  """
120
122
 
123
+ # we want to make sure that parents of nested structures are included first, before their children, so that
124
+ # they are displayed properly in the UI, also in the event of trimming the list (which happens, for example,
125
+ # in mongodb ingestor)
126
+ max_count = 0
121
127
  for key, value in doc.items():
122
128
  new_parent_prefix = parent_prefix + (key,)
123
129
 
124
130
  # if nested value, look at the types within
125
131
  if isinstance(value, dict):
126
- append_to_schema(value, new_parent_prefix)
132
+ max_count = max(append_to_schema(value, new_parent_prefix), max_count)
127
133
  # if array of values, check what types are within
128
134
  if isinstance(value, list):
129
135
  for item in value:
130
136
  # if dictionary, add it as a nested object
131
137
  if isinstance(item, dict):
132
- append_to_schema(item, new_parent_prefix)
138
+ max_count = max(
139
+ append_to_schema(item, new_parent_prefix), max_count
140
+ )
133
141
 
134
142
  # don't record None values (counted towards nullable)
135
143
  if value is not None:
@@ -143,6 +151,14 @@ def construct_schema(
143
151
  # update the type count
144
152
  schema[new_parent_prefix]["types"].update({type(value): 1})
145
153
  schema[new_parent_prefix]["count"] += 1
154
+ max_count = max(schema[new_parent_prefix]["count"], max_count)
155
+
156
+ if parent_prefix != ():
157
+ schema[parent_prefix]["count"] = max(
158
+ schema[parent_prefix]["count"], max_count
159
+ )
160
+
161
+ return max_count
146
162
 
147
163
  for document in collection:
148
164
  append_to_schema(document, ())
@@ -1,3 +1,4 @@
1
+ from copy import deepcopy
1
2
  from datetime import datetime
2
3
  from typing import Dict, List, Optional
3
4
 
@@ -23,6 +24,8 @@ class Workspace(BaseModel):
23
24
 
24
25
  @root_validator(pre=True)
25
26
  def update_values(cls, values: Dict) -> Dict:
27
+ # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
28
+ values = deepcopy(values)
26
29
  # Update name if presonal workspace
27
30
  if values["name"] == "User Folder":
28
31
  values["name"] = "My documents"
@@ -30,6 +30,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
30
30
  from datahub.ingestion.source.common.subtypes import (
31
31
  BIContainerSubTypes,
32
32
  DatasetSubTypes,
33
+ SourceCapabilityModifier,
33
34
  )
34
35
  from datahub.ingestion.source.sigma.config import (
35
36
  PlatformDetail,
@@ -95,7 +96,11 @@ logger = logging.getLogger(__name__)
95
96
  @platform_name("Sigma")
96
97
  @config_class(SigmaSourceConfig)
97
98
  @support_status(SupportStatus.INCUBATING)
98
- @capability(SourceCapability.CONTAINERS, "Enabled by default")
99
+ @capability(
100
+ SourceCapability.CONTAINERS,
101
+ "Enabled by default",
102
+ subtype_modifier=[SourceCapabilityModifier.SIGMA_WORKSPACE],
103
+ )
99
104
  @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
100
105
  @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default.")
101
106
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@@ -105,6 +110,7 @@ logger = logging.getLogger(__name__)
105
110
  SourceCapability.OWNERSHIP,
106
111
  "Enabled by default, configured using `ingest_owner`",
107
112
  )
113
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
108
114
  class SigmaSource(StatefulIngestionSourceBase, TestableSource):
109
115
  """
110
116
  This plugin extracts the following:
@@ -23,6 +23,7 @@ from datahub.ingestion.api.source import (
23
23
  SourceReport,
24
24
  )
25
25
  from datahub.ingestion.api.workunit import MetadataWorkUnit
26
+ from datahub.ingestion.source.common.subtypes import DatasetSubTypes
26
27
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
27
28
  StaleEntityRemovalHandler,
28
29
  StaleEntityRemovalSourceReport,
@@ -202,38 +203,31 @@ class SlackSourceConfig(
202
203
  description="Bot token for the Slack workspace. Needs `users:read`, `users:read.email`, `users.profile:read`, and `team:read` scopes.",
203
204
  )
204
205
  enrich_user_metadata: bool = Field(
205
- type=bool,
206
- default=True,
206
+ True,
207
207
  description="When enabled, will enrich provisioned DataHub users' metadata with information from Slack.",
208
208
  )
209
209
  ingest_users: bool = Field(
210
- type=bool,
211
- default=True,
210
+ True,
212
211
  description="Whether to ingest users. When set to true, will ingest all users in the Slack workspace (as platform resources) to simplify user enrichment after they are provisioned on DataHub.",
213
212
  )
214
213
  api_requests_per_min: int = Field(
215
- type=int,
216
- default=10,
214
+ 10,
217
215
  description="Number of API requests per minute. Low-level config. Do not tweak unless you are facing any issues.",
218
216
  )
219
217
  ingest_public_channels: bool = Field(
220
- type=bool,
221
- default=False,
218
+ False,
222
219
  description="Whether to ingest public channels. If set to true needs `channels:read` scope.",
223
220
  )
224
221
  channels_iteration_limit: int = Field(
225
- type=int,
226
- default=200,
222
+ 200,
227
223
  description="Limit the number of channels to be ingested in a iteration. Low-level config. Do not tweak unless you are facing any issues.",
228
224
  )
229
225
  channel_min_members: int = Field(
230
- type=int,
231
- default=2,
226
+ 2,
232
227
  description="Ingest channels with at least this many members.",
233
228
  )
234
229
  should_ingest_archived_channels: bool = Field(
235
- type=bool,
236
- default=False,
230
+ False,
237
231
  description="Whether to ingest archived channels.",
238
232
  )
239
233
 
@@ -251,7 +245,7 @@ DATA_PLATFORM_SLACK_URN: str = builder.make_data_platform_urn(PLATFORM_NAME)
251
245
 
252
246
  @platform_name("Slack")
253
247
  @config_class(SlackSourceConfig)
254
- @support_status(SupportStatus.TESTING)
248
+ @support_status(SupportStatus.CERTIFIED)
255
249
  class SlackSource(StatefulIngestionSourceBase):
256
250
  def __init__(self, ctx: PipelineContext, config: SlackSourceConfig):
257
251
  super().__init__(config, ctx)
@@ -493,7 +487,7 @@ class SlackSource(StatefulIngestionSourceBase):
493
487
  mcp=MetadataChangeProposalWrapper(
494
488
  entityUrn=urn_channel,
495
489
  aspect=SubTypesClass(
496
- typeNames=["Slack Channel"],
490
+ typeNames=[DatasetSubTypes.SLACK_CHANNEL],
497
491
  ),
498
492
  ),
499
493
  )
File without changes