acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/dataset.py CHANGED
@@ -26,12 +26,14 @@ from datahub.sdk._shared import (
26
26
  HasInstitutionalMemory,
27
27
  HasOwnership,
28
28
  HasPlatformInstance,
29
+ HasStructuredProperties,
29
30
  HasSubtype,
30
31
  HasTags,
31
32
  HasTerms,
32
33
  LinksInputType,
33
34
  OwnersInputType,
34
35
  ParentContainerInputType,
36
+ StructuredPropertyInputType,
35
37
  TagInputType,
36
38
  TagsInputType,
37
39
  TermInputType,
@@ -44,6 +46,10 @@ from datahub.sdk.entity import Entity, ExtraAspectsType
44
46
  from datahub.utilities.sentinels import Unset, unset
45
47
 
46
48
  SchemaFieldInputType: TypeAlias = Union[
49
+ # There is no Enum variant for schema field types because that would force users to do a mapping
50
+ # to our enum from the raw source type, so additional complexity on their side.
51
+ # To avoid that, the raw source native type can be provided as a string,
52
+ # and we will do the mapping internally (in sql_types.py)
47
53
  Tuple[str, str], # (name, type)
48
54
  Tuple[str, str, str], # (name, type, description)
49
55
  models.SchemaFieldClass,
@@ -70,6 +76,11 @@ UpstreamLineageInputType: TypeAlias = Union[
70
76
  Dict[DatasetUrnOrStr, ColumnLineageMapping],
71
77
  ]
72
78
 
79
+ ViewDefinitionInputType: TypeAlias = Union[
80
+ str,
81
+ models.ViewPropertiesClass,
82
+ ]
83
+
73
84
 
74
85
  def _parse_upstream_input(
75
86
  upstream_input: UpstreamInputType,
@@ -87,7 +98,7 @@ def _parse_upstream_input(
87
98
  assert_never(upstream_input)
88
99
 
89
100
 
90
- def _parse_cll_mapping(
101
+ def parse_cll_mapping(
91
102
  *,
92
103
  upstream: DatasetUrnOrStr,
93
104
  downstream: DatasetUrnOrStr,
@@ -142,7 +153,7 @@ def _parse_upstream_lineage_input(
142
153
  )
143
154
  )
144
155
  cll.extend(
145
- _parse_cll_mapping(
156
+ parse_cll_mapping(
146
157
  upstream=dataset_urn,
147
158
  downstream=downstream_urn,
148
159
  cll_mapping=column_lineage,
@@ -428,12 +439,25 @@ class Dataset(
428
439
  HasTags,
429
440
  HasTerms,
430
441
  HasDomain,
442
+ HasStructuredProperties,
431
443
  Entity,
432
444
  ):
445
+ """Represents a dataset in DataHub.
446
+
447
+ A dataset represents a collection of data, such as a table, view, or file.
448
+ This class provides methods for managing dataset metadata including schema,
449
+ lineage, and various aspects like ownership, tags, and terms.
450
+ """
451
+
433
452
  __slots__ = ()
434
453
 
435
454
  @classmethod
436
455
  def get_urn_type(cls) -> Type[DatasetUrn]:
456
+ """Get the URN type for datasets.
457
+
458
+ Returns:
459
+ The DatasetUrn class.
460
+ """
437
461
  return DatasetUrn
438
462
 
439
463
  def __init__(
@@ -452,6 +476,7 @@ class Dataset(
452
476
  custom_properties: Optional[Dict[str, str]] = None,
453
477
  created: Optional[datetime] = None,
454
478
  last_modified: Optional[datetime] = None,
479
+ view_definition: Optional[ViewDefinitionInputType] = None,
455
480
  # Standard aspects.
456
481
  parent_container: ParentContainerInputType | Unset = unset,
457
482
  subtype: Optional[str] = None,
@@ -459,13 +484,39 @@ class Dataset(
459
484
  links: Optional[LinksInputType] = None,
460
485
  tags: Optional[TagsInputType] = None,
461
486
  terms: Optional[TermsInputType] = None,
462
- # TODO structured_properties
463
487
  domain: Optional[DomainInputType] = None,
464
- extra_aspects: ExtraAspectsType = None,
465
488
  # Dataset-specific aspects.
466
489
  schema: Optional[SchemaFieldsInputType] = None,
467
490
  upstreams: Optional[models.UpstreamLineageClass] = None,
491
+ structured_properties: Optional[StructuredPropertyInputType] = None,
492
+ extra_aspects: ExtraAspectsType = None,
468
493
  ):
494
+ """Initialize a new Dataset instance.
495
+
496
+ Args:
497
+ platform: The platform this dataset belongs to (e.g. "mysql", "snowflake").
498
+ name: The name of the dataset.
499
+ platform_instance: Optional platform instance identifier.
500
+ env: The environment this dataset belongs to (default: DEFAULT_ENV).
501
+ description: Optional description of the dataset.
502
+ display_name: Optional display name for the dataset.
503
+ qualified_name: Optional qualified name for the dataset.
504
+ external_url: Optional URL to external documentation or source.
505
+ custom_properties: Optional dictionary of custom properties.
506
+ created: Optional creation timestamp.
507
+ last_modified: Optional last modification timestamp.
508
+ view_definition: Optional view definition for the dataset.
509
+ parent_container: Optional parent container for this dataset.
510
+ subtype: Optional subtype of the dataset.
511
+ owners: Optional list of owners.
512
+ links: Optional list of links.
513
+ tags: Optional list of tags.
514
+ terms: Optional list of glossary terms.
515
+ domain: Optional domain this dataset belongs to.
516
+ extra_aspects: Optional list of additional aspects.
517
+ schema: Optional schema definition for the dataset.
518
+ upstreams: Optional upstream lineage information.
519
+ """
469
520
  urn = DatasetUrn.create_from_ids(
470
521
  platform_id=platform,
471
522
  table_name=name,
@@ -496,6 +547,8 @@ class Dataset(
496
547
  self.set_created(created)
497
548
  if last_modified is not None:
498
549
  self.set_last_modified(last_modified)
550
+ if view_definition is not None:
551
+ self.set_view_definition(view_definition)
499
552
 
500
553
  if parent_container is not unset:
501
554
  self._set_container(parent_container)
@@ -511,6 +564,9 @@ class Dataset(
511
564
  self.set_terms(terms)
512
565
  if domain is not None:
513
566
  self.set_domain(domain)
567
+ if structured_properties is not None:
568
+ for key, value in structured_properties.items():
569
+ self.set_structured_property(property_urn=key, values=value)
514
570
 
515
571
  @classmethod
516
572
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
@@ -539,6 +595,11 @@ class Dataset(
539
595
 
540
596
  @property
541
597
  def description(self) -> Optional[str]:
598
+ """Get the description of the dataset.
599
+
600
+ Returns:
601
+ The description if set, None otherwise.
602
+ """
542
603
  editable_props = self._get_editable_props()
543
604
  return first_non_null(
544
605
  [
@@ -548,6 +609,15 @@ class Dataset(
548
609
  )
549
610
 
550
611
  def set_description(self, description: str) -> None:
612
+ """Set the description of the dataset.
613
+
614
+ Args:
615
+ description: The description to set.
616
+
617
+ Note:
618
+ If called during ingestion, this will warn if overwriting
619
+ a non-ingestion description.
620
+ """
551
621
  if is_ingestion_attribution():
552
622
  editable_props = self._get_editable_props()
553
623
  if editable_props is not None and editable_props.description is not None:
@@ -565,46 +635,136 @@ class Dataset(
565
635
 
566
636
  @property
567
637
  def display_name(self) -> Optional[str]:
638
+ """Get the display name of the dataset.
639
+
640
+ Returns:
641
+ The display name if set, None otherwise.
642
+ """
568
643
  return self._ensure_dataset_props().name
569
644
 
570
645
  def set_display_name(self, display_name: str) -> None:
646
+ """Set the display name of the dataset.
647
+
648
+ Args:
649
+ display_name: The display name to set.
650
+ """
571
651
  self._ensure_dataset_props().name = display_name
572
652
 
573
653
  @property
574
654
  def qualified_name(self) -> Optional[str]:
655
+ """Get the qualified name of the dataset.
656
+
657
+ Returns:
658
+ The qualified name if set, None otherwise.
659
+ """
575
660
  return self._ensure_dataset_props().qualifiedName
576
661
 
577
662
  def set_qualified_name(self, qualified_name: str) -> None:
663
+ """Set the qualified name of the dataset.
664
+
665
+ Args:
666
+ qualified_name: The qualified name to set.
667
+ """
578
668
  self._ensure_dataset_props().qualifiedName = qualified_name
579
669
 
580
670
  @property
581
671
  def external_url(self) -> Optional[str]:
672
+ """Get the external URL of the dataset.
673
+
674
+ Returns:
675
+ The external URL if set, None otherwise.
676
+ """
582
677
  return self._ensure_dataset_props().externalUrl
583
678
 
584
679
  def set_external_url(self, external_url: str) -> None:
680
+ """Set the external URL of the dataset.
681
+
682
+ Args:
683
+ external_url: The external URL to set.
684
+ """
585
685
  self._ensure_dataset_props().externalUrl = external_url
586
686
 
587
687
  @property
588
688
  def custom_properties(self) -> Dict[str, str]:
689
+ """Get the custom properties of the dataset.
690
+
691
+ Returns:
692
+ Dictionary of custom properties.
693
+ """
589
694
  return self._ensure_dataset_props().customProperties
590
695
 
591
696
  def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
697
+ """Set the custom properties of the dataset.
698
+
699
+ Args:
700
+ custom_properties: Dictionary of custom properties to set.
701
+ """
592
702
  self._ensure_dataset_props().customProperties = custom_properties
593
703
 
594
704
  @property
595
705
  def created(self) -> Optional[datetime]:
706
+ """Get the creation timestamp of the dataset.
707
+
708
+ Returns:
709
+ The creation timestamp if set, None otherwise.
710
+ """
596
711
  return parse_time_stamp(self._ensure_dataset_props().created)
597
712
 
598
713
  def set_created(self, created: datetime) -> None:
714
+ """Set the creation timestamp of the dataset.
715
+
716
+ Args:
717
+ created: The creation timestamp to set.
718
+ """
599
719
  self._ensure_dataset_props().created = make_time_stamp(created)
600
720
 
601
721
  @property
602
722
  def last_modified(self) -> Optional[datetime]:
723
+ """Get the last modification timestamp of the dataset.
724
+
725
+ Returns:
726
+ The last modification timestamp if set, None otherwise.
727
+ """
603
728
  return parse_time_stamp(self._ensure_dataset_props().lastModified)
604
729
 
605
730
  def set_last_modified(self, last_modified: datetime) -> None:
606
731
  self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
607
732
 
733
+ @property
734
+ def view_definition(self) -> Optional[models.ViewPropertiesClass]:
735
+ """Get the view definition of the dataset.
736
+
737
+ Under typical usage, this will be present if the subtype is "View".
738
+
739
+ Returns:
740
+ The view definition if set, None otherwise.
741
+ """
742
+ return self._get_aspect(models.ViewPropertiesClass)
743
+
744
+ def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
745
+ """Set the view definition of the dataset.
746
+
747
+ If you're setting a view definition, subtype should typically be set to "View".
748
+
749
+ If a string is provided, it will be treated as a SQL view definition. To set
750
+ a custom language or other properties, provide a ViewPropertiesClass object.
751
+
752
+ Args:
753
+ view_definition: The view definition to set.
754
+ """
755
+ if isinstance(view_definition, models.ViewPropertiesClass):
756
+ self._set_aspect(view_definition)
757
+ elif isinstance(view_definition, str):
758
+ self._set_aspect(
759
+ models.ViewPropertiesClass(
760
+ materialized=False,
761
+ viewLogic=view_definition,
762
+ viewLanguage="SQL",
763
+ )
764
+ )
765
+ else:
766
+ assert_never(view_definition)
767
+
608
768
  def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
609
769
  schema_metadata = self._get_aspect(models.SchemaMetadataClass)
610
770
  if schema_metadata is None:
@@ -614,6 +774,11 @@ class Dataset(
614
774
  @property
615
775
  def schema(self) -> List[SchemaField]:
616
776
  # TODO: Add some caching here to avoid iterating over the schema every time.
777
+ """Get the schema fields of the dataset.
778
+
779
+ Returns:
780
+ List of SchemaField objects representing the dataset's schema.
781
+ """
617
782
  schema_dict = self._schema_dict()
618
783
  return [SchemaField(self, field_path) for field_path in schema_dict]
619
784
 
@@ -669,6 +834,17 @@ class Dataset(
669
834
 
670
835
  def __getitem__(self, field_path: str) -> SchemaField:
671
836
  # TODO: Automatically deal with field path v2?
837
+ """Get a schema field by its path.
838
+
839
+ Args:
840
+ field_path: The path of the field to retrieve.
841
+
842
+ Returns:
843
+ A SchemaField instance.
844
+
845
+ Raises:
846
+ SchemaFieldKeyError: If the field is not found.
847
+ """
672
848
  schema_dict = self._schema_dict()
673
849
  if field_path not in schema_dict:
674
850
  raise SchemaFieldKeyError(f"Field {field_path} not found in schema")
datahub/sdk/entity.py CHANGED
@@ -20,9 +20,24 @@ ExtraAspectsType = Union[None, List[AspectTypeVar]]
20
20
 
21
21
 
22
22
  class Entity:
23
+ """Base class for all DataHub entities.
24
+
25
+ This class provides the core functionality for working with DataHub entities,
26
+ including aspect management and URN handling. It should not be instantiated directly;
27
+ instead, use one of its subclasses like Dataset or Container.
28
+ """
29
+
23
30
  __slots__ = ("_urn", "_prev_aspects", "_aspects")
24
31
 
25
32
  def __init__(self, /, urn: Urn):
33
+ """Initialize a new Entity instance.
34
+
35
+ Args:
36
+ urn: The URN that uniquely identifies this entity.
37
+
38
+ Raises:
39
+ SdkUsageError: If this base class is instantiated directly.
40
+ """
26
41
  # This method is not meant for direct usage.
27
42
  if type(self) is Entity:
28
43
  raise SdkUsageError(f"{Entity.__name__} cannot be instantiated directly.")
@@ -36,6 +51,15 @@ class Entity:
36
51
 
37
52
  @classmethod
38
53
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
54
+ """Create a new entity instance from graph data.
55
+
56
+ Args:
57
+ urn: The URN of the entity.
58
+ current_aspects: The current aspects of the entity from the graph.
59
+
60
+ Returns:
61
+ A new entity instance initialized with the graph data.
62
+ """
39
63
  # If an init method from a subclass adds required fields, it also needs to override this method.
40
64
  # An alternative approach would call cls.__new__() to bypass the init method, but it's a bit
41
65
  # too hacky for my taste.
@@ -43,6 +67,14 @@ class Entity:
43
67
  return entity._init_from_graph(current_aspects)
44
68
 
45
69
  def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
70
+ """Initialize the entity with aspects from the graph.
71
+
72
+ Args:
73
+ current_aspects: The current aspects of the entity from the graph.
74
+
75
+ Returns:
76
+ The entity instance with initialized aspects.
77
+ """
46
78
  self._prev_aspects = current_aspects
47
79
 
48
80
  self._aspects = {}
@@ -54,14 +86,30 @@ class Entity:
54
86
 
55
87
  @classmethod
56
88
  @abc.abstractmethod
57
- def get_urn_type(cls) -> Type[_SpecificUrn]: ...
89
+ def get_urn_type(cls) -> Type[_SpecificUrn]:
90
+ """Get the URN type for this entity class.
91
+
92
+ Returns:
93
+ The URN type class that corresponds to this entity type.
94
+ """
95
+ ...
58
96
 
59
97
  @classmethod
60
98
  def entity_type_name(cls) -> str:
99
+ """Get the entity type name.
100
+
101
+ Returns:
102
+ The string name of this entity type.
103
+ """
61
104
  return cls.get_urn_type().ENTITY_TYPE
62
105
 
63
106
  @property
64
107
  def urn(self) -> _SpecificUrn:
108
+ """Get the entity's URN.
109
+
110
+ Returns:
111
+ The URN that uniquely identifies this entity.
112
+ """
65
113
  return self._urn
66
114
 
67
115
  def _get_aspect(
@@ -69,22 +117,51 @@ class Entity:
69
117
  aspect_type: Type[AspectTypeVar],
70
118
  /,
71
119
  ) -> Optional[AspectTypeVar]:
120
+ """Get an aspect of the entity by its type.
121
+
122
+ Args:
123
+ aspect_type: The type of aspect to retrieve.
124
+
125
+ Returns:
126
+ The aspect if it exists, None otherwise.
127
+ """
72
128
  return self._aspects.get(aspect_type.ASPECT_NAME) # type: ignore
73
129
 
74
130
  def _set_aspect(self, value: AspectTypeVar, /) -> None:
131
+ """Set an aspect of the entity.
132
+
133
+ Args:
134
+ value: The aspect to set.
135
+ """
75
136
  self._aspects[value.ASPECT_NAME] = value # type: ignore
76
137
 
77
138
  def _setdefault_aspect(self, default_aspect: AspectTypeVar, /) -> AspectTypeVar:
139
+ """Set a default aspect if it doesn't exist.
140
+
141
+ Args:
142
+ default_aspect: The default aspect to set if none exists.
143
+
144
+ Returns:
145
+ The existing aspect if one exists, otherwise the default aspect.
146
+ """
78
147
  # Similar semantics to dict.setdefault.
79
148
  if existing_aspect := self._get_aspect(type(default_aspect)):
80
149
  return existing_aspect
81
150
  self._set_aspect(default_aspect)
82
151
  return default_aspect
83
152
 
84
- def _as_mcps(
153
+ def as_mcps(
85
154
  self,
86
155
  change_type: Union[str, models.ChangeTypeClass] = models.ChangeTypeClass.UPSERT,
87
156
  ) -> List[MetadataChangeProposalWrapper]:
157
+ """Convert the entity's aspects to MetadataChangeProposals.
158
+
159
+ Args:
160
+ change_type: The type of change to apply (default: UPSERT).
161
+
162
+ Returns:
163
+ A list of MetadataChangeProposalWrapper objects.
164
+ """
88
165
  urn_str = str(self.urn)
89
166
 
90
167
  mcps = []
@@ -100,13 +177,32 @@ class Entity:
100
177
  return mcps
101
178
 
102
179
  def as_workunits(self) -> List[MetadataWorkUnit]:
103
- return [mcp.as_workunit() for mcp in self._as_mcps()]
180
+ """Convert the entity's aspects to MetadataWorkUnits.
181
+
182
+ Returns:
183
+ A list of MetadataWorkUnit objects.
184
+ """
185
+ return [mcp.as_workunit() for mcp in self.as_mcps()]
104
186
 
105
187
  def _set_extra_aspects(self, extra_aspects: ExtraAspectsType) -> None:
188
+ """Set additional aspects on the entity.
189
+
190
+ Args:
191
+ extra_aspects: List of additional aspects to set.
192
+
193
+ Note:
194
+ This method does not validate for conflicts between extra aspects
195
+ and standard aspects.
196
+ """
106
197
  # TODO: Add validation to ensure that an "extra aspect" does not conflict
107
198
  # with / get overridden by a standard aspect.
108
199
  for aspect in extra_aspects or []:
109
200
  self._set_aspect(aspect)
110
201
 
111
202
  def __repr__(self) -> str:
203
+ """Get a string representation of the entity.
204
+
205
+ Returns:
206
+ A string in the format "EntityClass('urn')".
207
+ """
112
208
  return f"{self.__class__.__name__}('{self.urn}')"