acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/cli/docker_cli.py CHANGED
@@ -20,6 +20,7 @@ import requests
 from expandvars import expandvars
 from requests_file import FileAdapter
 
+from datahub._version import __version__, is_dev_mode, nice_version_name
 from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
 from datahub.cli.docker_check import (
     DATAHUB_COMPOSE_LEGACY_VOLUME_FILTERS,
@@ -28,45 +29,87 @@ from datahub.cli.docker_check import (
     DockerComposeVersionError,
     QuickstartStatus,
     check_docker_quickstart,
+    check_upgrade_supported,
     get_docker_client,
     run_quickstart_preflight_checks,
 )
-from datahub.cli.quickstart_versioning import QuickstartVersionMappingConfig
+from datahub.cli.quickstart_versioning import (
+    QuickstartVersionMappingConfig,
+)
+from datahub.configuration.env_vars import get_docker_compose_base
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 from datahub.utilities.perf_timer import PerfTimer
 
 logger = logging.getLogger(__name__)
-_ClickPositiveInt = click.IntRange(min=1)
 
-NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.quickstart.yml"
-)
-ELASTIC_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
-)
-NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-m1.quickstart.yml"
-)
-ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
-)
-CONSUMERS_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.consumers.quickstart.yml"
-)
-ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
-)
-KAFKA_SETUP_QUICKSTART_COMPOSE_FILE = (
-    "docker/quickstart/docker-compose.kafka-setup.quickstart.yml"
-)
+_ClickPositiveInt = click.IntRange(min=1)
 
+QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose.quickstart-profile.yml"
 
 _QUICKSTART_MAX_WAIT_TIME = datetime.timedelta(minutes=10)
 _QUICKSTART_UP_TIMEOUT = datetime.timedelta(seconds=100)
 _QUICKSTART_STATUS_CHECK_INTERVAL = datetime.timedelta(seconds=2)
 
+MIGRATION_REQUIRED_INSTRUCTIONS = f"""
+Your existing DataHub server was installed with an \
+older CLI and is incompatible with the current CLI (version {nice_version_name}).
+
+Required steps to upgrade:
+1. Backup your data (recommended): datahub docker quickstart --backup
+   Guide: https://docs.datahub.com/docs/quickstart#back-up-datahub
+
+2. Remove old installation: datahub docker nuke
+
+3. Start fresh installation: datahub docker quickstart
+
+4. Restore data:
+   datahub docker quickstart --restore
+
+⚠️  Without backup, all existing data will be lost.
+
+For fresh start (if data is not needed):
+1. Remove installation:
+   datahub docker nuke
+
+2. Start fresh:
+   datahub docker quickstart
+"""
+
+REPAIR_REQUIRED_INSTRUCTIONS = f"""
+Unhealthy DataHub Installation Detected
+
+Your DataHub installation has issues that cannot be fixed with the current CLI.
+
+Your options:
+
+OPTION 1 - Preserve data (if needed):
+1. Downgrade CLI to version 1.1:
+   pip install acryl-datahub==1.1
+2. Fix the installation:
+   datahub docker quickstart
+3. Create backup:
+   datahub docker quickstart --backup
+4. Upgrade CLI back:
+   pip install acryl-datahub=={nice_version_name()}
+5. Migrate:
+   datahub docker nuke && datahub docker quickstart
+6. Restore data:
+   datahub docker quickstart --restore
+
+OPTION 2 - Fresh start (if data not needed):
+1. Remove installation:
+   datahub docker nuke
+2. Start fresh:
+   datahub docker quickstart
+
+⚠️  The current CLI cannot repair installations created by older versions.
+
+Additional information on backup and restore: https://docs.datahub.com/docs/quickstart#back-up-datahub
+Troubleshooting guide: https://docs.datahub.com/docs/troubleshooting/quickstart
+"""
+
 
 class Architectures(Enum):
     x86 = "x86"
@@ -89,6 +132,14 @@ def _docker_subprocess_env() -> Dict[str, str]:
     return env
 
 
+def show_migration_instructions():
+    click.secho(MIGRATION_REQUIRED_INSTRUCTIONS, fg="red")
+
+
+def show_repair_instructions():
+    click.secho(REPAIR_REQUIRED_INSTRUCTIONS, fg="red")
+
+
 @click.group()
 def docker() -> None:
     """Helper commands for setting up and interacting with a local
@@ -97,19 +148,22 @@ def docker() -> None:
 
 
 @docker.command()
-@upgrade.check_upgrade
-@telemetry.with_telemetry()
 def check() -> None:
     """Check that the Docker containers are healthy"""
     status = check_docker_quickstart()
+
     if status.is_ok():
         click.secho("✔ No issues detected", fg="green")
+        if status.running_unsupported_version:
+            show_migration_instructions()
     else:
+        if status.running_unsupported_version:
+            show_repair_instructions()
         raise status.to_exception("The following issues were detected:")
 
 
-def is_m1() -> bool:
-    """Check whether we are running on an M1 machine"""
+def is_apple_silicon() -> bool:
+    """Check whether we are running on an Apple Silicon machine"""
     try:
         return (
             platform.uname().machine == "arm64" and platform.uname().system == "Darwin"
@@ -119,52 +173,11 @@ def is_m1() -> bool:
         return False
 
 
-def is_arch_m1(arch: Architectures) -> bool:
-    return arch in [Architectures.arm64, Architectures.m1, Architectures.m2]
-
-
-def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
-    if graph_service_override is not None:
-        if graph_service_override == "elasticsearch":
-            click.echo("Starting with elasticsearch due to graph-service-impl param\n")
-            return False
-        if graph_service_override == "neo4j":
-            click.echo("Starting with neo4j due to graph-service-impl param\n")
-            return True
-        else:
-            click.secho(
-                graph_service_override
-                + " is not a valid graph service option. Choose either `neo4j` or "
-                "`elasticsearch`\n",
-                fg="red",
-            )
-            raise ValueError(f"invalid graph service option: {graph_service_override}")
-    with get_docker_client() as client:
-        if len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0:
-            click.echo(
-                "Datahub Neo4j volume found, starting with neo4j as graph service.\n"
-                "If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
-            )
-            return True
-
-    logger.debug(
-        "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
-        "To use neo4j as a graph backend, run \n"
-        "`datahub docker quickstart --graph-service-impl neo4j`"
-        "\nfrom the root of the datahub repo\n"
-    )
-    return False
-
-
 def _set_environment_variables(
     version: Optional[str],
-    mysql_version: Optional[str],
     mysql_port: Optional[int],
-    zk_port: Optional[int],
     kafka_broker_port: Optional[int],
-    schema_registry_port: Optional[int],
     elastic_port: Optional[int],
-    kafka_setup: Optional[bool],
 ) -> None:
     if version is not None:
         if not version.startswith("v") and "." in version:
@@ -173,24 +186,25 @@ def _set_environment_variables(
             )
             version = f"v{version}"
         os.environ["DATAHUB_VERSION"] = version
-    if mysql_version is not None:
-        os.environ["DATAHUB_MYSQL_VERSION"] = mysql_version
     if mysql_port is not None:
         os.environ["DATAHUB_MAPPED_MYSQL_PORT"] = str(mysql_port)
 
-    if zk_port is not None:
-        os.environ["DATAHUB_MAPPED_ZK_PORT"] = str(zk_port)
-
     if kafka_broker_port is not None:
         os.environ["DATAHUB_MAPPED_KAFKA_BROKER_PORT"] = str(kafka_broker_port)
 
-    if schema_registry_port is not None:
-        os.environ["DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT"] = str(schema_registry_port)
-
     if elastic_port is not None:
         os.environ["DATAHUB_MAPPED_ELASTIC_PORT"] = str(elastic_port)
-    if kafka_setup:
-        os.environ["DATAHUB_PRECREATE_TOPICS"] = "true"
+
+    os.environ["METADATA_SERVICE_AUTH_ENABLED"] = "false"
+
+    cliVersion = nice_version_name()
+    if is_dev_mode():  # This should only happen during development/CI.
+        cliVersion = __version__.replace(".dev0", "")
+        logger.info(
+            f"Development build: Using {cliVersion} instead of '{__version__}' version of CLI for UI ingestion"
+        )
+
+    os.environ["UI_INGESTION_DEFAULT_CLI_VERSION"] = cliVersion
 
 
 def _get_default_quickstart_compose_file() -> Optional[str]:
@@ -250,6 +264,8 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
         compose = _docker_compose_v2()
         base_command: List[str] = [
             *compose,
+            "--profile",
+            "quickstart",
             *itertools.chain.from_iterable(
                 ("-f", f"{path}") for path in compose_files_for_stopping
             ),
@@ -346,12 +362,15 @@ EBEAN_DATASOURCE_HOST=mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}
 EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
 EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
 ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
-
+GRAPH_SERVICE_IMPL=elasticsearch
 KAFKA_BOOTSTRAP_SERVER=broker:29092
-KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}
+KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080${DATAHUB_GMS_BASE_PATH}/schema-registry/api/
+SCHEMA_REGISTRY_TYPE=INTERNAL
 
-ELASTICSEARCH_HOST=elasticsearch
+ELASTICSEARCH_HOST=search
 ELASTICSEARCH_PORT=${DATAHUB_MAPPED_ELASTIC_PORT:-9200}
+ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ELASTICSEARCH_PROTOCOL=http
 
 #NEO4J_HOST=http://<your-neo-host>:7474
 #NEO4J_URI=bolt://<your-neo-host>
@@ -385,6 +404,7 @@ DATAHUB_MAE_CONSUMER_PORT=9091
             logger.debug(f"Env file contents: {env_fp_reader.read()}")
 
         # continue to issue the restore indices command
+        # TODO Use --version if passed
         command = (
             "docker pull acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}"
             + f" && docker run --network datahub_network --env-file {env_fp.name} "
@@ -412,12 +432,16 @@
     return result.returncode
 
 
+# TODO: Do we really need this? If someone wants to use a different arg, they can still pass the standard docker env var DOCKER_DEFAULT_PLATFORM
+# We dont really need to select a different image unlike earlier (mysql vs mariadb) since we do publish both archs for all images (or are available for external images).
 def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
-    running_on_m1 = is_m1()
-    if running_on_m1:
-        click.secho("Detected M1 machine", fg="yellow")
+    running_on_apple_silicon = is_apple_silicon()
+    if running_on_apple_silicon:
+        click.secho("Detected Apple Silicon", fg="yellow")
 
-    quickstart_arch = Architectures.x86 if not running_on_m1 else Architectures.arm64
+    quickstart_arch = (
+        Architectures.x86 if not running_on_apple_silicon else Architectures.arm64
+    )
     if arch:
         matched_arch = [a for a in Architectures if arch.lower() == a.value]
         if not matched_arch:
@@ -437,13 +461,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default="default",
     help="Datahub version to be deployed. If not set, deploy using the defaults from the quickstart compose. Use 'stable' to start the latest stable version.",
 )
-@click.option(
-    "--build-locally",
-    type=bool,
-    is_flag=True,
-    default=False,
-    help="Attempt to build the containers locally before starting",
-)
 @click.option(
     "--pull-images/--no-pull-images",
     type=bool,
@@ -466,13 +483,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=False,
     help="If true, the docker-compose logs will be printed to console if something fails",
 )
-@click.option(
-    "--graph-service-impl",
-    type=str,
-    is_flag=False,
-    default=None,
-    help="If set, forces docker-compose to use that graph service implementation",
-)
 @click.option(
     "--mysql-port",
     type=_ClickPositiveInt,
@@ -480,13 +490,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=None,
     help="If there is an existing mysql instance running on port 3306, set this to a free port to avoid port conflicts on startup",
 )
-@click.option(
-    "--zk-port",
-    type=_ClickPositiveInt,
-    is_flag=False,
-    default=None,
-    help="If there is an existing zookeeper instance running on port 2181, set this to a free port to avoid port conflicts on startup",
-)
 @click.option(
     "--kafka-broker-port",
     type=_ClickPositiveInt,
@@ -494,13 +497,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=None,
     help="If there is an existing Kafka broker running on port 9092, set this to a free port to avoid port conflicts on startup",
 )
-@click.option(
-    "--schema-registry-port",
-    type=_ClickPositiveInt,
-    is_flag=False,
-    default=None,
-    help="If there is an existing process running on port 8081, set this to a free port to avoid port conflicts with Kafka schema registry on startup",
-)
 @click.option(
     "--elastic-port",
     type=_ClickPositiveInt,
@@ -558,51 +554,29 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
     default=False,
     help="Disables the restoration of indices of a running quickstart instance when used in conjunction with --restore.",
 )
-@click.option(
-    "--standalone_consumers",
-    required=False,
-    is_flag=True,
-    default=False,
-    help="Launches MAE & MCE consumers as stand alone docker containers",
-)
-@click.option(
-    "--kafka-setup",
-    required=False,
-    is_flag=True,
-    default=False,
-    help="Launches Kafka setup job as part of the compose deployment",
-)
 @click.option(
     "--arch",
     required=False,
     help="Specify the architecture for the quickstart images to use. Options are x86, arm64, m1 etc.",
 )
-@upgrade.check_upgrade
 @telemetry.with_telemetry(
     capture_kwargs=[
         "version",
-        "build_locally",
         "pull_images",
         "stop",
        "backup",
        "restore",
        "restore_indices",
-        "standalone_consumers",
-        "kafka_setup",
        "arch",
    ]
)
def quickstart(
    version: Optional[str],
-    build_locally: bool,
    pull_images: bool,
    quickstart_compose_file: List[pathlib.Path],
    dump_logs_on_failure: bool,
-    graph_service_impl: Optional[str],
    mysql_port: Optional[int],
-    zk_port: Optional[int],
    kafka_broker_port: Optional[int],
-    schema_registry_port: Optional[int],
    elastic_port: Optional[int],
    stop: bool,
    backup: bool,
@@ -611,8 +585,6 @@ def quickstart(
    restore_file: str,
    restore_indices: bool,
    no_restore_indices: bool,
-    standalone_consumers: bool,
-    kafka_setup: bool,
    arch: Optional[str],
) -> None:
    """Start an instance of DataHub locally using docker-compose.
@@ -641,8 +613,8 @@ def quickstart(
        )
        return
 
-    quickstart_arch = detect_quickstart_arch(arch)
    quickstart_versioning = QuickstartVersionMappingConfig.fetch_quickstart_config()
+
    quickstart_execution_plan = quickstart_versioning.get_quickstart_execution_plan(
        version
    )
@@ -668,28 +640,26 @@ def quickstart(
        download_compose_files(
            quickstart_compose_file_name,
            quickstart_compose_file,
-            graph_service_impl,
-            kafka_setup,
-            quickstart_arch,
-            standalone_consumers,
            quickstart_execution_plan.composefile_git_ref,
        )
 
+    # check if running datahub can be upgraded to the latest version.
+    if not _check_upgrade_and_show_instructions(quickstart_compose_file):
+        sys.exit(1)
+
    # set version
    _set_environment_variables(
        version=quickstart_execution_plan.docker_tag,
-        mysql_version=quickstart_execution_plan.mysql_tag,
        mysql_port=mysql_port,
-        zk_port=zk_port,
        kafka_broker_port=kafka_broker_port,
-        schema_registry_port=schema_registry_port,
        elastic_port=elastic_port,
-        kafka_setup=kafka_setup,
    )
 
    compose = _docker_compose_v2()
    base_command: List[str] = [
        *compose,
+        "--profile",
+        "quickstart",
        *itertools.chain.from_iterable(
            ("-f", f"{path}") for path in quickstart_compose_file
        ),
@@ -697,6 +667,8 @@ def quickstart(
        DOCKER_COMPOSE_PROJECT_NAME,
    ]
 
+    click.echo(f"base_command: {base_command}")
+
    # Pull and possibly build the latest containers.
    try:
        if pull_images:
@@ -737,15 +709,6 @@ def quickstart(
                fg="red",
            )
 
-    if build_locally:
-        logger.info("Building docker images locally...")
-        subprocess.run(
-            base_command + ["build", "--pull", "-q"],
-            check=True,
-            env=_docker_subprocess_env(),
-        )
-        logger.info("Finished building docker images!")
-
    # Start it up! (with retries)
    click.echo("\nStarting up DataHub...")
    start_time = datetime.datetime.now()
@@ -830,42 +793,24 @@ def quickstart(
 
 
 def get_docker_compose_base_url(version_tag: str) -> str:
-    if os.environ.get("DOCKER_COMPOSE_BASE"):
-        return os.environ["DOCKER_COMPOSE_BASE"]
+    docker_compose_base = get_docker_compose_base()
+    if docker_compose_base:
+        return docker_compose_base
 
     return f"https://raw.githubusercontent.com/datahub-project/datahub/{version_tag}"
 
 
-def get_github_file_url(neo4j: bool, is_m1: bool, release_version_tag: str) -> str:
+def get_github_file_url(release_version_tag: str) -> str:
    base_url = get_docker_compose_base_url(release_version_tag)
-    if neo4j:
-        github_file = (
-            f"{base_url}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
-            if not is_m1
-            else f"{base_url}/{NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
-        )
-    else:
-        github_file = (
-            f"{base_url}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
-            if not is_m1
-            else f"{base_url}/{ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
-        )
+    github_file = f"{base_url}/{QUICKSTART_COMPOSE_FILE}"
    return github_file
 
 
def download_compose_files(
-    quickstart_compose_file_name,
-    quickstart_compose_file_list,
-    graph_service_impl,
-    kafka_setup,
-    quickstart_arch,
-    standalone_consumers,
-    compose_git_ref,
+    quickstart_compose_file_name, quickstart_compose_file_list, compose_git_ref
):
    # download appropriate quickstart file
-    should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
-    is_m1 = is_arch_m1(quickstart_arch)
-    github_file = get_github_file_url(should_use_neo4j, is_m1, compose_git_ref)
+    github_file = get_github_file_url(compose_git_ref)
    # also allow local files
    request_session = requests.Session()
    request_session.mount("file://", FileAdapter())
@@ -879,57 +824,14 @@ def download_compose_files(
            logger.info(f"Fetching docker-compose file {github_file} from GitHub")
            # Download the quickstart docker-compose file from GitHub.
            quickstart_download_response = request_session.get(github_file)
+            if quickstart_download_response.status_code == 404:
+                raise click.ClickException(
+                    f"Could not find quickstart compose file for version {compose_git_ref}. "
+                    "Please try a different version or check the version exists at https://github.com/datahub-project/datahub/releases"
+                )
            quickstart_download_response.raise_for_status()
            tmp_file.write(quickstart_download_response.content)
            logger.debug(f"Copied to {path}")
-    if standalone_consumers:
-        base_url = get_docker_compose_base_url(compose_git_ref)
-        consumer_github_file = (
-            f"{base_url}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-            if should_use_neo4j
-            else f"{base_url}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
-        )
-
-        default_consumer_compose_file = (
-            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
-        )
-        with (
-            open(default_consumer_compose_file, "wb")
-            if default_consumer_compose_file
-            else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file_list.append(path)
-            click.echo(
-                f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
-            )
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(consumer_github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
-    if kafka_setup:
-        base_url = get_docker_compose_base_url(compose_git_ref)
-        kafka_setup_github_file = f"{base_url}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}"
-
-        default_kafka_compose_file = (
-            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.kafka-setup.yml"
-        )
-        with (
-            open(default_kafka_compose_file, "wb")
-            if default_kafka_compose_file
-            else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
-        ) as tmp_file:
-            path = pathlib.Path(tmp_file.name)
-            quickstart_compose_file_list.append(path)
-            click.echo(
-                f"Fetching consumer docker-compose file {kafka_setup_github_file} from GitHub"
-            )
-            # Download the quickstart docker-compose file from GitHub.
-            quickstart_download_response = request_session.get(kafka_setup_github_file)
-            quickstart_download_response.raise_for_status()
-            tmp_file.write(quickstart_download_response.content)
-            logger.debug(f"Copied to {path}")
 
 
def valid_restore_options(
@@ -963,7 +865,7 @@ def valid_restore_options(
    default=None,
    help="The token to be used when ingesting, used when datahub is deployed with METADATA_SERVICE_AUTH_ENABLED=true",
)
-@telemetry.with_telemetry()
+@upgrade.check_upgrade
def ingest_sample_data(token: Optional[str]) -> None:
    """Ingest sample data into a running DataHub instance."""
 
@@ -1031,3 +933,25 @@ def nuke(keep_data: bool) -> None:
    click.echo(f"Removing networks in the {DOCKER_COMPOSE_PROJECT_NAME} project")
    for network in client.networks.list(filters=DATAHUB_COMPOSE_PROJECT_FILTER):
        network.remove()
+
+
+def _check_upgrade_and_show_instructions(
+    quickstart_compose_file: List[pathlib.Path],
+) -> bool:
+    """Check if running datahub can be upgraded to the latest version and show appropriate instructions.
+
+    Args:
+        quickstart_compose_file: List of compose file paths
+
+    Returns:
+        bool: True if upgrade is supported, False otherwise
+    """
+    quickstart_status = check_docker_quickstart()
+
+    if not check_upgrade_supported(quickstart_compose_file, quickstart_status):
+        if quickstart_status.is_ok():
+            show_migration_instructions()
+        else:
+            show_repair_instructions()
+        return False
+    return True
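
Taken together, these changes collapse the old matrix of quickstart compose files (neo4j vs. elasticsearch, x86 vs. m1, consumers, kafka-setup) into the single profile-based QUICKSTART_COMPOSE_FILE, and gate quickstart startup behind an upgrade-compatibility check. A minimal sketch of the compose invocation the reworked code assembles, assuming Docker Compose v2 and an already-downloaded compose file (the local path and the inlined project name are illustrative, not taken from the diff):

import subprocess

# Hypothetical local copy of docker/quickstart/docker-compose.quickstart-profile.yml
compose_file = "docker-compose.quickstart-profile.yml"

base_command = [
    "docker", "compose",
    "--profile", "quickstart",  # one profile replaces the old per-variant compose files
    "-f", compose_file,
    "-p", "datahub",  # stands in for DOCKER_COMPOSE_PROJECT_NAME in the real code
]

# quickstart runs the equivalent of "up -d" after its pull and upgrade checks;
# _attempt_stop issues "stop" through the same --profile-scoped base command.
subprocess.run([*base_command, "up", "-d"], check=True)

The same "--profile quickstart" pair is injected into both the start and stop paths, so containers launched under the profile are also found again when stopping.
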
datahub/cli/exists_cli.py CHANGED
@@ -7,7 +7,6 @@ from click_default_group import DefaultGroup
 
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
-from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
@@ -23,7 +22,6 @@ def exists() -> None:
 @click.option("--urn", required=False, type=str)
 @click.pass_context
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def urn(ctx: Any, urn: Optional[str]) -> None:
     """
     Get metadata for an entity with an optional list of aspects to project.
datahub/cli/get_cli.py CHANGED
@@ -8,7 +8,6 @@ from click_default_group import DefaultGroup
 from datahub.cli.cli_utils import get_aspects_for_entity
 from datahub.ingestion.graph.client import get_default_graph
 from datahub.ingestion.graph.config import ClientMode
-from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 
 logger = logging.getLogger(__name__)
@@ -32,7 +31,6 @@ def get() -> None:
     )
 @click.pass_context
 @upgrade.check_upgrade
-@telemetry.with_telemetry()
 def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
     """
     Get metadata for an entity with an optional list of aspects to project.
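
The exists_cli.py and get_cli.py diffs are the same mechanical cleanup: the per-command @telemetry.with_telemetry() decorator is dropped while @upgrade.check_upgrade stays on the command. A minimal sketch of the resulting decorator stack on a click command (the command body is illustrative, not from the diff):

from typing import Optional

import click

from datahub.upgrade import upgrade


@click.command()
@click.option("--urn", required=False, type=str)
@upgrade.check_upgrade  # telemetry.with_telemetry() is no longer stacked per-command
def urn(urn: Optional[str]) -> None:
    """Illustrative body; the real commands query the DataHub graph."""
    click.echo(f"checking {urn}")


if __name__ == "__main__":
    urn()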