acryl-datahub 1.1.1rc3__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (226)
  1. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/METADATA +2559 -2532
  2. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/RECORD +226 -190
  3. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/entry_points.txt +2 -0
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/dataset/dataset.py +2 -1
  7. datahub/api/entities/external/__init__.py +0 -0
  8. datahub/api/entities/external/external_entities.py +239 -0
  9. datahub/api/entities/external/external_tag.py +145 -0
  10. datahub/api/entities/external/lake_formation_external_entites.py +161 -0
  11. datahub/api/entities/external/restricted_text.py +247 -0
  12. datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
  13. datahub/cli/check_cli.py +88 -7
  14. datahub/cli/cli_utils.py +63 -0
  15. datahub/cli/container_cli.py +5 -0
  16. datahub/cli/delete_cli.py +124 -27
  17. datahub/cli/docker_check.py +107 -12
  18. datahub/cli/docker_cli.py +149 -227
  19. datahub/cli/exists_cli.py +0 -2
  20. datahub/cli/get_cli.py +0 -2
  21. datahub/cli/iceberg_cli.py +5 -0
  22. datahub/cli/ingest_cli.py +12 -16
  23. datahub/cli/migrate.py +2 -0
  24. datahub/cli/put_cli.py +1 -4
  25. datahub/cli/quickstart_versioning.py +50 -7
  26. datahub/cli/specific/assertions_cli.py +0 -4
  27. datahub/cli/specific/datacontract_cli.py +0 -3
  28. datahub/cli/specific/dataproduct_cli.py +0 -11
  29. datahub/cli/specific/dataset_cli.py +1 -8
  30. datahub/cli/specific/forms_cli.py +0 -4
  31. datahub/cli/specific/group_cli.py +0 -2
  32. datahub/cli/specific/structuredproperties_cli.py +1 -4
  33. datahub/cli/specific/user_cli.py +0 -2
  34. datahub/cli/state_cli.py +0 -2
  35. datahub/cli/timeline_cli.py +0 -2
  36. datahub/emitter/response_helper.py +86 -1
  37. datahub/emitter/rest_emitter.py +71 -13
  38. datahub/entrypoints.py +4 -3
  39. datahub/ingestion/api/decorators.py +15 -3
  40. datahub/ingestion/api/report.py +332 -3
  41. datahub/ingestion/api/sink.py +3 -0
  42. datahub/ingestion/api/source.py +48 -44
  43. datahub/ingestion/autogenerated/__init__.py +0 -0
  44. datahub/ingestion/autogenerated/capability_summary.json +3449 -0
  45. datahub/ingestion/autogenerated/lineage.json +401 -0
  46. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  47. datahub/ingestion/extractor/schema_util.py +13 -4
  48. datahub/ingestion/glossary/classification_mixin.py +5 -0
  49. datahub/ingestion/graph/client.py +100 -15
  50. datahub/ingestion/graph/config.py +1 -0
  51. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
  52. datahub/ingestion/run/pipeline.py +54 -2
  53. datahub/ingestion/sink/datahub_rest.py +13 -0
  54. datahub/ingestion/source/abs/source.py +1 -1
  55. datahub/ingestion/source/aws/aws_common.py +4 -0
  56. datahub/ingestion/source/aws/glue.py +489 -244
  57. datahub/ingestion/source/aws/tag_entities.py +292 -0
  58. datahub/ingestion/source/azure/azure_common.py +2 -2
  59. datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
  60. datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
  61. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
  62. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
  63. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  64. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  65. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  66. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  67. datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
  68. datahub/ingestion/source/common/subtypes.py +45 -0
  69. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  70. datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
  71. datahub/ingestion/source/datahub/config.py +11 -0
  72. datahub/ingestion/source/datahub/datahub_database_reader.py +187 -35
  73. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  74. datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
  75. datahub/ingestion/source/dbt/dbt_common.py +6 -2
  76. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  77. datahub/ingestion/source/debug/__init__.py +0 -0
  78. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  79. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  80. datahub/ingestion/source/dremio/dremio_config.py +2 -0
  81. datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
  82. datahub/ingestion/source/dremio/dremio_source.py +94 -81
  83. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  84. datahub/ingestion/source/file.py +3 -0
  85. datahub/ingestion/source/fivetran/fivetran.py +34 -26
  86. datahub/ingestion/source/gcs/gcs_source.py +13 -2
  87. datahub/ingestion/source/ge_data_profiler.py +76 -28
  88. datahub/ingestion/source/ge_profiling_config.py +11 -0
  89. datahub/ingestion/source/hex/api.py +26 -1
  90. datahub/ingestion/source/iceberg/iceberg.py +3 -1
  91. datahub/ingestion/source/identity/azure_ad.py +1 -1
  92. datahub/ingestion/source/identity/okta.py +1 -14
  93. datahub/ingestion/source/kafka/kafka.py +16 -0
  94. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  95. datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
  96. datahub/ingestion/source/looker/looker_source.py +1 -0
  97. datahub/ingestion/source/mlflow.py +11 -1
  98. datahub/ingestion/source/mock_data/__init__.py +0 -0
  99. datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
  100. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  101. datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
  102. datahub/ingestion/source/nifi.py +1 -1
  103. datahub/ingestion/source/openapi.py +12 -0
  104. datahub/ingestion/source/openapi_parser.py +56 -37
  105. datahub/ingestion/source/powerbi/powerbi.py +1 -5
  106. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  107. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  108. datahub/ingestion/source/preset.py +2 -2
  109. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
  110. datahub/ingestion/source/redshift/redshift.py +21 -1
  111. datahub/ingestion/source/redshift/usage.py +4 -3
  112. datahub/ingestion/source/s3/report.py +4 -2
  113. datahub/ingestion/source/s3/source.py +367 -115
  114. datahub/ingestion/source/sac/sac.py +3 -1
  115. datahub/ingestion/source/salesforce.py +6 -3
  116. datahub/ingestion/source/sigma/sigma.py +7 -1
  117. datahub/ingestion/source/slack/slack.py +2 -1
  118. datahub/ingestion/source/snowflake/snowflake_config.py +43 -7
  119. datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
  120. datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
  121. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  122. datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
  123. datahub/ingestion/source/snowflake/snowflake_v2.py +33 -8
  124. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  125. datahub/ingestion/source/sql/athena.py +119 -11
  126. datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
  127. datahub/ingestion/source/sql/clickhouse.py +3 -1
  128. datahub/ingestion/source/sql/cockroachdb.py +0 -1
  129. datahub/ingestion/source/sql/hana.py +3 -1
  130. datahub/ingestion/source/sql/hive_metastore.py +3 -11
  131. datahub/ingestion/source/sql/mariadb.py +0 -1
  132. datahub/ingestion/source/sql/mssql/source.py +239 -34
  133. datahub/ingestion/source/sql/mysql.py +0 -1
  134. datahub/ingestion/source/sql/oracle.py +1 -1
  135. datahub/ingestion/source/sql/postgres.py +0 -1
  136. datahub/ingestion/source/sql/sql_common.py +121 -34
  137. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  138. datahub/ingestion/source/sql/teradata.py +997 -235
  139. datahub/ingestion/source/sql/vertica.py +10 -6
  140. datahub/ingestion/source/sql_queries.py +2 -2
  141. datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
  142. datahub/ingestion/source/superset.py +58 -3
  143. datahub/ingestion/source/tableau/tableau.py +58 -37
  144. datahub/ingestion/source/tableau/tableau_common.py +4 -2
  145. datahub/ingestion/source/tableau/tableau_constant.py +0 -4
  146. datahub/ingestion/source/unity/config.py +5 -0
  147. datahub/ingestion/source/unity/proxy.py +118 -0
  148. datahub/ingestion/source/unity/source.py +195 -17
  149. datahub/ingestion/source/unity/tag_entities.py +295 -0
  150. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  151. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
  152. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  153. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  154. datahub/metadata/_internal_schema_classes.py +1446 -559
  155. datahub/metadata/_urns/urn_defs.py +1721 -1553
  156. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  157. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  158. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  159. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  160. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
  161. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  162. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
  163. datahub/metadata/schema.avsc +18055 -17802
  164. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  165. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  166. datahub/metadata/schemas/Applications.avsc +38 -0
  167. datahub/metadata/schemas/ChartKey.avsc +1 -0
  168. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  169. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  170. datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
  171. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  172. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  173. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  174. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  175. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
  176. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  177. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
  178. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  179. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  180. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  181. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  182. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  183. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  184. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  185. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  186. datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
  187. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  188. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  189. datahub/metadata/schemas/LogicalParent.avsc +140 -0
  190. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  191. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  192. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  193. datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
  194. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  195. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  196. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
  197. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  198. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  199. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  200. datahub/sdk/__init__.py +6 -0
  201. datahub/sdk/_all_entities.py +11 -0
  202. datahub/sdk/_shared.py +118 -1
  203. datahub/sdk/chart.py +315 -0
  204. datahub/sdk/container.py +7 -0
  205. datahub/sdk/dashboard.py +432 -0
  206. datahub/sdk/dataflow.py +309 -0
  207. datahub/sdk/datajob.py +367 -0
  208. datahub/sdk/dataset.py +8 -2
  209. datahub/sdk/entity_client.py +90 -2
  210. datahub/sdk/lineage_client.py +683 -82
  211. datahub/sdk/main_client.py +46 -16
  212. datahub/sdk/mlmodel.py +101 -38
  213. datahub/sdk/mlmodelgroup.py +7 -0
  214. datahub/sdk/search_client.py +4 -3
  215. datahub/specific/chart.py +1 -1
  216. datahub/specific/dataproduct.py +4 -0
  217. datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
  218. datahub/sql_parsing/sqlglot_lineage.py +62 -13
  219. datahub/telemetry/telemetry.py +17 -11
  220. datahub/testing/sdk_v2_helpers.py +7 -1
  221. datahub/upgrade/upgrade.py +46 -13
  222. datahub/utilities/server_config_util.py +8 -0
  223. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  224. datahub/utilities/stats_collections.py +4 -0
  225. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/licenses/LICENSE +0 -0
  226. {acryl_datahub-1.1.1rc3.dist-info → acryl_datahub-1.2.0.dist-info}/top_level.txt +0 -0
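A file-level comparison like the list above can be reproduced locally, since wheels are plain zip archives whose member list is the file inventory. The snippet below is a minimal sketch; the local wheel filenames are illustrative and assume both files have already been downloaded.

import zipfile

def wheel_members(path: str) -> set:
    # A wheel is a zip archive; namelist() returns every packaged file.
    with zipfile.ZipFile(path) as whl:
        return set(whl.namelist())

old = wheel_members("acryl_datahub-1.1.1rc3-py3-none-any.whl")
new = wheel_members("acryl_datahub-1.2.0-py3-none-any.whl")

print("added:", sorted(new - old))
print("removed:", sorted(old - new))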
datahub/cli/docker_cli.py CHANGED
@@ -20,6 +20,7 @@ import requests
  from expandvars import expandvars
  from requests_file import FileAdapter
 
+ from datahub._version import __version__, is_dev_mode, nice_version_name
  from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
  from datahub.cli.docker_check import (
  DATAHUB_COMPOSE_LEGACY_VOLUME_FILTERS,
@@ -28,45 +29,86 @@ from datahub.cli.docker_check import (
  DockerComposeVersionError,
  QuickstartStatus,
  check_docker_quickstart,
+ check_upgrade_supported,
  get_docker_client,
  run_quickstart_preflight_checks,
  )
- from datahub.cli.quickstart_versioning import QuickstartVersionMappingConfig
+ from datahub.cli.quickstart_versioning import (
+ QuickstartVersionMappingConfig,
+ )
  from datahub.ingestion.run.pipeline import Pipeline
  from datahub.telemetry import telemetry
  from datahub.upgrade import upgrade
  from datahub.utilities.perf_timer import PerfTimer
 
  logger = logging.getLogger(__name__)
- _ClickPositiveInt = click.IntRange(min=1)
 
- NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE = (
- "docker/quickstart/docker-compose.quickstart.yml"
- )
- ELASTIC_QUICKSTART_COMPOSE_FILE = (
- "docker/quickstart/docker-compose-without-neo4j.quickstart.yml"
- )
- NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
- "docker/quickstart/docker-compose-m1.quickstart.yml"
- )
- ELASTIC_M1_QUICKSTART_COMPOSE_FILE = (
- "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
- )
- CONSUMERS_QUICKSTART_COMPOSE_FILE = (
- "docker/quickstart/docker-compose.consumers.quickstart.yml"
- )
- ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
- "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
- )
- KAFKA_SETUP_QUICKSTART_COMPOSE_FILE = (
- "docker/quickstart/docker-compose.kafka-setup.quickstart.yml"
- )
+ _ClickPositiveInt = click.IntRange(min=1)
 
+ QUICKSTART_COMPOSE_FILE = "docker/quickstart/docker-compose.quickstart-profile.yml"
 
  _QUICKSTART_MAX_WAIT_TIME = datetime.timedelta(minutes=10)
  _QUICKSTART_UP_TIMEOUT = datetime.timedelta(seconds=100)
  _QUICKSTART_STATUS_CHECK_INTERVAL = datetime.timedelta(seconds=2)
 
+ MIGRATION_REQUIRED_INSTRUCTIONS = f"""
+ Your existing DataHub server was installed with an \
+ older CLI and is incompatible with the current CLI (version {nice_version_name}).
+
+ Required steps to upgrade:
+ 1. Backup your data (recommended): datahub docker quickstart --backup
+ Guide: https://docs.datahub.com/docs/quickstart#back-up-datahub
+
+ 2. Remove old installation: datahub docker nuke
+
+ 3. Start fresh installation: datahub docker quickstart
+
+ 4. Restore data:
+ datahub docker quickstart --restore
+
+ ⚠️ Without backup, all existing data will be lost.
+
+ For fresh start (if data is not needed):
+ 1. Remove installation:
+ datahub docker nuke
+
+ 2. Start fresh:
+ datahub docker quickstart
+ """
+
+ REPAIR_REQUIRED_INSTRUCTIONS = f"""
+ Unhealthy DataHub Installation Detected
+
+ Your DataHub installation has issues that cannot be fixed with the current CLI.
+
+ Your options:
+
+ OPTION 1 - Preserve data (if needed):
+ 1. Downgrade CLI to version 1.1:
+ pip install acryl-datahub==1.1
+ 2. Fix the installation:
+ datahub docker quickstart
+ 3. Create backup:
+ datahub docker quickstart --backup
+ 4. Upgrade CLI back:
+ pip install acryl-datahub=={nice_version_name()}
+ 5. Migrate:
+ datahub docker nuke && datahub docker quickstart
+ 6. Restore data:
+ datahub docker quickstart --restore
+
+ OPTION 2 - Fresh start (if data not needed):
+ 1. Remove installation:
+ datahub docker nuke
+ 2. Start fresh:
+ datahub docker quickstart
+
+ ⚠️ The current CLI cannot repair installations created by older versions.
+
+ Additional information on backup and restore: https://docs.datahub.com/docs/quickstart#back-up-datahub
+ Troubleshooting guide: https://docs.datahub.com/docs/troubleshooting/quickstart
+ """
+
 
  class Architectures(Enum):
  x86 = "x86"
@@ -89,6 +131,14 @@ def _docker_subprocess_env() -> Dict[str, str]:
  return env
 
 
+ def show_migration_instructions():
+ click.secho(MIGRATION_REQUIRED_INSTRUCTIONS, fg="red")
+
+
+ def show_repair_instructions():
+ click.secho(REPAIR_REQUIRED_INSTRUCTIONS, fg="red")
+
+
  @click.group()
  def docker() -> None:
  """Helper commands for setting up and interacting with a local
@@ -97,19 +147,22 @@ def docker() -> None:
 
 
  @docker.command()
- @upgrade.check_upgrade
- @telemetry.with_telemetry()
  def check() -> None:
  """Check that the Docker containers are healthy"""
  status = check_docker_quickstart()
+
  if status.is_ok():
  click.secho("✔ No issues detected", fg="green")
+ if status.running_unsupported_version:
+ show_migration_instructions()
  else:
+ if status.running_unsupported_version:
+ show_repair_instructions()
  raise status.to_exception("The following issues were detected:")
 
 
- def is_m1() -> bool:
- """Check whether we are running on an M1 machine"""
+ def is_apple_silicon() -> bool:
+ """Check whether we are running on an Apple Silicon machine"""
  try:
  return (
  platform.uname().machine == "arm64" and platform.uname().system == "Darwin"
@@ -119,52 +172,11 @@ def is_m1() -> bool:
  return False
 
 
- def is_arch_m1(arch: Architectures) -> bool:
- return arch in [Architectures.arm64, Architectures.m1, Architectures.m2]
-
-
- def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
- if graph_service_override is not None:
- if graph_service_override == "elasticsearch":
- click.echo("Starting with elasticsearch due to graph-service-impl param\n")
- return False
- if graph_service_override == "neo4j":
- click.echo("Starting with neo4j due to graph-service-impl param\n")
- return True
- else:
- click.secho(
- graph_service_override
- + " is not a valid graph service option. Choose either `neo4j` or "
- "`elasticsearch`\n",
- fg="red",
- )
- raise ValueError(f"invalid graph service option: {graph_service_override}")
- with get_docker_client() as client:
- if len(client.volumes.list(filters={"name": "datahub_neo4jdata"})) > 0:
- click.echo(
- "Datahub Neo4j volume found, starting with neo4j as graph service.\n"
- "If you want to run using elastic, run `datahub docker nuke` and re-ingest your data.\n"
- )
- return True
-
- logger.debug(
- "No Datahub Neo4j volume found, starting with elasticsearch as graph service.\n"
- "To use neo4j as a graph backend, run \n"
- "`datahub docker quickstart --graph-service-impl neo4j`"
- "\nfrom the root of the datahub repo\n"
- )
- return False
-
-
  def _set_environment_variables(
  version: Optional[str],
- mysql_version: Optional[str],
  mysql_port: Optional[int],
- zk_port: Optional[int],
  kafka_broker_port: Optional[int],
- schema_registry_port: Optional[int],
  elastic_port: Optional[int],
- kafka_setup: Optional[bool],
  ) -> None:
  if version is not None:
  if not version.startswith("v") and "." in version:
@@ -173,24 +185,25 @@ def _set_environment_variables(
  )
  version = f"v{version}"
  os.environ["DATAHUB_VERSION"] = version
- if mysql_version is not None:
- os.environ["DATAHUB_MYSQL_VERSION"] = mysql_version
  if mysql_port is not None:
  os.environ["DATAHUB_MAPPED_MYSQL_PORT"] = str(mysql_port)
 
- if zk_port is not None:
- os.environ["DATAHUB_MAPPED_ZK_PORT"] = str(zk_port)
-
  if kafka_broker_port is not None:
  os.environ["DATAHUB_MAPPED_KAFKA_BROKER_PORT"] = str(kafka_broker_port)
 
- if schema_registry_port is not None:
- os.environ["DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT"] = str(schema_registry_port)
-
  if elastic_port is not None:
  os.environ["DATAHUB_MAPPED_ELASTIC_PORT"] = str(elastic_port)
- if kafka_setup:
- os.environ["DATAHUB_PRECREATE_TOPICS"] = "true"
+
+ os.environ["METADATA_SERVICE_AUTH_ENABLED"] = "false"
+
+ cliVersion = nice_version_name()
+ if is_dev_mode(): # This should only happen during development/CI.
+ cliVersion = __version__.replace(".dev0", "")
+ logger.info(
+ f"Development build: Using {cliVersion} instead of '{__version__}' version of CLI for UI ingestion"
+ )
+
+ os.environ["UI_INGESTION_DEFAULT_CLI_VERSION"] = cliVersion
 
 
  def _get_default_quickstart_compose_file() -> Optional[str]:
@@ -250,6 +263,8 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
  compose = _docker_compose_v2()
  base_command: List[str] = [
  *compose,
+ "--profile",
+ "quickstart",
  *itertools.chain.from_iterable(
  ("-f", f"{path}") for path in compose_files_for_stopping
  ),
@@ -346,12 +361,15 @@ EBEAN_DATASOURCE_HOST=mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}
  EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:${DATAHUB_MAPPED_MYSQL_PORT:-3306}/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
  EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
  ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
-
+ GRAPH_SERVICE_IMPL=elasticsearch
  KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}
+ KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+ SCHEMA_REGISTRY_TYPE=INTERNAL
 
- ELASTICSEARCH_HOST=elasticsearch
+ ELASTICSEARCH_HOST=search
  ELASTICSEARCH_PORT=${DATAHUB_MAPPED_ELASTIC_PORT:-9200}
+ ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ ELASTICSEARCH_PROTOCOL=http
 
  #NEO4J_HOST=http://<your-neo-host>:7474
  #NEO4J_URI=bolt://<your-neo-host>
@@ -385,6 +403,7 @@ DATAHUB_MAE_CONSUMER_PORT=9091
  logger.debug(f"Env file contents: {env_fp_reader.read()}")
 
  # continue to issue the restore indices command
+ # TODO Use --version if passed
  command = (
  "docker pull acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}"
  + f" && docker run --network datahub_network --env-file {env_fp.name} "
@@ -412,12 +431,16 @@ DATAHUB_MAE_CONSUMER_PORT=9091
  return result.returncode
 
 
+ # TODO: Do we really need this? If someone wants to use a different arg, they can still pass the standard docker env var DOCKER_DEFAULT_PLATFORM
+ # We dont really need to select a different image unlike earlier (mysql vs mariadb) since we do publish both archs for all images (or are available for external images).
  def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
- running_on_m1 = is_m1()
- if running_on_m1:
- click.secho("Detected M1 machine", fg="yellow")
+ running_on_apple_silicon = is_apple_silicon()
+ if running_on_apple_silicon:
+ click.secho("Detected Apple Silicon", fg="yellow")
 
- quickstart_arch = Architectures.x86 if not running_on_m1 else Architectures.arm64
+ quickstart_arch = (
+ Architectures.x86 if not running_on_apple_silicon else Architectures.arm64
+ )
  if arch:
  matched_arch = [a for a in Architectures if arch.lower() == a.value]
  if not matched_arch:
@@ -437,13 +460,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
  default="default",
  help="Datahub version to be deployed. If not set, deploy using the defaults from the quickstart compose. Use 'stable' to start the latest stable version.",
  )
- @click.option(
- "--build-locally",
- type=bool,
- is_flag=True,
- default=False,
- help="Attempt to build the containers locally before starting",
- )
  @click.option(
  "--pull-images/--no-pull-images",
  type=bool,
@@ -466,13 +482,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
  default=False,
  help="If true, the docker-compose logs will be printed to console if something fails",
  )
- @click.option(
- "--graph-service-impl",
- type=str,
- is_flag=False,
- default=None,
- help="If set, forces docker-compose to use that graph service implementation",
- )
  @click.option(
  "--mysql-port",
  type=_ClickPositiveInt,
@@ -480,13 +489,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
  default=None,
  help="If there is an existing mysql instance running on port 3306, set this to a free port to avoid port conflicts on startup",
  )
- @click.option(
- "--zk-port",
- type=_ClickPositiveInt,
- is_flag=False,
- default=None,
- help="If there is an existing zookeeper instance running on port 2181, set this to a free port to avoid port conflicts on startup",
- )
  @click.option(
  "--kafka-broker-port",
  type=_ClickPositiveInt,
@@ -494,13 +496,6 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
  default=None,
  help="If there is an existing Kafka broker running on port 9092, set this to a free port to avoid port conflicts on startup",
  )
- @click.option(
- "--schema-registry-port",
- type=_ClickPositiveInt,
- is_flag=False,
- default=None,
- help="If there is an existing process running on port 8081, set this to a free port to avoid port conflicts with Kafka schema registry on startup",
- )
  @click.option(
  "--elastic-port",
  type=_ClickPositiveInt,
@@ -558,51 +553,29 @@ def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
  default=False,
  help="Disables the restoration of indices of a running quickstart instance when used in conjunction with --restore.",
  )
- @click.option(
- "--standalone_consumers",
- required=False,
- is_flag=True,
- default=False,
- help="Launches MAE & MCE consumers as stand alone docker containers",
- )
- @click.option(
- "--kafka-setup",
- required=False,
- is_flag=True,
- default=False,
- help="Launches Kafka setup job as part of the compose deployment",
- )
  @click.option(
  "--arch",
  required=False,
  help="Specify the architecture for the quickstart images to use. Options are x86, arm64, m1 etc.",
  )
- @upgrade.check_upgrade
  @telemetry.with_telemetry(
  capture_kwargs=[
  "version",
- "build_locally",
  "pull_images",
  "stop",
  "backup",
  "restore",
  "restore_indices",
- "standalone_consumers",
- "kafka_setup",
  "arch",
  ]
  )
  def quickstart(
  version: Optional[str],
- build_locally: bool,
  pull_images: bool,
  quickstart_compose_file: List[pathlib.Path],
  dump_logs_on_failure: bool,
- graph_service_impl: Optional[str],
  mysql_port: Optional[int],
- zk_port: Optional[int],
  kafka_broker_port: Optional[int],
- schema_registry_port: Optional[int],
  elastic_port: Optional[int],
  stop: bool,
  backup: bool,
@@ -611,8 +584,6 @@ def quickstart(
  restore_file: str,
  restore_indices: bool,
  no_restore_indices: bool,
- standalone_consumers: bool,
- kafka_setup: bool,
  arch: Optional[str],
  ) -> None:
  """Start an instance of DataHub locally using docker-compose.
@@ -641,8 +612,8 @@ def quickstart(
  )
  return
 
- quickstart_arch = detect_quickstart_arch(arch)
  quickstart_versioning = QuickstartVersionMappingConfig.fetch_quickstart_config()
+
  quickstart_execution_plan = quickstart_versioning.get_quickstart_execution_plan(
  version
  )
@@ -668,28 +639,26 @@ def quickstart(
  download_compose_files(
  quickstart_compose_file_name,
  quickstart_compose_file,
- graph_service_impl,
- kafka_setup,
- quickstart_arch,
- standalone_consumers,
  quickstart_execution_plan.composefile_git_ref,
  )
 
+ # check if running datahub can be upgraded to the latest version.
+ if not _check_upgrade_and_show_instructions(quickstart_compose_file):
+ sys.exit(1)
+
  # set version
  _set_environment_variables(
  version=quickstart_execution_plan.docker_tag,
- mysql_version=quickstart_execution_plan.mysql_tag,
  mysql_port=mysql_port,
- zk_port=zk_port,
  kafka_broker_port=kafka_broker_port,
- schema_registry_port=schema_registry_port,
  elastic_port=elastic_port,
- kafka_setup=kafka_setup,
  )
 
  compose = _docker_compose_v2()
  base_command: List[str] = [
  *compose,
+ "--profile",
+ "quickstart",
  *itertools.chain.from_iterable(
  ("-f", f"{path}") for path in quickstart_compose_file
  ),
@@ -697,6 +666,8 @@ def quickstart(
  DOCKER_COMPOSE_PROJECT_NAME,
  ]
 
+ click.echo(f"base_command: {base_command}")
+
  # Pull and possibly build the latest containers.
  try:
  if pull_images:
@@ -737,15 +708,6 @@ def quickstart(
  fg="red",
  )
 
- if build_locally:
- logger.info("Building docker images locally...")
- subprocess.run(
- base_command + ["build", "--pull", "-q"],
- check=True,
- env=_docker_subprocess_env(),
- )
- logger.info("Finished building docker images!")
-
  # Start it up! (with retries)
  click.echo("\nStarting up DataHub...")
  start_time = datetime.datetime.now()
@@ -836,36 +798,17 @@ def get_docker_compose_base_url(version_tag: str) -> str:
  return f"https://raw.githubusercontent.com/datahub-project/datahub/{version_tag}"
 
 
- def get_github_file_url(neo4j: bool, is_m1: bool, release_version_tag: str) -> str:
+ def get_github_file_url(release_version_tag: str) -> str:
  base_url = get_docker_compose_base_url(release_version_tag)
- if neo4j:
- github_file = (
- f"{base_url}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
- if not is_m1
- else f"{base_url}/{NEO4J_AND_ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
- )
- else:
- github_file = (
- f"{base_url}/{ELASTIC_QUICKSTART_COMPOSE_FILE}"
- if not is_m1
- else f"{base_url}/{ELASTIC_M1_QUICKSTART_COMPOSE_FILE}"
- )
+ github_file = f"{base_url}/{QUICKSTART_COMPOSE_FILE}"
  return github_file
 
 
  def download_compose_files(
- quickstart_compose_file_name,
- quickstart_compose_file_list,
- graph_service_impl,
- kafka_setup,
- quickstart_arch,
- standalone_consumers,
- compose_git_ref,
+ quickstart_compose_file_name, quickstart_compose_file_list, compose_git_ref
  ):
  # download appropriate quickstart file
- should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
- is_m1 = is_arch_m1(quickstart_arch)
- github_file = get_github_file_url(should_use_neo4j, is_m1, compose_git_ref)
+ github_file = get_github_file_url(compose_git_ref)
  # also allow local files
  request_session = requests.Session()
  request_session.mount("file://", FileAdapter())
@@ -879,57 +822,14 @@ def download_compose_files(
  logger.info(f"Fetching docker-compose file {github_file} from GitHub")
  # Download the quickstart docker-compose file from GitHub.
  quickstart_download_response = request_session.get(github_file)
+ if quickstart_download_response.status_code == 404:
+ raise click.ClickException(
+ f"Could not find quickstart compose file for version {compose_git_ref}. "
+ "Please try a different version or check the version exists at https://github.com/datahub-project/datahub/releases"
+ )
  quickstart_download_response.raise_for_status()
  tmp_file.write(quickstart_download_response.content)
  logger.debug(f"Copied to {path}")
- if standalone_consumers:
- base_url = get_docker_compose_base_url(compose_git_ref)
- consumer_github_file = (
- f"{base_url}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
- if should_use_neo4j
- else f"{base_url}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
- )
-
- default_consumer_compose_file = (
- Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
- )
- with (
- open(default_consumer_compose_file, "wb")
- if default_consumer_compose_file
- else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
- ) as tmp_file:
- path = pathlib.Path(tmp_file.name)
- quickstart_compose_file_list.append(path)
- click.echo(
- f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
- )
- # Download the quickstart docker-compose file from GitHub.
- quickstart_download_response = request_session.get(consumer_github_file)
- quickstart_download_response.raise_for_status()
- tmp_file.write(quickstart_download_response.content)
- logger.debug(f"Copied to {path}")
- if kafka_setup:
- base_url = get_docker_compose_base_url(compose_git_ref)
- kafka_setup_github_file = f"{base_url}/{KAFKA_SETUP_QUICKSTART_COMPOSE_FILE}"
-
- default_kafka_compose_file = (
- Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.kafka-setup.yml"
- )
- with (
- open(default_kafka_compose_file, "wb")
- if default_kafka_compose_file
- else tempfile.NamedTemporaryFile(suffix=".yml", delete=False)
- ) as tmp_file:
- path = pathlib.Path(tmp_file.name)
- quickstart_compose_file_list.append(path)
- click.echo(
- f"Fetching consumer docker-compose file {kafka_setup_github_file} from GitHub"
- )
- # Download the quickstart docker-compose file from GitHub.
- quickstart_download_response = request_session.get(kafka_setup_github_file)
- quickstart_download_response.raise_for_status()
- tmp_file.write(quickstart_download_response.content)
- logger.debug(f"Copied to {path}")
 
 
  def valid_restore_options(
@@ -963,7 +863,7 @@ def valid_restore_options(
  default=None,
  help="The token to be used when ingesting, used when datahub is deployed with METADATA_SERVICE_AUTH_ENABLED=true",
  )
- @telemetry.with_telemetry()
+ @upgrade.check_upgrade
  def ingest_sample_data(token: Optional[str]) -> None:
  """Ingest sample data into a running DataHub instance."""
 
@@ -1031,3 +931,25 @@ def nuke(keep_data: bool) -> None:
  click.echo(f"Removing networks in the {DOCKER_COMPOSE_PROJECT_NAME} project")
  for network in client.networks.list(filters=DATAHUB_COMPOSE_PROJECT_FILTER):
  network.remove()
+
+
+ def _check_upgrade_and_show_instructions(
+ quickstart_compose_file: List[pathlib.Path],
+ ) -> bool:
+ """Check if running datahub can be upgraded to the latest version and show appropriate instructions.
+
+ Args:
+ quickstart_compose_file: List of compose file paths
+
+ Returns:
+ bool: True if upgrade is supported, False otherwise
+ """
+ quickstart_status = check_docker_quickstart()
+
+ if not check_upgrade_supported(quickstart_compose_file, quickstart_status):
+ if quickstart_status.is_ok():
+ show_migration_instructions()
+ else:
+ show_repair_instructions()
+ return False
+ return True
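As the docker_cli.py hunks above show, quickstart no longer accepts --build-locally, --graph-service-impl, --zk-port, --schema-registry-port, --standalone_consumers, or --kafka-setup. One low-risk way to confirm this against an installed CLI is to let click reject the option during argument parsing; the sketch below assumes acryl-datahub 1.2.0 is installed and does not start any containers.

from click.testing import CliRunner

from datahub.cli.docker_cli import quickstart

# Unknown options are rejected by click before the command body runs,
# so nothing talks to Docker here.
runner = CliRunner()
result = runner.invoke(quickstart, ["--graph-service-impl", "neo4j"])
print(result.exit_code)  # expected: 2 (usage error) on 1.2.0
print(result.output)     # expected to mention: No such option: --graph-service-impl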
datahub/cli/exists_cli.py CHANGED
@@ -7,7 +7,6 @@ from click_default_group import DefaultGroup
 
  from datahub.ingestion.graph.client import get_default_graph
  from datahub.ingestion.graph.config import ClientMode
- from datahub.telemetry import telemetry
  from datahub.upgrade import upgrade
 
  logger = logging.getLogger(__name__)
@@ -23,7 +22,6 @@ def exists() -> None:
  @click.option("--urn", required=False, type=str)
  @click.pass_context
  @upgrade.check_upgrade
- @telemetry.with_telemetry()
  def urn(ctx: Any, urn: Optional[str]) -> None:
  """
  Get metadata for an entity with an optional list of aspects to project.
datahub/cli/get_cli.py CHANGED
@@ -8,7 +8,6 @@ from click_default_group import DefaultGroup
  from datahub.cli.cli_utils import get_aspects_for_entity
  from datahub.ingestion.graph.client import get_default_graph
  from datahub.ingestion.graph.config import ClientMode
- from datahub.telemetry import telemetry
  from datahub.upgrade import upgrade
 
  logger = logging.getLogger(__name__)
@@ -32,7 +31,6 @@ def get() -> None:
  )
  @click.pass_context
  @upgrade.check_upgrade
- @telemetry.with_telemetry()
  def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
  """
  Get metadata for an entity with an optional list of aspects to project.
datahub/cli/iceberg_cli.py CHANGED
@@ -16,6 +16,7 @@ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
  from datahub.ingestion.graph.config import ClientMode
  from datahub.metadata.schema_classes import SystemMetadataClass
  from datahub.telemetry import telemetry
+ from datahub.upgrade import upgrade
 
  logger = logging.getLogger(__name__)
 
@@ -164,6 +165,7 @@ def validate_warehouse(data_root: str) -> None:
  help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
  )
  @telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
+ @upgrade.check_upgrade
  def create(
  warehouse: str,
  description: Optional[str],
@@ -317,6 +319,7 @@ def create(
  help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
  )
  @telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
+ @upgrade.check_upgrade
  def update(
  warehouse: str,
  data_root: str,
@@ -403,6 +406,7 @@ def update(
 
  @iceberg.command()
  @telemetry.with_telemetry()
+ @upgrade.check_upgrade
  def list() -> None:
  """
  List iceberg warehouses
@@ -419,6 +423,7 @@ def list() -> None:
  "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
  )
  @telemetry.with_telemetry()
+ @upgrade.check_upgrade
  def get(warehouse: str) -> None:
  """Fetches the details of the specified iceberg warehouse"""
  client = get_default_graph(ClientMode.CLI)
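The exists_cli.py, get_cli.py, and iceberg_cli.py hunks above mostly reposition @upgrade.check_upgrade relative to @telemetry.with_telemetry(). For readers wondering why placement matters: stacked decorators apply bottom-up, so the one written closest to the function wraps it first and runs last on the way in. The demo below is a generic illustration with stand-in decorators, not DataHub's actual implementations.

import functools

def make_wrapper(name):
    def deco(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            print(f"enter {name}")
            return func(*args, **kwargs)
        return wrapper
    return deco

@make_wrapper("outer")  # listed first, applied last, runs first
@make_wrapper("inner")  # listed last, applied first, runs last
def command():
    print("command body")

command()
# Output:
# enter outer
# enter inner
# command body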