acryl-datahub 1.3.0.1rc2__py3-none-any.whl → 1.3.0.1rc4__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.

Note: this version of acryl-datahub has been flagged as a potentially problematic release.

Files changed (51)
  1. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/METADATA +2469 -2467
  2. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/RECORD +50 -48
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataproduct/dataproduct.py +26 -0
  5. datahub/cli/config_utils.py +18 -10
  6. datahub/cli/docker_check.py +2 -1
  7. datahub/cli/docker_cli.py +4 -2
  8. datahub/cli/graphql_cli.py +1422 -0
  9. datahub/cli/quickstart_versioning.py +2 -2
  10. datahub/cli/specific/dataproduct_cli.py +2 -4
  11. datahub/cli/specific/user_cli.py +172 -1
  12. datahub/configuration/env_vars.py +331 -0
  13. datahub/configuration/kafka.py +6 -4
  14. datahub/emitter/mce_builder.py +2 -4
  15. datahub/emitter/rest_emitter.py +15 -15
  16. datahub/entrypoints.py +2 -0
  17. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  18. datahub/ingestion/api/source.py +5 -0
  19. datahub/ingestion/graph/client.py +197 -0
  20. datahub/ingestion/graph/config.py +2 -2
  21. datahub/ingestion/sink/datahub_rest.py +6 -5
  22. datahub/ingestion/source/aws/aws_common.py +20 -13
  23. datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -4
  24. datahub/ingestion/source/grafana/models.py +5 -0
  25. datahub/ingestion/source/iceberg/iceberg.py +39 -19
  26. datahub/ingestion/source/kafka_connect/source_connectors.py +4 -1
  27. datahub/ingestion/source/mode.py +13 -0
  28. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  29. datahub/ingestion/source/schema_inference/object.py +22 -6
  30. datahub/ingestion/source/snowflake/snowflake_schema.py +2 -2
  31. datahub/ingestion/source/sql/mssql/source.py +7 -1
  32. datahub/ingestion/source/sql/teradata.py +80 -65
  33. datahub/ingestion/source/unity/config.py +31 -0
  34. datahub/ingestion/source/unity/proxy.py +73 -0
  35. datahub/ingestion/source/unity/source.py +27 -70
  36. datahub/ingestion/source/unity/usage.py +46 -4
  37. datahub/metadata/_internal_schema_classes.py +544 -544
  38. datahub/metadata/_urns/urn_defs.py +1728 -1728
  39. datahub/metadata/schema.avsc +15157 -15157
  40. datahub/sql_parsing/sql_parsing_aggregator.py +14 -5
  41. datahub/sql_parsing/sqlglot_lineage.py +7 -0
  42. datahub/telemetry/telemetry.py +8 -3
  43. datahub/utilities/file_backed_collections.py +2 -2
  44. datahub/utilities/is_pytest.py +3 -2
  45. datahub/utilities/logging_manager.py +22 -6
  46. datahub/utilities/sample_data.py +5 -4
  47. datahub/emitter/sql_parsing_builder.py +0 -306
  48. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/WHEEL +0 -0
  49. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/entry_points.txt +0 -0
  50. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/licenses/LICENSE +0 -0
  51. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc4.dist-info}/top_level.txt +0 -0
datahub/cli/quickstart_versioning.py
@@ -1,6 +1,5 @@
 import json
 import logging
-import os
 import os.path
 import re
 from typing import Dict, Optional
@@ -13,10 +12,11 @@ from packaging.version import parse
 from pydantic import BaseModel
 
 from datahub._version import nice_version_name
+from datahub.configuration.env_vars import get_force_local_quickstart_mapping
 
 logger = logging.getLogger(__name__)
 
-LOCAL_QUICKSTART_MAPPING_FILE = os.environ.get("FORCE_LOCAL_QUICKSTART_MAPPING", "")
+LOCAL_QUICKSTART_MAPPING_FILE = get_force_local_quickstart_mapping()
 DEFAULT_LOCAL_CONFIG_PATH = "~/.datahub/quickstart/quickstart_version_mapping.yaml"
 DEFAULT_REMOTE_CONFIG_PATH = "https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/quickstart_version_mapping.yaml"
 
datahub/cli/specific/dataproduct_cli.py
@@ -1,7 +1,6 @@
 import difflib
 import json
 import logging
-import os
 import pathlib
 import sys
 from pathlib import Path
@@ -14,6 +13,7 @@ from click_default_group import DefaultGroup
 
 from datahub.api.entities.dataproduct.dataproduct import DataProduct
 from datahub.cli.specific.file_loader import load_file
+from datahub.configuration.env_vars import get_dataproduct_external_url
 from datahub.emitter.mce_builder import (
     make_group_urn,
     make_user_urn,
@@ -84,9 +84,7 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -
     with get_default_graph(ClientMode.CLI) as graph:
         data_product: DataProduct = DataProduct.from_yaml(file, graph)
         external_url_override = (
-            external_url
-            or os.getenv("DATAHUB_DATAPRODUCT_EXTERNAL_URL")
-            or data_product.external_url
+            external_url or get_dataproduct_external_url() or data_product.external_url
         )
         data_product.external_url = external_url_override
         if upsert and not graph.exists(data_product.urn):
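
The external-URL override in mutate resolves left to right through Python's "or" chaining: an explicit --external-url flag wins over the DATAHUB_DATAPRODUCT_EXTERNAL_URL environment variable, which in turn wins over the value from the YAML file. A minimal illustration of that precedence, with hypothetical values:

    # Precedence of the external URL override, mirroring the hunk above.
    cli_flag = None                         # --external-url was not passed
    env_value = "https://wiki.example/dp"   # DATAHUB_DATAPRODUCT_EXTERNAL_URL
    yaml_value = "https://old.example/dp"   # data_product.external_url
    assert (cli_flag or env_value or yaml_value) == "https://wiki.example/dp"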
datahub/cli/specific/user_cli.py
@@ -1,13 +1,15 @@
 import logging
 import pathlib
 from pathlib import Path
+from typing import Optional
 
 import click
 from click_default_group import DefaultGroup
 
 from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
 from datahub.cli.specific.file_loader import load_file
-from datahub.ingestion.graph.client import get_default_graph
+from datahub.configuration.common import OperationalError
+from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
 from datahub.ingestion.graph.config import ClientMode
 from datahub.upgrade import upgrade
 
@@ -55,3 +57,172 @@ def upsert(file: Path, override_editable: bool) -> None:
             f"Update failed for id {user_config.get('id')}. due to {e}",
             fg="red",
         )
+
+
+def validate_user_id_options(
+    user_id: Optional[str], email_as_id: bool, email: str
+) -> str:
+    """
+    Validate user ID options and return the final user ID to use.
+
+    Args:
+        user_id: Optional explicit user ID
+        email_as_id: Whether to use email as the user ID
+        email: User's email address
+
+    Returns:
+        The final user ID to use for the URN
+
+    Raises:
+        ValueError: If validation fails (neither or both options provided)
+    """
+    if not user_id and not email_as_id:
+        raise ValueError("Must specify either --id or --email-as-id flag")
+
+    if user_id and email_as_id:
+        raise ValueError("Cannot specify both --id and --email-as-id flag")
+
+    if email_as_id:
+        return email
+
+    assert user_id is not None
+    return user_id
+
+
+def create_native_user_in_datahub(
+    graph: DataHubGraph,
+    user_id: str,
+    email: str,
+    display_name: str,
+    password: str,
+    role: Optional[str] = None,
+) -> str:
+    """
+    Create a native DataHub user.
+
+    Args:
+        graph: DataHubGraph client
+        user_id: User identifier (used in URN)
+        email: User's email address
+        display_name: User's full display name
+        password: User's password
+        role: Optional role to assign (Admin, Editor, or Reader)
+
+    Returns:
+        The created user's URN
+
+    Raises:
+        ValueError: If user already exists or role is invalid
+        OperationalError: If user creation fails due to API/network errors
+    """
+    user_urn = f"urn:li:corpuser:{user_id}"
+
+    if graph.exists(user_urn):
+        raise ValueError(f"User with ID {user_id} already exists (urn: {user_urn})")
+
+    created_user_urn = graph.create_native_user(
+        user_id=user_id,
+        email=email,
+        display_name=display_name,
+        password=password,
+        role=role,
+    )
+
+    return created_user_urn
+
+
+@user.command(name="add")
+@click.option("--id", "user_id", type=str, help="User identifier (used in URN)")
+@click.option("--email", required=True, type=str, help="User's email address")
+@click.option(
+    "--email-as-id",
+    is_flag=True,
+    default=False,
+    help="Use email address as user ID (alternative to --id)",
+)
+@click.option(
+    "--display-name", required=True, type=str, help="User's full display name"
+)
+@click.option(
+    "--password",
+    is_flag=True,
+    default=False,
+    help="Prompt for password (hidden input)",
+)
+@click.option(
+    "--role",
+    required=False,
+    type=click.Choice(
+        ["Admin", "Editor", "Reader", "admin", "editor", "reader"], case_sensitive=False
+    ),
+    help="Optional role to assign (Admin, Editor, or Reader)",
+)
+@upgrade.check_upgrade
+def add(
+    user_id: str,
+    email: str,
+    email_as_id: bool,
+    display_name: str,
+    password: bool,
+    role: str,
+) -> None:
+    """Create a native DataHub user with email/password authentication"""
+
+    try:
+        final_user_id = validate_user_id_options(user_id, email_as_id, email)
+    except ValueError as e:
+        click.secho(f"Error: {str(e)}", fg="red")
+        raise SystemExit(1) from e
+
+    if not password:
+        click.secho(
+            "Error: --password flag is required to prompt for password input",
+            fg="red",
+        )
+        raise SystemExit(1)
+
+    password_value = click.prompt(
+        "Enter password", hide_input=True, confirmation_prompt=True
+    )
+
+    with get_default_graph(ClientMode.CLI) as graph:
+        try:
+            created_user_urn = create_native_user_in_datahub(
+                graph, final_user_id, email, display_name, password_value, role
+            )
+
+            if role:
+                click.secho(
+                    f"Successfully created user {final_user_id} with role {role.capitalize()} (URN: {created_user_urn})",
+                    fg="green",
+                )
+            else:
+                click.secho(
+                    f"Successfully created user {final_user_id} (URN: {created_user_urn})",
+                    fg="green",
+                )
+        except ValueError as e:
+            click.secho(f"Error: {str(e)}", fg="red")
+            raise SystemExit(1) from e
+        except OperationalError as e:
+            error_msg = e.message if hasattr(e, "message") else str(e.args[0])
+            click.secho(f"Error: {error_msg}", fg="red")
+
+            if hasattr(e, "info") and e.info:
+                logger.debug(f"Error details: {e.info}")
+                if "status_code" in e.info:
+                    click.secho(f"  HTTP Status: {e.info['status_code']}", fg="red")
+                if "response_text" in e.info:
+                    click.secho(
+                        f"  Response: {e.info['response_text'][:200]}", fg="red"
+                    )
+
+            click.secho(
+                "\nTip: Run with DATAHUB_DEBUG=1 environment variable for detailed logs",
+                fg="yellow",
+            )
+            raise SystemExit(1) from e
+        except Exception as e:
+            click.secho(f"Unexpected error: {str(e)}", fg="red")
+            logger.exception("Unexpected error during user creation")
+            raise SystemExit(1) from e
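
The new add subcommand registers under the existing user group, so an invocation would look like the following (hypothetical values; the password is collected through a hidden, confirmed prompt rather than as a flag argument):

    datahub user add --email jane@example.com --email-as-id \
        --display-name "Jane Doe" --password --role Reader

Exactly one of --id or --email-as-id must be supplied; --role is optional and matched case-insensitively against Admin, Editor, and Reader.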
datahub/configuration/env_vars.py (new file)
@@ -0,0 +1,331 @@
+# ABOUTME: Central registry for all environment variables used in metadata-ingestion.
+# ABOUTME: All environment variable reads should go through this module for discoverability and maintainability.
+
+import os
+from typing import Optional
+
+# ============================================================================
+# Core DataHub Configuration
+# ============================================================================
+
+
+def get_gms_url() -> Optional[str]:
+    """Complete GMS URL (takes precedence over separate host/port)."""
+    return os.getenv("DATAHUB_GMS_URL")
+
+
+def get_gms_host() -> Optional[str]:
+    """GMS host (fallback for URL, deprecated)."""
+    return os.getenv("DATAHUB_GMS_HOST")
+
+
+def get_gms_port() -> Optional[str]:
+    """GMS port number."""
+    return os.getenv("DATAHUB_GMS_PORT")
+
+
+def get_gms_protocol() -> str:
+    """Protocol for GMS connection (http/https)."""
+    return os.getenv("DATAHUB_GMS_PROTOCOL", "http")
+
+
+def get_gms_token() -> Optional[str]:
+    """Authentication token for GMS."""
+    return os.getenv("DATAHUB_GMS_TOKEN")
+
+
+def get_system_client_id() -> Optional[str]:
+    """System client ID for OAuth/auth."""
+    return os.getenv("DATAHUB_SYSTEM_CLIENT_ID")
+
+
+def get_system_client_secret() -> Optional[str]:
+    """System client secret for OAuth/auth."""
+    return os.getenv("DATAHUB_SYSTEM_CLIENT_SECRET")
+
+
+def get_skip_config() -> bool:
+    """Skip loading config file (forces env variables)."""
+    return os.getenv("DATAHUB_SKIP_CONFIG", "").lower() == "true"
+
+
+def get_gms_base_path() -> str:
+    """Base path for GMS API endpoints."""
+    return os.getenv("DATAHUB_GMS_BASE_PATH", "")
+
+
+# ============================================================================
+# REST Emitter Configuration
+# ============================================================================
+
+
+def get_rest_emitter_default_retry_max_times() -> str:
+    """Max retry attempts for failed requests."""
+    return os.getenv("DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES", "4")
+
+
+def get_rest_emitter_batch_max_payload_bytes() -> int:
+    """Maximum payload size in bytes for batch operations."""
+    return int(
+        os.getenv("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_BYTES", str(15 * 1024 * 1024))
+    )
+
+
+def get_rest_emitter_batch_max_payload_length() -> int:
+    """Maximum number of MCPs per batch."""
+    return int(os.getenv("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_LENGTH", "200"))
+
+
+def get_emit_mode() -> Optional[str]:
+    """Emission mode (SYNC_PRIMARY, SYNC_WAIT, ASYNC, ASYNC_WAIT)."""
+    return os.getenv("DATAHUB_EMIT_MODE")
+
+
+def get_rest_emitter_default_endpoint() -> Optional[str]:
+    """REST endpoint type (RESTLI or OPENAPI)."""
+    return os.getenv("DATAHUB_REST_EMITTER_DEFAULT_ENDPOINT")
+
+
+def get_emitter_trace() -> bool:
+    """Enable detailed emitter tracing."""
+    return os.getenv("DATAHUB_EMITTER_TRACE", "").lower() == "true"
+
+
+# ============================================================================
+# REST Sink Configuration
+# ============================================================================
+
+
+def get_rest_sink_default_max_threads() -> int:
+    """Max thread pool size for async operations."""
+    return int(os.getenv("DATAHUB_REST_SINK_DEFAULT_MAX_THREADS", "15"))
+
+
+def get_rest_sink_default_mode() -> Optional[str]:
+    """Sink mode (SYNC, ASYNC, ASYNC_BATCH)."""
+    return os.getenv("DATAHUB_REST_SINK_DEFAULT_MODE")
+
+
+# ============================================================================
+# Telemetry & Monitoring
+# ============================================================================
+
+
+def get_telemetry_timeout() -> str:
+    """Telemetry timeout in seconds."""
+    return os.getenv("DATAHUB_TELEMETRY_TIMEOUT", "10")
+
+
+def get_sentry_dsn() -> Optional[str]:
+    """Sentry error tracking DSN."""
+    return os.getenv("SENTRY_DSN")
+
+
+def get_sentry_environment() -> str:
+    """Sentry environment (dev/prod)."""
+    return os.getenv("SENTRY_ENVIRONMENT", "dev")
+
+
+# ============================================================================
+# Logging & Debug Configuration
+# ============================================================================
+
+
+def get_suppress_logging_manager() -> Optional[str]:
+    """Suppress DataHub logging manager initialization."""
+    return os.getenv("DATAHUB_SUPPRESS_LOGGING_MANAGER")
+
+
+def get_no_color() -> bool:
+    """Disable colored logging output."""
+    return os.getenv("NO_COLOR", "").lower() == "true"
+
+
+def get_test_mode() -> Optional[str]:
+    """Indicates running in test context."""
+    return os.getenv("DATAHUB_TEST_MODE")
+
+
+def get_debug() -> bool:
+    """Enable debug mode."""
+    return os.getenv("DATAHUB_DEBUG", "").lower() == "true"
+
+
+# ============================================================================
+# Data Processing Configuration
+# ============================================================================
+
+
+def get_sql_agg_query_log() -> str:
+    """SQL aggregator query logging level."""
+    return os.getenv("DATAHUB_SQL_AGG_QUERY_LOG", "DISABLED")
+
+
+def get_dataset_urn_to_lower() -> str:
+    """Convert dataset URNs to lowercase."""
+    return os.getenv("DATAHUB_DATASET_URN_TO_LOWER", "false")
+
+
+# ============================================================================
+# Integration-Specific Configuration
+# ============================================================================
+
+
+def get_kafka_schema_registry_url() -> Optional[str]:
+    """Kafka schema registry URL."""
+    return os.getenv("KAFKA_SCHEMAREGISTRY_URL")
+
+
+def get_spark_version() -> Optional[str]:
+    """Spark version (for S3 source)."""
+    return os.getenv("SPARK_VERSION")
+
+
+def get_bigquery_schema_parallelism() -> int:
+    """Parallelism level for BigQuery schema extraction."""
+    return int(os.getenv("DATAHUB_BIGQUERY_SCHEMA_PARALLELISM", "20"))
+
+
+def get_snowflake_schema_parallelism() -> int:
+    """Parallelism level for Snowflake schema extraction."""
+    return int(os.getenv("DATAHUB_SNOWFLAKE_SCHEMA_PARALLELISM", "20"))
+
+
+def get_powerbi_m_query_parse_timeout() -> int:
+    """Timeout for PowerBI M query parsing."""
+    return int(os.getenv("DATAHUB_POWERBI_M_QUERY_PARSE_TIMEOUT", "60"))
+
+
+def get_trace_powerbi_mquery_parser() -> bool:
+    """Enable PowerBI M query parser tracing."""
+    return os.getenv("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", "").lower() == "true"
+
+
+def get_lookml_git_test_ssh_key() -> Optional[str]:
+    """SSH key for LookML Git tests."""
+    return os.getenv("DATAHUB_LOOKML_GIT_TEST_SSH_KEY")
+
+
+# ============================================================================
+# AWS/Cloud Configuration
+# ============================================================================
+
+
+def get_aws_lambda_function_name() -> Optional[str]:
+    """Indicates running in AWS Lambda."""
+    return os.getenv("AWS_LAMBDA_FUNCTION_NAME")
+
+
+def get_aws_execution_env() -> Optional[str]:
+    """AWS execution environment."""
+    return os.getenv("AWS_EXECUTION_ENV")
+
+
+def get_aws_web_identity_token_file() -> Optional[str]:
+    """OIDC token file path."""
+    return os.getenv("AWS_WEB_IDENTITY_TOKEN_FILE")
+
+
+def get_aws_role_arn() -> Optional[str]:
+    """AWS role ARN for OIDC."""
+    return os.getenv("AWS_ROLE_ARN")
+
+
+def get_aws_app_runner_service_id() -> Optional[str]:
+    """AWS App Runner service ID."""
+    return os.getenv("AWS_APP_RUNNER_SERVICE_ID")
+
+
+def get_ecs_container_metadata_uri_v4() -> Optional[str]:
+    """ECS metadata endpoint v4."""
+    return os.getenv("ECS_CONTAINER_METADATA_URI_V4")
+
+
+def get_ecs_container_metadata_uri() -> Optional[str]:
+    """ECS metadata endpoint v3."""
+    return os.getenv("ECS_CONTAINER_METADATA_URI")
+
+
+def get_elastic_beanstalk_environment_name() -> Optional[str]:
+    """Elastic Beanstalk environment."""
+    return os.getenv("ELASTIC_BEANSTALK_ENVIRONMENT_NAME")
+
+
+# ============================================================================
+# Docker & Local Development
+# ============================================================================
+
+
+def get_compose_project_name() -> str:
+    """Docker Compose project name."""
+    return os.getenv("DATAHUB_COMPOSE_PROJECT_NAME", "datahub")
+
+
+def get_docker_compose_base() -> Optional[str]:
+    """Base path for Docker Compose files."""
+    return os.getenv("DOCKER_COMPOSE_BASE")
+
+
+def get_datahub_version() -> Optional[str]:
+    """DataHub version (set during docker init)."""
+    return os.getenv("DATAHUB_VERSION")
+
+
+def get_mapped_mysql_port() -> Optional[str]:
+    """MySQL port mapping (set during docker init)."""
+    return os.getenv("DATAHUB_MAPPED_MYSQL_PORT")
+
+
+def get_mapped_kafka_broker_port() -> Optional[str]:
+    """Kafka broker port mapping (set during docker init)."""
+    return os.getenv("DATAHUB_MAPPED_KAFKA_BROKER_PORT")
+
+
+def get_mapped_elastic_port() -> Optional[str]:
+    """Elasticsearch port mapping (set during docker init)."""
+    return os.getenv("DATAHUB_MAPPED_ELASTIC_PORT")
+
+
+def get_metadata_service_auth_enabled() -> str:
+    """Enable/disable auth in Docker."""
+    return os.getenv("METADATA_SERVICE_AUTH_ENABLED", "false")
+
+
+def get_ui_ingestion_default_cli_version() -> Optional[str]:
+    """CLI version for UI ingestion (set during init)."""
+    return os.getenv("UI_INGESTION_DEFAULT_CLI_VERSION")
+
+
+# ============================================================================
+# Utility & Helper Configuration
+# ============================================================================
+
+
+def get_datahub_component() -> str:
+    """Component name for user agent tracking."""
+    return os.getenv("DATAHUB_COMPONENT", "datahub")
+
+
+def get_force_local_quickstart_mapping() -> str:
+    """Force local quickstart mapping file."""
+    return os.getenv("FORCE_LOCAL_QUICKSTART_MAPPING", "")
+
+
+def get_dataproduct_external_url() -> Optional[str]:
+    """External URL for data products."""
+    return os.getenv("DATAHUB_DATAPRODUCT_EXTERNAL_URL")
+
+
+def get_override_sqlite_version_req() -> str:
+    """Override SQLite version requirement."""
+    return os.getenv("OVERRIDE_SQLITE_VERSION_REQ", "")
+
+
+def get_update_entity_registry() -> str:
+    """Update entity registry during tests."""
+    return os.getenv("UPDATE_ENTITY_REGISTRY", "false")
+
+
+def get_ci() -> Optional[str]:
+    """Indicates running in CI environment."""
+    return os.getenv("CI")
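
The accessor style above is exactly what the other hunks in this diff migrate callers onto: each direct os.getenv read is replaced with a named function from datahub.configuration.env_vars. A minimal before/after sketch of the pattern, reusing an accessor and default that appear in the new module (the caller itself is hypothetical):

    # Before: a scattered, hard-to-discover env read at the call site.
    import os
    timeout = int(os.getenv("DATAHUB_POWERBI_M_QUERY_PARSE_TIMEOUT", "60"))

    # After: one discoverable accessor per variable, defaults kept in one place.
    from datahub.configuration.env_vars import get_powerbi_m_query_parse_timeout
    timeout = get_powerbi_m_query_parse_timeout()

Note that the accessors read the environment at call time; callers that bind the result to a module-level constant (such as LOCAL_QUICKSTART_MAPPING_FILE earlier in this diff) still effectively freeze the value at import time.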
datahub/configuration/kafka.py
@@ -1,19 +1,21 @@
-import os
-
 from pydantic import Field, validator
 
 from datahub.configuration.common import ConfigModel, ConfigurationError
+from datahub.configuration.env_vars import (
+    get_gms_base_path,
+    get_kafka_schema_registry_url,
+)
 from datahub.configuration.kafka_consumer_config import CallableConsumerConfig
 from datahub.configuration.validate_host_port import validate_host_port
 
 
 def _get_schema_registry_url() -> str:
     """Get schema registry URL with proper base path handling."""
-    explicit_url = os.getenv("KAFKA_SCHEMAREGISTRY_URL")
+    explicit_url = get_kafka_schema_registry_url()
     if explicit_url:
         return explicit_url
 
-    base_path = os.getenv("DATAHUB_GMS_BASE_PATH", "")
+    base_path = get_gms_base_path()
     if base_path in ("/", ""):
         base_path = ""
 
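The hunk cuts off at its context boundary before the fallback URL is assembled, but the precedence it does show is clear: an explicit KAFKA_SCHEMAREGISTRY_URL short-circuits everything, and otherwise "/" and "" are both normalized to an empty base path. A self-contained sketch of just that visible logic (the function name and the trailing placeholder are hypothetical; the real code continues past the hunk to build the URL):

    from typing import Optional

    def schema_registry_precedence(
        explicit_url: Optional[str], base_path: str
    ) -> Optional[str]:
        if explicit_url:  # an explicit registry URL always wins
            return explicit_url
        if base_path in ("/", ""):  # "/" and "" both mean "no base path"
            base_path = ""
        return None  # hypothetical stand-in for the derived-URL branch

    assert schema_registry_precedence("http://registry:8081", "/") == "http://registry:8081"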
datahub/emitter/mce_builder.py
@@ -3,7 +3,6 @@
 import hashlib
 import json
 import logging
-import os
 import re
 import time
 from datetime import datetime, timezone
@@ -26,6 +25,7 @@ import typing_inspect
 from avrogen.dict_wrapper import DictWrapper
 from typing_extensions import assert_never
 
+from datahub.configuration.env_vars import get_dataset_urn_to_lower
 from datahub.emitter.enum_helpers import get_enum_options
 from datahub.metadata.schema_classes import (
     AssertionKeyClass,
@@ -72,9 +72,7 @@ ALL_ENV_TYPES: Set[str] = set(get_enum_options(FabricTypeClass))
 
 DEFAULT_FLOW_CLUSTER = "prod"
 UNKNOWN_USER = "urn:li:corpuser:unknown"
-DATASET_URN_TO_LOWER: bool = (
-    os.getenv("DATAHUB_DATASET_URN_TO_LOWER", "false") == "true"
-)
+DATASET_URN_TO_LOWER: bool = get_dataset_urn_to_lower() == "true"
 
 if TYPE_CHECKING:
     from datahub.emitter.mcp_builder import DatahubKey
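
One behavioral detail the refactor preserves: get_dataset_urn_to_lower returns the raw string and the comparison is an exact match against "true", so values such as "True" or "TRUE" leave the flag off (unlike boolean accessors like get_debug, which lowercase before comparing). A small demonstration with a hypothetical environment setup:

    import os

    os.environ["DATAHUB_DATASET_URN_TO_LOWER"] = "True"  # note the capital T
    # Mirrors the module-level comparison in mce_builder.py.
    enabled = os.getenv("DATAHUB_DATASET_URN_TO_LOWER", "false") == "true"
    assert enabled is False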