acryl-datahub 1.3.0.1rc2__py3-none-any.whl → 1.3.0.1rc3__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.

This version of acryl-datahub has been flagged as potentially problematic.

Files changed (47)
  1. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/METADATA +2563 -2561
  2. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/RECORD +46 -44
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataproduct/dataproduct.py +26 -0
  5. datahub/cli/config_utils.py +18 -10
  6. datahub/cli/docker_check.py +2 -1
  7. datahub/cli/docker_cli.py +4 -2
  8. datahub/cli/graphql_cli.py +1422 -0
  9. datahub/cli/quickstart_versioning.py +2 -2
  10. datahub/cli/specific/dataproduct_cli.py +2 -4
  11. datahub/cli/specific/user_cli.py +172 -1
  12. datahub/configuration/env_vars.py +331 -0
  13. datahub/configuration/kafka.py +6 -4
  14. datahub/emitter/mce_builder.py +2 -4
  15. datahub/emitter/rest_emitter.py +15 -15
  16. datahub/entrypoints.py +2 -0
  17. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  18. datahub/ingestion/api/source.py +5 -0
  19. datahub/ingestion/graph/client.py +197 -0
  20. datahub/ingestion/graph/config.py +2 -2
  21. datahub/ingestion/sink/datahub_rest.py +6 -5
  22. datahub/ingestion/source/aws/aws_common.py +20 -13
  23. datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -4
  24. datahub/ingestion/source/grafana/models.py +5 -0
  25. datahub/ingestion/source/iceberg/iceberg.py +39 -19
  26. datahub/ingestion/source/kafka_connect/source_connectors.py +4 -1
  27. datahub/ingestion/source/mode.py +13 -0
  28. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  29. datahub/ingestion/source/snowflake/snowflake_schema.py +2 -2
  30. datahub/ingestion/source/sql/mssql/source.py +7 -1
  31. datahub/ingestion/source/sql/teradata.py +80 -65
  32. datahub/ingestion/source/unity/config.py +31 -0
  33. datahub/ingestion/source/unity/proxy.py +73 -0
  34. datahub/ingestion/source/unity/source.py +27 -70
  35. datahub/ingestion/source/unity/usage.py +46 -4
  36. datahub/sql_parsing/sql_parsing_aggregator.py +14 -5
  37. datahub/sql_parsing/sqlglot_lineage.py +7 -0
  38. datahub/telemetry/telemetry.py +8 -3
  39. datahub/utilities/file_backed_collections.py +2 -2
  40. datahub/utilities/is_pytest.py +3 -2
  41. datahub/utilities/logging_manager.py +22 -6
  42. datahub/utilities/sample_data.py +5 -4
  43. datahub/emitter/sql_parsing_builder.py +0 -306
  44. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/WHEEL +0 -0
  45. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/entry_points.txt +0 -0
  46. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/licenses/LICENSE +0 -0
  47. {acryl_datahub-1.3.0.1rc2.dist-info → acryl_datahub-1.3.0.1rc3.dist-info}/top_level.txt +0 -0
--- a/datahub/ingestion/source/unity/source.py
+++ b/datahub/ingestion/source/unity/source.py
@@ -30,7 +30,6 @@ from datahub.emitter.mcp_builder import (
     add_entity_to_container,
     gen_containers,
 )
-from datahub.emitter.sql_parsing_builder import SqlParsingBuilder
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -142,11 +141,7 @@ from datahub.metadata.schema_classes import (
 from datahub.metadata.urns import MlModelGroupUrn, MlModelUrn, TagUrn
 from datahub.sdk import MLModel, MLModelGroup
 from datahub.sql_parsing.schema_resolver import SchemaResolver
-from datahub.sql_parsing.sqlglot_lineage import (
-    SqlParsingResult,
-    sqlglot_lineage,
-    view_definition_lineage_helper,
-)
+from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator
 from datahub.utilities.file_backed_collections import FileBackedDict
 from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column
 from datahub.utilities.registries.domain_registry import DomainRegistry
@@ -199,6 +194,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
     platform_resource_repository: Optional[UnityCatalogPlatformResourceRepository] = (
         None
     )
+    sql_parsing_aggregator: Optional[SqlParsingAggregator] = None
 
     def get_report(self) -> UnityCatalogReport:
         return self.report
@@ -218,6 +214,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             report=self.report,
             hive_metastore_proxy=self.hive_metastore_proxy,
             lineage_data_source=config.lineage_data_source,
+            usage_data_source=config.usage_data_source,
             databricks_api_page_size=config.databricks_api_page_size,
         )
 
@@ -244,9 +241,6 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         self.table_refs: Set[TableReference] = set()
         self.view_refs: Set[TableReference] = set()
         self.notebooks: FileBackedDict[Notebook] = FileBackedDict()
-        self.view_definitions: FileBackedDict[Tuple[TableReference, str]] = (
-            FileBackedDict()
-        )
 
         # Global map of tables, for profiling
         self.tables: FileBackedDict[Table] = FileBackedDict()
@@ -290,6 +284,17 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
                 platform_instance=self.config.platform_instance,
                 env=self.config.env,
             )
+            self.sql_parsing_aggregator = SqlParsingAggregator(
+                platform=self.platform,
+                platform_instance=self.config.platform_instance,
+                env=self.config.env,
+                schema_resolver=self.sql_parser_schema_resolver,
+                generate_lineage=True,
+                generate_queries=False,
+                generate_usage_statistics=False,
+                generate_operations=False,
+            )
+            self.report.sql_aggregator = self.sql_parsing_aggregator.report
         except Exception as e:
             logger.debug("Exception", exc_info=True)
             self.warn(
@@ -629,8 +634,13 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             self.sql_parser_schema_resolver.add_schema_metadata(
                 dataset_urn, schema_metadata
             )
-        if table.view_definition:
-            self.view_definitions[dataset_urn] = (table.ref, table.view_definition)
+        if table.view_definition and self.sql_parsing_aggregator:
+            self.sql_parsing_aggregator.add_view_definition(
+                view_urn=dataset_urn,
+                view_definition=table.view_definition,
+                default_db=table.ref.catalog,
+                default_schema=table.ref.schema,
+            )
 
         if (
             table_props.customProperties.get("table_type")
@@ -1334,75 +1344,22 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             )
         ]
 
-    def _run_sql_parser(
-        self, view_ref: TableReference, query: str, schema_resolver: SchemaResolver
-    ) -> Optional[SqlParsingResult]:
-        raw_lineage = sqlglot_lineage(
-            query,
-            schema_resolver=schema_resolver,
-            default_db=view_ref.catalog,
-            default_schema=view_ref.schema,
-        )
-        view_urn = self.gen_dataset_urn(view_ref)
-
-        if raw_lineage.debug_info.table_error:
-            logger.debug(
-                f"Failed to parse lineage for view {view_ref}: "
-                f"{raw_lineage.debug_info.table_error}"
-            )
-            self.report.num_view_definitions_failed_parsing += 1
-            self.report.view_definitions_parsing_failures.append(
-                f"Table-level sql parsing error for view {view_ref}: {raw_lineage.debug_info.table_error}"
-            )
-            return None
-
-        elif raw_lineage.debug_info.column_error:
-            self.report.num_view_definitions_failed_column_parsing += 1
-            self.report.view_definitions_parsing_failures.append(
-                f"Column-level sql parsing error for view {view_ref}: {raw_lineage.debug_info.column_error}"
-            )
-        else:
-            self.report.num_view_definitions_parsed += 1
-            if raw_lineage.out_tables != [view_urn]:
-                self.report.num_view_definitions_view_urn_mismatch += 1
-        return view_definition_lineage_helper(raw_lineage, view_urn)
-
     def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
         if not (
             self.config.include_hive_metastore
             and self.config.include_table_lineage
-            and self.sql_parser_schema_resolver
+            and self.sql_parsing_aggregator
         ):
             return
-        # This is only used for parsing view lineage. Usage, Operations are emitted elsewhere
-        builder = SqlParsingBuilder(
-            generate_lineage=True,
-            generate_usage_statistics=False,
-            generate_operations=False,
-        )
-        for dataset_name in self.view_definitions:
-            view_ref, view_definition = self.view_definitions[dataset_name]
-            result = self._run_sql_parser(
-                view_ref,
-                view_definition,
-                self.sql_parser_schema_resolver,
-            )
-            if result and result.out_tables:
-                # This does not yield any workunits but we use
-                # yield here to execute this method
-                yield from builder.process_sql_parsing_result(
-                    result=result,
-                    query=view_definition,
-                    is_view_ddl=True,
-                    include_column_lineage=self.config.include_view_column_lineage,
-                )
-        yield from builder.gen_workunits()
+
+        for mcp in self.sql_parsing_aggregator.gen_metadata():
+            yield mcp.as_workunit()
 
     def close(self):
         if self.hive_metastore_proxy:
             self.hive_metastore_proxy.close()
-        if self.view_definitions:
-            self.view_definitions.close()
+        if self.sql_parsing_aggregator:
+            self.sql_parsing_aggregator.close()
         if self.sql_parser_schema_resolver:
             self.sql_parser_schema_resolver.close()
 
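Taken together, the source.py changes replace the removed SqlParsingBuilder/view_definitions plumbing with a single SqlParsingAggregator. The sketch below strings the calls visible in this diff into a standalone flow; the platform, URN, and SQL are illustrative, and constructor defaults not shown in the diff are assumed.

from datahub.sql_parsing.schema_resolver import SchemaResolver
from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator

# Lineage-only aggregator, mirroring the flags passed in __init__ above.
aggregator = SqlParsingAggregator(
    platform="databricks",
    schema_resolver=SchemaResolver(platform="databricks"),
    generate_lineage=True,
    generate_queries=False,
    generate_usage_statistics=False,
    generate_operations=False,
)

# Each view definition is registered as tables are processed...
aggregator.add_view_definition(
    view_urn="urn:li:dataset:(urn:li:dataPlatform:databricks,main.sales.daily_totals,PROD)",
    view_definition="SELECT day, SUM(amount) AS amount FROM main.sales.orders GROUP BY day",
    default_db="main",
    default_schema="sales",
)

# ...and get_view_lineage() later drains the aggregator into work units.
workunits = [mcp.as_workunit() for mcp in aggregator.gen_metadata()]
aggregator.close()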
--- a/datahub/ingestion/source/unity/usage.py
+++ b/datahub/ingestion/source/unity/usage.py
@@ -11,7 +11,10 @@ from databricks.sdk.service.sql import QueryStatementType
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.source_helpers import auto_empty_dataset_usage_statistics
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.unity.config import UnityCatalogSourceConfig
+from datahub.ingestion.source.unity.config import (
+    UnityCatalogSourceConfig,
+    UsageDataSource,
+)
 from datahub.ingestion.source.unity.proxy import UnityCatalogApiProxy
 from datahub.ingestion.source.unity.proxy_types import (
     OPERATION_STATEMENT_TYPES,
@@ -164,11 +167,50 @@ class UnityCatalogUsageExtractor:
             aspect=operation_aspect,
         ).as_workunit()
 
+    def _validate_usage_data_source_config(self) -> None:
+        """Validate usage data source configuration before execution."""
+        usage_data_source = self.config.usage_data_source
+
+        if (
+            usage_data_source == UsageDataSource.SYSTEM_TABLES
+            and not self.proxy.warehouse_id
+        ):
+            raise ValueError(
+                "usage_data_source is set to SYSTEM_TABLES but warehouse_id is not configured. "
+                "Either set warehouse_id or use AUTO/API mode."
+            )
+
     def _get_queries(self) -> Iterable[Query]:
         try:
-            yield from self.proxy.query_history(
-                self.config.start_time, self.config.end_time
-            )
+            self._validate_usage_data_source_config()
+            usage_data_source = self.config.usage_data_source
+
+            if usage_data_source == UsageDataSource.AUTO:
+                if self.proxy.warehouse_id:
+                    logger.info(
+                        "Using system tables for usage query history (AUTO mode)"
+                    )
+                    yield from self.proxy.get_query_history_via_system_tables(
+                        self.config.start_time, self.config.end_time
+                    )
+                else:
+                    logger.info(
+                        "Using API for usage query history (AUTO mode, no warehouse)"
+                    )
+                    yield from self.proxy.query_history(
+                        self.config.start_time, self.config.end_time
+                    )
+            elif usage_data_source == UsageDataSource.SYSTEM_TABLES:
+                logger.info("Using system tables for usage query history (forced)")
+                yield from self.proxy.get_query_history_via_system_tables(
+                    self.config.start_time, self.config.end_time
+                )
+            elif usage_data_source == UsageDataSource.API:
+                logger.info("Using API for usage query history (forced)")
+                yield from self.proxy.query_history(
+                    self.config.start_time, self.config.end_time
+                )
+
         except Exception as e:
             logger.warning("Error getting queries", exc_info=True)
             self.report.report_warning("get-queries", str(e))
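The new branching in _get_queries depends on a UsageDataSource enum coming from unity/config.py, whose +31 lines are not part of the hunks shown here. A hedged sketch of what that enum presumably looks like, based only on the members referenced above; the string values and any default are assumptions.

from enum import Enum


class UsageDataSource(Enum):
    # Prefer system tables when a SQL warehouse_id is configured, else fall back to the API.
    AUTO = "AUTO"
    # Always read query history from system tables; requires warehouse_id (validated above).
    SYSTEM_TABLES = "SYSTEM_TABLES"
    # Always use the Databricks query history REST API.
    API = "API"

The only invalid combination, SYSTEM_TABLES without a warehouse_id, is rejected up front by _validate_usage_data_source_config before any queries are fetched.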
--- a/datahub/sql_parsing/sql_parsing_aggregator.py
+++ b/datahub/sql_parsing/sql_parsing_aggregator.py
@@ -4,7 +4,6 @@ import enum
 import functools
 import json
 import logging
-import os
 import pathlib
 import tempfile
 import uuid
@@ -14,10 +13,10 @@ from typing import Callable, Dict, Iterable, List, Optional, Set, Union, cast
 
 import datahub.emitter.mce_builder as builder
 import datahub.metadata.schema_classes as models
+from datahub.configuration.env_vars import get_sql_agg_query_log
 from datahub.configuration.time_window_config import get_time_bucket
 from datahub.emitter.mce_builder import get_sys_time, make_ts_millis
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.sql_parsing_builder import compute_upstream_fields
 from datahub.ingestion.api.closeable import Closeable
 from datahub.ingestion.api.report import Report
 from datahub.ingestion.api.workunit import MetadataWorkUnit
@@ -84,7 +83,7 @@ class QueryLogSetting(enum.Enum):
 _DEFAULT_USER_URN = CorpUserUrn("_ingestion")
 _MISSING_SESSION_ID = "__MISSING_SESSION_ID"
 _DEFAULT_QUERY_LOG_SETTING = QueryLogSetting[
-    os.getenv("DATAHUB_SQL_AGG_QUERY_LOG") or QueryLogSetting.DISABLED.name
+    get_sql_agg_query_log() or QueryLogSetting.DISABLED.name
 ]
 MAX_UPSTREAM_TABLES_COUNT = 300
 MAX_FINEGRAINEDLINEAGE_COUNT = 2000
@@ -868,7 +867,7 @@ class SqlParsingAggregator(Closeable):
                 downstream=parsed.out_tables[0] if parsed.out_tables else None,
                 column_lineage=parsed.column_lineage,
                 # TODO: We need a full list of columns referenced, not just the out tables.
-                column_usage=compute_upstream_fields(parsed),
+                column_usage=self._compute_upstream_fields(parsed),
                 inferred_schema=infer_output_schema(parsed),
                 confidence_score=parsed.debug_info.confidence,
                 extra_info=observed.extra_info,
@@ -1157,7 +1156,7 @@
                     actor=None,
                     upstreams=parsed.in_tables,
                     column_lineage=parsed.column_lineage or [],
-                    column_usage=compute_upstream_fields(parsed),
+                    column_usage=self._compute_upstream_fields(parsed),
                     confidence_score=parsed.debug_info.confidence,
                 )
             )
@@ -1741,6 +1740,16 @@
 
         return resolved_query
 
+    @staticmethod
+    def _compute_upstream_fields(
+        result: SqlParsingResult,
+    ) -> Dict[UrnStr, Set[UrnStr]]:
+        upstream_fields: Dict[UrnStr, Set[UrnStr]] = defaultdict(set)
+        for cl in result.column_lineage or []:
+            for upstream in cl.upstreams:
+                upstream_fields[upstream.table].add(upstream.column)
+        return upstream_fields
+
     def _gen_usage_statistics_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
         if not self._usage_aggregator:
             return
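The compute_upstream_fields helper previously lived in datahub/emitter/sql_parsing_builder.py (deleted in this release, file 43 above) and is now a static method on the aggregator. A small illustration of the mapping it produces, using SimpleNamespace stand-ins for the column-lineage objects a SqlParsingResult would carry:

from collections import defaultdict
from types import SimpleNamespace

# One downstream column fed by orders.amount and orders.day, another fed by
# customers.id (URNs shortened for readability).
column_lineage = [
    SimpleNamespace(
        upstreams=[
            SimpleNamespace(table="urn:orders", column="amount"),
            SimpleNamespace(table="urn:orders", column="day"),
        ]
    ),
    SimpleNamespace(upstreams=[SimpleNamespace(table="urn:customers", column="id")]),
]

upstream_fields: dict = defaultdict(set)
for cl in column_lineage:
    for upstream in cl.upstreams:
        upstream_fields[upstream.table].add(upstream.column)

assert upstream_fields == {"urn:orders": {"amount", "day"}, "urn:customers": {"id"}}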
--- a/datahub/sql_parsing/sqlglot_lineage.py
+++ b/datahub/sql_parsing/sqlglot_lineage.py
@@ -691,6 +691,13 @@ def _column_level_lineage(
             select_statement=select_statement,
         )
 
+    # Handle VALUES expressions separately - they have no upstream tables and no column lineage
+    if isinstance(select_statement, sqlglot.exp.Values):
+        return _ColumnLineageWithDebugInfo(
+            column_lineage=[],
+            select_statement=select_statement,
+        )
+
     assert isinstance(select_statement, _SupportedColumnLineageTypesTuple)
     try:
         root_scope = sqlglot.optimizer.build_scope(select_statement)
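The new early return handles statements whose source is a bare VALUES expression, which references no upstream tables and therefore yields no column lineage. A quick, hedged illustration of the shape it targets, using sqlglot's parse tree for an INSERT ... VALUES statement:

import sqlglot
from sqlglot import exp

stmt = sqlglot.parse_one("INSERT INTO db.target (a, b) VALUES (1, 2), (3, 4)")
assert isinstance(stmt, exp.Insert)

# The inserted data is a Values node rather than a Select, so there is nothing
# to trace; the branch above returns an empty lineage result instead of falling
# through to the isinstance assert that follows it.
assert isinstance(stmt.expression, exp.Values)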
--- a/datahub/telemetry/telemetry.py
+++ b/datahub/telemetry/telemetry.py
@@ -16,6 +16,11 @@ from datahub._version import __version__, nice_version_name
 from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER
 from datahub.cli.env_utils import get_boolean_env_variable
 from datahub.configuration.common import ExceptionWithProps
+from datahub.configuration.env_vars import (
+    get_sentry_dsn,
+    get_sentry_environment,
+    get_telemetry_timeout,
+)
 from datahub.metadata.schema_classes import _custom_package_path
 from datahub.utilities.perf_timer import PerfTimer
 
@@ -97,11 +102,11 @@ if any(var in os.environ for var in CI_ENV_VARS):
 if _custom_package_path:
     ENV_ENABLED = False
 
-TIMEOUT = int(os.environ.get("DATAHUB_TELEMETRY_TIMEOUT", "10"))
+TIMEOUT = int(get_telemetry_timeout())
 MIXPANEL_ENDPOINT = "track.datahubproject.io/mp"
 MIXPANEL_TOKEN = "5ee83d940754d63cacbf7d34daa6f44a"
-SENTRY_DSN: Optional[str] = os.environ.get("SENTRY_DSN", None)
-SENTRY_ENVIRONMENT: str = os.environ.get("SENTRY_ENVIRONMENT", "dev")
+SENTRY_DSN: Optional[str] = get_sentry_dsn()
+SENTRY_ENVIRONMENT: str = get_sentry_environment()
 
 
 def _default_global_properties() -> Dict[str, Any]:
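Several modules in this release (telemetry.py above, plus sql_parsing_aggregator.py, file_backed_collections.py, is_pytest.py, logging_manager.py, and sample_data.py) swap direct os.environ lookups for accessors from the new datahub/configuration/env_vars.py (+331 lines, not shown in this diff). Judging from the call sites, these accessors are presumably thin wrappers that carry the defaults that used to be inlined; a sketch under that assumption, not the actual implementation:

import os
from typing import Optional


def get_telemetry_timeout() -> str:
    # The caller still wraps this in int(), so returning the raw string preserves the old behavior.
    return os.environ.get("DATAHUB_TELEMETRY_TIMEOUT", "10")


def get_sentry_dsn() -> Optional[str]:
    return os.environ.get("SENTRY_DSN")


def get_sentry_environment() -> str:
    return os.environ.get("SENTRY_ENVIRONMENT", "dev")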
--- a/datahub/utilities/file_backed_collections.py
+++ b/datahub/utilities/file_backed_collections.py
@@ -1,7 +1,6 @@
 import collections
 import gzip
 import logging
-import os
 import pathlib
 import pickle
 import shutil
@@ -28,6 +27,7 @@ from typing import (
     Union,
 )
 
+from datahub.configuration.env_vars import get_override_sqlite_version_req
 from datahub.ingestion.api.closeable import Closeable
 from datahub.utilities.sentinels import Unset, unset
 
@@ -36,7 +36,7 @@ logger: logging.Logger = logging.getLogger(__name__)
 
 def _get_sqlite_version_override() -> bool:
     """Check if SQLite version requirement should be overridden at runtime."""
-    override_str = os.environ.get("OVERRIDE_SQLITE_VERSION_REQ") or ""
+    override_str = get_override_sqlite_version_req()
     return bool(override_str and override_str.lower() != "false")
 
 
--- a/datahub/utilities/is_pytest.py
+++ b/datahub/utilities/is_pytest.py
@@ -1,6 +1,7 @@
-import os
 import sys
 
+from datahub.configuration.env_vars import get_test_mode
+
 
 def is_pytest_running() -> bool:
-    return "pytest" in sys.modules and os.environ.get("DATAHUB_TEST_MODE") == "1"
+    return "pytest" in sys.modules and get_test_mode() == "1"
--- a/datahub/utilities/logging_manager.py
+++ b/datahub/utilities/logging_manager.py
@@ -15,13 +15,13 @@ import collections
 import contextlib
 import itertools
 import logging
-import os
 import pathlib
 import sys
 from typing import Deque, Iterator, Optional
 
 import click
 
+from datahub.configuration.env_vars import get_no_color, get_suppress_logging_manager
 from datahub.utilities.tee_io import TeeIO
 
 BASE_LOGGING_FORMAT = (
@@ -38,7 +38,7 @@ IN_MEMORY_LOG_BUFFER_SIZE = 2000  # lines
 IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH = 2000  # characters
 
 
-NO_COLOR = os.environ.get("NO_COLOR", False)
+NO_COLOR = get_no_color()
 
 
 def extract_name_from_filename(filename: str, fallback_name: str) -> str:
@@ -179,6 +179,18 @@ class _LogBuffer:
         return text
 
 
+class _ResilientStreamHandler(logging.StreamHandler):
+    """StreamHandler that gracefully handles closed streams."""
+
+    def emit(self, record: logging.LogRecord) -> None:
+        try:
+            super().emit(record)
+        except (ValueError, OSError):
+            # Stream was closed (e.g., during pytest teardown)
+            # Silently ignore to prevent test failures
+            pass
+
+
 class _BufferLogHandler(logging.Handler):
     def __init__(self, storage: _LogBuffer) -> None:
         super().__init__()
@@ -201,7 +213,11 @@ class _BufferLogHandler(logging.Handler):
 def _remove_all_handlers(logger: logging.Logger) -> None:
     for handler in logger.handlers[:]:
         logger.removeHandler(handler)
-        handler.close()
+        try:
+            handler.close()
+        except (ValueError, OSError):
+            # Handler stream may already be closed (e.g., during pytest teardown)
+            pass
 
 
 _log_buffer = _LogBuffer(maxlen=IN_MEMORY_LOG_BUFFER_SIZE)
@@ -219,14 +235,14 @@ _default_formatter = logging.Formatter(BASE_LOGGING_FORMAT)
 def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[None]:
     _log_buffer.clear()
 
-    if os.environ.get("DATAHUB_SUPPRESS_LOGGING_MANAGER") == "1":
+    if get_suppress_logging_manager() == "1":
         # If we're running in pytest, we don't want to configure logging.
         yield
         return
 
     with contextlib.ExitStack() as stack:
         # Create stdout handler.
-        stream_handler = logging.StreamHandler()
+        stream_handler = _ResilientStreamHandler()
         stream_handler.addFilter(_DatahubLogFilter(debug=debug))
         stream_handler.setFormatter(_stream_formatter)
 
@@ -237,7 +253,7 @@ def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[None]:
             tee = TeeIO(sys.stdout, file)
             stack.enter_context(contextlib.redirect_stdout(tee))  # type: ignore
 
-            file_handler = logging.StreamHandler(file)
+            file_handler = _ResilientStreamHandler(file)
             file_handler.addFilter(_DatahubLogFilter(debug=True))
             file_handler.setFormatter(_default_formatter)
         else:
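configure_logging now builds both its stdout and file handlers from _ResilientStreamHandler, so records emitted after the underlying stream has been torn down no longer escalate into errors. A minimal sketch of the failure mode it guards against, assuming the handler is importable from datahub.utilities.logging_manager; the closed StringIO streams stand in for pytest's captured stdout/stderr at teardown:

import io
import logging
import sys

from datahub.utilities.logging_manager import _ResilientStreamHandler

stream = io.StringIO()
handler = _ResilientStreamHandler(stream)
logger = logging.getLogger("datahub.example")
logger.addHandler(handler)
logger.warning("written while the stream is open")

# Mimic pytest teardown: the handler's stream and stderr are both already closed.
stream.close()
saved_stderr, sys.stderr = sys.stderr, io.StringIO()
sys.stderr.close()
try:
    logger.warning("emitted after close")  # swallowed instead of raising ValueError
finally:
    sys.stderr = saved_stderr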
--- a/datahub/utilities/sample_data.py
+++ b/datahub/utilities/sample_data.py
@@ -1,12 +1,13 @@
-import os
 import pathlib
 import tempfile
 
 import requests
 
-DOCKER_COMPOSE_BASE = os.getenv(
-    "DOCKER_COMPOSE_BASE",
-    "https://raw.githubusercontent.com/datahub-project/datahub/master",
+from datahub.configuration.env_vars import get_docker_compose_base
+
+DOCKER_COMPOSE_BASE = (
+    get_docker_compose_base()
+    or "https://raw.githubusercontent.com/datahub-project/datahub/master"
 )
 BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
 BOOTSTRAP_MCES_URL = f"{DOCKER_COMPOSE_BASE}/{BOOTSTRAP_MCES_FILE}"
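get_docker_compose_base() presumably just reads the DOCKER_COMPOSE_BASE environment variable (the env_vars module is not shown in this diff), so the existing override still works; only the default moved from os.getenv's second argument to the or-fallback above. A hedged sketch of the override, with a hypothetical fork URL:

import os

# Must be set before datahub.utilities.sample_data is imported, since the URL
# is computed at module import time.
os.environ["DOCKER_COMPOSE_BASE"] = (
    "https://raw.githubusercontent.com/my-fork/datahub/my-branch"
)

from datahub.utilities.sample_data import BOOTSTRAP_MCES_URL

print(BOOTSTRAP_MCES_URL)
# https://raw.githubusercontent.com/my-fork/datahub/my-branch/metadata-ingestion/examples/mce_files/bootstrap_mce.json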