acryl-datahub 1.0.0.3rc9__py3-none-any.whl → 1.0.0.3rc11__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/METADATA +2524 -2471
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/RECORD +87 -87
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/datajob/dataflow.py +3 -3
- datahub/api/entities/forms/forms.py +34 -34
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/operation.py +4 -4
- datahub/cli/check_cli.py +3 -2
- datahub/cli/config_utils.py +2 -2
- datahub/cli/delete_cli.py +6 -5
- datahub/cli/docker_cli.py +2 -2
- datahub/cli/exists_cli.py +2 -1
- datahub/cli/get_cli.py +2 -1
- datahub/cli/iceberg_cli.py +6 -5
- datahub/cli/ingest_cli.py +9 -6
- datahub/cli/migrate.py +4 -3
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +3 -2
- datahub/cli/specific/assertions_cli.py +2 -1
- datahub/cli/specific/datacontract_cli.py +3 -2
- datahub/cli/specific/dataproduct_cli.py +10 -9
- datahub/cli/specific/dataset_cli.py +4 -3
- datahub/cli/specific/forms_cli.py +2 -1
- datahub/cli/specific/group_cli.py +2 -1
- datahub/cli/specific/structuredproperties_cli.py +4 -3
- datahub/cli/specific/user_cli.py +2 -1
- datahub/cli/state_cli.py +2 -1
- datahub/cli/timeline_cli.py +2 -1
- datahub/configuration/source_common.py +1 -1
- datahub/emitter/request_helper.py +116 -3
- datahub/emitter/rest_emitter.py +163 -93
- datahub/entrypoints.py +2 -1
- datahub/ingestion/api/source.py +2 -5
- datahub/ingestion/glossary/classification_mixin.py +4 -2
- datahub/ingestion/graph/client.py +16 -7
- datahub/ingestion/graph/config.py +14 -0
- datahub/ingestion/graph/filters.py +1 -1
- datahub/ingestion/run/pipeline.py +3 -2
- datahub/ingestion/run/pipeline_config.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +5 -6
- datahub/ingestion/source/apply/datahub_apply.py +2 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
- datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
- datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
- datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
- datahub/ingestion/source/dbt/dbt_common.py +10 -2
- datahub/ingestion/source/dbt/dbt_core.py +82 -42
- datahub/ingestion/source/feast.py +4 -4
- datahub/ingestion/source/ge_data_profiler.py +2 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +7 -1
- datahub/ingestion/source/metadata/lineage.py +2 -1
- datahub/ingestion/source/mode.py +74 -28
- datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
- datahub/ingestion/source/powerbi/config.py +1 -1
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/redshift/usage.py +10 -9
- datahub/ingestion/source/sql/clickhouse.py +5 -1
- datahub/ingestion/source/sql/druid.py +7 -2
- datahub/ingestion/source/sql/oracle.py +6 -2
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
- datahub/integrations/assertion/common.py +3 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +490 -490
- datahub/metadata/_urns/urn_defs.py +1786 -1786
- datahub/metadata/schema.avsc +17364 -16988
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/main_client.py +2 -2
- datahub/secret/datahub_secret_store.py +2 -1
- datahub/telemetry/telemetry.py +2 -2
- datahub/testing/check_imports.py +1 -1
- datahub/upgrade/upgrade.py +10 -12
- datahub/utilities/logging_manager.py +8 -1
- datahub/utilities/server_config_util.py +378 -10
- datahub/utilities/sqlalchemy_query_combiner.py +4 -5
- datahub/utilities/urn_encoder.py +1 -1
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/top_level.txt +0 -0
datahub/metadata/schema_classes.py CHANGED

@@ -20,16 +20,16 @@ from datahub.utilities._custom_package_loader import get_custom_models_package
 _custom_package_path = get_custom_models_package()

 if TYPE_CHECKING or not _custom_package_path:
-    from ._schema_classes import *
+    from ._internal_schema_classes import *

     # Required explicitly because __all__ doesn't include _ prefixed names.
-    from ._schema_classes import __SCHEMA_TYPES
+    from ._internal_schema_classes import __SCHEMA_TYPES

     if IS_SPHINX_BUILD:
         # Set __module__ to the current module so that Sphinx will document the
         # classes as belonging to this module instead of the custom package.
         for _cls in list(globals().values()):
-            if hasattr(_cls, "__module__") and "datahub.metadata._schema_classes" in _cls.__module__:
+            if hasattr(_cls, "__module__") and "datahub.metadata._internal_schema_classes" in _cls.__module__:
                 _cls.__module__ = __name__
 else:
     _custom_package = importlib.import_module(_custom_package_path)
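The `_schema_classes` to `_internal_schema_classes` rename is internal only; the public import path is unchanged. A minimal sketch of the supported usage (the class name is just one example of the generated model classes):

```python
# Import through the public facade, never through the internal module:
# the _internal_schema_classes name is an implementation detail.
from datahub.metadata.schema_classes import DatasetPropertiesClass

props = DatasetPropertiesClass(description="An example dataset")
print(props.description)
```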
datahub/metadata/schemas/__init__.py CHANGED

@@ -15,10 +15,10 @@ import pathlib
 def _load_schema(schema_name: str) -> str:
     return (pathlib.Path(__file__).parent / f"{schema_name}.avsc").read_text()

-def getMetadataChangeProposalSchema() -> str:
-    return _load_schema("MetadataChangeProposal")
-
 def getMetadataChangeEventSchema() -> str:
     return _load_schema("MetadataChangeEvent")

+def getMetadataChangeProposalSchema() -> str:
+    return _load_schema("MetadataChangeProposal")
+
 # fmt: on
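Both accessors read the bundled `.avsc` files from disk; only their order in the file changed. A quick sketch of loading and inspecting the proposal schema (the `json` parsing is illustrative, not part of this diff; Avro `.avsc` files are JSON documents):

```python
import json

from datahub.metadata.schemas import getMetadataChangeProposalSchema

# The accessor returns the Avro schema as a raw JSON string read from the
# bundled MetadataChangeProposal.avsc file; parse it to inspect the record.
mcp_schema = json.loads(getMetadataChangeProposalSchema())
print(mcp_schema.get("name"))  # likely "MetadataChangeProposal"
```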
datahub/sdk/main_client.py CHANGED

@@ -4,7 +4,7 @@ from typing import Optional, overload

 from datahub.errors import SdkUsageError
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
-from datahub.ingestion.graph.config import DatahubClientConfig
+from datahub.ingestion.graph.config import ClientMode, DatahubClientConfig
 from datahub.sdk.entity_client import EntityClient
 from datahub.sdk.lineage_client import LineageClient
 from datahub.sdk.resolver_client import ResolverClient
@@ -84,7 +84,7 @@ class DataHubClient:
         # Inspired by the DockerClient.from_env() method.
         # TODO: This one also reads from ~/.datahubenv, so the "from_env" name might be a bit confusing.
         # That file is part of the "environment", but is not a traditional "env variable".
-        graph = get_default_graph()
+        graph = get_default_graph(ClientMode.SDK)

         return cls(graph=graph)
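The SDK entry point now labels its connection with `ClientMode.SDK`, letting the server distinguish SDK traffic from CLI and ingestion traffic. A minimal sketch, assuming a configured ~/.datahubenv:

```python
from datahub.sdk.main_client import DataHubClient

# from_env() reads ~/.datahubenv (see the TODO above) and internally calls
# get_default_graph(ClientMode.SDK).
client = DataHubClient.from_env()
```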
datahub/secret/datahub_secret_store.py CHANGED

@@ -3,7 +3,8 @@ from typing import Any, Dict, List, Optional, Union

 from pydantic import BaseModel, validator

-from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig
+from datahub.ingestion.graph.client import DataHubGraph
+from datahub.ingestion.graph.config import DatahubClientConfig
 from datahub.secret.datahub_secrets_client import DataHubSecretsClient
 from datahub.secret.secret_store import SecretStore

datahub/telemetry/telemetry.py CHANGED

@@ -352,10 +352,10 @@ class Telemetry:
             }
         else:
             return {
-                "server_type": server.server_config.get("datahub", {}).get(
+                "server_type": server.server_config.raw_config.get("datahub", {}).get(
                     "serverType", "missing"
                 ),
-                "server_version": server.server_config.get("versions", {})
+                "server_version": server.server_config.raw_config.get("versions", {})
                 .get("acryldata/datahub", {})
                 .get("version", "missing"),
                 "server_id": server.server_id or "missing",
datahub/testing/check_imports.py CHANGED

@@ -9,7 +9,7 @@ def ensure_no_indirect_model_imports(dirs: List[pathlib.Path]) -> None:
     # If our needs become more complex, we should move to a proper linter.
     denied_imports = {
         "src.": "datahub.*",
-        "datahub.metadata._schema_classes": "datahub.metadata.schema_classes",
+        "datahub.metadata._internal_schema_classes": "datahub.metadata.schema_classes",
         "datahub.metadata._urns": "datahub.metadata.urns",
     }
     ignored_files = {
datahub/upgrade/upgrade.py CHANGED

@@ -13,7 +13,9 @@ from pydantic import BaseModel
 from datahub._version import __version__
 from datahub.cli.config_utils import load_client_config
 from datahub.ingestion.graph.client import DataHubGraph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.utilities.perf_timer import PerfTimer
+from datahub.utilities.server_config_util import RestServiceConfig

 log = logging.getLogger(__name__)

@@ -109,7 +111,7 @@ async def get_github_stats():
     return (latest_server_version, latest_server_date)


-async def get_server_config(gms_url: str, token: Optional[str]) -> dict:
+async def get_server_config(gms_url: str, token: Optional[str]) -> RestServiceConfig:
     import aiohttp

     headers = {
@@ -124,7 +126,7 @@ async def get_server_config(gms_url: str, token: Optional[str]) -> dict:
     config_endpoint = f"{gms_url}/config"
     async with session.get(config_endpoint, headers=headers) as dh_response:
         dh_response_json = await dh_response.json()
-        return dh_response_json
+        return RestServiceConfig(raw_config=dh_response_json)


 async def get_server_version_stats(
@@ -132,11 +134,12 @@ async def get_server_version_stats(
 ) -> Tuple[Optional[str], Optional[Version], Optional[datetime]]:
     import aiohttp

-    server_config = None
+    server_config: Optional[RestServiceConfig] = None
     if not server:
         try:
             # let's get the server from the cli config
             client_config = load_client_config()
+            client_config.client_mode = ClientMode.CLI
             host = client_config.server
             token = client_config.token
             server_config = await get_server_config(host, token)
@@ -150,15 +153,10 @@ async def get_server_version_stats(
     server_version: Optional[Version] = None
     current_server_release_date = None
     if server_config:
-        server_version_string = (
-            server_config.get("versions", {})
-            .get("acryldata/datahub", {})
-            .get("version")
-        )
-        commit_hash = (
-            server_config.get("versions", {}).get("acryldata/datahub", {}).get("commit")
-        )
-        server_type = server_config.get("datahub", {}).get("serverType", "unknown")
+        server_version_string = server_config.service_version
+        commit_hash = server_config.commit_hash
+        server_type = server_config.server_type
+
     if server_type == "quickstart" and commit_hash:
         async with aiohttp.ClientSession(
             headers={"Accept": "application/vnd.github.v3+json"}
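`get_server_config()` now returns a typed `RestServiceConfig` instead of a raw dict, so callers read properties rather than chained `.get()` lookups. A sketch of the new flow, assuming a GMS reachable at localhost:8080 with no auth token:

```python
import asyncio

from datahub.upgrade.upgrade import get_server_config


async def main() -> None:
    # Returns a RestServiceConfig wrapping the /config payload; version
    # details are exposed as typed properties instead of nested dict lookups.
    config = await get_server_config("http://localhost:8080", None)
    print(config.service_version, config.server_type, config.commit_hash)


asyncio.run(main())
```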
datahub/utilities/logging_manager.py CHANGED

@@ -161,6 +161,7 @@ class _LogBuffer:
         self._buffer: Deque[str] = collections.deque(maxlen=maxlen)

     def write(self, line: str) -> None:
+        # We do not expect `line` to have a trailing newline.
         if len(line) > IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH:
             line = line[:IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH] + "[truncated]"

@@ -188,7 +189,13 @@ class _BufferLogHandler(logging.Handler):
             message = self.format(record)
         except TypeError as e:
             message = f"Error formatting log message: {e}\nMessage: {record.msg}, Args: {record.args}"
-        self._storage.write(message)
+
+        # For exception stack traces, the message is split over multiple lines,
+        # but we store it as a single string. Because we truncate based on line
+        # length, it's better for us to split it into multiple lines so that we
+        # don't lose any information on deeper stack traces.
+        for line in message.split("\n"):
+            self._storage.write(line)


 def _remove_all_handlers(logger: logging.Logger) -> None:
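The handler change matters because a formatted exception record embeds newlines, one per traceback frame; storing it as a single string would let the per-line truncation drop the deeper frames. A small standard-library illustration of the kind of message involved (not DataHub code):

```python
import logging
import sys

try:
    raise ValueError("boom")
except ValueError:
    # A formatted exception spans multiple lines, one per traceback frame;
    # the buffer handler now writes each line separately so that per-line
    # truncation cannot swallow the deeper frames.
    formatted = logging.Formatter().formatException(sys.exc_info())
    for line in formatted.split("\n"):
        print(line)
```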
datahub/utilities/server_config_util.py CHANGED

@@ -1,23 +1,391 @@
-from typing import Any, Dict, Optional
+import logging
+import re
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import (
+    Any,
+    Dict,
+    Optional,
+    Tuple,
+    Union,
+)

+import requests
+
+from datahub.configuration.common import (
+    ConfigurationError,
+)
 from datahub.telemetry.telemetry import suppress_telemetry

+logger = logging.getLogger(__name__)
+
 # Only to be written to for logging server related information
 global_debug: Dict[str, Any] = {}


-def set_gms_config(config: Dict) -> None:
+def get_gms_config() -> Dict:
+    return global_debug.get("gms_config", {})
+
+
+class ServiceFeature(Enum):
+    """
+    Enum representing supported features in the REST service.
+    """
+
+    OPEN_API_SDK = "openapi_sdk"
+    API_TRACING = "api_tracing"
+    NO_CODE = "no_code"
+    STATEFUL_INGESTION = "stateful_ingestion"
+    IMPACT_ANALYSIS = "impact_analysis"
+    PATCH_CAPABLE = "patch_capable"
+    CLI_TELEMETRY = "cli_telemetry"
+    DATAHUB_CLOUD = "datahub_cloud"
+    # Add more features as needed
+
+
+_REQUIRED_VERSION_OPENAPI_TRACING = {
+    "acryl": (
+        0,
+        3,
+        11,
+        0,
+    ),  # Requires v0.3.11.0 or higher for acryl versions
+    "cloud": (0, 3, 11, 0),  # Special case for '-cloud' suffix
+    "any_suffix": (0, 3, 11, 0),  # Generic requirement for any other suffix
+    "none": (1, 0, 1, 0),  # Requirement for versions without suffix
+}
+
+
+@dataclass
+class RestServiceConfig:
+    """
+    A class to represent REST service configuration with semantic version parsing capabilities.
+    """
+
+    session: Optional[requests.Session] = None
+    url: Optional[str] = None
+    raw_config: Dict[str, Any] = field(default_factory=dict)
+    _version_cache: Optional[Tuple[int, int, int, int]] = None
+
+    def fetch_config(self) -> Dict[str, Any]:
+        """
+        Fetch configuration from the server if not already loaded.
+
+        Returns:
+            The configuration dictionary
+
+        Raises:
+            ConfigurationError: If there's an error fetching or validating the configuration
+        """
+        if not self.raw_config:
+            if self.session is None or self.url is None:
+                raise ConfigurationError(
+                    "Session and URL are required to load configuration"
+                )
+
+            response = self.session.get(self.url)
+
+            if response.status_code == 200:
+                config = response.json()
+
+                # Validate that we're connected to the correct service
+                if config.get("noCode") == "true":
+                    self.raw_config = config
+                else:
+                    raise ConfigurationError(
+                        "You seem to have connected to the frontend service instead of the GMS endpoint. "
+                        "The rest emitter should connect to DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms). "
+                        "For Acryl users, the endpoint should be https://<name>.acryl.io/gms"
+                    )
+            else:
+                logger.debug(
+                    f"Unable to connect to {self.url} with status_code: {response.status_code}. Response: {response.text}"
+                )
+
+                if response.status_code == 401:
+                    message = f"Unable to connect to {self.url} - got an authentication error: {response.text}."
+                else:
+                    message = f"Unable to connect to {self.url} with status_code: {response.status_code}."
+
+                message += "\nPlease check your configuration and make sure you are talking to the DataHub GMS (usually <datahub-gms-host>:8080) or Frontend GMS API (usually <frontend>:9002/api/gms)."
+                raise ConfigurationError(message)
+
+        return self.raw_config
+
+    @property
+    def config(self) -> Dict[str, Any]:
+        """
+        Get the full configuration dictionary, loading it if necessary.
+
+        Returns:
+            The configuration dictionary
+        """
+        return self.fetch_config()
+
+    @property
+    def commit_hash(self) -> Optional[str]:
+        """
+        Get the commit hash for the current version.
+
+        Returns:
+            The commit hash or None if not found
+        """
+        versions = self.config.get("versions") or {}
+        datahub_info = versions.get("acryldata/datahub") or {}
+        return datahub_info.get("commit")
+
+    @property
+    def server_type(self) -> str:
+        """
+        Get the server type.
+
+        Returns:
+            The server type or "unknown" if not found
+        """
+        datahub = self.config.get("datahub") or {}
+        return datahub.get("serverType", "unknown")
+
+    @property
+    def service_version(self) -> Optional[str]:
+        """
+        Get the raw service version string.
+
+        Returns:
+            The version string or None if not found
+        """
+        config = self.fetch_config()
+        versions = config.get("versions") or {}
+        datahub_info = versions.get("acryldata/datahub") or {}
+        return datahub_info.get("version")
+
+    def _parse_version(
+        self, version_str: Optional[str] = None
+    ) -> Tuple[int, int, int, int]:
+        """
+        Parse a semantic version string into its components, ignoring rc and suffixes.
+        Supports standard three-part versions (1.0.0) and four-part versions (1.0.0.1).
+
+        Args:
+            version_str: Version string to parse. If None, uses the service version.
+
+        Returns:
+            Tuple of (major, minor, patch, build) version numbers where build is 0 for three-part versions
+
+        Raises:
+            ValueError: If the version string cannot be parsed
+        """
+        if version_str is None:
+            version_str = self.service_version
+
+        if not version_str:
+            return (0, 0, 0, 0)
+
+        # Remove 'v' prefix if present
+        if version_str.startswith("v"):
+            version_str = version_str[1:]
+
+        # Extract the semantic version part (before any rc or suffix)
+        # This pattern will match both three-part (1.0.0) and four-part (1.0.0.1) versions
+        match = re.match(r"(\d+)\.(\d+)\.(\d+)(?:\.(\d+))?(?:rc\d+|-.*)?", version_str)
+        if not match:
+            raise ValueError(f"Invalid version format: {version_str}")
+
+        major = int(match.group(1))
+        minor = int(match.group(2))
+        patch = int(match.group(3))
+        build = (
+            int(match.group(4)) if match.group(4) else 0
+        )  # Default to 0 if not present
+
+        return (major, minor, patch, build)
+
+    @property
+    def parsed_version(self) -> Optional[Tuple[int, int, int, int]]:
+        """
+        Get the parsed semantic version of the service.
+        Uses caching for efficiency.
+
+        Returns:
+            Tuple of (major, minor, patch) version numbers
+        """
+        if self._version_cache is None:
+            self._version_cache = self._parse_version()
+        return self._version_cache
+
+    def is_version_at_least(
+        self, major: int, minor: int = 0, patch: int = 0, build: int = 0
+    ) -> bool:
+        """
+        Check if the service version is at least the specified version.
+
+        Args:
+            major: Major version to check against
+            minor: Minor version to check against
+            patch: Patch version to check against
+            build: Build version to check against (for four-part versions)
+
+        Returns:
+            True if the service version is at least the specified version
+        """
+        current_version = self.parsed_version or (0, 0, 0, 0)
+        requested_version = (major, minor, patch, build)
+
+        return current_version >= requested_version
+
+    @property
+    def is_no_code_enabled(self) -> bool:
+        """
+        Check if noCode is enabled.
+
+        Returns:
+            True if noCode is set to "true"
+        """
+        return self.config.get("noCode") == "true"
+
+    @property
+    def is_managed_ingestion_enabled(self) -> bool:
+        """
+        Check if managedIngestion is enabled.
+
+        Returns:
+            True if managedIngestion.enabled is True
+        """
+        managed_ingestion = self.config.get("managedIngestion") or {}
+        return managed_ingestion.get("enabled", False)
+
+    @property
+    def is_datahub_cloud(self) -> bool:
+        """
+        Check if DataHub Cloud is enabled.
+
+        Returns:
+            True if the server environment is not 'oss'
+        """
+        datahub_config = self.config.get("datahub") or {}
+        server_env = datahub_config.get("serverEnv")
+
+        # Return False if serverEnv is None or empty string
+        if not server_env:
+            return False
+
+        return server_env != "oss"
+
+    def supports_feature(self, feature: ServiceFeature) -> bool:
+        """
+        Determines whether a specific feature is supported based on service version.
+
+        Version categorization follows these rules:
+        1. Has '-acryl' suffix (highest priority)
+        2. Has a specific known suffix (e.g. '-other')
+        3. Has some other suffix (catchall for any suffix)
+        4. No suffix
+
+        Args:
+            feature: Feature enum value to check
+
+        Returns:
+            Boolean indicating whether the feature is supported
+        """
+        version = self.service_version
+        if not version:
+            return False
+
+        # Determine the suffix category
+        suffix_category = "none"  # Default: no suffix
+
+        if "-" in version:
+            suffix = version.split("-", 1)[1]
+
+            if suffix == "acryl":
+                suffix_category = "acryl"
+            elif suffix == "cloud":  # Example of a specific override
+                suffix_category = "cloud"
+            else:
+                suffix_category = "any_suffix"  # Catchall for any other suffix
+
+        # Define feature requirements based on version scheme
+        # This can be expanded to include more features
+        feature_requirements = {
+            ServiceFeature.OPEN_API_SDK: _REQUIRED_VERSION_OPENAPI_TRACING,
+            ServiceFeature.API_TRACING: _REQUIRED_VERSION_OPENAPI_TRACING,
+            # Additional features can be defined here
+        }
+
+        # Special handling for features that rely on config flags instead of version
+        config_based_features = {
+            ServiceFeature.NO_CODE: lambda: self.is_no_code_enabled,
+            ServiceFeature.STATEFUL_INGESTION: lambda: self.config.get(
+                "statefulIngestionCapable", False
+            )
+            is True,
+            ServiceFeature.IMPACT_ANALYSIS: lambda: self.config.get(
+                "supportsImpactAnalysis", False
+            )
+            is True,
+            ServiceFeature.PATCH_CAPABLE: lambda: self.config.get("patchCapable", False)
+            is True,
+            ServiceFeature.CLI_TELEMETRY: lambda: (
+                self.config.get("telemetry") or {}
+            ).get("enabledCli", None),
+            # Add more config-based feature checks as needed
+        }
+
+        # Check if this is a config-based feature
+        if feature in config_based_features:
+            return config_based_features[feature]()
+
+        # Check if the feature exists in our requirements dictionary
+        if feature not in feature_requirements:
+            # Unknown feature, assume not supported
+            return False
+
+        # Get version requirements for this feature and version category
+        feature_reqs = feature_requirements[feature]
+        requirements = feature_reqs.get(suffix_category)
+
+        if not requirements:
+            # Fallback to the no-suffix requirements if specific requirements aren't defined
+            requirements = feature_reqs.get(
+                "none", (99, 99, 99, 99)
+            )  # Very high version if none defined
+
+        # Check if the current version meets the requirements
+        req_major, req_minor, req_patch, req_build = requirements
+        return self.is_version_at_least(req_major, req_minor, req_patch, req_build)
+
+    def __str__(self) -> str:
+        """
+        Return a string representation of the configuration as JSON.
+
+        Returns:
+            A string representation of the configuration dictionary
+        """
+        return str(self.config)
+
+    def __repr__(self) -> str:
+        """
+        Return a representation of the object that can be used to recreate it.
+
+        Returns:
+            A string representation that can be used with pprint
+        """
+        return str(self.config)
+
+
+def set_gms_config(config: Union[Dict[str, Any], RestServiceConfig]) -> None:
     global_debug["gms_config"] = config

-    cli_telemetry_enabled = is_cli_telemetry_enabled()
-    if cli_telemetry_enabled is not None and not cli_telemetry_enabled:
+    config_obj = (
+        config
+        if isinstance(config, RestServiceConfig)
+        else RestServiceConfig(raw_config=config)
+    )
+
+    cli_telemetry_enabled = is_cli_telemetry_enabled(config_obj)
+    if cli_telemetry_enabled is not None and not cli_telemetry_enabled:
         # server requires telemetry to be disabled on client
         suppress_telemetry()


-def get_gms_config() -> Dict:
-    return global_debug.get("gms_config", {})
-
-
-def is_cli_telemetry_enabled() -> Optional[bool]:
-    return get_gms_config().get("telemetry", {}).get("enabledCli", None)
+def is_cli_telemetry_enabled(config: RestServiceConfig) -> bool:
+    return config.supports_feature(ServiceFeature.CLI_TELEMETRY)
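`RestServiceConfig` wraps the GMS `/config` payload and centralizes version parsing and feature gating. A usage sketch with a hand-built payload (a real one comes from `GET /config` on the GMS):

```python
from datahub.utilities.server_config_util import RestServiceConfig, ServiceFeature

config = RestServiceConfig(
    raw_config={
        "noCode": "true",
        "datahub": {"serverType": "quickstart", "serverEnv": "oss"},
        "versions": {"acryldata/datahub": {"version": "v1.0.1", "commit": "abc123"}},
    }
)

assert config.parsed_version == (1, 0, 1, 0)  # "v" prefix stripped, build defaults to 0
assert config.is_version_at_least(1, 0, 1)
# No version suffix, so the "none" requirement (1, 0, 1, 0) applies:
assert config.supports_feature(ServiceFeature.API_TRACING)
assert not config.is_datahub_cloud  # serverEnv == "oss"
```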
datahub/utilities/sqlalchemy_query_combiner.py CHANGED

@@ -272,11 +272,10 @@ class SQLAlchemyQueryCombiner:
             self.report.uncombined_queries_issued += 1
             return _sa_execute_underlying_method(conn, query, *args, **kwargs)

-        with _sa_execute_method_patching_lock:
-            with unittest.mock.patch(
-                "sqlalchemy.engine.Connection.execute", _sa_execute_fake
-            ):
-                yield self
+        with _sa_execute_method_patching_lock, unittest.mock.patch(
+            "sqlalchemy.engine.Connection.execute", _sa_execute_fake
+        ):
+            yield self

     def run(self, method: Callable[[], None]) -> None:
         """
datahub/utilities/urn_encoder.py CHANGED

@@ -4,7 +4,7 @@ from typing import List
 # NOTE: Frontend relies on encoding these three characters. Specifically, we decode and encode schema fields for column level lineage.
 # If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
 # We also rely on encoding these exact three characters when generating schemaField urns in our graphQL layer. Update SchemaFieldUtils if this changes.
-# Also see https://datahubproject.io/docs/what/urn/#restrictions
+# Also see https://docs.datahub.com/docs/what/urn/#restrictions
 RESERVED_CHARS = {",", "(", ")", "␟"}
 RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"})
