acryl-datahub 0.15.0.6rc3__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2552 -2523
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +204 -191
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +1 -1
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +3 -5
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +3 -3
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +6 -12
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +7 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +251 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +29 -5
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +20 -13
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/testing/pytest_hooks.py
ADDED
@@ -0,0 +1,56 @@
+import dataclasses
+from typing import Optional
+
+import pytest
+
+__all__ = [
+    "load_golden_flags",
+    "get_golden_settings",
+    "pytest_addoption",
+    "GoldenFileSettings",
+]
+
+
+@dataclasses.dataclass
+class GoldenFileSettings:
+    update_golden: bool
+    copy_output: bool
+
+
+_registered: bool = False
+_settings: Optional[GoldenFileSettings] = None
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    parser.addoption(
+        "--update-golden-files",
+        action="store_true",
+        default=False,
+    )
+
+    # TODO: Deprecate and remove this flag.
+    parser.addoption("--copy-output-files", action="store_true", default=False)
+
+    global _registered
+    _registered = True
+
+
+@pytest.fixture(scope="session", autouse=True)
+def load_golden_flags(pytestconfig: pytest.Config) -> None:
+    global _settings
+    _settings = GoldenFileSettings(
+        update_golden=pytestconfig.getoption("--update-golden-files"),
+        copy_output=pytestconfig.getoption("--copy-output-files"),
+    )
+
+
+def get_golden_settings() -> GoldenFileSettings:
+    if not _registered:
+        raise ValueError(
+            "Golden files aren't set up properly. Call register_golden_flags from a conftest pytest_addoptions method."
+        )
+    if not _settings:
+        raise ValueError(
+            "Golden files aren't set up properly. Ensure load_golden_flags is imported in your conftest."
+        )
+    return _settings
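The error messages above describe how these hooks are meant to be wired in. A minimal sketch, assuming a conftest.py at the root of a test tree plus an illustrative test module (neither file ships with the package):

```python
# conftest.py -- illustrative; re-exporting these lets pytest register the
# --update-golden-files / --copy-output-files options and the autouse fixture.
from datahub.testing.pytest_hooks import (  # noqa: F401
    load_golden_flags,
    pytest_addoption,
)


# test_golden_example.py -- hypothetical test that reads the recorded flags.
from datahub.testing.pytest_hooks import get_golden_settings


def test_emits_expected_metadata() -> None:
    settings = get_golden_settings()
    if settings.update_golden:
        ...  # regenerate the golden file
    else:
        ...  # compare generated output against the golden file
```

With that in place, running `pytest --update-golden-files` toggles `settings.update_golden` for the whole session.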
datahub/upgrade/upgrade.py
CHANGED
@@ -293,9 +293,9 @@ def is_client_server_compatible(client: VersionStats, server: VersionStats) -> i
     return server.version.micro - client.version.micro


-def _maybe_print_upgrade_message(
+def _maybe_print_upgrade_message(
     version_stats: Optional[DataHubVersionStats],
-) -> None:
+) -> None:
     days_before_cli_stale = 7
     days_before_quickstart_stale = 7
datahub/utilities/file_backed_collections.py
CHANGED
@@ -10,13 +10,11 @@ import tempfile
 import threading
 from dataclasses import dataclass, field
 from datetime import datetime
-from enum import Enum
 from types import TracebackType
 from typing import (
     Any,
     Callable,
     Dict,
-    Final,
     Generic,
     Iterator,
     List,
@@ -31,6 +29,7 @@ from typing import (
 )

 from datahub.ingestion.api.closeable import Closeable
+from datahub.utilities.sentinels import Unset, unset

 logger: logging.Logger = logging.getLogger(__name__)

@@ -59,16 +58,6 @@ SqliteValue = Union[int, float, str, bytes, datetime, None]
 _VT = TypeVar("_VT")


-class Unset(Enum):
-    token = 0
-
-
-# It's pretty annoying to create a true sentinel that works with typing.
-# https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions
-# Can't wait for https://peps.python.org/pep-0661/
-_unset: Final = Unset.token
-
-
 class ConnectionWrapper:
     """
     Wraps a SQlite connection, allowing connection reuse across multiple FileBacked* objects.
@@ -372,7 +361,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
         self,
         /,
         key: str,
-        default: Union[_VT, Unset] = _unset,
+        default: Union[_VT, Unset] = unset,
     ) -> _VT:
         # If key is in the dictionary, this is similar to __getitem__ + mark_dirty.
         # If key is not in the dictionary, this is similar to __setitem__.
@@ -383,7 +372,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
             self.mark_dirty(key)
             return value
         except KeyError:
-            if default is _unset:
+            if default is unset:
                 raise

             self[key] = default
datahub/utilities/ingest_utils.py
ADDED
@@ -0,0 +1,106 @@
+import json
+import logging
+from typing import Optional
+
+import click
+
+from datahub.configuration.common import ConfigModel
+from datahub.configuration.config_loader import load_config_file
+from datahub.emitter.mce_builder import datahub_guid
+
+logger = logging.getLogger(__name__)
+
+
+def _make_ingestion_urn(name: str) -> str:
+    guid = datahub_guid(
+        {
+            "name": name,
+        }
+    )
+    return f"urn:li:dataHubIngestionSource:deploy-{guid}"
+
+
+class DeployOptions(ConfigModel):
+    name: str
+    schedule: Optional[str] = None
+    time_zone: str = "UTC"
+    cli_version: Optional[str] = None
+    executor_id: str = "default"
+
+
+def deploy_source_vars(
+    name: Optional[str],
+    config: str,
+    urn: Optional[str],
+    executor_id: str,
+    cli_version: Optional[str],
+    schedule: Optional[str],
+    time_zone: str,
+    extra_pip: Optional[str],
+    debug: bool = False,
+) -> dict:
+    pipeline_config = load_config_file(
+        config,
+        allow_stdin=True,
+        allow_remote=True,
+        resolve_env_vars=False,
+    )
+
+    deploy_options_raw = pipeline_config.pop("deployment", None)
+    if deploy_options_raw is not None:
+        deploy_options = DeployOptions.parse_obj(deploy_options_raw)
+
+        if name:
+            logger.info(f"Overriding deployment name {deploy_options.name} with {name}")
+            deploy_options.name = name
+    else:
+        if not name:
+            raise click.UsageError(
+                "Either --name must be set or deployment_name specified in the config"
+            )
+        deploy_options = DeployOptions(name=name)
+
+    # Use remaining CLI args to override deploy_options
+    if schedule:
+        deploy_options.schedule = schedule
+    if time_zone:
+        deploy_options.time_zone = time_zone
+    if cli_version:
+        deploy_options.cli_version = cli_version
+    if executor_id:
+        deploy_options.executor_id = executor_id
+
+    logger.info(f"Using {repr(deploy_options)}")
+
+    if not urn:
+        # When urn/name is not specified, we will generate a unique urn based on the deployment name.
+        urn = _make_ingestion_urn(deploy_options.name)
+        logger.info(f"Using recipe urn: {urn}")
+
+    variables: dict = {
+        "urn": urn,
+        "input": {
+            "name": deploy_options.name,
+            "type": pipeline_config["source"]["type"],
+            "config": {
+                "recipe": json.dumps(pipeline_config),
+                "executorId": deploy_options.executor_id,
+                "debugMode": debug,
+                "version": deploy_options.cli_version,
+            },
+        },
+    }
+
+    if deploy_options.schedule is not None:
+        variables["input"]["schedule"] = {
+            "interval": deploy_options.schedule,
+            "timezone": deploy_options.time_zone,
+        }
+    if extra_pip is not None:
+        extra_args_list = (
+            variables.get("input", {}).get("config", {}).get("extraArgs", [])
+        )
+        extra_args_list.append({"key": "extra_pip_requirements", "value": extra_pip})
+        variables["input"]["config"]["extraArgs"] = extra_args_list
+
+    return variables
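This module appears to back the reworked `datahub ingest deploy` flow (note the matching shrinkage of datahub/cli/ingest_cli.py in the file list above). A hedged sketch of calling `deploy_source_vars` directly; the recipe path and option values below are made up, and the recipe must contain a `source` section:

```python
from datahub.utilities.ingest_utils import deploy_source_vars

# All values here are hypothetical examples.
variables = deploy_source_vars(
    name="snowflake-prod",                  # overrides any name in the recipe's `deployment` block
    config="./snowflake_recipe.dhub.yaml",  # local path; stdin and remote URLs are also accepted
    urn=None,                               # None -> urn:li:dataHubIngestionSource:deploy-<guid> derived from the name
    executor_id="default",
    cli_version=None,
    schedule="0 5 * * *",
    time_zone="UTC",
    extra_pip=None,
    debug=False,
)
```

The returned dict carries the ingestion-source urn plus an `input` payload (name, source type, recipe JSON, optional schedule and extraArgs), presumably consumed by DataHub's ingestion-source GraphQL mutation on the server side.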
datahub/utilities/mapping.py
CHANGED
@@ -171,7 +171,7 @@ class OperationProcessor:
         self.owner_source_type = owner_source_type
         self.match_nested_props = match_nested_props

-    def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]:
+    def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]:
         # Defining the following local variables -
         # operations_map - the final resulting map when operations are processed.
         # Against each operation the values to be applied are stored.
datahub/utilities/memory_footprint.py
CHANGED
@@ -1,10 +1,10 @@
 from collections import deque
 from itertools import chain
 from sys import getsizeof
-from typing import Any, Iterator
+from typing import Any, Iterator, Optional


-def total_size(o: Any, handlers: Any = {}) -> int:
+def total_size(o: Any, handlers: Optional[Any] = None) -> int:
     """Returns the approximate memory footprint an object and all of its contents.
     Automatically finds the contents of the following builtin containers and
     their subclasses: tuple, list, deque, dict, set and frozenset.
@@ -14,6 +14,7 @@ def total_size(o: Any, handlers: Any = {}) -> int:

     Based on https://github.com/ActiveState/recipe-577504-compute-mem-footprint/blob/master/recipe.py
     """
+    handlers = handlers or {}

     def dict_handler(d: dict) -> Iterator[Any]:
         return chain.from_iterable(d.items())
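The only behavioral change here is replacing the mutable `{}` default with `None` plus `handlers = handlers or {}`; calling the helper is unchanged. A minimal usage sketch:

```python
from datahub.utilities.memory_footprint import total_size

# Approximate deep size, in bytes, of a nested container and everything it holds.
size_bytes = total_size({"rows": [list(range(100)) for _ in range(10)]})
print(size_bytes)
```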
datahub/utilities/sentinels.py
ADDED
@@ -0,0 +1,22 @@
+from enum import Enum
+from typing import Final
+
+# It's pretty annoying to create a true sentinel that works with typing.
+# This approach using enums is inspired by:
+# https://peps.python.org/pep-0484/#support-for-singleton-types-in-unions
+#
+# Can't wait for https://peps.python.org/pep-0661/
+
+
+class Unset(Enum):
+    token = 0
+
+
+unset: Final = Unset.token
+
+
+class Auto(Enum):
+    token = 0
+
+
+auto: Final = Auto.token
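The `unset` sentinel lets an optional parameter distinguish "argument not supplied" from an explicit `None`, which is exactly how the `FileBackedDict` change above uses it. A minimal sketch of the same pattern, with a hypothetical `lookup` helper:

```python
from typing import Union

from datahub.utilities.sentinels import Unset, unset


def lookup(table: dict, key: str, default: Union[str, Unset] = unset) -> str:
    # Hypothetical helper: `unset` tells "no default given" apart from a
    # caller explicitly passing None or an empty string.
    try:
        return table[key]
    except KeyError:
        if default is unset:
            raise
        return default
```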
datahub/utilities/unified_diff.py
CHANGED
@@ -2,8 +2,12 @@ import logging
 from dataclasses import dataclass
 from typing import List, Tuple

+from datahub.cli.env_utils import get_boolean_env_variable
+
+_debug_diff = get_boolean_env_variable("DATAHUB_DEBUG_DIFF_PATCHER")
+
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+logger.setLevel(logging.DEBUG if _debug_diff else logging.INFO)

 _LOOKAROUND_LINES = 300

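Because `_debug_diff` is evaluated at module import time, the environment variable has to be set before the module is first imported; a sketch, assuming `get_boolean_env_variable` treats "true" as truthy:

```python
import os

# Must run before datahub.utilities.unified_diff is imported anywhere.
os.environ["DATAHUB_DEBUG_DIFF_PATCHER"] = "true"

from datahub.utilities import unified_diff  # noqa: E402  # module logger is now at DEBUG
```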
{acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE
File without changes
{acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt
File without changes