acryl-datahub 1.2.0.9rc2__py3-none-any.whl → 1.2.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/METADATA +2553 -2611
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/RECORD +118 -111
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +6 -3
- datahub/api/entities/dataset/dataset.py +9 -18
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/docker_check.py +2 -2
- datahub/configuration/common.py +29 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/pydantic_migration_helpers.py +0 -9
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +5 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/ingestion/autogenerated/capability_summary.json +45 -1
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/source/azure/azure_common.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
- datahub/ingestion/source/datahub/config.py +8 -9
- datahub/ingestion/source/dbt/dbt_common.py +65 -5
- datahub/ingestion/source/delta_lake/config.py +1 -1
- datahub/ingestion/source/dremio/dremio_config.py +3 -4
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +19 -2
- datahub/ingestion/source/ge_data_profiler.py +15 -2
- datahub/ingestion/source/ge_profiling_config.py +26 -22
- datahub/ingestion/source/grafana/grafana_config.py +2 -2
- datahub/ingestion/source/grafana/models.py +12 -14
- datahub/ingestion/source/hex/hex.py +6 -1
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/looker/looker_common.py +76 -75
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_source.py +493 -547
- datahub/ingestion/source/looker/lookml_config.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +46 -88
- datahub/ingestion/source/metabase.py +9 -2
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +1 -1
- datahub/ingestion/source/mode.py +13 -5
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +14 -21
- datahub/ingestion/source/preset.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +6 -3
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/s3/source.py +26 -24
- datahub/ingestion/source/salesforce.py +13 -9
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
- datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/clickhouse.py +12 -7
- datahub/ingestion/source/sql/cockroachdb.py +5 -3
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +7 -9
- datahub/ingestion/source/sql/mssql/source.py +2 -2
- datahub/ingestion/source/sql/mysql.py +2 -2
- datahub/ingestion/source/sql/oracle.py +3 -3
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/teradata.py +4 -4
- datahub/ingestion/source/sql/trino.py +2 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +1 -1
- datahub/ingestion/source/sql_queries.py +6 -6
- datahub/ingestion/source/state/checkpoint.py +5 -1
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
- datahub/ingestion/source/superset.py +122 -15
- datahub/ingestion/source/tableau/tableau.py +68 -14
- datahub/ingestion/source/tableau/tableau_common.py +5 -0
- datahub/ingestion/source/tableau/tableau_constant.py +1 -0
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +7 -3
- datahub/ingestion/source/usage/usage_common.py +3 -3
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/metadata/_internal_schema_classes.py +728 -528
- datahub/metadata/_urns/urn_defs.py +1702 -1702
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/schema.avsc +17434 -17732
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +72 -0
- datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
- datahub/metadata/schemas/LogicalParent.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
- datahub/sdk/_shared.py +126 -0
- datahub/sdk/chart.py +87 -30
- datahub/sdk/dashboard.py +79 -34
- datahub/sdk/entity_client.py +11 -4
- datahub/sdk/lineage_client.py +3 -3
- datahub/sdk/search_filters.py +1 -7
- datahub/sql_parsing/split_statements.py +13 -0
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/entry_points.txt CHANGED
@@ -94,6 +94,7 @@ sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource
 salesforce = datahub.ingestion.source.salesforce:SalesforceSource
 sigma = datahub.ingestion.source.sigma.sigma:SigmaSource
 slack = datahub.ingestion.source.slack.slack:SlackSource
+snaplogic = datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource
 snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source
 snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource
 snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource
@@ -130,6 +131,7 @@ pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_clean
 pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership
 replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset
 replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer
+set_browse_path = datahub.ingestion.transformer.set_browse_path:SetBrowsePathTransformer
 set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer
 simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct
 simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain
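These two additions register the new SnapLogic source and the set_browse_path transformer as plugins. As a hedged sketch of how such entry points are consumed, assuming DataHub's conventional "datahub.ingestion.source.plugins" group and Python 3.10+ importlib.metadata:

from importlib.metadata import entry_points

def load_source_class(plugin_name: str):
    # Scan the source-plugin entry point group and import the class the
    # matching entry points at, e.g. "snaplogic" -> SnaplogicSource.
    for ep in entry_points(group="datahub.ingestion.source.plugins"):
        if ep.name == plugin_name:
            return ep.load()
    raise KeyError(f"no source plugin named {plugin_name!r}")

# load_source_class("snaplogic") would import
# datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource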
datahub/_version.py CHANGED
datahub/api/entities/corpgroup/corpgroup.py CHANGED
@@ -71,7 +71,7 @@ class CorpGroup(BaseModel):
     _rename_admins_to_owners = pydantic_renamed_field("admins", "owners")
 
     @pydantic.validator("owners", "members", each_item=True)
-    def make_urn_if_needed(v):
+    def make_urn_if_needed(cls, v):
         if isinstance(v, str):
             return builder.make_user_urn(v)
         return v
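The fix adds the missing cls parameter: v1-style validators are bound as classmethods, so the explicit cls keeps the signature correct under pydantic v2's compatibility shim. A minimal sketch of the corrected shape, with a hypothetical inline format standing in for builder.make_user_urn:

import pydantic

class Group(pydantic.BaseModel):
    owners: list = []

    @pydantic.validator("owners", each_item=True)
    def make_urn_if_needed(cls, v):  # `cls` first, then the item value
        if isinstance(v, str):
            return f"urn:li:corpuser:{v}"  # hypothetical stand-in for make_user_urn
        return v

print(Group(owners=["alice"]).owners)  # ['urn:li:corpuser:alice']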
datahub/api/entities/dataproduct/dataproduct.py CHANGED
@@ -6,9 +6,10 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import pydantic
 from ruamel.yaml import YAML
+from typing_extensions import assert_never
 
 import datahub.emitter.mce_builder as builder
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.common import ConfigModel, LaxStr
 from datahub.emitter.generic_emitter import Emitter
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DataHubGraph
@@ -110,7 +111,7 @@ class DataProduct(ConfigModel):
     description: Optional[str] = None
     tags: Optional[List[str]] = None
     terms: Optional[List[str]] = None
-    properties: Optional[Dict[str, str]] = None
+    properties: Optional[Dict[str, LaxStr]] = None
     external_url: Optional[str] = None
     _original_yaml_dict: Optional[dict] = None
 
@@ -414,7 +415,9 @@ class DataProduct(ConfigModel):
                         "type": new_owner_type_map[owner_urn],
                     }
                 else:
-                    patches_drop[i] = o
+                    patches_drop[i] = o.model_dump()
+            else:
+                assert_never(o)
 
         # Figure out what if any are new owners to add
         new_owners_to_add = {o for o in new_owner_type_map} - set(owners_matched)
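The new else: assert_never(o) branch makes the owner-type dispatch exhaustive. A small self-contained sketch of the pattern (the union here is hypothetical):

from typing import Union
from typing_extensions import assert_never

def describe(value: Union[str, int]) -> str:
    if isinstance(value, str):
        return f"string: {value}"
    elif isinstance(value, int):
        return f"int: {value}"
    else:
        # Type checkers flag this branch if a union member is unhandled;
        # at runtime an unexpected value raises here instead of passing silently.
        assert_never(value)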
datahub/api/entities/dataset/dataset.py CHANGED
@@ -27,7 +27,7 @@ from typing_extensions import TypeAlias
 
 import datahub.metadata.schema_classes as models
 from datahub.api.entities.structuredproperties.structuredproperties import AllowedTypes
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.common import ConfigModel, LaxStr
 from datahub.emitter.mce_builder import (
     make_data_platform_urn,
     make_dataset_urn,
@@ -143,7 +143,6 @@ class SchemaFieldSpecification(StrictModel):
     jsonPath: Union[None, str] = None
     nullable: bool = False
     description: Union[None, str] = None
-    doc: Union[None, str] = None  # doc is an alias for description
     label: Optional[str] = None
     created: Optional[dict] = None
     lastModified: Optional[dict] = None
@@ -221,14 +220,14 @@ class SchemaFieldSpecification(StrictModel):
         return v
 
     @root_validator(pre=True)
-    def …
-    """Synchronize doc …
+    def sync_doc_into_description(cls, values: Dict) -> Dict:
+        """Synchronize doc into description field if doc is provided."""
         description = values.get("description")
-        doc = values.…
+        doc = values.pop("doc", None)
 
-        if …
-        …
-        …
+        if doc is not None:
+            if description is not None:
+                raise ValueError("doc and description cannot both be provided")
             values["description"] = doc
 
         return values
@@ -296,10 +295,6 @@ class SchemaFieldSpecification(StrictModel):
         """Custom dict method for Pydantic v1 to handle YAML serialization properly."""
         exclude = kwargs.pop("exclude", None) or set()
 
-        # If description and doc are identical, exclude doc from the output
-        if self.description == self.doc and self.description is not None:
-            exclude.add("doc")
-
         # if nativeDataType and type are identical, exclude nativeDataType from the output
         if self.nativeDataType == self.type and self.nativeDataType is not None:
             exclude.add("nativeDataType")
@@ -327,10 +322,6 @@ class SchemaFieldSpecification(StrictModel):
         """Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
         exclude = kwargs.pop("exclude", None) or set()
 
-        # If description and doc are identical, exclude doc from the output
-        if self.description == self.doc and self.description is not None:
-            exclude.add("doc")
-
         # if nativeDataType and type are identical, exclude nativeDataType from the output
         if self.nativeDataType == self.type and self.nativeDataType is not None:
             exclude.add("nativeDataType")
@@ -387,7 +378,7 @@ class Dataset(StrictModel):
     name: Optional[str] = Field(None, validate_default=True)
     schema_metadata: Optional[SchemaSpecification] = Field(default=None, alias="schema")
     downstreams: Optional[List[str]] = None
-    properties: Optional[Dict[str, str]] = None
+    properties: Optional[Dict[str, LaxStr]] = None
     subtype: Optional[str] = None
     subtypes: Optional[List[str]] = None
     tags: Optional[List[str]] = None
@@ -605,7 +596,7 @@ class Dataset(StrictModel):
                 ],
                 platformSchema=OtherSchemaClass(
                     rawSchema=yaml.dump(
-                        self.schema_metadata.dict(
+                        self.schema_metadata.model_dump(
                             exclude_none=True, exclude_unset=True
                         )
                     )
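This change collapses the old dual doc/description handling into a single pre root validator that pops the alias before field validation. A hedged, self-contained sketch of the same pattern (v1-style root_validator, which pydantic v2 still accepts through its deprecation shim):

from typing import Dict, Optional
from pydantic import BaseModel, root_validator

class FieldSpec(BaseModel):
    description: Optional[str] = None

    @root_validator(pre=True)
    def sync_doc_into_description(cls, values: Dict) -> Dict:
        # "doc" is accepted on input as an alias, but never stored as a field.
        doc = values.pop("doc", None)
        if doc is not None:
            if values.get("description") is not None:
                raise ValueError("doc and description cannot both be provided")
            values["description"] = doc
        return values

print(FieldSpec(doc="primary key").description)  # primary key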
datahub/api/entities/structuredproperties/structuredproperties.py CHANGED
@@ -1,7 +1,7 @@
 import logging
 from enum import Enum
 from pathlib import Path
-from typing import Iterable, List, Optional, Union
+from typing import Iterable, List, Optional, Type, Union
 
 import yaml
 from pydantic import Field, StrictStr, validator
@@ -48,7 +48,7 @@ VALID_ENTITY_TYPE_URNS = [
 _VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {', '.join(VALID_ENTITY_TYPE_URNS)}, etc... Ensure that the entity type is valid."
 
 
-def _validate_entity_type_urn(v: str) -> str:
+def _validate_entity_type_urn(cls: Type, v: str) -> str:
     urn = Urn.make_entity_type_urn(v)
     if urn not in VALID_ENTITY_TYPE_URNS:
         raise ValueError(
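_validate_entity_type_urn is a module-level function wired into models via pydantic.validator(...), which calls it as a classmethod, hence the explicit cls: Type. A minimal illustration of that reuse pattern (the model and field here are hypothetical):

from typing import Type
import pydantic

def _validate_upper(cls: Type, v: str) -> str:
    # Shared validation logic, reusable across several models.
    return v.upper()

class StructuredProperty(pydantic.BaseModel):
    entity_type: str
    _upper = pydantic.validator("entity_type", allow_reuse=True)(_validate_upper)

print(StructuredProperty(entity_type="dataset").entity_type)  # DATASET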
datahub/api/graphql/operation.py CHANGED
@@ -1,7 +1,7 @@
 import logging
 from typing import Any, Dict, List, Optional
 
-from gql import …
+from gql import GraphQLRequest
 
 from datahub.api.graphql.base import BaseApi
 
@@ -79,10 +79,12 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
         if custom_properties is not None:
             variable_values["customProperties"] = custom_properties
 
-        …
-        …
+        request = GraphQLRequest(
+            Operation.REPORT_OPERATION_MUTATION, variable_values=variable_values
         )
 
+        result = self.client.execute(request)
+
         return result["reportOperation"]
 
     def query_operations(
@@ -109,12 +111,12 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
         :param partition: The partition to check the operation.
         """
 
-        …
-        …
+        request = GraphQLRequest(
+            Operation.QUERY_OPERATIONS,
             variable_values={
                 "urn": urn,
                 "startTimeMillis": start_time_millis,
-                "…
+                "endTimeMillis": end_time_millis,
                 "limit": limit,
                 "filter": self.gen_filter(
                     {
@@ -125,6 +127,8 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
                 ),
             },
         )
+
+        result = self.client.execute(request)
         if "dataset" in result and "operations" in result["dataset"]:
             operations = []
             if source_type is not None:
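The module now builds gql request objects instead of executing raw documents, matching gql's newer request-object API (GraphQLRequest exists in gql 4+). A hedged sketch of that style, with the endpoint URL hypothetical:

from gql import Client, GraphQLRequest, gql
from gql.transport.requests import RequestsHTTPTransport

# Hypothetical endpoint; any GraphQL server would do.
transport = RequestsHTTPTransport(url="https://demo.datahub.com/api/graphql")
client = Client(transport=transport)

request = GraphQLRequest(
    gql("query getDataset($urn: String!) { dataset(urn: $urn) { urn } }"),
    variable_values={"urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)"},
)
result = client.execute(request)  # returns the parsed response dict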
datahub/cli/docker_check.py CHANGED
@@ -13,8 +13,8 @@ import yaml
 from datahub.configuration.common import ExceptionWithProps
 
 # Docker seems to under-report memory allocated, so we also need a bit of buffer to account for it.
-MIN_MEMORY_NEEDED = …
-MIN_DISK_SPACE_NEEDED = …
+MIN_MEMORY_NEEDED = 4  # GB
+MIN_DISK_SPACE_NEEDED = 13  # GB
 
 DOCKER_COMPOSE_PROJECT_NAME = os.getenv("DATAHUB_COMPOSE_PROJECT_NAME", "datahub")
 DATAHUB_COMPOSE_PROJECT_FILTER = {
datahub/configuration/common.py CHANGED
@@ -1,20 +1,25 @@
+import dataclasses
 import re
 import unittest.mock
 from abc import ABC, abstractmethod
 from enum import auto
 from typing import (
     IO,
+    TYPE_CHECKING,
+    Annotated,
     Any,
     ClassVar,
     Dict,
     List,
     Optional,
     Type,
+    TypeVar,
     Union,
     runtime_checkable,
 )
 
 import pydantic
+import pydantic_core
 from cached_property import cached_property
 from pydantic import BaseModel, Extra, ValidationError
 from pydantic.fields import Field
@@ -83,6 +88,29 @@ def redact_raw_config(obj: Any) -> Any:
     return obj
 
 
+if TYPE_CHECKING:
+    AnyType = TypeVar("AnyType")
+    HiddenFromDocs = Annotated[AnyType, ...]
+else:
+    HiddenFromDocs = pydantic.json_schema.SkipJsonSchema
+
+LaxStr = Annotated[str, pydantic.BeforeValidator(lambda v: str(v))]
+
+
+@dataclasses.dataclass(frozen=True)
+class SupportedSources:
+    sources: List[str]
+
+    def __get_pydantic_json_schema__(
+        self,
+        core_schema: pydantic_core.core_schema.CoreSchema,
+        handler: pydantic.GetJsonSchemaHandler,
+    ) -> pydantic.json_schema.JsonSchemaValue:
+        json_schema = handler(core_schema)
+        json_schema.setdefault("schema_extra", {})["supported_sources"] = self.sources
+        return json_schema
+
+
 class ConfigModel(BaseModel):
     class Config:
         @staticmethod
@@ -334,4 +362,4 @@ class KeyValuePattern(ConfigModel):
 
 
 class VersionedConfig(ConfigModel):
-    version: …
+    version: LaxStr = "1"
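LaxStr restores pydantic v1's lenient string coercion for annotated fields, while HiddenFromDocs aliases SkipJsonSchema at runtime and stays transparent to type checkers. A small demonstration of the LaxStr half (the model here is a trimmed-down stand-in):

from typing import Annotated, Dict, Optional
import pydantic

LaxStr = Annotated[str, pydantic.BeforeValidator(lambda v: str(v))]

class VersionedConfig(pydantic.BaseModel):
    version: LaxStr = "1"
    properties: Optional[Dict[str, LaxStr]] = None

# YAML/JSON configs often carry bare numbers; LaxStr coerces them to str.
cfg = VersionedConfig(version=1.2, properties={"retries": 3})
print(cfg.version, cfg.properties)  # 1.2 {'retries': '3'}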
datahub/configuration/connection_resolver.py CHANGED
@@ -1,13 +1,16 @@
-from typing import Type
+from typing import TYPE_CHECKING, Type
 
 import pydantic
 
 from datahub.ingestion.api.global_context import get_graph_context
 
+if TYPE_CHECKING:
+    from pydantic.deprecated.class_validators import V1RootValidator
+
 
 def auto_connection_resolver(
     connection_field: str = "connection",
-) -> …
+) -> "V1RootValidator":
     def _resolve_connection(cls: Type, values: dict) -> dict:
         if connection_field in values:
             connection_urn = values.pop(connection_field)
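Typing the return value as pydantic's V1RootValidator only under TYPE_CHECKING keeps the deprecated module out of the runtime import graph. The same shape, sketched minimally with a hypothetical helper:

from typing import TYPE_CHECKING, Type

import pydantic

if TYPE_CHECKING:
    # Imported for annotations only; never executed at runtime.
    from pydantic.deprecated.class_validators import V1RootValidator

def drop_field_validator(field: str) -> "V1RootValidator":
    # Hypothetical helper mirroring the shape of these datahub validators.
    def _drop(cls: Type, values: dict) -> dict:
        values.pop(field, None)
        return values
    return pydantic.root_validator(pre=True, allow_reuse=True)(_drop)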
datahub/configuration/import_resolver.py CHANGED
@@ -1,15 +1,18 @@
-from typing import TypeVar, Union
+from typing import TYPE_CHECKING, Type, TypeVar, Union
 
 import pydantic
 
 from datahub.ingestion.api.registry import import_path
 
-…
+if TYPE_CHECKING:
+    from pydantic.deprecated.class_validators import V1Validator
 
+_T = TypeVar("_T")
 
-…
+
+def _pydantic_resolver(cls: Type, v: Union[str, _T]) -> _T:
     return import_path(v) if isinstance(v, str) else v
 
 
-def pydantic_resolve_key(field: str) -> …
+def pydantic_resolve_key(field: str) -> "V1Validator":
     return pydantic.validator(field, pre=True, allow_reuse=True)(_pydantic_resolver)
datahub/configuration/pydantic_migration_helpers.py CHANGED
@@ -9,14 +9,6 @@ PYDANTIC_VERSION_2 = _pydantic_version >= Version("2.0")
 # https://docs.pydantic.dev/latest/changelog/#v250-2023-11-13
 PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR = _pydantic_version >= Version("2.5.0")
 
-# This can be used to silence deprecation warnings while we migrate.
-if PYDANTIC_VERSION_2:
-    from pydantic import PydanticDeprecatedSince20  # type: ignore
-else:
-
-    class PydanticDeprecatedSince20(Warning):  # type: ignore
-        pass
-
 
 if PYDANTIC_VERSION_2:
     from pydantic import BaseModel as GenericModel
@@ -52,7 +44,6 @@ class v1_ConfigModel(v1_BaseModel):
 __all__ = [
     "PYDANTIC_VERSION_2",
     "PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR",
-    "PydanticDeprecatedSince20",
     "GenericModel",
     "v1_ConfigModel",
     "v1_Field",
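With the PydanticDeprecatedSince20 shim removed, the module keeps only version gating. The pattern it relies on, in brief (using packaging, which the module's Version comparisons imply):

import pydantic
from packaging.version import Version

PYDANTIC_VERSION_2 = Version(pydantic.VERSION) >= Version("2.0")

if PYDANTIC_VERSION_2:
    from pydantic import BaseModel as GenericModel
else:
    from pydantic.generics import GenericModel  # pydantic v1 location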
datahub/configuration/source_common.py CHANGED
@@ -1,6 +1,6 @@
 from typing import Dict, Optional
 
-…
+import pydantic
 from pydantic.fields import Field
 
 from datahub.configuration.common import ConfigModel
@@ -30,7 +30,8 @@ class EnvConfigMixin(ConfigModel):
         description="The environment that all assets produced by this connector belong to",
     )
 
-    @…
+    @pydantic.field_validator("env", mode="after")
+    @classmethod
     def env_must_be_one_of(cls, v: str) -> str:
         if v.upper() not in ALL_ENV_TYPES:
             raise ValueError(f"env must be one of {ALL_ENV_TYPES}, found {v}")
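This is the straight v1-to-v2 validator migration: @validator becomes @field_validator plus an explicit @classmethod. A self-contained version of the same check (ALL_ENV_TYPES inlined with assumed values):

import pydantic

ALL_ENV_TYPES = {"PROD", "DEV", "QA", "TEST"}  # assumed subset for illustration

class EnvConfig(pydantic.BaseModel):
    env: str = "PROD"

    @pydantic.field_validator("env", mode="after")
    @classmethod
    def env_must_be_one_of(cls, v: str) -> str:
        if v.upper() not in ALL_ENV_TYPES:
            raise ValueError(f"env must be one of {ALL_ENV_TYPES}, found {v}")
        return v

EnvConfig(env="dev")      # passes: the check is case-insensitive
# EnvConfig(env="LOCAL")  # would raise a ValidationError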
datahub/configuration/validate_field_deprecation.py CHANGED
@@ -1,11 +1,14 @@
 import warnings
-from typing import Any, Optional, Type
+from typing import TYPE_CHECKING, Any, Optional, Type
 
 import pydantic
 
 from datahub.configuration.common import ConfigurationWarning
 from datahub.utilities.global_warning_util import add_global_warning
 
+if TYPE_CHECKING:
+    from pydantic.deprecated.class_validators import V1RootValidator
+
 _unset = object()
 
 
@@ -13,7 +16,7 @@ def pydantic_field_deprecated(
     field: str,
     warn_if_value_is_not: Any = _unset,
     message: Optional[str] = None,
-) -> …
+) -> "V1RootValidator":
     if message:
         output = message
     else:
datahub/configuration/validate_field_removal.py CHANGED
@@ -1,15 +1,18 @@
 import warnings
-from typing import Type
+from typing import TYPE_CHECKING, Type
 
 import pydantic
 
 from datahub.configuration.common import ConfigurationWarning
 
+if TYPE_CHECKING:
+    from pydantic.deprecated.class_validators import V1RootValidator
+
 
 def pydantic_removed_field(
     field: str,
     print_warning: bool = True,
-) -> …
+) -> "V1RootValidator":
     def _validate_field_removal(cls: Type, values: dict) -> dict:
         if field in values:
             if print_warning:
datahub/configuration/validate_field_rename.py CHANGED
@@ -1,11 +1,14 @@
 import warnings
-from typing import Callable, Type, TypeVar
+from typing import TYPE_CHECKING, Callable, Type, TypeVar
 
 import pydantic
 
 from datahub.configuration.common import ConfigurationWarning
 from datahub.utilities.global_warning_util import add_global_warning
 
+if TYPE_CHECKING:
+    from pydantic.deprecated.class_validators import V1RootValidator
+
 _T = TypeVar("_T")
 
 
@@ -18,7 +21,7 @@ def pydantic_renamed_field(
     new_name: str,
     transform: Callable = _default_rename_transform,
     print_warning: bool = True,
-) -> …
+) -> "V1RootValidator":
     def _validate_field_rename(cls: Type, values: dict) -> dict:
         if old_name in values:
             if new_name in values:
@@ -49,6 +52,4 @@ def pydantic_renamed_field(
     # validator with pre=True gets all the values that were passed in.
     # Given that a renamed field doesn't show up in the fields list, we can't use
     # the field-level validator, even with a different field name.
-    return pydantic.root_validator(pre=True, …
-        _validate_field_rename
-    )
+    return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_field_rename)
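Usage of this helper appears earlier in the corpgroup diff (_rename_admins_to_owners). A hedged usage sketch, with the model and data hypothetical:

from typing import List
import pydantic

from datahub.configuration.validate_field_rename import pydantic_renamed_field

class GroupConfig(pydantic.BaseModel):
    owners: List[str] = []
    _rename_admins_to_owners = pydantic_renamed_field("admins", "owners")

# Old-style payloads keep working; "admins" is transparently renamed.
print(GroupConfig.model_validate({"admins": ["alice"]}).owners)  # ['alice']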
datahub/configuration/validate_multiline_string.py CHANGED
@@ -1,9 +1,12 @@
-from typing import Optional, Type, Union
+from typing import TYPE_CHECKING, Optional, Type, Union
 
 import pydantic
 
+if TYPE_CHECKING:
+    from pydantic.deprecated.class_validators import V1Validator
 
-…
+
+def pydantic_multiline_string(field: str) -> "V1Validator":
     """If the field is present and contains an escaped newline, replace it with a real newline.
 
     This makes the assumption that the field value is never supposed to have a
datahub/ingestion/autogenerated/capability_summary.json CHANGED
@@ -2372,6 +2372,12 @@
         "subtype_modifier": null,
         "supported": true
       },
+      {
+        "capability": "TAGS",
+        "description": "Supported by default",
+        "subtype_modifier": null,
+        "supported": true
+      },
       {
         "capability": "LINEAGE_COARSE",
         "description": "Supported by default",
@@ -2962,6 +2968,38 @@
     "platform_name": "Slack",
     "support_status": "TESTING"
   },
+  "snaplogic": {
+    "capabilities": [
+      {
+        "capability": "LINEAGE_FINE",
+        "description": "Enabled by default",
+        "subtype_modifier": null,
+        "supported": true
+      },
+      {
+        "capability": "DELETION_DETECTION",
+        "description": "Not supported yet",
+        "subtype_modifier": null,
+        "supported": false
+      },
+      {
+        "capability": "PLATFORM_INSTANCE",
+        "description": "Snaplogic does not support platform instances",
+        "subtype_modifier": null,
+        "supported": false
+      },
+      {
+        "capability": "LINEAGE_COARSE",
+        "description": "Enabled by default",
+        "subtype_modifier": null,
+        "supported": true
+      }
+    ],
+    "classname": "datahub.ingestion.source.snaplogic.snaplogic.SnaplogicSource",
+    "platform_id": "snaplogic",
+    "platform_name": "Snaplogic",
+    "support_status": "TESTING"
+  },
   "snowflake": {
     "capabilities": [
       {
@@ -3174,6 +3212,12 @@
         "subtype_modifier": null,
         "supported": true
       },
+      {
+        "capability": "TAGS",
+        "description": "Supported by default",
+        "subtype_modifier": null,
+        "supported": true
+      },
       {
         "capability": "LINEAGE_COARSE",
         "description": "Supported by default",
@@ -3605,4 +3649,4 @@
     "support_status": "CERTIFIED"
   }
 }
-}
+}
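The capability summary is plain JSON, so the new SnapLogic entry is queryable by any tooling. A sketch of reading it, assuming the top-level object is keyed by platform id as these hunks suggest:

import json

with open("datahub/ingestion/autogenerated/capability_summary.json") as f:
    summary = json.load(f)

snaplogic = summary["snaplogic"]
supported = [c["capability"] for c in snaplogic["capabilities"] if c["supported"]]
print(snaplogic["platform_name"], supported)
# Snaplogic ['LINEAGE_FINE', 'LINEAGE_COARSE']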
datahub/ingestion/run/pipeline_config.py CHANGED
@@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional
 
 from pydantic import Field, validator
 
-from datahub.configuration.common import ConfigModel, DynamicTypedConfig
+from datahub.configuration.common import ConfigModel, DynamicTypedConfig, HiddenFromDocs
 from datahub.ingestion.graph.config import DatahubClientConfig
 from datahub.ingestion.sink.file import FileSinkConfig
 
@@ -85,7 +85,7 @@ class PipelineConfig(ConfigModel):
     source: SourceConfig
     sink: Optional[DynamicTypedConfig] = None
     transformers: Optional[List[DynamicTypedConfig]] = None
-    flags: FlagsConfig = …
+    flags: HiddenFromDocs[FlagsConfig] = FlagsConfig()
     reporting: List[ReporterConfig] = []
     run_id: str = DEFAULT_RUN_ID
     datahub_api: Optional[DatahubClientConfig] = None
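HiddenFromDocs wraps the field type so the flags block disappears from the generated JSON schema (and thus the docs) without changing runtime behavior. What that resolves to under pydantic v2, sketched with a simplified model:

import pydantic
from pydantic.json_schema import SkipJsonSchema

class PipelineConfig(pydantic.BaseModel):
    run_id: str = "default"
    # Still parsed and validated, but omitted from the JSON schema.
    flags: SkipJsonSchema[dict] = {}

schema = PipelineConfig.model_json_schema()
print("flags" in schema["properties"])  # False
print(PipelineConfig(flags={"debug": True}).flags)  # {'debug': True}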
datahub/ingestion/source/azure/azure_common.py CHANGED
@@ -81,7 +81,7 @@ class AzureConnectionConfig(ConfigModel):
         )
         return self.sas_token if self.sas_token is not None else self.account_key
 
-    @root_validator()
+    @root_validator(skip_on_failure=True)
     def _check_credential_values(cls, values: Dict) -> Dict:
         if (
             values.get("account_key")