acryl-datahub 1.3.1__py3-none-any.whl → 1.3.1.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of acryl-datahub might be problematic.
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/METADATA +2501 -2501
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/RECORD +193 -193
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +2 -2
- datahub/api/entities/corpgroup/corpgroup.py +11 -6
- datahub/api/entities/corpuser/corpuser.py +11 -11
- datahub/api/entities/dataproduct/dataproduct.py +47 -27
- datahub/api/entities/dataset/dataset.py +32 -21
- datahub/api/entities/external/lake_formation_external_entites.py +5 -6
- datahub/api/entities/external/unity_catalog_external_entites.py +5 -7
- datahub/api/entities/forms/forms.py +16 -14
- datahub/api/entities/structuredproperties/structuredproperties.py +23 -16
- datahub/cli/check_cli.py +2 -2
- datahub/cli/config_utils.py +3 -3
- datahub/cli/lite_cli.py +9 -7
- datahub/cli/migrate.py +4 -4
- datahub/cli/quickstart_versioning.py +3 -3
- datahub/cli/specific/group_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +1 -1
- datahub/cli/specific/user_cli.py +1 -1
- datahub/configuration/common.py +14 -2
- datahub/configuration/connection_resolver.py +2 -2
- datahub/configuration/git.py +47 -30
- datahub/configuration/import_resolver.py +2 -2
- datahub/configuration/kafka.py +4 -3
- datahub/configuration/time_window_config.py +26 -26
- datahub/configuration/validate_field_deprecation.py +2 -2
- datahub/configuration/validate_field_removal.py +2 -2
- datahub/configuration/validate_field_rename.py +2 -2
- datahub/configuration/validate_multiline_string.py +2 -1
- datahub/emitter/kafka_emitter.py +3 -1
- datahub/emitter/rest_emitter.py +2 -4
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/report.py +1 -1
- datahub/ingestion/api/sink.py +1 -1
- datahub/ingestion/api/source.py +1 -1
- datahub/ingestion/glossary/datahub_classifier.py +11 -8
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/reporting/file_reporter.py +5 -4
- datahub/ingestion/run/pipeline.py +6 -6
- datahub/ingestion/run/pipeline_config.py +12 -14
- datahub/ingestion/run/sink_callback.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +6 -4
- datahub/ingestion/source/abs/config.py +19 -19
- datahub/ingestion/source/abs/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/abs/source.py +2 -2
- datahub/ingestion/source/aws/aws_common.py +1 -1
- datahub/ingestion/source/aws/glue.py +6 -4
- datahub/ingestion/source/aws/sagemaker.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +8 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +43 -30
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -1
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/common/gcp_credentials_config.py +10 -10
- datahub/ingestion/source/data_lake_common/path_spec.py +85 -89
- datahub/ingestion/source/datahub/config.py +8 -8
- datahub/ingestion/source/datahub/datahub_source.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +9 -3
- datahub/ingestion/source/dbt/dbt_common.py +39 -37
- datahub/ingestion/source/dbt/dbt_core.py +10 -12
- datahub/ingestion/source/debug/datahub_debug.py +1 -1
- datahub/ingestion/source/delta_lake/config.py +6 -4
- datahub/ingestion/source/dremio/dremio_config.py +10 -6
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/elastic_search.py +4 -3
- datahub/ingestion/source/excel/source.py +1 -1
- datahub/ingestion/source/feast.py +1 -1
- datahub/ingestion/source/file.py +5 -4
- datahub/ingestion/source/fivetran/config.py +17 -16
- datahub/ingestion/source/fivetran/fivetran.py +2 -2
- datahub/ingestion/source/gc/datahub_gc.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +8 -10
- datahub/ingestion/source/ge_profiling_config.py +8 -5
- datahub/ingestion/source/grafana/grafana_api.py +2 -2
- datahub/ingestion/source/grafana/grafana_config.py +4 -3
- datahub/ingestion/source/grafana/grafana_source.py +1 -1
- datahub/ingestion/source/grafana/models.py +23 -5
- datahub/ingestion/source/hex/api.py +7 -5
- datahub/ingestion/source/hex/hex.py +4 -3
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/iceberg/iceberg_common.py +5 -3
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +10 -10
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +7 -5
- datahub/ingestion/source/looker/looker_config.py +21 -20
- datahub/ingestion/source/looker/lookml_config.py +47 -47
- datahub/ingestion/source/metabase.py +8 -8
- datahub/ingestion/source/metadata/business_glossary.py +2 -2
- datahub/ingestion/source/metadata/lineage.py +13 -8
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/mode.py +6 -4
- datahub/ingestion/source/mongodb.py +4 -3
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +17 -23
- datahub/ingestion/source/openapi.py +6 -8
- datahub/ingestion/source/powerbi/config.py +33 -32
- datahub/ingestion/source/powerbi/dataplatform_instance_resolver.py +2 -2
- datahub/ingestion/source/powerbi/powerbi.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +2 -2
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +8 -6
- datahub/ingestion/source/preset.py +8 -8
- datahub/ingestion/source/pulsar.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +15 -8
- datahub/ingestion/source/qlik_sense/qlik_api.py +7 -7
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -1
- datahub/ingestion/source/redshift/config.py +18 -20
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/usage.py +23 -3
- datahub/ingestion/source/s3/config.py +83 -62
- datahub/ingestion/source/s3/datalake_profiler_config.py +11 -13
- datahub/ingestion/source/s3/source.py +8 -5
- datahub/ingestion/source/sac/sac.py +5 -4
- datahub/ingestion/source/salesforce.py +3 -2
- datahub/ingestion/source/schema/json_schema.py +2 -2
- datahub/ingestion/source/sigma/data_classes.py +3 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/sigma/sigma_api.py +7 -7
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snaplogic/snaplogic.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_assertion.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +35 -31
- datahub/ingestion/source/snowflake/snowflake_connection.py +35 -13
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -1
- datahub/ingestion/source/sql/athena.py +1 -1
- datahub/ingestion/source/sql/clickhouse.py +4 -2
- datahub/ingestion/source/sql/cockroachdb.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -1
- datahub/ingestion/source/sql/hana.py +1 -1
- datahub/ingestion/source/sql/hive.py +7 -5
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +13 -6
- datahub/ingestion/source/sql/mysql.py +1 -1
- datahub/ingestion/source/sql/oracle.py +17 -10
- datahub/ingestion/source/sql/postgres.py +2 -2
- datahub/ingestion/source/sql/presto.py +1 -1
- datahub/ingestion/source/sql/sql_config.py +8 -9
- datahub/ingestion/source/sql/sql_generic.py +1 -1
- datahub/ingestion/source/sql/teradata.py +1 -1
- datahub/ingestion/source/sql/trino.py +1 -1
- datahub/ingestion/source/sql/vertica.py +5 -4
- datahub/ingestion/source/sql_queries.py +11 -8
- datahub/ingestion/source/state/checkpoint.py +2 -2
- datahub/ingestion/source/state/entity_removal_state.py +2 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +55 -45
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +9 -9
- datahub/ingestion/source/tableau/tableau.py +14 -16
- datahub/ingestion/source/unity/config.py +33 -34
- datahub/ingestion/source/unity/proxy.py +203 -0
- datahub/ingestion/source/unity/proxy_types.py +91 -0
- datahub/ingestion/source/unity/source.py +27 -2
- datahub/ingestion/source/usage/clickhouse_usage.py +1 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +1 -1
- datahub/ingestion/source/usage/usage_common.py +5 -3
- datahub/ingestion/source_config/csv_enricher.py +7 -6
- datahub/ingestion/source_config/operation_config.py +7 -4
- datahub/ingestion/source_config/pulsar.py +11 -15
- datahub/ingestion/transformer/add_dataset_browse_path.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +6 -5
- datahub/ingestion/transformer/add_dataset_ownership.py +3 -3
- datahub/ingestion/transformer/add_dataset_properties.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_tags.py +2 -2
- datahub/ingestion/transformer/add_dataset_schema_terms.py +2 -2
- datahub/ingestion/transformer/add_dataset_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +1 -1
- datahub/ingestion/transformer/extract_dataset_tags.py +1 -1
- datahub/ingestion/transformer/extract_ownership_from_tags.py +1 -1
- datahub/ingestion/transformer/mark_dataset_status.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py +1 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +1 -1
- datahub/ingestion/transformer/remove_dataset_ownership.py +1 -1
- datahub/ingestion/transformer/replace_external_url.py +2 -2
- datahub/ingestion/transformer/set_browse_path.py +1 -1
- datahub/ingestion/transformer/tags_to_terms.py +1 -1
- datahub/lite/duckdb_lite.py +1 -1
- datahub/lite/lite_util.py +2 -2
- datahub/sdk/search_filters.py +68 -40
- datahub/secret/datahub_secret_store.py +7 -4
- datahub/secret/file_secret_store.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +5 -2
- datahub/testing/check_sql_parser_result.py +2 -2
- datahub/utilities/ingest_utils.py +1 -1
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.1.dist-info → acryl_datahub-1.3.1.1rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/preset.py
@@ -2,7 +2,7 @@ import logging
 from typing import Dict, Optional
 
 import requests
-from pydantic import …
+from pydantic import field_validator, model_validator
 from pydantic.fields import Field
 
 from datahub.emitter.mce_builder import DEFAULT_ENV
@@ -55,16 +55,16 @@ class PresetConfig(SupersetConfig):
         description="Can be used to change mapping for database names in superset to what you have in datahub",
     )
 
-    @…
+    @field_validator("connect_uri", "display_uri", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)
 
-    @…
-    def default_display_uri_to_connect_uri(…
-        …
-        …
-        …
-        return values
+    @model_validator(mode="after")
+    def default_display_uri_to_connect_uri(self) -> "PresetConfig":
+        if self.display_uri is None:
+            self.display_uri = self.connect_uri
+        return self
 
 
 @platform_name("Preset")
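The preset.py hunks above show the migration pattern that repeats through most of the files in this release: Pydantic v1 validator hooks become v2 `@field_validator`/`@model_validator`, and `mode="after"` model validators operate on the constructed instance (`self`) instead of a raw `values` dict. A minimal, self-contained sketch of that pattern (the `ExampleConfig` model below is hypothetical, not code from acryl-datahub):

from typing import Optional

from pydantic import BaseModel, field_validator, model_validator


class ExampleConfig(BaseModel):
    # Hypothetical model used only to illustrate the v1 -> v2 validator migration.
    connect_uri: str
    display_uri: Optional[str] = None

    @field_validator("connect_uri", "display_uri", mode="after")
    @classmethod
    def remove_trailing_slash(cls, v):
        # Per-field hook, now explicitly a classmethod in v2.
        return v.rstrip("/") if isinstance(v, str) else v

    @model_validator(mode="after")
    def default_display_uri_to_connect_uri(self) -> "ExampleConfig":
        # Whole-model hook: mutate the validated instance and return it.
        if self.display_uri is None:
            self.display_uri = self.connect_uri
        return self


# model_validate() is the v2 replacement for the v1 parse_obj() entry point.
cfg = ExampleConfig.model_validate({"connect_uri": "https://example.invalid/"})
assert cfg.display_uri == "https://example.invalid"

Note that `mode="before"` model validators (as used for the Qlik models further down) still receive and return a plain dict, so only the decorator name and the explicit `@classmethod` change there.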
datahub/ingestion/source/pulsar.py
@@ -235,7 +235,7 @@ class PulsarSource(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = PulsarSourceConfig.…
+        config = PulsarSourceConfig.model_validate(config_dict)
 
         # Do not include each individual partition for partitioned topics,
         if config.exclude_individual_partitions:
datahub/ingestion/source/qlik_sense/data_classes.py
@@ -3,7 +3,7 @@ from datetime import datetime
 from enum import Enum
 from typing import Dict, List, Optional, Type, Union
 
-from pydantic import BaseModel, ConfigDict, Field, …
+from pydantic import BaseModel, ConfigDict, Field, model_validator
 
 from datahub.emitter.mcp_builder import ContainerKey
 from datahub.ingestion.source.qlik_sense.config import QLIK_DATETIME_FORMAT, Constant
@@ -92,7 +92,8 @@ class Space(_QlikBaseModel):
     updatedAt: datetime
     ownerId: Optional[str] = None
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -121,7 +122,8 @@ class SchemaField(_QlikBaseModel):
     primaryKey: Optional[bool] = None
     nullable: Optional[bool] = None
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -138,7 +140,8 @@ class QlikDataset(Item):
     itemId: str
     datasetSchema: List[SchemaField]
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -174,7 +177,8 @@ class Chart(_QlikBaseModel):
     qDimension: List[AxisProperty]
     qMeasure: List[AxisProperty]
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -193,7 +197,8 @@ class Sheet(_QlikBaseModel):
     updatedAt: datetime
     charts: List[Chart] = []
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -220,7 +225,8 @@ class QlikTable(_QlikBaseModel):
     databaseName: Optional[str] = None
     schemaName: Optional[str] = None
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
@@ -239,7 +245,8 @@ class App(Item):
     sheets: List[Sheet] = []
     tables: List[QlikTable] = []
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
     def update_values(cls, values: Dict) -> Dict:
         # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
         values = deepcopy(values)
datahub/ingestion/source/qlik_sense/qlik_api.py
@@ -56,7 +56,7 @@ class QlikAPI:
             response.raise_for_status()
             response_dict = response.json()
             for space_dict in response_dict[Constant.DATA]:
-                space = Space.…
+                space = Space.model_validate(space_dict)
                 spaces.append(space)
                 self.spaces[space.id] = space.name
             if Constant.NEXT in response_dict[Constant.LINKS]:
@@ -64,7 +64,7 @@ class QlikAPI:
             else:
                 break
         # Add personal space entity
-        spaces.append(Space.…
+        spaces.append(Space.model_validate(PERSONAL_SPACE_DICT))
         self.spaces[PERSONAL_SPACE_DICT[Constant.ID]] = PERSONAL_SPACE_DICT[
             Constant.NAME
         ]
@@ -78,7 +78,7 @@ class QlikAPI:
             response.raise_for_status()
             response_dict = response.json()
             response_dict[Constant.ITEMID] = item_id
-            return QlikDataset.…
+            return QlikDataset.model_validate(response_dict)
         except Exception as e:
             self._log_http_error(
                 message=f"Unable to fetch dataset with id {dataset_id}. Exception: {e}"
@@ -119,7 +119,7 @@ class QlikAPI:
                     f"Chart with id {chart_id} of sheet {sheet_id} does not have hypercube. q_layout: {q_layout}"
                 )
                 return None
-            return Chart.…
+            return Chart.model_validate(q_layout)
         except Exception as e:
             self._log_http_error(
                 message=f"Unable to fetch chart {chart_id} of sheet {sheet_id}. Exception: {e}"
@@ -140,7 +140,7 @@ class QlikAPI:
             if Constant.OWNERID not in sheet_dict[Constant.QMETA]:
                 # That means sheet is private sheet
                 return None
-            sheet = Sheet.…
+            sheet = Sheet.model_validate(sheet_dict[Constant.QMETA])
             if Constant.QCHILDLIST not in sheet_dict:
                 logger.warning(
                     f"Sheet {sheet.title} with id {sheet_id} does not have any charts. sheet_dict: {sheet_dict}"
@@ -222,7 +222,7 @@ class QlikAPI:
                 return []
             response = websocket_connection.websocket_send_request(method="GetLayout")
             for table_dict in response[Constant.QLAYOUT][Constant.TABLES]:
-                tables.append(QlikTable.…
+                tables.append(QlikTable.model_validate(table_dict))
             websocket_connection.handle.pop()
             self._add_qri_of_tables(tables, app_id)
         except Exception as e:
@@ -270,7 +270,7 @@ class QlikAPI:
             response = websocket_connection.websocket_send_request(
                 method="GetAppLayout"
             )
-            app = App.…
+            app = App.model_validate(response[Constant.QLAYOUT])
             app.sheets = self._get_app_sheets(websocket_connection, app_id)
             app.tables = self._get_app_used_tables(websocket_connection, app_id)
             websocket_connection.close_websocket()
datahub/ingestion/source/qlik_sense/qlik_sense.py
@@ -148,7 +148,7 @@ class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = QlikSourceConfig.…
+        config = QlikSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def _gen_space_key(self, space_id: str) -> SpaceKey:
datahub/ingestion/source/redshift/config.py
@@ -3,7 +3,7 @@ from copy import deepcopy
 from enum import Enum
 from typing import Any, Dict, List, Optional
 
-from pydantic import …
+from pydantic import model_validator
 from pydantic.fields import Field
 
 from datahub.configuration import ConfigModel
@@ -182,7 +182,8 @@ class RedshiftConfig(
         description="Whether to skip EXTERNAL tables.",
     )
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
    def check_email_is_set_on_usage(cls, values):
        if values.get("include_usage_statistics"):
            assert "email_domain" in values and values["email_domain"], (
@@ -190,31 +191,28 @@ class RedshiftConfig(
            )
        return values
 
-    @…
-    def check_database_is_set(…
-        assert …
-        return …
-        …
-    @root_validator(skip_on_failure=True)
-    def backward_compatibility_configs_set(cls, values: Dict) -> Dict:
-        match_fully_qualified_names = values.get("match_fully_qualified_names")
-        …
-        schema_pattern: Optional[AllowDenyPattern] = values.get("schema_pattern")
+    @model_validator(mode="after")
+    def check_database_is_set(self) -> "RedshiftConfig":
+        assert self.database, "database must be set"
+        return self
 
+    @model_validator(mode="after")
+    def backward_compatibility_configs_set(self) -> "RedshiftConfig":
        if (
-            schema_pattern is not None
-            and schema_pattern != AllowDenyPattern.allow_all()
-            and match_fully_qualified_names is not None
-            and not match_fully_qualified_names
+            self.schema_pattern is not None
+            and self.schema_pattern != AllowDenyPattern.allow_all()
+            and self.match_fully_qualified_names is not None
+            and not self.match_fully_qualified_names
        ):
            logger.warning(
                "Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
                "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
                "The config option `match_fully_qualified_names` will be deprecated in future and the default behavior will assume `match_fully_qualified_names: True`."
            )
-        return …
+        return self
 
-    @…
+    @model_validator(mode="before")
+    @classmethod
    def connection_config_compatibility_set(cls, values: Dict) -> Dict:
        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
        values = deepcopy(values)
@@ -231,8 +229,8 @@ class RedshiftConfig(
        if "options" in values and "connect_args" in values["options"]:
            values["extra_client_options"] = values["options"]["connect_args"]
 
-        if values…
-            if values…
+        if values.get("extra_client_options"):
+            if values.get("options"):
                values["options"]["connect_args"] = values["extra_client_options"]
            else:
                values["options"] = {"connect_args": values["extra_client_options"]}
datahub/ingestion/source/redshift/redshift.py
@@ -236,7 +236,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
            RedshiftConfig.Config.extra = (
                pydantic.Extra.allow
            )  # we are okay with extra fields during this stage
-            config = RedshiftConfig.…
+            config = RedshiftConfig.model_validate(config_dict)
            # source = RedshiftSource(config, report)
            connection: redshift_connector.Connection = (
                RedshiftSource.get_redshift_connection(config)
@@ -316,7 +316,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = RedshiftConfig.…
+        config = RedshiftConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     @staticmethod
datahub/ingestion/source/redshift/usage.py
@@ -1,12 +1,12 @@
 import collections
 import logging
 import time
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import cachetools
-import pydantic.error_wrappers
 import redshift_connector
+from pydantic import ValidationError, field_validator
 from pydantic.fields import Field
 from pydantic.main import BaseModel
 
@@ -64,6 +64,26 @@ class RedshiftAccessEvent(BaseModel):
     starttime: datetime
     endtime: datetime
 
+    @field_validator("starttime", "endtime", mode="before")
+    @classmethod
+    def ensure_utc_datetime(cls, v):
+        """Ensure datetime fields are treated as UTC for consistency with Pydantic V1 behavior.
+
+        Pydantic V2 assumes local timezone for naive datetime strings, whereas Pydantic V1 assumed UTC.
+        This validator restores V1 behavior to maintain timestamp consistency.
+        """
+        if isinstance(v, str):
+            # Parse as naive datetime, then assume UTC (matching V1 behavior)
+            dt = datetime.fromisoformat(v)
+            if dt.tzinfo is None:
+                # Treat naive datetime as UTC (this was the V1 behavior)
+                dt = dt.replace(tzinfo=timezone.utc)
+            return dt
+        elif isinstance(v, datetime) and v.tzinfo is None:
+            # If we get a naive datetime object, assume UTC
+            return v.replace(tzinfo=timezone.utc)
+        return v
+
 
 class RedshiftUsageExtractor:
     """
@@ -291,7 +311,7 @@ class RedshiftUsageExtractor:
                    else None
                ),
            )
-        except …
+        except ValidationError as e:
            logging.warning(
                f"Validation error on access event creation from row {row}. The error was: {e} Skipping ...."
            )
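The new ensure_utc_datetime hook in the hunk above compensates for a Pydantic v2 behavior change called out in its docstring: naive datetime inputs are no longer assumed to be UTC. A rough usage sketch of the same idea on a stand-in model (AccessEvent below is hypothetical, not the package's RedshiftAccessEvent):

from datetime import datetime, timezone

from pydantic import BaseModel, field_validator


class AccessEvent(BaseModel):
    # Illustrative stand-in model, not taken from acryl-datahub.
    starttime: datetime

    @field_validator("starttime", mode="before")
    @classmethod
    def assume_utc(cls, v):
        # Coerce naive inputs to UTC before the regular datetime validation runs.
        if isinstance(v, str):
            v = datetime.fromisoformat(v)
        if isinstance(v, datetime) and v.tzinfo is None:
            v = v.replace(tzinfo=timezone.utc)
        return v


event = AccessEvent.model_validate({"starttime": "2024-01-01 12:00:00"})
assert event.starttime.tzinfo == timezone.utc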
datahub/ingestion/source/s3/config.py
@@ -1,7 +1,7 @@
 import logging
-from typing import Any, Dict, …
+from typing import Any, Dict, Optional, Union
 
-import …
+from pydantic import ValidationInfo, field_validator, model_validator
 from pydantic.fields import Field
 
 from datahub.configuration.common import AllowDenyPattern
@@ -12,7 +12,6 @@ from datahub.configuration.validate_field_deprecation import pydantic_field_depr
 from datahub.configuration.validate_field_rename import pydantic_renamed_field
 from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig
 from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin
-from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
 from datahub.ingestion.source.s3.datalake_profiler_config import DataLakeProfilerConfig
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StatefulStaleMetadataRemovalConfig,
@@ -117,69 +116,91 @@ class DataLakeSourceConfig(
            self.profiling.operation_config
        )
 
-    @…
-    …
-    …
-    ) -> List[PathSpec]:
+    @field_validator("path_specs", mode="before")
+    @classmethod
+    def check_path_specs(cls, path_specs: Any, info: ValidationInfo) -> Any:
        if len(path_specs) == 0:
            raise ValueError("path_specs must not be empty")
 
-        # …
-        guessed_platforms = {
-            "s3" if path_spec.is_s3 else "file" for path_spec in path_specs
-        }
-        if len(guessed_platforms) > 1:
-            raise ValueError(
-                f"Cannot have multiple platforms in path_specs: {guessed_platforms}"
-            )
-        guessed_platform = guessed_platforms.pop()
-        …
-        # Ensure s3 configs aren't used for file sources.
-        if guessed_platform != "s3" and (
-            values.get("use_s3_object_tags") or values.get("use_s3_bucket_tags")
-        ):
-            raise ValueError(
-                "Cannot grab s3 object/bucket tags when platform is not s3. Remove the flag or use s3."
-            )
-        …
-        # Infer platform if not specified.
-        if values.get("platform") and values["platform"] != guessed_platform:
-            raise ValueError(
-                f"All path_specs belong to {guessed_platform} platform, but platform is set to {values['platform']}"
-            )
-        else:
-            logger.debug(f'Setting config "platform": {guessed_platform}')
-            values["platform"] = guessed_platform
+        # Basic validation - path specs consistency and S3 config validation is now handled in model_validator
 
        return path_specs
 
-    @…
-    def …
-    …
-        platform = platform or inferred_platform
-        if not platform:
-            raise ValueError("platform must not be empty")
-        …
-        if platform != "s3" and values.get("use_s3_bucket_tags"):
-            raise ValueError(
-                "Cannot grab s3 bucket tags when platform is not s3. Remove the flag or ingest from s3."
-            )
-        if platform != "s3" and values.get("use_s3_object_tags"):
-            raise ValueError(
-                "Cannot grab s3 object tags when platform is not s3. Remove the flag or ingest from s3."
-            )
-        if platform != "s3" and values.get("use_s3_content_type"):
-            raise ValueError(
-                "Cannot grab s3 object content type when platform is not s3. Remove the flag or ingest from s3."
-            )
-        …
-        return platform
-        …
-    @pydantic.root_validator(skip_on_failure=True)
-    def ensure_profiling_pattern_is_passed_to_profiling(
-        cls, values: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        profiling: Optional[DataLakeProfilerConfig] = values.get("profiling")
+    @model_validator(mode="after")
+    def ensure_profiling_pattern_is_passed_to_profiling(self) -> "DataLakeSourceConfig":
+        profiling = self.profiling
        if profiling is not None and profiling.enabled:
-            profiling._allow_deny_patterns = …
-            return …
+            profiling._allow_deny_patterns = self.profile_patterns
+        return self
+
+    @model_validator(mode="after")
+    def validate_platform_and_config_consistency(self) -> "DataLakeSourceConfig":
+        """Infer platform from path_specs and validate config consistency."""
+        # Track whether platform was explicitly provided
+        platform_was_explicit = bool(self.platform)
+
+        # Infer platform from path_specs if not explicitly set
+        if not self.platform and self.path_specs:
+            guessed_platforms = set()
+            for path_spec in self.path_specs:
+                if (
+                    hasattr(path_spec, "include")
+                    and path_spec.include
+                    and path_spec.include.startswith("s3://")
+                ):
+                    guessed_platforms.add("s3")
+                else:
+                    guessed_platforms.add("file")
+
+            # Ensure all path specs belong to the same platform
+            if len(guessed_platforms) > 1:
+                raise ValueError(
+                    f"Cannot have multiple platforms in path_specs: {guessed_platforms}"
+                )
+
+            if guessed_platforms:
+                guessed_platform = guessed_platforms.pop()
+                logger.debug(f"Inferred platform: {guessed_platform}")
+                self.platform = guessed_platform
+            else:
+                self.platform = "file"
+        elif not self.platform:
+            self.platform = "file"
+
+        # Validate platform consistency only when platform was inferred (not explicitly set)
+        # This allows sources like GCS to set platform="gcs" with s3:// URIs for correct container subtypes
+        if not platform_was_explicit and self.platform and self.path_specs:
+            expected_platforms = set()
+            for path_spec in self.path_specs:
+                if (
+                    hasattr(path_spec, "include")
+                    and path_spec.include
+                    and path_spec.include.startswith("s3://")
+                ):
+                    expected_platforms.add("s3")
+                else:
+                    expected_platforms.add("file")
+
+            if len(expected_platforms) == 1:
+                expected_platform = expected_platforms.pop()
+                if self.platform != expected_platform:
+                    raise ValueError(
+                        f"All path_specs belong to {expected_platform} platform, but platform was inferred as {self.platform}"
+                    )
+
+        # Validate S3-specific configurations
+        if self.platform != "s3":
+            if self.use_s3_bucket_tags:
+                raise ValueError(
+                    "Cannot grab s3 bucket tags when platform is not s3. Remove the flag or ingest from s3."
+                )
+            if self.use_s3_object_tags:
+                raise ValueError(
+                    "Cannot grab s3 object tags when platform is not s3. Remove the flag or ingest from s3."
+                )
+            if self.use_s3_content_type:
+                raise ValueError(
+                    "Cannot grab s3 object content type when platform is not s3. Remove the flag or ingest from s3."
+                )
+
+        return self
datahub/ingestion/source/s3/datalake_profiler_config.py
@@ -1,6 +1,7 @@
-from typing import …
+from typing import Optional
 
 import pydantic
+from pydantic import model_validator
 from pydantic.fields import Field
 
 from datahub.configuration import ConfigModel
@@ -72,21 +73,18 @@ class DataLakeProfilerConfig(ConfigModel):
         description="Whether to profile for the sample values for all columns.",
     )
 
-    @…
-    def ensure_field_level_settings_are_normalized(…
-    …
-    ) -> Dict[str, Any]:
-        max_num_fields_to_profile_key = "max_number_of_fields_to_profile"
-        max_num_fields_to_profile = values.get(max_num_fields_to_profile_key)
+    @model_validator(mode="after")
+    def ensure_field_level_settings_are_normalized(self) -> "DataLakeProfilerConfig":
+        max_num_fields_to_profile = self.max_number_of_fields_to_profile
 
        # Disable all field-level metrics.
-        if …
-            for …
-                if …
-                    …
+        if self.profile_table_level_only:
+            for field_name in self.__fields__:
+                if field_name.startswith("include_field_"):
+                    setattr(self, field_name, False)
 
            assert max_num_fields_to_profile is None, (
-                …
+                "max_number_of_fields_to_profile should be set to None"
            )
 
-        return …
+        return self
datahub/ingestion/source/s3/source.py
@@ -53,8 +53,11 @@ from datahub.ingestion.source.data_lake_common.data_lake_utils import (
 from datahub.ingestion.source.data_lake_common.object_store import (
     create_object_store_adapter,
 )
-from datahub.ingestion.source.data_lake_common.path_spec import …
-…
+from datahub.ingestion.source.data_lake_common.path_spec import (
+    FolderTraversalMethod,
+    PathSpec,
+)
+from datahub.ingestion.source.s3.config import DataLakeSourceConfig
 from datahub.ingestion.source.s3.report import DataLakeSourceReport
 from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet
 from datahub.ingestion.source.schema_inference.base import SchemaInferenceBase
@@ -261,7 +264,7 @@ class S3Source(StatefulIngestionSourceBase):
        )
 
        config_report = {
-            config_option: config.…
+            config_option: config.model_dump().get(config_option)
            for config_option in config_options_to_report
        }
        config_report = {
@@ -278,7 +281,7 @@ class S3Source(StatefulIngestionSourceBase):
        telemetry.telemetry_instance.ping(
            "data_lake_profiling_config",
            {
-                config_flag: config.profiling.…
+                config_flag: config.profiling.model_dump().get(config_flag)
                for config_flag in profiling_flags_to_report
            },
        )
@@ -370,7 +373,7 @@ class S3Source(StatefulIngestionSourceBase):
 
     @classmethod
     def create(cls, config_dict, ctx):
-        config = DataLakeSourceConfig.…
+        config = DataLakeSourceConfig.model_validate(config_dict)
 
         return cls(config, ctx)
 
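Alongside the validator changes, the s3 source hunks above adopt the Pydantic v2 entry points model_dump() and model_validate(), the v2 counterparts of v1's dict() and parse_obj(). A small illustrative sketch (the Flags model is hypothetical, not from the package):

from typing import Optional

from pydantic import BaseModel


class Flags(BaseModel):
    profile_table_level_only: bool = False
    max_number_of_fields_to_profile: Optional[int] = None


flags = Flags.model_validate({"profile_table_level_only": True})  # v1 equivalent: Flags.parse_obj(...)
report = flags.model_dump()  # v1 equivalent: flags.dict()
assert report["profile_table_level_only"] is True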
datahub/ingestion/source/sac/sac.py
@@ -8,7 +8,7 @@ import pyodata
 import pyodata.v2.model
 import pyodata.v2.service
 from authlib.integrations.requests_client import OAuth2Session
-from pydantic import Field, SecretStr, …
+from pydantic import Field, SecretStr, field_validator
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 
@@ -159,7 +159,8 @@ class SACSourceConfig(
         description="Template for generating dataset urns of consumed queries, the placeholder {query} can be used within the template for inserting the name of the query",
     )
 
-    @…
+    @field_validator("tenant_url", "token_url", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)
 
@@ -209,7 +210,7 @@ class SACSource(StatefulIngestionSourceBase, TestableSource):
 
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "SACSource":
-        config = SACSourceConfig.…
+        config = SACSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     @staticmethod
@@ -217,7 +218,7 @@ class SACSource(StatefulIngestionSourceBase, TestableSource):
        test_report = TestConnectionReport()
 
        try:
-            config = SACSourceConfig.…
+            config = SACSourceConfig.model_validate(config_dict)
 
            # when creating the pyodata.Client, the metadata is automatically parsed and validated
            session, _ = SACSource.get_sac_connection(config)
datahub/ingestion/source/salesforce.py
@@ -7,7 +7,7 @@ from enum import Enum
 from typing import Any, Dict, Iterable, List, Literal, Optional, TypedDict
 
 import requests
-from pydantic import Field, …
+from pydantic import Field, field_validator
 from simple_salesforce import Salesforce
 from simple_salesforce.exceptions import SalesforceAuthenticationFailed
 
@@ -172,7 +172,8 @@ class SalesforceConfig(
            self.profiling.operation_config
        )
 
-    @…
+    @field_validator("instance_url", mode="after")
+    @classmethod
     def remove_trailing_slash(cls, v):
         return config_clean.remove_trailing_slashes(v)
 
datahub/ingestion/source/schema/json_schema.py
@@ -12,7 +12,7 @@ from urllib.parse import urlparse
 
 import jsonref
 import requests
-from pydantic import AnyHttpUrl, DirectoryPath, FilePath, …
+from pydantic import AnyHttpUrl, DirectoryPath, FilePath, field_validator
 from pydantic.fields import Field
 
 import datahub.metadata.schema_classes as models
@@ -90,7 +90,7 @@ class JsonSchemaSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMix
         description="Use this if URI-s need to be modified during reference resolution. Simple string match - replace capabilities are supported.",
     )
 
-    @…
+    @field_validator("path", mode="after")
     def download_http_url_to_temp_file(cls, v):
         if isinstance(v, AnyHttpUrl):
             try: