acryl-datahub 1.2.0.9rc2__py3-none-any.whl → 1.2.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of acryl-datahub has been flagged as potentially problematic.

Files changed (118)
  1. {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/METADATA +2553 -2611
  2. {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/RECORD +118 -111
  3. {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/entry_points.txt +2 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  7. datahub/api/entities/dataproduct/dataproduct.py +6 -3
  8. datahub/api/entities/dataset/dataset.py +9 -18
  9. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  10. datahub/api/graphql/operation.py +10 -6
  11. datahub/cli/docker_check.py +2 -2
  12. datahub/configuration/common.py +29 -1
  13. datahub/configuration/connection_resolver.py +5 -2
  14. datahub/configuration/import_resolver.py +7 -4
  15. datahub/configuration/pydantic_migration_helpers.py +0 -9
  16. datahub/configuration/source_common.py +3 -2
  17. datahub/configuration/validate_field_deprecation.py +5 -2
  18. datahub/configuration/validate_field_removal.py +5 -2
  19. datahub/configuration/validate_field_rename.py +6 -5
  20. datahub/configuration/validate_multiline_string.py +5 -2
  21. datahub/ingestion/autogenerated/capability_summary.json +45 -1
  22. datahub/ingestion/run/pipeline_config.py +2 -2
  23. datahub/ingestion/source/azure/azure_common.py +1 -1
  24. datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
  25. datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
  26. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  27. datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
  28. datahub/ingestion/source/datahub/config.py +8 -9
  29. datahub/ingestion/source/dbt/dbt_common.py +65 -5
  30. datahub/ingestion/source/delta_lake/config.py +1 -1
  31. datahub/ingestion/source/dremio/dremio_config.py +3 -4
  32. datahub/ingestion/source/feast.py +8 -10
  33. datahub/ingestion/source/fivetran/config.py +1 -1
  34. datahub/ingestion/source/gcs/gcs_source.py +19 -2
  35. datahub/ingestion/source/ge_data_profiler.py +15 -2
  36. datahub/ingestion/source/ge_profiling_config.py +26 -22
  37. datahub/ingestion/source/grafana/grafana_config.py +2 -2
  38. datahub/ingestion/source/grafana/models.py +12 -14
  39. datahub/ingestion/source/hex/hex.py +6 -1
  40. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  41. datahub/ingestion/source/kafka_connect/common.py +2 -2
  42. datahub/ingestion/source/looker/looker_common.py +76 -75
  43. datahub/ingestion/source/looker/looker_config.py +15 -4
  44. datahub/ingestion/source/looker/looker_source.py +493 -547
  45. datahub/ingestion/source/looker/lookml_config.py +1 -1
  46. datahub/ingestion/source/looker/lookml_source.py +46 -88
  47. datahub/ingestion/source/metabase.py +9 -2
  48. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  49. datahub/ingestion/source/metadata/lineage.py +1 -1
  50. datahub/ingestion/source/mode.py +13 -5
  51. datahub/ingestion/source/nifi.py +1 -1
  52. datahub/ingestion/source/powerbi/config.py +14 -21
  53. datahub/ingestion/source/preset.py +1 -1
  54. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  55. datahub/ingestion/source/redash.py +1 -1
  56. datahub/ingestion/source/redshift/config.py +6 -3
  57. datahub/ingestion/source/redshift/query.py +23 -19
  58. datahub/ingestion/source/s3/source.py +26 -24
  59. datahub/ingestion/source/salesforce.py +13 -9
  60. datahub/ingestion/source/schema/json_schema.py +14 -14
  61. datahub/ingestion/source/sigma/data_classes.py +3 -0
  62. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  63. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  64. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  65. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  66. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  67. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  68. datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
  69. datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
  70. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
  71. datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
  72. datahub/ingestion/source/sql/athena.py +2 -1
  73. datahub/ingestion/source/sql/clickhouse.py +12 -7
  74. datahub/ingestion/source/sql/cockroachdb.py +5 -3
  75. datahub/ingestion/source/sql/druid.py +2 -2
  76. datahub/ingestion/source/sql/hive.py +4 -3
  77. datahub/ingestion/source/sql/hive_metastore.py +7 -9
  78. datahub/ingestion/source/sql/mssql/source.py +2 -2
  79. datahub/ingestion/source/sql/mysql.py +2 -2
  80. datahub/ingestion/source/sql/oracle.py +3 -3
  81. datahub/ingestion/source/sql/presto.py +2 -1
  82. datahub/ingestion/source/sql/teradata.py +4 -4
  83. datahub/ingestion/source/sql/trino.py +2 -1
  84. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  85. datahub/ingestion/source/sql/vertica.py +1 -1
  86. datahub/ingestion/source/sql_queries.py +6 -6
  87. datahub/ingestion/source/state/checkpoint.py +5 -1
  88. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  89. datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
  90. datahub/ingestion/source/superset.py +122 -15
  91. datahub/ingestion/source/tableau/tableau.py +68 -14
  92. datahub/ingestion/source/tableau/tableau_common.py +5 -0
  93. datahub/ingestion/source/tableau/tableau_constant.py +1 -0
  94. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  95. datahub/ingestion/source/unity/config.py +7 -3
  96. datahub/ingestion/source/usage/usage_common.py +3 -3
  97. datahub/ingestion/source_config/pulsar.py +3 -1
  98. datahub/ingestion/transformer/set_browse_path.py +112 -0
  99. datahub/metadata/_internal_schema_classes.py +728 -528
  100. datahub/metadata/_urns/urn_defs.py +1702 -1702
  101. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  102. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  103. datahub/metadata/schema.avsc +17434 -17732
  104. datahub/metadata/schemas/GlobalSettingsInfo.avsc +72 -0
  105. datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
  106. datahub/metadata/schemas/LogicalParent.avsc +2 -1
  107. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
  108. datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
  109. datahub/sdk/_shared.py +126 -0
  110. datahub/sdk/chart.py +87 -30
  111. datahub/sdk/dashboard.py +79 -34
  112. datahub/sdk/entity_client.py +11 -4
  113. datahub/sdk/lineage_client.py +3 -3
  114. datahub/sdk/search_filters.py +1 -7
  115. datahub/sql_parsing/split_statements.py +13 -0
  116. {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/WHEEL +0 -0
  117. {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/licenses/LICENSE +0 -0
  118. {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/entry_points.txt CHANGED
@@ -94,6 +94,7 @@ sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource
  salesforce = datahub.ingestion.source.salesforce:SalesforceSource
  sigma = datahub.ingestion.source.sigma.sigma:SigmaSource
  slack = datahub.ingestion.source.slack.slack:SlackSource
+ snaplogic = datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource
  snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source
  snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource
  snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource
@@ -130,6 +131,7 @@ pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_clean
  pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership
  replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset
  replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer
+ set_browse_path = datahub.ingestion.transformer.set_browse_path:SetBrowsePathTransformer
  set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer
  simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct
  simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain
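
The two added entry points register the new SnapLogic source and the set_browse_path transformer as plugins. As a rough sketch of how such a name resolves to its class at runtime via importlib.metadata (the group name below is an illustrative guess, not taken from this diff):

# Hypothetical sketch of entry-point resolution (Python 3.10+ selection API).
# The group name is an assumption for illustration only.
from importlib.metadata import entry_points

source_eps = entry_points(group="datahub.ingestion.source.plugins")  # assumed group name
snaplogic_ep = next(ep for ep in source_eps if ep.name == "snaplogic")
SnaplogicSource = snaplogic_ep.load()  # resolves the "module:Class" target registered above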
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.2.0.9rc2"
+ __version__ = "1.2.0.10"


  def is_dev_mode() -> bool:
datahub/api/entities/assertion/assertion.py CHANGED
@@ -53,5 +53,5 @@ class BaseEntityAssertion(BaseAssertion):
  )

  trigger: Optional[AssertionTrigger] = v1_Field(
- description="The trigger schedule for assertion", alias="schedule"
+ default=None, description="The trigger schedule for assertion", alias="schedule"
  )
datahub/api/entities/corpgroup/corpgroup.py CHANGED
@@ -71,7 +71,7 @@ class CorpGroup(BaseModel):
  _rename_admins_to_owners = pydantic_renamed_field("admins", "owners")

  @pydantic.validator("owners", "members", each_item=True)
- def make_urn_if_needed(v):
+ def make_urn_if_needed(cls, v):
  if isinstance(v, str):
  return builder.make_user_urn(v)
  return v
datahub/api/entities/dataproduct/dataproduct.py CHANGED
@@ -6,9 +6,10 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

  import pydantic
  from ruamel.yaml import YAML
+ from typing_extensions import assert_never

  import datahub.emitter.mce_builder as builder
- from datahub.configuration.common import ConfigModel
+ from datahub.configuration.common import ConfigModel, LaxStr
  from datahub.emitter.generic_emitter import Emitter
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
  from datahub.ingestion.graph.client import DataHubGraph
@@ -110,7 +111,7 @@ class DataProduct(ConfigModel):
  description: Optional[str] = None
  tags: Optional[List[str]] = None
  terms: Optional[List[str]] = None
- properties: Optional[Dict[str, str]] = None
+ properties: Optional[Dict[str, LaxStr]] = None
  external_url: Optional[str] = None
  _original_yaml_dict: Optional[dict] = None

@@ -414,7 +415,9 @@ class DataProduct(ConfigModel):
  "type": new_owner_type_map[owner_urn],
  }
  else:
- patches_drop[i] = o
+ patches_drop[i] = o.model_dump()
+ else:
+ assert_never(o)

  # Figure out what if any are new owners to add
  new_owners_to_add = {o for o in new_owner_type_map} - set(owners_matched)
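
The owner-patching hunk above adopts typing_extensions.assert_never for exhaustiveness checking in the trailing else branch. A minimal standalone sketch of that pattern, unrelated to the actual DataProduct logic:

# Minimal sketch of the assert_never exhaustiveness pattern.
from typing import Union

from typing_extensions import assert_never


def describe(value: Union[int, str]) -> str:
    if isinstance(value, int):
        return f"int {value}"
    elif isinstance(value, str):
        return f"str {value!r}"
    else:
        # Type checkers prove this branch unreachable; at runtime it raises if
        # an unhandled variant ever gets here.
        assert_never(value)


print(describe(42))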
datahub/api/entities/dataset/dataset.py CHANGED
@@ -27,7 +27,7 @@ from typing_extensions import TypeAlias

  import datahub.metadata.schema_classes as models
  from datahub.api.entities.structuredproperties.structuredproperties import AllowedTypes
- from datahub.configuration.common import ConfigModel
+ from datahub.configuration.common import ConfigModel, LaxStr
  from datahub.emitter.mce_builder import (
  make_data_platform_urn,
  make_dataset_urn,
@@ -143,7 +143,6 @@ class SchemaFieldSpecification(StrictModel):
  jsonPath: Union[None, str] = None
  nullable: bool = False
  description: Union[None, str] = None
- doc: Union[None, str] = None # doc is an alias for description
  label: Optional[str] = None
  created: Optional[dict] = None
  lastModified: Optional[dict] = None
@@ -221,14 +220,14 @@ class SchemaFieldSpecification(StrictModel):
  return v

  @root_validator(pre=True)
- def sync_description_and_doc(cls, values: Dict) -> Dict:
- """Synchronize doc and description fields if one is provided but not the other."""
+ def sync_doc_into_description(cls, values: Dict) -> Dict:
+ """Synchronize doc into description field if doc is provided."""
  description = values.get("description")
- doc = values.get("doc")
+ doc = values.pop("doc", None)

- if description is not None and doc is None:
- values["doc"] = description
- elif doc is not None and description is None:
+ if doc is not None:
+ if description is not None:
+ raise ValueError("doc and description cannot both be provided")
  values["description"] = doc

  return values
@@ -296,10 +295,6 @@ class SchemaFieldSpecification(StrictModel):
  """Custom dict method for Pydantic v1 to handle YAML serialization properly."""
  exclude = kwargs.pop("exclude", None) or set()

- # If description and doc are identical, exclude doc from the output
- if self.description == self.doc and self.description is not None:
- exclude.add("doc")
-
  # if nativeDataType and type are identical, exclude nativeDataType from the output
  if self.nativeDataType == self.type and self.nativeDataType is not None:
  exclude.add("nativeDataType")
@@ -327,10 +322,6 @@ class SchemaFieldSpecification(StrictModel):
  """Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
  exclude = kwargs.pop("exclude", None) or set()

- # If description and doc are identical, exclude doc from the output
- if self.description == self.doc and self.description is not None:
- exclude.add("doc")
-
  # if nativeDataType and type are identical, exclude nativeDataType from the output
  if self.nativeDataType == self.type and self.nativeDataType is not None:
  exclude.add("nativeDataType")
@@ -387,7 +378,7 @@ class Dataset(StrictModel):
  name: Optional[str] = Field(None, validate_default=True)
  schema_metadata: Optional[SchemaSpecification] = Field(default=None, alias="schema")
  downstreams: Optional[List[str]] = None
- properties: Optional[Dict[str, str]] = None
+ properties: Optional[Dict[str, LaxStr]] = None
  subtype: Optional[str] = None
  subtypes: Optional[List[str]] = None
  tags: Optional[List[str]] = None
@@ -605,7 +596,7 @@ class Dataset(StrictModel):
  ],
  platformSchema=OtherSchemaClass(
  rawSchema=yaml.dump(
- self.schema_metadata.dict(
+ self.schema_metadata.model_dump(
  exclude_none=True, exclude_unset=True
  )
  )
  )
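
The sync_doc_into_description change above drops the separate doc field: a doc key is now folded into description during pre-validation, and supplying both becomes an error. A simplified stand-in model (not the real SchemaFieldSpecification) illustrating the behavior:

# Standalone sketch of the new doc/description handling, mirroring the validator
# shown in the diff; a simplified stand-in for SchemaFieldSpecification.
from typing import Dict, Optional

import pydantic


class FieldSpec(pydantic.BaseModel):
    description: Optional[str] = None

    @pydantic.root_validator(pre=True)
    def sync_doc_into_description(cls, values: Dict) -> Dict:
        doc = values.pop("doc", None)
        if doc is not None:
            if values.get("description") is not None:
                raise ValueError("doc and description cannot both be provided")
            values["description"] = doc
        return values


print(FieldSpec(doc="primary key").description)  # -> "primary key"
# FieldSpec(doc="x", description="y") now raises a ValidationError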
datahub/api/entities/structuredproperties/structuredproperties.py CHANGED
@@ -1,7 +1,7 @@
  import logging
  from enum import Enum
  from pathlib import Path
- from typing import Iterable, List, Optional, Union
+ from typing import Iterable, List, Optional, Type, Union

  import yaml
  from pydantic import Field, StrictStr, validator
@@ -48,7 +48,7 @@ VALID_ENTITY_TYPE_URNS = [
  _VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {', '.join(VALID_ENTITY_TYPE_URNS)}, etc... Ensure that the entity type is valid."


- def _validate_entity_type_urn(v: str) -> str:
+ def _validate_entity_type_urn(cls: Type, v: str) -> str:
  urn = Urn.make_entity_type_urn(v)
  if urn not in VALID_ENTITY_TYPE_URNS:
  raise ValueError(
datahub/api/graphql/operation.py CHANGED
@@ -1,7 +1,7 @@
  import logging
  from typing import Any, Dict, List, Optional

- from gql import gql
+ from gql import GraphQLRequest

  from datahub.api.graphql.base import BaseApi

@@ -79,10 +79,12 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
  if custom_properties is not None:
  variable_values["customProperties"] = custom_properties

- result = self.client.execute(
- gql(Operation.REPORT_OPERATION_MUTATION), variable_values=variable_values
+ request = GraphQLRequest(
+ Operation.REPORT_OPERATION_MUTATION, variable_values=variable_values
  )

+ result = self.client.execute(request)
+
  return result["reportOperation"]

  def query_operations(
@@ -109,12 +111,12 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
  :param partition: The partition to check the operation.
  """

- result = self.client.execute(
- gql(Operation.QUERY_OPERATIONS),
+ request = GraphQLRequest(
+ Operation.QUERY_OPERATIONS,
  variable_values={
  "urn": urn,
  "startTimeMillis": start_time_millis,
- "end_time_millis": end_time_millis,
+ "endTimeMillis": end_time_millis,
  "limit": limit,
  "filter": self.gen_filter(
  {
@@ -125,6 +127,8 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
  ),
  },
  )
+
+ result = self.client.execute(request)
  if "dataset" in result and "operations" in result["dataset"]:
  operations = []
  if source_type is not None:
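
These hunks switch from wrapping query strings with gql() to passing a GraphQLRequest object to client.execute(), and correct the endTimeMillis variable name. A hedged sketch of that newer gql client style, with a placeholder endpoint and query rather than DataHub's actual GraphQL API:

# Hedged sketch of executing a query via gql's GraphQLRequest (gql 4.x style).
# The endpoint URL, query, and URN below are placeholders, not DataHub's API.
from gql import Client, GraphQLRequest
from gql.transport.requests import RequestsHTTPTransport

transport = RequestsHTTPTransport(url="https://example.com/api/graphql")  # placeholder
client = Client(transport=transport, fetch_schema_from_transport=False)

query = """
query dataset($urn: String!) {
  dataset(urn: $urn) { urn }
}
"""

request = GraphQLRequest(query, variable_values={"urn": "urn:li:dataset:placeholder"})
result = client.execute(request)  # replaces client.execute(gql(query), variable_values=...)
print(result)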
datahub/cli/docker_check.py CHANGED
@@ -13,8 +13,8 @@ import yaml
  from datahub.configuration.common import ExceptionWithProps

  # Docker seems to under-report memory allocated, so we also need a bit of buffer to account for it.
- MIN_MEMORY_NEEDED = 3.8 # GB
- MIN_DISK_SPACE_NEEDED = 12 # GB
+ MIN_MEMORY_NEEDED = 4 # GB
+ MIN_DISK_SPACE_NEEDED = 13 # GB

  DOCKER_COMPOSE_PROJECT_NAME = os.getenv("DATAHUB_COMPOSE_PROJECT_NAME", "datahub")
  DATAHUB_COMPOSE_PROJECT_FILTER = {
datahub/configuration/common.py CHANGED
@@ -1,20 +1,25 @@
+ import dataclasses
  import re
  import unittest.mock
  from abc import ABC, abstractmethod
  from enum import auto
  from typing import (
  IO,
+ TYPE_CHECKING,
+ Annotated,
  Any,
  ClassVar,
  Dict,
  List,
  Optional,
  Type,
+ TypeVar,
  Union,
  runtime_checkable,
  )

  import pydantic
+ import pydantic_core
  from cached_property import cached_property
  from pydantic import BaseModel, Extra, ValidationError
  from pydantic.fields import Field
@@ -83,6 +88,29 @@ def redact_raw_config(obj: Any) -> Any:
  return obj


+ if TYPE_CHECKING:
+ AnyType = TypeVar("AnyType")
+ HiddenFromDocs = Annotated[AnyType, ...]
+ else:
+ HiddenFromDocs = pydantic.json_schema.SkipJsonSchema
+
+ LaxStr = Annotated[str, pydantic.BeforeValidator(lambda v: str(v))]
+
+
+ @dataclasses.dataclass(frozen=True)
+ class SupportedSources:
+ sources: List[str]
+
+ def __get_pydantic_json_schema__(
+ self,
+ core_schema: pydantic_core.core_schema.CoreSchema,
+ handler: pydantic.GetJsonSchemaHandler,
+ ) -> pydantic.json_schema.JsonSchemaValue:
+ json_schema = handler(core_schema)
+ json_schema.setdefault("schema_extra", {})["supported_sources"] = self.sources
+ return json_schema
+
+
  class ConfigModel(BaseModel):
  class Config:
  @staticmethod
@@ -334,4 +362,4 @@ class KeyValuePattern(ConfigModel):


  class VersionedConfig(ConfigModel):
- version: str = "1"
+ version: LaxStr = "1"
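
The new LaxStr alias is an Annotated str with a BeforeValidator that coerces any incoming value to str; it backs VersionedConfig.version and the properties dictionaries elsewhere in this release. A minimal sketch of the coercion under Pydantic v2:

# Minimal sketch of the LaxStr coercion shown in the diff (Pydantic v2).
from typing import Annotated, Dict, Optional

import pydantic

LaxStr = Annotated[str, pydantic.BeforeValidator(lambda v: str(v))]


class Example(pydantic.BaseModel):
    version: LaxStr = "1"
    properties: Optional[Dict[str, LaxStr]] = None


cfg = Example(version=2, properties={"retention_days": 30})
print(cfg.version, cfg.properties)  # -> '2' {'retention_days': '30'}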
datahub/configuration/connection_resolver.py CHANGED
@@ -1,13 +1,16 @@
- from typing import Type
+ from typing import TYPE_CHECKING, Type

  import pydantic

  from datahub.ingestion.api.global_context import get_graph_context

+ if TYPE_CHECKING:
+ from pydantic.deprecated.class_validators import V1RootValidator
+

  def auto_connection_resolver(
  connection_field: str = "connection",
- ) -> classmethod:
+ ) -> "V1RootValidator":
  def _resolve_connection(cls: Type, values: dict) -> dict:
  if connection_field in values:
  connection_urn = values.pop(connection_field)
datahub/configuration/import_resolver.py CHANGED
@@ -1,15 +1,18 @@
- from typing import TypeVar, Union
+ from typing import TYPE_CHECKING, Type, TypeVar, Union

  import pydantic

  from datahub.ingestion.api.registry import import_path

- T = TypeVar("T")
+ if TYPE_CHECKING:
+ from pydantic.deprecated.class_validators import V1Validator

+ _T = TypeVar("_T")

- def _pydantic_resolver(v: Union[T, str]) -> T:
+
+ def _pydantic_resolver(cls: Type, v: Union[str, _T]) -> _T:
  return import_path(v) if isinstance(v, str) else v


- def pydantic_resolve_key(field: str) -> classmethod:
+ def pydantic_resolve_key(field: str) -> "V1Validator":
  return pydantic.validator(field, pre=True, allow_reuse=True)(_pydantic_resolver)
datahub/configuration/pydantic_migration_helpers.py CHANGED
@@ -9,14 +9,6 @@ PYDANTIC_VERSION_2 = _pydantic_version >= Version("2.0")
  # https://docs.pydantic.dev/latest/changelog/#v250-2023-11-13
  PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR = _pydantic_version >= Version("2.5.0")

- # This can be used to silence deprecation warnings while we migrate.
- if PYDANTIC_VERSION_2:
- from pydantic import PydanticDeprecatedSince20 # type: ignore
- else:
-
- class PydanticDeprecatedSince20(Warning): # type: ignore
- pass
-


  if PYDANTIC_VERSION_2:
@@ -52,7 +44,6 @@ class v1_ConfigModel(v1_BaseModel):
  __all__ = [
  "PYDANTIC_VERSION_2",
  "PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR",
- "PydanticDeprecatedSince20",
  "GenericModel",
  "v1_ConfigModel",
  "v1_Field",
datahub/configuration/source_common.py CHANGED
@@ -1,6 +1,6 @@
  from typing import Dict, Optional

- from pydantic import validator
+ import pydantic
  from pydantic.fields import Field

  from datahub.configuration.common import ConfigModel
@@ -30,7 +30,8 @@ class EnvConfigMixin(ConfigModel):
  description="The environment that all assets produced by this connector belong to",
  )

- @validator("env")
+ @pydantic.field_validator("env", mode="after")
+ @classmethod
  def env_must_be_one_of(cls, v: str) -> str:
  if v.upper() not in ALL_ENV_TYPES:
  raise ValueError(f"env must be one of {ALL_ENV_TYPES}, found {v}")
datahub/configuration/validate_field_deprecation.py CHANGED
@@ -1,11 +1,14 @@
  import warnings
- from typing import Any, Optional, Type
+ from typing import TYPE_CHECKING, Any, Optional, Type

  import pydantic

  from datahub.configuration.common import ConfigurationWarning
  from datahub.utilities.global_warning_util import add_global_warning

+ if TYPE_CHECKING:
+ from pydantic.deprecated.class_validators import V1RootValidator
+
  _unset = object()


@@ -13,7 +16,7 @@ def pydantic_field_deprecated(
  field: str,
  warn_if_value_is_not: Any = _unset,
  message: Optional[str] = None,
- ) -> classmethod:
+ ) -> "V1RootValidator":
  if message:
  output = message
  else:
datahub/configuration/validate_field_removal.py CHANGED
@@ -1,15 +1,18 @@
  import warnings
- from typing import Type
+ from typing import TYPE_CHECKING, Type

  import pydantic

  from datahub.configuration.common import ConfigurationWarning

+ if TYPE_CHECKING:
+ from pydantic.deprecated.class_validators import V1RootValidator
+

  def pydantic_removed_field(
  field: str,
  print_warning: bool = True,
- ) -> classmethod:
+ ) -> "V1RootValidator":
  def _validate_field_removal(cls: Type, values: dict) -> dict:
  if field in values:
  if print_warning:
datahub/configuration/validate_field_rename.py CHANGED
@@ -1,11 +1,14 @@
  import warnings
- from typing import Callable, Type, TypeVar
+ from typing import TYPE_CHECKING, Callable, Type, TypeVar

  import pydantic

  from datahub.configuration.common import ConfigurationWarning
  from datahub.utilities.global_warning_util import add_global_warning

+ if TYPE_CHECKING:
+ from pydantic.deprecated.class_validators import V1RootValidator
+
  _T = TypeVar("_T")


@@ -18,7 +21,7 @@ def pydantic_renamed_field(
  new_name: str,
  transform: Callable = _default_rename_transform,
  print_warning: bool = True,
- ) -> classmethod:
+ ) -> "V1RootValidator":
  def _validate_field_rename(cls: Type, values: dict) -> dict:
  if old_name in values:
  if new_name in values:
@@ -49,6 +52,4 @@ def pydantic_renamed_field(
  # validator with pre=True gets all the values that were passed in.
  # Given that a renamed field doesn't show up in the fields list, we can't use
  # the field-level validator, even with a different field name.
- return pydantic.root_validator(pre=True, skip_on_failure=True, allow_reuse=True)(
- _validate_field_rename
- )
+ return pydantic.root_validator(pre=True, allow_reuse=True)(_validate_field_rename)
datahub/configuration/validate_multiline_string.py CHANGED
@@ -1,9 +1,12 @@
- from typing import Optional, Type, Union
+ from typing import TYPE_CHECKING, Optional, Type, Union

  import pydantic

+ if TYPE_CHECKING:
+ from pydantic.deprecated.class_validators import V1Validator

- def pydantic_multiline_string(field: str) -> classmethod:
+
+ def pydantic_multiline_string(field: str) -> "V1Validator":
  """If the field is present and contains an escaped newline, replace it with a real newline.

  This makes the assumption that the field value is never supposed to have a
datahub/ingestion/autogenerated/capability_summary.json CHANGED
@@ -2372,6 +2372,12 @@
  "subtype_modifier": null,
  "supported": true
  },
+ {
+ "capability": "TAGS",
+ "description": "Supported by default",
+ "subtype_modifier": null,
+ "supported": true
+ },
  {
  "capability": "LINEAGE_COARSE",
  "description": "Supported by default",
@@ -2962,6 +2968,38 @@
  "platform_name": "Slack",
  "support_status": "TESTING"
  },
+ "snaplogic": {
+ "capabilities": [
+ {
+ "capability": "LINEAGE_FINE",
+ "description": "Enabled by default",
+ "subtype_modifier": null,
+ "supported": true
+ },
+ {
+ "capability": "DELETION_DETECTION",
+ "description": "Not supported yet",
+ "subtype_modifier": null,
+ "supported": false
+ },
+ {
+ "capability": "PLATFORM_INSTANCE",
+ "description": "Snaplogic does not support platform instances",
+ "subtype_modifier": null,
+ "supported": false
+ },
+ {
+ "capability": "LINEAGE_COARSE",
+ "description": "Enabled by default",
+ "subtype_modifier": null,
+ "supported": true
+ }
+ ],
+ "classname": "datahub.ingestion.source.snaplogic.snaplogic.SnaplogicSource",
+ "platform_id": "snaplogic",
+ "platform_name": "Snaplogic",
+ "support_status": "TESTING"
+ },
  "snowflake": {
  "capabilities": [
  {
@@ -3174,6 +3212,12 @@
  "subtype_modifier": null,
  "supported": true
  },
+ {
+ "capability": "TAGS",
+ "description": "Supported by default",
+ "subtype_modifier": null,
+ "supported": true
+ },
  {
  "capability": "LINEAGE_COARSE",
  "description": "Supported by default",
@@ -3605,4 +3649,4 @@
  "support_status": "CERTIFIED"
  }
  }
- }
+ }
datahub/ingestion/run/pipeline_config.py CHANGED
@@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional

  from pydantic import Field, validator

- from datahub.configuration.common import ConfigModel, DynamicTypedConfig
+ from datahub.configuration.common import ConfigModel, DynamicTypedConfig, HiddenFromDocs
  from datahub.ingestion.graph.config import DatahubClientConfig
  from datahub.ingestion.sink.file import FileSinkConfig

@@ -85,7 +85,7 @@ class PipelineConfig(ConfigModel):
  source: SourceConfig
  sink: Optional[DynamicTypedConfig] = None
  transformers: Optional[List[DynamicTypedConfig]] = None
- flags: FlagsConfig = Field(default=FlagsConfig(), hidden_from_docs=True)
+ flags: HiddenFromDocs[FlagsConfig] = FlagsConfig()
  reporting: List[ReporterConfig] = []
  run_id: str = DEFAULT_RUN_ID
  datahub_api: Optional[DatahubClientConfig] = None
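
The flags field now uses the HiddenFromDocs annotation instead of the custom hidden_from_docs=True kwarg. Assuming HiddenFromDocs resolves to Pydantic's SkipJsonSchema, as the configuration/common.py hunk defines it, the effect on the generated JSON schema looks roughly like this:

# Sketch of the HiddenFromDocs behavior, assuming it is SkipJsonSchema under Pydantic v2.
import pydantic
from pydantic.json_schema import SkipJsonSchema

HiddenFromDocs = SkipJsonSchema  # assumption: mirrors datahub.configuration.common


class PipelineLike(pydantic.BaseModel):
    run_id: str = "default"
    flags: HiddenFromDocs[dict] = {}


schema = PipelineLike.model_json_schema()
print("flags" in schema["properties"])     # -> False: hidden from generated docs
print(PipelineLike(flags={"x": 1}).flags)  # -> {'x': 1}: still usable at runtime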
datahub/ingestion/source/azure/azure_common.py CHANGED
@@ -81,7 +81,7 @@ class AzureConnectionConfig(ConfigModel):
  )
  return self.sas_token if self.sas_token is not None else self.account_key

- @root_validator()
+ @root_validator(skip_on_failure=True)
  def _check_credential_values(cls, values: Dict) -> Dict:
  if (
  values.get("account_key")