acryl-datahub 1.1.0rc3__py3-none-any.whl → 1.1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (87) hide show
  1. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/METADATA +2532 -2530
  2. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/RECORD +87 -70
  3. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +9 -8
  6. datahub/api/entities/external/__init__.py +0 -0
  7. datahub/api/entities/external/external_entities.py +239 -0
  8. datahub/api/entities/external/external_tag.py +145 -0
  9. datahub/api/entities/external/restricted_text.py +247 -0
  10. datahub/api/entities/external/unity_catalog_external_entites.py +170 -0
  11. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  12. datahub/cli/delete_cli.py +4 -4
  13. datahub/cli/ingest_cli.py +9 -1
  14. datahub/emitter/mce_builder.py +3 -1
  15. datahub/emitter/response_helper.py +86 -1
  16. datahub/emitter/rest_emitter.py +1 -1
  17. datahub/ingestion/graph/client.py +3 -3
  18. datahub/ingestion/source/apply/datahub_apply.py +4 -4
  19. datahub/ingestion/source/data_lake_common/data_lake_utils.py +22 -10
  20. datahub/ingestion/source/data_lake_common/object_store.py +644 -0
  21. datahub/ingestion/source/datahub/config.py +11 -0
  22. datahub/ingestion/source/datahub/datahub_database_reader.py +186 -33
  23. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  24. datahub/ingestion/source/dbt/dbt_common.py +30 -11
  25. datahub/ingestion/source/gcs/gcs_source.py +22 -7
  26. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  27. datahub/ingestion/source/hex/query_fetcher.py +9 -3
  28. datahub/ingestion/source/openapi.py +12 -0
  29. datahub/ingestion/source/openapi_parser.py +56 -37
  30. datahub/ingestion/source/s3/source.py +65 -6
  31. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  32. datahub/ingestion/source/snowflake/snowflake_queries.py +44 -21
  33. datahub/ingestion/source/snowflake/snowflake_query.py +0 -7
  34. datahub/ingestion/source/snowflake/snowflake_v2.py +17 -6
  35. datahub/ingestion/source/sql/athena.py +1 -0
  36. datahub/ingestion/source/sql/hive.py +2 -3
  37. datahub/ingestion/source/sql/sql_common.py +98 -34
  38. datahub/ingestion/source/sql/sql_types.py +5 -2
  39. datahub/ingestion/source/unity/config.py +5 -0
  40. datahub/ingestion/source/unity/proxy.py +117 -0
  41. datahub/ingestion/source/unity/source.py +167 -15
  42. datahub/ingestion/source/unity/tag_entities.py +295 -0
  43. datahub/metadata/_internal_schema_classes.py +667 -522
  44. datahub/metadata/_urns/urn_defs.py +1804 -1748
  45. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  46. datahub/metadata/schema.avsc +17358 -17584
  47. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  48. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  49. datahub/metadata/schemas/Applications.avsc +38 -0
  50. datahub/metadata/schemas/ChartKey.avsc +1 -0
  51. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  52. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  53. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  54. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  55. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  56. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  57. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  58. datahub/metadata/schemas/DatasetKey.avsc +1 -0
  59. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  60. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  61. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  62. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  63. datahub/metadata/schemas/MLModelGroupKey.avsc +1 -0
  64. datahub/metadata/schemas/MLModelKey.avsc +1 -0
  65. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  66. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  67. datahub/metadata/schemas/__init__.py +3 -3
  68. datahub/sdk/__init__.py +6 -0
  69. datahub/sdk/_all_entities.py +11 -0
  70. datahub/sdk/_shared.py +118 -1
  71. datahub/sdk/chart.py +315 -0
  72. datahub/sdk/container.py +7 -0
  73. datahub/sdk/dashboard.py +432 -0
  74. datahub/sdk/dataflow.py +309 -0
  75. datahub/sdk/datajob.py +342 -0
  76. datahub/sdk/dataset.py +8 -2
  77. datahub/sdk/entity_client.py +90 -2
  78. datahub/sdk/lineage_client.py +681 -82
  79. datahub/sdk/main_client.py +27 -8
  80. datahub/sdk/mlmodel.py +101 -38
  81. datahub/sdk/mlmodelgroup.py +7 -0
  82. datahub/sql_parsing/sql_parsing_aggregator.py +1 -1
  83. datahub/testing/mce_helpers.py +421 -0
  84. datahub/testing/sdk_v2_helpers.py +18 -0
  85. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/entry_points.txt +0 -0
  86. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/licenses/LICENSE +0 -0
  87. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,239 @@
1
+ import logging
2
+ from abc import abstractmethod
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+ from typing import Iterable, List, Optional, Union
6
+
7
+ import cachetools
8
+ from pydantic import BaseModel
9
+
10
+ from datahub.api.entities.platformresource.platform_resource import (
11
+ PlatformResource,
12
+ PlatformResourceKey,
13
+ )
14
+ from datahub.ingestion.graph.client import DataHubGraph
15
+ from datahub.metadata.urns import PlatformResourceUrn, Urn
16
+ from datahub.utilities.search_utils import ElasticDocumentQuery
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class PlatformResourceRepository:
    """Cache-backed facade over DataHub platform-resource CRUD operations.

    Results are memoized in a TTL cache (1000 entries, 5-minute expiry)
    keyed by the platform resource id.
    """

    def __init__(self, graph: DataHubGraph):
        self.graph = graph
        self.cache: cachetools.TTLCache = cachetools.TTLCache(maxsize=1000, ttl=60 * 5)

    def search_by_filter(
        self, filter: ElasticDocumentQuery, add_to_cache: bool = True
    ) -> Iterable[PlatformResource]:
        """Stream platform resources matching *filter*, optionally caching each hit."""
        for resource in PlatformResource.search_by_filters(self.graph, filter):
            if add_to_cache:
                self.cache[resource.id] = resource
            yield resource

    def create(self, platform_resource: PlatformResource) -> None:
        """Persist *platform_resource* to DataHub and remember it in the cache."""
        platform_resource.to_datahub(self.graph)
        self.cache[platform_resource.id] = platform_resource

    def get(self, key: PlatformResourceKey) -> Optional[PlatformResource]:
        """Return the cached resource for *key*, or None on a cache miss.

        NOTE: consults only the local cache; it does not fall back to DataHub.
        """
        return self.cache.get(key.id)

    def delete(self, key: PlatformResourceKey) -> None:
        """Hard-delete the resource from DataHub and evict it from the cache."""
        self.graph.delete_entity(urn=PlatformResourceUrn(key.id).urn(), hard=True)
        del self.cache[key.id]
45
+
46
+
47
class ExternalEntityId:
    """Unique identifier of an ExternalEntity.

    Concrete subclasses define how the identifier maps onto a DataHub
    platform-resource key.
    """

    @abstractmethod
    def to_platform_resource_key(self) -> PlatformResourceKey:
        """Translate this identifier into a PlatformResourceKey."""
        pass
59
+
60
+
61
class CaseSensitivity(Enum):
    """Classification of a string's letter casing."""

    UPPER = "upper"
    LOWER = "lower"
    MIXED = "mixed"

    @staticmethod
    def detect_case_sensitivity(value: str) -> "CaseSensitivity":
        """Classify a single string.

        Follows ``str.isupper``/``str.islower`` semantics: a string with no
        cased characters (e.g. "" or "123") is reported as MIXED.
        """
        if value.isupper():
            return CaseSensitivity.UPPER
        elif value.islower():
            return CaseSensitivity.LOWER
        return CaseSensitivity.MIXED

    @staticmethod
    def detect_for_many(values: List[str]) -> "CaseSensitivity":
        """
        Detects the case sensitivity for a list of strings.
        Returns CaseSensitivity.MIXED if the case sensitivity is mixed,
        or if the list is empty.
        """
        if not values:
            return CaseSensitivity.MIXED

        # Single pass: classify each value once and short-circuit as soon as
        # two values disagree. (The original made two full passes, each
        # re-classifying every element.)
        first = CaseSensitivity.detect_case_sensitivity(values[0])
        if first is CaseSensitivity.MIXED:
            return CaseSensitivity.MIXED
        for value in values[1:]:
            if CaseSensitivity.detect_case_sensitivity(value) is not first:
                return CaseSensitivity.MIXED
        return first
94
+
95
+
96
class LinkedResourceSet(BaseModel):
    """
    A LinkedResourceSet is a set of DataHub URNs that are linked to an ExternalEntity.
    """

    urns: List[str]

    def _has_conflict(self, urn: Urn) -> bool:
        """
        Decide whether *urn* is safe to add to the set.

        A conflict arises when entity types diverge (e.g. mixing tags and
        terms) or when any URN fails to parse; case sensitivity differences
        between URNs are a further example of such conflicts. Re-adding a URN
        already present is not a conflict. Returns True when the URN must be
        rejected, False when it may be added.
        """
        if urn.urn() in self.urns:
            return False

        # Work out which entity_type the existing members agree on.
        detected_entity_type = None
        for member in self.urns:
            try:
                member_type = Urn.from_string(member).entity_type
            except ValueError:
                # Not a valid URN
                logger.warning(f"Invalid URN {member} in LinkedResourceSet")
                return True
            if detected_entity_type is None:
                detected_entity_type = member_type
            elif detected_entity_type != member_type:
                logger.warning(
                    f"Detected entity_type {detected_entity_type} is not equals to {member_type}"
                )
                return True

        # Now compare the candidate against the detected entity_type.
        parsed_urn = urn
        try:
            if (
                detected_entity_type is not None
                and parsed_urn.entity_type != detected_entity_type
            ):
                logger.warning(
                    f"Detected entity_type {detected_entity_type} is not equals to parsed_urn's entity_type: {parsed_urn.entity_type}"
                )
                return True
        except ValueError:
            # Not a valid URN
            logger.warning(f"Invalid URN: {urn} in LinkedResourceSet")
            return True
        return False

    def add(self, urn: Union[str, Urn]) -> bool:
        """
        Adds a URN to the set.
        Returns True if the URN was added, False if it was already in the set.
        Raises a ValueError if the URN is in conflict with the existing set.
        """
        # Collapse accidental duplicates left behind by concurrent runs.
        self.urns = list(set(self.urns))
        parsed = Urn.from_string(urn) if isinstance(urn, str) else urn
        if self._has_conflict(parsed):
            raise ValueError(f"Conflict detected when adding URN {parsed} to the set")
        if parsed.urn() in self.urns:
            return False
        self.urns.append(parsed.urn())
        return True
166
+
167
+
168
class ExternalEntity:
    """
    Representation of an entity that is external to DataHub but can be
    linked to one or more DataHub entities.
    """

    @abstractmethod
    def is_managed_by_datahub(self) -> bool:
        """Tell whether DataHub manages this entity."""
        pass

    @abstractmethod
    def datahub_linked_resources(self) -> LinkedResourceSet:
        """Return the set of DataHub URNs linked to this entity (empty when none)."""
        pass

    @abstractmethod
    def as_platform_resource(self) -> PlatformResource:
        """Render this entity as a DataHub PlatformResource."""
        pass

    @abstractmethod
    def get_id(self) -> ExternalEntityId:
        """Return the ExternalEntityId identifying this entity."""
        pass
202
+
203
+
204
@dataclass
class MissingExternalEntity(ExternalEntity):
    """Placeholder for an entity that could not be resolved in the external system."""

    id: ExternalEntityId

    def is_managed_by_datahub(self) -> bool:
        # A missing entity is never DataHub-managed.
        return False

    def datahub_linked_resources(self) -> LinkedResourceSet:
        # Nothing can be linked to an entity that does not exist.
        return LinkedResourceSet(urns=[])

    def as_platform_resource(self) -> Optional[PlatformResource]:  # type: ignore[override]
        # No platform-resource representation for a missing entity.
        return None

    def get_id(self) -> ExternalEntityId:
        return self.id
219
+
220
+
221
class ExternalSystem:
    """Abstract gateway to the external system that owns the entities."""

    @abstractmethod
    def exists(self, external_entity_id: ExternalEntityId) -> bool:
        """Report whether *external_entity_id* is present in the external system."""
        pass

    @abstractmethod
    def get(
        self,
        external_entity_id: ExternalEntityId,
        platform_resource_repository: PlatformResourceRepository,
    ) -> Optional[ExternalEntity]:
        """
        Fetch the ExternalEntity behind *external_entity_id*.

        The platform resource repository is used to enrich the returned
        entity with the DataHub URNs already linked to it.
        """
        pass
@@ -0,0 +1,145 @@
1
+ """
2
+ External Tags Module
3
+
4
+ This module provides tag types that integrate with external systems like DataHub and Unity Catalog.
5
+ It builds on top of RestrictedText to provide sanitized, truncated tag handling with original value preservation.
6
+
7
+ Classes:
8
+ - ExternalTag: DataHub-compatible tag with key/value parsing from URNs
9
+
10
+ Example Usage:
11
+ # DataHub Tags
12
+ tag = ExternalTag.from_urn("urn:li:tag:environment:production")
13
+ datahub_urn = tag.get_datahub_tag # Returns TagUrn object or string
14
+
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from typing import Any, Optional, Tuple, Union
20
+
21
+ from pydantic import BaseModel
22
+
23
+ from datahub.api.entities.external.restricted_text import RestrictedText
24
+ from datahub.metadata.urns import TagUrn
25
+
26
+
27
class ExternalTag(BaseModel):
    """A tag type that parses DataHub Tag URNs into key-value pairs with RestrictedText properties."""

    key: RestrictedText
    value: Optional[RestrictedText] = None

    def __init__(
        self,
        key: Optional[Union[str, RestrictedText]] = None,
        value: Optional[Union[str, RestrictedText]] = None,
        **data: Any,
    ) -> None:
        """
        Initialize from an explicit key/value pair, or fall back to plain
        Pydantic keyword initialization when *key* is not given.

        Args:
            key: Explicit key value (optional for Pydantic initialization)
            value: Explicit value (optional)
            **data: Additional Pydantic data
        """
        if key is None:
            # Standard pydantic initialization path.
            super().__init__(**data)
            return

        # Direct initialization: coerce plain strings into RestrictedText.
        coerced_key = key if isinstance(key, RestrictedText) else RestrictedText(key)
        coerced_value: Optional[RestrictedText] = None
        if value is not None:
            coerced_value = (
                value if isinstance(value, RestrictedText) else RestrictedText(value)
            )
        super().__init__(key=coerced_key, value=coerced_value, **data)

    @staticmethod
    def _parse_tag_name(tag_name: str) -> Tuple[str, Optional[str]]:
        """
        Split *tag_name* into a (key, value) pair.

        A name containing ':' is split on the first ':' into key and value;
        otherwise the whole name is the key and the value is None.
        """
        head, sep, tail = tag_name.partition(":")
        return (head, tail) if sep else (tag_name, None)

    def to_datahub_tag_urn(self) -> TagUrn:
        """
        Build a DataHub Tag URN from the original (unprocessed) key and value.

        Returns:
            'urn:li:tag:key:value' if value exists, otherwise 'urn:li:tag:key'
        """
        name = (
            self.key.original
            if self.value is None
            else f"{self.key.original}:{self.value.original}"
        )
        return TagUrn(name=name)

    @classmethod
    def from_urn(cls, tag_urn: Union[str, "TagUrn"]) -> "ExternalTag":
        """
        Create an ExternalTag from a DataHub Tag URN.

        Args:
            tag_urn: DataHub Tag URN string or TagUrn object

        Returns:
            ExternalTag instance
        """
        parsed = TagUrn.from_string(tag_urn) if isinstance(tag_urn, str) else tag_urn
        key, value = cls._parse_tag_name(parsed.name)
        return cls(key=key, value=value)

    @classmethod
    def from_key_value(cls, key: str, value: Optional[str] = None) -> "ExternalTag":
        """
        Create an ExternalTag from an explicit key and optional value.

        Args:
            key: Tag key
            value: Optional tag value

        Returns:
            ExternalTag instance
        """
        return cls(key=key, value=value)

    def __str__(self) -> str:
        """Processed string form: 'key:value' when a value exists, else 'key'."""
        return str(self.key) if self.value is None else f"{self.key}:{self.value}"

    def __repr__(self) -> str:
        if self.value is None:
            return f"ExternalTag(key={self.key!r})"
        return f"ExternalTag(key={self.key!r}, value={self.value!r})"
@@ -0,0 +1,247 @@
1
+ """The `RestrictedText` module provides a custom Pydantic type that stores the original
2
+ value but returns a truncated and sanitized version when accessed.
3
+
4
+ Features:
5
+ - Configurable maximum length with truncation
6
+ - Character replacement (default replaces with underscore)
7
+ - Preserves original value internally
8
+ - Customizable truncation suffix
9
+ - Compatible with both Pydantic v1 and v2
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Any, ClassVar, Optional, Set, Union
15
+
16
# Check Pydantic version and import accordingly
try:
    from pydantic import VERSION

    # Major version >= 2 selects the pydantic-v2 core-schema hooks defined below.
    PYDANTIC_V2 = int(VERSION.split(".")[0]) >= 2
except (ImportError, AttributeError):
    # Fallback for older versions that don't have VERSION
    PYDANTIC_V2 = False

if PYDANTIC_V2:
    # v2 integration: core-schema based validation.
    from pydantic import GetCoreSchemaHandler  # type: ignore[attr-defined]
    from pydantic_core import core_schema
else:
    # v1 integration: classic __get_validators__ protocol.
    from pydantic.validators import str_validator
30
+
31
+
32
class RestrictedTextConfig:
    """Plain container holding display settings for RestrictedText fields."""

    def __init__(
        self,
        max_length: Optional[int] = None,
        replace_chars: Optional[Set[str]] = None,
        replacement_char: Optional[str] = None,
        truncation_suffix: Optional[str] = None,
    ):
        # None for any setting means "keep the RestrictedText default".
        self.max_length = max_length
        self.replace_chars = replace_chars
        self.replacement_char = replacement_char
        self.truncation_suffix = truncation_suffix
46
+
47
+
48
class RestrictedText(str):
    """A string type that stores the original value but returns a truncated and sanitized version.

    This type allows you to:
    - Set a maximum length for the displayed value
    - Replace specific characters with a replacement character
    - Access both the original and processed values

    NOTE(review): the underlying ``str`` payload is always the empty string
    (see ``__new__`` — ``str`` is immutable, so the display value cannot be
    "set later"). Built-in ``str`` operations such as ``==``, ``len()`` or
    slicing therefore operate on ``""``, not on the processed value. Use
    ``str(instance)`` / ``.processed`` for display and ``.original`` for the
    raw value — confirm this trade-off is intended.

    ```python
    from pydantic import BaseModel

    class TestModel(BaseModel):
        # Basic usage with default settings
        name: RestrictedText

        # Custom max length and character replacement using Field
        custom_field: RestrictedText = RestrictedText.with_config(
            max_length=10,
            forbidden_chars={' ', '-', '.'},
            replacement_char='_'
        )

    # Usage example
    model = TestModel(
        name="This is a very long string with special characters!",
        custom_field="hello-world.test"
    )

    print(model.name)  # Truncated and sanitized version
    print(model.name.original)  # Original value
    print(model.custom_field)  # "hello_worl..."
    ```
    """

    # Default configuration, shared class-wide; per-instance overrides are
    # applied by _configure().
    _default_max_length: ClassVar[Optional[int]] = 50
    _default_replace_chars: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
    _default_replacement_char: ClassVar[str] = "_"
    _default_truncation_suffix: ClassVar[str] = "..."

    def __new__(cls, value: str = "") -> "RestrictedText":
        """Create a new string instance."""
        instance = str.__new__(cls, "")  # We'll set the display value later
        return instance

    def __init__(self, value: str = ""):
        """Initialize the RestrictedText with a value."""
        # Keep the raw input; display settings start from the class defaults.
        self.original: str = value
        self.max_length = self._default_max_length
        self.replace_chars = self._default_replace_chars
        self.replacement_char = self._default_replacement_char
        self.truncation_suffix = self._default_truncation_suffix

        # Process the value
        self._processed_value = self._process_value(value)

    def _configure(
        self,
        max_length: Optional[int] = None,
        replace_chars: Optional[Set[str]] = None,
        replacement_char: Optional[str] = None,
        truncation_suffix: Optional[str] = None,
    ) -> "RestrictedText":
        """Configure this instance with custom settings.

        None arguments leave the corresponding setting untouched. Returns
        self so calls can be chained.
        """
        if max_length is not None:
            self.max_length = max_length
        if replace_chars is not None:
            self.replace_chars = replace_chars
        if replacement_char is not None:
            self.replacement_char = replacement_char
        if truncation_suffix is not None:
            self.truncation_suffix = truncation_suffix

        # Reprocess the value with new configuration
        self._processed_value = self._process_value(self.original)
        return self

    def _process_value(self, value: str) -> str:
        """Process the value by replacing characters and truncating."""
        # Replace specified characters
        processed = value
        for char in self.replace_chars:
            processed = processed.replace(char, self.replacement_char)

        # Truncate if necessary
        if self.max_length is not None and len(processed) > self.max_length:
            if len(self.truncation_suffix) >= self.max_length:
                # If suffix is too long, just truncate without suffix
                processed = processed[: self.max_length]
            else:
                # Truncate and add suffix
                truncate_length = self.max_length - len(self.truncation_suffix)
                processed = processed[:truncate_length] + self.truncation_suffix

        return processed

    def __str__(self) -> str:
        """Return the processed (truncated and sanitized) value."""
        return self._processed_value

    def __repr__(self) -> str:
        # Repr shows the processed value, matching what __str__ displays.
        return f"{self.__class__.__name__}({self._processed_value!r})"

    @property
    def processed(self) -> str:
        """Get the processed (truncated and sanitized) value."""
        return self._processed_value

    @classmethod
    def with_config(
        cls,
        max_length: Optional[int] = None,
        forbidden_chars: Optional[Set[str]] = None,
        replacement_char: Optional[str] = None,
        truncation_suffix: Optional[str] = None,
    ) -> RestrictedTextConfig:
        """Create a configuration object for use as field default.

        Args:
            max_length: Maximum length of the processed string
            forbidden_chars: Set of characters to replace
            replacement_char: Character to use as replacement
            truncation_suffix: Suffix to add when truncating

        Returns:
            A configuration object that can be used as field default
        """
        return RestrictedTextConfig(
            max_length=max_length,
            replace_chars=forbidden_chars,
            replacement_char=replacement_char,
            truncation_suffix=truncation_suffix,
        )

    # Pydantic v2 methods — only defined when the installed pydantic is v2,
    # as detected at import time by the module-level PYDANTIC_V2 flag.
    if PYDANTIC_V2:

        @classmethod
        def _validate(
            cls,
            __input_value: Union[str, "RestrictedText"],
            _: core_schema.ValidationInfo,
        ) -> "RestrictedText":
            """Validate and create a RestrictedText instance."""
            # Pass existing instances through unchanged to preserve any
            # per-instance configuration already applied.
            if isinstance(__input_value, RestrictedText):
                return __input_value
            return cls(__input_value)

        @classmethod
        def __get_pydantic_core_schema__(
            cls, source: type[Any], handler: GetCoreSchemaHandler
        ) -> core_schema.CoreSchema:
            """Get the Pydantic core schema for this type."""
            return core_schema.with_info_after_validator_function(
                cls._validate,
                core_schema.str_schema(),
                field_name=cls.__name__,
            )

    # Pydantic v1 methods
    else:

        @classmethod
        def __get_validators__(cls):
            """Pydantic v1 validator method."""
            yield cls.validate

        @classmethod
        def validate(cls, v, field=None):
            """Validate and create a RestrictedText instance for Pydantic v1."""
            if isinstance(v, RestrictedText):
                return v

            if not isinstance(v, str):
                # Let pydantic handle the string validation
                v = str_validator(v)

            # Create instance
            instance = cls(v)

            # Check if there's a field default that contains configuration
            # (set via RestrictedText.with_config on the model field).
            if (
                field
                and hasattr(field, "default")
                and isinstance(field.default, RestrictedTextConfig)
            ):
                config = field.default
                instance._configure(
                    max_length=config.max_length,
                    replace_chars=config.replace_chars,
                    replacement_char=config.replacement_char,
                    truncation_suffix=config.truncation_suffix,
                )

            return instance

        @classmethod
        def __modify_schema__(cls, field_schema):
            """Modify the JSON schema for Pydantic v1."""
            field_schema.update(type="string", examples=["example string"])