acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub has been flagged for review.

Files changed (221)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/METADATA +2558 -2531
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/RECORD +221 -187
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/entry_points.txt +2 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +1 -1
  6. datahub/api/entities/external/__init__.py +0 -0
  7. datahub/api/entities/external/external_entities.py +239 -0
  8. datahub/api/entities/external/external_tag.py +145 -0
  9. datahub/api/entities/external/lake_formation_external_entites.py +161 -0
  10. datahub/api/entities/external/restricted_text.py +247 -0
  11. datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
  12. datahub/cli/check_cli.py +88 -7
  13. datahub/cli/cli_utils.py +63 -0
  14. datahub/cli/container_cli.py +5 -0
  15. datahub/cli/delete_cli.py +124 -27
  16. datahub/cli/docker_check.py +107 -12
  17. datahub/cli/docker_cli.py +149 -227
  18. datahub/cli/exists_cli.py +0 -2
  19. datahub/cli/get_cli.py +0 -2
  20. datahub/cli/iceberg_cli.py +5 -0
  21. datahub/cli/ingest_cli.py +3 -15
  22. datahub/cli/migrate.py +2 -0
  23. datahub/cli/put_cli.py +1 -4
  24. datahub/cli/quickstart_versioning.py +50 -7
  25. datahub/cli/specific/assertions_cli.py +0 -4
  26. datahub/cli/specific/datacontract_cli.py +0 -3
  27. datahub/cli/specific/dataproduct_cli.py +0 -11
  28. datahub/cli/specific/dataset_cli.py +1 -8
  29. datahub/cli/specific/forms_cli.py +0 -4
  30. datahub/cli/specific/group_cli.py +0 -2
  31. datahub/cli/specific/structuredproperties_cli.py +1 -4
  32. datahub/cli/specific/user_cli.py +0 -2
  33. datahub/cli/state_cli.py +0 -2
  34. datahub/cli/timeline_cli.py +0 -2
  35. datahub/emitter/rest_emitter.py +70 -12
  36. datahub/entrypoints.py +4 -3
  37. datahub/ingestion/api/decorators.py +15 -3
  38. datahub/ingestion/api/report.py +332 -3
  39. datahub/ingestion/api/sink.py +3 -0
  40. datahub/ingestion/api/source.py +48 -44
  41. datahub/ingestion/autogenerated/__init__.py +0 -0
  42. datahub/ingestion/autogenerated/capability_summary.json +3449 -0
  43. datahub/ingestion/autogenerated/lineage.json +401 -0
  44. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  45. datahub/ingestion/extractor/schema_util.py +13 -4
  46. datahub/ingestion/glossary/classification_mixin.py +5 -0
  47. datahub/ingestion/graph/client.py +100 -15
  48. datahub/ingestion/graph/config.py +1 -0
  49. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
  50. datahub/ingestion/run/pipeline.py +54 -2
  51. datahub/ingestion/sink/datahub_rest.py +13 -0
  52. datahub/ingestion/source/abs/source.py +1 -1
  53. datahub/ingestion/source/aws/aws_common.py +4 -0
  54. datahub/ingestion/source/aws/glue.py +489 -244
  55. datahub/ingestion/source/aws/tag_entities.py +292 -0
  56. datahub/ingestion/source/azure/azure_common.py +2 -2
  57. datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
  58. datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
  59. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
  60. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
  61. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  62. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  63. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  64. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  65. datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
  66. datahub/ingestion/source/common/subtypes.py +45 -0
  67. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  68. datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
  69. datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
  70. datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
  71. datahub/ingestion/source/dbt/dbt_common.py +6 -2
  72. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  73. datahub/ingestion/source/debug/__init__.py +0 -0
  74. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  75. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  76. datahub/ingestion/source/dremio/dremio_config.py +2 -0
  77. datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
  78. datahub/ingestion/source/dremio/dremio_source.py +94 -81
  79. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  80. datahub/ingestion/source/file.py +3 -0
  81. datahub/ingestion/source/fivetran/fivetran.py +34 -26
  82. datahub/ingestion/source/gcs/gcs_source.py +13 -2
  83. datahub/ingestion/source/ge_data_profiler.py +76 -28
  84. datahub/ingestion/source/ge_profiling_config.py +11 -0
  85. datahub/ingestion/source/hex/api.py +26 -1
  86. datahub/ingestion/source/iceberg/iceberg.py +3 -1
  87. datahub/ingestion/source/identity/azure_ad.py +1 -1
  88. datahub/ingestion/source/identity/okta.py +1 -14
  89. datahub/ingestion/source/kafka/kafka.py +16 -0
  90. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  91. datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
  92. datahub/ingestion/source/looker/looker_source.py +1 -0
  93. datahub/ingestion/source/mlflow.py +11 -1
  94. datahub/ingestion/source/mock_data/__init__.py +0 -0
  95. datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
  96. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  97. datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
  98. datahub/ingestion/source/nifi.py +1 -1
  99. datahub/ingestion/source/powerbi/powerbi.py +1 -5
  100. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  101. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  102. datahub/ingestion/source/preset.py +2 -2
  103. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
  104. datahub/ingestion/source/redshift/redshift.py +21 -1
  105. datahub/ingestion/source/redshift/usage.py +4 -3
  106. datahub/ingestion/source/s3/report.py +4 -2
  107. datahub/ingestion/source/s3/source.py +367 -115
  108. datahub/ingestion/source/sac/sac.py +3 -1
  109. datahub/ingestion/source/salesforce.py +6 -3
  110. datahub/ingestion/source/sigma/sigma.py +7 -1
  111. datahub/ingestion/source/slack/slack.py +2 -1
  112. datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
  113. datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
  114. datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
  115. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  116. datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
  117. datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
  118. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  119. datahub/ingestion/source/sql/athena.py +119 -11
  120. datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
  121. datahub/ingestion/source/sql/clickhouse.py +3 -1
  122. datahub/ingestion/source/sql/cockroachdb.py +0 -1
  123. datahub/ingestion/source/sql/hana.py +3 -1
  124. datahub/ingestion/source/sql/hive_metastore.py +3 -11
  125. datahub/ingestion/source/sql/mariadb.py +0 -1
  126. datahub/ingestion/source/sql/mssql/source.py +239 -34
  127. datahub/ingestion/source/sql/mysql.py +0 -1
  128. datahub/ingestion/source/sql/oracle.py +1 -1
  129. datahub/ingestion/source/sql/postgres.py +0 -1
  130. datahub/ingestion/source/sql/sql_common.py +121 -34
  131. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  132. datahub/ingestion/source/sql/teradata.py +997 -235
  133. datahub/ingestion/source/sql/vertica.py +10 -6
  134. datahub/ingestion/source/sql_queries.py +2 -2
  135. datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
  136. datahub/ingestion/source/superset.py +58 -3
  137. datahub/ingestion/source/tableau/tableau.py +58 -37
  138. datahub/ingestion/source/tableau/tableau_common.py +4 -2
  139. datahub/ingestion/source/tableau/tableau_constant.py +0 -4
  140. datahub/ingestion/source/unity/config.py +5 -0
  141. datahub/ingestion/source/unity/proxy.py +118 -0
  142. datahub/ingestion/source/unity/source.py +195 -17
  143. datahub/ingestion/source/unity/tag_entities.py +295 -0
  144. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  145. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
  146. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  147. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  148. datahub/metadata/_internal_schema_classes.py +1433 -546
  149. datahub/metadata/_urns/urn_defs.py +1826 -1658
  150. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  151. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  152. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  153. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  154. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
  155. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  156. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
  157. datahub/metadata/schema.avsc +17736 -17112
  158. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  159. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  160. datahub/metadata/schemas/Applications.avsc +38 -0
  161. datahub/metadata/schemas/ChartKey.avsc +1 -0
  162. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  163. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  164. datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
  165. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  166. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  167. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  168. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  169. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
  170. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  171. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
  172. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  173. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  174. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  175. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  176. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  177. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  178. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  179. datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
  180. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  181. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  182. datahub/metadata/schemas/LogicalParent.avsc +140 -0
  183. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  184. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  185. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  186. datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
  187. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  188. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  189. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
  190. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  191. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  192. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  193. datahub/metadata/schemas/__init__.py +3 -3
  194. datahub/sdk/__init__.py +2 -0
  195. datahub/sdk/_all_entities.py +7 -0
  196. datahub/sdk/_shared.py +116 -0
  197. datahub/sdk/chart.py +315 -0
  198. datahub/sdk/container.py +7 -0
  199. datahub/sdk/dashboard.py +432 -0
  200. datahub/sdk/dataflow.py +7 -0
  201. datahub/sdk/datajob.py +45 -13
  202. datahub/sdk/dataset.py +8 -2
  203. datahub/sdk/entity_client.py +82 -2
  204. datahub/sdk/lineage_client.py +683 -82
  205. datahub/sdk/main_client.py +46 -16
  206. datahub/sdk/mlmodel.py +101 -38
  207. datahub/sdk/mlmodelgroup.py +7 -0
  208. datahub/sdk/search_client.py +4 -3
  209. datahub/specific/chart.py +1 -1
  210. datahub/specific/dataproduct.py +4 -0
  211. datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
  212. datahub/sql_parsing/sqlglot_lineage.py +62 -13
  213. datahub/telemetry/telemetry.py +17 -11
  214. datahub/testing/sdk_v2_helpers.py +7 -1
  215. datahub/upgrade/upgrade.py +46 -13
  216. datahub/utilities/server_config_util.py +8 -0
  217. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  218. datahub/utilities/stats_collections.py +4 -0
  219. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/WHEEL +0 -0
  220. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/licenses/LICENSE +0 -0
  221. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/top_level.txt +0 -0
datahub/api/entities/external/restricted_text.py ADDED
@@ -0,0 +1,247 @@
+ """The `RestrictedText` module provides a custom Pydantic type that stores the original
+ value but returns a truncated and sanitized version when accessed.
+
+ Features:
+ - Configurable maximum length with truncation
+ - Character replacement (default replaces with underscore)
+ - Preserves original value internally
+ - Customizable truncation suffix
+ - Compatible with both Pydantic v1 and v2
+ """
+
+ from __future__ import annotations
+
+ from typing import Any, ClassVar, Optional, Set, Union
+
+ # Check Pydantic version and import accordingly
+ try:
+     from pydantic import VERSION
+
+     PYDANTIC_V2 = int(VERSION.split(".")[0]) >= 2
+ except (ImportError, AttributeError):
+     # Fallback for older versions that don't have VERSION
+     PYDANTIC_V2 = False
+
+ if PYDANTIC_V2:
+     from pydantic import GetCoreSchemaHandler  # type: ignore[attr-defined]
+     from pydantic_core import core_schema
+ else:
+     from pydantic.validators import str_validator
+
+
+ class RestrictedTextConfig:
+     """Configuration class for RestrictedText."""
+
+     def __init__(
+         self,
+         max_length: Optional[int] = None,
+         forbidden_chars: Optional[Set[str]] = None,
+         replacement_char: Optional[str] = None,
+         truncation_suffix: Optional[str] = None,
+     ):
+         self.max_length = max_length
+         self.forbidden_chars = forbidden_chars
+         self.replacement_char = replacement_char
+         self.truncation_suffix = truncation_suffix
+
+
+ class RestrictedText(str):
+     """A string type that stores the original value but returns a truncated and sanitized version.
+
+     This type allows you to:
+     - Set a maximum length for the displayed value
+     - Replace specific characters with a replacement character
+     - Access both the original and processed values
+
+     ```python
+     from pydantic import BaseModel
+
+     class TestModel(BaseModel):
+         # Basic usage with default settings
+         name: RestrictedText
+
+         # Custom max length and character replacement using Field
+         custom_field: RestrictedText = RestrictedText.with_config(
+             max_length=10,
+             forbidden_chars={' ', '-', '.'},
+             replacement_char='_'
+         )
+
+     # Usage example
+     model = TestModel(
+         name="This is a very long string with special characters!",
+         custom_field="hello-world.test"
+     )
+
+     print(model.name)  # Truncated and sanitized version
+     print(model.name.original)  # Original value
+     print(model.custom_field)  # "hello_worl..."
+     ```
+     """
+
+     # Default configuration
+     _default_max_length: ClassVar[Optional[int]] = 50
+     _default_forbidden_chars: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
+     _default_replacement_char: ClassVar[str] = "_"
+     _default_truncation_suffix: ClassVar[str] = "..."
+
+     def __new__(cls, value: str = "") -> "RestrictedText":
+         """Create a new string instance."""
+         instance = str.__new__(cls, "")  # We'll set the display value later
+         return instance
+
+     def __init__(self, value: str = ""):
+         """Initialize the RestrictedText with a value."""
+         self.original: str = value
+         self.max_length = self._default_max_length
+         self.forbidden_chars = self._default_forbidden_chars
+         self.replacement_char = self._default_replacement_char
+         self.truncation_suffix = self._default_truncation_suffix
+
+         # Process the value
+         self._processed_value = self._process_value(value)
+
+     def _configure(
+         self,
+         max_length: Optional[int] = None,
+         forbidden_chars: Optional[Set[str]] = None,
+         replacement_char: Optional[str] = None,
+         truncation_suffix: Optional[str] = None,
+     ) -> "RestrictedText":
+         """Configure this instance with custom settings."""
+         if max_length is not None:
+             self.max_length = max_length
+         if forbidden_chars is not None:
+             self.forbidden_chars = forbidden_chars
+         if replacement_char is not None:
+             self.replacement_char = replacement_char
+         if truncation_suffix is not None:
+             self.truncation_suffix = truncation_suffix
+
+         # Reprocess the value with new configuration
+         self._processed_value = self._process_value(self.original)
+         return self
+
+     def _process_value(self, value: str) -> str:
+         """Process the value by replacing characters and truncating."""
+         # Replace specified characters
+         processed = value
+         for char in self.forbidden_chars:
+             processed = processed.replace(char, self.replacement_char)
+
+         # Truncate if necessary
+         if self.max_length is not None and len(processed) > self.max_length:
+             if len(self.truncation_suffix) >= self.max_length:
+                 # If suffix is too long, just truncate without suffix
+                 processed = processed[: self.max_length]
+             else:
+                 # Truncate and add suffix
+                 truncate_length = self.max_length - len(self.truncation_suffix)
+                 processed = processed[:truncate_length] + self.truncation_suffix
+
+         return processed
+
+     def __str__(self) -> str:
+         """Return the processed (truncated and sanitized) value."""
+         return self._processed_value
+
+     def __repr__(self) -> str:
+         return f"{self.__class__.__name__}({self._processed_value!r})"
+
+     @property
+     def processed(self) -> str:
+         """Get the processed (truncated and sanitized) value."""
+         return self._processed_value
+
+     @classmethod
+     def with_config(
+         cls,
+         max_length: Optional[int] = None,
+         forbidden_chars: Optional[Set[str]] = None,
+         replacement_char: Optional[str] = None,
+         truncation_suffix: Optional[str] = None,
+     ) -> RestrictedTextConfig:
+         """Create a configuration object for use as field default.
+
+         Args:
+             max_length: Maximum length of the processed string
+             forbidden_chars: Set of characters to replace
+             replacement_char: Character to use as replacement
+             truncation_suffix: Suffix to add when truncating
+
+         Returns:
+             A configuration object that can be used as field default
+         """
+         return RestrictedTextConfig(
+             max_length=max_length,
+             forbidden_chars=forbidden_chars,
+             replacement_char=replacement_char,
+             truncation_suffix=truncation_suffix,
+         )
+
+     # Pydantic v2 methods
+     if PYDANTIC_V2:
+
+         @classmethod
+         def _validate(
+             cls,
+             __input_value: Union[str, "RestrictedText"],
+             _: core_schema.ValidationInfo,
+         ) -> "RestrictedText":
+             """Validate and create a RestrictedText instance."""
+             if isinstance(__input_value, RestrictedText):
+                 return __input_value
+             return cls(__input_value)
+
+         @classmethod
+         def __get_pydantic_core_schema__(
+             cls, source: type[Any], handler: GetCoreSchemaHandler
+         ) -> core_schema.CoreSchema:
+             """Get the Pydantic core schema for this type."""
+             return core_schema.with_info_after_validator_function(
+                 cls._validate,
+                 core_schema.str_schema(),
+                 field_name=cls.__name__,
+             )
+
+     # Pydantic v1 methods
+     else:
+
+         @classmethod
+         def __get_validators__(cls):
+             """Pydantic v1 validator method."""
+             yield cls.validate
+
+         @classmethod
+         def validate(cls, v, field=None):
+             """Validate and create a RestrictedText instance for Pydantic v1."""
+             if isinstance(v, RestrictedText):
+                 return v
+
+             if not isinstance(v, str):
+                 # Let pydantic handle the string validation
+                 v = str_validator(v)
+
+             # Create instance
+             instance = cls(v)
+
+             # Check if there's a field default that contains configuration
+             if (
+                 field
+                 and hasattr(field, "default")
+                 and isinstance(field.default, RestrictedTextConfig)
+             ):
+                 config = field.default
+                 instance._configure(
+                     max_length=config.max_length,
+                     forbidden_chars=config.forbidden_chars,
+                     replacement_char=config.replacement_char,
+                     truncation_suffix=config.truncation_suffix,
+                 )
+
+             return instance
+
+         @classmethod
+         def __modify_schema__(cls, field_schema):
+             """Modify the JSON schema for Pydantic v1."""
+             field_schema.update(type="string", examples=["example string"])
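
To make the new type concrete, here is a minimal usage sketch (not part of the diff) based on the docstring above; the printed values assume the documented defaults (50-character limit, whitespace replaced with `_`):

```python
from pydantic import BaseModel

from datahub.api.entities.external.restricted_text import RestrictedText


class Example(BaseModel):
    # Default settings: max_length=50, whitespace -> "_", suffix "..."
    name: RestrictedText


m = Example(name="hello world")
print(str(m.name))       # hello_world  (spaces sanitized)
print(m.name.original)   # hello world  (original preserved)
print(m.name.processed)  # hello_world  (same as str())
```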
datahub/api/entities/external/unity_catalog_external_entites.py ADDED
@@ -0,0 +1,173 @@
+ # Import RestrictedText from your existing module
+ # Uncomment and adjust the import path as needed:
+ # from your_restricted_text_module import RestrictedText
+ # The following is a list of tag constraints:
+ # You can assign a maximum of 50 tags to a single securable object.
+ # The maximum length of a tag key is 255 characters.
+ # The maximum length of a tag value is 1000 characters.
+ # The following characters are not allowed in tag keys:
+ # . , - = / :
+ # Tag search using the workspace search UI is supported only for tables, views, and table columns.
+ # Tag search requires exact term matching.
+ # https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
+ from typing import Any, Dict, Optional, Set, Union
+
+ from typing_extensions import ClassVar
+
+ from datahub.api.entities.external.external_tag import ExternalTag
+ from datahub.api.entities.external.restricted_text import RestrictedText
+
+
+ class UnityCatalogTagKeyText(RestrictedText):
+     """RestrictedText configured for Unity Catalog tag keys."""
+
+     _default_max_length: ClassVar[int] = 255
+     # Unity Catalog tag keys may not contain . , - = / : or control whitespace
+     _default_forbidden_chars: ClassVar[Set[str]] = {
+         "\t",
+         "\n",
+         "\r",
+         ".",
+         ",",
+         "-",
+         "=",
+         "/",
+         ":",
+     }
+     _default_replacement_char: ClassVar[str] = "_"
+     _default_truncation_suffix: ClassVar[str] = ""  # No suffix for clean identifiers
+
+
+ class UnityCatalogTagValueText(RestrictedText):
+     """RestrictedText configured for Unity Catalog tag values."""
+
+     _default_max_length: ClassVar[int] = 1000
+     # Unity Catalog tag values are more permissive but still have some restrictions
+     _default_forbidden_chars: ClassVar[Set[str]] = {"\t", "\n", "\r"}
+     _default_replacement_char: ClassVar[str] = " "
+     _default_truncation_suffix: ClassVar[str] = "..."
+
+
+ class UnityCatalogTag(ExternalTag):
+     """
+     A tag type specifically designed for Unity Catalog tag restrictions.
+
+     Unity Catalog Tag Restrictions:
+     - Key: max 255 characters; the characters . , - = / : are not allowed
+     - Value: max 1000 characters; more permissive, but no control characters
+     """
+
+     key: UnityCatalogTagKeyText
+     value: Optional[UnityCatalogTagValueText] = None
+
+     def __init__(
+         self,
+         key: Optional[Union[str, UnityCatalogTagKeyText]] = None,
+         value: Optional[Union[str, UnityCatalogTagValueText]] = None,
+         **data: Any,
+     ) -> None:
+         """
+         Initialize UnityCatalogTag from either a DataHub Tag URN or explicit key/value.
+
+         Args:
+             key: Explicit key value (optional for Pydantic initialization)
+             value: Explicit value (optional)
+             **data: Additional Pydantic data
+         """
+         if key is not None:
+             # Direct initialization with key/value
+             processed_key = (
+                 UnityCatalogTagKeyText(key)
+                 if not isinstance(key, UnityCatalogTagKeyText)
+                 else key
+             )
+             processed_value = None
+             if value is not None:
+                 processed_value = (
+                     UnityCatalogTagValueText(value)
+                     if not isinstance(value, UnityCatalogTagValueText)
+                     else value
+                 )
+                 # If value is an empty string, set it to None so we don't generate
+                 # an empty value in the DataHub tag (which would result in "key:" tags)
+                 if not str(value):
+                     processed_value = None
+
+             super().__init__(
+                 key=processed_key,
+                 value=processed_value,
+                 **data,
+             )
+         else:
+             # Standard pydantic initialization
+             super().__init__(**data)
+
+     def __eq__(self, other: object) -> bool:
+         """Check equality based on key and value."""
+         if not isinstance(other, UnityCatalogTag):
+             return False
+         return str(self.key) == str(other.key) and (
+             str(self.value) if self.value else None
+         ) == (str(other.value) if other.value else None)
+
+     def __hash__(self) -> int:
+         """Make UnityCatalogTag hashable based on key and value."""
+         return hash((str(self.key), str(self.value) if self.value else None))
+
+     @classmethod
+     def from_dict(cls, tag_dict: Dict[str, Any]) -> "UnityCatalogTag":
+         """
+         Create a UnityCatalogTag from a dictionary with 'key' and optional 'value'.
+
+         Args:
+             tag_dict: Dictionary with 'key' and optional 'value' keys
+
+         Returns:
+             UnityCatalogTag instance
+         """
+         return cls(key=tag_dict["key"], value=tag_dict.get("value"))
+
+     @classmethod
+     def from_key_value(cls, key: str, value: Optional[str] = None) -> "UnityCatalogTag":
+         """
+         Create a UnityCatalogTag from explicit key and value.
+
+         Overrides the parent method to return the correct type.
+
+         Args:
+             key: Tag key
+             value: Optional tag value
+
+         Returns:
+             UnityCatalogTag instance
+         """
+         return cls(key=key, value=value)
+
+     def to_dict(self) -> Dict[str, str]:
+         """
+         Convert to dictionary format suitable for Unity Catalog API.
+
+         Returns:
+             Dictionary with 'key' and optionally 'value'
+         """
+         result: Dict[str, str] = {"key": self.key.original}
+         if self.value is not None:
+             result["value"] = self.value.original
+         return result
+
+     def to_display_dict(self) -> Dict[str, str]:
+         """
+         Convert to dictionary format showing processed values.
+
+         Returns:
+             Dictionary with processed 'key' and optional 'value'
+         """
+         result: Dict[str, str] = {"key": str(self.key)}
+         if self.value is not None:
+             result["value"] = str(self.value)
+         return result
+
+     def __repr__(self) -> str:
+         if self.value:
+             return f"UnityCatalogTag(key={self.key!r}, value={self.value!r})"
+         else:
+             return f"UnityCatalogTag(key={self.key!r})"
datahub/cli/check_cli.py CHANGED
@@ -9,6 +9,7 @@ from datetime import datetime
  from typing import Any, Dict, List, Optional, Union

  import click
+ from tabulate import tabulate

  from datahub._version import __package_name__
  from datahub.cli.json_file import check_mce_file
@@ -21,7 +22,7 @@ from datahub.ingestion.run.pipeline import Pipeline
  from datahub.ingestion.sink.sink_registry import sink_registry
  from datahub.ingestion.source.source_registry import source_registry
  from datahub.ingestion.transformer.transform_registry import transform_registry
- from datahub.telemetry import telemetry
+ from datahub.upgrade import upgrade
  from datahub.utilities.file_backed_collections import (
      ConnectionWrapper,
      FileBackedDict,
@@ -47,7 +48,6 @@ def check() -> None:
  @click.option(
      "--unpack-mces", default=False, is_flag=True, help="Converts MCEs into MCPs"
  )
- @telemetry.with_telemetry()
  def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None:
      """Check the schema of a metadata (MCE or MCP) JSON file."""

@@ -105,7 +105,6 @@ def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None:
      default=(),
      help="[Advanced] Paths in the deepdiff object to ignore",
  )
- @telemetry.with_telemetry()
  def metadata_diff(
      actual_file: str, expected_file: str, verbose: bool, ignore_path: List[str]
  ) -> None:
@@ -142,7 +141,6 @@ def metadata_diff(
      type=str,
      default=None,
  )
- @telemetry.with_telemetry()
  def plugins(source: Optional[str], verbose: bool) -> None:
      """List the enabled ingestion plugins."""

@@ -234,7 +232,7 @@ def sql_format(sql: str, platform: str) -> None:
      default=True,
      help="Run in offline mode and disable schema-aware parsing.",
  )
- @telemetry.with_telemetry()
+ @upgrade.check_upgrade
  def sql_lineage(
      sql: Optional[str],
      sql_file: Optional[str],
@@ -297,7 +295,6 @@ def sql_lineage(
      type=str,
      help="the input to validate",
  )
- @telemetry.with_telemetry()
  def test_allow_deny(config: str, input: str, pattern_key: str) -> None:
      """Test input string against AllowDeny pattern in a DataHub recipe.

@@ -346,7 +343,6 @@ def test_allow_deny(config: str, input: str, pattern_key: str) -> None:
      type=str,
      help="The input to validate",
  )
- @telemetry.with_telemetry()
  def test_path_spec(config: str, input: str, path_spec_key: str) -> None:
      """Test input path string against PathSpec patterns in a DataHub recipe.

@@ -471,6 +467,7 @@ WHERE


  @check.command()
+ @upgrade.check_upgrade
  def server_config() -> None:
      """Print the server config."""
      graph = get_default_graph(ClientMode.CLI)
@@ -478,3 +475,87 @@ def server_config() -> None:
      server_config = graph.get_server_config()

      click.echo(pprint.pformat(server_config))
+
+
+ @check.command()
+ @click.option(
+     "--urn", required=False, help="The urn or urn pattern (supports % for wildcard)"
+ )
+ @click.option("--aspect", default=None, help="Filter to a specific aspect name.")
+ @click.option(
+     "--start", type=int, default=None, help="Row number of sql store to restore from."
+ )
+ @click.option("--batch-size", type=int, default=None, help="How many rows to restore.")
+ @click.option(
+     "--file",
+     required=False,
+     type=click.Path(exists=True, dir_okay=True, readable=True),
+     help="File absolute path containing URNs (one per line) to restore indices",
+ )
+ @upgrade.check_upgrade
+ def restore_indices(
+     urn: Optional[str],
+     aspect: Optional[str],
+     start: Optional[int],
+     batch_size: Optional[int],
+     file: Optional[str],
+ ) -> None:
+     """Resync metadata changes into the search and graph indices."""
+     if urn is None and file is None:
+         raise click.UsageError("Either --urn or --file must be provided")
+     graph = get_default_graph(ClientMode.CLI)
+
+     graph.restore_indices(
+         urn_pattern=urn,
+         aspect=aspect,
+         start=start,
+         batch_size=batch_size,
+         file=file,
+     )
+
+
+ @check.command()
+ @upgrade.check_upgrade
+ def get_kafka_consumer_offsets() -> None:
+     """Get Kafka consumer offsets from the DataHub API."""
+     graph = get_default_graph(ClientMode.CLI)
+     result = graph.get_kafka_consumer_offsets()
+
+     table_data = []
+     headers = [
+         "Topic",
+         "Consumer Group",
+         "Schema",
+         "Partition",
+         "Offset",
+         "Lag",
+         "Avg Lag",
+         "Max Lag",
+         "Total Lag",
+     ]
+
+     for topic, consumers in result.items():
+         for consumer_group, schemas in consumers.items():
+             for schema, data in schemas.items():
+                 metrics = data.get("metrics", {})
+                 partitions = data.get("partitions", {})
+
+                 for partition, partition_data in partitions.items():
+                     table_data.append(
+                         [
+                             topic,
+                             consumer_group,
+                             schema,
+                             partition,
+                             partition_data.get("offset", "N/A"),
+                             partition_data.get("lag", "N/A"),
+                             metrics.get("avgLag", "N/A"),
+                             metrics.get("maxLag", "N/A"),
+                             metrics.get("totalLag", "N/A"),
+                         ]
+                     )
+
+     if table_data:
+         click.echo(tabulate(table_data, headers=headers, tablefmt="grid"))
+     else:
+         click.echo("No Kafka consumer offset data found.")
datahub/cli/cli_utils.py CHANGED
@@ -3,6 +3,7 @@ import logging
  import time
  import typing
  from datetime import datetime
+ from functools import wraps
  from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union

  import click
@@ -424,3 +425,65 @@ def ensure_has_system_metadata(
      props = metadata.properties
      props["clientId"] = datahub_version.__package_name__
      props["clientVersion"] = datahub_version.__version__
+
+
+ def enable_auto_decorators(main_group: click.Group) -> None:
+     """
+     Enable automatic decorators for all click commands.
+     This wraps existing command callback functions to add upgrade and telemetry decorators.
+     """
+
+     def has_decorator(func: Any, module_pattern: str, function_pattern: str) -> bool:
+         """Check if function already has a specific decorator"""
+         if hasattr(func, "__wrapped__"):
+             current_func = func
+             while hasattr(current_func, "__wrapped__"):
+                 # Check if this wrapper matches the module and function patterns
+                 if (
+                     hasattr(current_func, "__module__")
+                     and module_pattern in current_func.__module__
+                     and hasattr(current_func, "__name__")
+                     and function_pattern in current_func.__name__
+                 ):
+                     return True
+                 current_func = current_func.__wrapped__
+         return False
+
+     def has_telemetry_decorator(func):
+         return has_decorator(func, "telemetry", "with_telemetry")
+
+     def wrap_command_callback(command_obj):
+         """Wrap a command's callback function to add decorators"""
+         if hasattr(command_obj, "callback") and command_obj.callback:
+             original_callback = command_obj.callback
+
+             # Import here to avoid circular imports
+             from datahub.telemetry import telemetry
+
+             decorated_callback = original_callback
+
+             if not has_telemetry_decorator(decorated_callback):
+                 log.debug(
+                     f"Applying telemetry decorator to {original_callback.__module__}.{original_callback.__name__}"
+                 )
+                 decorated_callback = telemetry.with_telemetry()(decorated_callback)
+
+             # Preserve the original function's metadata
+             decorated_callback = wraps(original_callback)(decorated_callback)
+
+             command_obj.callback = decorated_callback
+
+     def wrap_group_commands(group_obj):
+         """Recursively wrap all commands in a group"""
+         if hasattr(group_obj, "commands"):
+             for _, command_obj in group_obj.commands.items():
+                 if isinstance(command_obj, click.Group):
+                     # Recursively wrap sub-groups
+                     wrap_group_commands(command_obj)
+                 else:
+                     # Wrap individual commands
+                     wrap_command_callback(command_obj)
+
+     wrap_group_commands(main_group)
+
+     log.debug("Auto-decorators enabled successfully")
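
For context, a sketch of how `enable_auto_decorators` would be wired up. The root group here is a stand-in; the real hookup presumably lives in `datahub/entrypoints.py`, which this release also touches (+4 -3 in the file list):

```python
import click

from datahub.cli.cli_utils import enable_auto_decorators


@click.group()
def cli() -> None:
    """Stand-in for the real `datahub` root group."""


@cli.command()
def ping() -> None:
    click.echo("pong")


# Recursively walks the group and wraps each command callback with
# telemetry, unless the callback is already telemetry-decorated.
enable_auto_decorators(cli)
```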
datahub/cli/container_cli.py CHANGED
@@ -3,6 +3,7 @@ import logging
  import click

  from datahub.ingestion.source.apply.datahub_apply import apply_association_to_container
+ from datahub.upgrade import upgrade

  logger = logging.getLogger(__name__)

@@ -16,6 +17,7 @@ def container() -> None:
  @container.command()
  @click.option("--container-urn", required=True, type=str)
  @click.option("--tag-urn", required=True, type=str)
+ @upgrade.check_upgrade
  def tag(container_urn: str, tag_urn: str) -> None:
      """Add patch to add a tag to all datasets in a container"""
      apply_association_to_container(container_urn, tag_urn, "tag")
@@ -24,6 +26,7 @@ def tag(container_urn: str, tag_urn: str) -> None:
  @container.command()
  @click.option("--container-urn", required=True, type=str)
  @click.option("--term-urn", required=True, type=str)
+ @upgrade.check_upgrade
  def term(container_urn: str, term_urn: str) -> None:
      """Add patch to add a term to all datasets in a container"""
      apply_association_to_container(container_urn, term_urn, "term")
@@ -32,6 +35,7 @@ def term(container_urn: str, term_urn: str) -> None:
  @container.command()
  @click.option("--container-urn", required=True, type=str)
  @click.option("--owner-urn", required=True, type=str)
+ @upgrade.check_upgrade
  def owner(container_urn: str, owner_urn: str) -> None:
      """Add patch to add an owner to all datasets in a container"""
      apply_association_to_container(container_urn, owner_urn, "owner")
@@ -40,6 +44,7 @@ def owner(container_urn: str, owner_urn: str) -> None:
  @container.command()
  @click.option("--container-urn", required=True, type=str)
  @click.option("--domain-urn", required=True, type=str)
+ @upgrade.check_upgrade
  def domain(container_urn: str, domain_urn: str) -> None:
      """Add patch to add a domain to all datasets in a container"""
      apply_association_to_container(container_urn, domain_urn, "domain")
  apply_association_to_container(container_urn, domain_urn, "domain")