acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414) hide show
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,147 @@
1
+ """
2
+ External Tags Module
3
+
4
+ This module provides tag types that integrate with external systems like DataHub and Unity Catalog.
5
+ It builds on top of RestrictedText to provide sanitized, truncated tag handling with original value preservation.
6
+
7
+ Classes:
8
+ - ExternalTag: DataHub-compatible tag with key/value parsing from URNs
9
+
10
+ Example Usage:
11
+ # DataHub Tags
12
+ tag = ExternalTag.from_urn("urn:li:tag:environment:production")
13
+ datahub_urn = tag.to_datahub_tag_urn() # Returns a TagUrn object
14
+
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from typing import Any, Optional, Tuple, Union
20
+
21
+ from pydantic import BaseModel
22
+
23
+ from datahub.api.entities.external.restricted_text import RestrictedText
24
+ from datahub.metadata.urns import TagUrn
25
+
26
+
27
class ExternalTag(BaseModel):
    """Key/value tag model whose parts are stored as ``RestrictedText``.

    A DataHub tag name of the form ``key:value`` is split on the first ``:``
    into a key and a value; a name without ``:`` becomes a key with no value.
    """

    # Tag key (required).
    key: RestrictedText
    # Tag value; ``None`` when the tag has only a key.
    value: Optional[RestrictedText] = None

    def __init__(
        self,
        key: Optional[Union[str, RestrictedText]] = None,
        value: Optional[Union[str, RestrictedText]] = None,
        **data: Any,
    ) -> None:
        """
        Build an ExternalTag from an explicit key and optional value.

        Plain strings are wrapped via ``RestrictedText(raw_text=...)``;
        ``RestrictedText`` instances are passed through unchanged.

        Args:
            key: Tag key. When ``None``, construction is delegated to pydantic
                with ``**data`` only (``value`` is not forwarded in that case).
            value: Optional tag value.
            **data: Additional keyword data handed to pydantic.
        """
        if key is None:
            # Standard pydantic initialization; pydantic validates the fields.
            super().__init__(**data)
            return

        wrapped_key = (
            key if isinstance(key, RestrictedText) else RestrictedText(raw_text=key)
        )

        wrapped_value = None
        if value is not None:
            wrapped_value = (
                value
                if isinstance(value, RestrictedText)
                else RestrictedText(raw_text=value)
            )

        super().__init__(key=wrapped_key, value=wrapped_value, **data)

    @staticmethod
    def _parse_tag_name(tag_name: str) -> Tuple[str, Optional[str]]:
        """
        Split a tag name into ``(key, value)``.

        Only the first ``:`` acts as the separator, so any further ``:``
        characters stay in the value. Without a ``:`` the whole name is the
        key and the value is ``None``.

        Args:
            tag_name: The tag name portion from the URN

        Returns:
            Tuple of (key, value) where value may be None
        """
        head, separator, tail = tag_name.partition(":")
        if not separator:
            return tag_name, None
        return head, tail

    def to_datahub_tag_urn(self) -> TagUrn:
        """
        Build a DataHub ``TagUrn`` from this tag's key and value.

        The URN name is assembled from the ``raw_text`` attributes of the key
        and (if present) the value.

        Returns:
            A ``TagUrn`` named ``key:value`` if a value exists, otherwise ``key``.
        """
        name = self.key.raw_text
        if self.value is not None:
            name = f"{name}:{self.value.raw_text}"
        return TagUrn(name=name)

    @classmethod
    def from_urn(cls, tag_urn: Union[str, "TagUrn"]) -> "ExternalTag":
        """
        Create an ExternalTag from a DataHub Tag URN.

        Args:
            tag_urn: DataHub Tag URN string or TagUrn object; strings are
                parsed with ``TagUrn.from_string``.

        Returns:
            ExternalTag instance
        """
        parsed_urn = TagUrn.from_string(tag_urn) if isinstance(tag_urn, str) else tag_urn
        key, value = cls._parse_tag_name(parsed_urn.name)
        return cls(key=key, value=value)

    @classmethod
    def from_key_value(cls, key: str, value: Optional[str] = None) -> "ExternalTag":
        """
        Create an ExternalTag from an explicit key and optional value.

        Args:
            key: Tag key
            value: Optional tag value

        Returns:
            ExternalTag instance
        """
        return cls(key=key, value=value)

    def __str__(self) -> str:
        """Return ``key:value`` when a value is set, otherwise just the key."""
        if self.value is None:
            return str(self.key)
        return f"{self.key}:{self.value}"

    def __repr__(self) -> str:
        """Return a constructor-like representation; the value is omitted when unset."""
        if self.value is None:
            return f"ExternalTag(key={self.key!r})"
        return f"ExternalTag(key={self.key!r}, value={self.value!r})"
@@ -0,0 +1,162 @@
1
+ # Import RestrictedText from your existing module
2
+ # Uncomment and adjust the import path as needed:
3
+ # from your_restricted_text_module import RestrictedText
4
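+ # NOTE: the constraints listed below are the Databricks Unity Catalog tag constraints (see the link at the end of this list), not Lake Formation limits: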
5
+ # You can assign a maximum of 50 tags to a single securable object.
6
+ # The maximum length of a tag key is 255 characters.
7
+ # The maximum length of a tag value is 1000 characters.
8
+ # The following characters are not allowed in tag keys:
9
+ # . , - = / :
10
+ # Tag search using the workspace search UI is supported only for tables, views, and table columns.
11
+ # Tag search requires exact term matching.
12
+ # https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
13
+ from typing import Any, Dict, Optional
14
+
15
+ from pydantic import validator
16
+ from typing_extensions import ClassVar
17
+
18
+ from datahub.api.entities.external.external_tag import ExternalTag
19
+ from datahub.api.entities.external.restricted_text import RestrictedText
20
+
21
+
22
class LakeFormationTagKeyText(RestrictedText):
    """Text type for Lake Formation tag keys; overrides RestrictedText defaults."""

    # No suffix is appended on truncation, so the key stays a clean identifier.
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = ""
    # Forbidden characters in a key are replaced with an underscore.
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
    # Processed keys are truncated to at most this many characters.
    DEFAULT_MAX_LENGTH: ClassVar[int] = 50
29
+
30
+
31
class LakeFormationTagValueText(RestrictedText):
    """Text type for Lake Formation tag values; overrides RestrictedText defaults."""

    # Truncated values end with an ellipsis marker.
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
    # Forbidden characters in a value are replaced with a space.
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
    # Processed values are truncated to at most this many characters.
    DEFAULT_MAX_LENGTH: ClassVar[int] = 50
38
+
39
+
40
class LakeFormationTag(ExternalTag):
    """
    A tag whose key and value are sanitized for Lake Formation.

    The key and value keep their original text in ``raw_text``; their string
    form is the processed text produced by ``LakeFormationTagKeyText`` and
    ``LakeFormationTagValueText``. The effective length limit and replacement
    character are whatever those two classes configure (both currently use a
    maximum length of 50).

    Equality and hashing are based on the processed key and value only;
    ``catalog`` takes no part in either.
    """

    key: LakeFormationTagKeyText
    value: Optional[LakeFormationTagValueText] = None
    catalog: Optional[str] = None

    # Pydantic v1 validators
    @validator("key", pre=True)
    @classmethod
    def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:
        """Coerce the incoming key into a LakeFormationTagKeyText (Pydantic v1)."""
        if isinstance(v, LakeFormationTagKeyText):
            return v

        # Another RestrictedText-like object (e.g. produced by the parent
        # class) is re-wrapped from its original text; anything else is
        # treated as the raw text itself.
        raw = v.raw_text if hasattr(v, "raw_text") else v
        return LakeFormationTagKeyText(raw_text=raw)

    @validator("value", pre=True)
    @classmethod
    def _validate_value(cls, v: Any) -> Optional[LakeFormationTagValueText]:
        """Coerce the incoming value into a LakeFormationTagValueText (Pydantic v1).

        None stays None, and an empty string is also mapped to None so that
        no empty value is generated in the DataHub tag.
        """
        if v is None or isinstance(v, LakeFormationTagValueText):
            return v

        # Same unwrapping rule as for the key: prefer the original text of a
        # RestrictedText-like object, otherwise use the input as given.
        raw = v.raw_text if hasattr(v, "raw_text") else v
        if not str(raw):
            return None

        return LakeFormationTagValueText(raw_text=raw)

    def __eq__(self, other: object) -> bool:
        """Check equality based on the processed key and value."""
        if not isinstance(other, LakeFormationTag):
            return False
        own_value = str(self.value) if self.value else None
        other_value = str(other.value) if other.value else None
        return str(self.key) == str(other.key) and own_value == other_value

    def __hash__(self) -> int:
        """Hash on the same (processed key, processed value) pair used by __eq__."""
        return hash((str(self.key), str(self.value) if self.value else None))

    @classmethod
    def from_dict(cls, tag_dict: Dict[str, Any]) -> "LakeFormationTag":
        """
        Create a LakeFormationTag from a dictionary with 'key' and optional 'value'.

        Only the 'key' and 'value' entries are read; any other entries are ignored.

        Args:
            tag_dict: Dictionary with 'key' and optional 'value' keys

        Returns:
            LakeFormationTag instance

        Raises:
            KeyError: If 'key' is missing from tag_dict
        """
        return cls(key=tag_dict["key"], value=tag_dict.get("value"))

    @classmethod
    def from_key_value(
        cls, key: str, value: Optional[str] = None
    ) -> "LakeFormationTag":
        """
        Create a LakeFormationTag from explicit key and value.

        Overrides the parent method to return the correct type.

        Args:
            key: Tag key
            value: Optional tag value

        Returns:
            LakeFormationTag instance
        """
        return cls(key=key, value=value)

    def to_dict(self) -> Dict[str, str]:
        """
        Convert to dictionary format suitable for LakeFormation tag.

        Uses the original (unprocessed) ``raw_text`` of the key and value.

        Returns:
            Dictionary with 'key' and optionally 'value'
        """
        result: Dict[str, str] = {"key": self.key.raw_text}
        if self.value is not None:
            result["value"] = self.value.raw_text
        return result

    def to_display_dict(self) -> Dict[str, str]:
        """
        Convert to dictionary format showing processed values.

        Returns:
            Dictionary with processed 'key' and optional 'value'
        """
        result: Dict[str, str] = {"key": str(self.key)}
        if self.value is not None:
            result["value"] = str(self.value)
        return result

    def __repr__(self) -> str:
        if self.value:
            return f"LakeFormationTag(key={self.key!r}, value={self.value!r})"
        else:
            return f"LakeFormationTag(key={self.key!r})"
@@ -0,0 +1,172 @@
1
+ """The `RestrictedText` module provides a custom Pydantic type that stores the original
2
+ value but returns a truncated and sanitized version when accessed.
3
+
4
+ Features:
5
+ - Configurable maximum length with truncation
6
+ - Character replacement (default replaces with underscore)
7
+ - Preserves original value internally
8
+ - Customizable truncation suffix
9
+ - Compatible with both Pydantic v1 and v2
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import ClassVar, Optional, Set
15
+
16
+ from datahub.configuration.common import ConfigModel
17
+
18
+
19
class RestrictedText(ConfigModel):
    """A text value that keeps its original string but renders a sanitized, truncated form.

    ``raw_text`` holds the original value. ``str(instance)`` and
    ``instance.processed`` return the processed value, in which every
    forbidden character has been replaced and the result has been truncated
    to the maximum length. Per-instance settings left as ``None`` fall back
    to the class-level ``DEFAULT_*`` constants, so subclasses can change the
    behaviour by overriding those constants.

    Plain strings are accepted for fields of this type through
    ``__get_validators__`` (the Pydantic v1 custom-type hook).

    ```python
    from pydantic import BaseModel

    class TestModel(BaseModel):
        # Basic usage with default settings
        name: RestrictedText

        # Custom max length and character replacement
        custom_field: RestrictedText = RestrictedText(
            raw_text="hello-world.test",
            max_length=10,
            forbidden_chars={' ', '-', '.'},
            replacement_char='_'
        )

    # Usage example
    model = TestModel(
        name="This is a very long string with special characters!",
    )

    print(model.name)  # Truncated and sanitized version
    print(model.name.raw_text)  # Original value
    print(model.custom_field)  # "hello_w..."
    ```
    """

    # Default configuration
    DEFAULT_MAX_LENGTH: ClassVar[Optional[int]] = 50
    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."

    raw_text: str
    max_length: Optional[int] = None
    forbidden_chars: Optional[Set[str]] = None
    replacement_char: Optional[str] = None
    truncation_suffix: Optional[str] = None
    _processed_value: Optional[str] = None

    def __init__(self, **data):
        """Build the model, then compute the processed value right away."""
        super().__init__(**data)
        self.validate_text()

    @classmethod
    def __get_validators__(cls):
        """Yield the validator chain: accept raw input, build instance, process."""
        yield cls.pydantic_accept_raw_text
        yield cls.validate
        yield cls.pydantic_validate_text

    @classmethod
    def pydantic_accept_raw_text(cls, v):
        """Turn a plain string into constructor kwargs; pass instances and dicts through."""
        if isinstance(v, (RestrictedText, dict)):
            return v
        assert isinstance(v, str), "text must be a string"
        # The key must match the ``raw_text`` field so that ``validate`` can
        # build the instance with ``cls(**v)``.
        return {"raw_text": v}

    @classmethod
    def pydantic_validate_text(cls, v):
        """Final validator step: make sure the processed value is computed."""
        assert isinstance(v, RestrictedText)
        # Called outside of an ``assert`` so it still runs when assertions
        # are disabled (``python -O``).
        v.validate_text()
        return v

    @classmethod
    def validate(cls, v):
        """Validate and create a RestrictedText instance.

        Raises:
            ValueError: If ``v`` is neither a RestrictedText nor a dict.
        """
        if isinstance(v, RestrictedText):
            return v

        # This should be a dict at this point from pydantic_accept_raw_text
        if isinstance(v, dict):
            # __init__ already runs validate_text(), so the instance is
            # fully processed when it is returned.
            return cls(**v)

        raise ValueError(f"Unable to validate RestrictedText from {type(v)}")

    def validate_text(self) -> bool:
        """Compute and store the processed value.

        Each setting that is ``None`` on the instance falls back to the
        corresponding class-level default.

        Returns:
            Always True.
        """
        max_length = (
            self.max_length if self.max_length is not None else self.DEFAULT_MAX_LENGTH
        )
        forbidden_chars = (
            self.forbidden_chars
            if self.forbidden_chars is not None
            else self.DEFAULT_FORBIDDEN_CHARS
        )
        replacement_char = (
            self.replacement_char
            if self.replacement_char is not None
            else self.DEFAULT_REPLACEMENT_CHAR
        )
        truncation_suffix = (
            self.truncation_suffix
            if self.truncation_suffix is not None
            else self.DEFAULT_TRUNCATION_SUFFIX
        )

        # Store processed value
        self._processed_value = self._process_value(
            self.raw_text,
            max_length,
            forbidden_chars,
            replacement_char,
            truncation_suffix,
        )
        return True

    def _process_value(
        self,
        value: str,
        max_length: Optional[int],
        forbidden_chars: Set[str],
        replacement_char: str,
        truncation_suffix: str,
    ) -> str:
        """Replace forbidden characters in ``value`` and truncate the result.

        Args:
            value: Text to process.
            max_length: Maximum length of the result; None disables truncation.
            forbidden_chars: Strings to replace wherever they occur.
            replacement_char: Replacement for each forbidden occurrence.
            truncation_suffix: Marker appended when the text is truncated.

        Returns:
            The processed text.
        """
        # Replace specified characters
        processed = value
        for char in forbidden_chars:
            processed = processed.replace(char, replacement_char)

        # Truncate if necessary
        if max_length is not None and len(processed) > max_length:
            if len(truncation_suffix) >= max_length:
                # If suffix is too long, just truncate without suffix
                processed = processed[:max_length]
            else:
                # Truncate and add suffix, keeping the total at max_length
                truncate_length = max_length - len(truncation_suffix)
                processed = processed[:truncate_length] + truncation_suffix

        return processed

    def __str__(self) -> str:
        """Return the processed (truncated and sanitized) value."""
        return self._processed_value or ""

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.raw_text!r})"

    @property
    def processed(self) -> str:
        """Get the processed (truncated and sanitized) value."""
        return self._processed_value or ""
@@ -0,0 +1,172 @@
1
+ # Import RestrictedText from your existing module
2
+ # Uncomment and adjust the import path as needed:
3
+ # from your_restricted_text_module import RestrictedText
4
+ # The following is a list of tag constraints:
5
+ # You can assign a maximum of 50 tags to a single securable object.
6
+ # The maximum length of a tag key is 255 characters.
7
+ # The maximum length of a tag value is 1000 characters.
8
+ # The following characters are not allowed in tag keys:
9
+ # . , - = / :
10
+ # Tag search using the workspace search UI is supported only for tables, views, and table columns.
11
+ # Tag search requires exact term matching.
12
+ # https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
13
+ from typing import Any, Dict, Optional, Set
14
+
15
+ # Import validator for Pydantic v1 (always needed since we removed conditional logic)
16
+ from pydantic import validator
17
+ from typing_extensions import ClassVar
18
+
19
+ from datahub.api.entities.external.external_tag import ExternalTag
20
+ from datahub.api.entities.external.restricted_text import RestrictedText
21
+
22
+
23
class UnityCatalogTagKeyText(RestrictedText):
    """Text type for Unity Catalog tag keys; overrides RestrictedText defaults."""

    # No suffix is appended on truncation, so the key stays a clean identifier.
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = ""
    # Each forbidden character in a key is replaced with an underscore.
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
    # Forbidden in keys: tab, newline, carriage return and . , - = / :
    # (one set element per character of the string below).
    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = set("\t\n\r.,-=/:")
    # Processed keys are truncated to at most this many characters.
    DEFAULT_MAX_LENGTH: ClassVar[int] = 255
41
+
42
+
43
class UnityCatalogTagValueText(RestrictedText):
    """Text type for Unity Catalog tag values; overrides RestrictedText defaults."""

    # Truncated values end with an ellipsis marker.
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
    # Each forbidden character in a value is replaced with a space.
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
    # Values are more permissive than keys: only tab, newline and carriage
    # return are forbidden (one set element per character of the string).
    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = set("\t\n\r")
    # Processed values are truncated to at most this many characters.
    DEFAULT_MAX_LENGTH: ClassVar[int] = 1000
51
+
52
+
53
class UnityCatalogTag(ExternalTag):
    """
    A tag whose key and value are sanitized for Unity Catalog.

    The key and value keep their original text in ``raw_text``; their string
    form is the processed text produced by ``UnityCatalogTagKeyText`` and
    ``UnityCatalogTagValueText``:
    - Key: at most 255 characters; tab, newline, carriage return and the
      characters . , - = / : are replaced with '_'
    - Value: at most 1000 characters; tab, newline and carriage return are
      replaced with a space, and truncated values end with '...'

    Equality and hashing are based on the processed key and value.
    """

    key: UnityCatalogTagKeyText
    value: Optional[UnityCatalogTagValueText] = None

    # Pydantic v1 validators
    @validator("key", pre=True)
    @classmethod
    def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:
        """Coerce the incoming key into a UnityCatalogTagKeyText (Pydantic v1)."""
        if isinstance(v, UnityCatalogTagKeyText):
            return v

        # Another RestrictedText-like object (e.g. produced by the parent
        # class) is re-wrapped from its original text; anything else is
        # treated as the raw text itself.
        raw = v.raw_text if hasattr(v, "raw_text") else v
        return UnityCatalogTagKeyText(raw_text=raw)

    @validator("value", pre=True)
    @classmethod
    def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]:
        """Coerce the incoming value into a UnityCatalogTagValueText (Pydantic v1).

        None stays None, and an empty string is also mapped to None so that
        no empty value is generated in the DataHub tag.
        """
        if v is None or isinstance(v, UnityCatalogTagValueText):
            return v

        # Same unwrapping rule as for the key: prefer the original text of a
        # RestrictedText-like object, otherwise use the input as given.
        raw = v.raw_text if hasattr(v, "raw_text") else v
        if not str(raw):
            return None

        return UnityCatalogTagValueText(raw_text=raw)

    def __eq__(self, other: object) -> bool:
        """Check equality based on the processed key and value."""
        if not isinstance(other, UnityCatalogTag):
            return False
        own_value = str(self.value) if self.value else None
        other_value = str(other.value) if other.value else None
        return str(self.key) == str(other.key) and own_value == other_value

    def __hash__(self) -> int:
        """Hash on the same (processed key, processed value) pair used by __eq__."""
        return hash((str(self.key), str(self.value) if self.value else None))

    @classmethod
    def from_dict(cls, tag_dict: Dict[str, Any]) -> "UnityCatalogTag":
        """
        Create a UnityCatalogTag from a dictionary with 'key' and optional 'value'.

        The dictionary is expanded directly into the constructor, so its
        entries are passed as keyword arguments unchanged.

        Args:
            tag_dict: Dictionary with 'key' and optional 'value' keys

        Returns:
            UnityCatalogTag instance
        """
        return cls(**tag_dict)

    @classmethod
    def from_key_value(cls, key: str, value: Optional[str] = None) -> "UnityCatalogTag":
        """
        Create a UnityCatalogTag from explicit key and value.

        Overrides the parent method to return the correct type.

        Args:
            key: Tag key
            value: Optional tag value

        Returns:
            UnityCatalogTag instance
        """
        return cls(key=key, value=value)

    def to_dict(self) -> Dict[str, str]:
        """
        Convert to dictionary format suitable for Unity Catalog API.

        Uses the original (unprocessed) ``raw_text`` of the key and value.

        Returns:
            Dictionary with 'key' and optionally 'value'
        """
        result: Dict[str, str] = {"key": self.key.raw_text}
        if self.value is not None:
            result["value"] = self.value.raw_text
        return result

    def to_display_dict(self) -> Dict[str, str]:
        """
        Convert to dictionary format showing processed values.

        Returns:
            Dictionary with processed 'key' and optional 'value'
        """
        result: Dict[str, str] = {"key": str(self.key)}
        if self.value is not None:
            result["value"] = str(self.value)
        return result

    def __repr__(self) -> str:
        if self.value:
            return f"UnityCatalogTag(key={self.key!r}, value={self.value!r})"
        else:
            return f"UnityCatalogTag(key={self.key!r})"
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
  from typing import List, Optional, Union
6
6
 
7
7
  import yaml
8
- from pydantic import validator
8
+ from pydantic import Field, validator
9
9
  from ruamel.yaml import YAML
10
10
  from typing_extensions import Literal
11
11
 
@@ -67,7 +67,7 @@ class Prompt(ConfigModel):
67
67
  description: Optional[str] = None
68
68
  type: str
69
69
  structured_property_id: Optional[str] = None
70
- structured_property_urn: Optional[str] = None
70
+ structured_property_urn: Optional[str] = Field(default=None, validate_default=True)
71
71
  required: Optional[bool] = None
72
72
 
73
73
  @validator("structured_property_urn", pre=True, always=True)
@@ -111,7 +111,7 @@ class Actors(ConfigModel):
111
111
 
112
112
  class Forms(ConfigModel):
113
113
  id: Optional[str] = None
114
- urn: Optional[str] = None
114
+ urn: Optional[str] = Field(default=None, validate_default=True)
115
115
  name: str
116
116
  description: Optional[str] = None
117
117
  prompts: List[Prompt] = []