acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of acryl-datahub has been flagged as potentially problematic.

Files changed (159)
  1. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/METADATA +2566 -2514
  2. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/RECORD +159 -149
  3. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  6. datahub/api/entities/datacontract/datacontract.py +35 -3
  7. datahub/api/entities/datajob/dataflow.py +3 -3
  8. datahub/api/entities/datajob/datajob.py +7 -4
  9. datahub/api/entities/dataset/dataset.py +9 -11
  10. datahub/api/entities/forms/forms.py +34 -34
  11. datahub/api/graphql/assertion.py +1 -1
  12. datahub/api/graphql/operation.py +4 -4
  13. datahub/cli/check_cli.py +3 -2
  14. datahub/cli/config_utils.py +2 -2
  15. datahub/cli/delete_cli.py +6 -5
  16. datahub/cli/docker_cli.py +2 -2
  17. datahub/cli/exists_cli.py +2 -1
  18. datahub/cli/get_cli.py +2 -1
  19. datahub/cli/iceberg_cli.py +6 -5
  20. datahub/cli/ingest_cli.py +9 -6
  21. datahub/cli/migrate.py +4 -3
  22. datahub/cli/migration_utils.py +4 -3
  23. datahub/cli/put_cli.py +3 -2
  24. datahub/cli/specific/assertions_cli.py +2 -1
  25. datahub/cli/specific/datacontract_cli.py +3 -2
  26. datahub/cli/specific/dataproduct_cli.py +10 -9
  27. datahub/cli/specific/dataset_cli.py +4 -3
  28. datahub/cli/specific/forms_cli.py +2 -1
  29. datahub/cli/specific/group_cli.py +2 -1
  30. datahub/cli/specific/structuredproperties_cli.py +4 -3
  31. datahub/cli/specific/user_cli.py +2 -1
  32. datahub/cli/state_cli.py +2 -1
  33. datahub/cli/timeline_cli.py +2 -1
  34. datahub/configuration/common.py +5 -0
  35. datahub/configuration/source_common.py +1 -1
  36. datahub/emitter/mcp.py +20 -5
  37. datahub/emitter/request_helper.py +116 -3
  38. datahub/emitter/rest_emitter.py +163 -93
  39. datahub/entrypoints.py +2 -1
  40. datahub/errors.py +4 -0
  41. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
  42. datahub/ingestion/api/source.py +2 -5
  43. datahub/ingestion/api/source_helpers.py +1 -0
  44. datahub/ingestion/glossary/classification_mixin.py +4 -2
  45. datahub/ingestion/graph/client.py +33 -8
  46. datahub/ingestion/graph/config.py +14 -0
  47. datahub/ingestion/graph/filters.py +1 -1
  48. datahub/ingestion/graph/links.py +53 -0
  49. datahub/ingestion/run/pipeline.py +9 -6
  50. datahub/ingestion/run/pipeline_config.py +1 -1
  51. datahub/ingestion/sink/datahub_rest.py +5 -6
  52. datahub/ingestion/source/apply/datahub_apply.py +2 -1
  53. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  54. datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
  55. datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
  56. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
  57. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
  58. datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
  59. datahub/ingestion/source/common/subtypes.py +3 -0
  60. datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
  61. datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
  62. datahub/ingestion/source/dbt/dbt_common.py +10 -2
  63. datahub/ingestion/source/dbt/dbt_core.py +82 -42
  64. datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
  65. datahub/ingestion/source/feast.py +4 -4
  66. datahub/ingestion/source/fivetran/config.py +1 -1
  67. datahub/ingestion/source/fivetran/fivetran_log_api.py +7 -3
  68. datahub/ingestion/source/fivetran/fivetran_query.py +16 -16
  69. datahub/ingestion/source/ge_data_profiler.py +27 -1
  70. datahub/ingestion/source/hex/api.py +1 -20
  71. datahub/ingestion/source/hex/query_fetcher.py +4 -1
  72. datahub/ingestion/source/iceberg/iceberg.py +20 -4
  73. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  74. datahub/ingestion/source/ldap.py +1 -1
  75. datahub/ingestion/source/looker/looker_common.py +17 -2
  76. datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
  77. datahub/ingestion/source/looker/looker_source.py +34 -5
  78. datahub/ingestion/source/looker/lookml_source.py +7 -1
  79. datahub/ingestion/source/metadata/lineage.py +2 -1
  80. datahub/ingestion/source/mlflow.py +19 -6
  81. datahub/ingestion/source/mode.py +74 -28
  82. datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
  83. datahub/ingestion/source/powerbi/config.py +13 -1
  84. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  85. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  86. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
  87. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  88. datahub/ingestion/source/redshift/usage.py +10 -9
  89. datahub/ingestion/source/sigma/config.py +74 -6
  90. datahub/ingestion/source/sigma/sigma.py +16 -1
  91. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  92. datahub/ingestion/source/slack/slack.py +4 -52
  93. datahub/ingestion/source/snowflake/snowflake_config.py +2 -12
  94. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -18
  95. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  96. datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
  97. datahub/ingestion/source/snowflake/snowflake_query.py +9 -63
  98. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  99. datahub/ingestion/source/sql/athena.py +2 -1
  100. datahub/ingestion/source/sql/clickhouse.py +5 -1
  101. datahub/ingestion/source/sql/druid.py +7 -2
  102. datahub/ingestion/source/sql/hive.py +7 -2
  103. datahub/ingestion/source/sql/hive_metastore.py +5 -5
  104. datahub/ingestion/source/sql/mssql/source.py +1 -1
  105. datahub/ingestion/source/sql/oracle.py +6 -2
  106. datahub/ingestion/source/sql/sql_config.py +1 -34
  107. datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
  108. datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
  109. datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
  110. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  111. datahub/ingestion/source/tableau/tableau.py +31 -6
  112. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  113. datahub/ingestion/source/unity/config.py +2 -1
  114. datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
  115. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
  116. datahub/ingestion/source/vertexai/vertexai.py +316 -4
  117. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
  118. datahub/integrations/assertion/common.py +3 -2
  119. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +538 -493
  120. datahub/metadata/_urns/urn_defs.py +1819 -1763
  121. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  122. datahub/metadata/schema.avsc +17296 -16883
  123. datahub/metadata/schema_classes.py +3 -3
  124. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  125. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  126. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  127. datahub/metadata/schemas/FormInfo.avsc +5 -0
  128. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  129. datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
  130. datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
  131. datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
  132. datahub/metadata/schemas/QueryProperties.avsc +4 -2
  133. datahub/metadata/schemas/SystemMetadata.avsc +86 -0
  134. datahub/metadata/schemas/__init__.py +3 -3
  135. datahub/sdk/_all_entities.py +4 -0
  136. datahub/sdk/_shared.py +142 -4
  137. datahub/sdk/_utils.py +4 -0
  138. datahub/sdk/dataset.py +2 -2
  139. datahub/sdk/entity_client.py +8 -0
  140. datahub/sdk/lineage_client.py +235 -0
  141. datahub/sdk/main_client.py +6 -3
  142. datahub/sdk/mlmodel.py +301 -0
  143. datahub/sdk/mlmodelgroup.py +233 -0
  144. datahub/secret/datahub_secret_store.py +2 -1
  145. datahub/specific/dataset.py +12 -0
  146. datahub/sql_parsing/fingerprint_utils.py +6 -0
  147. datahub/sql_parsing/sql_parsing_aggregator.py +48 -34
  148. datahub/sql_parsing/sqlglot_utils.py +18 -14
  149. datahub/telemetry/telemetry.py +2 -2
  150. datahub/testing/check_imports.py +1 -1
  151. datahub/testing/mcp_diff.py +15 -2
  152. datahub/upgrade/upgrade.py +10 -12
  153. datahub/utilities/logging_manager.py +8 -1
  154. datahub/utilities/server_config_util.py +350 -10
  155. datahub/utilities/sqlalchemy_query_combiner.py +4 -5
  156. datahub/utilities/urn_encoder.py +1 -1
  157. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/entry_points.txt +0 -0
  158. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/licenses/LICENSE +0 -0
  159. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/top_level.txt +0 -0
datahub/cli/put_cli.py CHANGED
@@ -8,6 +8,7 @@ from datahub.cli.cli_utils import post_entity
 from datahub.configuration.config_loader import load_config_file
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import (
     DataPlatformInfoClass as DataPlatformInfo,
     PlatformTypeClass,
@@ -53,7 +54,7 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> None:
         aspect_data, allow_stdin=True, resolve_env_vars=False, process_directives=False
     )

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

     system_metadata: Union[None, SystemMetadataClass] = None
     if run_id:
@@ -118,7 +119,7 @@ def platform(
         displayName=display_name or platform_name,
         logoUrl=logo,
     )
-    datahub_graph = get_default_graph()
+    datahub_graph = get_default_graph(ClientMode.CLI)
     mcp = MetadataChangeProposalWrapper(
         entityUrn=str(platform_urn),
         aspect=data_platform_info,
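
The change to put_cli.py above is the first instance of a pattern repeated across all of the CLI modules in this diff: get_default_graph() is now always called with an explicit ClientMode, presumably so that requests issued by the CLI can be attributed separately from SDK and ingestion traffic. A minimal sketch of the new calling convention (the dataset urn is a placeholder):

    from datahub.ingestion.graph.client import get_default_graph
    from datahub.ingestion.graph.config import ClientMode

    # Direct call, as in put_cli.py and state_cli.py:
    client = get_default_graph(ClientMode.CLI)

    # Context-manager form, as in the specific/*_cli.py modules below;
    # the underlying connection is cleaned up when the block exits.
    with get_default_graph(ClientMode.CLI) as graph:
        if graph.exists("urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)"):
            print("dataset exists")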

datahub/cli/specific/assertions_cli.py CHANGED
@@ -15,6 +15,7 @@ from datahub.api.entities.assertion.compiler_interface import (
 from datahub.emitter.mce_builder import make_assertion_urn
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.integrations.assertion.registry import ASSERTION_PLATFORMS
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
@@ -39,7 +40,7 @@ def upsert(file: str) -> None:

     assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file)

-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         for assertion_spec in assertions_spec.assertions:
             try:
                 mcp = MetadataChangeProposalWrapper(

datahub/cli/specific/datacontract_cli.py CHANGED
@@ -6,6 +6,7 @@ from click_default_group import DefaultGroup

 from datahub.api.entities.datacontract.datacontract import DataContract
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -28,7 +29,7 @@ def upsert(file: str) -> None:
     data_contract: DataContract = DataContract.from_yaml(file)
     urn = data_contract.urn

-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         if not graph.exists(data_contract.entity):
             raise ValueError(
                 f"Cannot define a data contract for non-existent entity {data_contract.entity}"
@@ -72,7 +73,7 @@ def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
         data_contract = DataContract.from_yaml(file)
         urn = data_contract.urn

-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         if not graph.exists(urn):
             raise ValueError(f"Data Contract {urn} does not exist")


datahub/cli/specific/dataproduct_cli.py CHANGED
@@ -20,6 +20,7 @@ from datahub.emitter.mce_builder import (
     validate_ownership_type,
 )
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass
 from datahub.specific.dataproduct import DataProductPatchBuilder
 from datahub.telemetry import telemetry
@@ -81,7 +82,7 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -> None:

     config_dict = load_file(pathlib.Path(file))
     id = config_dict.get("id") if isinstance(config_dict, dict) else None
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         data_product: DataProduct = DataProduct.from_yaml(file, graph)
         external_url_override = (
             external_url
@@ -162,7 +163,7 @@ def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
 def diff(file: Path, update: bool) -> None:
     """Diff a Data Product file with its twin in DataHub"""

-    with get_default_graph() as emitter:
+    with get_default_graph(ClientMode.CLI) as emitter:
         id: Optional[str] = None
         try:
             data_product_local: DataProduct = DataProduct.from_yaml(file, emitter)
@@ -216,7 +217,7 @@ def delete(urn: str, file: Path, hard: bool) -> None:
         raise click.Abort()

     graph: DataHubGraph
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         data_product_urn = (
             urn if urn.startswith("urn:li:dataProduct") else f"urn:li:dataProduct:{urn}"
         )
@@ -248,7 +249,7 @@ def get(urn: str, to_file: str) -> None:
     if not urn.startswith("urn:li:dataProduct:"):
         urn = f"urn:li:dataProduct:{urn}"

-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         if graph.exists(urn):
             dataproduct: DataProduct = DataProduct.from_datahub(graph=graph, id=urn)
             click.secho(
@@ -306,7 +307,7 @@ def set_description(urn: str, description: str, md_file: Path) -> None:

     dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
     dataproduct_patcher.set_description(description)
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         _abort_if_non_existent_urn(graph, urn, "set description")
         for mcp in dataproduct_patcher.build():
             graph.emit(mcp)
@@ -342,7 +343,7 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
             owner=_get_owner_urn(owner), type=owner_type, typeUrn=owner_type_urn
         )
     )
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         _abort_if_non_existent_urn(graph, urn, "add owners")
         for mcp in dataproduct_patcher.build():
             graph.emit(mcp)
@@ -360,7 +361,7 @@ def remove_owner(urn: str, owner_urn: str) -> None:
         urn = f"urn:li:dataProduct:{urn}"
     dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
     dataproduct_patcher.remove_owner(owner=_get_owner_urn(owner_urn))
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         _abort_if_non_existent_urn(graph, urn, "remove owners")
         for mcp in dataproduct_patcher.build():
             click.echo(json.dumps(mcp.to_obj()))
@@ -382,7 +383,7 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
         urn = f"urn:li:dataProduct:{urn}"
     dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
     dataproduct_patcher.add_asset(asset)
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         _abort_if_non_existent_urn(graph, urn, "add assets")
         if validate_assets:
             _abort_if_non_existent_urn(
@@ -409,7 +410,7 @@ def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
         urn = f"urn:li:dataProduct:{urn}"
     dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
     dataproduct_patcher.remove_asset(asset)
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         _abort_if_non_existent_urn(graph, urn, "remove assets")
         if validate_assets:
             _abort_if_non_existent_urn(

datahub/cli/specific/dataset_cli.py CHANGED
@@ -12,6 +12,7 @@ from click_default_group import DefaultGroup
 from datahub.api.entities.dataset.dataset import Dataset, DatasetRetrievalConfig
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
@@ -54,7 +55,7 @@ def get(urn: str, to_file: str) -> None:
     if not urn.startswith("urn:li:dataset:"):
         urn = f"urn:li:dataset:{urn}"

-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         if graph.exists(urn):
             dataset: Dataset = Dataset.from_datahub(graph=graph, urn=urn)
             click.secho(
@@ -82,7 +83,7 @@ def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None:
     all_urns.add(urn)
     for sibling_urn in sibling_urns:
         all_urns.add(sibling_urn)
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         for _urn in all_urns:
             _emit_sibling(graph, urn, _urn, all_urns)

@@ -181,7 +182,7 @@ def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
     dry_run_prefix = "[dry-run]: " if dry_run else ""  # prefix to use in messages

     failures: List[str] = []
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         datasets = Dataset.from_yaml(file)
         for dataset in datasets:
             assert (

datahub/cli/specific/forms_cli.py CHANGED
@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup

 from datahub.api.entities.forms.forms import Forms
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -40,7 +41,7 @@ def upsert(file: Path) -> None:
 @telemetry.with_telemetry()
 def get(urn: str, to_file: str) -> None:
     """Get form from DataHub"""
-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         if graph.exists(urn):
             form: Forms = Forms.from_datahub(graph=graph, urn=urn)
             click.secho(

datahub/cli/specific/group_cli.py CHANGED
@@ -10,6 +10,7 @@ from datahub.api.entities.corpgroup.corpgroup import (
 )
 from datahub.cli.specific.file_loader import load_file
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -40,7 +41,7 @@ def upsert(file: Path, override_editable: bool) -> None:

     config_dict = load_file(file)
     group_configs = config_dict if isinstance(config_dict, list) else [config_dict]
-    with get_default_graph() as emitter:
+    with get_default_graph(ClientMode.CLI) as emitter:
         for group_config in group_configs:
             try:
                 datahub_group = CorpGroup.parse_obj(group_config)

datahub/cli/specific/structuredproperties_cli.py CHANGED
@@ -11,6 +11,7 @@ from datahub.api.entities.structuredproperties.structuredproperties import (
     StructuredProperties,
 )
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn
@@ -33,7 +34,7 @@ def properties() -> None:
 def upsert(file: Path) -> None:
     """Upsert structured properties in DataHub."""

-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         StructuredProperties.create(str(file), graph)


@@ -48,7 +49,7 @@ def get(urn: str, to_file: str) -> None:
     """Get structured properties from DataHub"""
     urn = Urn.make_structured_property_urn(urn)

-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         if graph.exists(urn):
             structuredproperties: StructuredProperties = (
                 StructuredProperties.from_datahub(graph=graph, urn=urn)
@@ -117,7 +118,7 @@ def list(details: bool, to_file: str) -> None:
         with open(file, "w") as fp:
             yaml.dump(serialized_objects, fp)

-    with get_default_graph() as graph:
+    with get_default_graph(ClientMode.CLI) as graph:
         if details:
             logger.info(
                 "Listing structured properties with details. Use --no-details for urns only"

datahub/cli/specific/user_cli.py CHANGED
@@ -8,6 +8,7 @@ from click_default_group import DefaultGroup
 from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
 from datahub.cli.specific.file_loader import load_file
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -38,7 +39,7 @@ def upsert(file: Path, override_editable: bool) -> None:

     config_dict = load_file(pathlib.Path(file))
     user_configs = config_dict if isinstance(config_dict, list) else [config_dict]
-    with get_default_graph() as emitter:
+    with get_default_graph(ClientMode.CLI) as emitter:
         for user_config in user_configs:
             try:
                 datahub_user: CorpUser = CorpUser.parse_obj(user_config)
datahub/cli/state_cli.py CHANGED
@@ -5,6 +5,7 @@ import click
 from click_default_group import DefaultGroup

 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -28,7 +29,7 @@ def inspect(pipeline_name: str, platform: str) -> None:
     Only works for state entity removal for now.
     """

-    datahub_graph = get_default_graph()
+    datahub_graph = get_default_graph(ClientMode.CLI)
    checkpoint = datahub_graph.get_latest_pipeline_checkpoint(pipeline_name, platform)
    if not checkpoint:
        click.secho("No ingestion state found.", fg="red")

datahub/cli/timeline_cli.py CHANGED
@@ -9,6 +9,7 @@ from requests import Response

 from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
 from datahub.utilities.urns.urn import Urn
@@ -63,7 +64,7 @@ def get_timeline(
     diff: bool,
     graph: Optional[DataHubGraph] = None,
 ) -> Any:
-    client = graph if graph else get_default_graph()
+    client = graph if graph else get_default_graph(ClientMode.CLI)
     session = client._session
     host = client.config.server
     if urn.startswith("urn%3A"):

datahub/configuration/common.py CHANGED
@@ -33,10 +33,15 @@ REDACT_KEYS = {
 }
 REDACT_SUFFIXES = {
     "_password",
+    "-password",
     "_secret",
+    "-secret",
     "_token",
+    "-token",
     "_key",
+    "-key",
     "_key_id",
+    "-key-id",
 }

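The hunk above extends REDACT_SUFFIXES with kebab-case variants, so keys such as gms-token are now masked in printed configuration just like gms_token. A rough sketch of how suffix-based redaction applies (the should_redact helper is hypothetical, for illustration only):

    REDACT_SUFFIXES = {
        "_password", "-password", "_secret", "-secret",
        "_token", "-token", "_key", "-key", "_key_id", "-key-id",
    }

    def should_redact(key: str) -> bool:
        # A config entry is masked when its key ends in a sensitive suffix.
        return any(key.lower().endswith(suffix) for suffix in REDACT_SUFFIXES)

    assert should_redact("gms-token")      # newly covered by this release
    assert should_redact("client_secret")  # covered before as well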
 
datahub/configuration/source_common.py CHANGED
@@ -16,7 +16,7 @@ class PlatformInstanceConfigMixin(ConfigModel):
         default=None,
         description="The instance of the platform that all assets produced by this recipe belong to. "
         "This should be unique within the platform. "
-        "See https://datahubproject.io/docs/platform-instances/ for more details.",
+        "See https://docs.datahub.com/docs/platform-instances/ for more details.",
     )

datahub/emitter/mcp.py CHANGED
@@ -1,9 +1,11 @@
 import dataclasses
 import json
-from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union
+import warnings
+from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union

 from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
 from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
+from datahub.errors import DataHubDeprecationWarning
 from datahub.metadata.schema_classes import (
     ChangeTypeClass,
     DictWrapper,
@@ -69,18 +71,28 @@ class MetadataChangeProposalWrapper:
     aspectName: Union[None, str] = None
     aspect: Union[None, _Aspect] = None
     systemMetadata: Union[None, SystemMetadataClass] = None
+    headers: Union[None, Dict[str, str]] = None

     def __post_init__(self) -> None:
         if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
             self.entityType = guess_entity_type(self.entityUrn)
         elif self.entityUrn and self.entityType:
-            guessed_entity_type = guess_entity_type(self.entityUrn).lower()
-            # Entity type checking is actually case insensitive.
-            # Note that urns are case sensitive, but entity types are not.
-            if self.entityType.lower() != guessed_entity_type:
+            guessed_entity_type = guess_entity_type(self.entityUrn)
+            if self.entityType.lower() != guessed_entity_type.lower():
+                # If they aren't a case-ignored match, raise an error.
                 raise ValueError(
                     f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
                 )
+            elif self.entityType != guessed_entity_type:
+                # If they only differ in case, normalize and print a warning.
+                self.entityType = guessed_entity_type
+                warnings.warn(
+                    f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
+                    "This will be automatically corrected for now, but will become an error in a future release. "
+                    "Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
+                    DataHubDeprecationWarning,
+                    stacklevel=3,
+                )
         elif self.entityType == _ENTITY_TYPE_UNSET:
             raise ValueError("entityType must be set if entityUrn is not set")

@@ -112,6 +124,7 @@ class MetadataChangeProposalWrapper:
             auditHeader=self.auditHeader,
             aspectName=self.aspectName,
             systemMetadata=self.systemMetadata,
+            headers=self.headers,
         )

     def make_mcp(self) -> MetadataChangeProposalClass:
@@ -211,6 +224,7 @@ class MetadataChangeProposalWrapper:
                 aspectName=mcpc.aspectName,
                 aspect=aspect,
                 systemMetadata=mcpc.systemMetadata,
+                headers=mcpc.headers,
             )
         else:
             return None
@@ -228,6 +242,7 @@ class MetadataChangeProposalWrapper:
             changeType=mcl.changeType,
             auditHeader=mcl.auditHeader,
             systemMetadata=mcl.systemMetadata,
+            headers=mcl.headers,
         )
         return cls.try_from_mcpc(mcpc) or mcpc


datahub/emitter/request_helper.py CHANGED
@@ -1,14 +1,31 @@
+import json
 import shlex
-from typing import List, Optional, Union
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Union

 import requests
 from requests.auth import HTTPBasicAuth

+from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.serialization_helper import pre_json_transform
+from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
+    MetadataChangeProposal,
+)
+from datahub.metadata.schema_classes import ChangeTypeClass
+
+
+def _decode_bytes(value: Union[str, bytes]) -> str:
+    """Decode bytes to string, if necessary."""
+    if isinstance(value, bytes):
+        return value.decode()
+    return value
+

 def _format_header(name: str, value: Union[str, bytes]) -> str:
     if name == "Authorization":
         return f"{name!s}: <redacted>"
-    return f"{name!s}: {value!s}"
+    return f"{name!s}: {_decode_bytes(value)}"


 def make_curl_command(
@@ -21,7 +38,9 @@ def make_curl_command(

     if session.auth:
         if isinstance(session.auth, HTTPBasicAuth):
-            fragments.extend(["-u", f"{session.auth.username}:<redacted>"])
+            fragments.extend(
+                ["-u", f"{_decode_bytes(session.auth.username)}:<redacted>"]
+            )
         else:
             # For other auth types, they should be handled via headers
             fragments.extend(["-H", "<unknown auth type>"])
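
The two hunks above make header and basic-auth rendering byte-safe: requests can hand back header values (and HTTPBasicAuth usernames) as bytes, and str() on bytes would previously produce strings like "b'application/json'". A quick sketch of the expected behavior of the helpers shown above:

    from datahub.emitter.request_helper import _decode_bytes, _format_header

    assert _decode_bytes(b"application/json") == "application/json"
    assert _decode_bytes("already-a-str") == "already-a-str"

    # Authorization values are always masked, regardless of type:
    assert _format_header("Authorization", "Bearer abc123") == "Authorization: <redacted>"
    assert _format_header("Content-Type", b"application/json") == "Content-Type: application/json"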
@@ -31,3 +50,97 @@

     fragments.append(url)
     return shlex.join(fragments)
+
+
+@dataclass
+class OpenApiRequest:
+    """Represents an OpenAPI request for entity operations."""
+
+    method: str
+    url: str
+    payload: List[Dict[str, Any]]
+
+    @classmethod
+    def from_mcp(
+        cls,
+        mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
+        gms_server: str,
+        async_flag: Optional[bool] = None,
+        async_default: bool = False,
+    ) -> Optional["OpenApiRequest"]:
+        """Factory method to create an OpenApiRequest from a MetadataChangeProposal."""
+        if not mcp.aspectName or (
+            mcp.changeType != ChangeTypeClass.DELETE and not mcp.aspect
+        ):
+            return None
+
+        resolved_async_flag = async_flag if async_flag is not None else async_default
+
+        method = "post"
+        url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}?async={'true' if resolved_async_flag else 'false'}"
+        payload = []
+
+        if mcp.changeType == ChangeTypeClass.DELETE:
+            method = "delete"
+            url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}/{mcp.entityUrn}"
+        else:
+            if mcp.aspect:
+                if mcp.changeType == ChangeTypeClass.PATCH:
+                    method = "patch"
+                    obj = mcp.aspect.to_obj()
+                    content_type = obj.get("contentType")
+                    if obj.get("value") and content_type == JSON_PATCH_CONTENT_TYPE:
+                        # Undo double serialization.
+                        obj = json.loads(obj["value"])
+                        patch_value = obj
+                    else:
+                        raise NotImplementedError(
+                            f"ChangeType {mcp.changeType} only supports content type {JSON_PATCH_CONTENT_TYPE}, found {content_type}."
+                        )
+
+                    if isinstance(patch_value, list):
+                        patch_value = {"patch": patch_value}
+
+                    payload = [
+                        {
+                            "urn": mcp.entityUrn,
+                            mcp.aspectName: {
+                                "value": patch_value,
+                                "systemMetadata": mcp.systemMetadata.to_obj()
+                                if mcp.systemMetadata
+                                else None,
+                            },
+                        }
+                    ]
+                else:
+                    if isinstance(mcp, MetadataChangeProposalWrapper):
+                        aspect_value = pre_json_transform(
+                            mcp.to_obj(simplified_structure=True)
+                        )["aspect"]["json"]
+                    else:
+                        obj = mcp.aspect.to_obj()
+                        content_type = obj.get("contentType")
+                        if obj.get("value") and content_type == JSON_CONTENT_TYPE:
+                            # Undo double serialization.
+                            obj = json.loads(obj["value"])
+                        elif content_type == JSON_PATCH_CONTENT_TYPE:
+                            raise NotImplementedError(
+                                f"ChangeType {mcp.changeType} does not support patch."
+                            )
+                        aspect_value = pre_json_transform(obj)
+
+                    payload = [
+                        {
+                            "urn": mcp.entityUrn,
+                            mcp.aspectName: {
+                                "value": aspect_value,
+                                "systemMetadata": mcp.systemMetadata.to_obj()
+                                if mcp.systemMetadata
+                                else None,
+                            },
+                        }
+                    ]
+            else:
+                raise ValueError(f"ChangeType {mcp.changeType} requires a value.")
+
+        return cls(method=method, url=url, payload=payload)
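
OpenApiRequest is new in this release: it converts an MCP (upsert, patch, or delete) into the HTTP method, URL, and payload for the GMS /openapi/v3/entity endpoint, presumably consumed by the reworked rest_emitter.py (also changed in this release). A minimal usage sketch (the server URL and urn are placeholders):

    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.emitter.request_helper import OpenApiRequest
    from datahub.metadata.schema_classes import StatusClass

    mcp = MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.tbl,PROD)",
        aspect=StatusClass(removed=False),
    )
    request = OpenApiRequest.from_mcp(
        mcp, gms_server="http://localhost:8080", async_flag=False
    )
    assert request is not None
    assert request.method == "post"
    assert request.url == "http://localhost:8080/openapi/v3/entity/dataset?async=false"
    # request.payload is a one-element list keyed by urn and aspect name, e.g.
    # [{"urn": "...", "status": {"value": {"removed": False}, "systemMetadata": None}}]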