acryl-datahub 1.0.0.3rc9__py3-none-any.whl → 1.0.0.3rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the registry's advisory details for more information.

Files changed (87) hide show
  1. {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/METADATA +2524 -2471
  2. {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/RECORD +87 -87
  3. {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  6. datahub/api/entities/datajob/dataflow.py +3 -3
  7. datahub/api/entities/forms/forms.py +34 -34
  8. datahub/api/graphql/assertion.py +1 -1
  9. datahub/api/graphql/operation.py +4 -4
  10. datahub/cli/check_cli.py +3 -2
  11. datahub/cli/config_utils.py +2 -2
  12. datahub/cli/delete_cli.py +6 -5
  13. datahub/cli/docker_cli.py +2 -2
  14. datahub/cli/exists_cli.py +2 -1
  15. datahub/cli/get_cli.py +2 -1
  16. datahub/cli/iceberg_cli.py +6 -5
  17. datahub/cli/ingest_cli.py +9 -6
  18. datahub/cli/migrate.py +4 -3
  19. datahub/cli/migration_utils.py +4 -3
  20. datahub/cli/put_cli.py +3 -2
  21. datahub/cli/specific/assertions_cli.py +2 -1
  22. datahub/cli/specific/datacontract_cli.py +3 -2
  23. datahub/cli/specific/dataproduct_cli.py +10 -9
  24. datahub/cli/specific/dataset_cli.py +4 -3
  25. datahub/cli/specific/forms_cli.py +2 -1
  26. datahub/cli/specific/group_cli.py +2 -1
  27. datahub/cli/specific/structuredproperties_cli.py +4 -3
  28. datahub/cli/specific/user_cli.py +2 -1
  29. datahub/cli/state_cli.py +2 -1
  30. datahub/cli/timeline_cli.py +2 -1
  31. datahub/configuration/source_common.py +1 -1
  32. datahub/emitter/request_helper.py +116 -3
  33. datahub/emitter/rest_emitter.py +163 -93
  34. datahub/entrypoints.py +2 -1
  35. datahub/ingestion/api/source.py +2 -5
  36. datahub/ingestion/glossary/classification_mixin.py +4 -2
  37. datahub/ingestion/graph/client.py +16 -7
  38. datahub/ingestion/graph/config.py +14 -0
  39. datahub/ingestion/graph/filters.py +1 -1
  40. datahub/ingestion/run/pipeline.py +3 -2
  41. datahub/ingestion/run/pipeline_config.py +1 -1
  42. datahub/ingestion/sink/datahub_rest.py +5 -6
  43. datahub/ingestion/source/apply/datahub_apply.py +2 -1
  44. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  45. datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
  46. datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
  47. datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
  48. datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
  49. datahub/ingestion/source/dbt/dbt_common.py +10 -2
  50. datahub/ingestion/source/dbt/dbt_core.py +82 -42
  51. datahub/ingestion/source/feast.py +4 -4
  52. datahub/ingestion/source/ge_data_profiler.py +2 -1
  53. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  54. datahub/ingestion/source/ldap.py +1 -1
  55. datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
  56. datahub/ingestion/source/looker/lookml_source.py +7 -1
  57. datahub/ingestion/source/metadata/lineage.py +2 -1
  58. datahub/ingestion/source/mode.py +74 -28
  59. datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
  60. datahub/ingestion/source/powerbi/config.py +1 -1
  61. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  62. datahub/ingestion/source/redshift/usage.py +10 -9
  63. datahub/ingestion/source/sql/clickhouse.py +5 -1
  64. datahub/ingestion/source/sql/druid.py +7 -2
  65. datahub/ingestion/source/sql/oracle.py +6 -2
  66. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  67. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  68. datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
  69. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
  70. datahub/integrations/assertion/common.py +3 -2
  71. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +490 -490
  72. datahub/metadata/_urns/urn_defs.py +1786 -1786
  73. datahub/metadata/schema.avsc +17364 -16988
  74. datahub/metadata/schema_classes.py +3 -3
  75. datahub/metadata/schemas/__init__.py +3 -3
  76. datahub/sdk/main_client.py +2 -2
  77. datahub/secret/datahub_secret_store.py +2 -1
  78. datahub/telemetry/telemetry.py +2 -2
  79. datahub/testing/check_imports.py +1 -1
  80. datahub/upgrade/upgrade.py +10 -12
  81. datahub/utilities/logging_manager.py +8 -1
  82. datahub/utilities/server_config_util.py +378 -10
  83. datahub/utilities/sqlalchemy_query_combiner.py +4 -5
  84. datahub/utilities/urn_encoder.py +1 -1
  85. {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/entry_points.txt +0 -0
  86. {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/licenses/LICENSE +0 -0
  87. {acryl_datahub-1.0.0.3rc9.dist-info → acryl_datahub-1.0.0.3rc11.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup
7
7
 
8
8
  from datahub.api.entities.forms.forms import Forms
9
9
  from datahub.ingestion.graph.client import get_default_graph
10
+ from datahub.ingestion.graph.config import ClientMode
10
11
  from datahub.telemetry import telemetry
11
12
  from datahub.upgrade import upgrade
12
13
 
@@ -40,7 +41,7 @@ def upsert(file: Path) -> None:
40
41
  @telemetry.with_telemetry()
41
42
  def get(urn: str, to_file: str) -> None:
42
43
  """Get form from DataHub"""
43
- with get_default_graph() as graph:
44
+ with get_default_graph(ClientMode.CLI) as graph:
44
45
  if graph.exists(urn):
45
46
  form: Forms = Forms.from_datahub(graph=graph, urn=urn)
46
47
  click.secho(
@@ -10,6 +10,7 @@ from datahub.api.entities.corpgroup.corpgroup import (
10
10
  )
11
11
  from datahub.cli.specific.file_loader import load_file
12
12
  from datahub.ingestion.graph.client import get_default_graph
13
+ from datahub.ingestion.graph.config import ClientMode
13
14
  from datahub.telemetry import telemetry
14
15
  from datahub.upgrade import upgrade
15
16
 
@@ -40,7 +41,7 @@ def upsert(file: Path, override_editable: bool) -> None:
40
41
 
41
42
  config_dict = load_file(file)
42
43
  group_configs = config_dict if isinstance(config_dict, list) else [config_dict]
43
- with get_default_graph() as emitter:
44
+ with get_default_graph(ClientMode.CLI) as emitter:
44
45
  for group_config in group_configs:
45
46
  try:
46
47
  datahub_group = CorpGroup.parse_obj(group_config)
@@ -11,6 +11,7 @@ from datahub.api.entities.structuredproperties.structuredproperties import (
11
11
  StructuredProperties,
12
12
  )
13
13
  from datahub.ingestion.graph.client import get_default_graph
14
+ from datahub.ingestion.graph.config import ClientMode
14
15
  from datahub.telemetry import telemetry
15
16
  from datahub.upgrade import upgrade
16
17
  from datahub.utilities.urns.urn import Urn
@@ -33,7 +34,7 @@ def properties() -> None:
33
34
  def upsert(file: Path) -> None:
34
35
  """Upsert structured properties in DataHub."""
35
36
 
36
- with get_default_graph() as graph:
37
+ with get_default_graph(ClientMode.CLI) as graph:
37
38
  StructuredProperties.create(str(file), graph)
38
39
 
39
40
 
@@ -48,7 +49,7 @@ def get(urn: str, to_file: str) -> None:
48
49
  """Get structured properties from DataHub"""
49
50
  urn = Urn.make_structured_property_urn(urn)
50
51
 
51
- with get_default_graph() as graph:
52
+ with get_default_graph(ClientMode.CLI) as graph:
52
53
  if graph.exists(urn):
53
54
  structuredproperties: StructuredProperties = (
54
55
  StructuredProperties.from_datahub(graph=graph, urn=urn)
@@ -117,7 +118,7 @@ def list(details: bool, to_file: str) -> None:
117
118
  with open(file, "w") as fp:
118
119
  yaml.dump(serialized_objects, fp)
119
120
 
120
- with get_default_graph() as graph:
121
+ with get_default_graph(ClientMode.CLI) as graph:
121
122
  if details:
122
123
  logger.info(
123
124
  "Listing structured properties with details. Use --no-details for urns only"
@@ -8,6 +8,7 @@ from click_default_group import DefaultGroup
8
8
  from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
9
9
  from datahub.cli.specific.file_loader import load_file
10
10
  from datahub.ingestion.graph.client import get_default_graph
11
+ from datahub.ingestion.graph.config import ClientMode
11
12
  from datahub.telemetry import telemetry
12
13
  from datahub.upgrade import upgrade
13
14
 
@@ -38,7 +39,7 @@ def upsert(file: Path, override_editable: bool) -> None:
38
39
 
39
40
  config_dict = load_file(pathlib.Path(file))
40
41
  user_configs = config_dict if isinstance(config_dict, list) else [config_dict]
41
- with get_default_graph() as emitter:
42
+ with get_default_graph(ClientMode.CLI) as emitter:
42
43
  for user_config in user_configs:
43
44
  try:
44
45
  datahub_user: CorpUser = CorpUser.parse_obj(user_config)
datahub/cli/state_cli.py CHANGED
@@ -5,6 +5,7 @@ import click
5
5
  from click_default_group import DefaultGroup
6
6
 
7
7
  from datahub.ingestion.graph.client import get_default_graph
8
+ from datahub.ingestion.graph.config import ClientMode
8
9
  from datahub.telemetry import telemetry
9
10
  from datahub.upgrade import upgrade
10
11
 
@@ -28,7 +29,7 @@ def inspect(pipeline_name: str, platform: str) -> None:
28
29
  Only works for state entity removal for now.
29
30
  """
30
31
 
31
- datahub_graph = get_default_graph()
32
+ datahub_graph = get_default_graph(ClientMode.CLI)
32
33
  checkpoint = datahub_graph.get_latest_pipeline_checkpoint(pipeline_name, platform)
33
34
  if not checkpoint:
34
35
  click.secho("No ingestion state found.", fg="red")
@@ -9,6 +9,7 @@ from requests import Response
9
9
 
10
10
  from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
11
11
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
12
+ from datahub.ingestion.graph.config import ClientMode
12
13
  from datahub.telemetry import telemetry
13
14
  from datahub.upgrade import upgrade
14
15
  from datahub.utilities.urns.urn import Urn
@@ -63,7 +64,7 @@ def get_timeline(
63
64
  diff: bool,
64
65
  graph: Optional[DataHubGraph] = None,
65
66
  ) -> Any:
66
- client = graph if graph else get_default_graph()
67
+ client = graph if graph else get_default_graph(ClientMode.CLI)
67
68
  session = client._session
68
69
  host = client.config.server
69
70
  if urn.startswith("urn%3A"):
@@ -16,7 +16,7 @@ class PlatformInstanceConfigMixin(ConfigModel):
16
16
  default=None,
17
17
  description="The instance of the platform that all assets produced by this recipe belong to. "
18
18
  "This should be unique within the platform. "
19
- "See https://datahubproject.io/docs/platform-instances/ for more details.",
19
+ "See https://docs.datahub.com/docs/platform-instances/ for more details.",
20
20
  )
21
21
 
22
22
 
@@ -1,14 +1,31 @@
1
+ import json
1
2
  import shlex
2
- from typing import List, Optional, Union
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional, Union
3
5
 
4
6
  import requests
5
7
  from requests.auth import HTTPBasicAuth
6
8
 
9
+ from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
10
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
11
+ from datahub.emitter.serialization_helper import pre_json_transform
12
+ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
13
+ MetadataChangeProposal,
14
+ )
15
+ from datahub.metadata.schema_classes import ChangeTypeClass
16
+
17
+
18
+ def _decode_bytes(value: Union[str, bytes]) -> str:
19
+ """Decode bytes to string, if necessary."""
20
+ if isinstance(value, bytes):
21
+ return value.decode()
22
+ return value
23
+
7
24
 
8
25
  def _format_header(name: str, value: Union[str, bytes]) -> str:
9
26
  if name == "Authorization":
10
27
  return f"{name!s}: <redacted>"
11
- return f"{name!s}: {value!s}"
28
+ return f"{name!s}: {_decode_bytes(value)}"
12
29
 
13
30
 
14
31
  def make_curl_command(
@@ -21,7 +38,9 @@ def make_curl_command(
21
38
 
22
39
  if session.auth:
23
40
  if isinstance(session.auth, HTTPBasicAuth):
24
- fragments.extend(["-u", f"{session.auth.username}:<redacted>"])
41
+ fragments.extend(
42
+ ["-u", f"{_decode_bytes(session.auth.username)}:<redacted>"]
43
+ )
25
44
  else:
26
45
  # For other auth types, they should be handled via headers
27
46
  fragments.extend(["-H", "<unknown auth type>"])
@@ -31,3 +50,97 @@ def make_curl_command(
31
50
 
32
51
  fragments.append(url)
33
52
  return shlex.join(fragments)
53
+
54
+
55
@dataclass
class OpenApiRequest:
    """Represents an OpenAPI request for entity operations.

    Bundles the HTTP method, fully resolved URL, and JSON payload required to
    submit a MetadataChangeProposal to DataHub's ``/openapi/v3/entity``
    endpoints.
    """

    # HTTP verb for the request: "post", "patch", or "delete".
    method: str
    # Fully resolved endpoint URL (includes the ``async`` query parameter for upserts).
    url: str
    # JSON body: a list of per-entity aspect envelopes; empty for deletes.
    payload: List[Dict[str, Any]]

    @classmethod
    def from_mcp(
        cls,
        mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
        gms_server: str,
        async_flag: Optional[bool] = None,
        async_default: bool = False,
    ) -> Optional["OpenApiRequest"]:
        """Factory method to create an OpenApiRequest from a MetadataChangeProposal.

        Args:
            mcp: The change proposal (raw or wrapper form) to translate.
            gms_server: Base URL of the DataHub GMS server (no trailing slash).
            async_flag: Explicit async preference; overrides ``async_default``
                when not ``None``.
            async_default: Fallback async behavior used when ``async_flag`` is
                ``None``.

        Returns:
            An ``OpenApiRequest``, or ``None`` when the MCP has no aspect name
            or (for non-DELETE changes) no aspect value.

        Raises:
            NotImplementedError: For PATCH aspects whose content type is not
                ``JSON_PATCH_CONTENT_TYPE``, or JSON-patch content on a
                non-PATCH change type.
            ValueError: When a non-DELETE change carries no aspect value.
        """
        # An aspect name is always required; an aspect value is required for
        # every change type except DELETE.
        if not mcp.aspectName or (
            mcp.changeType != ChangeTypeClass.DELETE and not mcp.aspect
        ):
            return None

        # Caller-supplied flag wins; otherwise use the configured default.
        resolved_async_flag = async_flag if async_flag is not None else async_default

        method = "post"
        url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}?async={'true' if resolved_async_flag else 'false'}"
        payload = []

        if mcp.changeType == ChangeTypeClass.DELETE:
            # Deletes address the entity URN directly; no body, no async param.
            method = "delete"
            url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}/{mcp.entityUrn}"
        else:
            if mcp.aspect:
                if mcp.changeType == ChangeTypeClass.PATCH:
                    method = "patch"
                    obj = mcp.aspect.to_obj()
                    content_type = obj.get("contentType")
                    if obj.get("value") and content_type == JSON_PATCH_CONTENT_TYPE:
                        # Undo double serialization.
                        obj = json.loads(obj["value"])
                        patch_value = obj
                    else:
                        # NOTE(review): "context type" in this message looks
                        # like a typo for "content type" — confirm upstream.
                        raise NotImplementedError(
                            f"ChangeType {mcp.changeType} only supports context type {JSON_PATCH_CONTENT_TYPE}, found {content_type}."
                        )

                    # The endpoint expects a {"patch": [...]} object rather
                    # than a bare JSON-patch operation list.
                    if isinstance(patch_value, list):
                        patch_value = {"patch": patch_value}

                    payload = [
                        {
                            "urn": mcp.entityUrn,
                            mcp.aspectName: {
                                "value": patch_value,
                                "systemMetadata": mcp.systemMetadata.to_obj()
                                if mcp.systemMetadata
                                else None,
                            },
                        }
                    ]
                else:
                    # Upsert path: derive the plain-JSON aspect value.
                    if isinstance(mcp, MetadataChangeProposalWrapper):
                        aspect_value = pre_json_transform(
                            mcp.to_obj(simplified_structure=True)
                        )["aspect"]["json"]
                    else:
                        obj = mcp.aspect.to_obj()
                        content_type = obj.get("contentType")
                        if obj.get("value") and content_type == JSON_CONTENT_TYPE:
                            # Undo double serialization.
                            obj = json.loads(obj["value"])
                        elif content_type == JSON_PATCH_CONTENT_TYPE:
                            raise NotImplementedError(
                                f"ChangeType {mcp.changeType} does not support patch."
                            )
                        aspect_value = pre_json_transform(obj)

                    payload = [
                        {
                            "urn": mcp.entityUrn,
                            mcp.aspectName: {
                                "value": aspect_value,
                                "systemMetadata": mcp.systemMetadata.to_obj()
                                if mcp.systemMetadata
                                else None,
                            },
                        }
                    ]
            else:
                raise ValueError(f"ChangeType {mcp.changeType} requires a value.")

        return cls(method=method, url=url, payload=payload)