acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.
Files changed (159)
  1. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/METADATA +2566 -2514
  2. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/RECORD +159 -149
  3. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  6. datahub/api/entities/datacontract/datacontract.py +35 -3
  7. datahub/api/entities/datajob/dataflow.py +3 -3
  8. datahub/api/entities/datajob/datajob.py +7 -4
  9. datahub/api/entities/dataset/dataset.py +9 -11
  10. datahub/api/entities/forms/forms.py +34 -34
  11. datahub/api/graphql/assertion.py +1 -1
  12. datahub/api/graphql/operation.py +4 -4
  13. datahub/cli/check_cli.py +3 -2
  14. datahub/cli/config_utils.py +2 -2
  15. datahub/cli/delete_cli.py +6 -5
  16. datahub/cli/docker_cli.py +2 -2
  17. datahub/cli/exists_cli.py +2 -1
  18. datahub/cli/get_cli.py +2 -1
  19. datahub/cli/iceberg_cli.py +6 -5
  20. datahub/cli/ingest_cli.py +9 -6
  21. datahub/cli/migrate.py +4 -3
  22. datahub/cli/migration_utils.py +4 -3
  23. datahub/cli/put_cli.py +3 -2
  24. datahub/cli/specific/assertions_cli.py +2 -1
  25. datahub/cli/specific/datacontract_cli.py +3 -2
  26. datahub/cli/specific/dataproduct_cli.py +10 -9
  27. datahub/cli/specific/dataset_cli.py +4 -3
  28. datahub/cli/specific/forms_cli.py +2 -1
  29. datahub/cli/specific/group_cli.py +2 -1
  30. datahub/cli/specific/structuredproperties_cli.py +4 -3
  31. datahub/cli/specific/user_cli.py +2 -1
  32. datahub/cli/state_cli.py +2 -1
  33. datahub/cli/timeline_cli.py +2 -1
  34. datahub/configuration/common.py +5 -0
  35. datahub/configuration/source_common.py +1 -1
  36. datahub/emitter/mcp.py +20 -5
  37. datahub/emitter/request_helper.py +116 -3
  38. datahub/emitter/rest_emitter.py +163 -93
  39. datahub/entrypoints.py +2 -1
  40. datahub/errors.py +4 -0
  41. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
  42. datahub/ingestion/api/source.py +2 -5
  43. datahub/ingestion/api/source_helpers.py +1 -0
  44. datahub/ingestion/glossary/classification_mixin.py +4 -2
  45. datahub/ingestion/graph/client.py +33 -8
  46. datahub/ingestion/graph/config.py +14 -0
  47. datahub/ingestion/graph/filters.py +1 -1
  48. datahub/ingestion/graph/links.py +53 -0
  49. datahub/ingestion/run/pipeline.py +9 -6
  50. datahub/ingestion/run/pipeline_config.py +1 -1
  51. datahub/ingestion/sink/datahub_rest.py +5 -6
  52. datahub/ingestion/source/apply/datahub_apply.py +2 -1
  53. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  54. datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
  55. datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
  56. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
  57. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
  58. datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
  59. datahub/ingestion/source/common/subtypes.py +3 -0
  60. datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
  61. datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
  62. datahub/ingestion/source/dbt/dbt_common.py +10 -2
  63. datahub/ingestion/source/dbt/dbt_core.py +82 -42
  64. datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
  65. datahub/ingestion/source/feast.py +4 -4
  66. datahub/ingestion/source/fivetran/config.py +1 -1
  67. datahub/ingestion/source/fivetran/fivetran_log_api.py +7 -3
  68. datahub/ingestion/source/fivetran/fivetran_query.py +16 -16
  69. datahub/ingestion/source/ge_data_profiler.py +27 -1
  70. datahub/ingestion/source/hex/api.py +1 -20
  71. datahub/ingestion/source/hex/query_fetcher.py +4 -1
  72. datahub/ingestion/source/iceberg/iceberg.py +20 -4
  73. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  74. datahub/ingestion/source/ldap.py +1 -1
  75. datahub/ingestion/source/looker/looker_common.py +17 -2
  76. datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
  77. datahub/ingestion/source/looker/looker_source.py +34 -5
  78. datahub/ingestion/source/looker/lookml_source.py +7 -1
  79. datahub/ingestion/source/metadata/lineage.py +2 -1
  80. datahub/ingestion/source/mlflow.py +19 -6
  81. datahub/ingestion/source/mode.py +74 -28
  82. datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
  83. datahub/ingestion/source/powerbi/config.py +13 -1
  84. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  85. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  86. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
  87. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  88. datahub/ingestion/source/redshift/usage.py +10 -9
  89. datahub/ingestion/source/sigma/config.py +74 -6
  90. datahub/ingestion/source/sigma/sigma.py +16 -1
  91. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  92. datahub/ingestion/source/slack/slack.py +4 -52
  93. datahub/ingestion/source/snowflake/snowflake_config.py +2 -12
  94. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -18
  95. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  96. datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
  97. datahub/ingestion/source/snowflake/snowflake_query.py +9 -63
  98. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  99. datahub/ingestion/source/sql/athena.py +2 -1
  100. datahub/ingestion/source/sql/clickhouse.py +5 -1
  101. datahub/ingestion/source/sql/druid.py +7 -2
  102. datahub/ingestion/source/sql/hive.py +7 -2
  103. datahub/ingestion/source/sql/hive_metastore.py +5 -5
  104. datahub/ingestion/source/sql/mssql/source.py +1 -1
  105. datahub/ingestion/source/sql/oracle.py +6 -2
  106. datahub/ingestion/source/sql/sql_config.py +1 -34
  107. datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
  108. datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
  109. datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
  110. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  111. datahub/ingestion/source/tableau/tableau.py +31 -6
  112. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  113. datahub/ingestion/source/unity/config.py +2 -1
  114. datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
  115. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
  116. datahub/ingestion/source/vertexai/vertexai.py +316 -4
  117. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
  118. datahub/integrations/assertion/common.py +3 -2
  119. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +538 -493
  120. datahub/metadata/_urns/urn_defs.py +1819 -1763
  121. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  122. datahub/metadata/schema.avsc +17296 -16883
  123. datahub/metadata/schema_classes.py +3 -3
  124. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  125. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  126. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  127. datahub/metadata/schemas/FormInfo.avsc +5 -0
  128. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  129. datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
  130. datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
  131. datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
  132. datahub/metadata/schemas/QueryProperties.avsc +4 -2
  133. datahub/metadata/schemas/SystemMetadata.avsc +86 -0
  134. datahub/metadata/schemas/__init__.py +3 -3
  135. datahub/sdk/_all_entities.py +4 -0
  136. datahub/sdk/_shared.py +142 -4
  137. datahub/sdk/_utils.py +4 -0
  138. datahub/sdk/dataset.py +2 -2
  139. datahub/sdk/entity_client.py +8 -0
  140. datahub/sdk/lineage_client.py +235 -0
  141. datahub/sdk/main_client.py +6 -3
  142. datahub/sdk/mlmodel.py +301 -0
  143. datahub/sdk/mlmodelgroup.py +233 -0
  144. datahub/secret/datahub_secret_store.py +2 -1
  145. datahub/specific/dataset.py +12 -0
  146. datahub/sql_parsing/fingerprint_utils.py +6 -0
  147. datahub/sql_parsing/sql_parsing_aggregator.py +48 -34
  148. datahub/sql_parsing/sqlglot_utils.py +18 -14
  149. datahub/telemetry/telemetry.py +2 -2
  150. datahub/testing/check_imports.py +1 -1
  151. datahub/testing/mcp_diff.py +15 -2
  152. datahub/upgrade/upgrade.py +10 -12
  153. datahub/utilities/logging_manager.py +8 -1
  154. datahub/utilities/server_config_util.py +350 -10
  155. datahub/utilities/sqlalchemy_query_combiner.py +4 -5
  156. datahub/utilities/urn_encoder.py +1 -1
  157. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/entry_points.txt +0 -0
  158. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/licenses/LICENSE +0 -0
  159. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/top_level.txt +0 -0
{acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (80.3.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
 # Published at https://pypi.org/project/acryl-datahub/.
 __package_name__ = "acryl-datahub"
-__version__ = "1.0.0.2rc4"
+__version__ = "1.0.0.3"


 def is_dev_mode() -> bool:
datahub/api/circuit_breaker/operation_circuit_breaker.py CHANGED
@@ -55,9 +55,9 @@ class OperationCircuitBreaker(AbstractCircuitBreaker):
        which is set as Airflow connection.
     :param partition: The partition to check the operation.
     :param source_type: The source type to filter on. If not set it will accept any source type.
-        See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
+        See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
     :param operation_type: The operation type to filter on. If not set it will accept any source type.
-        See valid types here: https://datahubproject.io/docs/graphql/enums/#operationtype
+        See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
     """

     start_time_millis: int = int(
datahub/api/entities/datacontract/datacontract.py CHANGED
@@ -1,5 +1,5 @@
 import collections
-from typing import Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple, Union

 from ruamel.yaml import YAML
 from typing_extensions import Literal
@@ -25,6 +25,8 @@ from datahub.metadata.schema_classes import (
     FreshnessContractClass,
     SchemaContractClass,
     StatusClass,
+    StructuredPropertiesClass,
+    StructuredPropertyValueAssignmentClass,
 )
 from datahub.utilities.urns.urn import guess_entity_type

@@ -47,8 +49,12 @@ class DataContract(v1_ConfigModel):
     entity: str = v1_Field(
         description="The entity urn that the Data Contract is associated with"
     )
-    # TODO: add support for properties
-    # properties: Optional[Dict[str, str]] = None
+    properties: Optional[Dict[str, Union[str, float, List[Union[str, float]]]]] = (
+        v1_Field(
+            default=None,
+            description="Structured properties associated with the data contract.",
+        )
+    )

     schema_field: Optional[SchemaAssertion] = v1_Field(default=None, alias="schema")

@@ -172,6 +178,30 @@ class DataContract(v1_ConfigModel):
         )
         yield from dq_assertion_mcps

+        # Construct the structured properties aspect if properties are defined
+        structured_properties_aspect: Optional[StructuredPropertiesClass] = None
+        if self.properties:
+            property_assignments: List[StructuredPropertyValueAssignmentClass] = []
+            for key, value in self.properties.items():
+                # Use f-string formatting for the property URN, like in dataset.py
+                prop_urn = f"urn:li:structuredProperty:{key}"
+                # Ensure value is a list for StructuredPropertyValueAssignmentClass
+                values_list = value if isinstance(value, list) else [value]
+                property_assignments.append(
+                    StructuredPropertyValueAssignmentClass(
+                        propertyUrn=prop_urn,
+                        values=[
+                            str(v) for v in values_list
+                        ],  # Ensure all values are strings
+                    )
+                )
+            if (
+                property_assignments
+            ):  # Only create aspect if there are valid assignments
+                structured_properties_aspect = StructuredPropertiesClass(
+                    properties=property_assignments
+                )
+
         # Now that we've generated the assertions, we can generate
         # the actual data contract.
         yield from MetadataChangeProposalWrapper.construct_many(
@@ -202,6 +232,8 @@ class DataContract(v1_ConfigModel):
                     if True
                     else None
                 ),
+                # Add structured properties aspect if defined
+                structured_properties_aspect,
             ],
         )
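As a usage sketch of the new field (the property keys and values below are illustrative, not from the diff), a contract's `properties` map is converted into a StructuredProperties aspect the same way the added block above does it:

    # Sketch: DataContract.properties -> structured-properties aspect.
    from datahub.metadata.schema_classes import (
        StructuredPropertiesClass,
        StructuredPropertyValueAssignmentClass,
    )

    properties = {"data_tier": "gold", "retention_days": 90}  # illustrative keys

    assignments = [
        StructuredPropertyValueAssignmentClass(
            propertyUrn=f"urn:li:structuredProperty:{key}",
            # scalars are wrapped in a list and all values are stringified,
            # mirroring the handling in the diff above
            values=[str(v) for v in (value if isinstance(value, list) else [value])],
        )
        for key, value in properties.items()
    ]
    aspect = StructuredPropertiesClass(properties=assignments)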
datahub/api/entities/datajob/dataflow.py CHANGED
@@ -30,7 +30,7 @@ class DataFlow:
     """The DataHub representation of data-flow.

     Args:
-        urn (int): Unique identifier of the DataFlow in DataHub. For more detail refer https://datahubproject.io/docs/what/urn/.
+        urn (int): Unique identifier of the DataFlow in DataHub. For more detail refer https://docs.datahub.com/docs/what/urn/.
         id (str): Identifier of DataFlow in orchestrator.
         orchestrator (str): orchestrator. for example airflow.
         cluster (Optional[str]): [deprecated] Please use env.
@@ -40,8 +40,8 @@ class DataFlow:
         url (Optional[str]): URL pointing to DataFlow.
         tags (Set[str]): tags that need to be apply on DataFlow.
         owners (Set[str]): owners that need to be apply on DataFlow.
-        platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to. For more detail refer https://datahubproject.io/docs/platform-instances/.
-        env (Optional[str]): The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer https://datahubproject.io/docs/graphql/enums/#fabrictype.
+        platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to. For more detail refer https://docs.datahub.com/docs/platform-instances/.
+        env (Optional[str]): The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer https://docs.datahub.com/docs/graphql/enums/#fabrictype.
     """

     urn: DataFlowUrn = field(init=False)
datahub/api/entities/datajob/datajob.py CHANGED
@@ -108,7 +108,9 @@ class DataJob:
         return [tags]

     def generate_mcp(
-        self, materialize_iolets: bool = True
+        self,
+        generate_lineage: bool = True,
+        materialize_iolets: bool = True,
     ) -> Iterable[MetadataChangeProposalWrapper]:
         env: Optional[str] = None
         if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
@@ -152,9 +154,10 @@ class DataJob:
         )
         yield mcp

-        yield from self.generate_data_input_output_mcp(
-            materialize_iolets=materialize_iolets
-        )
+        if generate_lineage:
+            yield from self.generate_data_input_output_mcp(
+                materialize_iolets=materialize_iolets
+            )

         for owner in self.generate_ownership_aspect():
             mcp = MetadataChangeProposalWrapper(
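A usage sketch of the new flag (the `datajob` and `emitter` objects are assumed to already exist; they are not part of the diff): callers that manage lineage through another channel can now suppress the input/output MCPs while still emitting the job's remaining aspects.

    # Skip lineage emission but keep the DataJob's own aspects.
    for mcp in datajob.generate_mcp(generate_lineage=False):
        emitter.emit_mcp(mcp)

The default is generate_lineage=True, so existing callers keep the old behavior, with materialize_iolets honored exactly as before.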
datahub/api/entities/dataset/dataset.py CHANGED
@@ -509,16 +509,14 @@ class Dataset(StrictModel):
     def generate_mcp(
         self,
     ) -> Iterable[Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper]]:
-        mcp = MetadataChangeProposalWrapper(
-            entityUrn=self.urn,
-            aspect=DatasetPropertiesClass(
-                description=self.description,
-                name=self.name,
-                customProperties=self.properties,
-                externalUrl=self.external_url,
-            ),
-        )
-        yield mcp
+        patch_builder = self.patch_builder()
+
+        patch_builder.set_custom_properties(self.properties or {})
+        patch_builder.set_description(self.description)
+        patch_builder.set_display_name(self.name)
+        patch_builder.set_external_url(self.external_url)
+
+        yield from patch_builder.build()

         if self.schema_metadata:
             schema_fields = set()
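The switch from a whole-aspect DatasetPropertiesClass upsert to patches means generate_mcp no longer overwrites properties written by other producers. A minimal sketch of the same pattern, assuming patch_builder() returns the DatasetPatchBuilder from datahub/specific/dataset.py (the urn is illustrative):

    from datahub.specific.dataset import DatasetPatchBuilder

    builder = DatasetPatchBuilder(
        "urn:li:dataset:(urn:li:dataPlatform:hive,db.orders,PROD)"  # illustrative
    )
    builder.set_description("Orders fact table")
    builder.set_display_name("orders")
    for mcp in builder.build():  # patch-style proposals, one per touched aspect
        print(mcp.entityUrn, mcp.aspectName)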
@@ -981,7 +979,7 @@

     def model_dump(self, **kwargs):
         """Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
-        exclude = kwargs.pop("exclude", set())
+        exclude = kwargs.pop("exclude", None) or set()

         # If id and name are identical, exclude name from the output
         if self.id == self.name and self.id is not None:
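The one-line change matters because dict.pop only falls back to its default when the key is absent; a caller passing exclude=None explicitly would previously flow None through to the rest of the method. A quick illustration using plain dict semantics:

    kwargs = {"exclude": None}                       # caller passed exclude=None
    old = {**kwargs}.pop("exclude", set())           # old: None, default ignored
    new = {**kwargs}.pop("exclude", None) or set()   # new: empty set, safe to use
    assert old is None and new == set()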
datahub/api/entities/forms/forms.py CHANGED
@@ -26,6 +26,7 @@ from datahub.emitter.mce_builder import (
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import (
     FormActorAssignmentClass,
     FormInfoClass,
@@ -133,47 +134,46 @@ class Forms(ConfigModel):
     def create(file: str) -> None:
         emitter: DataHubGraph

-        with get_default_graph() as emitter:
-            with open(file) as fp:
-                forms: List[dict] = yaml.safe_load(fp)
-                for form_raw in forms:
-                    form = Forms.parse_obj(form_raw)
+        with get_default_graph(ClientMode.CLI) as emitter, open(file) as fp:
+            forms: List[dict] = yaml.safe_load(fp)
+            for form_raw in forms:
+                form = Forms.parse_obj(form_raw)

-                    try:
-                        if not FormType.has_value(form.type):
-                            logger.error(
-                                f"Form type {form.type} does not exist. Please try again with a valid type."
-                            )
-
-                        mcp = MetadataChangeProposalWrapper(
-                            entityUrn=form.urn,
-                            aspect=FormInfoClass(
-                                name=form.name,
-                                description=form.description,
-                                prompts=form.validate_prompts(emitter),
-                                type=form.type,
-                                actors=form.create_form_actors(form.actors),
-                            ),
+                try:
+                    if not FormType.has_value(form.type):
+                        logger.error(
+                            f"Form type {form.type} does not exist. Please try again with a valid type."
                         )
-                        emitter.emit_mcp(mcp)

-                        logger.info(f"Created form {form.urn}")
+                    mcp = MetadataChangeProposalWrapper(
+                        entityUrn=form.urn,
+                        aspect=FormInfoClass(
+                            name=form.name,
+                            description=form.description,
+                            prompts=form.validate_prompts(emitter),
+                            type=form.type,
+                            actors=form.create_form_actors(form.actors),
+                        ),
+                    )
+                    emitter.emit_mcp(mcp)
+
+                    logger.info(f"Created form {form.urn}")

-                        if form.owners or form.group_owners:
-                            form.add_owners(emitter)
+                    if form.owners or form.group_owners:
+                        form.add_owners(emitter)

-                        if form.entities:
-                            if form.entities.urns:
-                                # Associate specific entities with a form
-                                form.upload_entities_for_form(emitter)
+                    if form.entities:
+                        if form.entities.urns:
+                            # Associate specific entities with a form
+                            form.upload_entities_for_form(emitter)

-                            if form.entities.filters:
-                                # Associate groups of entities with a form based on filters
-                                form.create_form_filters(emitter)
+                        if form.entities.filters:
+                            # Associate groups of entities with a form based on filters
+                            form.create_form_filters(emitter)

-                    except Exception as e:
-                        logger.error(e)
-                        return
+                except Exception as e:
+                    logger.error(e)
+                    return

     def validate_prompts(self, emitter: DataHubGraph) -> List[FormPromptClass]:
         prompts = []
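The get_default_graph(ClientMode.CLI) pattern introduced here recurs throughout the CLI modules below. A minimal sketch of the call (the urn is illustrative):

    from datahub.ingestion.graph.client import get_default_graph
    from datahub.ingestion.graph.config import ClientMode

    # Tag the connection as CLI traffic; the graph client works as a
    # context manager, as the rewritten Forms.create above shows.
    with get_default_graph(ClientMode.CLI) as graph:
        print(graph.exists("urn:li:corpuser:datahub"))  # illustrative urn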
datahub/api/graphql/assertion.py CHANGED
@@ -65,7 +65,7 @@ query dataset($urn: String!, $start: Int, $count: Int, $status: AssertionRunStat

         :param urn: The DataHub dataset unique identifier.
         :param status: The assertion status to filter for. Every status will be accepted if it is not set.
-            See valid status at https://datahubproject.io/docs/graphql/enums#assertionrunstatus
+            See valid status at https://docs.datahub.com/docs/graphql/enums#assertionrunstatus
         :param start_time_millis: The start time in milliseconds from the assertions will be queried.
         :param end_time_millis: The end time in milliseconds until the assertions will be queried.
         :param filter: Additional key value filters which will be applied as AND query
datahub/api/graphql/operation.py CHANGED
@@ -55,10 +55,10 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
         Report operation metadata for a dataset.
         :param source_type: The source type to filter on. If not set it will accept any source type.
             Default value: DATA_PROCESS
-            See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
+            See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
         :param operation_type: The operation type to filter on. If not set it will accept any source type.
             Default value: "UPDATE"
-            See valid types here: https://datahubproject.io/docs/graphql/enums/#operationtype
+            See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
         :param partition: The partition to set the operation.
         :param num_affected_rows: The number of rows affected by this operation.
         :param custom_properties: Key/value pair of custom propertis
@@ -103,9 +103,9 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
         :param end_time_millis: The end time in milliseconds until the operations will be queried.
         :param limit: The maximum number of items to return.
         :param source_type: The source type to filter on. If not set it will accept any source type.
-            See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
+            See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
         :param operation_type: The operation type to filter on. If not set it will accept any source type.
-            See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
+            See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
         :param partition: The partition to check the operation.
         """

datahub/cli/check_cli.py CHANGED
@@ -16,6 +16,7 @@ from datahub.configuration import config_loader
 from datahub.configuration.common import AllowDenyPattern
 from datahub.emitter.mce_builder import DEFAULT_ENV
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.ingestion.sink.sink_registry import sink_registry
 from datahub.ingestion.source.source_registry import source_registry
@@ -259,7 +260,7 @@ def sql_lineage(

     graph = None
     if online:
-        graph = get_default_graph()
+        graph = get_default_graph(ClientMode.CLI)

     lineage = create_lineage_sql_parsed_result(
         sql,
@@ -472,7 +473,7 @@ WHERE
 @check.command()
 def server_config() -> None:
     """Print the server config."""
-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)

     server_config = graph.get_server_config()

datahub/cli/config_utils.py CHANGED
@@ -17,8 +17,8 @@ from datahub.ingestion.graph.config import DatahubClientConfig
 logger = logging.getLogger(__name__)

 CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
-DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
-DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub")
+DATAHUB_CONFIG_PATH: str = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
+DATAHUB_ROOT_FOLDER: str = os.path.expanduser("~/.datahub")
 ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"

 ENV_DATAHUB_SYSTEM_CLIENT_ID = "DATAHUB_SYSTEM_CLIENT_ID"
datahub/cli/delete_cli.py CHANGED
@@ -15,6 +15,7 @@ from datahub.cli import cli_utils
 from datahub.configuration.datetimes import ClickDatetime
 from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.ingestion.graph.filters import RemovedStatusFilter
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade
@@ -48,7 +49,7 @@ def delete() -> None:

     See `datahub delete by-filter` for the list of available filters.

-    See https://datahubproject.io/docs/how/delete-metadata for more detailed docs.
+    See https://docs.datahub.com/docs/how/delete-metadata for more detailed docs.
     """
     pass

@@ -124,7 +125,7 @@ def by_registry(
     Delete all metadata written using the given registry id and version pair.
     """

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

     if soft and not dry_run:
         raise click.UsageError(
@@ -175,7 +176,7 @@ def references(urn: str, dry_run: bool, force: bool) -> None:
     Delete all references to an entity (but not the entity itself).
     """

-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)
     logger.info(f"Using graph: {graph}")

     references_count, related_aspects = graph.delete_references_to_urn(
@@ -238,7 +239,7 @@ def undo_by_filter(
     """
     Undo soft deletion by filters
     """
-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)
     logger.info(f"Using {graph}")
     if urn:
         graph.set_soft_delete_status(urn=urn, delete=False)
@@ -410,7 +411,7 @@ def by_filter(
         abort=True,
     )

-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)
     logger.info(f"Using {graph}")

     # Determine which urns to delete.
datahub/cli/docker_cli.py CHANGED
@@ -811,7 +811,7 @@ def quickstart(
         raise status.to_exception(
             header="Unable to run quickstart - the following issues were detected:",
             footer="If you think something went wrong, please file an issue at https://github.com/datahub-project/datahub/issues\n"
-            "or send a message in our Slack https://slack.datahubproject.io/\n"
+            "or send a message in our Slack https://datahub.com/slack/\n"
             f"Be sure to attach the logs from {log_file.name}",
         )

@@ -824,7 +824,7 @@ def quickstart(
         fg="green",
     )
     click.secho(
-        "Need support? Get in touch on Slack: https://slack.datahubproject.io/",
+        "Need support? Get in touch on Slack: https://datahub.com/slack/",
         fg="magenta",
     )

datahub/cli/exists_cli.py CHANGED
@@ -6,6 +6,7 @@ import click
 from click_default_group import DefaultGroup

 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -36,4 +37,4 @@ def urn(ctx: Any, urn: Optional[str]) -> None:
             raise click.UsageError("Nothing for me to get. Maybe provide an urn?")
         urn = ctx.args[0]
         logger.debug(f"Using urn from args {urn}")
-    click.echo(json.dumps(get_default_graph().exists(urn)))
+    click.echo(json.dumps(get_default_graph(ClientMode.CLI).exists(urn)))
datahub/cli/get_cli.py CHANGED
@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup

 from datahub.cli.cli_utils import get_aspects_for_entity
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -46,7 +47,7 @@ def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
         urn = ctx.args[0]
         logger.debug(f"Using urn from args {urn}")

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

     if aspect:
         # If aspects are specified, we need to do the existence check first.
datahub/cli/iceberg_cli.py CHANGED
@@ -13,6 +13,7 @@ import datahub.metadata.schema_classes
 from datahub.cli.cli_utils import post_entity
 from datahub.configuration.common import GraphError
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import SystemMetadataClass
 from datahub.telemetry import telemetry

@@ -178,7 +179,7 @@ def create(
     Create an iceberg warehouse.
     """

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

     urn = iceberg_data_platform_instance_urn(warehouse)

@@ -331,7 +332,7 @@ def update(
     Update iceberg warehouses. Can only update credentials, and role. Cannot update region
     """

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

     urn = iceberg_data_platform_instance_urn(warehouse)

@@ -407,7 +408,7 @@ def list() -> None:
     List iceberg warehouses
     """

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

     for warehouse in get_all_warehouses(client):
         click.echo(warehouse)
@@ -420,7 +421,7 @@ def list() -> None:

 @telemetry.with_telemetry()
 def get(warehouse: str) -> None:
     """Fetches the details of the specified iceberg warehouse"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     urn = iceberg_data_platform_instance_urn(warehouse)

     if client.exists(urn):
@@ -455,7 +456,7 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:

     urn = iceberg_data_platform_instance_urn(warehouse)

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

     if not client.exists(urn):
         raise click.ClickException(f"urn {urn} not found")
datahub/cli/ingest_cli.py CHANGED
@@ -14,10 +14,11 @@ from tabulate import tabulate

 from datahub._version import nice_version_name
 from datahub.cli import cli_utils
-from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH
+from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH, load_client_config
 from datahub.configuration.common import GraphError
 from datahub.configuration.config_loader import load_config_file
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.ingestion.run.connection import ConnectionManager
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.telemetry import telemetry
@@ -269,7 +270,7 @@ def deploy(
         urn:li:dataHubIngestionSource:<name>
     """

-    datahub_graph = get_default_graph()
+    datahub_graph = get_default_graph(ClientMode.CLI)

     variables = deploy_source_vars(
         name=name,
@@ -360,6 +361,7 @@ def mcps(path: str) -> None:
     """

     click.echo("Starting ingestion...")
+    datahub_config = load_client_config()
     recipe: dict = {
         "source": {
             "type": "file",
@@ -367,6 +369,7 @@ def mcps(path: str) -> None:
                 "path": path,
             },
         },
+        "datahub_api": datahub_config,
     }

     pipeline = Pipeline.create(recipe, report_to=None)
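In effect, `datahub ingest mcps <path>` now builds a recipe like the following sketch (the path is illustrative), so the MCP file ingestion reuses the server and token from the CLI's ~/.datahubenv:

    from datahub.cli.config_utils import load_client_config
    from datahub.ingestion.run.pipeline import Pipeline

    recipe = {
        "source": {"type": "file", "config": {"path": "metadata.json"}},  # illustrative
        "datahub_api": load_client_config(),  # forwards the CLI client config
    }
    Pipeline.create(recipe, report_to=None).run()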
@@ -422,7 +425,7 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
         }
     }

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     session = client._session
     gms_host = client.config.server

@@ -508,7 +511,7 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
 def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> None:
     """List recent ingestion runs to datahub"""

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     session = client._session
     gms_host = client.config.server

@@ -559,7 +562,7 @@ def show(
     run_id: str, start: int, count: int, include_soft_deletes: bool, show_aspect: bool
 ) -> None:
     """Describe a provided ingestion run to datahub"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     session = client._session
     gms_host = client.config.server

@@ -609,7 +612,7 @@ def rollback(
     run_id: str, force: bool, dry_run: bool, safe: bool, report_dir: str
 ) -> None:
     """Rollback a provided ingestion run to datahub"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

     if not force and not dry_run:
         click.confirm(
datahub/cli/migrate.py CHANGED
@@ -25,6 +25,7 @@ from datahub.emitter.mcp_builder import (
 )
 from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.ingestion.graph.client import (
+    ClientMode,
     DataHubGraph,
     RelatedEntity,
     get_default_graph,
@@ -147,7 +148,7 @@ def dataplatform2instance_func(
     migration_report = MigrationReport(run_id, dry_run, keep)
     system_metadata = SystemMetadataClass(runId=run_id)

-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)

     urns_to_migrate: List[str] = []

@@ -386,7 +387,7 @@ def migrate_containers(


 def get_containers_for_migration(env: str) -> List[Any]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     containers_to_migrate = list(
         client.get_urns_by_filter(entity_types=["container"], env=env)
     )
@@ -445,7 +446,7 @@ def process_container_relationships(
     relationships: Iterable[RelatedEntity] = migration_utils.get_incoming_relationships(
         urn=src_urn
     )
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     for relationship in relationships:
         log.debug(f"Incoming Relationship: {relationship}")
         target_urn: str = relationship.urn
datahub/cli/migration_utils.py CHANGED
@@ -12,6 +12,7 @@ from datahub.ingestion.graph.client import (
     RelatedEntity,
     get_default_graph,
 )
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import (
     ChartInfoClass,
     ContainerClass,
@@ -243,7 +244,7 @@ def clone_aspect(
     run_id: str = str(uuid.uuid4()),
     dry_run: bool = False,
 ) -> Iterable[MetadataChangeProposalWrapper]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     aspect_map = cli_utils.get_aspects_for_entity(
         client._session,
         client.config.server,
@@ -274,7 +275,7 @@ def clone_aspect(


 def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     yield from client.get_related_entities(
         entity_urn=urn,
         relationship_types=[
@@ -290,7 +291,7 @@ def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:


 def get_outgoing_relationships(urn: str) -> Iterable[RelatedEntity]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
     yield from client.get_related_entities(
         entity_urn=urn,
         relationship_types=[