acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (106) hide show
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/METADATA +2391 -2392
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/RECORD +105 -88
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/entry_points.txt +2 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/dataset/dataset.py +1 -28
  7. datahub/cli/specific/dataset_cli.py +26 -10
  8. datahub/emitter/mce_builder.py +1 -3
  9. datahub/emitter/mcp_builder.py +8 -0
  10. datahub/emitter/request_helper.py +19 -14
  11. datahub/emitter/response_helper.py +25 -18
  12. datahub/emitter/rest_emitter.py +23 -7
  13. datahub/errors.py +8 -0
  14. datahub/ingestion/api/source.py +7 -2
  15. datahub/ingestion/api/source_helpers.py +14 -2
  16. datahub/ingestion/extractor/schema_util.py +1 -0
  17. datahub/ingestion/graph/client.py +26 -20
  18. datahub/ingestion/graph/filters.py +62 -17
  19. datahub/ingestion/sink/datahub_rest.py +2 -2
  20. datahub/ingestion/source/cassandra/cassandra.py +1 -10
  21. datahub/ingestion/source/common/data_platforms.py +23 -0
  22. datahub/ingestion/source/common/gcp_credentials_config.py +6 -0
  23. datahub/ingestion/source/common/subtypes.py +17 -1
  24. datahub/ingestion/source/data_lake_common/path_spec.py +21 -1
  25. datahub/ingestion/source/dbt/dbt_common.py +6 -4
  26. datahub/ingestion/source/dbt/dbt_core.py +4 -6
  27. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  28. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  29. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  30. datahub/ingestion/source/dremio/dremio_source.py +96 -117
  31. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  32. datahub/ingestion/source/ge_data_profiler.py +11 -1
  33. datahub/ingestion/source/hex/__init__.py +0 -0
  34. datahub/ingestion/source/hex/api.py +394 -0
  35. datahub/ingestion/source/hex/constants.py +3 -0
  36. datahub/ingestion/source/hex/hex.py +167 -0
  37. datahub/ingestion/source/hex/mapper.py +372 -0
  38. datahub/ingestion/source/hex/model.py +68 -0
  39. datahub/ingestion/source/iceberg/iceberg.py +193 -140
  40. datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
  41. datahub/ingestion/source/mlflow.py +217 -8
  42. datahub/ingestion/source/mode.py +11 -1
  43. datahub/ingestion/source/openapi.py +69 -34
  44. datahub/ingestion/source/powerbi/config.py +31 -4
  45. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  46. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
  47. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  48. datahub/ingestion/source/powerbi/powerbi.py +41 -24
  49. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -11
  50. datahub/ingestion/source/redshift/lineage_v2.py +9 -1
  51. datahub/ingestion/source/redshift/query.py +1 -1
  52. datahub/ingestion/source/s3/source.py +11 -0
  53. datahub/ingestion/source/sigma/config.py +3 -4
  54. datahub/ingestion/source/sigma/sigma.py +10 -6
  55. datahub/ingestion/source/slack/slack.py +399 -82
  56. datahub/ingestion/source/snowflake/constants.py +1 -0
  57. datahub/ingestion/source/snowflake/snowflake_config.py +14 -1
  58. datahub/ingestion/source/snowflake/snowflake_query.py +17 -0
  59. datahub/ingestion/source/snowflake/snowflake_report.py +3 -0
  60. datahub/ingestion/source/snowflake/snowflake_schema.py +29 -0
  61. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +112 -42
  62. datahub/ingestion/source/snowflake/snowflake_utils.py +25 -1
  63. datahub/ingestion/source/sql/mssql/job_models.py +15 -1
  64. datahub/ingestion/source/sql/mssql/source.py +8 -4
  65. datahub/ingestion/source/sql/oracle.py +51 -4
  66. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  67. datahub/ingestion/source/sql/stored_procedures/base.py +242 -0
  68. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +1 -29
  69. datahub/ingestion/source/superset.py +291 -35
  70. datahub/ingestion/source/usage/usage_common.py +0 -65
  71. datahub/ingestion/source/vertexai/__init__.py +0 -0
  72. datahub/ingestion/source/vertexai/vertexai.py +1055 -0
  73. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  74. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +68 -0
  75. datahub/metadata/_schema_classes.py +472 -1
  76. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  77. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  78. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  79. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  80. datahub/metadata/schema.avsc +313 -2
  81. datahub/metadata/schemas/CorpUserEditableInfo.avsc +14 -0
  82. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  83. datahub/metadata/schemas/CorpUserSettings.avsc +95 -0
  84. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  85. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  86. datahub/metadata/schemas/Deprecation.avsc +2 -0
  87. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  88. datahub/metadata/schemas/MetadataChangeEvent.avsc +32 -0
  89. datahub/metadata/schemas/QueryProperties.avsc +20 -0
  90. datahub/metadata/schemas/Siblings.avsc +2 -0
  91. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  92. datahub/sdk/__init__.py +1 -0
  93. datahub/sdk/dataset.py +122 -0
  94. datahub/sdk/entity.py +99 -3
  95. datahub/sdk/entity_client.py +27 -3
  96. datahub/sdk/main_client.py +24 -1
  97. datahub/sdk/search_client.py +81 -8
  98. datahub/sdk/search_filters.py +94 -37
  99. datahub/sql_parsing/split_statements.py +17 -3
  100. datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
  101. datahub/sql_parsing/tool_meta_extractor.py +27 -2
  102. datahub/testing/mcp_diff.py +1 -18
  103. datahub/utilities/threaded_iterator_executor.py +16 -3
  104. datahub/ingestion/source/vertexai.py +0 -697
  105. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info/licenses}/LICENSE +0 -0
  106. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.0.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,242 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Callable, Dict, Iterable, Optional
4
+
5
+ from datahub.emitter.mce_builder import (
6
+ DEFAULT_ENV,
7
+ datahub_guid,
8
+ make_data_flow_urn,
9
+ make_data_job_urn,
10
+ make_data_platform_urn,
11
+ make_dataplatform_instance_urn,
12
+ )
13
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
14
+ from datahub.emitter.mcp_builder import DatabaseKey, SchemaKey
15
+ from datahub.ingestion.api.source_helpers import auto_workunit
16
+ from datahub.ingestion.api.workunit import MetadataWorkUnit
17
+ from datahub.ingestion.source.common.subtypes import (
18
+ FlowContainerSubTypes,
19
+ JobContainerSubTypes,
20
+ )
21
+ from datahub.ingestion.source.sql.stored_procedures.lineage import parse_procedure_code
22
+ from datahub.metadata.schema_classes import (
23
+ ContainerClass,
24
+ DataFlowInfoClass,
25
+ DataJobInfoClass,
26
+ DataPlatformInstanceClass,
27
+ DataTransformClass,
28
+ DataTransformLogicClass,
29
+ QueryStatementClass,
30
+ SubTypesClass,
31
+ )
32
+ from datahub.sql_parsing.schema_resolver import SchemaResolver
33
+
34
+
35
@dataclass
class BaseProcedure:
    """Platform-agnostic description of a stored procedure.

    Individual SQL sources populate this from their own system catalogs; the
    generators in this module turn it into DataJob metadata workunits.
    """

    name: str
    procedure_definition: Optional[str]
    created: Optional[datetime]
    last_altered: Optional[datetime]
    comment: Optional[str]
    argument_signature: Optional[str]
    return_type: Optional[str]
    language: str
    extra_properties: Optional[Dict[str, str]]

    def get_procedure_identifier(
        self,
    ) -> str:
        """Return a stable identifier for this procedure.

        Overloaded procedures can share a name, so when an argument signature
        is available it is folded into the identifier via a guid suffix.
        """
        if not self.argument_signature:
            return self.name
        signature_hash = datahub_guid(
            dict(argument_signature=self.argument_signature)
        )
        return f"{self.name}_{signature_hash}"

    def to_urn(self, database_key: DatabaseKey, schema_key: Optional[SchemaKey]) -> str:
        """Build the DataJob urn for this procedure within its flow."""
        return make_data_job_urn(
            orchestrator=database_key.platform,
            flow_id=_get_procedure_flow_name(database_key, schema_key),
            job_id=self.get_procedure_identifier(),
            cluster=database_key.env or DEFAULT_ENV,
            platform_instance=database_key.instance,
        )
66
+
67
+
68
def _generate_flow_workunits(
    database_key: DatabaseKey, schema_key: Optional[SchemaKey]
) -> Iterable[MetadataWorkUnit]:
    """Emit the DataFlow-level aspects for a stored-procedure container."""

    flow_name = _get_procedure_flow_name(database_key, schema_key)
    flow_urn = make_data_flow_urn(
        orchestrator=database_key.platform,
        flow_id=flow_name,
        cluster=database_key.env or DEFAULT_ENV,
        platform_instance=database_key.instance,
    )

    # Collect aspects in emission order, then wrap each in an MCP workunit.
    aspects = [
        DataFlowInfoClass(name=flow_name),
        # NOTE(review): this module is platform-generic, yet the subtype used is
        # the MSSQL-specific container constant — confirm this is intended for
        # every platform that routes through here.
        SubTypesClass(typeNames=[FlowContainerSubTypes.MSSQL_PROCEDURE_CONTAINER]),
    ]

    if database_key.instance:
        aspects.append(
            DataPlatformInstanceClass(
                platform=make_data_platform_urn(database_key.platform),
                instance=make_dataplatform_instance_urn(
                    platform=database_key.platform,
                    instance=database_key.instance,
                ),
            )
        )

    aspects.append(ContainerClass(container=database_key.as_urn()))

    for aspect in aspects:
        yield MetadataChangeProposalWrapper(
            entityUrn=flow_urn,
            aspect=aspect,
        ).as_workunit()
112
+
113
+
114
def _get_procedure_flow_name(
    database_key: DatabaseKey, schema_key: Optional[SchemaKey]
) -> str:
    """Build the DataFlow id that groups a container's stored procedures.

    Three-tier platforms use ``db.schema.stored_procedures``; two-tier
    platforms (no schema level) use ``db.stored_procedures``.
    """
    if schema_key is None:
        return f"{database_key.database}.stored_procedures"
    return f"{schema_key.database}.{schema_key.db_schema}.stored_procedures"
124
+
125
+
126
def _generate_job_workunits(
    database_key: DatabaseKey,
    schema_key: Optional[SchemaKey],
    procedure: BaseProcedure,
) -> Iterable[MetadataWorkUnit]:
    """Emit the DataJob-level aspects for a single stored procedure."""

    job_urn = procedure.to_urn(database_key, schema_key)

    def _wrap(aspect) -> MetadataWorkUnit:
        # All aspects in this function target the same job urn.
        return MetadataChangeProposalWrapper(
            entityUrn=job_urn, aspect=aspect
        ).as_workunit()

    yield _wrap(
        DataJobInfoClass(
            name=procedure.name,
            type=JobContainerSubTypes.STORED_PROCEDURE,
            description=procedure.comment,
            customProperties=procedure.extra_properties,
        )
    )

    yield _wrap(SubTypesClass(typeNames=[JobContainerSubTypes.STORED_PROCEDURE]))

    if database_key.instance:
        yield _wrap(
            DataPlatformInstanceClass(
                platform=make_data_platform_urn(database_key.platform),
                instance=make_dataplatform_instance_urn(
                    platform=database_key.platform,
                    instance=database_key.instance,
                ),
            )
        )

    # Parent container: the schema when present, otherwise the database
    # (two-tier platforms).
    parent_key = schema_key if schema_key is not None else database_key
    yield _wrap(ContainerClass(container=parent_key.as_urn()))

    # TODO: Config whether to ingest procedure code
    if procedure.procedure_definition:
        yield _wrap(
            DataTransformLogicClass(
                transforms=[
                    DataTransformClass(
                        queryStatement=QueryStatementClass(
                            value=procedure.procedure_definition,
                            language=procedure.language,
                        ),
                    )
                ]
            )
        )
185
+
186
+
187
def generate_procedure_lineage(
    *,
    schema_resolver: SchemaResolver,
    procedure: BaseProcedure,
    procedure_job_urn: str,
    default_db: Optional[str] = None,
    default_schema: Optional[str] = None,
    is_temp_table: Callable[[str], bool] = lambda _: False,
    raise_: bool = False,
) -> Iterable[MetadataChangeProposalWrapper]:
    """Parse a procedure's SQL body and yield its input/output lineage aspect.

    Procedures with no body, or written in a language other than SQL, produce
    no lineage. When ``raise_`` is False, parse failures are handled inside
    ``parse_procedure_code`` rather than propagated.
    """
    # Only SQL bodies can be parsed for lineage; skip everything else.
    if not procedure.procedure_definition or procedure.language != "SQL":
        return

    datajob_input_output = parse_procedure_code(
        schema_resolver=schema_resolver,
        default_db=default_db,
        default_schema=default_schema,
        code=procedure.procedure_definition,
        is_temp_table=is_temp_table,
        raise_=raise_,
    )

    if datajob_input_output:
        yield MetadataChangeProposalWrapper(
            entityUrn=procedure_job_urn,
            aspect=datajob_input_output,
        )
212
+
213
+
214
def generate_procedure_container_workunits(
    database_key: DatabaseKey,
    schema_key: Optional[SchemaKey],
) -> Iterable[MetadataWorkUnit]:
    """Emit the flow-level container workunits for a database/schema pair.

    Thin public wrapper; the actual aspects come from
    ``_generate_flow_workunits``.
    """
    yield from _generate_flow_workunits(database_key, schema_key)
221
+
222
+
223
def generate_procedure_workunits(
    procedure: BaseProcedure,
    database_key: DatabaseKey,
    schema_key: Optional[SchemaKey],
    schema_resolver: Optional[SchemaResolver],
) -> Iterable[MetadataWorkUnit]:
    """Emit all workunits for one procedure: job aspects, then lineage.

    Lineage is best-effort and only attempted when a schema resolver is
    supplied.
    """
    yield from _generate_job_workunits(database_key, schema_key, procedure)

    if schema_resolver is None:
        return

    lineage_mcps = generate_procedure_lineage(
        schema_resolver=schema_resolver,
        procedure=procedure,
        procedure_job_urn=procedure.to_urn(database_key, schema_key),
        default_db=database_key.database,
        default_schema=schema_key.db_schema if schema_key else None,
    )
    yield from auto_workunit(lineage_mcps)
@@ -1,8 +1,6 @@
1
1
  import logging
2
- from typing import Callable, Iterable, Optional
2
+ from typing import Callable, Optional
3
3
 
4
- from datahub.emitter.mcp import MetadataChangeProposalWrapper
5
- from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure
6
4
  from datahub.metadata.schema_classes import DataJobInputOutputClass
7
5
  from datahub.sql_parsing.datajob import to_datajob_input_output
8
6
  from datahub.sql_parsing.schema_resolver import SchemaResolver
@@ -56,29 +54,3 @@ def parse_procedure_code(
56
54
  mcps=mcps,
57
55
  ignore_extra_mcps=True,
58
56
  )
59
-
60
-
61
- # Is procedure handling generic enough to be added to SqlParsingAggregator?
62
- def generate_procedure_lineage(
63
- *,
64
- schema_resolver: SchemaResolver,
65
- procedure: StoredProcedure,
66
- procedure_job_urn: str,
67
- is_temp_table: Callable[[str], bool] = lambda _: False,
68
- raise_: bool = False,
69
- ) -> Iterable[MetadataChangeProposalWrapper]:
70
- if procedure.code:
71
- datajob_input_output = parse_procedure_code(
72
- schema_resolver=schema_resolver,
73
- default_db=procedure.db,
74
- default_schema=procedure.schema,
75
- code=procedure.code,
76
- is_temp_table=is_temp_table,
77
- raise_=raise_,
78
- )
79
-
80
- if datajob_input_output:
81
- yield MetadataChangeProposalWrapper(
82
- entityUrn=procedure_job_urn,
83
- aspect=datajob_input_output,
84
- )