acryl-datahub 1.0.0.1rc1__py3-none-any.whl → 1.0.0.1rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/METADATA +2471 -2470
- {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/RECORD +61 -46
- {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/entry_points.txt +2 -1
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -28
- datahub/emitter/request_helper.py +19 -14
- datahub/ingestion/api/source.py +6 -2
- datahub/ingestion/api/source_helpers.py +6 -2
- datahub/ingestion/extractor/schema_util.py +1 -0
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +6 -0
- datahub/ingestion/source/common/subtypes.py +15 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +21 -1
- datahub/ingestion/source/dbt/dbt_common.py +6 -4
- datahub/ingestion/source/dbt/dbt_core.py +4 -6
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_source.py +96 -117
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +394 -0
- datahub/ingestion/source/hex/constants.py +3 -0
- datahub/ingestion/source/hex/hex.py +167 -0
- datahub/ingestion/source/hex/mapper.py +372 -0
- datahub/ingestion/source/hex/model.py +68 -0
- datahub/ingestion/source/iceberg/iceberg.py +62 -66
- datahub/ingestion/source/mlflow.py +198 -7
- datahub/ingestion/source/mode.py +11 -1
- datahub/ingestion/source/openapi.py +69 -34
- datahub/ingestion/source/powerbi/powerbi.py +29 -23
- datahub/ingestion/source/s3/source.py +11 -0
- datahub/ingestion/source/slack/slack.py +399 -82
- datahub/ingestion/source/superset.py +15 -6
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1055 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +68 -0
- datahub/metadata/_schema_classes.py +472 -1
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/schema.avsc +307 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +14 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +95 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +30 -0
- datahub/metadata/schemas/QueryProperties.avsc +20 -0
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/sdk/dataset.py +122 -0
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +27 -3
- datahub/sdk/main_client.py +22 -0
- datahub/sdk/search_filters.py +4 -4
- datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
- datahub/sql_parsing/tool_meta_extractor.py +27 -2
- datahub/ingestion/source/vertexai.py +0 -695
- {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/vertexai/vertexai_config.py
@@ -0,0 +1,29 @@
+from typing import Dict, Optional
+
+from pydantic import Field
+
+from datahub.configuration.source_common import EnvConfigMixin
+from datahub.ingestion.source.common.gcp_credentials_config import GCPCredential
+
+
+class VertexAIConfig(EnvConfigMixin):
+    credential: Optional[GCPCredential] = Field(
+        default=None, description="GCP credential information"
+    )
+    project_id: str = Field(description=("Project ID in Google Cloud Platform"))
+    region: str = Field(
+        description=("Region of your project in Google Cloud Platform"),
+    )
+    bucket_uri: Optional[str] = Field(
+        default=None,
+        description=("Bucket URI used in your project"),
+    )
+    vertexai_url: Optional[str] = Field(
+        default="https://console.cloud.google.com/vertex-ai",
+        description=("VertexUI URI"),
+    )
+
+    def get_credentials(self) -> Optional[Dict[str, str]]:
+        if self.credential:
+            return self.credential.to_dict(self.project_id)
+        return None
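The new config class above is a pydantic model built on EnvConfigMixin, so it should accept the usual keyword construction that other DataHub source configs use. A minimal sketch, not part of the release; the project, region, and bucket values below are hypothetical:

from datahub.ingestion.source.vertexai.vertexai_config import VertexAIConfig

# Hypothetical values for illustration only.
config = VertexAIConfig(
    project_id="my-gcp-project",
    region="us-central1",
    bucket_uri="gs://my-vertexai-bucket",  # optional
)

# With no `credential` block configured, get_credentials() returns None.
assert config.get_credentials() is None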
datahub/ingestion/source/vertexai/vertexai_result_type_utils.py
@@ -0,0 +1,68 @@
+from typing import Union
+
+from google.cloud.aiplatform.base import VertexAiResourceNoun
+from google.cloud.aiplatform.jobs import _RunnableJob
+from google.cloud.aiplatform.training_jobs import _TrainingJob
+from google.cloud.aiplatform_v1.types import JobState, PipelineState
+
+from datahub.metadata.schema_classes import RunResultTypeClass
+
+
+def get_automl_job_result_type(state: PipelineState) -> Union[str, RunResultTypeClass]:
+    state_mapping = {
+        PipelineState.PIPELINE_STATE_SUCCEEDED: RunResultTypeClass.SUCCESS,
+        PipelineState.PIPELINE_STATE_FAILED: RunResultTypeClass.FAILURE,
+        PipelineState.PIPELINE_STATE_CANCELLED: "Cancelled",
+        PipelineState.PIPELINE_STATE_PAUSED: "Paused",
+        PipelineState.PIPELINE_STATE_QUEUED: "Queued",
+        PipelineState.PIPELINE_STATE_RUNNING: "Running",
+        PipelineState.PIPELINE_STATE_UNSPECIFIED: "Unspecific",
+    }
+
+    return state_mapping.get(state, "UNKNOWN")
+
+
+def get_custom_job_result_type(state: JobState) -> Union[str, RunResultTypeClass]:
+    state_mapping = {
+        JobState.JOB_STATE_SUCCEEDED: RunResultTypeClass.SUCCESS,
+        JobState.JOB_STATE_FAILED: RunResultTypeClass.FAILURE,
+        JobState.JOB_STATE_CANCELLED: "Cancelled",
+        JobState.JOB_STATE_PAUSED: "Paused",
+        JobState.JOB_STATE_QUEUED: "Queued",
+        JobState.JOB_STATE_RUNNING: "Running",
+        JobState.JOB_STATE_CANCELLING: "Cancelling",
+        JobState.JOB_STATE_EXPIRED: "Expired",
+        JobState.JOB_STATE_UPDATING: "Updating",
+    }
+    return state_mapping.get(state, "UNKNOWN")
+
+
+def get_job_result_status(job: VertexAiResourceNoun) -> Union[str, RunResultTypeClass]:
+    if isinstance(job, _TrainingJob) and isinstance(job.state, PipelineState):
+        return get_automl_job_result_type(job.state)
+    elif isinstance(job, _RunnableJob) and isinstance(job.state, JobState):
+        return get_custom_job_result_type(job.state)
+    return "UNKNOWN"
+
+
+def get_execution_result_status(status: int) -> Union[str, RunResultTypeClass]:
+    """
+    State of the execution.
+    STATE_UNSPECIFIED = 0
+    PENDING = 1
+    RUNNING = 2
+    SUCCEEDED = 3
+    FAILED = 4
+    """
+    status_mapping = {
+        0: "STATE_UNSPECIFIED",
+        1: "PENDING",
+        2: "RUNNING",
+        3: RunResultTypeClass.SUCCESS,
+        4: RunResultTypeClass.FAILURE,
+    }
+    return status_mapping.get(status, "UNKNOWN")
+
+
+def is_status_for_run_event_class(status: Union[str, RunResultTypeClass]) -> bool:
+    return status in [RunResultTypeClass.SUCCESS, RunResultTypeClass.FAILURE]