acryl-datahub: acryl_datahub-1.0.0rc16-py3-none-any.whl → acryl_datahub-1.0.0rc17-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. See the package's registry page for more details.
- {acryl_datahub-1.0.0rc16.dist-info → acryl_datahub-1.0.0rc17.dist-info}/METADATA +2571 -2541
- {acryl_datahub-1.0.0rc16.dist-info → acryl_datahub-1.0.0rc17.dist-info}/RECORD +17 -14
- {acryl_datahub-1.0.0rc16.dist-info → acryl_datahub-1.0.0rc17.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/configuration/common.py +8 -0
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +161 -3
- datahub/ingestion/graph/client.py +3 -0
- datahub/ingestion/sink/datahub_rest.py +4 -0
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/salesforce.py +529 -276
- datahub/ingestion/source/sql/hive.py +13 -0
- datahub/ingestion/source/vertexai.py +697 -0
- {acryl_datahub-1.0.0rc16.dist-info → acryl_datahub-1.0.0rc17.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc16.dist-info → acryl_datahub-1.0.0rc17.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc16.dist-info → acryl_datahub-1.0.0rc17.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import logging
|
|
3
2
|
import os
|
|
4
3
|
import re
|
|
5
|
-
import tempfile
|
|
6
4
|
from datetime import timedelta
|
|
7
5
|
from typing import Any, Dict, List, Optional, Union
|
|
8
6
|
|
|
@@ -17,10 +15,10 @@ from datahub.configuration.source_common import (
|
|
|
17
15
|
PlatformInstanceConfigMixin,
|
|
18
16
|
)
|
|
19
17
|
from datahub.configuration.validate_field_removal import pydantic_removed_field
|
|
20
|
-
from datahub.configuration.validate_multiline_string import pydantic_multiline_string
|
|
21
18
|
from datahub.ingestion.glossary.classification_mixin import (
|
|
22
19
|
ClassificationSourceConfigMixin,
|
|
23
20
|
)
|
|
21
|
+
from datahub.ingestion.source.common.gcp_credentials_config import GCPCredential
|
|
24
22
|
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
|
|
25
23
|
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterConfig
|
|
26
24
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
@@ -107,50 +105,8 @@ class BigQueryUsageConfig(BaseUsageConfig):
|
|
|
107
105
|
)
|
|
108
106
|
|
|
109
107
|
|
|
110
|
-
class BigQueryCredential(ConfigModel):
|
|
111
|
-
project_id: str = Field(description="Project id to set the credentials")
|
|
112
|
-
private_key_id: str = Field(description="Private key id")
|
|
113
|
-
private_key: str = Field(
|
|
114
|
-
description="Private key in a form of '-----BEGIN PRIVATE KEY-----\\nprivate-key\\n-----END PRIVATE KEY-----\\n'"
|
|
115
|
-
)
|
|
116
|
-
client_email: str = Field(description="Client email")
|
|
117
|
-
client_id: str = Field(description="Client Id")
|
|
118
|
-
auth_uri: str = Field(
|
|
119
|
-
default="https://accounts.google.com/o/oauth2/auth",
|
|
120
|
-
description="Authentication uri",
|
|
121
|
-
)
|
|
122
|
-
token_uri: str = Field(
|
|
123
|
-
default="https://oauth2.googleapis.com/token", description="Token uri"
|
|
124
|
-
)
|
|
125
|
-
auth_provider_x509_cert_url: str = Field(
|
|
126
|
-
default="https://www.googleapis.com/oauth2/v1/certs",
|
|
127
|
-
description="Auth provider x509 certificate url",
|
|
128
|
-
)
|
|
129
|
-
type: str = Field(default="service_account", description="Authentication type")
|
|
130
|
-
client_x509_cert_url: Optional[str] = Field(
|
|
131
|
-
default=None,
|
|
132
|
-
description="If not set it will be default to https://www.googleapis.com/robot/v1/metadata/x509/client_email",
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
_fix_private_key_newlines = pydantic_multiline_string("private_key")
|
|
136
|
-
|
|
137
|
-
@root_validator(skip_on_failure=True)
|
|
138
|
-
def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
139
|
-
if values.get("client_x509_cert_url") is None:
|
|
140
|
-
values["client_x509_cert_url"] = (
|
|
141
|
-
f"https://www.googleapis.com/robot/v1/metadata/x509/{values['client_email']}"
|
|
142
|
-
)
|
|
143
|
-
return values
|
|
144
|
-
|
|
145
|
-
def create_credential_temp_file(self) -> str:
|
|
146
|
-
with tempfile.NamedTemporaryFile(delete=False) as fp:
|
|
147
|
-
cred_json = json.dumps(self.dict(), indent=4, separators=(",", ": "))
|
|
148
|
-
fp.write(cred_json.encode())
|
|
149
|
-
return fp.name
|
|
150
|
-
|
|
151
|
-
|
|
152
108
|
class BigQueryConnectionConfig(ConfigModel):
|
|
153
|
-
credential: Optional[
|
|
109
|
+
credential: Optional[GCPCredential] = Field(
|
|
154
110
|
default=None, description="BigQuery credential informations"
|
|
155
111
|
)
|
|
156
112
|
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import tempfile
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import Field, root_validator
|
|
6
|
+
|
|
7
|
+
from datahub.configuration import ConfigModel
|
|
8
|
+
from datahub.configuration.validate_multiline_string import pydantic_multiline_string
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GCPCredential(ConfigModel):
|
|
12
|
+
project_id: Optional[str] = Field(description="Project id to set the credentials")
|
|
13
|
+
private_key_id: str = Field(description="Private key id")
|
|
14
|
+
private_key: str = Field(
|
|
15
|
+
description="Private key in a form of '-----BEGIN PRIVATE KEY-----\\nprivate-key\\n-----END PRIVATE KEY-----\\n'"
|
|
16
|
+
)
|
|
17
|
+
client_email: str = Field(description="Client email")
|
|
18
|
+
client_id: str = Field(description="Client Id")
|
|
19
|
+
auth_uri: str = Field(
|
|
20
|
+
default="https://accounts.google.com/o/oauth2/auth",
|
|
21
|
+
description="Authentication uri",
|
|
22
|
+
)
|
|
23
|
+
token_uri: str = Field(
|
|
24
|
+
default="https://oauth2.googleapis.com/token", description="Token uri"
|
|
25
|
+
)
|
|
26
|
+
auth_provider_x509_cert_url: str = Field(
|
|
27
|
+
default="https://www.googleapis.com/oauth2/v1/certs",
|
|
28
|
+
description="Auth provider x509 certificate url",
|
|
29
|
+
)
|
|
30
|
+
type: str = Field(default="service_account", description="Authentication type")
|
|
31
|
+
client_x509_cert_url: Optional[str] = Field(
|
|
32
|
+
default=None,
|
|
33
|
+
description="If not set it will be default to https://www.googleapis.com/robot/v1/metadata/x509/client_email",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
_fix_private_key_newlines = pydantic_multiline_string("private_key")
|
|
37
|
+
|
|
38
|
+
@root_validator(skip_on_failure=True)
|
|
39
|
+
def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
40
|
+
if values.get("client_x509_cert_url") is None:
|
|
41
|
+
values["client_x509_cert_url"] = (
|
|
42
|
+
f"https://www.googleapis.com/robot/v1/metadata/x509/{values['client_email']}"
|
|
43
|
+
)
|
|
44
|
+
return values
|
|
45
|
+
|
|
46
|
+
def create_credential_temp_file(self, project_id: Optional[str] = None) -> str:
|
|
47
|
+
configs = self.dict()
|
|
48
|
+
if project_id:
|
|
49
|
+
configs["project_id"] = project_id
|
|
50
|
+
with tempfile.NamedTemporaryFile(delete=False) as fp:
|
|
51
|
+
cred_json = json.dumps(configs, indent=4, separators=(",", ": "))
|
|
52
|
+
fp.write(cred_json.encode())
|
|
53
|
+
return fp.name
|