flyteplugins-bigquery 2.0.0b54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flyteplugins/__init__.py +0 -0
- flyteplugins/bigquery/__init__.py +4 -0
- flyteplugins/bigquery/connector.py +143 -0
- flyteplugins/bigquery/task.py +80 -0
- flyteplugins_bigquery-2.0.0b54.dist-info/METADATA +34 -0
- flyteplugins_bigquery-2.0.0b54.dist-info/RECORD +9 -0
- flyteplugins_bigquery-2.0.0b54.dist-info/WHEEL +5 -0
- flyteplugins_bigquery-2.0.0b54.dist-info/entry_points.txt +2 -0
- flyteplugins_bigquery-2.0.0b54.dist-info/top_level.txt +1 -0
flyteplugins/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
from async_lru import alru_cache
|
|
6
|
+
from flyte import logger
|
|
7
|
+
from flyte.connectors import (
|
|
8
|
+
AsyncConnector,
|
|
9
|
+
ConnectorRegistry,
|
|
10
|
+
Resource,
|
|
11
|
+
ResourceMeta,
|
|
12
|
+
)
|
|
13
|
+
from flyte.connectors.utils import convert_to_flyte_phase
|
|
14
|
+
from flyte.io import DataFrame
|
|
15
|
+
from flyte.types import TypeEngine
|
|
16
|
+
from flyteidl2.core.execution_pb2 import TaskExecution, TaskLog
|
|
17
|
+
from flyteidl2.core.tasks_pb2 import TaskTemplate
|
|
18
|
+
from google.api_core.client_info import ClientInfo
|
|
19
|
+
from google.cloud import bigquery
|
|
20
|
+
from google.oauth2 import service_account
|
|
21
|
+
from google.protobuf import json_format
|
|
22
|
+
|
|
23
|
+
# Lookup table from a Python type to the BigQuery Standard SQL type name that is
# used when an input is turned into a query parameter.
# Reference:
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data_type_sizes
pythonTypeToBigQueryType: Dict[type, str] = dict(
    (
        (list, "ARRAY"),
        (bool, "BOOL"),
        (bytes, "BYTES"),
        (datetime.datetime, "DATETIME"),
        (float, "FLOAT64"),
        (int, "INT64"),
        (str, "STRING"),
    )
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class BigQueryMetadata(ResourceMeta):
    """
    Handle for a submitted BigQuery job.

    ``BigQueryConnector.create`` returns an instance of this class and
    ``BigQueryConnector.get`` / ``BigQueryConnector.delete`` read its fields to
    rebuild a client and address the same job again.
    """

    # Identifier of the query job, stored as a string.
    job_id: str
    # Project the job was submitted under.
    project: str
    # Location passed to the BigQuery client and to job lookups.
    location: str
    # User-agent string the client was created with; reused for later calls.
    user_agent: str
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@alru_cache
async def _get_bigquery_client(
    project: str, location: str, user_agent: str, google_application_credentials: Optional[str]
) -> bigquery.Client:
    """
    Build a BigQuery client; results are memoized by ``alru_cache`` on the arguments.

    :param project: Project passed to ``bigquery.Client``.
    :param location: Location passed to ``bigquery.Client``.
    :param user_agent: User-agent string wrapped in a ``ClientInfo``.
    :param google_application_credentials: Service-account key as a JSON document
        (string), or ``None`` to let the client resolve credentials on its own.
    :return: A ``bigquery.Client`` configured with the values above.
    :raises ValueError: If the credentials string is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    credentials = None
    if google_application_credentials is not None:
        # Local import keeps this block self-contained.
        import json

        info = google_application_credentials
        # ``from_service_account_info`` expects a mapping, while the cache key has
        # to be hashable, so the value arrives as a JSON string and is decoded here.
        if isinstance(info, (str, bytes, bytearray)):
            info = json.loads(info)
        credentials = service_account.Credentials.from_service_account_info(info)

    cinfo = ClientInfo(user_agent=user_agent)
    return bigquery.Client(project=project, location=location, client_info=cinfo, credentials=credentials)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class BigQueryConnector(AsyncConnector):
    """
    Connector for tasks of type ``bigquery_query_job_task``.

    ``create`` submits the task's SQL statement as a query job, ``get`` looks the
    job up and reports its phase, and ``delete`` asks BigQuery to cancel it.
    """

    name = "Bigquery Connector"
    task_type_name = "bigquery_query_job_task"
    metadata_type = BigQueryMetadata

    async def create(
        self,
        task_template: TaskTemplate,
        inputs: Optional[Dict[str, Any]] = None,
        google_application_credentials: Optional[str] = None,
        **kwargs,
    ) -> BigQueryMetadata:
        """
        Submit ``task_template.sql.statement`` as a BigQuery query job.

        :param task_template: Task definition; its ``custom`` struct must carry
            ``ProjectID`` and may carry ``Location`` and ``Domain``.
        :param inputs: Input values keyed by name. Each one is sent as a named
            scalar query parameter typed via ``pythonTypeToBigQueryType``.
        :param google_application_credentials: Optional service-account credentials
            forwarded to the client factory.
        :return: Metadata identifying the submitted job.
        :raises ValueError: If the task's custom config has no ``ProjectID``.
        :raises KeyError: If an input's Python type has no BigQuery mapping or the
            input name is not part of the task interface.
        """
        job_config = None
        if inputs:
            python_interface_inputs = {
                name: TypeEngine.guess_python_type(lt.type)
                for name, lt in task_template.interface.inputs.variables.items()
            }
            # NOTE(review): list inputs are sent as a ScalarQueryParameter of type
            # "ARRAY"; BigQuery has a dedicated ArrayQueryParameter - confirm that
            # list inputs actually work.
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val)
                    for name, val in inputs.items()
                ]
            )

        # Fall back to an empty dict so a missing custom struct produces the clear
        # error below instead of an AttributeError on None.
        custom = json_format.MessageToDict(task_template.custom) if task_template.custom else {}
        if "ProjectID" not in custom:
            raise ValueError("BigQuery task custom config is missing the required 'ProjectID' entry")

        domain = custom.get("Domain")
        sdk_version = task_template.metadata.runtime.version

        user_agent = f"Flyte/{sdk_version} (GPN:Union;{domain or ''})"
        project = custom["ProjectID"]
        # Location is optional in BigQueryConfig, so tolerate its absence.
        location = custom.get("Location")

        client = await _get_bigquery_client(
            project=project,
            location=location,
            user_agent=user_agent,
            google_application_credentials=google_application_credentials,
        )
        query_job = client.query(task_template.sql.statement, job_config=job_config)

        return BigQueryMetadata(job_id=str(query_job.job_id), location=location, project=project, user_agent=user_agent)

    async def get(
        self, resource_meta: BigQueryMetadata, google_application_credentials: Optional[str] = None, **kwargs
    ) -> Resource:
        """
        Look up the job described by ``resource_meta`` and report its state.

        :param resource_meta: Metadata returned by ``create``.
        :param google_application_credentials: Optional service-account credentials
            forwarded to the client factory.
        :return: A ``Resource`` with the phase, a message, a console link and, when
            the job succeeded and has a destination table, a ``results`` DataFrame
            pointing at ``bq://<project>:<dataset>.<table>``.
        """
        client = await _get_bigquery_client(
            project=resource_meta.project,
            location=resource_meta.location,
            user_agent=resource_meta.user_agent,
            google_application_credentials=google_application_credentials,
        )
        log_link = TaskLog(
            uri=f"https://console.cloud.google.com/bigquery?project={resource_meta.project}&j=bq:{resource_meta.location}:{resource_meta.job_id}&page=queryresults",
            name="BigQuery Console",
            ready=True,
            link_type=TaskLog.DASHBOARD,
        )

        job = client.get_job(resource_meta.job_id, resource_meta.project, resource_meta.location)
        if job.errors:
            error_message = str(job.errors)
            # Use a %s placeholder: passing the errors as a bare extra argument
            # makes the logging module fail to format the record.
            logger.error("failed to run BigQuery job with error: %s", error_message)
            return Resource(phase=TaskExecution.FAILED, message=error_message, log_links=[log_link])

        cur_phase = convert_to_flyte_phase(str(job.state))
        res = None

        if cur_phase == TaskExecution.SUCCEEDED:
            dst = job.destination
            if dst:
                output_location = f"bq://{dst.project}:{dst.dataset_id}.{dst.table_id}"
                res = {"results": DataFrame(uri=output_location)}

        return Resource(phase=cur_phase, message=str(job.state), log_links=[log_link], outputs=res)

    async def delete(
        self, resource_meta: BigQueryMetadata, google_application_credentials: Optional[str] = None, **kwargs
    ):
        """
        Request cancellation of the job described by ``resource_meta``.

        :param resource_meta: Metadata returned by ``create``.
        :param google_application_credentials: Optional service-account credentials
            forwarded to the client factory.
        """
        client = await _get_bigquery_client(
            project=resource_meta.project,
            location=resource_meta.location,
            user_agent=resource_meta.user_agent,
            google_application_credentials=google_application_credentials,
        )
        client.cancel_job(resource_meta.job_id, resource_meta.project, resource_meta.location)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# Module-level side effect: importing this module registers one BigQueryConnector
# instance with the ConnectorRegistry.
ConnectorRegistry.register(BigQueryConnector())
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any, Dict, Optional, Type
|
|
4
|
+
|
|
5
|
+
from flyte.connectors import AsyncConnectorExecutorMixin
|
|
6
|
+
from flyte.extend import TaskTemplate
|
|
7
|
+
from flyte.io import DataFrame
|
|
8
|
+
from flyte.models import NativeInterface, SerializationContext
|
|
9
|
+
from flyteidl2.core import tasks_pb2
|
|
10
|
+
from google.cloud import bigquery
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class BigQueryConfig:
    """
    Settings used to configure a BigQuery task.
    """

    # Serialized under the "ProjectID" key of the task's custom config.
    ProjectID: str
    # Serialized under the "Location" key; may be left unset.
    Location: Optional[str] = None
    # When set, the "query" section of its API representation is merged into the
    # task's custom config.
    QueryJobConfig: Optional[bigquery.QueryJobConfig] = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BigQueryTask(AsyncConnectorExecutorMixin, TaskTemplate):
    """
    Task template describing a BigQuery query job (task type ``bigquery_query_job_task``).
    """

    _TASK_TYPE = "bigquery_query_job_task"

    def __init__(
        self,
        name: str,
        query_template: str,
        plugin_config: BigQueryConfig,
        inputs: Optional[Dict[str, Type]] = None,
        output_dataframe_type: Optional[Type[DataFrame]] = None,
        google_application_credentials: Optional[str] = None,
        **kwargs,
    ):
        """
        To be used to query BigQuery Tables.

        :param name: The Name of this task, should be unique in the project
        :param query_template: The actual query to run. We use Flyte's Golang templating format for Query templating.
          Refer to the templating documentation.
          NOTE(review): the connector shipped in this package forwards inputs as named
          BigQuery query parameters, which suggests ``@name`` placeholders - confirm
          which placeholder syntax is actually honoured.
        :param plugin_config: BigQueryConfig object
        :param inputs: Name and type of inputs specified as an ordered dictionary
        :param output_dataframe_type: If some data is produced by this query, then you can specify the
         output dataframe type.
        :param google_application_credentials: The name of the secret containing the Google Application Credentials.
        :param kwargs: Forwarded unchanged to the parent constructor.
        """
        outputs = None
        if output_dataframe_type is not None:
            outputs = {
                "results": output_dataframe_type,
            }
        super().__init__(
            name=name,
            interface=NativeInterface({k: (v, None) for k, v in inputs.items()} if inputs else {}, outputs or {}),
            task_type=self._TASK_TYPE,
            image=None,
            **kwargs,
        )
        self.output_dataframe_type = output_dataframe_type
        self.plugin_config = plugin_config
        # Collapse every run of whitespace (including newlines and tabs) into a
        # single space; ``\s+`` already covers "\n" and "\t".
        self.query_template = re.sub(r"\s+", " ", query_template).strip()
        self.google_application_credentials = google_application_credentials

    def custom_config(self, sctx: SerializationContext) -> Optional[Dict[str, Any]]:
        """
        Build the custom config dictionary serialized with the task.

        :param sctx: Serialization context; its ``domain`` is stored under ``Domain``.
        :return: Dict with ``Location``, ``ProjectID`` and ``Domain``, plus the
            ``query`` section of ``QueryJobConfig`` (if configured) and a
            ``secrets`` entry (if credentials were given).
        """
        config = {
            "Location": self.plugin_config.Location,
            "ProjectID": self.plugin_config.ProjectID,
            "Domain": sctx.domain,
        }
        if self.plugin_config.QueryJobConfig is not None:
            config.update(self.plugin_config.QueryJobConfig.to_api_repr()["query"])
        if self.google_application_credentials is not None:
            # The key must match the connector's ``google_application_credentials``
            # keyword exactly; a stray trailing colon used to break that match.
            config["secrets"] = {"google_application_credentials": self.google_application_credentials}
        return config

    def sql(self, sctx: SerializationContext) -> Optional[tasks_pb2.Sql]:
        """
        Return the normalized query wrapped in a ``tasks_pb2.Sql`` message (ANSI dialect).

        :param sctx: Serialization context (not used here).
        """
        return tasks_pb2.Sql(statement=self.query_template, dialect=tasks_pb2.Sql.Dialect.ANSI)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flyteplugins-bigquery
|
|
3
|
+
Version: 2.0.0b54
|
|
4
|
+
Summary: BigQuery plugin for flyte
|
|
5
|
+
Author-email: Kevin Su <pingsutw@users.noreply.github.com>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: flyte[connector]
|
|
9
|
+
Requires-Dist: google-cloud-bigquery
|
|
10
|
+
Requires-Dist: google-cloud-bigquery-storage
|
|
11
|
+
|
|
12
|
+
# BigQuery Plugin for Flyte
|
|
13
|
+
|
|
14
|
+
This plugin provides BigQuery integration for Flyte, enabling you to run BigQuery queries as Flyte tasks.
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install flyteplugins-bigquery
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from flyteplugins.bigquery import BigQueryConfig, BigQueryTask
|
|
26
|
+
|
|
27
|
+
config = BigQueryConfig(ProjectID="my-project", Location="US")
|
|
28
|
+
task = BigQueryTask(
|
|
29
|
+
name="my_query",
|
|
30
|
+
query_template="SELECT * FROM dataset.table WHERE id = {{ .user_id }}",
|
|
31
|
+
plugin_config=config,
|
|
32
|
+
inputs={"user_id": int},
|
|
33
|
+
)
|
|
34
|
+
```
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
flyteplugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
flyteplugins/bigquery/__init__.py,sha256=aXxeQ2_aLVqZej1ERmXPXtpsJkYcPKg448W_4nXWKe0,197
|
|
3
|
+
flyteplugins/bigquery/connector.py,sha256=Ds5MFW0Pndv8F-IG73TfA34iQsDvnbtwFvMX5Qq99jE,5308
|
|
4
|
+
flyteplugins/bigquery/task.py,sha256=VphsNEb0C7pTTwN5SmHM8IjkKfcOB-MODofSX_1OB2Q,3190
|
|
5
|
+
flyteplugins_bigquery-2.0.0b54.dist-info/METADATA,sha256=qVkMPznvvR0WyzjXqL_TQqYmso0CubcuQnluIDI6qJA,867
|
|
6
|
+
flyteplugins_bigquery-2.0.0b54.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
7
|
+
flyteplugins_bigquery-2.0.0b54.dist-info/entry_points.txt,sha256=oBu4yigPOb5w59qt1pJpqLVXltD2IAxUdU1FbYpPeLE,80
|
|
8
|
+
flyteplugins_bigquery-2.0.0b54.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
|
|
9
|
+
flyteplugins_bigquery-2.0.0b54.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flyteplugins
|