flyteplugins-bigquery 2.0.0b54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,4 @@
1
+ from flyteplugins.bigquery.connector import BigQueryConnector
2
+ from flyteplugins.bigquery.task import BigQueryConfig, BigQueryTask
3
+
4
+ __all__ = ["BigQueryConfig", "BigQueryConnector", "BigQueryTask"]
@@ -0,0 +1,143 @@
1
+ import datetime
2
+ from dataclasses import dataclass
3
+ from typing import Any, Dict, Optional
4
+
5
+ from async_lru import alru_cache
6
+ from flyte import logger
7
+ from flyte.connectors import (
8
+ AsyncConnector,
9
+ ConnectorRegistry,
10
+ Resource,
11
+ ResourceMeta,
12
+ )
13
+ from flyte.connectors.utils import convert_to_flyte_phase
14
+ from flyte.io import DataFrame
15
+ from flyte.types import TypeEngine
16
+ from flyteidl2.core.execution_pb2 import TaskExecution, TaskLog
17
+ from flyteidl2.core.tasks_pb2 import TaskTemplate
18
+ from google.api_core.client_info import ClientInfo
19
+ from google.cloud import bigquery
20
+ from google.oauth2 import service_account
21
+ from google.protobuf import json_format
22
+
23
# Lookup table from the Python type of a task input to the BigQuery
# standard-SQL type name used when that input is bound as a query parameter.
# Reference:
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data_type_sizes
pythonTypeToBigQueryType: Dict[type, str] = {
    # container
    list: "ARRAY",
    # scalars
    bool: "BOOL",
    bytes: "BYTES",
    datetime.datetime: "DATETIME",
    float: "FLOAT64",
    int: "INT64",
    str: "STRING",
}
33
+
34
+
35
@dataclass
class BigQueryMetadata(ResourceMeta):
    """Handle for a submitted BigQuery job.

    Carries the values needed to build a BigQuery client again and to
    address the job on later calls.
    """

    # Identifier of the BigQuery job.
    job_id: str
    # Project the job was submitted under.
    project: str
    # Location passed to the BigQuery client and job lookups.
    location: str
    # User-agent string supplied to the BigQuery client's ClientInfo.
    user_agent: str
41
+
42
+
43
@alru_cache
async def _get_bigquery_client(
    project: str, location: str, user_agent: str, google_application_credentials: Optional[str]
) -> bigquery.Client:
    """Build a BigQuery client; results are memoized by ``alru_cache`` per argument tuple.

    :param project: Project passed to ``bigquery.Client``.
    :param location: Location passed to ``bigquery.Client``.
    :param user_agent: User-agent string wrapped in a ``ClientInfo``.
    :param google_application_credentials: Service-account key as JSON text, or
        ``None`` to let ``bigquery.Client`` resolve credentials on its own.
    :return: A ``bigquery.Client`` configured with the values above.
    :raises ValueError: If the credentials text is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    import json

    credentials = None
    if google_application_credentials is not None:
        # ``from_service_account_info`` needs the parsed key-file mapping; the
        # value arrives as a string (it must be hashable for the cache), so it
        # has to be decoded before it is handed over.
        service_account_info = json.loads(google_application_credentials)
        credentials = service_account.Credentials.from_service_account_info(service_account_info)

    client_info = ClientInfo(user_agent=user_agent)
    return bigquery.Client(project=project, location=location, client_info=client_info, credentials=credentials)
53
+
54
+
55
class BigQueryConnector(AsyncConnector):
    """Connector that submits, polls and cancels BigQuery query jobs."""

    name = "Bigquery Connector"
    task_type_name = "bigquery_query_job_task"
    metadata_type = BigQueryMetadata

    async def create(
        self,
        task_template: TaskTemplate,
        inputs: Optional[Dict[str, Any]] = None,
        google_application_credentials: Optional[str] = None,
        **kwargs,
    ) -> BigQueryMetadata:
        """Submit the task's SQL statement as a BigQuery query job.

        :param task_template: Task definition; ``sql.statement`` is the query and
            ``custom`` supplies ``ProjectID``, ``Location`` and ``Domain``.
        :param inputs: Input values, bound by name as scalar query parameters.
        :param google_application_credentials: Service-account credentials forwarded
            to the client factory, or ``None``.
        :return: Metadata identifying the submitted job.
        :raises KeyError: If ``ProjectID`` is missing from the custom config, or an
            input's Python type has no entry in ``pythonTypeToBigQueryType``.
        """
        job_config = None
        if inputs:
            python_interface_inputs = {
                name: TypeEngine.guess_python_type(lt.type)
                for name, lt in task_template.interface.inputs.variables.items()
            }
            # NOTE(review): every input is bound as a ScalarQueryParameter, including
            # ``list`` inputs mapped to "ARRAY" - confirm that is intended.
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val)
                    for name, val in inputs.items()
                ]
            )

        # Fall back to an empty dict so a template without a custom struct fails on
        # the required "ProjectID" key instead of on ``None.get``.
        custom = json_format.MessageToDict(task_template.custom) if task_template.custom else {}

        domain = custom.get("Domain")
        sdk_version = task_template.metadata.runtime.version

        user_agent = f"Flyte/{sdk_version} (GPN:Union;{domain or ''})"
        project = custom["ProjectID"]
        # Location is optional in the task config, so tolerate its absence.
        location = custom.get("Location")

        client = await _get_bigquery_client(
            project=project,
            location=location,
            user_agent=user_agent,
            google_application_credentials=google_application_credentials,
        )
        query_job = client.query(task_template.sql.statement, job_config=job_config)

        return BigQueryMetadata(job_id=str(query_job.job_id), location=location, project=project, user_agent=user_agent)

    async def get(
        self, resource_meta: BigQueryMetadata, google_application_credentials: Optional[str] = None, **kwargs
    ) -> Resource:
        """Fetch the job and translate its state into a Flyte ``Resource``.

        :param resource_meta: Metadata returned by ``create``.
        :param google_application_credentials: Service-account credentials forwarded
            to the client factory, or ``None``.
        :return: A ``Resource`` with the current phase, a BigQuery console link and,
            once the job succeeded and has a destination table, a ``results``
            DataFrame pointing at that table.
        """
        client = await _get_bigquery_client(
            project=resource_meta.project,
            location=resource_meta.location,
            user_agent=resource_meta.user_agent,
            google_application_credentials=google_application_credentials,
        )
        log_link = TaskLog(
            uri=f"https://console.cloud.google.com/bigquery?project={resource_meta.project}&j=bq:{resource_meta.location}:{resource_meta.job_id}&page=queryresults",
            name="BigQuery Console",
            ready=True,
            link_type=TaskLog.DASHBOARD,
        )

        job = client.get_job(resource_meta.job_id, resource_meta.project, resource_meta.location)
        if job.errors:
            error_text = str(job.errors)
            # The message needs a %s placeholder; without it the logging module
            # reports a formatting error instead of emitting the job errors.
            logger.error("failed to run BigQuery job with error: %s", error_text)
            return Resource(phase=TaskExecution.FAILED, message=error_text, log_links=[log_link])

        cur_phase = convert_to_flyte_phase(str(job.state))
        res = None

        if cur_phase == TaskExecution.SUCCEEDED:
            dst = job.destination
            if dst:
                output_location = f"bq://{dst.project}:{dst.dataset_id}.{dst.table_id}"
                res = {"results": DataFrame(uri=output_location)}

        return Resource(phase=cur_phase, message=str(job.state), log_links=[log_link], outputs=res)

    async def delete(
        self, resource_meta: BigQueryMetadata, google_application_credentials: Optional[str] = None, **kwargs
    ):
        """Cancel the BigQuery job identified by ``resource_meta``.

        :param resource_meta: Metadata returned by ``create``.
        :param google_application_credentials: Service-account credentials forwarded
            to the client factory, or ``None``.
        """
        client = await _get_bigquery_client(
            project=resource_meta.project,
            location=resource_meta.location,
            user_agent=resource_meta.user_agent,
            google_application_credentials=google_application_credentials,
        )
        client.cancel_job(resource_meta.job_id, resource_meta.project, resource_meta.location)
141
+
142
+
143
+ ConnectorRegistry.register(BigQueryConnector())
@@ -0,0 +1,80 @@
1
+ import re
2
+ from dataclasses import dataclass
3
+ from typing import Any, Dict, Optional, Type
4
+
5
+ from flyte.connectors import AsyncConnectorExecutorMixin
6
+ from flyte.extend import TaskTemplate
7
+ from flyte.io import DataFrame
8
+ from flyte.models import NativeInterface, SerializationContext
9
+ from flyteidl2.core import tasks_pb2
10
+ from google.cloud import bigquery
11
+
12
+
13
@dataclass
class BigQueryConfig:
    """
    Settings used to configure a BigQuery Task.
    """

    # Project identifier (required).
    ProjectID: str
    # Optional location; defaults to None.
    Location: Optional[str] = None
    # Optional google-cloud-bigquery query job configuration; defaults to None.
    QueryJobConfig: Optional[bigquery.QueryJobConfig] = None
22
+
23
+
24
class BigQueryTask(AsyncConnectorExecutorMixin, TaskTemplate):
    """Task template of type ``bigquery_query_job_task`` that carries a SQL query."""

    _TASK_TYPE = "bigquery_query_job_task"

    def __init__(
        self,
        name: str,
        query_template: str,
        plugin_config: BigQueryConfig,
        inputs: Optional[Dict[str, Type]] = None,
        output_dataframe_type: Optional[Type[DataFrame]] = None,
        google_application_credentials: Optional[str] = None,
        **kwargs,
    ):
        """
        To be used to query BigQuery Tables.

        :param name: The Name of this task, should be unique in the project
        :param query_template: The actual query to run. We use Flyte's Golang templating format for Query templating.
          Refer to the templating documentation.
          NOTE(review): the connector in this package binds inputs as BigQuery query
          parameters - confirm which placeholder syntax is actually honoured.
        :param plugin_config: BigQueryConfig object
        :param inputs: Name and type of inputs specified as an ordered dictionary
        :param output_dataframe_type: If some data is produced by this query, then you can specify the
          output dataframe type. When given, the task declares a single ``results`` output.
        :param google_application_credentials: The name of the secret containing the Google Application Credentials.
        :param kwargs: Forwarded unchanged to the parent constructor.
        """
        outputs = {"results": output_dataframe_type} if output_dataframe_type is not None else {}
        # Each input is declared as (type, default) with no default value.
        native_inputs = {k: (v, None) for k, v in inputs.items()} if inputs else {}
        super().__init__(
            name=name,
            interface=NativeInterface(native_inputs, outputs),
            task_type=self._TASK_TYPE,
            image=None,
            **kwargs,
        )
        self.output_dataframe_type = output_dataframe_type
        self.plugin_config = plugin_config
        # Collapse every run of whitespace (including newlines and tabs) to a single
        # space; ``\s+`` already covers the characters the old ``replace`` calls handled.
        self.query_template = re.sub(r"\s+", " ", query_template).strip()
        self.google_application_credentials = google_application_credentials

    def custom_config(self, sctx: SerializationContext) -> Optional[Dict[str, Any]]:
        """Build the custom config dict for this task.

        :param sctx: Serialization context; its ``domain`` is stored under ``Domain``.
        :return: Dict with ``Location``, ``ProjectID`` and ``Domain``, merged with the
            ``query`` section of the configured ``QueryJobConfig`` (if any) and a
            ``secrets`` entry when credentials were given.
        """
        config = {
            "Location": self.plugin_config.Location,
            "ProjectID": self.plugin_config.ProjectID,
            "Domain": sctx.domain,
        }
        if self.plugin_config.QueryJobConfig is not None:
            config.update(self.plugin_config.QueryJobConfig.to_api_repr()["query"])
        if self.google_application_credentials is not None:
            # The key must equal the connector's ``google_application_credentials``
            # keyword argument; the previous key carried a stray trailing colon.
            config["secrets"] = {"google_application_credentials": self.google_application_credentials}
        return config

    def sql(self, sctx: SerializationContext) -> Optional[tasks_pb2.Sql]:
        """Return the whitespace-normalized query as a ``tasks_pb2.Sql`` message with the ANSI dialect.

        :param sctx: Serialization context (unused here).
        """
        return tasks_pb2.Sql(statement=self.query_template, dialect=tasks_pb2.Sql.Dialect.ANSI)
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: flyteplugins-bigquery
3
+ Version: 2.0.0b54
4
+ Summary: BigQuery plugin for flyte
5
+ Author-email: Kevin Su <pingsutw@users.noreply.github.com>
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: flyte[connector]
9
+ Requires-Dist: google-cloud-bigquery
10
+ Requires-Dist: google-cloud-bigquery-storage
11
+
12
+ # BigQuery Plugin for Flyte
13
+
14
+ This plugin provides BigQuery integration for Flyte, enabling you to run BigQuery queries as Flyte tasks.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install flyteplugins-bigquery
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```python
25
+ from flyteplugins.bigquery import BigQueryConfig, BigQueryTask
26
+
27
+ config = BigQueryConfig(ProjectID="my-project", Location="US")
28
+ task = BigQueryTask(
29
+ name="my_query",
30
+ query_template="SELECT * FROM dataset.table WHERE id = {{ .user_id }}",
31
+ plugin_config=config,
32
+ inputs={"user_id": int},
33
+ )
34
+ ```
@@ -0,0 +1,9 @@
1
+ flyteplugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ flyteplugins/bigquery/__init__.py,sha256=aXxeQ2_aLVqZej1ERmXPXtpsJkYcPKg448W_4nXWKe0,197
3
+ flyteplugins/bigquery/connector.py,sha256=Ds5MFW0Pndv8F-IG73TfA34iQsDvnbtwFvMX5Qq99jE,5308
4
+ flyteplugins/bigquery/task.py,sha256=VphsNEb0C7pTTwN5SmHM8IjkKfcOB-MODofSX_1OB2Q,3190
5
+ flyteplugins_bigquery-2.0.0b54.dist-info/METADATA,sha256=qVkMPznvvR0WyzjXqL_TQqYmso0CubcuQnluIDI6qJA,867
6
+ flyteplugins_bigquery-2.0.0b54.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
7
+ flyteplugins_bigquery-2.0.0b54.dist-info/entry_points.txt,sha256=oBu4yigPOb5w59qt1pJpqLVXltD2IAxUdU1FbYpPeLE,80
8
+ flyteplugins_bigquery-2.0.0b54.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
9
+ flyteplugins_bigquery-2.0.0b54.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [flyte.connectors]
2
+ bigquery = flyteplugins.bigquery.connector:BigQueryConnector
@@ -0,0 +1 @@
1
+ flyteplugins