flyteplugins-connectors 2.0.0b29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flyteplugins/connectors/__init__.py +0 -0
- flyteplugins/connectors/bigquery/__init__.py +4 -0
- flyteplugins/connectors/bigquery/connector.py +140 -0
- flyteplugins/connectors/bigquery/task.py +83 -0
- flyteplugins_connectors-2.0.0b29.dist-info/METADATA +156 -0
- flyteplugins_connectors-2.0.0b29.dist-info/RECORD +8 -0
- flyteplugins_connectors-2.0.0b29.dist-info/WHEEL +5 -0
- flyteplugins_connectors-2.0.0b29.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
from typing import Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
from flyte import logger
|
|
7
|
+
from flyte.connectors import (
|
|
8
|
+
AsyncConnector,
|
|
9
|
+
ConnectorRegistry,
|
|
10
|
+
Resource,
|
|
11
|
+
ResourceMeta,
|
|
12
|
+
)
|
|
13
|
+
from flyte.connectors.utils import convert_to_flyte_phase
|
|
14
|
+
from flyte.io import DataFrame
|
|
15
|
+
from flyte.types import TypeEngine
|
|
16
|
+
from flyteidl2.core.execution_pb2 import TaskExecution, TaskLog
|
|
17
|
+
from flyteidl2.core.tasks_pb2 import TaskTemplate
|
|
18
|
+
from google.api_core.client_info import ClientInfo
|
|
19
|
+
from google.cloud import bigquery
|
|
20
|
+
from google.oauth2 import service_account
|
|
21
|
+
from google.protobuf import json_format
|
|
22
|
+
|
|
23
|
+
# Mapping from Python types to BigQuery Standard SQL type names; used when
# turning task inputs into ScalarQueryParameter objects.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data_type_sizes
pythonTypeToBigQueryType: Dict[type, str] = dict(
    [
        (list, "ARRAY"),
        (bool, "BOOL"),
        (bytes, "BYTES"),
        (datetime.datetime, "DATETIME"),
        (float, "FLOAT64"),
        (int, "INT64"),
        (str, "STRING"),
    ]
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class BigQueryMetadata(ResourceMeta):
    """Resource metadata identifying a submitted BigQuery query job.

    Returned by ``BigQueryConnector.create`` and handed back to ``get`` /
    ``delete`` so the connector can locate the job again later.
    """

    # BigQuery job identifier (stringified query_job.job_id).
    job_id: str
    # GCP project the job was submitted under.
    project: str
    # BigQuery location/region of the job.
    location: str
    # User-agent string used when rebuilding the client for this job.
    user_agent: str
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@lru_cache
def _get_bigquery_client(
    project: str, location: str, user_agent: str, google_application_credentials: Optional[str] = None
) -> bigquery.Client:
    """Build and memoize a BigQuery client for the given project/location.

    BUGFIX: this function was declared ``async def`` even though every call
    site (``create``/``get``/``delete``) invokes it synchronously and uses the
    result directly as a client; combined with ``lru_cache`` it would cache
    and return the same coroutine object instead of a ``bigquery.Client``.
    It is now a plain function.

    :param project: GCP project ID to run the job under.
    :param location: BigQuery location/region for the job.
    :param user_agent: Forwarded via ClientInfo for usage attribution.
    :param google_application_credentials: Service-account credentials info;
        when None, application-default credentials are used.
        NOTE(review): ``from_service_account_info`` expects a mapping —
        confirm the secret is deserialized before it reaches this function.
    """
    if google_application_credentials is not None:
        credentials = service_account.Credentials.from_service_account_info(google_application_credentials)
    else:
        credentials = None
    cinfo = ClientInfo(user_agent=user_agent)
    return bigquery.Client(project=project, location=location, client_info=cinfo, credentials=credentials)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class BigQueryConnector(AsyncConnector):
    """Connector that executes Flyte SQL task templates as BigQuery query jobs."""

    name = "Bigquery Connector"
    task_type_name = "bigquery_query_job_task"
    metadata_type = BigQueryMetadata

    async def create(
        self,
        task_template: TaskTemplate,
        inputs: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> BigQueryMetadata:
        """Submit the task's SQL statement as a BigQuery query job.

        :param task_template: Serialized task; ``custom`` must carry
            ``ProjectID`` and ``Location`` (see BigQueryTask.custom_config).
        :param inputs: Literal input values, bound as scalar query parameters.
        :return: Metadata identifying the created job.
        :raises KeyError: if ``ProjectID`` or ``Location`` is missing from the
            task's custom config.
        """
        job_config = None
        if inputs:
            # Recover Python types from the task interface to pick the matching
            # BigQuery scalar parameter type for each input.
            python_interface_inputs = {
                name: TypeEngine.guess_python_type(lt.type)
                for name, lt in task_template.interface.inputs.variables.items()
            }
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val)
                    for name, val in inputs.items()
                ]
            )

        # BUGFIX: previously this fell back to None when task_template.custom
        # was unset, making the unconditional custom.get(...) below raise
        # AttributeError. Fall back to {} so the failure is a clear KeyError
        # on the required keys instead.
        custom = json_format.MessageToDict(task_template.custom) if task_template.custom else {}

        domain = custom.get("Domain")
        sdk_version = task_template.metadata.runtime.version

        user_agent = f"Flyte/{sdk_version} (GPN:Union;{domain or ''})"
        project = custom["ProjectID"]
        location = custom["Location"]

        client = _get_bigquery_client(
            project=project,
            location=location,
            user_agent=user_agent,
            google_application_credentials=custom.get("google_application_credentials"),
        )
        query_job = client.query(task_template.sql.statement, job_config=job_config)

        return BigQueryMetadata(job_id=str(query_job.job_id), location=location, project=project, user_agent=user_agent)

    async def get(
        self, resource_meta: BigQueryMetadata, google_application_credentials: Optional[str] = None, **kwargs
    ) -> Resource:
        """Poll the BigQuery job and report its phase, console link and outputs.

        On success, exposes the destination table as a ``DataFrame`` under the
        ``results`` output (``bq://project:dataset.table`` URI).
        """
        client = _get_bigquery_client(
            project=resource_meta.project,
            location=resource_meta.location,
            user_agent=resource_meta.user_agent,
            google_application_credentials=google_application_credentials,
        )
        log_link = TaskLog(
            uri=f"https://console.cloud.google.com/bigquery?project={resource_meta.project}&j=bq:{resource_meta.location}:{resource_meta.job_id}&page=queryresults",
            name="BigQuery Console",
        )

        job = client.get_job(resource_meta.job_id, resource_meta.project, resource_meta.location)
        if job.errors:
            # BUGFIX: the previous call passed a stray positional argument with
            # no %s placeholder ("msg:", errors), which breaks logging's lazy
            # %-formatting; use a placeholder instead.
            logger.error("failed to run BigQuery job with error: %s", job.errors)
            return Resource(phase=TaskExecution.FAILED, message=str(job.errors), log_links=[log_link])

        cur_phase = convert_to_flyte_phase(str(job.state))
        res = None

        if cur_phase == TaskExecution.SUCCEEDED:
            dst = job.destination
            if dst:
                output_location = f"bq://{dst.project}:{dst.dataset_id}.{dst.table_id}"
                res = {"results": DataFrame(uri=output_location)}

        return Resource(phase=cur_phase, message=str(job.state), log_links=[log_link], outputs=res)

    async def delete(
        self, resource_meta: BigQueryMetadata, google_application_credentials: Optional[str] = None, **kwargs
    ):
        """Cancel the BigQuery job identified by ``resource_meta``."""
        client = _get_bigquery_client(
            project=resource_meta.project,
            location=resource_meta.location,
            user_agent=resource_meta.user_agent,
            google_application_credentials=google_application_credentials,
        )
        client.cancel_job(resource_meta.job_id, resource_meta.project, resource_meta.location)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# Import side effect: register the connector so the runtime can dispatch
# "bigquery_query_job_task" task types to it.
ConnectorRegistry.register(BigQueryConnector())
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any, Dict, Optional, Type
|
|
4
|
+
|
|
5
|
+
from flyte._task import TaskTemplate
|
|
6
|
+
from flyte.connectors._connector import AsyncConnectorExecutorMixin
|
|
7
|
+
from flyte.io import DataFrame
|
|
8
|
+
from flyte.models import NativeInterface, SerializationContext
|
|
9
|
+
from flyteidl2.core import tasks_pb2
|
|
10
|
+
from google.cloud import bigquery
|
|
11
|
+
from google.protobuf import json_format
|
|
12
|
+
from google.protobuf.struct_pb2 import Struct
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class BigQueryConfig(object):
    """
    BigQueryConfig should be used to configure a BigQuery Task.
    """

    # NOTE: PascalCase field names are intentional — they are serialized
    # verbatim as keys of the task's custom config (see
    # BigQueryTask.custom_config), which the backend connector reads.
    # GCP project that runs (and is billed for) the query job.
    ProjectID: str
    # Optional BigQuery location/region (e.g. "US"); backend default if None.
    Location: Optional[str] = None
    # Optional extra query-job settings, merged into the custom config via
    # QueryJobConfig.to_api_repr()["query"].
    QueryJobConfig: Optional[bigquery.QueryJobConfig] = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BigQueryTask(AsyncConnectorExecutorMixin, TaskTemplate):
    """Flyte task that runs a templated SQL query as a BigQuery query job."""

    _TASK_TYPE = "bigquery_query_job_task"

    def __init__(
        self,
        name: str,
        query_template: str,
        plugin_config: BigQueryConfig,
        inputs: Optional[Dict[str, Type]] = None,
        output_dataframe_type: Optional[Type[DataFrame]] = None,
        google_application_credentials: Optional[str] = None,
        **kwargs,
    ):
        """
        To be used to query BigQuery Tables.

        :param name: The Name of this task, should be unique in the project
        :param query_template: The actual query to run. We use Flyte's Golang templating format for Query templating.
            Refer to the templating documentation
        :param plugin_config: BigQueryConfig object
        :param inputs: Name and type of inputs specified as an ordered dictionary
        :param output_dataframe_type: If some data is produced by this query, then you can specify the
            output dataframe type.
        :param google_application_credentials: The name of the secret containing the Google Application Credentials.
        """
        outputs = None
        if output_dataframe_type is not None:
            outputs = {
                "results": output_dataframe_type,
            }
        super().__init__(
            name=name,
            interface=NativeInterface({k: (v, None) for k, v in inputs.items()} if inputs else {}, outputs or {}),
            task_type=self._TASK_TYPE,
            **kwargs,
        )
        self.output_dataframe_type = output_dataframe_type
        self.plugin_config = plugin_config
        # Collapse tabs/newlines and runs of whitespace so the statement
        # serializes as a single clean line.
        self.query_template = re.sub(r"\s+", " ", query_template.replace("\n", " ").replace("\t", " ")).strip()
        self.google_application_credentials = google_application_credentials

    def custom_config(self, sctx: SerializationContext) -> Optional[Dict[str, Any]]:
        """Serialize plugin configuration into the task's ``custom`` struct.

        Keys (``ProjectID``, ``Location``, ``Domain``) are read back by
        ``BigQueryConnector.create``.
        """
        config = {
            "Location": self.plugin_config.Location,
            "ProjectID": self.plugin_config.ProjectID,
            "Domain": sctx.domain,
        }
        if self.plugin_config.QueryJobConfig is not None:
            config.update(self.plugin_config.QueryJobConfig.to_api_repr()["query"])
        if self.google_application_credentials is not None:
            # BUGFIX: the secret key previously carried a stray trailing colon
            # inside the string ("google_application_credentials:"), so the
            # consumer looking up "google_application_credentials" could never
            # find it.
            config["secrets"] = {"google_application_credentials": self.google_application_credentials}
        s = Struct()
        s.update(config)
        return json_format.MessageToDict(s)

    def sql(self, sctx: SerializationContext) -> Optional[tasks_pb2.Sql]:
        """Return the ANSI-dialect Sql proto for this task's query template.

        NOTE: the return annotation previously claimed ``Optional[str]`` even
        though a ``tasks_pb2.Sql`` message is returned; annotation corrected
        (runtime behavior unchanged).
        """
        sql = tasks_pb2.Sql(statement=self.query_template, dialect=tasks_pb2.Sql.Dialect.ANSI)
        return sql
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flyteplugins-connectors
|
|
3
|
+
Version: 2.0.0b29
|
|
4
|
+
Summary: Connector plugin for flyte
|
|
5
|
+
Author-email: Kevin Su <pingsutw@users.noreply.github.com>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: flyte
|
|
9
|
+
Requires-Dist: flyteidl
|
|
10
|
+
Provides-Extra: bigquery
|
|
11
|
+
Requires-Dist: google-cloud-bigquery; extra == "bigquery"
|
|
12
|
+
Requires-Dist: google-cloud-bigquery-storage; extra == "bigquery"
|
|
13
|
+
|
|
14
|
+
<div align="center">
|
|
15
|
+
|
|
16
|
+
# ๐ Flyte Connectors Plugin
|
|
17
|
+
|
|
18
|
+
[](https://badge.fury.io/py/flyteplugins-connectors)
|
|
19
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
20
|
+
[](https://github.com/flyteorg/flyte/actions)
|
|
21
|
+
[](https://docs.flyte.org)
|
|
22
|
+
|
|
23
|
+
**๐ Seamlessly connect Flyte workflows to external data sources and services**
|
|
24
|
+
|
|
25
|
+
*Build powerful data pipelines with native integrations to popular cloud services*
|
|
26
|
+
|
|
27
|
+
</div>
|
|
28
|
+
|
|
29
|
+
## ๐ Quick Start
|
|
30
|
+
|
|
31
|
+
### Installation
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install --pre flyteplugins-connectors[bigquery] # Install BigQuery connector
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
### BigQuery Integration
|
|
41
|
+
|
|
42
|
+
Execute SQL queries on BigQuery and seamlessly integrate results into your Flyte workflows:
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from flyteplugins.connectors.bigquery.task import BigQueryConfig, BigQueryTask
|
|
46
|
+
import flyte
|
|
47
|
+
from flyte.io import DataFrame
|
|
48
|
+
|
|
49
|
+
# Configure your BigQuery connection
|
|
50
|
+
config = BigQueryConfig(
|
|
51
|
+
ProjectID="your-gcp-project",
|
|
52
|
+
Location="US" # Optional: specify region
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Create a task environment
|
|
56
|
+
env = flyte.TaskEnvironment(name="analytics_env")
|
|
57
|
+
|
|
58
|
+
# Define your BigQuery task
|
|
59
|
+
analytics_task = BigQueryTask(
|
|
60
|
+
name="user_analytics",
|
|
61
|
+
inputs={
|
|
62
|
+
"user_id": int,
|
|
63
|
+
"start_date": str,
|
|
64
|
+
"end_date": str
|
|
65
|
+
},
|
|
66
|
+
output_dataframe_type=DataFrame,
|
|
67
|
+
plugin_config=config,
|
|
68
|
+
query_template="""
|
|
69
|
+
SELECT
|
|
70
|
+
user_id,
|
|
71
|
+
COUNT(*) as event_count,
|
|
72
|
+
MAX(timestamp) as last_activity
|
|
73
|
+
FROM events
|
|
74
|
+
WHERE user_id = {{ .user_id }}
|
|
75
|
+
AND DATE(timestamp) BETWEEN '{{ .start_date }}' AND '{{ .end_date }}'
|
|
76
|
+
GROUP BY user_id
|
|
77
|
+
"""
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
env.from_task(analytics_task)
|
|
81
|
+
|
|
82
|
+
# Run your workflow
|
|
83
|
+
if __name__ == "__main__":
|
|
84
|
+
flyte.init_from_config()
|
|
85
|
+
result = flyte.with_runcontext(mode="remote").run(
|
|
86
|
+
analytics_task,
|
|
87
|
+
user_id=12345,
|
|
88
|
+
start_date="2024-01-01",
|
|
89
|
+
end_date="2024-01-31"
|
|
90
|
+
)
|
|
91
|
+
print(f"Workflow URL: {result.url}")
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## ๐ Available Connectors
|
|
95
|
+
|
|
96
|
+
| Connector | Status | Description | Use Cases |
|
|
97
|
+
|-----------|--------|-------------|-----------|
|
|
98
|
+
| 🔷 **BigQuery** | ✅ Stable | Google Cloud data warehouse | Analytics, ML training, reporting |
|
|
99
|
+
| ๐ **More Coming Soon** | ๐ง | Additional connectors in development | - |
|
|
100
|
+
|
|
101
|
+
## 🧪 Testing
|
|
102
|
+
|
|
103
|
+
Run the test suite to ensure everything works correctly:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# Run all connector tests
|
|
107
|
+
pytest plugins/connectors/tests/ -v
|
|
108
|
+
|
|
109
|
+
# Run specific connector tests
|
|
110
|
+
pytest plugins/connectors/tests/test_bigquery.py -v
|
|
111
|
+
|
|
112
|
+
# Run with coverage
|
|
113
|
+
pytest plugins/connectors/tests/ --cov=flyteplugins.connectors --cov-report=html
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## ๐ค Contributing
|
|
117
|
+
|
|
118
|
+
We welcome contributions! Here's how to get started:
|
|
119
|
+
|
|
120
|
+
1. **Fork the repository**
|
|
121
|
+
2. **Create a feature branch**: `git checkout -b feature/amazing-connector`
|
|
122
|
+
3. **Write tests** for your changes
|
|
123
|
+
4. **Ensure tests pass**: `pytest plugins/connectors/tests/`
|
|
124
|
+
5. **Submit a pull request**
|
|
125
|
+
|
|
126
|
+
### Adding a New Connector
|
|
127
|
+
|
|
128
|
+
1. Create your connector module in `src/flyteplugins/connectors/`
|
|
129
|
+
2. Implement the `TaskTemplate` interface
|
|
130
|
+
3. Add comprehensive tests in `tests/`
|
|
131
|
+
4. Update this README with examples
|
|
132
|
+
5. Add example usage in `examples/connectors/`
|
|
133
|
+
|
|
134
|
+
## ๐ง Requirements
|
|
135
|
+
|
|
136
|
+
- **Python**: 3.10+
|
|
137
|
+
- **Flyte**: Latest version
|
|
138
|
+
- **Dependencies**: See `pyproject.toml` for full requirements
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
## ๐ Support
|
|
142
|
+
|
|
143
|
+
- **๐ฌ Community**: [Flyte Slack](https://slack.flyte.org/)
|
|
144
|
+
- **๐ Issues**: [GitHub Issues](https://github.com/flyteorg/flyte-sdk/issues)
|
|
145
|
+
- **๐ฌ Discussions**: [GitHub Discussions](https://github.com/flyteorg/flyte/discussions)
|
|
146
|
+
- **๐ Documentation**: [docs.flyte.org](https://docs.flyte.org)
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
<div align="center">
|
|
151
|
+
|
|
152
|
+
**Made with ❤️ by the Flyte Community**
|
|
153
|
+
|
|
154
|
+
[⭐ Star us on GitHub](https://github.com/flyteorg/flyte) • [🐦 Follow us on Twitter](https://twitter.com/flyteorg) • [💼 LinkedIn](https://linkedin.com/company/flyte-org)
|
|
155
|
+
|
|
156
|
+
</div>
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
flyteplugins/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
flyteplugins/connectors/bigquery/__init__.py,sha256=TzgxgF_qb5RL4Ajb36mJAP6oRoMWOmJdUL24LpxFjRI,219
|
|
3
|
+
flyteplugins/connectors/bigquery/connector.py,sha256=KkBb5IInWsu6RdFRs-eKfK9OW5P3tl2LbFpnIWd3qGo,5175
|
|
4
|
+
flyteplugins/connectors/bigquery/task.py,sha256=XCiZElNlRa0PjI5hVNxVAL8tRwsEXY6EhDo8tWUZU8Q,3330
|
|
5
|
+
flyteplugins_connectors-2.0.0b29.dist-info/METADATA,sha256=Z27UVvTDk338Fa8xlz1U9NOsKCWFHp9as24pW0HPJdk,4533
|
|
6
|
+
flyteplugins_connectors-2.0.0b29.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
7
|
+
flyteplugins_connectors-2.0.0b29.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
|
|
8
|
+
flyteplugins_connectors-2.0.0b29.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flyteplugins
|