flyteplugins-connectors 2.0.0b26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flyteplugins-connectors might be problematic. Click here for more details.

File without changes
@@ -0,0 +1,4 @@
1
+ from flyteplugins.connectors.bigquery.connector import BigQueryConnector
2
+ from flyteplugins.connectors.bigquery.task import BigQueryConfig, BigQueryTask
3
+
4
+ __all__ = ["BigQueryConfig", "BigQueryConnector", "BigQueryTask"]
@@ -0,0 +1,140 @@
1
+ import datetime
2
+ from dataclasses import dataclass
3
+ from functools import lru_cache
4
+ from typing import Any, Dict, Optional
5
+
6
+ from flyte import logger
7
+ from flyte.connectors import (
8
+ AsyncConnector,
9
+ ConnectorRegistry,
10
+ Resource,
11
+ ResourceMeta,
12
+ )
13
+ from flyte.connectors.utils import convert_to_flyte_phase
14
+ from flyte.io import DataFrame
15
+ from flyte.types import TypeEngine
16
+ from flyteidl2.core.execution_pb2 import TaskExecution, TaskLog
17
+ from flyteidl2.core.tasks_pb2 import TaskTemplate
18
+ from google.api_core.client_info import ClientInfo
19
+ from google.cloud import bigquery
20
+ from google.oauth2 import service_account
21
+ from google.protobuf import json_format
22
+
23
# Lookup table from the Python type guessed for a task input to the name of
# the BigQuery Standard SQL type used when building query parameters.
# Type names follow
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data_type_sizes
pythonTypeToBigQueryType: Dict[type, str] = {
    # Container type.
    list: "ARRAY",
    # Scalar types.
    bool: "BOOL",
    bytes: "BYTES",
    datetime.datetime: "DATETIME",
    float: "FLOAT64",
    int: "INT64",
    str: "STRING",
}
33
+
34
+
35
+ @dataclass
36
+ class BigQueryMetadata(ResourceMeta):
37
+ job_id: str
38
+ project: str
39
+ location: str
40
+ user_agent: str
41
+
42
+
43
@lru_cache
def _get_bigquery_client(
    project: str,
    location: str,
    user_agent: str,
    google_application_credentials: Optional[str] = None,
) -> bigquery.Client:
    """Return a (cached) BigQuery client for the given settings.

    This is deliberately a plain function rather than a coroutine:
    ``lru_cache`` on an ``async def`` caches the *coroutine object*, which can
    be awaited only once, and the callers in this module use the return value
    directly as a client without awaiting it.

    :param project: Google Cloud project the client operates on.
    :param location: Default BigQuery location for the client.
    :param user_agent: User-agent string attached to the client's requests.
    :param google_application_credentials: Service-account key. A JSON string
        is parsed before use; ``None`` lets the client library discover
        credentials on its own.
    :return: A ``bigquery.Client``; identical arguments yield the same cached
        instance.
    :raises ValueError: If the credentials string is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    credentials = None
    if google_application_credentials is not None:
        # Imported locally so this block does not depend on a new top-of-file import.
        import json

        account_info = google_application_credentials
        # from_service_account_info expects a mapping, while the secret reaches
        # us as text, so decode it first.
        if isinstance(account_info, (str, bytes, bytearray)):
            account_info = json.loads(account_info)
        credentials = service_account.Credentials.from_service_account_info(account_info)

    return bigquery.Client(
        project=project,
        location=location,
        client_info=ClientInfo(user_agent=user_agent),
        credentials=credentials,
    )
53
+
54
+
55
class BigQueryConnector(AsyncConnector):
    """Connector that submits, polls and cancels BigQuery query jobs for
    tasks of type ``bigquery_query_job_task``."""

    name = "Bigquery Connector"
    task_type_name = "bigquery_query_job_task"
    metadata_type = BigQueryMetadata

    async def create(
        self,
        task_template: TaskTemplate,
        inputs: Optional[Dict[str, Any]] = None,
        google_application_credentials: Optional[str] = None,
        **kwargs,
    ) -> BigQueryMetadata:
        """Submit the task's SQL statement as a BigQuery query job.

        :param task_template: Serialized task; its ``custom`` struct supplies
            ``ProjectID``, ``Location`` and ``Domain``, and ``sql.statement``
            supplies the query text.
        :param inputs: Input values, forwarded to BigQuery as named scalar
            query parameters.
        :param google_application_credentials: Optional service-account
            credentials, accepted the same way ``get`` and ``delete`` accept
            them. When omitted, the ``google_application_credentials`` entry
            of the custom config (if any) is used.
        :return: Metadata identifying the submitted job.
        :raises KeyError: If ``ProjectID`` or ``Location`` is missing from the
            custom config, or an input has a type with no entry in
            ``pythonTypeToBigQueryType``.
        """
        job_config = None
        if inputs:
            # Recover the Python type of each declared input so it can be
            # translated into a BigQuery parameter type.
            declared_types = {
                var_name: TypeEngine.guess_python_type(variable.type)
                for var_name, variable in task_template.interface.inputs.variables.items()
            }
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ScalarQueryParameter(
                        param_name,
                        pythonTypeToBigQueryType[declared_types[param_name]],
                        value,
                    )
                    for param_name, value in inputs.items()
                ]
            )

        # Fall back to an empty dict so a template without custom config fails
        # below with a KeyError naming the missing field, not an
        # AttributeError on None.
        custom = json_format.MessageToDict(task_template.custom) if task_template.custom else {}

        domain = custom.get("Domain")
        sdk_version = task_template.metadata.runtime.version

        user_agent = f"Flyte/{sdk_version} (GPN:Union;{domain or ''})"
        project = custom["ProjectID"]
        location = custom["Location"]

        if google_application_credentials is None:
            google_application_credentials = custom.get("google_application_credentials")

        client = _get_bigquery_client(
            project=project,
            location=location,
            user_agent=user_agent,
            google_application_credentials=google_application_credentials,
        )
        query_job = client.query(task_template.sql.statement, job_config=job_config)

        return BigQueryMetadata(
            job_id=str(query_job.job_id),
            location=location,
            project=project,
            user_agent=user_agent,
        )

    async def get(
        self, resource_meta: BigQueryMetadata, google_application_credentials: Optional[str] = None, **kwargs
    ) -> Resource:
        """Report the current state of a previously submitted job.

        :param resource_meta: Metadata returned by ``create``.
        :param google_application_credentials: Optional service-account
            credentials used to build the client.
        :return: A ``Resource`` carrying the phase, a message, a link to the
            BigQuery console and, once the job has succeeded and has a
            destination table, a ``results`` DataFrame pointing at it.
        """
        client = _get_bigquery_client(
            project=resource_meta.project,
            location=resource_meta.location,
            user_agent=resource_meta.user_agent,
            google_application_credentials=google_application_credentials,
        )
        log_link = TaskLog(
            uri=f"https://console.cloud.google.com/bigquery?project={resource_meta.project}&j=bq:{resource_meta.location}:{resource_meta.job_id}&page=queryresults",
            name="BigQuery Console",
        )

        job = client.get_job(resource_meta.job_id, resource_meta.project, resource_meta.location)
        if job.errors:
            error_text = str(job.errors)
            # Format the message eagerly: passing the errors as an extra
            # positional argument without a placeholder breaks log formatting.
            logger.error(f"failed to run BigQuery job with error: {error_text}")
            return Resource(phase=TaskExecution.FAILED, message=error_text, log_links=[log_link])

        cur_phase = convert_to_flyte_phase(str(job.state))
        outputs = None

        if cur_phase == TaskExecution.SUCCEEDED:
            destination = job.destination
            if destination:
                output_location = f"bq://{destination.project}:{destination.dataset_id}.{destination.table_id}"
                outputs = {"results": DataFrame(uri=output_location)}

        return Resource(phase=cur_phase, message=str(job.state), log_links=[log_link], outputs=outputs)

    async def delete(
        self, resource_meta: BigQueryMetadata, google_application_credentials: Optional[str] = None, **kwargs
    ):
        """Cancel a previously submitted job.

        :param resource_meta: Metadata returned by ``create``.
        :param google_application_credentials: Optional service-account
            credentials used to build the client.
        """
        client = _get_bigquery_client(
            project=resource_meta.project,
            location=resource_meta.location,
            user_agent=resource_meta.user_agent,
            google_application_credentials=google_application_credentials,
        )
        client.cancel_job(resource_meta.job_id, resource_meta.project, resource_meta.location)
138
+
139
+
140
+ ConnectorRegistry.register(BigQueryConnector())
@@ -0,0 +1,83 @@
1
+ import re
2
+ from dataclasses import dataclass
3
+ from typing import Any, Dict, Optional, Type
4
+
5
+ from flyte._task import TaskTemplate
6
+ from flyte.connectors._connector import AsyncConnectorExecutorMixin
7
+ from flyte.io import DataFrame
8
+ from flyte.models import NativeInterface, SerializationContext
9
+ from flyteidl2.core import tasks_pb2
10
+ from google.cloud import bigquery
11
+ from google.protobuf import json_format
12
+ from google.protobuf.struct_pb2 import Struct
13
+
14
+
15
@dataclass
class BigQueryConfig:
    """
    Settings used to configure a BigQuery Task.
    """

    # Google Cloud project the query runs in.
    ProjectID: str
    # BigQuery location; optional.
    Location: Optional[str] = None
    # Optional job configuration; when set, the "query" section of its API
    # representation is merged into the task's custom config.
    QueryJobConfig: Optional[bigquery.QueryJobConfig] = None
24
+
25
+
26
class BigQueryTask(AsyncConnectorExecutorMixin, TaskTemplate):
    """Task that runs a SQL query on BigQuery through the BigQuery connector."""

    _TASK_TYPE = "bigquery_query_job_task"

    def __init__(
        self,
        name: str,
        query_template: str,
        plugin_config: BigQueryConfig,
        inputs: Optional[Dict[str, Type]] = None,
        output_dataframe_type: Optional[Type[DataFrame]] = None,
        google_application_credentials: Optional[str] = None,
        **kwargs,
    ):
        """
        To be used to query BigQuery Tables.

        :param name: The Name of this task, should be unique in the project
        :param query_template: The actual query to run. Runs of whitespace
            (including newlines and tabs) are collapsed to single spaces.
            NOTE(review): the previous documentation said Flyte's Golang
            templating format is used for query templating, while the
            connector in this package forwards inputs as named BigQuery query
            parameters -- confirm which syntax queries should use.
        :param plugin_config: BigQueryConfig object
        :param inputs: Name and type of inputs specified as an ordered dictionary
        :param output_dataframe_type: If some data is produced by this query, then you can specify the
            output dataframe type. It is exposed as the ``results`` output.
        :param google_application_credentials: The name of the secret containing the Google Application Credentials.
        """
        interface_inputs = {key: (tp, None) for key, tp in inputs.items()} if inputs else {}
        interface_outputs = {"results": output_dataframe_type} if output_dataframe_type is not None else {}
        super().__init__(
            name=name,
            interface=NativeInterface(interface_inputs, interface_outputs),
            task_type=self._TASK_TYPE,
            **kwargs,
        )
        self.output_dataframe_type = output_dataframe_type
        self.plugin_config = plugin_config
        # ``\s+`` already matches newlines and tabs, so one substitution is
        # enough to normalize the statement to a single line.
        self.query_template = re.sub(r"\s+", " ", query_template).strip()
        self.google_application_credentials = google_application_credentials

    def custom_config(self, sctx: SerializationContext) -> Optional[Dict[str, Any]]:
        """Build the custom configuration serialized with the task.

        :param sctx: Serialization context; its ``domain`` is recorded under
            ``Domain``.
        :return: A JSON-compatible dict with ``Location``, ``ProjectID`` and
            ``Domain``, plus the ``query`` section of the optional
            ``QueryJobConfig`` and, when credentials were given, a ``secrets``
            entry.
        """
        config = {
            "Location": self.plugin_config.Location,
            "ProjectID": self.plugin_config.ProjectID,
            "Domain": sctx.domain,
        }
        if self.plugin_config.QueryJobConfig is not None:
            config.update(self.plugin_config.QueryJobConfig.to_api_repr()["query"])
        if self.google_application_credentials is not None:
            # The key previously carried a stray trailing colon
            # ("google_application_credentials:"), which cannot match the
            # ``google_application_credentials`` keyword the connector
            # methods accept.
            # NOTE(review): presumably the keys of "secrets" are matched
            # against connector keyword names -- confirm with the connector
            # runtime.
            config["secrets"] = {"google_application_credentials": self.google_application_credentials}
        # Round-trip through a protobuf Struct so the result contains only
        # JSON-compatible values.
        struct = Struct()
        struct.update(config)
        return json_format.MessageToDict(struct)

    def sql(self, sctx: SerializationContext) -> Optional[tasks_pb2.Sql]:
        """Return the normalized query as an ANSI-dialect ``Sql`` message.

        :param sctx: Serialization context (unused).
        """
        return tasks_pb2.Sql(statement=self.query_template, dialect=tasks_pb2.Sql.Dialect.ANSI)
@@ -0,0 +1,156 @@
1
+ Metadata-Version: 2.4
2
+ Name: flyteplugins-connectors
3
+ Version: 2.0.0b26
4
+ Summary: Connector plugin for flyte
5
+ Author-email: Kevin Su <pingsutw@users.noreply.github.com>
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: flyte
9
+ Requires-Dist: flyteidl
10
+ Provides-Extra: bigquery
11
+ Requires-Dist: google-cloud-bigquery; extra == "bigquery"
12
+ Requires-Dist: google-cloud-bigquery-storage; extra == "bigquery"
13
+
14
+ <div align="center">
15
+
16
+ # 🔗 Flyte Connectors Plugin
17
+
18
+ [![PyPI version](https://badge.fury.io/py/flyteplugins-connectors.svg)](https://badge.fury.io/py/flyteplugins-connectors)
19
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
20
+ [![Tests](https://github.com/flyteorg/flyte/workflows/tests/badge.svg)](https://github.com/flyteorg/flyte/actions)
21
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://docs.flyte.org)
22
+
23
+ **🚀 Seamlessly connect Flyte workflows to external data sources and services**
24
+
25
+ *Build powerful data pipelines with native integrations to popular cloud services*
26
+
27
+ </div>
28
+
29
+ ## 🚀 Quick Start
30
+
31
+ ### Installation
32
+
33
+
34
+ ```bash
35
+ pip install --pre flyteplugins-connectors[bigquery] # Install BigQuery connector
36
+ ```
37
+
38
+
39
+
40
+ ### BigQuery Integration
41
+
42
+ Execute SQL queries on BigQuery and seamlessly integrate results into your Flyte workflows:
43
+
44
+ ```python
45
+ from flyteplugins.connectors.bigquery.task import BigQueryConfig, BigQueryTask
46
+ import flyte
47
+ from flyte.io import DataFrame
48
+
49
+ # Configure your BigQuery connection
50
+ config = BigQueryConfig(
51
+ ProjectID="your-gcp-project",
52
+ Location="US" # Optional: specify region
53
+ )
54
+
55
+ # Create a task environment
56
+ env = flyte.TaskEnvironment(name="analytics_env")
57
+
58
+ # Define your BigQuery task
59
+ analytics_task = BigQueryTask(
60
+ name="user_analytics",
61
+ inputs={
62
+ "user_id": int,
63
+ "start_date": str,
64
+ "end_date": str
65
+ },
66
+ output_dataframe_type=DataFrame,
67
+ plugin_config=config,
68
+ query_template="""
69
+ SELECT
70
+ user_id,
71
+ COUNT(*) as event_count,
72
+ MAX(timestamp) as last_activity
73
+ FROM events
74
+ WHERE user_id = {{ .user_id }}
75
+ AND DATE(timestamp) BETWEEN '{{ .start_date }}' AND '{{ .end_date }}'
76
+ GROUP BY user_id
77
+ """
78
+ )
79
+
80
+ env.from_task(analytics_task)
81
+
82
+ # Run your workflow
83
+ if __name__ == "__main__":
84
+ flyte.init_from_config()
85
+ result = flyte.with_runcontext(mode="remote").run(
86
+ analytics_task,
87
+ user_id=12345,
88
+ start_date="2024-01-01",
89
+ end_date="2024-01-31"
90
+ )
91
+ print(f"Workflow URL: {result.url}")
92
+ ```
93
+
94
+ ## 📚 Available Connectors
95
+
96
+ | Connector | Status | Description | Use Cases |
97
+ |-----------|--------|-------------|-----------|
98
+ | 🔷 **BigQuery** | ✅ Stable | Google Cloud data warehouse | Analytics, ML training, reporting |
99
+ | 🔗 **More Coming Soon** | 🚧 | Additional connectors in development | - |
100
+
101
+ ## 🧪 Testing
102
+
103
+ Run the test suite to ensure everything works correctly:
104
+
105
+ ```bash
106
+ # Run all connector tests
107
+ pytest plugins/connectors/tests/ -v
108
+
109
+ # Run specific connector tests
110
+ pytest plugins/connectors/tests/test_bigquery.py -v
111
+
112
+ # Run with coverage
113
+ pytest plugins/connectors/tests/ --cov=flyteplugins.connectors --cov-report=html
114
+ ```
115
+
116
+ ## 🤝 Contributing
117
+
118
+ We welcome contributions! Here's how to get started:
119
+
120
+ 1. **Fork the repository**
121
+ 2. **Create a feature branch**: `git checkout -b feature/amazing-connector`
122
+ 3. **Write tests** for your changes
123
+ 4. **Ensure tests pass**: `pytest plugins/connectors/tests/`
124
+ 5. **Submit a pull request**
125
+
126
+ ### Adding a New Connector
127
+
128
+ 1. Create your connector module in `src/flyteplugins/connectors/`
129
+ 2. Implement the `TaskTemplate` interface
130
+ 3. Add comprehensive tests in `tests/`
131
+ 4. Update this README with examples
132
+ 5. Add example usage in `examples/connectors/`
133
+
134
+ ## 🔧 Requirements
135
+
136
+ - **Python**: 3.10+
137
+ - **Flyte**: Latest version
138
+ - **Dependencies**: See `pyproject.toml` for full requirements
139
+
140
+
141
+ ## 🆘 Support
142
+
143
+ - **📬 Community**: [Flyte Slack](https://slack.flyte.org/)
143
+ - **🐛 Issues**: [GitHub Issues](https://github.com/flyteorg/flyte-sdk/issues)
144
+ - **💬 Discussions**: [GitHub Discussions](https://github.com/flyteorg/flyte/discussions)
145
+ - **📚 Documentation**: [docs.flyte.org](https://docs.flyte.org)
147
+
148
+ ---
149
+
150
+ <div align="center">
151
+
152
+ **Made with ❤️ by the Flyte Community**
153
+
154
+ [⭐ Star us on GitHub](https://github.com/flyteorg/flyte) • [🐦 Follow us on Twitter](https://twitter.com/flyteorg) • [💼 LinkedIn](https://linkedin.com/company/flyte-org)
155
+
156
+ </div>
@@ -0,0 +1,8 @@
1
+ flyteplugins/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ flyteplugins/connectors/bigquery/__init__.py,sha256=TzgxgF_qb5RL4Ajb36mJAP6oRoMWOmJdUL24LpxFjRI,219
3
+ flyteplugins/connectors/bigquery/connector.py,sha256=KkBb5IInWsu6RdFRs-eKfK9OW5P3tl2LbFpnIWd3qGo,5175
4
+ flyteplugins/connectors/bigquery/task.py,sha256=XCiZElNlRa0PjI5hVNxVAL8tRwsEXY6EhDo8tWUZU8Q,3330
5
+ flyteplugins_connectors-2.0.0b26.dist-info/METADATA,sha256=jK9ArqWSPbowUGyLGtZ5IC_f8ue7c8Hwk3c0KR9-aUM,4533
6
+ flyteplugins_connectors-2.0.0b26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ flyteplugins_connectors-2.0.0b26.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
8
+ flyteplugins_connectors-2.0.0b26.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ flyteplugins