garf-executors 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of garf-executors might be problematic. Click here for more details.
- garf_executors/__init__.py +24 -9
- garf_executors/api_executor.py +29 -40
- garf_executors/bq_executor.py +51 -18
- garf_executors/entrypoints/cli.py +4 -22
- garf_executors/entrypoints/server.py +44 -15
- garf_executors/entrypoints/utils.py +4 -1
- garf_executors/execution_context.py +60 -0
- garf_executors/executor.py +54 -0
- garf_executors/fetchers.py +34 -16
- garf_executors/sql_executor.py +54 -22
- {garf_executors-0.0.6.dist-info → garf_executors-0.0.8.dist-info}/METADATA +1 -1
- garf_executors-0.0.8.dist-info/RECORD +17 -0
- garf_executors-0.0.6.dist-info/RECORD +0 -15
- {garf_executors-0.0.6.dist-info → garf_executors-0.0.8.dist-info}/WHEEL +0 -0
- {garf_executors-0.0.6.dist-info → garf_executors-0.0.8.dist-info}/entry_points.txt +0 -0
- {garf_executors-0.0.6.dist-info → garf_executors-0.0.8.dist-info}/top_level.txt +0 -0
garf_executors/__init__.py
CHANGED
|
@@ -11,20 +11,35 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
Instead of importing `garf_executors.api_executor.ApiQueryExecutor`
|
|
17
|
-
import like this `garf_executors.ApiQueryExecutor`
|
|
18
|
-
"""
|
|
14
|
+
"""Executors to fetch data from various APIS and postprocess them."""
|
|
19
15
|
|
|
20
16
|
from __future__ import annotations
|
|
21
17
|
|
|
22
|
-
from garf_executors
|
|
23
|
-
from garf_executors.
|
|
18
|
+
from garf_executors import bq_executor, fetchers, sql_executor
|
|
19
|
+
from garf_executors.api_executor import ApiExecutionContext, ApiQueryExecutor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def setup_executor(source: str, fetcher_parameters: dict[str, str]):
  """Initializes an executor based on a source and parameters.

  Args:
    source: Alias of the source. 'bq' selects BigQueryExecutor, 'sqldb'
      selects SqlAlchemyQueryExecutor, any other value is treated as an
      alias of an API report fetcher.
    fetcher_parameters: Parameters used to initialize the executor
      (or the underlying fetcher for API sources).

  Returns:
    Initialized executor for the requested source.

  Raises:
    SqlAlchemyQueryExecutorError: When source is 'sqldb' and
      'connection_string' is missing from fetcher_parameters.
  """
  if source == 'bq':
    return bq_executor.BigQueryExecutor(**fetcher_parameters)
  if source == 'sqldb':
    # Fail early with a clear message instead of handing None to SqlAlchemy.
    if not (connection_string := fetcher_parameters.get('connection_string')):
      raise sql_executor.SqlAlchemyQueryExecutorError(
        'connection_string is required for the "sqldb" source.'
      )
    return sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
      connection_string
    )
  # Reuse the executor's own alternate constructor rather than repeating
  # the fetcher lookup here.
  return ApiQueryExecutor.from_fetcher_alias(source, fetcher_parameters)
|
|
38
|
+
|
|
24
39
|
|
|
25
40
|
__all__ = [
|
|
26
|
-
'FETCHERS',
|
|
27
41
|
'ApiQueryExecutor',
|
|
42
|
+
'ApiExecutionContext',
|
|
28
43
|
]
|
|
29
44
|
|
|
30
|
-
__version__ = '0.0.
|
|
45
|
+
__version__ = '0.0.8'
|
garf_executors/api_executor.py
CHANGED
|
@@ -22,48 +22,19 @@ from __future__ import annotations
|
|
|
22
22
|
|
|
23
23
|
import logging
|
|
24
24
|
|
|
25
|
-
import
|
|
26
|
-
|
|
27
|
-
from garf_core import query_editor, report_fetcher
|
|
28
|
-
from garf_executors import exceptions
|
|
29
|
-
from garf_io import writer
|
|
30
|
-
from garf_io.writers import abs_writer
|
|
25
|
+
from garf_core import report_fetcher
|
|
26
|
+
from garf_executors import exceptions, execution_context, executor, fetchers
|
|
31
27
|
|
|
32
28
|
logger = logging.getLogger(__name__)
|
|
33
29
|
|
|
34
30
|
|
|
35
|
-
class ApiExecutionContext(
|
|
36
|
-
"""Common context for executing one or more queries.
|
|
37
|
-
|
|
38
|
-
Attributes:
|
|
39
|
-
query_parameters: Parameters to dynamically change query text.
|
|
40
|
-
fetcher_parameters: Parameters to specify fetching setup.
|
|
41
|
-
writer: Type of writer to use.
|
|
42
|
-
writer_parameters: Optional parameters to setup writer.
|
|
43
|
-
"""
|
|
31
|
+
class ApiExecutionContext(execution_context.ExecutionContext):
|
|
32
|
+
"""Common context for executing one or more queries."""
|
|
44
33
|
|
|
45
|
-
query_parameters: query_editor.GarfQueryParameters | None = None
|
|
46
|
-
fetcher_parameters: dict[str, str] | None = None
|
|
47
34
|
writer: str = 'console'
|
|
48
|
-
writer_parameters: dict[str, str] | None = None
|
|
49
35
|
|
|
50
|
-
def model_post_init(self, __context__) -> None:
|
|
51
|
-
if self.fetcher_parameters is None:
|
|
52
|
-
self.fetcher_parameters = {}
|
|
53
|
-
if self.writer_parameters is None:
|
|
54
|
-
self.writer_parameters = {}
|
|
55
36
|
|
|
56
|
-
|
|
57
|
-
def writer_client(self) -> abs_writer.AbsWriter:
|
|
58
|
-
writer_client = writer.create_writer(self.writer, **self.writer_parameters)
|
|
59
|
-
if self.writer == 'bq':
|
|
60
|
-
_ = writer_client.create_or_get_dataset()
|
|
61
|
-
if self.writer == 'sheet':
|
|
62
|
-
writer_client.init_client()
|
|
63
|
-
return writer_client
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
class ApiQueryExecutor:
|
|
37
|
+
class ApiQueryExecutor(executor.Executor):
|
|
67
38
|
"""Gets data from API and writes them to local/remote storage.
|
|
68
39
|
|
|
69
40
|
Attributes:
|
|
@@ -78,23 +49,37 @@ class ApiQueryExecutor:
|
|
|
78
49
|
"""
|
|
79
50
|
self.fetcher = fetcher
|
|
80
51
|
|
|
52
|
+
@classmethod
def from_fetcher_alias(
  cls, source: str, fetcher_parameters: dict[str, str]
) -> ApiQueryExecutor:
  """Creates executor for a report fetcher found by its alias.

  Args:
    source: Alias for a source associated with a fetcher.
    fetcher_parameters: Parameters passed to the fetcher on initialization.

  Returns:
    Executor initialized with the found fetcher.
  """
  concrete_api_fetcher = fetchers.get_report_fetcher(source)
  # Instantiate through cls so subclasses get instances of their own type.
  return cls(concrete_api_fetcher(**fetcher_parameters))
|
|
58
|
+
|
|
81
59
|
async def aexecute(
  self,
  query: str,
  title: str,
  context: ApiExecutionContext,
) -> str:
  """Performs query execution asynchronously.

  The synchronous `execute` is run in a worker thread so that the event loop
  is not blocked while the query is fetched and written.

  Args:
    query: Query to execute.
    title: Name of the query.
    context: Query execution context.

  Returns:
    Result of writing the report.
  """
  # Local import keeps this block self-contained.
  import asyncio

  # `execute` is a regular (non-coroutine) method taking
  # (query, title, context); it cannot be awaited directly.
  return await asyncio.to_thread(self.execute, query, title, context)
|
|
91
76
|
|
|
92
77
|
def execute(
|
|
93
78
|
self,
|
|
94
79
|
query: str,
|
|
95
80
|
title: str,
|
|
96
81
|
context: ApiExecutionContext,
|
|
97
|
-
) ->
|
|
82
|
+
) -> str:
|
|
98
83
|
"""Reads query, extract results and stores them in a specified location.
|
|
99
84
|
|
|
100
85
|
Args:
|
|
@@ -102,6 +87,9 @@ class ApiQueryExecutor:
|
|
|
102
87
|
title: Name of the query.
|
|
103
88
|
context: Query execution context.
|
|
104
89
|
|
|
90
|
+
Returns:
|
|
91
|
+
Result of writing the report.
|
|
92
|
+
|
|
105
93
|
Raises:
|
|
106
94
|
GarfExecutorError: When failed to execute query.
|
|
107
95
|
"""
|
|
@@ -118,13 +106,14 @@ class ApiQueryExecutor:
|
|
|
118
106
|
title,
|
|
119
107
|
type(writer_client),
|
|
120
108
|
)
|
|
121
|
-
writer_client.write(results, title)
|
|
109
|
+
result = writer_client.write(results, title)
|
|
122
110
|
logger.debug(
|
|
123
111
|
'Finish writing data for query %s via %s writer',
|
|
124
112
|
title,
|
|
125
113
|
type(writer_client),
|
|
126
114
|
)
|
|
127
115
|
logger.info('%s executed successfully', title)
|
|
116
|
+
return result
|
|
128
117
|
except Exception as e:
|
|
129
118
|
logger.error('%s generated an exception: %s', title, str(e))
|
|
130
119
|
raise exceptions.GarfExecutorError(
|
garf_executors/bq_executor.py
CHANGED
|
@@ -11,10 +11,12 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
"""
|
|
14
|
+
"""Executes queries in BigQuery."""
|
|
15
15
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
+
import os
|
|
19
|
+
|
|
18
20
|
try:
|
|
19
21
|
from google.cloud import bigquery # type: ignore
|
|
20
22
|
except ImportError as e:
|
|
@@ -25,19 +27,19 @@ except ImportError as e:
|
|
|
25
27
|
|
|
26
28
|
import logging
|
|
27
29
|
|
|
28
|
-
import pandas as pd
|
|
29
30
|
from google.cloud import exceptions as google_cloud_exceptions
|
|
30
31
|
|
|
31
|
-
from garf_core import query_editor
|
|
32
|
+
from garf_core import query_editor, report
|
|
33
|
+
from garf_executors import exceptions, execution_context, executor
|
|
32
34
|
|
|
33
35
|
logger = logging.getLogger(__name__)
|
|
34
36
|
|
|
35
37
|
|
|
36
|
-
class BigQueryExecutorError(
|
|
37
|
-
"""Error when
|
|
38
|
+
class BigQueryExecutorError(exceptions.GarfExecutorError):
|
|
39
|
+
"""Error when BigQueryExecutor fails to run query."""
|
|
38
40
|
|
|
39
41
|
|
|
40
|
-
class BigQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
42
|
+
class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
41
43
|
"""Handles query execution in BigQuery.
|
|
42
44
|
|
|
43
45
|
Attributes:
|
|
@@ -46,13 +48,22 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
|
46
48
|
client: BigQuery client.
|
|
47
49
|
"""
|
|
48
50
|
|
|
49
|
-
def __init__(
|
|
51
|
+
def __init__(
  self,
  project_id: str | None = None,
  location: str | None = None,
) -> None:
  """Initializes BigQueryExecutor.

  Args:
    project_id: Google Cloud project id. Falls back to the
      GOOGLE_CLOUD_PROJECT env variable when not provided.
    location: BigQuery dataset location.

  Raises:
    BigQueryExecutorError: When project id is neither provided nor set
      via the GOOGLE_CLOUD_PROJECT env variable.
  """
  # Read the env variable at call time; a default in the signature would be
  # evaluated once at import and miss variables set afterwards.
  project_id = project_id or os.getenv('GOOGLE_CLOUD_PROJECT')
  if not project_id:
    raise BigQueryExecutorError(
      'project_id is required. Either provide it as project_id parameter '
      'or GOOGLE_CLOUD_PROJECT env variable.'
    )
  self.project_id = project_id
  self.location = location
|
|
58
69
|
|
|
@@ -62,26 +73,48 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
|
62
73
|
return bigquery.Client(self.project_id)
|
|
63
74
|
|
|
64
75
|
def execute(
  self,
  query: str,
  title: str,
  context: execution_context.ExecutionContext | None = None,
) -> report.GarfReport:
  """Executes query in BigQuery.

  Args:
    query: Query to execute.
    title: Name of the query.
    context: Query execution context. A new empty context is created
      when omitted.

  Returns:
    Result of writing the report when the context specifies a writer and
    the query returned data; otherwise the report itself (an empty report
    when the query returned no rows).

  Raises:
    BigQueryExecutorError: When BigQuery fails to run the query.
  """
  # A context instantiated in the signature is created once at import time
  # and shared between all calls; build it per call instead.
  if context is None:
    context = execution_context.ExecutionContext()
  query_text = self.replace_params_template(query, context.query_parameters)
  job = self.client.query(query_text)
  try:
    result = job.result()
    logger.debug('%s launched successfully', title)
    if result.total_rows:
      results = report.GarfReport.from_pandas(result.to_dataframe())
    else:
      results = report.GarfReport()
    if context.writer and results:
      writer_client = context.writer_client
      logger.debug(
        'Start writing data for query %s via %s writer',
        title,
        type(writer_client),
      )
      writing_result = writer_client.write(results, title)
      logger.debug(
        'Finish writing data for query %s via %s writer',
        title,
        type(writer_client),
      )
      logger.info('%s executed successfully', title)
      return writing_result
    return results
  except google_cloud_exceptions.GoogleCloudError as e:
    raise BigQueryExecutorError(e) from e
|
|
87
120
|
|
|
@@ -92,7 +125,7 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
|
92
125
|
are treated as dataset names.
|
|
93
126
|
|
|
94
127
|
Args:
|
|
95
|
-
|
|
128
|
+
macros: Mapping containing data for query execution.
|
|
96
129
|
"""
|
|
97
130
|
if macros and (datasets := extract_datasets(macros)):
|
|
98
131
|
for dataset in datasets:
|
|
@@ -21,7 +21,6 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
import argparse
|
|
23
23
|
import sys
|
|
24
|
-
from concurrent import futures
|
|
25
24
|
|
|
26
25
|
import garf_executors
|
|
27
26
|
from garf_executors import exceptions
|
|
@@ -56,13 +55,6 @@ def main():
|
|
|
56
55
|
if args.version:
|
|
57
56
|
print(garf_executors.__version__)
|
|
58
57
|
sys.exit()
|
|
59
|
-
if not (source := args.source):
|
|
60
|
-
raise exceptions.GarfExecutorError(
|
|
61
|
-
f'Select one of available sources: {list(garf_executors.FETCHERS.keys())}'
|
|
62
|
-
)
|
|
63
|
-
if not (concrete_api_fetcher := garf_executors.FETCHERS.get(source)):
|
|
64
|
-
raise exceptions.GarfExecutorError(f'Source {source} is not available.')
|
|
65
|
-
|
|
66
58
|
logger = utils.init_logging(
|
|
67
59
|
loglevel=args.loglevel.upper(), logger_type=args.logger
|
|
68
60
|
)
|
|
@@ -88,23 +80,13 @@ def main():
|
|
|
88
80
|
writer_parameters=config.writer_params,
|
|
89
81
|
fetcher_parameters=source_parameters,
|
|
90
82
|
)
|
|
91
|
-
query_executor = garf_executors.
|
|
92
|
-
|
|
83
|
+
query_executor = garf_executors.setup_executor(
|
|
84
|
+
args.source, context.fetcher_parameters
|
|
93
85
|
)
|
|
94
86
|
if args.parallel_queries:
|
|
95
87
|
logger.info('Running queries in parallel')
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
executor.submit(
|
|
99
|
-
query_executor.execute,
|
|
100
|
-
reader_client.read(query),
|
|
101
|
-
query,
|
|
102
|
-
context,
|
|
103
|
-
): query
|
|
104
|
-
for query in args.query
|
|
105
|
-
}
|
|
106
|
-
for future in futures.as_completed(future_to_query):
|
|
107
|
-
future.result()
|
|
88
|
+
batch = {query: reader_client.read(query) for query in args.query}
|
|
89
|
+
query_executor.execute_batch(batch, context, args.parallel_queries)
|
|
108
90
|
else:
|
|
109
91
|
logger.info('Running queries sequentially')
|
|
110
92
|
for query in args.query:
|
|
@@ -20,6 +20,7 @@ import uvicorn
|
|
|
20
20
|
|
|
21
21
|
import garf_executors
|
|
22
22
|
from garf_executors import exceptions
|
|
23
|
+
from garf_io import reader
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class ApiExecutorRequest(pydantic.BaseModel):
|
|
@@ -27,36 +28,64 @@ class ApiExecutorRequest(pydantic.BaseModel):
|
|
|
27
28
|
|
|
28
29
|
Attributes:
|
|
29
30
|
source: Type of API to interact with.
|
|
30
|
-
query: Query to execute.
|
|
31
31
|
title: Name of the query used as an output for writing.
|
|
32
|
+
query: Query to execute.
|
|
33
|
+
query_path: Local or remote path to query.
|
|
32
34
|
context: Execution context.
|
|
33
35
|
"""
|
|
34
36
|
|
|
35
37
|
source: str
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
title: str | None = None
|
|
39
|
+
query: str | None = None
|
|
40
|
+
query_path: str | list[str] | None = None
|
|
41
|
+
context: garf_executors.ApiExecutionContext
|
|
42
|
+
|
|
43
|
+
@pydantic.model_validator(mode='after')
|
|
44
|
+
def check_query_specified(self):
|
|
45
|
+
if not self.query_path and not self.query:
|
|
46
|
+
raise exceptions.GarfExecutorError(
|
|
47
|
+
'Missing one of required parameters: query, query_path'
|
|
48
|
+
)
|
|
49
|
+
return self
|
|
50
|
+
|
|
51
|
+
def model_post_init(self, __context__) -> None:
|
|
52
|
+
if self.query_path and isinstance(self.query_path, str):
|
|
53
|
+
self.query = reader.FileReader().read(self.query_path)
|
|
54
|
+
if not self.title:
|
|
55
|
+
self.title = str(self.query_path)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class ApiExecutorResponse(pydantic.BaseModel):
|
|
59
|
+
"""Response after executing a query.
|
|
60
|
+
|
|
61
|
+
Attributes:
|
|
62
|
+
results: Results of query execution.
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
results: list[str]
|
|
39
66
|
|
|
40
67
|
|
|
41
68
|
router = fastapi.APIRouter(prefix='/api')
|
|
42
69
|
|
|
43
70
|
|
|
44
71
|
@router.post('/execute')
|
|
45
|
-
async def execute(request: ApiExecutorRequest) ->
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
f'Source {request.source} is not available.'
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
query_executor = garf_executors.api_executor.ApiQueryExecutor(
|
|
52
|
-
concrete_api_fetcher(**request.context.fetcher_parameters)
|
|
72
|
+
async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
73
|
+
query_executor = garf_executors.setup_executor(
|
|
74
|
+
request.source, request.context.fetcher_parameters
|
|
53
75
|
)
|
|
76
|
+
result = query_executor.execute(request.query, request.title, request.context)
|
|
77
|
+
return ApiExecutorResponse(results=[result])
|
|
54
78
|
|
|
55
|
-
query_executor.execute(request.query, request.title, request.context)
|
|
56
79
|
|
|
57
|
-
|
|
58
|
-
|
|
80
|
+
@router.post('/execute:batch')
async def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
  """Executes every query referenced by `query_path` for a common context.

  Args:
    request: Request with source, query path(s) and execution context.

  Returns:
    Response with results of executing each query.

  Raises:
    GarfExecutorError: When the request contains no query_path.
  """
  query_paths = request.query_path
  # query_path may be a single path; iterating a str would yield characters.
  if isinstance(query_paths, str):
    query_paths = [query_paths]
  # A request that only carries inline `query` passes validation but has
  # nothing to read here.
  if not query_paths:
    raise exceptions.GarfExecutorError(
      'Missing required parameter for batch execution: query_path'
    )
  query_executor = garf_executors.setup_executor(
    request.source, request.context.fetcher_parameters
  )
  reader_client = reader.FileReader()
  batch = {query: reader_client.read(query) for query in query_paths}
  results = query_executor.execute_batch(batch, request.context)
  return ApiExecutorResponse(results=results)
|
|
60
89
|
|
|
61
90
|
|
|
62
91
|
if __name__ == '__main__':
|
|
@@ -298,6 +298,8 @@ class ParamsParser:
|
|
|
298
298
|
if not identifier or identifier not in key:
|
|
299
299
|
return None
|
|
300
300
|
provided_identifier, *keys = key.split('.')
|
|
301
|
+
if not keys:
|
|
302
|
+
return None
|
|
301
303
|
if len(keys) > 1:
|
|
302
304
|
raise GarfParamsException(
|
|
303
305
|
f'{key} is invalid format,'
|
|
@@ -306,9 +308,10 @@ class ParamsParser:
|
|
|
306
308
|
)
|
|
307
309
|
provided_identifier = provided_identifier.replace('--', '')
|
|
308
310
|
if provided_identifier not in self.identifiers:
|
|
311
|
+
supported_arguments = ', '.join(self.identifiers)
|
|
309
312
|
raise GarfParamsException(
|
|
310
313
|
f'CLI argument {provided_identifier} is not supported'
|
|
311
|
-
f', supported arguments {
|
|
314
|
+
f', supported arguments {supported_arguments}'
|
|
312
315
|
)
|
|
313
316
|
if provided_identifier != identifier:
|
|
314
317
|
return None
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# pylint: disable=C0330, g-bad-import-order, g-multiple-import
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import pydantic
|
|
20
|
+
|
|
21
|
+
from garf_core import query_editor
|
|
22
|
+
from garf_io import writer
|
|
23
|
+
from garf_io.writers import abs_writer
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ExecutionContext(pydantic.BaseModel):
  """Shared settings for executing one or more queries.

  Attributes:
    query_parameters: Parameters to dynamically change query text.
    fetcher_parameters: Parameters to specify fetching setup.
    writer: Type of writer to use.
    writer_parameters: Optional parameters to setup writer.
  """

  query_parameters: query_editor.GarfQueryParameters | None = pydantic.Field(
    default_factory=dict
  )
  fetcher_parameters: dict[str, str] | None = pydantic.Field(
    default_factory=dict
  )
  writer: str | None = None
  writer_parameters: dict[str, str] | None = pydantic.Field(
    default_factory=dict
  )

  def model_post_init(self, __context__) -> None:
    """Replaces fetcher / writer parameters given as None with empty dicts."""
    if self.writer_parameters is None:
      self.writer_parameters = {}
    if self.fetcher_parameters is None:
      self.fetcher_parameters = {}

  @property
  def writer_client(self) -> abs_writer.AbsWriter:
    """Creates a writer for the configured writer type and parameters.

    'bq' writers get their dataset created or fetched and 'sheet' writers
    get their client initialized before the writer is returned.
    """
    client = writer.create_writer(self.writer, **self.writer_parameters)
    if self.writer == 'bq':
      client.create_or_get_dataset()
    elif self.writer == 'sheet':
      client.init_client()
    return client
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Defines common functionality between executors."""
|
|
16
|
+
|
|
17
|
+
from concurrent import futures
|
|
18
|
+
|
|
19
|
+
from garf_executors import execution_context
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Executor:
  """Defines common functionality between executors."""

  def execute_batch(
    self,
    batch: dict[str, str],
    context: execution_context.ExecutionContext,
    parallel_threshold: int = 10,
  ) -> list[str]:
    """Executes batch of queries for a common context.

    Queries run concurrently in a thread pool; each one is passed to
    `self.execute` as (query text, title, context).

    Args:
      batch: Mapping between query_title and its text.
      context: Execution context.
      parallel_threshold: Number of queries to execute in parallel.

    Returns:
      Results of execution, in the same order as the queries in the batch.
    """
    with futures.ThreadPoolExecutor(max_workers=parallel_threshold) as pool:
      submitted = [
        pool.submit(self.execute, query, title, context)
        for title, query in batch.items()
      ]
      # Collect in submission order (not completion order) so that callers
      # can match each result to its query.
      return [future.result() for future in submitted]
|
garf_executors/fetchers.py
CHANGED
|
@@ -15,23 +15,41 @@
|
|
|
15
15
|
import inspect
|
|
16
16
|
from importlib.metadata import entry_points
|
|
17
17
|
|
|
18
|
-
from garf_core import report_fetcher
|
|
18
|
+
from garf_core import exceptions, report_fetcher
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
def
|
|
21
|
+
def find_fetchers() -> set[str]:
|
|
22
|
+
"""Identifiers all available report fetchers."""
|
|
23
|
+
return {fetcher.name for fetcher in entry_points(group='garf')}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
  """Loads report fetcher for a given source.

  Args:
    source: Alias for a source associated with a fetcher.

  Returns:
    Class for a found report fetcher.

  Raises:
    ApiReportFetcherError: When fetcher cannot be loaded.
    MissingApiReportFetcherError: When fetcher not found.
  """
  candidates = [
    entry_point
    for entry_point in entry_points(group='garf')
    if entry_point.name == source
  ]
  if not candidates:
    raise report_fetcher.MissingApiReportFetcherError(source)
  for entry_point in candidates:
    try:
      fetcher_module = entry_point.load()
    except ModuleNotFoundError:
      # Entry point is registered but its module is not importable;
      # try the next candidate.
      continue
    for _, member in inspect.getmembers(fetcher_module):
      if inspect.isclass(member) and issubclass(
        member, report_fetcher.ApiReportFetcher
      ):
        return member
  raise exceptions.ApiReportFetcherError(
    f'No fetcher available for the source "{source}"'
  )
|
garf_executors/sql_executor.py
CHANGED
|
@@ -25,14 +25,22 @@ except ImportError as e:
|
|
|
25
25
|
|
|
26
26
|
import logging
|
|
27
27
|
import re
|
|
28
|
-
from typing import Any
|
|
29
28
|
|
|
30
29
|
import pandas as pd
|
|
31
30
|
|
|
32
|
-
from garf_core import query_editor
|
|
31
|
+
from garf_core import query_editor, report
|
|
32
|
+
from garf_executors import exceptions, execution_context, executor
|
|
33
33
|
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
34
35
|
|
|
35
|
-
|
|
36
|
+
|
|
37
|
+
class SqlAlchemyQueryExecutorError(exceptions.GarfExecutorError):
|
|
38
|
+
"""Error when SqlAlchemyQueryExecutor fails to run query."""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SqlAlchemyQueryExecutor(
|
|
42
|
+
executor.Executor, query_editor.TemplateProcessorMixin
|
|
43
|
+
):
|
|
36
44
|
"""Handles query execution via SqlAlchemy.
|
|
37
45
|
|
|
38
46
|
Attributes:
|
|
@@ -51,36 +59,60 @@ class SqlAlchemyQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
|
51
59
|
def from_connection_string(
|
|
52
60
|
cls, connection_string: str
|
|
53
61
|
) -> SqlAlchemyQueryExecutor:
|
|
62
|
+
"""Creates executor from SqlAlchemy connection string.
|
|
63
|
+
|
|
64
|
+
https://docs.sqlalchemy.org/en/20/core/engines.html
|
|
65
|
+
"""
|
|
54
66
|
engine = sqlalchemy.create_engine(connection_string)
|
|
55
67
|
return cls(engine)
|
|
56
68
|
|
|
57
69
|
def execute(
  self,
  query: str,
  title: str,
  context: execution_context.ExecutionContext | None = None,
) -> report.GarfReport:
  """Executes query in a given database via SqlAlchemy.

  Args:
    query: Query to execute.
    title: Name of the query.
    context: Query execution context. A new empty context is created
      when omitted.

  Returns:
    Result of writing the report when the context specifies a writer and
    the query returned data; otherwise the report itself (an empty report
    for create / update scripts).
  """
  # A context instantiated in the signature is created once at import time
  # and shared between all calls; build it per call instead.
  if context is None:
    context = execution_context.ExecutionContext()
  # Use the module logger so messages carry this module's name.
  logger.info('Executing script: %s', title)
  query_text = self.replace_params_template(query, context.query_parameters)
  with self.engine.begin() as conn:
    if re.findall(r'(create|update) ', query_text.lower()):
      conn.connection.executescript(query_text)
      results = report.GarfReport()
    else:
      # The title is interpolated into SQL as a table name, so every
      # non-word character (dots, slashes, dashes, spaces, quotes) is
      # replaced to keep the identifier valid.
      temp_table_name = re.sub(r'\W', '_', f'temp_{title}')
      query_text = f'CREATE TABLE {temp_table_name} AS {query_text}'
      conn.connection.executescript(query_text)
      try:
        results = report.GarfReport.from_pandas(
          pd.read_sql(f'SELECT * FROM {temp_table_name}', conn)
        )
      finally:
        # Always remove the temporary table, even if reading failed.
        conn.connection.execute(f'DROP TABLE {temp_table_name}')
    if context.writer and results:
      writer_client = context.writer_client
      logger.debug(
        'Start writing data for query %s via %s writer',
        title,
        type(writer_client),
      )
      writing_result = writer_client.write(results, title)
      logger.debug(
        'Finish writing data for query %s via %s writer',
        title,
        type(writer_client),
      )
      logger.info('%s executed successfully', title)
      return writing_result
    return results
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: garf-executors
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.8
|
|
4
4
|
Summary: Executes queries against API and writes data to local/remote storage.
|
|
5
5
|
Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
|
|
6
6
|
License: Apache 2.0
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
garf_executors/__init__.py,sha256=MYFPN_XcBsgaW9jzoM9UHFKerUzhIjmFGgGyaBscWGE,1508
|
|
2
|
+
garf_executors/api_executor.py,sha256=zSLyUGZIav2t0E6uXdaZX9Ps-mE_Q2YLeTjH4s3i028,3497
|
|
3
|
+
garf_executors/bq_executor.py,sha256=XRik48P7aQkoLpABzWzEX2t3ktQVPrXp2v7Zdu8qWVI,4802
|
|
4
|
+
garf_executors/exceptions.py,sha256=U_7Q2ZMOUf89gzZd2pw7y3g7i1NeByPPKfpZ3q7p3ZU,662
|
|
5
|
+
garf_executors/execution_context.py,sha256=0PYYnwkwBJ2B1HpNN5MrANZkIbuzyxH7EzEjUPf0GGA,1966
|
|
6
|
+
garf_executors/executor.py,sha256=bGTGlWZT5B7I_WIjhuQ0CkL7Dij_ijFCBxuC1jGVkng,1626
|
|
7
|
+
garf_executors/fetchers.py,sha256=m2feJ6ByYq-oJXuQ3tmaNMx7soMcGsVC2hY4kOsPaNQ,1833
|
|
8
|
+
garf_executors/sql_executor.py,sha256=vG3-FM4C1O0rVwFMPIw85xoErsnTBYoVhMTgt9jP3QM,3699
|
|
9
|
+
garf_executors/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
garf_executors/entrypoints/cli.py,sha256=ZGwDQ2V6hYMjaVVcNDrAyvagrAET8X1kXFfD2gSG4BI,3455
|
|
11
|
+
garf_executors/entrypoints/server.py,sha256=MEEPxcIfy_PeDuz8oJ7wIZVUcz0q54mW0y89x5I_VgM,2821
|
|
12
|
+
garf_executors/entrypoints/utils.py,sha256=p483h5RY_kfwOhNq2RqwMnunOoTGXGA59nCyHY_Lvgg,15057
|
|
13
|
+
garf_executors-0.0.8.dist-info/METADATA,sha256=Z8A7U3jBsL14uEpv6-mHpjKNSrF480_QpAvUVC9V4fI,2648
|
|
14
|
+
garf_executors-0.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
garf_executors-0.0.8.dist-info/entry_points.txt,sha256=LskWNFIw8j0WJuI18-32OZrlASXAMg1XtrRYwsKBz2E,61
|
|
16
|
+
garf_executors-0.0.8.dist-info/top_level.txt,sha256=sP4dCXOENPn1hDFAunjMV8Js4NND_KGeO_gQWuaT0EY,15
|
|
17
|
+
garf_executors-0.0.8.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
garf_executors/__init__.py,sha256=bcb29OEvsx2XNTpbUW0LvKxoYJt5BSX3S2gqQLdRIqU,955
|
|
2
|
-
garf_executors/api_executor.py,sha256=udrlMiYUmKh5NsIuJkNowqCenvtf5O925FPFawXSXbM,4021
|
|
3
|
-
garf_executors/bq_executor.py,sha256=JBPxbDRYgUgpJv6SqYiFPypTFjZGIZ-SOOb6dS2sZQY,3822
|
|
4
|
-
garf_executors/exceptions.py,sha256=U_7Q2ZMOUf89gzZd2pw7y3g7i1NeByPPKfpZ3q7p3ZU,662
|
|
5
|
-
garf_executors/fetchers.py,sha256=gkAKHsDPzJySg4wYLZeCmNINtk6f17-jFzOP7tE82r8,1226
|
|
6
|
-
garf_executors/sql_executor.py,sha256=6tpsd1Ive5igAlQuhCSkli-tZHp58uWAU86JWGvdVpE,2722
|
|
7
|
-
garf_executors/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
garf_executors/entrypoints/cli.py,sha256=mWvPQkaqarDj5byHRvNAweVbUQiHZLXrC-35zY7l4fs,4043
|
|
9
|
-
garf_executors/entrypoints/server.py,sha256=rJ29VKWKaYJci1BLxZx-0LSILmUMf5BK8G1RRjRS2ts,1836
|
|
10
|
-
garf_executors/entrypoints/utils.py,sha256=ZZJFe2N4KwgzPRvak9gW_B25qESnzOyuF-qYZ2wW2_M,14974
|
|
11
|
-
garf_executors-0.0.6.dist-info/METADATA,sha256=35dBABJ8cVH2nI0NonZ5VGO6W4IF0gtiiw-ZFZqZhgs,2648
|
|
12
|
-
garf_executors-0.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
garf_executors-0.0.6.dist-info/entry_points.txt,sha256=LskWNFIw8j0WJuI18-32OZrlASXAMg1XtrRYwsKBz2E,61
|
|
14
|
-
garf_executors-0.0.6.dist-info/top_level.txt,sha256=sP4dCXOENPn1hDFAunjMV8Js4NND_KGeO_gQWuaT0EY,15
|
|
15
|
-
garf_executors-0.0.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|