garf-executors 0.0.7__py3-none-any.whl → 0.0.8.post0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of garf-executors might be problematic. Click here for more details.
- garf_executors/__init__.py +23 -4
- garf_executors/api_executor.py +24 -37
- garf_executors/bq_executor.py +51 -18
- garf_executors/entrypoints/cli.py +4 -22
- garf_executors/entrypoints/server.py +26 -14
- garf_executors/execution_context.py +60 -0
- garf_executors/executor.py +54 -0
- garf_executors/fetchers.py +34 -16
- garf_executors/sql_executor.py +54 -22
- {garf_executors-0.0.7.dist-info → garf_executors-0.0.8.post0.dist-info}/METADATA +1 -1
- garf_executors-0.0.8.post0.dist-info/RECORD +17 -0
- garf_executors-0.0.7.dist-info/RECORD +0 -15
- {garf_executors-0.0.7.dist-info → garf_executors-0.0.8.post0.dist-info}/WHEEL +0 -0
- {garf_executors-0.0.7.dist-info → garf_executors-0.0.8.post0.dist-info}/entry_points.txt +0 -0
- {garf_executors-0.0.7.dist-info → garf_executors-0.0.8.post0.dist-info}/top_level.txt +0 -0
garf_executors/__init__.py
CHANGED
|
@@ -15,12 +15,31 @@
|
|
|
15
15
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
-
from garf_executors
|
|
19
|
-
from garf_executors.
|
|
18
|
+
from garf_executors import bq_executor, fetchers, sql_executor
|
|
19
|
+
from garf_executors.api_executor import ApiExecutionContext, ApiQueryExecutor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def setup_executor(
  source: str, fetcher_parameters: dict[str, str] | None = None
):
  """Initializes an executor based on a source and parameters.

  Args:
    source: Alias of the source. 'bq' selects BigQueryExecutor, 'sqldb'
      selects SqlAlchemyQueryExecutor; any other value is resolved to a
      report fetcher via `fetchers.get_report_fetcher`.
    fetcher_parameters: Keyword arguments used to build the executor or
      fetcher. For 'sqldb' only the 'connection_string' key is read.
      None is treated as an empty mapping.

  Returns:
    Initialized executor for the requested source.
  """
  # Normalise first: both `**None` and `None.get(...)` raise otherwise.
  fetcher_parameters = fetcher_parameters or {}
  if source == 'bq':
    return bq_executor.BigQueryExecutor(**fetcher_parameters)
  if source == 'sqldb':
    return sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
      fetcher_parameters.get('connection_string')
    )
  concrete_api_fetcher = fetchers.get_report_fetcher(source)
  return ApiQueryExecutor(concrete_api_fetcher(**fetcher_parameters))
|
|
38
|
+
|
|
20
39
|
|
|
21
40
|
__all__ = [
|
|
22
|
-
'FETCHERS',
|
|
23
41
|
'ApiQueryExecutor',
|
|
42
|
+
'ApiExecutionContext',
|
|
24
43
|
]
|
|
25
44
|
|
|
26
|
-
__version__ = '0.0.
|
|
45
|
+
__version__ = '0.0.8.post0'
|
garf_executors/api_executor.py
CHANGED
|
@@ -22,48 +22,19 @@ from __future__ import annotations
|
|
|
22
22
|
|
|
23
23
|
import logging
|
|
24
24
|
|
|
25
|
-
import
|
|
26
|
-
|
|
27
|
-
from garf_core import query_editor, report_fetcher
|
|
28
|
-
from garf_executors import exceptions
|
|
29
|
-
from garf_io import writer
|
|
30
|
-
from garf_io.writers import abs_writer
|
|
25
|
+
from garf_core import report_fetcher
|
|
26
|
+
from garf_executors import exceptions, execution_context, executor, fetchers
|
|
31
27
|
|
|
32
28
|
logger = logging.getLogger(__name__)
|
|
33
29
|
|
|
34
30
|
|
|
35
|
-
class ApiExecutionContext(
|
|
36
|
-
"""Common context for executing one or more queries.
|
|
37
|
-
|
|
38
|
-
Attributes:
|
|
39
|
-
query_parameters: Parameters to dynamically change query text.
|
|
40
|
-
fetcher_parameters: Parameters to specify fetching setup.
|
|
41
|
-
writer: Type of writer to use.
|
|
42
|
-
writer_parameters: Optional parameters to setup writer.
|
|
43
|
-
"""
|
|
31
|
+
class ApiExecutionContext(execution_context.ExecutionContext):
|
|
32
|
+
"""Common context for executing one or more queries."""
|
|
44
33
|
|
|
45
|
-
query_parameters: query_editor.GarfQueryParameters | None = None
|
|
46
|
-
fetcher_parameters: dict[str, str] | None = None
|
|
47
34
|
writer: str = 'console'
|
|
48
|
-
writer_parameters: dict[str, str] | None = None
|
|
49
35
|
|
|
50
|
-
def model_post_init(self, __context__) -> None:
|
|
51
|
-
if self.fetcher_parameters is None:
|
|
52
|
-
self.fetcher_parameters = {}
|
|
53
|
-
if self.writer_parameters is None:
|
|
54
|
-
self.writer_parameters = {}
|
|
55
36
|
|
|
56
|
-
|
|
57
|
-
def writer_client(self) -> abs_writer.AbsWriter:
|
|
58
|
-
writer_client = writer.create_writer(self.writer, **self.writer_parameters)
|
|
59
|
-
if self.writer == 'bq':
|
|
60
|
-
_ = writer_client.create_or_get_dataset()
|
|
61
|
-
if self.writer == 'sheet':
|
|
62
|
-
writer_client.init_client()
|
|
63
|
-
return writer_client
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
class ApiQueryExecutor:
|
|
37
|
+
class ApiQueryExecutor(executor.Executor):
|
|
67
38
|
"""Gets data from API and writes them to local/remote storage.
|
|
68
39
|
|
|
69
40
|
Attributes:
|
|
@@ -78,16 +49,32 @@ class ApiQueryExecutor:
|
|
|
78
49
|
"""
|
|
79
50
|
self.fetcher = fetcher
|
|
80
51
|
|
|
52
|
+
@classmethod
def from_fetcher_alias(
  cls, source: str, fetcher_parameters: dict[str, str] | None = None
) -> ApiQueryExecutor:
  """Creates an executor from a fetcher alias.

  Args:
    source: Alias passed to `fetchers.get_report_fetcher` to find the
      fetcher class.
    fetcher_parameters: Keyword arguments for the fetcher class; None or an
      empty mapping means the fetcher is built without arguments.

  Returns:
    Executor wrapping the initialized fetcher.
  """
  concrete_api_fetcher = fetchers.get_report_fetcher(source)
  # Build via `cls` so subclasses get an instance of their own type.
  return cls(concrete_api_fetcher(**(fetcher_parameters or {})))
|
|
60
|
+
|
|
81
61
|
async def aexecute(
  self,
  query: str,
  title: str,
  context: ApiExecutionContext,
) -> str:
  """Performs query execution asynchronously.

  `execute` is a regular (blocking) method, so it is run in the event loop's
  default executor instead of being awaited directly.

  Args:
    query: Query forwarded to `execute`.
    title: Name of the query.
    context: Query execution context.

  Returns:
    Whatever `execute` returns for the same arguments.
  """
  import asyncio  # Local import: only the async entry point needs it.

  loop = asyncio.get_running_loop()
  # Argument order must match execute(query, title, context).
  return await loop.run_in_executor(None, self.execute, query, title, context)
|
|
91
78
|
|
|
92
79
|
def execute(
|
|
93
80
|
self,
|
garf_executors/bq_executor.py
CHANGED
|
@@ -11,10 +11,12 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
"""
|
|
14
|
+
"""Executes queries in BigQuery."""
|
|
15
15
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
+
import os
|
|
19
|
+
|
|
18
20
|
try:
|
|
19
21
|
from google.cloud import bigquery # type: ignore
|
|
20
22
|
except ImportError as e:
|
|
@@ -25,19 +27,19 @@ except ImportError as e:
|
|
|
25
27
|
|
|
26
28
|
import logging
|
|
27
29
|
|
|
28
|
-
import pandas as pd
|
|
29
30
|
from google.cloud import exceptions as google_cloud_exceptions
|
|
30
31
|
|
|
31
|
-
from garf_core import query_editor
|
|
32
|
+
from garf_core import query_editor, report
|
|
33
|
+
from garf_executors import exceptions, execution_context, executor
|
|
32
34
|
|
|
33
35
|
logger = logging.getLogger(__name__)
|
|
34
36
|
|
|
35
37
|
|
|
36
|
-
class BigQueryExecutorError(
|
|
37
|
-
"""Error when
|
|
38
|
+
class BigQueryExecutorError(exceptions.GarfExecutorError):
|
|
39
|
+
"""Error when BigQueryExecutor fails to run query."""
|
|
38
40
|
|
|
39
41
|
|
|
40
|
-
class BigQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
42
|
+
class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
41
43
|
"""Handles query execution in BigQuery.
|
|
42
44
|
|
|
43
45
|
Attributes:
|
|
@@ -46,13 +48,22 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
|
46
48
|
client: BigQuery client.
|
|
47
49
|
"""
|
|
48
50
|
|
|
49
|
-
def __init__(
|
|
51
|
+
def __init__(
  self,
  project_id: str | None = None,
  location: str | None = None,
) -> None:
  """Initializes BigQueryExecutor.

  Args:
    project_id: Google Cloud project id. When not provided, the
      GOOGLE_CLOUD_PROJECT env variable is read at call time.
    location: BigQuery dataset location.

  Raises:
    BigQueryExecutorError: When project id is neither passed nor set in
      the GOOGLE_CLOUD_PROJECT env variable.
  """
  # Resolve the fallback here, not in the signature: a default of
  # os.getenv(...) is evaluated once at import time and misses later changes.
  project_id = project_id or os.getenv('GOOGLE_CLOUD_PROJECT')
  if not project_id:
    raise BigQueryExecutorError(
      'project_id is required. Either provide it as project_id parameter '
      'or GOOGLE_CLOUD_PROJECT env variable.'
    )
  self.project_id = project_id
  self.location = location
|
|
58
69
|
|
|
@@ -62,26 +73,48 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
|
62
73
|
return bigquery.Client(self.project_id)
|
|
63
74
|
|
|
64
75
|
def execute(
  self,
  query: str,
  title: str,
  context: execution_context.ExecutionContext | None = None,
) -> report.GarfReport:
  """Executes query in BigQuery.

  Args:
    query: Query text; it is passed through `replace_params_template`
      together with `context.query_parameters` before being run.
    title: Name of the query (used for logging and as the write target).
    context: Query execution context. A fresh default context is created
      per call when omitted.

  Returns:
    Report with data if query returns some data otherwise empty Report.
    When `context.writer` is set and the report is non-empty, the result of
    `writer_client.write` is returned instead.

  Raises:
    BigQueryExecutorError: When a GoogleCloudError is raised while waiting
      for the job result or while processing / writing it.
  """
  # Build the default per call; a default instance in the signature would be
  # created once and shared (and mutated) across all calls.
  if context is None:
    context = execution_context.ExecutionContext()
  query_text = self.replace_params_template(query, context.query_parameters)
  job = self.client.query(query_text)
  try:
    result = job.result()
    logger.debug('%s launched successfully', title)
    if result.total_rows:
      results = report.GarfReport.from_pandas(result.to_dataframe())
    else:
      results = report.GarfReport()
    if context.writer and results:
      writer_client = context.writer_client
      logger.debug(
        'Start writing data for query %s via %s writer',
        title,
        type(writer_client),
      )
      writing_result = writer_client.write(results, title)
      logger.debug(
        'Finish writing data for query %s via %s writer',
        title,
        type(writer_client),
      )
      logger.info('%s executed successfully', title)
      return writing_result
    return results
  except google_cloud_exceptions.GoogleCloudError as e:
    raise BigQueryExecutorError(e) from e
|
|
87
120
|
|
|
@@ -92,7 +125,7 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
|
92
125
|
are treated as dataset names.
|
|
93
126
|
|
|
94
127
|
Args:
|
|
95
|
-
|
|
128
|
+
macros: Mapping containing data for query execution.
|
|
96
129
|
"""
|
|
97
130
|
if macros and (datasets := extract_datasets(macros)):
|
|
98
131
|
for dataset in datasets:
|
|
@@ -21,7 +21,6 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
import argparse
|
|
23
23
|
import sys
|
|
24
|
-
from concurrent import futures
|
|
25
24
|
|
|
26
25
|
import garf_executors
|
|
27
26
|
from garf_executors import exceptions
|
|
@@ -56,13 +55,6 @@ def main():
|
|
|
56
55
|
if args.version:
|
|
57
56
|
print(garf_executors.__version__)
|
|
58
57
|
sys.exit()
|
|
59
|
-
if not (source := args.source):
|
|
60
|
-
raise exceptions.GarfExecutorError(
|
|
61
|
-
f'Select one of available sources: {list(garf_executors.FETCHERS.keys())}'
|
|
62
|
-
)
|
|
63
|
-
if not (concrete_api_fetcher := garf_executors.FETCHERS.get(source)):
|
|
64
|
-
raise exceptions.GarfExecutorError(f'Source {source} is not available.')
|
|
65
|
-
|
|
66
58
|
logger = utils.init_logging(
|
|
67
59
|
loglevel=args.loglevel.upper(), logger_type=args.logger
|
|
68
60
|
)
|
|
@@ -88,23 +80,13 @@ def main():
|
|
|
88
80
|
writer_parameters=config.writer_params,
|
|
89
81
|
fetcher_parameters=source_parameters,
|
|
90
82
|
)
|
|
91
|
-
query_executor = garf_executors.
|
|
92
|
-
|
|
83
|
+
query_executor = garf_executors.setup_executor(
|
|
84
|
+
args.source, context.fetcher_parameters
|
|
93
85
|
)
|
|
94
86
|
if args.parallel_queries:
|
|
95
87
|
logger.info('Running queries in parallel')
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
executor.submit(
|
|
99
|
-
query_executor.execute,
|
|
100
|
-
reader_client.read(query),
|
|
101
|
-
query,
|
|
102
|
-
context,
|
|
103
|
-
): query
|
|
104
|
-
for query in args.query
|
|
105
|
-
}
|
|
106
|
-
for future in futures.as_completed(future_to_query):
|
|
107
|
-
future.result()
|
|
88
|
+
batch = {query: reader_client.read(query) for query in args.query}
|
|
89
|
+
query_executor.execute_batch(batch, context, args.parallel_queries)
|
|
108
90
|
else:
|
|
109
91
|
logger.info('Running queries sequentially')
|
|
110
92
|
for query in args.query:
|
|
@@ -37,8 +37,8 @@ class ApiExecutorRequest(pydantic.BaseModel):
|
|
|
37
37
|
source: str
|
|
38
38
|
title: str | None = None
|
|
39
39
|
query: str | None = None
|
|
40
|
-
query_path: str | None = None
|
|
41
|
-
context: garf_executors.
|
|
40
|
+
query_path: str | list[str] | None = None
|
|
41
|
+
context: garf_executors.ApiExecutionContext
|
|
42
42
|
|
|
43
43
|
@pydantic.model_validator(mode='after')
|
|
44
44
|
def check_query_specified(self):
|
|
@@ -49,31 +49,43 @@ class ApiExecutorRequest(pydantic.BaseModel):
|
|
|
49
49
|
return self
|
|
50
50
|
|
|
51
51
|
def model_post_init(self, __context__) -> None:
|
|
52
|
-
if self.query_path:
|
|
52
|
+
if self.query_path and isinstance(self.query_path, str):
|
|
53
53
|
self.query = reader.FileReader().read(self.query_path)
|
|
54
54
|
if not self.title:
|
|
55
55
|
self.title = str(self.query_path)
|
|
56
56
|
|
|
57
57
|
|
|
58
|
+
class ApiExecutorResponse(pydantic.BaseModel):
|
|
59
|
+
"""Response after executing a query.
|
|
60
|
+
|
|
61
|
+
Attributes:
|
|
62
|
+
results: Results of query execution.
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
results: list[str]
|
|
66
|
+
|
|
67
|
+
|
|
58
68
|
router = fastapi.APIRouter(prefix='/api')
|
|
59
69
|
|
|
60
70
|
|
|
61
71
|
@router.post('/execute')
|
|
62
|
-
async def execute(request: ApiExecutorRequest) ->
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
f'Source {request.source} is not available.'
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
query_executor = garf_executors.api_executor.ApiQueryExecutor(
|
|
69
|
-
concrete_api_fetcher(**request.context.fetcher_parameters)
|
|
72
|
+
async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
73
|
+
query_executor = garf_executors.setup_executor(
|
|
74
|
+
request.source, request.context.fetcher_parameters
|
|
70
75
|
)
|
|
71
|
-
|
|
72
76
|
result = query_executor.execute(request.query, request.title, request.context)
|
|
77
|
+
return ApiExecutorResponse(results=[result])
|
|
78
|
+
|
|
73
79
|
|
|
74
|
-
|
|
75
|
-
|
|
80
|
+
@router.post('/execute:batch')
async def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
  """Executes every query file listed in the request with a shared context.

  Args:
    request: Request whose `query_path` holds one path or a list of paths
      to query files; each path doubles as the query title.

  Returns:
    Response with the results of all executed queries.
  """
  query_executor = garf_executors.setup_executor(
    request.source, request.context.fetcher_parameters
  )
  # `query_path` may be a single string or None; iterating a bare string
  # would treat every character as a separate path.
  query_paths = request.query_path or []
  if isinstance(query_paths, str):
    query_paths = [query_paths]
  reader_client = reader.FileReader()
  batch = {query: reader_client.read(query) for query in query_paths}
  results = query_executor.execute_batch(batch, request.context)
  return ApiExecutorResponse(results=results)
|
|
77
89
|
|
|
78
90
|
|
|
79
91
|
if __name__ == '__main__':
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# pylint: disable=C0330, g-bad-import-order, g-multiple-import
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import pydantic
|
|
20
|
+
|
|
21
|
+
from garf_core import query_editor
|
|
22
|
+
from garf_io import writer
|
|
23
|
+
from garf_io.writers import abs_writer
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ExecutionContext(pydantic.BaseModel):
  """Shared settings for running one or more queries.

  Attributes:
    query_parameters: Parameters to dynamically change query text.
    fetcher_parameters: Parameters to specify fetching setup.
    writer: Type of writer to use.
    writer_parameters: Optional parameters to setup writer.
  """

  query_parameters: query_editor.GarfQueryParameters | None = pydantic.Field(
    default_factory=dict
  )
  fetcher_parameters: dict[str, str] | None = pydantic.Field(
    default_factory=dict
  )
  writer: str | None = None
  writer_parameters: dict[str, str] | None = pydantic.Field(
    default_factory=dict
  )

  def model_post_init(self, __context__) -> None:
    """Replaces explicit None fetcher / writer parameters with empty dicts."""
    for field_name in ('fetcher_parameters', 'writer_parameters'):
      if getattr(self, field_name) is None:
        setattr(self, field_name, {})

  @property
  def writer_client(self) -> abs_writer.AbsWriter:
    """Creates a writer for `writer` configured with `writer_parameters`.

    A new writer is created on every access. 'bq' writers additionally get
    `create_or_get_dataset()` called and 'sheet' writers `init_client()`.
    """
    client = writer.create_writer(self.writer, **self.writer_parameters)
    if self.writer == 'bq':
      client.create_or_get_dataset()
    elif self.writer == 'sheet':
      client.init_client()
    return client
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Defines common functionality between executors."""
|
|
16
|
+
|
|
17
|
+
from concurrent import futures
|
|
18
|
+
|
|
19
|
+
from garf_executors import execution_context
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Executor:
  """Defines common functionality between executors."""

  def execute_batch(
    self,
    batch: dict[str, str],
    context: execution_context.ExecutionContext,
    parallel_threshold: int = 10,
  ) -> list[str]:
    """Executes batch of queries for a common context.

    Each entry is run as `self.execute(query, title, context)` in a thread
    pool; subclasses are expected to provide `execute`.

    Args:
      batch: Mapping between query_title and its text.
      context: Execution context.
      parallel_threshold: Number of queries to execute in parallel.

    Returns:
      Results of execution, in the same order as the entries of `batch`.
    """
    with futures.ThreadPoolExecutor(max_workers=parallel_threshold) as executor:
      submitted = [
        executor.submit(self.execute, query, title, context)
        for title, query in batch.items()
      ]
      # Collect in submission order so each result lines up with its batch
      # entry; completion order would vary from run to run.
      return [future.result() for future in submitted]
|
garf_executors/fetchers.py
CHANGED
|
@@ -15,23 +15,41 @@
|
|
|
15
15
|
import inspect
|
|
16
16
|
from importlib.metadata import entry_points
|
|
17
17
|
|
|
18
|
-
from garf_core import report_fetcher
|
|
18
|
+
from garf_core import exceptions, report_fetcher
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
def
|
|
21
|
+
def find_fetchers() -> set[str]:
  """Identifies all available report fetchers.

  Returns:
    Names of all entry points registered in the 'garf' group.
  """
  return {fetcher.name for fetcher in entry_points(group='garf')}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
  """Loads report fetcher for a given source.

  Args:
    source: Alias for a source associated with a fetcher.

  Returns:
    Class for a found report fetcher.

  Raises:
    ApiReportFetcherError: When fetcher cannot be loaded.
    MissingApiReportFetcherError: When fetcher not found.
  """
  # Scan the entry points once and keep only those registered for `source`.
  candidates = [
    fetcher for fetcher in entry_points(group='garf') if fetcher.name == source
  ]
  if not candidates:
    raise report_fetcher.MissingApiReportFetcherError(source)
  load_error = None
  for fetcher in candidates:
    try:
      fetcher_module = fetcher.load()
      for _, obj in inspect.getmembers(fetcher_module):
        if inspect.isclass(obj) and issubclass(
          obj, report_fetcher.ApiReportFetcher
        ):
          return obj
    except ModuleNotFoundError as e:
      # Remember the failure so the final error shows what was missing.
      load_error = e
      continue
  raise exceptions.ApiReportFetcherError(
    f'No fetcher available for the source "{source}"'
  ) from load_error
|
garf_executors/sql_executor.py
CHANGED
|
@@ -25,14 +25,22 @@ except ImportError as e:
|
|
|
25
25
|
|
|
26
26
|
import logging
|
|
27
27
|
import re
|
|
28
|
-
from typing import Any
|
|
29
28
|
|
|
30
29
|
import pandas as pd
|
|
31
30
|
|
|
32
|
-
from garf_core import query_editor
|
|
31
|
+
from garf_core import query_editor, report
|
|
32
|
+
from garf_executors import exceptions, execution_context, executor
|
|
33
33
|
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
34
35
|
|
|
35
|
-
|
|
36
|
+
|
|
37
|
+
class SqlAlchemyQueryExecutorError(exceptions.GarfExecutorError):
|
|
38
|
+
"""Error when SqlAlchemyQueryExecutor fails to run query."""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SqlAlchemyQueryExecutor(
|
|
42
|
+
executor.Executor, query_editor.TemplateProcessorMixin
|
|
43
|
+
):
|
|
36
44
|
"""Handles query execution via SqlAlchemy.
|
|
37
45
|
|
|
38
46
|
Attributes:
|
|
@@ -51,36 +59,60 @@ class SqlAlchemyQueryExecutor(query_editor.TemplateProcessorMixin):
|
|
|
51
59
|
def from_connection_string(
|
|
52
60
|
cls, connection_string: str
|
|
53
61
|
) -> SqlAlchemyQueryExecutor:
|
|
62
|
+
"""Creates executor from SqlAlchemy connection string.
|
|
63
|
+
|
|
64
|
+
https://docs.sqlalchemy.org/en/20/core/engines.html
|
|
65
|
+
"""
|
|
54
66
|
engine = sqlalchemy.create_engine(connection_string)
|
|
55
67
|
return cls(engine)
|
|
56
68
|
|
|
57
69
|
def execute(
|
|
58
70
|
self,
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
71
|
+
query: str,
|
|
72
|
+
title: str,
|
|
73
|
+
context: execution_context.ExecutionContext = (
|
|
74
|
+
execution_context.ExecutionContext()
|
|
75
|
+
),
|
|
76
|
+
) -> report.GarfReport:
|
|
63
77
|
"""Executes query in a given database via SqlAlchemy.
|
|
64
78
|
|
|
65
79
|
Args:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
80
|
+
query: Location of the query.
|
|
81
|
+
title: Name of the query.
|
|
82
|
+
context: Query execution context.
|
|
69
83
|
|
|
70
84
|
Returns:
|
|
71
|
-
|
|
85
|
+
Report with data if query returns some data otherwise empty Report.
|
|
72
86
|
"""
|
|
73
|
-
logging.info('Executing script: %s',
|
|
74
|
-
query_text = self.replace_params_template(
|
|
87
|
+
logging.info('Executing script: %s', title)
|
|
88
|
+
query_text = self.replace_params_template(query, context.query_parameters)
|
|
75
89
|
with self.engine.begin() as conn:
|
|
76
90
|
if re.findall(r'(create|update) ', query_text.lower()):
|
|
77
91
|
conn.connection.executescript(query_text)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
92
|
+
results = report.GarfReport()
|
|
93
|
+
else:
|
|
94
|
+
temp_table_name = f'temp_{title}'.replace('.', '_')
|
|
95
|
+
query_text = f'CREATE TABLE {temp_table_name} AS {query_text}'
|
|
96
|
+
conn.connection.executescript(query_text)
|
|
97
|
+
try:
|
|
98
|
+
results = report.GarfReport.from_pandas(
|
|
99
|
+
pd.read_sql(f'SELECT * FROM {temp_table_name}', conn)
|
|
100
|
+
)
|
|
101
|
+
finally:
|
|
102
|
+
conn.connection.execute(f'DROP TABLE {temp_table_name}')
|
|
103
|
+
if context.writer and results:
|
|
104
|
+
writer_client = context.writer_client
|
|
105
|
+
logger.debug(
|
|
106
|
+
'Start writing data for query %s via %s writer',
|
|
107
|
+
title,
|
|
108
|
+
type(writer_client),
|
|
109
|
+
)
|
|
110
|
+
writing_result = writer_client.write(results, title)
|
|
111
|
+
logger.debug(
|
|
112
|
+
'Finish writing data for query %s via %s writer',
|
|
113
|
+
title,
|
|
114
|
+
type(writer_client),
|
|
115
|
+
)
|
|
116
|
+
logger.info('%s executed successfully', title)
|
|
117
|
+
return writing_result
|
|
118
|
+
return results
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: garf-executors
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.8.post0
|
|
4
4
|
Summary: Executes queries against API and writes data to local/remote storage.
|
|
5
5
|
Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
|
|
6
6
|
License: Apache 2.0
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
garf_executors/__init__.py,sha256=Xc2IDryx7-Nw7V16v93k9HEGxG51zxLDcN9xp8CQRO4,1514
|
|
2
|
+
garf_executors/api_executor.py,sha256=kMiy7PyjCvUDm1sFbW6SOXhrXFZFamL1XPLkg-aYqN8,3572
|
|
3
|
+
garf_executors/bq_executor.py,sha256=XRik48P7aQkoLpABzWzEX2t3ktQVPrXp2v7Zdu8qWVI,4802
|
|
4
|
+
garf_executors/exceptions.py,sha256=U_7Q2ZMOUf89gzZd2pw7y3g7i1NeByPPKfpZ3q7p3ZU,662
|
|
5
|
+
garf_executors/execution_context.py,sha256=0PYYnwkwBJ2B1HpNN5MrANZkIbuzyxH7EzEjUPf0GGA,1966
|
|
6
|
+
garf_executors/executor.py,sha256=bGTGlWZT5B7I_WIjhuQ0CkL7Dij_ijFCBxuC1jGVkng,1626
|
|
7
|
+
garf_executors/fetchers.py,sha256=m2feJ6ByYq-oJXuQ3tmaNMx7soMcGsVC2hY4kOsPaNQ,1833
|
|
8
|
+
garf_executors/sql_executor.py,sha256=vG3-FM4C1O0rVwFMPIw85xoErsnTBYoVhMTgt9jP3QM,3699
|
|
9
|
+
garf_executors/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
garf_executors/entrypoints/cli.py,sha256=ZGwDQ2V6hYMjaVVcNDrAyvagrAET8X1kXFfD2gSG4BI,3455
|
|
11
|
+
garf_executors/entrypoints/server.py,sha256=MEEPxcIfy_PeDuz8oJ7wIZVUcz0q54mW0y89x5I_VgM,2821
|
|
12
|
+
garf_executors/entrypoints/utils.py,sha256=p483h5RY_kfwOhNq2RqwMnunOoTGXGA59nCyHY_Lvgg,15057
|
|
13
|
+
garf_executors-0.0.8.post0.dist-info/METADATA,sha256=z5oroJiy5YtdrVXhMbZlJ5Rv9yzx7ZtJGxIoyXFp2d0,2654
|
|
14
|
+
garf_executors-0.0.8.post0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
garf_executors-0.0.8.post0.dist-info/entry_points.txt,sha256=LskWNFIw8j0WJuI18-32OZrlASXAMg1XtrRYwsKBz2E,61
|
|
16
|
+
garf_executors-0.0.8.post0.dist-info/top_level.txt,sha256=sP4dCXOENPn1hDFAunjMV8Js4NND_KGeO_gQWuaT0EY,15
|
|
17
|
+
garf_executors-0.0.8.post0.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
garf_executors/__init__.py,sha256=PK4dnVynkPmJi8EGUYwbv4lwQ2ELO_cTO8FQaZV3VXA,858
|
|
2
|
-
garf_executors/api_executor.py,sha256=HOIBg-bILtLNHqtiklZio5h5xXNwVAuZMIRJTL7_5_U,4104
|
|
3
|
-
garf_executors/bq_executor.py,sha256=JBPxbDRYgUgpJv6SqYiFPypTFjZGIZ-SOOb6dS2sZQY,3822
|
|
4
|
-
garf_executors/exceptions.py,sha256=U_7Q2ZMOUf89gzZd2pw7y3g7i1NeByPPKfpZ3q7p3ZU,662
|
|
5
|
-
garf_executors/fetchers.py,sha256=gkAKHsDPzJySg4wYLZeCmNINtk6f17-jFzOP7tE82r8,1226
|
|
6
|
-
garf_executors/sql_executor.py,sha256=6tpsd1Ive5igAlQuhCSkli-tZHp58uWAU86JWGvdVpE,2722
|
|
7
|
-
garf_executors/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
garf_executors/entrypoints/cli.py,sha256=mWvPQkaqarDj5byHRvNAweVbUQiHZLXrC-35zY7l4fs,4043
|
|
9
|
-
garf_executors/entrypoints/server.py,sha256=CALlrRaKiKFOvsE1uQyjtz2UxqEeh_QqR5nBMQgMjCs,2430
|
|
10
|
-
garf_executors/entrypoints/utils.py,sha256=p483h5RY_kfwOhNq2RqwMnunOoTGXGA59nCyHY_Lvgg,15057
|
|
11
|
-
garf_executors-0.0.7.dist-info/METADATA,sha256=NBFu6iIeYifvVVMCJOr_o9GpMAEp5aMBdnK4cW5FlEs,2648
|
|
12
|
-
garf_executors-0.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
garf_executors-0.0.7.dist-info/entry_points.txt,sha256=LskWNFIw8j0WJuI18-32OZrlASXAMg1XtrRYwsKBz2E,61
|
|
14
|
-
garf_executors-0.0.7.dist-info/top_level.txt,sha256=sP4dCXOENPn1hDFAunjMV8Js4NND_KGeO_gQWuaT0EY,15
|
|
15
|
-
garf_executors-0.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|