garf-executors 1.0.2__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf/executors/__init__.py +1 -36
- garf/executors/api_executor.py +91 -7
- garf/executors/bq_executor.py +53 -25
- garf/executors/entrypoints/cli.py +48 -58
- garf/executors/entrypoints/grpc_server.py +21 -7
- garf/executors/entrypoints/server.py +64 -7
- garf/executors/entrypoints/tracer.py +29 -4
- garf/executors/execution_context.py +12 -28
- garf/executors/fetchers.py +52 -2
- garf/executors/garf_pb2.py +17 -11
- garf/executors/garf_pb2_grpc.py +45 -2
- garf/executors/query_processor.py +36 -18
- garf/executors/setup.py +76 -0
- garf/executors/sql_executor.py +22 -9
- garf/executors/workflows/__init__.py +0 -0
- garf/executors/workflows/gcp_workflow.yaml +49 -0
- garf/executors/{workflow.py → workflows/workflow.py} +60 -3
- garf/executors/workflows/workflow_runner.py +176 -0
- {garf_executors-1.0.2.dist-info → garf_executors-1.2.0.dist-info}/METADATA +8 -1
- {garf_executors-1.0.2.dist-info → garf_executors-1.2.0.dist-info}/RECORD +23 -19
- {garf_executors-1.0.2.dist-info → garf_executors-1.2.0.dist-info}/WHEEL +1 -1
- {garf_executors-1.0.2.dist-info → garf_executors-1.2.0.dist-info}/entry_points.txt +0 -0
- {garf_executors-1.0.2.dist-info → garf_executors-1.2.0.dist-info}/top_level.txt +0 -0
garf/executors/__init__.py
CHANGED
@@ -15,46 +15,11 @@
 
 from __future__ import annotations
 
-import importlib
-
-from garf.executors import executor, fetchers
 from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor
-from garf.executors.telemetry import tracer
-
-
-@tracer.start_as_current_span('setup_executor')
-def setup_executor(
-  source: str,
-  fetcher_parameters: dict[str, str | int | bool],
-  enable_cache: bool = False,
-  cache_ttl_seconds: int = 3600,
-) -> type[executor.Executor]:
-  """Initializes executors based on a source and parameters."""
-  if source == 'bq':
-    bq_executor = importlib.import_module('garf.executors.bq_executor')
-    query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
-  elif source == 'sqldb':
-    sql_executor = importlib.import_module('garf.executors.sql_executor')
-    query_executor = (
-      sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
-        fetcher_parameters.get('connection_string')
-      )
-    )
-  else:
-    concrete_api_fetcher = fetchers.get_report_fetcher(source)
-    query_executor = ApiQueryExecutor(
-      fetcher=concrete_api_fetcher(
-        **fetcher_parameters,
-        enable_cache=enable_cache,
-        cache_ttl_seconds=cache_ttl_seconds,
-      )
-    )
-  return query_executor
-
 
 __all__ = [
   'ApiQueryExecutor',
   'ApiExecutionContext',
 ]
 
-__version__ = '1.0.2'
+__version__ = '1.2.0'
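
In 1.2.0 the package root no longer exports `setup_executor`; per the new `garf/executors/setup.py` and the CLI call sites later in this diff, it moved to `garf.executors.setup`. A minimal migration sketch; the `'bq'` source and project value are placeholders, and the optional arguments visible in the CLI call are assumed to keep defaults:

```python
# 1.0.2: setup_executor was importable from the package root.
# from garf.executors import setup_executor
# query_executor = setup_executor('bq', fetcher_parameters={'project': 'my-project'})

# 1.2.0: import it from the new setup module instead.
from garf.executors import setup

query_executor = setup.setup_executor(
  source='bq',
  fetcher_parameters={'project': 'my-project'},
)
```
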
garf/executors/api_executor.py
CHANGED
@@ -21,8 +21,9 @@ GarfReport and saving it to local/remote storage.
 from __future__ import annotations
 
 import logging
+import pathlib
 
-from garf.core import report_fetcher
+from garf.core import report_fetcher, simulator
 from garf.executors import (
   exceptions,
   execution_context,
@@ -31,9 +32,17 @@ from garf.executors import (
   query_processor,
 )
 from garf.executors.telemetry import tracer
-from opentelemetry import trace
+from garf.io.writers import abs_writer
+from opentelemetry import metrics, trace
 
 logger = logging.getLogger(__name__)
+meter = metrics.get_meter('garf.executors')
+
+api_counter = meter.create_counter(
+  'garf_api_execute_total',
+  unit='1',
+  description='Counts number of API executions',
+)
 
 
 class ApiExecutionContext(execution_context.ExecutionContext):
@@ -49,13 +58,21 @@ class ApiQueryExecutor(executor.Executor):
     api_client: a client used for connecting to API.
   """
 
-  def __init__(…
+  def __init__(
+    self,
+    fetcher: report_fetcher.ApiReportFetcher,
+    report_simulator: simulator.ApiReportSimulator | None = None,
+    writers: list[abs_writer.AbsWriter] | None = None,
+  ) -> None:
     """Initializes ApiQueryExecutor.
 
     Args:
-      …
+      fetcher: Instantiated report fetcher.
+      report_simulator: Instantiated simulator.
     """
     self.fetcher = fetcher
+    self.simulator = report_simulator
+    self.writers = writers
     super().__init__(
       preprocessors=self.fetcher.preprocessors,
       postprocessors=self.fetcher.postprocessors,
@@ -100,6 +117,8 @@ class ApiQueryExecutor(executor.Executor):
     Raises:
       GarfExecutorError: When failed to execute query.
     """
+    if self.simulator:
+      return self.simulate(query=query, title=title, context=context)
     context = query_processor.process_gquery(context)
     span = trace.get_current_span()
     span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
@@ -107,15 +126,80 @@ class ApiQueryExecutor(executor.Executor):
       'api.client.class', self.fetcher.api_client.__class__.__name__
     )
     try:
-      span.set_attribute('query.title', title)
-      span.set_attribute('query.text', query)
       logger.debug('starting query %s', query)
+      title = pathlib.Path(title).name.split('.')[0]
+      api_counter.add(
+        1, {'api.client.class': self.fetcher.api_client.__class__.__name__}
+      )
       results = self.fetcher.fetch(
         query_specification=query,
         args=context.query_parameters,
+        title=title,
+        **context.fetcher_parameters,
+      )
+      writer_clients = self.writers or context.writer_clients
+      if not writer_clients:
+        logger.warning('No writers configured, skipping write operation')
+        return None
+      writing_results = []
+      for writer_client in writer_clients:
+        logger.debug(
+          'Start writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        result = writer_client.write(results, title)
+        logger.debug(
+          'Finish writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        writing_results.append(result)
+      logger.info('%s executed successfully', title)
+      # Return the last writer's result for backward compatibility
+      return writing_results[-1] if writing_results else None
+    except Exception as e:
+      logger.error('%s generated an exception: %s', title, str(e))
+      raise exceptions.GarfExecutorError(
+        '%s generated an exception: %s', title, str(e)
+      ) from e
+
+  @tracer.start_as_current_span('api.simulate')
+  def simulate(
+    self,
+    query: str,
+    title: str,
+    context: ApiExecutionContext,
+  ) -> str:
+    """Reads query, simulates results and stores them in a specified location.
+
+    Args:
+      query: Location of the query.
+      title: Name of the query.
+      context: Query execution context.
+
+    Returns:
+      Result of writing the report.
+
+    Raises:
+      GarfExecutorError: When failed to execute query.
+    """
+    context = query_processor.process_gquery(context)
+    span = trace.get_current_span()
+    span.set_attribute('simulator.class', self.simulator.__class__.__name__)
+    span.set_attribute(
+      'api.client.class', self.simulator.api_client.__class__.__name__
+    )
+    try:
+      logger.debug('starting query %s', query)
+      title = pathlib.Path(title).name.split('.')[0]
+      results = self.simulator.simulate(
+        query_specification=query,
+        args=context.query_parameters,
+        title=title,
         **context.fetcher_parameters,
       )
-      writer_clients = context.writer_clients
+      writer_clients = self.writers or context.writer_clients
       if not writer_clients:
         logger.warning('No writers configured, skipping write operation')
         return None
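
The constructor now optionally takes a simulator and a list of writers: when `report_simulator` is set, `execute()` short-circuits into `simulate()`, and instance-level `writers` take precedence over `context.writer_clients`. A hedged sketch; `my_fetcher`, `my_writer`, and `my_context` are placeholders for concrete instances:

```python
from garf.executors.api_executor import ApiQueryExecutor

# my_fetcher is a report_fetcher.ApiReportFetcher, my_writer an
# abs_writer.AbsWriter, and my_context an ApiExecutionContext.
executor = ApiQueryExecutor(
  fetcher=my_fetcher,
  report_simulator=None,  # pass a simulator.ApiReportSimulator to route
                          # execute() through simulate() instead
  writers=[my_writer],    # overrides context.writer_clients when set
)
executor.execute(query='SELECT ...', title='my_query', context=my_context)
```
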
garf/executors/bq_executor.py
CHANGED
@@ -17,6 +17,7 @@ from __future__ import annotations
 
 import contextlib
 import os
+import warnings
 
 try:
   from google.cloud import bigquery  # type: ignore
@@ -31,6 +32,7 @@ import logging
 from garf.core import query_editor, report
 from garf.executors import exceptions, execution_context, executor
 from garf.executors.telemetry import tracer
+from garf.io.writers import abs_writer
 from google.cloud import exceptions as google_cloud_exceptions
 from opentelemetry import trace
 
@@ -41,7 +43,7 @@ class BigQueryExecutorError(exceptions.GarfExecutorError):
   """Error when BigQueryExecutor fails to run query."""
 
 
-class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
+class BigQueryExecutor(executor.Executor):
   """Handles query execution in BigQuery.
 
   Attributes:
@@ -52,29 +54,42 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
 
   def __init__(
     self,
-    project_id: str | None = os.getenv('GOOGLE_CLOUD_PROJECT'),
+    project: str | None = os.getenv('GOOGLE_CLOUD_PROJECT'),
     location: str | None = None,
+    writers: list[abs_writer.AbsWriter] | None = None,
     **kwargs: str,
   ) -> None:
    """Initializes BigQueryExecutor.
 
     Args:
-      …
-      …
+      project_id: Google Cloud project id.
+      location: BigQuery dataset location.
+      writers: Instantiated writers.
     """
-    if not project_id:
+    if not project and 'project_id' not in kwargs:
       raise BigQueryExecutorError(
-        '…
+        'project is required. Either provide it as project parameter '
         'or GOOGLE_CLOUD_PROJECT env variable.'
       )
-    …
+    if project_id := kwargs.get('project_id'):
+      warnings.warn(
+        "'project_id' parameter is deprecated. Please use 'project' instead.",
+        DeprecationWarning,
+        stacklevel=2,
+      )
+    self.project = project or project_id
     self.location = location
+    self.writers = writers
+    self._client = None
     super().__init__()
 
   @property
   def client(self) -> bigquery.Client:
-    """…
-    …
+    """Instantiated BigQuery client."""
+    if not self._client:
+      with tracer.start_as_current_span('bq.create_client'):
+        self._client = bigquery.Client(self.project)
+    return self._client
 
   @tracer.start_as_current_span('bq.execute')
   def execute(
@@ -96,23 +111,23 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
       Report with data if query returns some data otherwise empty Report.
     """
     span = trace.get_current_span()
+    query_spec = (
+      query_editor.QuerySpecification(
+        text=query, title=title, args=context.query_parameters
+      )
+      .remove_comments()
+      .expand()
+    )
+    query_text = query_spec.query.text
+    title = query_spec.query.title
+    span.set_attribute('query.title', title)
+    span.set_attribute('query.text', query)
     logger.info('Executing script: %s', title)
-    …
+    # TODO: move to initialization
     self.create_datasets(context.query_parameters.macro)
-    …
-    except google_cloud_exceptions.GoogleCloudError as e:
-      raise BigQueryExecutorError(
-        f'Failed to execute query {title}: Reason: {e}'
-      ) from e
-    logger.debug('%s launched successfully', title)
-    if result.total_rows:
-      results = report.GarfReport.from_pandas(result.to_dataframe())
-    else:
-      results = report.GarfReport()
-    if context.writer and results:
-      writer_clients = context.writer_clients
+    results = self._query(query_text, title)
+    if results and (self.writers or context.writer):
+      writer_clients = self.writers or context.writer_clients
       if not writer_clients:
         logger.warning('No writers configured, skipping write operation')
       else:
@@ -149,7 +164,7 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
     """
     if macros and (datasets := extract_datasets(macros)):
       for dataset in datasets:
-        dataset_id = f'{self.project_id}.{dataset}'
+        dataset_id = f'{self.project}.{dataset}'
         try:
           self.client.get_dataset(dataset_id)
         except google_cloud_exceptions.NotFound:
@@ -159,6 +174,19 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
           self.client.create_dataset(bq_dataset, timeout=30)
           logger.info('Created new dataset %s', dataset_id)
 
+  def _query(self, query_text, title) -> report.GarfReport:
+    job = self.client.query(query_text)
+    try:
+      result = job.result()
+    except google_cloud_exceptions.GoogleCloudError as e:
+      raise BigQueryExecutorError(
+        f'Failed to execute query {title}: Reason: {e}'
+      ) from e
+    logger.debug('%s launched successfully', title)
+    if result.total_rows:
+      return report.GarfReport.from_pandas(result.to_dataframe())
+    return report.GarfReport()
+
 
 def extract_datasets(macros: dict | None) -> list[str]:
   """Finds dataset-related keys based on values in a dict.
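
The executor now prefers a `project` argument (falling back to the `GOOGLE_CLOUD_PROJECT` env variable), keeps `project_id` working behind a `DeprecationWarning`, and builds its BigQuery client lazily on first `.client` access inside a `bq.create_client` span. A short sketch with a placeholder project id:

```python
import warnings

from garf.executors.bq_executor import BigQueryExecutor

# Preferred spelling in 1.2.0; 'my-project' is a placeholder.
bq = BigQueryExecutor(project='my-project', location='EU')

# Legacy spelling still works but warns and is mapped onto self.project.
with warnings.catch_warnings(record=True) as caught:
  warnings.simplefilter('always')
  bq_legacy = BigQueryExecutor(project_id='my-project')
  assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# No client exists yet; the first access to bq.client creates one.
```
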
garf/executors/entrypoints/cli.py
CHANGED
@@ -25,14 +25,19 @@ import pathlib
 import sys
 
 import garf.executors
-from garf.executors import config, exceptions, workflow
+from garf.executors import config, exceptions, setup
 from garf.executors.entrypoints import utils
-from garf.executors.entrypoints.tracer import initialize_tracer
+from garf.executors.entrypoints.tracer import (
+  initialize_meter,
+  initialize_tracer,
+)
 from garf.executors.telemetry import tracer
+from garf.executors.workflows import workflow, workflow_runner
 from garf.io import reader
 from opentelemetry import trace
 
 initialize_tracer()
+meter_provider = initialize_meter()
 
 
 @tracer.start_as_current_span('garf.entrypoints.cli')
@@ -53,6 +58,7 @@ def main():
   parser.add_argument(
     '--no-parallel-queries', dest='parallel_queries', action='store_false'
   )
+  parser.add_argument('--simulate', dest='simulate', action='store_true')
   parser.add_argument('--dry-run', dest='dry_run', action='store_true')
   parser.add_argument('-v', '--version', dest='version', action='store_true')
   parser.add_argument(
@@ -67,7 +73,12 @@ def main():
     default=3600,
     type=int,
   )
+  parser.add_argument('--workflow-skip', dest='workflow_skip', default=None)
+  parser.add_argument(
+    '--workflow-include', dest='workflow_include', default=None
+  )
   parser.set_defaults(parallel_queries=True)
+  parser.set_defaults(simulate=False)
   parser.set_defaults(enable_cache=False)
   parser.set_defaults(dry_run=False)
   args, kwargs = parser.parse_known_args()
@@ -82,43 +93,38 @@ def main():
     loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
   )
   reader_client = reader.create_reader(args.input)
+  param_types = ['source', 'macro', 'template']
+  outputs = args.output.split(',')
+  extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(kwargs)
+  source_parameters = extra_parameters.get('source', {})
+  writer_parameters = {}
+  for output in outputs:
+    writer_parameters.update(extra_parameters.get(output))
+
+  context = garf.executors.api_executor.ApiExecutionContext(
+    query_parameters={
+      'macro': extra_parameters.get('macro'),
+      'template': extra_parameters.get('template'),
+    },
+    writer=outputs,
+    writer_parameters=writer_parameters,
+    fetcher_parameters=source_parameters,
+  )
   if workflow_file := args.workflow:
     wf_parent = pathlib.Path.cwd() / pathlib.Path(workflow_file).parent
-    execution_workflow = workflow.Workflow.from_file(workflow_file)
-    …
-      'Please provide one or more queries to run'
-    )
-    for query in queries:
-      if isinstance(query, garf.executors.workflow.QueryPath):
-        query_path = wf_parent / pathlib.Path(query.path)
-        if not query_path.exists():
-          raise workflow.GarfWorkflowError(f'Query: {query_path} not found')
-        batch[query.path] = reader_client.read(query_path)
-      elif isinstance(query, garf.executors.workflow.QueryFolder):
-        query_path = wf_parent / pathlib.Path(query.folder)
-        if not query_path.exists():
-          raise workflow.GarfWorkflowError(
-            f'Folder: {query_path} not found'
-          )
-        for p in query_path.rglob('*'):
-          if p.suffix == '.sql':
-            batch[p.stem] = reader_client.read(p)
-      else:
-        batch[query.query.title] = query.query.text
-      query_executor.execute_batch(
-        batch, step.context, args.parallel_threshold
-      )
+    execution_workflow = workflow.Workflow.from_file(workflow_file, context)
+    workflow_skip = args.workflow_skip if args.workflow_skip else None
+    workflow_include = args.workflow_include if args.workflow_include else None
+    workflow_runner.WorkflowRunner(
+      execution_workflow=execution_workflow, wf_parent=wf_parent
+    ).run(
+      enable_cache=args.enable_cache,
+      cache_ttl_seconds=args.cache_ttl_seconds,
+      selected_aliases=workflow_include,
+      skipped_aliases=workflow_skip,
+      simulate=args.simulate,
+    )
+    meter_provider.shutdown()
     sys.exit()
 
   if not args.query:
@@ -132,31 +138,14 @@ def main():
     raise exceptions.GarfExecutorError(
       f'No execution context found for source {args.source} in {config_file}'
     )
-
-  param_types = ['source', 'macro', 'template']
-  outputs = args.output.split(',')
-  extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(
-    kwargs
-  )
-  source_parameters = extra_parameters.get('source', {})
-  writer_parameters = {}
-  for output in outputs:
-    writer_parameters.update(extra_parameters.get(output))
-
-  context = garf.executors.api_executor.ApiExecutionContext(
-    query_parameters={
-      'macro': extra_parameters.get('macro'),
-      'template': extra_parameters.get('template'),
-    },
-    writer=outputs,
-    writer_parameters=writer_parameters,
-    fetcher_parameters=source_parameters,
-  )
-  query_executor = garf.executors.setup_executor(
+  query_executor = setup.setup_executor(
     source=args.source,
     fetcher_parameters=context.fetcher_parameters,
    enable_cache=args.enable_cache,
     cache_ttl_seconds=args.cache_ttl_seconds,
+    simulate=args.simulate,
+    writers=context.writer,
+    writer_parameters=context.writer_parameters,
   )
   batch = {query: reader_client.read(query) for query in args.query}
   if args.parallel_queries and len(args.query) > 1:
@@ -171,6 +160,7 @@ def main():
       query=reader_client.read(query), title=query, context=context
     )
   logging.shutdown()
+  meter_provider.shutdown()
 
 
 if __name__ == '__main__':
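
The CLI no longer walks workflow queries inline; it delegates to the new `WorkflowRunner`. A hedged sketch of the programmatic equivalent of `garf --workflow workflow.yaml --simulate --workflow-include ...`; the file name and `context` (an `ApiExecutionContext`) are placeholders:

```python
import pathlib

from garf.executors.workflows import workflow, workflow_runner

workflow_file = 'workflow.yaml'  # placeholder
execution_workflow = workflow.Workflow.from_file(workflow_file, context)
wf_parent = pathlib.Path.cwd() / pathlib.Path(workflow_file).parent

workflow_runner.WorkflowRunner(
  execution_workflow=execution_workflow, wf_parent=wf_parent
).run(
  enable_cache=False,
  cache_ttl_seconds=3600,
  selected_aliases=None,  # --workflow-include
  skipped_aliases=None,   # --workflow-skip
  simulate=False,         # --simulate
)
```
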
garf/executors/entrypoints/grpc_server.py
CHANGED
@@ -18,9 +18,8 @@ import argparse
 import logging
 from concurrent import futures
 
-import garf.executors
 import grpc
-from garf.executors import garf_pb2, garf_pb2_grpc
+from garf.executors import execution_context, garf_pb2, garf_pb2_grpc, setup
 from garf.executors.entrypoints.tracer import initialize_tracer
 from google.protobuf.json_format import MessageToDict
 from grpc_reflection.v1alpha import reflection
@@ -28,19 +27,34 @@ from grpc_reflection.v1alpha import reflection
 
 class GarfService(garf_pb2_grpc.GarfService):
   def Execute(self, request, context):
-    query_executor = garf.executors.setup_executor(
+    query_executor = setup.setup_executor(
       request.source, request.context.fetcher_parameters
     )
-    execution_context = garf.executors.execution_context.ExecutionContext(
-      **MessageToDict(request.context, preserving_proto_field_name=True)
-    )
     result = query_executor.execute(
       query=request.query,
       title=request.title,
-      context=execution_context
+      context=execution_context.ExecutionContext(
+        **MessageToDict(request.context, preserving_proto_field_name=True)
+      ),
     )
     return garf_pb2.ExecuteResponse(results=[result])
 
+  def Fetch(self, request, context):
+    query_executor = setup.setup_executor(
+      request.source, request.context.fetcher_parameters
+    )
+    query_args = execution_context.ExecutionContext(
+      **MessageToDict(request.context, preserving_proto_field_name=True)
+    ).query_parameters
+    result = query_executor.fetcher.fetch(
+      query_specification=request.query,
+      title=request.title,
+      args=query_args,
+    )
+    return garf_pb2.FetchResponse(
+      columns=result.column_names, rows=result.to_list(row_type='dict')
+    )
+
 
 if __name__ == '__main__':
   parser = argparse.ArgumentParser()
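
A hedged client-side sketch of the new `Fetch` RPC. The stub name follows the usual grpc codegen convention, and the `FetchRequest` message shape is an assumption mirrored from the `Execute` handler above; only `FetchResponse(columns=..., rows=...)` is visible in this diff:

```python
import grpc

from garf.executors import garf_pb2, garf_pb2_grpc

with grpc.insecure_channel('localhost:50051') as channel:  # placeholder address
  stub = garf_pb2_grpc.GarfServiceStub(channel)  # conventional generated stub
  # FetchRequest fields (source, query, title) are assumed by symmetry
  # with the Execute handler shown above.
  response = stub.Fetch(
    garf_pb2.FetchRequest(source='bq', query='SELECT 1 AS one', title='one')
  )
  print(response.columns, response.rows)
```
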
garf/executors/entrypoints/server.py
CHANGED
@@ -21,18 +21,55 @@ import garf.executors
 import pydantic
 import typer
 import uvicorn
-from garf.executors import exceptions
-from garf.executors.entrypoints.tracer import initialize_tracer
+from garf.executors import exceptions, setup
+from garf.executors.entrypoints import utils
+from garf.executors.entrypoints.tracer import (
+  initialize_meter,
+  initialize_tracer,
+)
+from garf.executors.workflows import workflow_runner
 from garf.io import reader
 from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing_extensions import Annotated
 
 initialize_tracer()
+initialize_meter()
 app = fastapi.FastAPI()
 FastAPIInstrumentor.instrument_app(app)
 typer_app = typer.Typer()
 
 
+class GarfSettings(BaseSettings):
+  """Specifies environmental variables for garf.
+
+  Ensure that mandatory variables are exposed via
+  export ENV_VARIABLE_NAME=VALUE.
+
+  Attributes:
+    loglevel: Level of logging.
+    log_name: Name of log.
+    logger_type: Type of logger.
+  """
+
+  model_config = SettingsConfigDict(env_prefix='garf_')
+
+  loglevel: str = 'INFO'
+  log_name: str = 'garf'
+  logger_type: str = 'local'
+
+
+class GarfDependencies:
+  def __init__(self) -> None:
+    """Initializes GarfDependencies."""
+    settings = GarfSettings()
+    self.logger = utils.init_logging(
+      loglevel=settings.loglevel,
+      logger_type=settings.logger_type,
+      name=settings.log_name,
+    )
+
+
 class ApiExecutorRequest(pydantic.BaseModel):
   """Request for executing a query.
 
@@ -81,14 +118,19 @@ async def version() -> str:
 
 
 @app.get('/api/fetchers')
-async def get_fetchers() -> list[str]:
+async def get_fetchers(
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> list[str]:
   """Shows all available API sources."""
   return list(garf.executors.fetchers.find_fetchers())
 
 
 @app.post('/api/execute')
-async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
-  query_executor = garf.executors.setup_executor(
+def execute(
+  request: ApiExecutorRequest,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> ApiExecutorResponse:
+  query_executor = setup.setup_executor(
     request.source, request.context.fetcher_parameters
   )
   result = query_executor.execute(request.query, request.title, request.context)
@@ -96,8 +138,11 @@ async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
 
 
 @app.post('/api/execute:batch')
-def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
-  query_executor = garf.executors.setup_executor(
+def execute_batch(
+  request: ApiExecutorRequest,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> ApiExecutorResponse:
+  query_executor = setup.setup_executor(
     request.source, request.context.fetcher_parameters
   )
   reader_client = reader.FileReader()
@@ -106,6 +151,18 @@ def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
   return ApiExecutorResponse(results=results)
 
 
+@app.post('/api/execute:workflow')
+def execute_workflow(
+  workflow_file: str,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+  enable_cache: bool = False,
+  cache_ttl_seconds: int = 3600,
+) -> list[str]:
+  return workflow_runner.WorkflowRunner.from_file(workflow_file).run(
+    enable_cache=enable_cache, cache_ttl_seconds=cache_ttl_seconds
+  )
+
+
 @typer_app.command()
 def main(
   port: Annotated[int, typer.Option(help='Port to start the server')] = 8000,