garf-executors 1.0.7__tar.gz → 1.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {garf_executors-1.0.7 → garf_executors-1.1.3}/PKG-INFO +2 -1
- garf_executors-1.1.3/garf/executors/__init__.py +25 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/api_executor.py +86 -4
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/cli.py +45 -69
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/grpc_server.py +9 -11
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/server.py +64 -7
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/tracer.py +29 -4
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/fetchers.py +48 -1
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/query_processor.py +4 -2
- garf_executors-1.0.7/garf/executors/__init__.py → garf_executors-1.1.3/garf/executors/setup.py +10 -12
- garf_executors-1.1.3/garf/executors/workflows/__init__.py +0 -0
- garf_executors-1.1.3/garf/executors/workflows/gcp_workflow.yaml +49 -0
- {garf_executors-1.0.7/garf/executors → garf_executors-1.1.3/garf/executors/workflows}/workflow.py +57 -2
- garf_executors-1.1.3/garf/executors/workflows/workflow_runner.py +172 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/PKG-INFO +2 -1
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/SOURCES.txt +5 -1
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/requires.txt +1 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/pyproject.toml +5 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/README.md +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/bq_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/config.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/__init__.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/utils.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/exceptions.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/execution_context.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/garf_pb2.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/garf_pb2_grpc.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/sql_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/telemetry.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/__init__.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/api_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/bq_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/config.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/__init__.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/cli.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/grcp_server.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/server.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/tracer.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/utils.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/exceptions.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/execution_context.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/fetchers.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/sql_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/telemetry.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/workflow.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/dependency_links.txt +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/entry_points.txt +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/top_level.txt +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/setup.cfg +0 -0
{garf_executors-1.0.7 → garf_executors-1.1.3}/PKG-INFO CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: garf-executors
-Version: 1.0.7
+Version: 1.1.3
 Summary: Executes queries against API and writes data to local/remote storage.
 Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
 License: Apache 2.0
@@ -36,6 +36,7 @@ Provides-Extra: gcp
 Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
 Provides-Extra: server
 Requires-Dist: fastapi[standard]; extra == "server"
+Requires-Dist: pydantic-settings; extra == "server"
 Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
 Requires-Dist: typer; extra == "server"
 Requires-Dist: grpcio-reflection; extra == "server"
garf_executors-1.1.3/garf/executors/__init__.py ADDED

@@ -0,0 +1,25 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Executors to fetch data from various APIs."""
+
+from __future__ import annotations
+
+from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor
+
+__all__ = [
+  'ApiQueryExecutor',
+  'ApiExecutionContext',
+]
+
+__version__ = '1.1.3'
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/api_executor.py CHANGED

@@ -23,7 +23,7 @@ from __future__ import annotations
 import logging
 import pathlib
 
-from garf.core import report_fetcher
+from garf.core import report_fetcher, simulator
 from garf.executors import (
   exceptions,
   execution_context,
@@ -32,9 +32,16 @@ from garf.executors import (
   query_processor,
 )
 from garf.executors.telemetry import tracer
-from opentelemetry import trace
+from opentelemetry import metrics, trace
 
 logger = logging.getLogger(__name__)
+meter = metrics.get_meter('garf.executors')
+
+api_counter = meter.create_counter(
+  'garf_api_execute_total',
+  unit='1',
+  description='Counts number of API executions',
+)
 
 
 class ApiExecutionContext(execution_context.ExecutionContext):
@@ -50,13 +57,19 @@ class ApiQueryExecutor(executor.Executor):
     api_client: a client used for connecting to API.
   """
 
-  def __init__(self, fetcher: report_fetcher.ApiReportFetcher) -> None:
+  def __init__(
+    self,
+    fetcher: report_fetcher.ApiReportFetcher,
+    report_simulator: simulator.ApiReportSimulator | None = None,
+  ) -> None:
     """Initializes ApiQueryExecutor.
 
     Args:
-      fetcher: Instantiated report fetcher.
+      fetcher: Instantiated report fetcher.
+      report_simulator: Instantiated simulator.
     """
     self.fetcher = fetcher
+    self.simulator = report_simulator
     super().__init__(
       preprocessors=self.fetcher.preprocessors,
       postprocessors=self.fetcher.postprocessors,
@@ -101,6 +114,8 @@ class ApiQueryExecutor(executor.Executor):
     Raises:
       GarfExecutorError: When failed to execute query.
     """
+    if self.simulator:
+      return self.simulate(query=query, title=title, context=context)
     context = query_processor.process_gquery(context)
     span = trace.get_current_span()
     span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
@@ -112,6 +127,9 @@ class ApiQueryExecutor(executor.Executor):
     span.set_attribute('query.text', query)
     logger.debug('starting query %s', query)
     title = pathlib.Path(title).name.split('.')[0]
+    api_counter.add(
+      1, {'api.client.class': self.fetcher.api_client.__class__.__name__}
+    )
     results = self.fetcher.fetch(
       query_specification=query,
       args=context.query_parameters,
@@ -144,3 +162,67 @@ class ApiQueryExecutor(executor.Executor):
       raise exceptions.GarfExecutorError(
         '%s generated an exception: %s', title, str(e)
       ) from e
+
+  @tracer.start_as_current_span('api.simulate')
+  def simulate(
+    self,
+    query: str,
+    title: str,
+    context: ApiExecutionContext,
+  ) -> str:
+    """Reads query, simulates results and stores them in a specified location.
+
+    Args:
+      query: Location of the query.
+      title: Name of the query.
+      context: Query execution context.
+
+    Returns:
+      Result of writing the report.
+
+    Raises:
+      GarfExecutorError: When failed to execute query.
+    """
+    context = query_processor.process_gquery(context)
+    span = trace.get_current_span()
+    span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
+    span.set_attribute(
+      'api.client.class', self.fetcher.api_client.__class__.__name__
+    )
+    try:
+      span.set_attribute('query.title', title)
+      span.set_attribute('query.text', query)
+      logger.debug('starting query %s', query)
+      title = pathlib.Path(title).name.split('.')[0]
+      results = self.simulator.simulate(
+        query_specification=query,
+        args=context.query_parameters,
+        title=title,
+        **context.fetcher_parameters,
+      )
+      writer_clients = context.writer_clients
+      if not writer_clients:
+        logger.warning('No writers configured, skipping write operation')
+        return None
+      writing_results = []
+      for writer_client in writer_clients:
+        logger.debug(
+          'Start writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        result = writer_client.write(results, title)
+        logger.debug(
+          'Finish writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        writing_results.append(result)
+      logger.info('%s executed successfully', title)
+      # Return the last writer's result for backward compatibility
+      return writing_results[-1] if writing_results else None
+    except Exception as e:
+      logger.error('%s generated an exception: %s', title, str(e))
+      raise exceptions.GarfExecutorError(
+        '%s generated an exception: %s', title, str(e)
+      ) from e
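For orientation, a minimal sketch of the new simulation path, assuming a concrete fetcher and simulator supplied by a garf plugin; only the `ApiQueryExecutor` wiring itself comes from the diff above, and the `'console'` writer alias is an assumption:

```python
from garf.core import report_fetcher, simulator
from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor


def run_simulated(
  fetcher: report_fetcher.ApiReportFetcher,
  report_simulator: simulator.ApiReportSimulator,
  query: str,
  title: str,
) -> str | None:
  """Runs a query through the new simulate() path instead of the live API."""
  executor = ApiQueryExecutor(
    fetcher=fetcher, report_simulator=report_simulator
  )
  # With a simulator attached, execute() short-circuits into simulate(),
  # which writes simulated results through the writers in the context.
  return executor.execute(
    query=query,
    title=title,
    context=ApiExecutionContext(writer=['console']),  # assumed writer alias
  )
```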
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/cli.py CHANGED

@@ -22,18 +22,22 @@ from __future__ import annotations
 import argparse
 import logging
 import pathlib
-import re
 import sys
 
 import garf.executors
-from garf.executors import config, exceptions, workflow
+from garf.executors import config, exceptions, setup
 from garf.executors.entrypoints import utils
-from garf.executors.entrypoints.tracer import initialize_tracer
+from garf.executors.entrypoints.tracer import (
+  initialize_meter,
+  initialize_tracer,
+)
 from garf.executors.telemetry import tracer
+from garf.executors.workflows import workflow, workflow_runner
 from garf.io import reader
 from opentelemetry import trace
 
 initialize_tracer()
+meter_provider = initialize_meter()
 
 
 @tracer.start_as_current_span('garf.entrypoints.cli')
@@ -54,6 +58,7 @@ def main():
   parser.add_argument(
     '--no-parallel-queries', dest='parallel_queries', action='store_false'
   )
+  parser.add_argument('--simulate', dest='simulate', action='store_true')
   parser.add_argument('--dry-run', dest='dry_run', action='store_true')
   parser.add_argument('-v', '--version', dest='version', action='store_true')
   parser.add_argument(
@@ -68,7 +73,12 @@ def main():
     default=3600,
     type=int,
   )
+  parser.add_argument('--workflow-skip', dest='workflow_skip', default=None)
+  parser.add_argument(
+    '--workflow-include', dest='workflow_include', default=None
+  )
   parser.set_defaults(parallel_queries=True)
+  parser.set_defaults(simulate=False)
   parser.set_defaults(enable_cache=False)
   parser.set_defaults(dry_run=False)
   args, kwargs = parser.parse_known_args()
@@ -83,53 +93,37 @@ def main():
     loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
   )
   reader_client = reader.create_reader(args.input)
+  param_types = ['source', 'macro', 'template']
+  outputs = args.output.split(',')
+  extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(kwargs)
+  source_parameters = extra_parameters.get('source', {})
+  writer_parameters = {}
+  for output in outputs:
+    writer_parameters.update(extra_parameters.get(output))
+
+  context = garf.executors.api_executor.ApiExecutionContext(
+    query_parameters={
+      'macro': extra_parameters.get('macro'),
+      'template': extra_parameters.get('template'),
+    },
+    writer=outputs,
+    writer_parameters=writer_parameters,
+    fetcher_parameters=source_parameters,
+  )
   if workflow_file := args.workflow:
     wf_parent = pathlib.Path.cwd() / pathlib.Path(workflow_file).parent
-    execution_workflow = workflow.Workflow.from_file(workflow_file)
-
-
-
-
-
-
-
-
-
-
-
-    batch = {}
-    if not (queries := step.queries):
-      logger.error('Please provide one or more queries to run')
-      raise exceptions.GarfExecutorError(
-        'Please provide one or more queries to run'
-      )
-    for query in queries:
-      if isinstance(query, garf.executors.workflow.QueryPath):
-        if re.match(
-          '^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)', query.path
-        ):
-          batch[query.path] = reader_client.read(query.path)
-        else:
-          query_path = wf_parent / pathlib.Path(query.path)
-          if not query_path.exists():
-            raise workflow.GarfWorkflowError(
-              f'Query: {query_path} not found'
-            )
-          batch[query.path] = reader_client.read(query_path)
-      elif isinstance(query, garf.executors.workflow.QueryFolder):
-        query_path = wf_parent / pathlib.Path(query.folder)
-        if not query_path.exists():
-          raise workflow.GarfWorkflowError(
-            f'Folder: {query_path} not found'
-          )
-        for p in query_path.rglob('*'):
-          if p.suffix == '.sql':
-            batch[p.stem] = reader_client.read(p)
-      else:
-        batch[query.query.title] = query.query.text
-    query_executor.execute_batch(
-      batch, step.context, args.parallel_threshold
-    )
+    execution_workflow = workflow.Workflow.from_file(workflow_file, context)
+    workflow_skip = args.workflow_skip if args.workflow_skip else None
+    workflow_include = args.workflow_include if args.workflow_include else None
+    workflow_runner.WorkflowRunner(
+      execution_workflow=execution_workflow, wf_parent=wf_parent
+    ).run(
+      enable_cache=args.enable_cache,
+      cache_ttl_seconds=args.cache_ttl_seconds,
+      selected_aliases=workflow_include,
+      skipped_aliases=workflow_skip,
+    )
+    meter_provider.shutdown()
     sys.exit()
 
   if not args.query:
@@ -143,31 +137,12 @@ def main():
     raise exceptions.GarfExecutorError(
       f'No execution context found for source {args.source} in {config_file}'
    )
-
-  param_types = ['source', 'macro', 'template']
-  outputs = args.output.split(',')
-  extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(
-    kwargs
-  )
-  source_parameters = extra_parameters.get('source', {})
-  writer_parameters = {}
-  for output in outputs:
-    writer_parameters.update(extra_parameters.get(output))
-
-  context = garf.executors.api_executor.ApiExecutionContext(
-    query_parameters={
-      'macro': extra_parameters.get('macro'),
-      'template': extra_parameters.get('template'),
-    },
-    writer=outputs,
-    writer_parameters=writer_parameters,
-    fetcher_parameters=source_parameters,
-  )
-  query_executor = garf.executors.setup_executor(
+  query_executor = setup.setup_executor(
     source=args.source,
     fetcher_parameters=context.fetcher_parameters,
     enable_cache=args.enable_cache,
     cache_ttl_seconds=args.cache_ttl_seconds,
+    simulate=args.simulate,
   )
   batch = {query: reader_client.read(query) for query in args.query}
   if args.parallel_queries and len(args.query) > 1:
@@ -182,6 +157,7 @@ def main():
       query=reader_client.read(query), title=query, context=context
     )
   logging.shutdown()
+  meter_provider.shutdown()
 
 
 if __name__ == '__main__':
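Taken together, the new flags can be exercised like this; a sketch that drives the installed `garf` entrypoint programmatically (the query path and `fake` source alias are hypothetical, the flag names come from the parser definitions above):

```python
# Equivalent to running, from a shell:
#   garf queries/report.sql --source fake --simulate
import sys

from garf.executors.entrypoints import cli

sys.argv = ['garf', 'queries/report.sql', '--source', 'fake', '--simulate']
cli.main()  # routes execution through setup_executor(..., simulate=True)
```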
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/grpc_server.py CHANGED

@@ -18,9 +18,8 @@ import argparse
 import logging
 from concurrent import futures
 
-import garf.executors
 import grpc
-from garf.executors import garf_pb2, garf_pb2_grpc
+from garf.executors import execution_context, garf_pb2, garf_pb2_grpc, setup
 from garf.executors.entrypoints.tracer import initialize_tracer
 from google.protobuf.json_format import MessageToDict
 from grpc_reflection.v1alpha import reflection
@@ -28,30 +27,29 @@ from grpc_reflection.v1alpha import reflection
 
 class GarfService(garf_pb2_grpc.GarfService):
   def Execute(self, request, context):
-    query_executor = garf.executors.setup_executor(
+    query_executor = setup.setup_executor(
       request.source, request.context.fetcher_parameters
     )
-    execution_context = garf.executors.execution_context.ExecutionContext(
-      **MessageToDict(request.context, preserving_proto_field_name=True)
-    )
     result = query_executor.execute(
       query=request.query,
       title=request.title,
-      context=execution_context,
+      context=execution_context.ExecutionContext(
+        **MessageToDict(request.context, preserving_proto_field_name=True)
+      ),
     )
     return garf_pb2.ExecuteResponse(results=[result])
 
   def Fetch(self, request, context):
-    query_executor = garf.executors.setup_executor(
+    query_executor = setup.setup_executor(
       request.source, request.context.fetcher_parameters
     )
-    execution_context = garf.executors.execution_context.ExecutionContext(
+    query_args = execution_context.ExecutionContext(
       **MessageToDict(request.context, preserving_proto_field_name=True)
-    )
+    ).query_parameters
     result = query_executor.fetcher.fetch(
       query_specification=request.query,
       title=request.title,
-      args=execution_context.query_parameters,
+      args=query_args,
     )
     return garf_pb2.FetchResponse(
       columns=result.column_names, rows=result.to_list(row_type='dict')
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/server.py CHANGED

@@ -21,18 +21,55 @@ import garf.executors
 import pydantic
 import typer
 import uvicorn
-from garf.executors import exceptions
-from garf.executors.entrypoints.tracer import initialize_tracer
+from garf.executors import exceptions, setup
+from garf.executors.entrypoints import utils
+from garf.executors.entrypoints.tracer import (
+  initialize_meter,
+  initialize_tracer,
+)
+from garf.executors.workflows import workflow_runner
 from garf.io import reader
 from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing_extensions import Annotated
 
 initialize_tracer()
+initialize_meter()
 app = fastapi.FastAPI()
 FastAPIInstrumentor.instrument_app(app)
 typer_app = typer.Typer()
 
 
+class GarfSettings(BaseSettings):
+  """Specifies environmental variables for garf.
+
+  Ensure that mandatory variables are exposed via
+  export ENV_VARIABLE_NAME=VALUE.
+
+  Attributes:
+    loglevel: Level of logging.
+    log_name: Name of log.
+    logger_type: Type of logger.
+  """
+
+  model_config = SettingsConfigDict(env_prefix='garf_')
+
+  loglevel: str = 'INFO'
+  log_name: str = 'garf'
+  logger_type: str = 'local'
+
+
+class GarfDependencies:
+  def __init__(self) -> None:
+    """Initializes GarfDependencies."""
+    settings = GarfSettings()
+    self.logger = utils.init_logging(
+      loglevel=settings.loglevel,
+      logger_type=settings.logger_type,
+      name=settings.log_name,
+    )
+
+
 class ApiExecutorRequest(pydantic.BaseModel):
   """Request for executing a query.
 
@@ -81,14 +118,19 @@ async def version() -> str:
 
 
 @app.get('/api/fetchers')
-async def get_fetchers() -> list[str]:
+async def get_fetchers(
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> list[str]:
   """Shows all available API sources."""
   return list(garf.executors.fetchers.find_fetchers())
 
 
 @app.post('/api/execute')
-def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
-  query_executor = garf.executors.setup_executor(
+def execute(
+  request: ApiExecutorRequest,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> ApiExecutorResponse:
+  query_executor = setup.setup_executor(
     request.source, request.context.fetcher_parameters
   )
   result = query_executor.execute(request.query, request.title, request.context)
@@ -96,8 +138,11 @@ def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
 
 
 @app.post('/api/execute:batch')
-def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
-  query_executor = garf.executors.setup_executor(
+def execute_batch(
+  request: ApiExecutorRequest,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> ApiExecutorResponse:
+  query_executor = setup.setup_executor(
    request.source, request.context.fetcher_parameters
   )
   reader_client = reader.FileReader()
@@ -106,6 +151,18 @@ def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
   return ApiExecutorResponse(results=results)
 
 
+@app.post('/api/execute:workflow')
+def execute_workflow(
+  workflow_file: str,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+  enable_cache: bool = False,
+  cache_ttl_seconds: int = 3600,
+) -> list[str]:
+  return workflow_runner.WorkflowRunner.from_file(workflow_file).run(
+    enable_cache=enable_cache, cache_ttl_seconds=cache_ttl_seconds
+  )
+
+
 @typer_app.command()
 def main(
   port: Annotated[int, typer.Option(help='Port to start the server')] = 8000,
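`GarfSettings` relies on stock pydantic-settings behaviour: with `env_prefix='garf_'`, each field is resolved from a `GARF_`-prefixed environment variable (matching is case-insensitive) before falling back to its declared default. A sketch, assuming the import path shown in the diff:

```python
import os

from garf.executors.entrypoints.server import GarfSettings

os.environ['GARF_LOGLEVEL'] = 'DEBUG'

settings = GarfSettings()
assert settings.loglevel == 'DEBUG'     # read from GARF_LOGLEVEL
assert settings.log_name == 'garf'      # default, GARF_LOG_NAME not set
assert settings.logger_type == 'local'  # default
```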
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/tracer.py CHANGED

@@ -14,15 +14,20 @@
 
 import os
 
-from opentelemetry import trace
+from opentelemetry import metrics, trace
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
+  OTLPMetricExporter,
+)
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
   OTLPSpanExporter,
 )
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import (
+  PeriodicExportingMetricReader,
+)
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import (
-  BatchSpanProcessor,
-)
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
 
 DEFAULT_SERVICE_NAME = 'garf'
 
@@ -55,3 +60,23 @@ def initialize_tracer():
   tracer_provider.add_span_processor(otlp_processor)
 
   trace.set_tracer_provider(tracer_provider)
+
+
+def initialize_meter():
+  resource = Resource.create(
+    {'service.name': os.getenv('OTLP_SERVICE_NAME', DEFAULT_SERVICE_NAME)}
+  )
+  meter_provider = MeterProvider(resource=resource)
+
+  if otel_endpoint := os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT'):
+    otlp_metric_exporter = OTLPMetricExporter(
+      endpoint=f'{otel_endpoint}/v1/metrics'
+    )
+    metric_reader = PeriodicExportingMetricReader(otlp_metric_exporter)
+    meter_provider = MeterProvider(
+      resource=resource, metric_readers=[metric_reader]
+    )
+  else:
+    meter_provider = MeterProvider(resource=resource)
+  metrics.set_meter_provider(meter_provider)
+  return meter_provider
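The exporter selection mirrors `initialize_tracer()`: when `OTEL_EXPORTER_OTLP_ENDPOINT` is set, metrics flow through a `PeriodicExportingMetricReader`; otherwise a reader-less `MeterProvider` is installed and counters are effectively no-ops. A usage sketch (the endpoint value is illustrative):

```python
import os

from garf.executors.entrypoints.tracer import initialize_meter
from opentelemetry import metrics

os.environ['OTEL_EXPORTER_OTLP_ENDPOINT'] = 'http://localhost:4317'  # example
meter_provider = initialize_meter()

# Counters created anywhere in the process now report through this provider,
# e.g. the garf_api_execute_total counter added in api_executor.py.
counter = metrics.get_meter('demo').create_counter('demo_total', unit='1')
counter.add(1, {'source': 'demo'})

meter_provider.shutdown()  # flushes pending metrics, as cli.py does on exit
```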
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/fetchers.py CHANGED

@@ -17,7 +17,7 @@ import logging
 import sys
 from importlib.metadata import entry_points
 
-from garf.core import report_fetcher
+from garf.core import report_fetcher, simulator
 from garf.executors.telemetry import tracer
 
 logger = logging.getLogger(name='garf.executors.fetchers')
@@ -31,6 +31,14 @@ def find_fetchers() -> set[str]:
   return set()
 
 
+@tracer.start_as_current_span('find_simulators')
+def find_simulators() -> set[str]:
+  """Identifiers all available report simulators."""
+  if entrypoints := _get_entrypoints('garf_simulator'):
+    return {simulator.name for simulator in entrypoints}
+  return set()
+
+
 @tracer.start_as_current_span('get_report_fetcher')
 def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
   """Loads report fetcher for a given source.
@@ -70,6 +78,45 @@ def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
   )
 
 
+@tracer.start_as_current_span('get_report_simulator')
+def get_report_simulator(source: str) -> type[simulator.ApiReportSimulator]:
+  """Loads report simulator for a given source.
+
+  Args:
+    source: Alias for a source associated with a simulator.
+
+  Returns:
+    Class for a found report simulator.
+
+  Raises:
+    GarfApiReportSimulatorError: When simulator cannot be loaded.
+    MissingApiReportSimulatorError: When simulator not found.
+  """
+  if source not in find_simulators():
+    raise simulator.MissingApiReportSimulatorError(source)
+  for sim in _get_entrypoints('garf_simulator'):
+    if sim.name == source:
+      try:
+        with tracer.start_as_current_span('load_simulator_module') as span:
+          simulator_module = sim.load()
+          span.set_attribute('loaded_module', simulator_module.__name__)
+        for name, obj in inspect.getmembers(simulator_module):
+          if inspect.isclass(obj) and issubclass(
+            obj, simulator.ApiReportSimulator
+          ):
+            if not hasattr(obj, 'alias'):
+              return getattr(simulator_module, name)
+            if obj.alias == sim.name:
+              return getattr(simulator_module, name)
+      except ModuleNotFoundError as e:
+        raise simulator.GarfApiReportSimulatorError(
+          f'Failed to load simulator for source {source}, reason: {e}'
+        )
+  raise simulator.GarfApiReportSimulatorError(
+    f'No simulator available for the source "{source}"'
+  )
+
+
 def _get_entrypoints(group='garf'):
   if sys.version_info.major == 3 and sys.version_info.minor == 9:
     try:
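Simulator discovery follows the same entry-point mechanism as fetchers, just under the `garf_simulator` group, so what `find_simulators()` returns depends on which plugins are installed. A sketch (the `fake` alias is hypothetical):

```python
from garf.executors import fetchers

available = fetchers.find_simulators()  # e.g. {'fake'} with a plugin installed
if 'fake' in available:
  # Resolves the ApiReportSimulator subclass registered for this alias.
  simulator_cls = fetchers.get_report_simulator('fake')
  report_simulator = simulator_cls()
```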
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/query_processor.py CHANGED

@@ -41,8 +41,10 @@ def _handle_sub_context(context, sub_context):
   if alias == 'sqldb':
     from garf.executors import sql_executor
 
-    gquery_executor = sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
-      context.fetcher_parameters.get('connection_string')
+    gquery_executor = (
+      sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+        context.fetcher_parameters.get('connection_string')
+      )
     )
   elif alias == 'bq':
     from garf.executors import bq_executor
garf_executors-1.0.7/garf/executors/__init__.py → garf_executors-1.1.3/garf/executors/setup.py
RENAMED

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,14 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+"""Bootstraps executor based on provided parameters."""
 
 from __future__ import annotations
 
 import importlib
 
 from garf.executors import executor, fetchers
-from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor
+from garf.executors.api_executor import ApiQueryExecutor
 from garf.executors.telemetry import tracer
 
 
@@ -28,6 +28,7 @@ def setup_executor(
   fetcher_parameters: dict[str, str | int | bool],
   enable_cache: bool = False,
   cache_ttl_seconds: int = 3600,
+  simulate: bool = False,
 ) -> type[executor.Executor]:
   """Initializes executors based on a source and parameters."""
   if source == 'bq':
@@ -42,19 +43,16 @@ def setup_executor(
     )
   else:
     concrete_api_fetcher = fetchers.get_report_fetcher(source)
+    if simulate:
+      concrete_simulator = fetchers.get_report_simulator(source)()
+    else:
+      concrete_simulator = None
     query_executor = ApiQueryExecutor(
       fetcher=concrete_api_fetcher(
        **fetcher_parameters,
        enable_cache=enable_cache,
        cache_ttl_seconds=cache_ttl_seconds,
-      )
+      ),
+      report_simulator=concrete_simulator,
     )
   return query_executor
-
-
-__all__ = [
-  'ApiQueryExecutor',
-  'ApiExecutionContext',
-]
-
-__version__ = '1.0.7'
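The new `simulate` switch is the entry point into all of the above; a sketch of calling it directly (the `fake` source alias and empty fetcher parameters are hypothetical):

```python
from garf.executors import setup

query_executor = setup.setup_executor(
  source='fake',          # hypothetical fetcher alias
  fetcher_parameters={},
  enable_cache=False,
  cache_ttl_seconds=3600,
  simulate=True,          # also resolves get_report_simulator('fake')()
)
# query_executor is an ApiQueryExecutor whose execute() now simulates
# instead of hitting the live API.
```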
garf_executors-1.1.3/garf/executors/workflows/__init__.py ADDED

File without changes (new empty file)
garf_executors-1.1.3/garf/executors/workflows/gcp_workflow.yaml ADDED

@@ -0,0 +1,49 @@
+run:
+  for:
+    value: pair
+    in: ${pairs}
+    steps:
+      - log_source:
+          call: sys.log
+          args:
+            data: ${pair.alias}
+      - execute_queries:
+          parallel:
+            for:
+              value: query
+              in: ${pair.queries}
+              steps:
+                - log_query:
+                    call: sys.log
+                    args:
+                      data: ${pair}
+                - execute_single_query:
+                    try:
+                      call: http.post
+                      args:
+                        url: ${sys.get_env("GARF_ENDPOINT") + "/api/execute"}
+                        auth:
+                          type: OIDC
+                        body:
+                          source: ${pair.fetcher}
+                          # query_path: ${query.path}
+                          title: ${query.query.title}
+                          query: ${query.query.text}
+                          context:
+                            fetcher_parameters: ${pair.fetcher_parameters}
+                            writer: ${pair.writer}
+                            writer_parameters: ${pair.writer_parameters}
+                            query_parameters:
+                              macro: ${pair.query_parameters.macro}
+                              template: ${pair.query_parameters.template}
+                      result: task_resp
+                    except:
+                      as: e
+                      assign:
+                        - task_resp:
+                            status: "failed"
+                            error: ${e.message}
+                - log_result:
+                    call: sys.log
+                    args:
+                      data: ${task_resp}
{garf_executors-1.0.7/garf/executors → garf_executors-1.1.3/garf/executors/workflows}/workflow.py
RENAMED

@@ -11,10 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""Workflow specifies steps of end-to-end fetching and processing."""
+
 from __future__ import annotations
 
+import copy
 import os
 import pathlib
+import re
+from collections import defaultdict
+from typing import Any
 
 import pydantic
 import smart_open
@@ -37,6 +43,13 @@ class QueryPath(pydantic.BaseModel):
   """Path file with query."""
 
   path: str
+  prefix: str | None = None
+
+  @property
+  def full_path(self) -> str:
+    if self.prefix:
+      return re.sub('/$', '', self.prefix) + '/' + self.path
+    return self.path
 
 
 class QueryDefinition(pydantic.BaseModel):
@@ -65,11 +78,13 @@ class ExecutionStep(ExecutionContext):
     alias: Optional alias to identify execution step.
     queries: Queries to run for a particular fetcher.
     context: Execution context for queries and fetcher.
+    parallel_threshold: Max allowed parallelism for the queries in the step.
   """
 
   fetcher: str | None = None
   alias: str | None = pydantic.Field(default=None, pattern=r'^[a-zA-Z0-9_]+$')
   queries: list[QueryPath | QueryDefinition | QueryFolder] | None = None
+  parallel_threshold: int | None = None
 
   @property
   def context(self) -> ExecutionContext:
@@ -86,17 +101,41 @@ class Workflow(pydantic.BaseModel):
 
   Attributes:
     steps: Contains one or several fetcher executions.
+    context: Query and fetcher parameters to overwrite in steps.
   """
 
   steps: list[ExecutionStep]
+  context: ExecutionContext | None = None
+
+  def model_post_init(self, __context__) -> None:
+    if context := self.context:
+      custom_parameters = defaultdict(dict)
+      if custom_macros := context.query_parameters.macro:
+        custom_parameters['query_parameters']['macro'] = custom_macros
+      if custom_templates := context.query_parameters.template:
+        custom_parameters['query_parameters']['template'] = custom_templates
+      if custom_fetcher_parameters := context.fetcher_parameters:
+        custom_parameters['fetcher_parameters'] = custom_fetcher_parameters
+
+      if custom_parameters:
+        steps = self.steps
+        for i, step in enumerate(steps):
+          res = _merge_dicts(
+            step.model_dump(exclude_none=True), dict(custom_parameters)
+          )
+          steps[i] = ExecutionStep(**res)
 
   @classmethod
-  def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Workflow:
+  def from_file(
+    cls,
+    path: str | pathlib.Path | os.PathLike[str],
+    context: ExecutionContext | None = None,
+  ) -> Workflow:
     """Builds workflow from local or remote yaml file."""
     with smart_open.open(path, 'r', encoding='utf-8') as f:
       data = yaml.safe_load(f)
     try:
-      return Workflow(steps=data.get('steps'))
+      return Workflow(steps=data.get('steps'), context=context)
     except pydantic.ValidationError as e:
       raise GarfWorkflowError(f'Incorrect workflow:\n {e}') from e
@@ -107,3 +146,19 @@ class Workflow(pydantic.BaseModel):
       self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
     )
     return f'Workflow is saved to {str(path)}'
+
+
+def _merge_dicts(
+  dict1: dict[str, Any], dict2: dict[str, Any]
+) -> dict[str, Any]:
+  result = copy.deepcopy(dict1)
+  for key, value in dict2.items():
+    if (
+      key in result
+      and isinstance(result[key], dict)
+      and isinstance(value, dict)
+    ):
+      result[key] = _merge_dicts(result[key], value)
+    else:
+      result[key] = value
+  return result
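A sketch of the new top-level `context` overwrite, assuming `ExecutionContext` accepts a `query_parameters` mapping with a `macro` key (as `ApiExecutionContext` is built in cli.py above); the file name and macro values are hypothetical:

```python
from garf.executors import execution_context
from garf.executors.workflows import workflow

context = execution_context.ExecutionContext(
  query_parameters={'macro': {'start_date': '2025-01-01'}},
)
wf = workflow.Workflow.from_file('workflow.yaml', context)
# model_post_init deep-merges these parameters into every step with
# _merge_dicts, so each step's macros are overwritten by the global ones.
```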
garf_executors-1.1.3/garf/executors/workflows/workflow_runner.py ADDED

@@ -0,0 +1,172 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Runs garf workflow."""
+
+from __future__ import annotations
+
+import logging
+import pathlib
+import re
+from typing import Final
+
+import yaml
+from garf.executors import exceptions, setup
+from garf.executors.telemetry import tracer
+from garf.executors.workflows import workflow
+from garf.io import reader
+
+logger = logging.getLogger(__name__)
+
+_REMOTE_FILES_PATTERN: Final[str] = (
+  '^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)'
+)
+_SCRIPT_PATH = pathlib.Path(__file__).parent
+
+
+class WorkflowRunner:
+  """Runs garf workflow.
+
+  Attributes:
+    workflow: Workflow to execute.
+    wf_parent: Optional location of a workflow file.
+    parallel_threshold: Max allowed parallelism for the queries in the workflow.
+  """
+
+  def __init__(
+    self,
+    execution_workflow: workflow.Workflow,
+    wf_parent: pathlib.Path | str,
+    parallel_threshold: int = 10,
+  ) -> None:
+    """Initializes WorkflowRunner."""
+    self.workflow = execution_workflow
+    self.wf_parent = wf_parent
+    self.parallel_threshold = parallel_threshold
+
+  @classmethod
+  def from_file(
+    cls,
+    workflow_file: str | pathlib.Path,
+  ) -> WorkflowRunner:
+    """Initialized Workflow runner from a local or remote file."""
+    if isinstance(workflow_file, str):
+      workflow_file = pathlib.Path(workflow_file)
+    execution_workflow = workflow.Workflow.from_file(workflow_file)
+    return cls(
+      execution_workflow=execution_workflow, wf_parent=workflow_file.parent
+    )
+
+  def run(
+    self,
+    enable_cache: bool = False,
+    cache_ttl_seconds: int = 3600,
+    selected_aliases: list[str] | None = None,
+    skipped_aliases: list[str] | None = None,
+  ) -> list[str]:
+    skipped_aliases = skipped_aliases or []
+    selected_aliases = selected_aliases or []
+    reader_client = reader.create_reader('file')
+    execution_results = []
+    logger.info('Starting Garf Workflow...')
+    for i, step in enumerate(self.workflow.steps, 1):
+      step_name = f'{i}-{step.fetcher}'
+      if step.alias:
+        step_name = f'{step_name}-{step.alias}'
+      if step.alias in skipped_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      if selected_aliases and step.alias not in selected_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      with tracer.start_as_current_span(step_name):
+        logger.info(
+          'Running step %d, fetcher: %s, alias: %s', i, step.fetcher, step.alias
+        )
+        query_executor = setup.setup_executor(
+          source=step.fetcher,
+          fetcher_parameters=step.fetcher_parameters,
+          enable_cache=enable_cache,
+          cache_ttl_seconds=cache_ttl_seconds,
+        )
+        batch = {}
+        if not (queries := step.queries):
+          logger.error('Please provide one or more queries to run')
+          raise exceptions.GarfExecutorError(
+            'Please provide one or more queries to run'
+          )
+        for query in queries:
+          if isinstance(query, workflow.QueryPath):
+            query_path = query.full_path
+            if re.match(_REMOTE_FILES_PATTERN, query_path):
+              batch[query.path] = reader_client.read(query_path)
+            else:
+              if not query.prefix:
+                query_path = self.wf_parent / pathlib.Path(query.path)
+              if not query_path.exists():
+                raise workflow.GarfWorkflowError(
+                  f'Query: {query_path} not found'
+                )
+              batch[query.path] = reader_client.read(query_path)
+          elif isinstance(query, workflow.QueryFolder):
+            query_path = self.wf_parent / pathlib.Path(query.folder)
+            if not query_path.exists():
+              raise workflow.GarfWorkflowError(
+                f'Folder: {query_path} not found'
+              )
+            for p in query_path.rglob('*'):
+              if p.suffix == '.sql':
+                batch[p.stem] = reader_client.read(p)
+          else:
+            batch[query.query.title] = query.query.text
+        query_executor.execute_batch(
+          batch,
+          step.context,
+          step.parallel_threshold or self.parallel_threshold,
+        )
+        execution_results.append(step_name)
+    return execution_results
+
+  def compile(self, path: str | pathlib.Path) -> str:
+    """Saves workflow with expanded anchors."""
+    return self.workflow.save(path)
+
+  def deploy(self, path: str | pathlib.Path) -> str:
+    """Prepares workflow for deployment to Google Cloud Workflows."""
+    wf = self.workflow.model_dump(exclude_none=True).get('steps')
+    with open(_SCRIPT_PATH / 'gcp_workflow.yaml', 'r', encoding='utf-8') as f:
+      cloud_workflow_run_template = yaml.safe_load(f)
+    init = {
+      'init': {
+        'assign': [{'pairs': wf}],
+      },
+    }
+    cloud_workflow = {
+      'main': {
+        'params': [],
+        'steps': [init, cloud_workflow_run_template],
+      },
+    }
+    with open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(cloud_workflow, f, sort_keys=False)
+    return f'Workflow is saved to {path}'
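Usage mirrors what cli.py now does with `--workflow`, `--workflow-include` and `--workflow-skip`; a sketch with a hypothetical workflow file and alias:

```python
from garf.executors.workflows import workflow_runner

runner = workflow_runner.WorkflowRunner.from_file('workflow.yaml')
completed = runner.run(
  enable_cache=True,
  cache_ttl_seconds=3600,
  selected_aliases=['step_a'],  # only steps with these aliases run
)
print(completed)  # e.g. ['1-fake-step_a']

# deploy() converts the same workflow into a Google Cloud Workflows
# definition built around the bundled gcp_workflow.yaml template.
runner.deploy('cloud_workflow.yaml')
```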
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/PKG-INFO CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: garf-executors
-Version: 1.0.7
+Version: 1.1.3
 Summary: Executes queries against API and writes data to local/remote storage.
 Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
 License: Apache 2.0
@@ -36,6 +36,7 @@ Provides-Extra: gcp
 Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
 Provides-Extra: server
 Requires-Dist: fastapi[standard]; extra == "server"
+Requires-Dist: pydantic-settings; extra == "server"
 Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
 Requires-Dist: typer; extra == "server"
 Requires-Dist: grpcio-reflection; extra == "server"
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/SOURCES.txt CHANGED

@@ -11,15 +11,19 @@ garf/executors/fetchers.py
 garf/executors/garf_pb2.py
 garf/executors/garf_pb2_grpc.py
 garf/executors/query_processor.py
+garf/executors/setup.py
 garf/executors/sql_executor.py
 garf/executors/telemetry.py
-garf/executors/workflow.py
 garf/executors/entrypoints/__init__.py
 garf/executors/entrypoints/cli.py
 garf/executors/entrypoints/grpc_server.py
 garf/executors/entrypoints/server.py
 garf/executors/entrypoints/tracer.py
 garf/executors/entrypoints/utils.py
+garf/executors/workflows/__init__.py
+garf/executors/workflows/gcp_workflow.yaml
+garf/executors/workflows/workflow.py
+garf/executors/workflows/workflow_runner.py
 garf_executors/__init__.py
 garf_executors/api_executor.py
 garf_executors/bq_executor.py
{garf_executors-1.0.7 → garf_executors-1.1.3}/pyproject.toml CHANGED

@@ -55,6 +55,7 @@ gcp= [
 ]
 server=[
     "fastapi[standard]",
+    "pydantic-settings",
     "opentelemetry-instrumentation-fastapi",
     "typer",
     "grpcio-reflection",
@@ -68,6 +69,10 @@ tests = [
 all = [
     "garf-executors[bq,sql,server,gcp]"
 ]
+
+[tool.setuptools.package-data]
+"*"= ["gcp_workflow.yaml"]
+
 [project.scripts]
 garf="garf.executors.entrypoints.cli:main"
 
All remaining files listed above with +0 -0 are unchanged between versions.