garf-executors 0.0.11__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf_executors/__init__.py +13 -4
- garf_executors/api_executor.py +47 -34
- garf_executors/bq_executor.py +42 -22
- garf_executors/config.py +3 -1
- garf_executors/entrypoints/cli.py +80 -24
- garf_executors/entrypoints/grpc_server.py +68 -0
- garf_executors/entrypoints/server.py +30 -8
- garf_executors/entrypoints/tracer.py +57 -0
- garf_executors/entrypoints/utils.py +19 -0
- garf_executors/execution_context.py +40 -7
- garf_executors/executor.py +84 -14
- garf_executors/fetchers.py +16 -5
- garf_executors/garf_pb2.py +45 -0
- garf_executors/garf_pb2_grpc.py +97 -0
- garf_executors/sql_executor.py +41 -18
- garf_executors/telemetry.py +20 -0
- garf_executors/workflow.py +96 -0
- {garf_executors-0.0.11.dist-info → garf_executors-0.2.3.dist-info}/METADATA +13 -4
- garf_executors-0.2.3.dist-info/RECORD +24 -0
- garf_executors-0.0.11.dist-info/RECORD +0 -18
- {garf_executors-0.0.11.dist-info → garf_executors-0.2.3.dist-info}/WHEEL +0 -0
- {garf_executors-0.0.11.dist-info → garf_executors-0.2.3.dist-info}/entry_points.txt +0 -0
- {garf_executors-0.0.11.dist-info → garf_executors-0.2.3.dist-info}/top_level.txt +0 -0
garf_executors/__init__.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
"""Executors to fetch data from various
|
|
14
|
+
"""Executors to fetch data from various APIs."""
|
|
15
15
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
@@ -19,10 +19,15 @@ import importlib
|
|
|
19
19
|
|
|
20
20
|
from garf_executors import executor, fetchers
|
|
21
21
|
from garf_executors.api_executor import ApiExecutionContext, ApiQueryExecutor
|
|
22
|
+
from garf_executors.telemetry import tracer
|
|
22
23
|
|
|
23
24
|
|
|
25
|
+
@tracer.start_as_current_span('setup_executor')
|
|
24
26
|
def setup_executor(
|
|
25
|
-
source: str,
|
|
27
|
+
source: str,
|
|
28
|
+
fetcher_parameters: dict[str, str | int | bool],
|
|
29
|
+
enable_cache: bool = False,
|
|
30
|
+
cache_ttl_seconds: int = 3600,
|
|
26
31
|
) -> type[executor.Executor]:
|
|
27
32
|
"""Initializes executors based on a source and parameters."""
|
|
28
33
|
if source == 'bq':
|
|
@@ -38,7 +43,11 @@ def setup_executor(
|
|
|
38
43
|
else:
|
|
39
44
|
concrete_api_fetcher = fetchers.get_report_fetcher(source)
|
|
40
45
|
query_executor = ApiQueryExecutor(
|
|
41
|
-
concrete_api_fetcher(
|
|
46
|
+
fetcher=concrete_api_fetcher(
|
|
47
|
+
**fetcher_parameters,
|
|
48
|
+
enable_cache=enable_cache,
|
|
49
|
+
cache_ttl_seconds=cache_ttl_seconds,
|
|
50
|
+
)
|
|
42
51
|
)
|
|
43
52
|
return query_executor
|
|
44
53
|
|
|
@@ -48,4 +57,4 @@ __all__ = [
|
|
|
48
57
|
'ApiExecutionContext',
|
|
49
58
|
]
|
|
50
59
|
|
|
51
|
-
__version__ = '0.0.11'
|
|
60
|
+
__version__ = '0.2.3'
|
garf_executors/api_executor.py
CHANGED
|
@@ -23,8 +23,10 @@ from __future__ import annotations
|
|
|
23
23
|
import logging
|
|
24
24
|
|
|
25
25
|
from garf_core import report_fetcher
|
|
26
|
+
from opentelemetry import trace
|
|
26
27
|
|
|
27
28
|
from garf_executors import exceptions, execution_context, executor, fetchers
|
|
29
|
+
from garf_executors.telemetry import tracer
|
|
28
30
|
|
|
29
31
|
logger = logging.getLogger(__name__)
|
|
30
32
|
|
|
@@ -32,7 +34,7 @@ logger = logging.getLogger(__name__)
|
|
|
32
34
|
class ApiExecutionContext(execution_context.ExecutionContext):
|
|
33
35
|
"""Common context for executing one or more queries."""
|
|
34
36
|
|
|
35
|
-
writer: str = 'console'
|
|
37
|
+
writer: str | list[str] = 'console'
|
|
36
38
|
|
|
37
39
|
|
|
38
40
|
class ApiQueryExecutor(executor.Executor):
|
|
@@ -49,34 +51,31 @@ class ApiQueryExecutor(executor.Executor):
|
|
|
49
51
|
fetcher: Instantiated report fetcher.
|
|
50
52
|
"""
|
|
51
53
|
self.fetcher = fetcher
|
|
54
|
+
super().__init__(
|
|
55
|
+
preprocessors=self.fetcher.preprocessors,
|
|
56
|
+
postprocessors=self.fetcher.postprocessors,
|
|
57
|
+
)
|
|
52
58
|
|
|
53
59
|
@classmethod
|
|
54
60
|
def from_fetcher_alias(
|
|
55
|
-
cls,
|
|
61
|
+
cls,
|
|
62
|
+
source: str,
|
|
63
|
+
fetcher_parameters: dict[str, str] | None = None,
|
|
64
|
+
enable_cache: bool = False,
|
|
65
|
+
cache_ttl_seconds: int = 3600,
|
|
56
66
|
) -> ApiQueryExecutor:
|
|
57
67
|
if not fetcher_parameters:
|
|
58
68
|
fetcher_parameters = {}
|
|
59
69
|
concrete_api_fetcher = fetchers.get_report_fetcher(source)
|
|
60
|
-
return ApiQueryExecutor(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
) -> str:
|
|
68
|
-
"""Performs query execution asynchronously.
|
|
69
|
-
|
|
70
|
-
Args:
|
|
71
|
-
query: Location of the query.
|
|
72
|
-
title: Name of the query.
|
|
73
|
-
context: Query execution context.
|
|
74
|
-
|
|
75
|
-
Returns:
|
|
76
|
-
Result of writing the report.
|
|
77
|
-
"""
|
|
78
|
-
return await self.execute(query, context, title, context)
|
|
70
|
+
return ApiQueryExecutor(
|
|
71
|
+
fetcher=concrete_api_fetcher(
|
|
72
|
+
**fetcher_parameters,
|
|
73
|
+
enable_cache=enable_cache,
|
|
74
|
+
cache_ttl_seconds=cache_ttl_seconds,
|
|
75
|
+
)
|
|
76
|
+
)
|
|
79
77
|
|
|
78
|
+
@tracer.start_as_current_span('api.execute')
|
|
80
79
|
def execute(
|
|
81
80
|
self,
|
|
82
81
|
query: str,
|
|
@@ -96,27 +95,41 @@ class ApiQueryExecutor(executor.Executor):
|
|
|
96
95
|
Raises:
|
|
97
96
|
GarfExecutorError: When failed to execute query.
|
|
98
97
|
"""
|
|
98
|
+
span = trace.get_current_span()
|
|
99
|
+
span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
|
|
100
|
+
span.set_attribute(
|
|
101
|
+
'api.client.class', self.fetcher.api_client.__class__.__name__
|
|
102
|
+
)
|
|
99
103
|
try:
|
|
104
|
+
span.set_attribute('query.title', title)
|
|
105
|
+
span.set_attribute('query.text', query)
|
|
100
106
|
logger.debug('starting query %s', query)
|
|
101
107
|
results = self.fetcher.fetch(
|
|
102
108
|
query_specification=query,
|
|
103
109
|
args=context.query_parameters,
|
|
104
110
|
**context.fetcher_parameters,
|
|
105
111
|
)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
'
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
112
|
+
writer_clients = context.writer_clients
|
|
113
|
+
if not writer_clients:
|
|
114
|
+
logger.warning('No writers configured, skipping write operation')
|
|
115
|
+
return None
|
|
116
|
+
writing_results = []
|
|
117
|
+
for writer_client in writer_clients:
|
|
118
|
+
logger.debug(
|
|
119
|
+
'Start writing data for query %s via %s writer',
|
|
120
|
+
title,
|
|
121
|
+
type(writer_client),
|
|
122
|
+
)
|
|
123
|
+
result = writer_client.write(results, title)
|
|
124
|
+
logger.debug(
|
|
125
|
+
'Finish writing data for query %s via %s writer',
|
|
126
|
+
title,
|
|
127
|
+
type(writer_client),
|
|
128
|
+
)
|
|
129
|
+
writing_results.append(result)
|
|
118
130
|
logger.info('%s executed successfully', title)
|
|
119
|
-
|
|
131
|
+
# Return the last writer's result for backward compatibility
|
|
132
|
+
return writing_results[-1] if writing_results else None
|
|
120
133
|
except Exception as e:
|
|
121
134
|
logger.error('%s generated an exception: %s', title, str(e))
|
|
122
135
|
raise exceptions.GarfExecutorError(
|
garf_executors/bq_executor.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
+
import contextlib
|
|
18
19
|
import os
|
|
19
20
|
|
|
20
21
|
try:
|
|
@@ -29,8 +30,10 @@ import logging
|
|
|
29
30
|
|
|
30
31
|
from garf_core import query_editor, report
|
|
31
32
|
from google.cloud import exceptions as google_cloud_exceptions
|
|
33
|
+
from opentelemetry import trace
|
|
32
34
|
|
|
33
35
|
from garf_executors import exceptions, execution_context, executor
|
|
36
|
+
from garf_executors.telemetry import tracer
|
|
34
37
|
|
|
35
38
|
logger = logging.getLogger(__name__)
|
|
36
39
|
|
|
@@ -66,12 +69,14 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
66
69
|
)
|
|
67
70
|
self.project_id = project_id
|
|
68
71
|
self.location = location
|
|
72
|
+
super().__init__()
|
|
69
73
|
|
|
70
74
|
@property
|
|
71
75
|
def client(self) -> bigquery.Client:
|
|
72
76
|
"""Instantiates bigquery client."""
|
|
73
77
|
return bigquery.Client(self.project_id)
|
|
74
78
|
|
|
79
|
+
@tracer.start_as_current_span('bq.execute')
|
|
75
80
|
def execute(
|
|
76
81
|
self,
|
|
77
82
|
query: str,
|
|
@@ -90,35 +95,49 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
90
95
|
Returns:
|
|
91
96
|
Report with data if query returns some data otherwise empty Report.
|
|
92
97
|
"""
|
|
98
|
+
span = trace.get_current_span()
|
|
99
|
+
logger.info('Executing script: %s', title)
|
|
93
100
|
query_text = self.replace_params_template(query, context.query_parameters)
|
|
94
101
|
self.create_datasets(context.query_parameters.macro)
|
|
95
102
|
job = self.client.query(query_text)
|
|
96
103
|
try:
|
|
97
104
|
result = job.result()
|
|
105
|
+
except google_cloud_exceptions.GoogleCloudError as e:
|
|
106
|
+
raise BigQueryExecutorError(
|
|
107
|
+
f'Failed to execute query {title}: Reason: {e}'
|
|
108
|
+
) from e
|
|
98
109
|
logger.debug('%s launched successfully', title)
|
|
99
|
-
|
|
100
|
-
|
|
110
|
+
if result.total_rows:
|
|
111
|
+
results = report.GarfReport.from_pandas(result.to_dataframe())
|
|
112
|
+
else:
|
|
113
|
+
results = report.GarfReport()
|
|
114
|
+
if context.writer and results:
|
|
115
|
+
writer_clients = context.writer_clients
|
|
116
|
+
if not writer_clients:
|
|
117
|
+
logger.warning('No writers configured, skipping write operation')
|
|
101
118
|
else:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
119
|
+
writing_results = []
|
|
120
|
+
for writer_client in writer_clients:
|
|
121
|
+
logger.debug(
|
|
122
|
+
'Start writing data for query %s via %s writer',
|
|
123
|
+
title,
|
|
124
|
+
type(writer_client),
|
|
125
|
+
)
|
|
126
|
+
writing_result = writer_client.write(results, title)
|
|
127
|
+
logger.debug(
|
|
128
|
+
'Finish writing data for query %s via %s writer',
|
|
129
|
+
title,
|
|
130
|
+
type(writer_client),
|
|
131
|
+
)
|
|
132
|
+
writing_results.append(writing_result)
|
|
133
|
+
# Return the last writer's result for backward compatibility
|
|
116
134
|
logger.info('%s executed successfully', title)
|
|
117
|
-
return
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
135
|
+
return writing_results[-1] if writing_results else None
|
|
136
|
+
logger.info('%s executed successfully', title)
|
|
137
|
+
span.set_attribute('execute.num_results', len(results))
|
|
138
|
+
return results
|
|
121
139
|
|
|
140
|
+
@tracer.start_as_current_span('bq.create_datasets')
|
|
122
141
|
def create_datasets(self, macros: dict | None) -> None:
|
|
123
142
|
"""Creates datasets in BQ based on values in a dict.
|
|
124
143
|
|
|
@@ -136,8 +155,9 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
136
155
|
except google_cloud_exceptions.NotFound:
|
|
137
156
|
bq_dataset = bigquery.Dataset(dataset_id)
|
|
138
157
|
bq_dataset.location = self.location
|
|
139
|
-
|
|
140
|
-
|
|
158
|
+
with contextlib.suppress(google_cloud_exceptions.Conflict):
|
|
159
|
+
self.client.create_dataset(bq_dataset, timeout=30)
|
|
160
|
+
logger.info('Created new dataset %s', dataset_id)
|
|
141
161
|
|
|
142
162
|
|
|
143
163
|
def extract_datasets(macros: dict | None) -> list[str]:
|
garf_executors/config.py
CHANGED
|
@@ -47,5 +47,7 @@ class Config(pydantic.BaseModel):
|
|
|
47
47
|
def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
|
|
48
48
|
"""Saves config to local or remote yaml file."""
|
|
49
49
|
with smart_open.open(path, 'w', encoding='utf-8') as f:
|
|
50
|
-
yaml.dump(
|
|
50
|
+
yaml.dump(
|
|
51
|
+
self.model_dump(exclude_none=True).get('sources'), f, encoding='utf-8'
|
|
52
|
+
)
|
|
51
53
|
return f'Config is saved to {str(path)}'
|
|
@@ -20,24 +20,33 @@ storage.
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
22
|
import argparse
|
|
23
|
+
import logging
|
|
23
24
|
import sys
|
|
24
25
|
|
|
25
26
|
from garf_io import reader
|
|
27
|
+
from opentelemetry import trace
|
|
26
28
|
|
|
27
29
|
import garf_executors
|
|
28
|
-
from garf_executors import config, exceptions
|
|
30
|
+
from garf_executors import config, exceptions, workflow
|
|
29
31
|
from garf_executors.entrypoints import utils
|
|
32
|
+
from garf_executors.entrypoints.tracer import initialize_tracer
|
|
33
|
+
from garf_executors.telemetry import tracer
|
|
30
34
|
|
|
35
|
+
initialize_tracer()
|
|
31
36
|
|
|
37
|
+
|
|
38
|
+
@tracer.start_as_current_span('garf.entrypoints.cli')
|
|
32
39
|
def main():
|
|
33
40
|
parser = argparse.ArgumentParser()
|
|
34
41
|
parser.add_argument('query', nargs='*')
|
|
35
42
|
parser.add_argument('-c', '--config', dest='config', default=None)
|
|
43
|
+
parser.add_argument('-w', '--workflow', dest='workflow', default=None)
|
|
36
44
|
parser.add_argument('--source', dest='source', default=None)
|
|
37
45
|
parser.add_argument('--output', dest='output', default='console')
|
|
38
46
|
parser.add_argument('--input', dest='input', default='file')
|
|
39
47
|
parser.add_argument('--log', '--loglevel', dest='loglevel', default='info')
|
|
40
48
|
parser.add_argument('--logger', dest='logger', default='local')
|
|
49
|
+
parser.add_argument('--log-name', dest='log_name', default='garf')
|
|
41
50
|
parser.add_argument(
|
|
42
51
|
'--parallel-queries', dest='parallel_queries', action='store_true'
|
|
43
52
|
)
|
|
@@ -47,61 +56,108 @@ def main():
|
|
|
47
56
|
parser.add_argument('--dry-run', dest='dry_run', action='store_true')
|
|
48
57
|
parser.add_argument('-v', '--version', dest='version', action='store_true')
|
|
49
58
|
parser.add_argument(
|
|
50
|
-
'--parallel-threshold', dest='parallel_threshold', default=
|
|
59
|
+
'--parallel-threshold', dest='parallel_threshold', default=10, type=int
|
|
60
|
+
)
|
|
61
|
+
parser.add_argument(
|
|
62
|
+
'--enable-cache', dest='enable_cache', action='store_true'
|
|
63
|
+
)
|
|
64
|
+
parser.add_argument(
|
|
65
|
+
'--cache-ttl-seconds',
|
|
66
|
+
dest='cache_ttl_seconds',
|
|
67
|
+
default=3600,
|
|
68
|
+
type=int,
|
|
51
69
|
)
|
|
52
70
|
parser.set_defaults(parallel_queries=True)
|
|
71
|
+
parser.set_defaults(enable_cache=False)
|
|
53
72
|
parser.set_defaults(dry_run=False)
|
|
54
73
|
args, kwargs = parser.parse_known_args()
|
|
55
74
|
|
|
75
|
+
span = trace.get_current_span()
|
|
76
|
+
command_args = ' '.join(sys.argv[1:])
|
|
77
|
+
span.set_attribute('cli.command', f'garf {command_args}')
|
|
56
78
|
if args.version:
|
|
57
79
|
print(garf_executors.__version__)
|
|
58
80
|
sys.exit()
|
|
59
81
|
logger = utils.init_logging(
|
|
60
|
-
loglevel=args.loglevel.upper(), logger_type=args.logger
|
|
82
|
+
loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
|
|
61
83
|
)
|
|
84
|
+
reader_client = reader.create_reader(args.input)
|
|
85
|
+
if workflow_file := args.workflow:
|
|
86
|
+
execution_workflow = workflow.Workflow.from_file(workflow_file)
|
|
87
|
+
for i, step in enumerate(execution_workflow.steps, 1):
|
|
88
|
+
with tracer.start_as_current_span(f'{i}-{step.fetcher}'):
|
|
89
|
+
query_executor = garf_executors.setup_executor(
|
|
90
|
+
source=step.fetcher,
|
|
91
|
+
fetcher_parameters=step.fetcher_parameters,
|
|
92
|
+
enable_cache=args.enable_cache,
|
|
93
|
+
cache_ttl_seconds=args.cache_ttl_seconds,
|
|
94
|
+
)
|
|
95
|
+
batch = {}
|
|
96
|
+
if not (queries := step.queries):
|
|
97
|
+
logger.error('Please provide one or more queries to run')
|
|
98
|
+
raise exceptions.GarfExecutorError(
|
|
99
|
+
'Please provide one or more queries to run'
|
|
100
|
+
)
|
|
101
|
+
for query in queries:
|
|
102
|
+
if isinstance(query, garf_executors.workflow.QueryPath):
|
|
103
|
+
batch[query.path] = reader_client.read(query.path)
|
|
104
|
+
else:
|
|
105
|
+
batch[query.query.title] = query.query.text
|
|
106
|
+
query_executor.execute_batch(
|
|
107
|
+
batch, step.context, args.parallel_threshold
|
|
108
|
+
)
|
|
109
|
+
sys.exit()
|
|
110
|
+
|
|
62
111
|
if not args.query:
|
|
63
112
|
logger.error('Please provide one or more queries to run')
|
|
64
113
|
raise exceptions.GarfExecutorError(
|
|
65
114
|
'Please provide one or more queries to run'
|
|
66
115
|
)
|
|
67
|
-
reader_client = reader.create_reader(args.input)
|
|
68
116
|
if config_file := args.config:
|
|
69
117
|
execution_config = config.Config.from_file(config_file)
|
|
70
118
|
if not (context := execution_config.sources.get(args.source)):
|
|
71
119
|
raise exceptions.GarfExecutorError(
|
|
72
120
|
f'No execution context found for source {args.source} in {config_file}'
|
|
73
121
|
)
|
|
74
|
-
query_executor = garf_executors.setup_executor(
|
|
75
|
-
args.source, context.fetcher_parameters
|
|
76
|
-
)
|
|
77
|
-
batch = {query: reader_client.read(query) for query in args.query}
|
|
78
|
-
query_executor.execute_batch(batch, context, args.parallel_queries)
|
|
79
122
|
else:
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
).parse(
|
|
123
|
+
param_types = ['source', 'macro', 'template']
|
|
124
|
+
outputs = args.output.split(',')
|
|
125
|
+
extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(
|
|
126
|
+
kwargs
|
|
127
|
+
)
|
|
83
128
|
source_parameters = extra_parameters.get('source', {})
|
|
129
|
+
writer_parameters = {}
|
|
130
|
+
for output in outputs:
|
|
131
|
+
writer_parameters.update(extra_parameters.get(output))
|
|
84
132
|
|
|
85
133
|
context = garf_executors.api_executor.ApiExecutionContext(
|
|
86
134
|
query_parameters={
|
|
87
135
|
'macro': extra_parameters.get('macro'),
|
|
88
136
|
'template': extra_parameters.get('template'),
|
|
89
137
|
},
|
|
90
|
-
writer=
|
|
91
|
-
writer_parameters=
|
|
138
|
+
writer=outputs,
|
|
139
|
+
writer_parameters=writer_parameters,
|
|
92
140
|
fetcher_parameters=source_parameters,
|
|
93
141
|
)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
142
|
+
query_executor = garf_executors.setup_executor(
|
|
143
|
+
source=args.source,
|
|
144
|
+
fetcher_parameters=context.fetcher_parameters,
|
|
145
|
+
enable_cache=args.enable_cache,
|
|
146
|
+
cache_ttl_seconds=args.cache_ttl_seconds,
|
|
147
|
+
)
|
|
148
|
+
batch = {query: reader_client.read(query) for query in args.query}
|
|
149
|
+
if args.parallel_queries and len(args.query) > 1:
|
|
150
|
+
logger.info('Running queries in parallel')
|
|
151
|
+
batch = {query: reader_client.read(query) for query in args.query}
|
|
152
|
+
query_executor.execute_batch(batch, context, args.parallel_threshold)
|
|
153
|
+
else:
|
|
154
|
+
if len(args.query) > 1:
|
|
102
155
|
logger.info('Running queries sequentially')
|
|
103
|
-
|
|
104
|
-
|
|
156
|
+
for query in args.query:
|
|
157
|
+
query_executor.execute(
|
|
158
|
+
query=reader_client.read(query), title=query, context=context
|
|
159
|
+
)
|
|
160
|
+
logging.shutdown()
|
|
105
161
|
|
|
106
162
|
|
|
107
163
|
if __name__ == '__main__':
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""gRPC endpoint for garf."""
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import logging
|
|
19
|
+
from concurrent import futures
|
|
20
|
+
|
|
21
|
+
import grpc
|
|
22
|
+
from google.protobuf.json_format import MessageToDict
|
|
23
|
+
from grpc_reflection.v1alpha import reflection
|
|
24
|
+
|
|
25
|
+
import garf_executors
|
|
26
|
+
from garf_executors import garf_pb2, garf_pb2_grpc
|
|
27
|
+
from garf_executors.entrypoints.tracer import initialize_tracer
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class GarfService(garf_pb2_grpc.GarfService):
  """gRPC service that executes one garf query per request."""

  def Execute(self, request, context):
    """Runs the query carried by the request and returns its result.

    Args:
      request: Incoming message; its `source`, `query`, `title` and `context`
        fields are read.
      context: gRPC call context (not used by this method).

    Returns:
      An `ExecuteResponse` whose `results` holds the single execution result.
    """
    request_context = request.context
    # The executor is chosen by source and configured from the fetcher
    # parameters attached to the request context.
    runner = garf_executors.setup_executor(
      request.source, request_context.fetcher_parameters
    )
    # Field names are kept as declared in the proto so that they can be
    # passed straight through as keyword arguments.
    context_fields = MessageToDict(
      request_context, preserving_proto_field_name=True
    )
    execution_context = garf_executors.execution_context.ExecutionContext(
      **context_fields
    )
    outcome = runner.execute(
      query=request.query,
      title=request.title,
      context=execution_context,
    )
    return garf_pb2.ExecuteResponse(results=[outcome])
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
if __name__ == '__main__':
  # Command-line options: the listening port and the size of the worker
  # thread pool that serves incoming requests.
  parser = argparse.ArgumentParser()
  parser.add_argument('--port', dest='port', default=50051, type=int)
  parser.add_argument(
    '--parallel-threshold', dest='parallel_threshold', default=10, type=int
  )
  # Unrecognized arguments are tolerated and discarded.
  args, _ = parser.parse_known_args()
  initialize_tracer()
  server = grpc.server(
    futures.ThreadPoolExecutor(max_workers=args.parallel_threshold)
  )

  service = GarfService()
  garf_pb2_grpc.add_GarfServiceServicer_to_server(service, server)
  # Expose both the garf service and the reflection service via reflection.
  SERVICE_NAMES = (
    garf_pb2.DESCRIPTOR.services_by_name['GarfService'].full_name,
    reflection.SERVICE_NAME,
  )
  reflection.enable_server_reflection(SERVICE_NAMES, server)
  server.add_insecure_port(f'[::]:{args.port}')
  server.start()
  # Report the port that was actually bound rather than the parser default,
  # so the message stays correct when --port is supplied.
  # NOTE(review): no logging configuration is visible in this file; with the
  # default root logger level an INFO record may not be shown - confirm.
  logging.info('Garf service started, listening on port %d', args.port)
  server.wait_for_termination()
|
|
@@ -18,11 +18,20 @@ from typing import Optional, Union
|
|
|
18
18
|
|
|
19
19
|
import fastapi
|
|
20
20
|
import pydantic
|
|
21
|
+
import typer
|
|
21
22
|
import uvicorn
|
|
22
23
|
from garf_io import reader
|
|
24
|
+
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
|
25
|
+
from typing_extensions import Annotated
|
|
23
26
|
|
|
24
27
|
import garf_executors
|
|
25
28
|
from garf_executors import exceptions
|
|
29
|
+
from garf_executors.entrypoints.tracer import initialize_tracer
|
|
30
|
+
|
|
31
|
+
initialize_tracer()
|
|
32
|
+
app = fastapi.FastAPI()
|
|
33
|
+
FastAPIInstrumentor.instrument_app(app)
|
|
34
|
+
typer_app = typer.Typer()
|
|
26
35
|
|
|
27
36
|
|
|
28
37
|
class ApiExecutorRequest(pydantic.BaseModel):
|
|
@@ -40,7 +49,7 @@ class ApiExecutorRequest(pydantic.BaseModel):
|
|
|
40
49
|
title: Optional[str] = None
|
|
41
50
|
query: Optional[str] = None
|
|
42
51
|
query_path: Optional[Union[str, list[str]]] = None
|
|
43
|
-
context: garf_executors.ApiExecutionContext
|
|
52
|
+
context: garf_executors.api_executor.ApiExecutionContext
|
|
44
53
|
|
|
45
54
|
@pydantic.model_validator(mode='after')
|
|
46
55
|
def check_query_specified(self):
|
|
@@ -67,10 +76,18 @@ class ApiExecutorResponse(pydantic.BaseModel):
|
|
|
67
76
|
results: list[str]
|
|
68
77
|
|
|
69
78
|
|
|
70
|
-
|
|
79
|
+
@app.get('/api/version')
async def version() -> str:
  # Reports the version string of the installed garf_executors package.
  # Kept as a comment (not a docstring) so the endpoint metadata is unchanged.
  installed_version = garf_executors.__version__
  return installed_version
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@app.get('/api/fetchers')
async def get_fetchers() -> list[str]:
  """Shows all available API sources."""
  # Materialize whatever iterable the fetcher discovery returns into a list
  # so that it matches the declared response type.
  discovered = garf_executors.fetchers.find_fetchers()
  return list(discovered)
|
|
71
88
|
|
|
72
89
|
|
|
73
|
-
@
|
|
90
|
+
@app.post('/api/execute')
|
|
74
91
|
async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
75
92
|
query_executor = garf_executors.setup_executor(
|
|
76
93
|
request.source, request.context.fetcher_parameters
|
|
@@ -79,8 +96,8 @@ async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
|
79
96
|
return ApiExecutorResponse(results=[result])
|
|
80
97
|
|
|
81
98
|
|
|
82
|
-
@
|
|
83
|
-
|
|
99
|
+
@app.post('/api/execute:batch')
|
|
100
|
+
def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
84
101
|
query_executor = garf_executors.setup_executor(
|
|
85
102
|
request.source, request.context.fetcher_parameters
|
|
86
103
|
)
|
|
@@ -90,7 +107,12 @@ async def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
|
90
107
|
return ApiExecutorResponse(results=results)
|
|
91
108
|
|
|
92
109
|
|
|
110
|
+
@typer_app.command()
def main(
  port: Annotated[int, typer.Option(help='Port to start the server')] = 8000,
):
  # Starts uvicorn serving the FastAPI application on the requested port.
  # Documented with a comment rather than a docstring so that the command's
  # help output stays exactly as it was.
  uvicorn.run(app, port=port)
|
|
115
|
+
|
|
116
|
+
|
|
93
117
|
if __name__ == '__main__':
|
|
94
|
-
|
|
95
|
-
app.include_router(router)
|
|
96
|
-
uvicorn.run(app)
|
|
118
|
+
typer_app()
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Copyright 2026 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
from opentelemetry import trace
|
|
18
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
|
|
19
|
+
OTLPSpanExporter,
|
|
20
|
+
)
|
|
21
|
+
from opentelemetry.sdk.resources import Resource
|
|
22
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
23
|
+
from opentelemetry.sdk.trace.export import (
|
|
24
|
+
BatchSpanProcessor,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
DEFAULT_SERVICE_NAME = 'garf'
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def initialize_tracer():
  """Creates a tracer provider and installs it as the global provider.

  The service name is read from the `OTLP_SERVICE_NAME` environment variable
  and falls back to `DEFAULT_SERVICE_NAME`. A span processor is attached only
  when `OTEL_EXPORTER_OTLP_ENDPOINT` is set: if `OTEL_EXPORTER_GCP_PROJECT_ID`
  is set as well, spans go to the Cloud Trace exporter for that project,
  otherwise they go to an OTLP gRPC exporter pointed at the endpoint.

  Raises:
    ImportError: When the Cloud Trace exporter is requested but its package
      is not installed.
  """
  service_name = os.getenv('OTLP_SERVICE_NAME', DEFAULT_SERVICE_NAME)
  provider = TracerProvider(
    resource=Resource.create({'service.name': service_name})
  )

  endpoint = os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT')
  if endpoint:
    project_id = os.getenv('OTEL_EXPORTER_GCP_PROJECT_ID')
    if not project_id:
      exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)
    else:
      # Imported lazily: the Cloud Trace exporter is an optional extra.
      try:
        from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
      except ImportError as e:
        raise ImportError(
          'Please install garf_executors with GCP support '
          '- `pip install garf_executors[gcp]`'
        ) from e
      exporter = CloudTraceSpanExporter(project_id=project_id)
    provider.add_span_processor(BatchSpanProcessor(exporter))

  trace.set_tracer_provider(provider)
|