garf-executors 0.0.11__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf_executors/__init__.py +13 -4
- garf_executors/api_executor.py +47 -34
- garf_executors/bq_executor.py +42 -22
- garf_executors/config.py +3 -1
- garf_executors/entrypoints/cli.py +80 -24
- garf_executors/entrypoints/grpc_server.py +68 -0
- garf_executors/entrypoints/server.py +30 -8
- garf_executors/entrypoints/tracer.py +57 -0
- garf_executors/entrypoints/utils.py +19 -0
- garf_executors/execution_context.py +40 -7
- garf_executors/executor.py +84 -14
- garf_executors/fetchers.py +16 -5
- garf_executors/garf_pb2.py +45 -0
- garf_executors/garf_pb2_grpc.py +97 -0
- garf_executors/sql_executor.py +41 -18
- garf_executors/telemetry.py +20 -0
- garf_executors/workflow.py +96 -0
- {garf_executors-0.0.11.dist-info → garf_executors-0.2.3.dist-info}/METADATA +13 -4
- garf_executors-0.2.3.dist-info/RECORD +24 -0
- garf_executors-0.0.11.dist-info/RECORD +0 -18
- {garf_executors-0.0.11.dist-info → garf_executors-0.2.3.dist-info}/WHEEL +0 -0
- {garf_executors-0.0.11.dist-info → garf_executors-0.2.3.dist-info}/entry_points.txt +0 -0
- {garf_executors-0.0.11.dist-info → garf_executors-0.2.3.dist-info}/top_level.txt +0 -0
garf_executors/entrypoints/utils.py
CHANGED

@@ -93,6 +93,7 @@ class GarfParamsException(Exception):
 class LoggerEnum(str, enum.Enum):
   local = 'local'
   rich = 'rich'
+  gcloud = 'gcloud'


 def init_logging(
@@ -100,6 +101,7 @@ def init_logging(
   logger_type: str | LoggerEnum = 'local',
   name: str = __name__,
 ) -> logging.Logger:
+  loglevel = getattr(logging, loglevel)
   if logger_type == 'rich':
     logging.basicConfig(
       format='%(message)s',
@@ -109,6 +111,23 @@ def init_logging(
         rich_logging.RichHandler(rich_tracebacks=True),
       ],
     )
+  elif logger_type == 'gcloud':
+    try:
+      import google.cloud.logging as glogging
+    except ImportError as e:
+      raise ImportError(
+        'Please install garf-executors with Cloud logging support - '
+        '`pip install garf-executors[bq]`'
+      ) from e
+
+    client = glogging.Client()
+    handler = glogging.handlers.CloudLoggingHandler(client, name=name)
+    handler.close()
+    glogging.handlers.setup_logging(handler, log_level=loglevel)
+    logging.basicConfig(
+      level=loglevel,
+      handlers=[handler],
+    )
   else:
     logging.basicConfig(
       format='[%(asctime)s][%(name)s][%(levelname)s] %(message)s',
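The last hunk adds a third logging backend that routes records through google-cloud-logging. A minimal usage sketch, assuming these hunks belong to garf_executors/entrypoints/utils.py (the only file in this release with a matching +19/-0 delta) and that the Cloud logging dependency is installed; loglevel is resolved with getattr(logging, loglevel), so it must name a logging level attribute such as 'INFO' or 'DEBUG':

    from garf_executors.entrypoints import utils

    # Routes log records to Cloud Logging instead of the local/rich handlers.
    logger = utils.init_logging(
      loglevel='DEBUG', logger_type='gcloud', name='my-app'
    )
    logger.info('Visible in Cloud Logging under the "my-app" log name')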
garf_executors/execution_context.py
CHANGED

@@ -35,17 +35,17 @@ class ExecutionContext(pydantic.BaseModel):
   Attributes:
     query_parameters: Parameters to dynamically change query text.
     fetcher_parameters: Parameters to specify fetching setup.
-    writer: Type of writer to use.
+    writer: Type of writer to use. Can be a single writer string or list of writers.
     writer_parameters: Optional parameters to setup writer.
   """

   query_parameters: query_editor.GarfQueryParameters | None = pydantic.Field(
     default_factory=dict
   )
-  fetcher_parameters: dict[str, str | list[str | int]] | None = pydantic.Field(
-    default_factory=dict
+  fetcher_parameters: dict[str, str | bool | int | list[str | int]] | None = (
+    pydantic.Field(default_factory=dict)
   )
-  writer: str | None = None
+  writer: str | list[str] | None = None
   writer_parameters: dict[str, str] | None = pydantic.Field(
     default_factory=dict
   )
@@ -75,9 +75,42 @@ class ExecutionContext(pydantic.BaseModel):

   @property
   def writer_client(self) -> abs_writer.AbsWriter:
-    writer_client = writer.create_writer(self.writer, **self.writer_parameters)
-    if self.writer == 'bq':
+    """Returns single writer client."""
+    if isinstance(self.writer, list) and len(self.writer) > 0:
+      writer_type = self.writer[0]
+    else:
+      writer_type = self.writer
+
+    writer_params = self.writer_parameters or {}
+
+    if not writer_type:
+      raise ValueError('No writer specified')
+
+    writer_client = writer.create_writer(writer_type, **writer_params)
+    if writer_type == 'bq':
       _ = writer_client.create_or_get_dataset()
-    if self.writer == 'sheet':
+    if writer_type == 'sheet':
       writer_client.init_client()
     return writer_client
+
+  @property
+  def writer_clients(self) -> list[abs_writer.AbsWriter]:
+    """Returns list of writer clients."""
+    if not self.writer:
+      return []
+
+    # Convert single writer to list for uniform processing
+    writers_to_use = (
+      self.writer if isinstance(self.writer, list) else [self.writer]
+    )
+    writer_params = self.writer_parameters or {}
+
+    clients = []
+    for writer_type in writers_to_use:
+      writer_client = writer.create_writer(writer_type, **writer_params)
+      if writer_type == 'bq':
+        _ = writer_client.create_or_get_dataset()
+      if writer_type == 'sheet':
+        writer_client.init_client()
+      clients.append(writer_client)
+    return clients
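writer now accepts either a single id or a list; writer_clients builds one client per entry (all sharing writer_parameters), while writer_client preserves the old single-writer path by taking the first list element. A short sketch, assuming 'csv' and 'json' writers are registered with garf's writer factory:

    from garf_executors import execution_context

    context = execution_context.ExecutionContext(
      writer=['csv', 'json'],  # fan results out to two destinations
    )
    # One AbsWriter instance per configured writer type.
    for client in context.writer_clients:
      print(type(client).__name__)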
garf_executors/executor.py
CHANGED

@@ -14,14 +14,29 @@

 """Defines common functionality between executors."""

-from concurrent import futures
+import asyncio
+import inspect
+from typing import Optional
+
+from garf_core import report_fetcher
+from opentelemetry import trace

 from garf_executors import execution_context
+from garf_executors.telemetry import tracer


 class Executor:
   """Defines common functionality between executors."""

+  def __init__(
+    self,
+    preprocessors: Optional[dict[str, report_fetcher.Processor]] = None,
+    postprocessors: Optional[dict[str, report_fetcher.Processor]] = None,
+  ) -> None:
+    self.preprocessors = preprocessors or {}
+    self.postprocessors = postprocessors or {}
+
+  @tracer.start_as_current_span('api.execute_batch')
   def execute_batch(
     self,
     batch: dict[str, str],
@@ -30,6 +45,9 @@ class Executor:
   ) -> list[str]:
     """Executes batch of queries for a common context.

+    If an executor has any pre/post processors, executes them first while
+    modifying the context.
+
     Args:
       batch: Mapping between query_title and its text.
       context: Execution context.
@@ -38,17 +56,69 @@ class Executor:
     Returns:
       Results of execution.
     """
-
-
-
-
-
-
-
-
-
-      for title, query in batch.items()
-    }
-    for future in futures.as_completed(future_to_query):
-      results.append(future.result())
+    span = trace.get_current_span()
+    span.set_attribute('api.parallel_threshold', parallel_threshold)
+    _handle_processors(processors=self.preprocessors, context=context)
+    results = asyncio.run(
+      self._run(
+        batch=batch, context=context, parallel_threshold=parallel_threshold
+      )
+    )
+    _handle_processors(processors=self.postprocessors, context=context)
     return results
+
+  def add_preprocessor(
+    self, preprocessors: dict[str, report_fetcher.Processor]
+  ) -> None:
+    self.preprocessors.update(preprocessors)
+
+  async def aexecute(
+    self,
+    query: str,
+    title: str,
+    context: execution_context.ExecutionContext,
+  ) -> str:
+    """Performs query execution asynchronously.
+
+    Args:
+      query: Location of the query.
+      title: Name of the query.
+      context: Query execution context.
+
+    Returns:
+      Result of writing the report.
+    """
+    return await asyncio.to_thread(self.execute, query, title, context)
+
+  async def _run(
+    self,
+    batch: dict[str, str],
+    context: execution_context.ExecutionContext,
+    parallel_threshold: int,
+  ):
+    semaphore = asyncio.Semaphore(value=parallel_threshold)
+
+    async def run_with_semaphore(fn):
+      async with semaphore:
+        return await fn
+
+    tasks = [
+      self.aexecute(query=query, title=title, context=context)
+      for title, query in batch.items()
+    ]
+    return await asyncio.gather(*(run_with_semaphore(task) for task in tasks))
+
+
+def _handle_processors(
+  processors: dict[str, report_fetcher.Processor],
+  context: execution_context.ExecutionContext,
+) -> None:
+  for k, processor in processors.items():
+    processor_signature = list(inspect.signature(processor).parameters.keys())
+    if k in context.fetcher_parameters:
+      processor_parameters = {
+        k: v
+        for k, v in context.fetcher_parameters.items()
+        if k in processor_signature
+      }
+      context.fetcher_parameters[k] = processor(**processor_parameters)
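execute_batch now drives queries through asyncio rather than a thread pool: each query still runs in a worker thread via asyncio.to_thread, while an asyncio.Semaphore caps in-flight queries at parallel_threshold. Processors are keyed by fetcher-parameter name: _handle_processors calls each one with the subset of fetcher_parameters matching its signature and writes the return value back under the same key. A rough sketch of that contract with a hypothetical 'accounts' preprocessor, given some Executor subclass instance api_executor plus a batch and context:

    def expand_accounts(accounts: list[str | int]) -> list[str | int]:
      # Receives context.fetcher_parameters['accounts'] (the only parameter
      # in this function's signature); the return value replaces that entry.
      return [account for account in accounts if account != 'seed']

    api_executor.add_preprocessor({'accounts': expand_accounts})
    results = api_executor.execute_batch(batch, context, parallel_threshold=5)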
garf_executors/fetchers.py
CHANGED

@@ -13,12 +13,18 @@
 # limitations under the License.

 import inspect
+import logging
 import sys
 from importlib.metadata import entry_points

-from garf_core import
+from garf_core import report_fetcher

+from garf_executors.telemetry import tracer

+logger = logging.getLogger(name='garf_executors.fetchers')
+
+
+@tracer.start_as_current_span('find_fetchers')
 def find_fetchers() -> set[str]:
   """Identifiers all available report fetchers."""
   if entrypoints := _get_entrypoints('garf'):
@@ -26,6 +32,7 @@ def find_fetchers() -> set[str]:
   return set()


+@tracer.start_as_current_span('get_report_fetcher')
 def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
   """Loads report fetcher for a given source.

@@ -44,15 +51,19 @@ def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
   for fetcher in _get_entrypoints('garf'):
     if fetcher.name == source:
       try:
-        fetcher_module = fetcher.load()
+        with tracer.start_as_current_span('load_fetcher_module') as span:
+          fetcher_module = fetcher.load()
+          span.set_attribute('loaded_module', fetcher_module.__name__)
         for name, obj in inspect.getmembers(fetcher_module):
           if inspect.isclass(obj) and issubclass(
             obj, report_fetcher.ApiReportFetcher
           ):
             return getattr(fetcher_module, name)
-      except ModuleNotFoundError:
-
-
+      except ModuleNotFoundError as e:
+        raise report_fetcher.ApiReportFetcherError(
+          f'Failed to load fetcher for source {source}, reason: {e}'
+        )
+  raise report_fetcher.ApiReportFetcherError(
     f'No fetcher available for the source "{source}"'
   )

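Both lookups ride on the 'garf' entry-point group, so any installed package that exposes an ApiReportFetcher subclass under that group is discoverable without changes here. A sketch (the plugin names are illustrative):

    from garf_executors import fetchers

    print(fetchers.find_fetchers())  # e.g. {'youtube', 'merchant'}
    # Returns the first ApiReportFetcher subclass found in the entry point's
    # module, or raises ApiReportFetcherError if nothing matches.
    fetcher_class = fetchers.get_report_fetcher('youtube')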
garf_executors/garf_pb2.py
ADDED

@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
+# source: garf.proto
+# Protobuf Python Version: 6.31.1
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+_runtime_version.ValidateProtobufRuntimeVersion(
+    _runtime_version.Domain.PUBLIC,
+    6,
+    31,
+    1,
+    '',
+    'garf.proto'
+)
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ngarf.proto\x12\x04garf\x1a\x1cgoogle/protobuf/struct.proto\"g\n\x0e\x45xecuteRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12\'\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x16.garf.ExecutionContext\"\xbc\x01\n\x10\x45xecutionContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06writer\x18\x03 \x01(\t\x12\x32\n\x11writer_parameters\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\"d\n\x0fQueryParameters\x12&\n\x05macro\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x08template\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\"\n\x0f\x45xecuteResponse\x12\x0f\n\x07results\x18\x01 \x03(\t2G\n\x0bGarfService\x12\x38\n\x07\x45xecute\x12\x14.garf.ExecuteRequest\x1a\x15.garf.ExecuteResponse\"\x00\x62\x06proto3')
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'garf_pb2', _globals)
+if not _descriptor._USE_C_DESCRIPTORS:
+  DESCRIPTOR._loaded_options = None
+  _globals['_EXECUTEREQUEST']._serialized_start=50
+  _globals['_EXECUTEREQUEST']._serialized_end=153
+  _globals['_EXECUTIONCONTEXT']._serialized_start=156
+  _globals['_EXECUTIONCONTEXT']._serialized_end=344
+  _globals['_QUERYPARAMETERS']._serialized_start=346
+  _globals['_QUERYPARAMETERS']._serialized_end=446
+  _globals['_EXECUTERESPONSE']._serialized_start=448
+  _globals['_EXECUTERESPONSE']._serialized_end=482
+  _globals['_GARFSERVICE']._serialized_start=484
+  _globals['_GARFSERVICE']._serialized_end=555
+# @@protoc_insertion_point(module_scope)
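Decoding the serialized descriptor: garf.proto declares ExecuteRequest (source, title, query, context), ExecutionContext (query_parameters, fetcher_parameters, writer, writer_parameters, with the parameter maps typed as google.protobuf.Struct), QueryParameters (macro, template), ExecuteResponse (repeated results), and a GarfService with a single unary Execute RPC. A minimal request-building sketch with illustrative values:

    from google.protobuf import struct_pb2

    from garf_executors import garf_pb2

    fetcher_parameters = struct_pb2.Struct()
    fetcher_parameters.update({'accounts': ['1234567890']})
    request = garf_pb2.ExecuteRequest(
      source='fake',  # illustrative fetcher name
      title='sample_query',
      query='SELECT field FROM resource',
      context=garf_pb2.ExecutionContext(fetcher_parameters=fetcher_parameters),
    )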
garf_executors/garf_pb2_grpc.py
ADDED

@@ -0,0 +1,97 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+import warnings
+
+from . import garf_pb2 as garf__pb2
+
+GRPC_GENERATED_VERSION = '1.75.0'
+GRPC_VERSION = grpc.__version__
+_version_not_supported = False
+
+try:
+    from grpc._utilities import first_version_is_lower
+    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
+except ImportError:
+    _version_not_supported = True
+
+if _version_not_supported:
+    raise RuntimeError(
+        f'The grpc package installed is at version {GRPC_VERSION},'
+        + f' but the generated code in garf_pb2_grpc.py depends on'
+        + f' grpcio>={GRPC_GENERATED_VERSION}.'
+        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
+        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
+    )
+
+
+class GarfServiceStub(object):
+    """Missing associated documentation comment in .proto file."""
+
+    def __init__(self, channel):
+        """Constructor.
+
+        Args:
+            channel: A grpc.Channel.
+        """
+        self.Execute = channel.unary_unary(
+                '/garf.GarfService/Execute',
+                request_serializer=garf__pb2.ExecuteRequest.SerializeToString,
+                response_deserializer=garf__pb2.ExecuteResponse.FromString,
+                _registered_method=True)
+
+
+class GarfServiceServicer(object):
+    """Missing associated documentation comment in .proto file."""
+
+    def Execute(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+
+def add_GarfServiceServicer_to_server(servicer, server):
+    rpc_method_handlers = {
+            'Execute': grpc.unary_unary_rpc_method_handler(
+                    servicer.Execute,
+                    request_deserializer=garf__pb2.ExecuteRequest.FromString,
+                    response_serializer=garf__pb2.ExecuteResponse.SerializeToString,
+            ),
+    }
+    generic_handler = grpc.method_handlers_generic_handler(
+            'garf.GarfService', rpc_method_handlers)
+    server.add_generic_rpc_handlers((generic_handler,))
+    server.add_registered_method_handlers('garf.GarfService', rpc_method_handlers)
+
+
+# This class is part of an EXPERIMENTAL API.
+class GarfService(object):
+    """Missing associated documentation comment in .proto file."""
+
+    @staticmethod
+    def Execute(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/garf.GarfService/Execute',
+            garf__pb2.ExecuteRequest.SerializeToString,
+            garf__pb2.ExecuteResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
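The generated stub is a plain unary client for the service that the new grpc_server.py entrypoint exposes. A hedged sketch of a round trip, assuming a garf gRPC server is listening on localhost:50051:

    import grpc

    from garf_executors import garf_pb2, garf_pb2_grpc

    with grpc.insecure_channel('localhost:50051') as channel:
      stub = garf_pb2_grpc.GarfServiceStub(channel)
      response = stub.Execute(
        garf_pb2.ExecuteRequest(
          source='fake', title='sample_query', query='SELECT field FROM resource'
        )
      )
      print(list(response.results))  # repeated string of execution results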
garf_executors/sql_executor.py
CHANGED

@@ -25,11 +25,14 @@ except ImportError as e:

 import logging
 import re
+import uuid

 import pandas as pd
 from garf_core import query_editor, report
+from opentelemetry import trace

 from garf_executors import exceptions, execution_context, executor
+from garf_executors.telemetry import tracer

 logger = logging.getLogger(__name__)

@@ -54,6 +57,7 @@ class SqlAlchemyQueryExecutor(
       engine: Initialized Engine object to operated on a given database.
     """
     self.engine = engine
+    super().__init__()

   @classmethod
   def from_connection_string(
@@ -66,6 +70,7 @@ class SqlAlchemyQueryExecutor(
     engine = sqlalchemy.create_engine(connection_string)
     return cls(engine)

+  @tracer.start_as_current_span('sql.execute')
   def execute(
     self,
     query: str,
@@ -84,35 +89,53 @@ class SqlAlchemyQueryExecutor(
     Returns:
       Report with data if query returns some data otherwise empty Report.
     """
-
+    span = trace.get_current_span()
+    logger.info('Executing script: %s', title)
     query_text = self.replace_params_template(query, context.query_parameters)
     with self.engine.begin() as conn:
       if re.findall(r'(create|update) ', query_text.lower()):
-        conn.connection.executescript(query_text)
-        results = report.GarfReport()
+        try:
+          conn.connection.executescript(query_text)
+          results = report.GarfReport()
+        except Exception as e:
+          raise SqlAlchemyQueryExecutorError(
+            f'Failed to execute query {title}: Reason: {e}'
+          ) from e
       else:
-        temp_table_name = f'temp_{
+        temp_table_name = f'temp_{uuid.uuid4().hex}'
         query_text = f'CREATE TABLE {temp_table_name} AS {query_text}'
         conn.connection.executescript(query_text)
         try:
           results = report.GarfReport.from_pandas(
             pd.read_sql(f'SELECT * FROM {temp_table_name}', conn)
           )
+        except Exception as e:
+          raise SqlAlchemyQueryExecutorError(
+            f'Failed to execute query {title}: Reason: {e}'
+          ) from e
         finally:
           conn.connection.execute(f'DROP TABLE {temp_table_name}')
     if context.writer and results:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      writer_clients = context.writer_clients
+      if not writer_clients:
+        logger.warning('No writers configured, skipping write operation')
+      else:
+        writing_results = []
+        for writer_client in writer_clients:
+          logger.debug(
+            'Start writing data for query %s via %s writer',
+            title,
+            type(writer_client),
+          )
+          writing_result = writer_client.write(results, title)
+          logger.debug(
+            'Finish writing data for query %s via %s writer',
+            title,
+            type(writer_client),
+          )
+          writing_results.append(writing_result)
+        logger.info('%s executed successfully', title)
+        # Return the last writer's result for backward compatibility
+        return writing_results[-1] if writing_results else None
+    span.set_attribute('execute.num_results', len(results))
     return results
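The temp table name now carries a uuid4 suffix so concurrent runs cannot collide, both execution paths surface SqlAlchemyQueryExecutorError, and when writers are configured every one of them receives the report, with the last writer's result returned for backward compatibility. A minimal sketch against SQLite (the conn.connection.executescript calls are sqlite3-specific); with no writer configured, the GarfReport itself comes back:

    from garf_executors import execution_context, sql_executor

    sql_exec = sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
      'sqlite:///example.db'
    )
    report = sql_exec.execute(
      query='SELECT 1 AS one',
      title='smoke_test',
      context=execution_context.ExecutionContext(),
    )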
garf_executors/telemetry.py
ADDED

@@ -0,0 +1,20 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=C0330, g-bad-import-order, g-multiple-import
+from opentelemetry import trace
+
+tracer = trace.get_tracer(
+  instrumenting_module_name='garf_executors',
+)
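The tracer is created through the OpenTelemetry API only, so the spans recorded across these modules ('api.execute_batch', 'sql.execute', 'find_fetchers', and friends) stay no-ops until the host application installs an SDK tracer provider. A sketch with a console exporter, assuming the opentelemetry-sdk package is installed alongside this wheel:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import (
      ConsoleSpanExporter,
      SimpleSpanProcessor,
    )

    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
    trace.set_tracer_provider(provider)
    # Spans created via garf_executors.telemetry.tracer now print to stdout.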
garf_executors/workflow.py
ADDED

@@ -0,0 +1,96 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import os
+import pathlib
+
+import pydantic
+import smart_open
+import yaml
+
+from garf_executors.execution_context import ExecutionContext
+
+
+class QueryPath(pydantic.BaseModel):
+  """Path file with query."""
+
+  path: str
+
+
+class QueryDefinition(pydantic.BaseModel):
+  """Definition of a query."""
+
+  query: Query
+
+
+class Query(pydantic.BaseModel):
+  """Query elements.
+
+  Attributes:
+    text: Query text.
+    title: Name of the query.
+  """
+
+  text: str
+  title: str
+
+
+class ExecutionStep(ExecutionContext):
+  """Common context for executing one or more queries.
+
+  Attributes:
+    fetcher: Name of a fetcher to get data from API.
+    alias: Optional alias to identify execution step.
+    queries: Queries to run for a particular fetcher.
+    context: Execution context for queries and fetcher.
+  """
+
+  fetcher: str | None = None
+  alias: str | None = None
+  queries: list[QueryPath | QueryDefinition] | None = None
+
+  @property
+  def context(self) -> ExecutionContext:
+    return ExecutionContext(
+      writer=self.writer,
+      writer_parameters=self.writer_parameters,
+      query_parameters=self.query_parameters,
+      fetcher_parameters=self.fetcher_parameters,
+    )
+
+
+class Workflow(pydantic.BaseModel):
+  """Orchestrates execution of queries for multiple fetchers.
+
+  Attributes:
+    steps: Contains one or several fetcher executions.
+  """
+
+  steps: list[ExecutionStep]
+
+  @classmethod
+  def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Workflow:
+    """Builds workflow from local or remote yaml file."""
+    with smart_open.open(path, 'r', encoding='utf-8') as f:
+      data = yaml.safe_load(f)
+    return Workflow(steps=data.get('steps'))
+
+  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
+    """Saves workflow to local or remote yaml file."""
+    with smart_open.open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(
+        self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
+      )
+    return f'Workflow is saved to {str(path)}'
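The new module adds a declarative layer: a Workflow is a list of ExecutionSteps, each combining a fetcher name and its queries with the usual execution-context fields, and smart_open lets from_file/save work against local paths or remote URLs alike. A sketch of building and saving one programmatically (the fetcher name and query path are illustrative):

    from garf_executors import workflow

    wf = workflow.Workflow(
      steps=[
        workflow.ExecutionStep(
          fetcher='fake',
          alias='sample_step',
          queries=[workflow.QueryPath(path='queries/sample.sql')],
          writer='csv',
        )
      ]
    )
    print(wf.save('workflow.yaml'))  # -> 'Workflow is saved to workflow.yaml'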