garf-executors 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf/executors/__init__.py +1 -36
- garf/executors/api_executor.py +89 -4
- garf/executors/bq_executor.py +2 -0
- garf/executors/entrypoints/cli.py +45 -58
- garf/executors/entrypoints/grpc_server.py +21 -7
- garf/executors/entrypoints/server.py +64 -7
- garf/executors/entrypoints/tracer.py +29 -4
- garf/executors/fetchers.py +52 -2
- garf/executors/garf_pb2.py +17 -11
- garf/executors/garf_pb2_grpc.py +45 -2
- garf/executors/query_processor.py +36 -18
- garf/executors/setup.py +58 -0
- garf/executors/sql_executor.py +2 -0
- garf/executors/workflows/__init__.py +0 -0
- garf/executors/workflows/gcp_workflow.yaml +49 -0
- garf/executors/{workflow.py → workflows/workflow.py} +57 -2
- garf/executors/workflows/workflow_runner.py +172 -0
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/METADATA +8 -1
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/RECORD +22 -18
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/WHEEL +1 -1
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/entry_points.txt +0 -0
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/top_level.txt +0 -0
garf/executors/garf_pb2.py
CHANGED

@@ -25,21 +25,27 @@ _sym_db = _symbol_database.Default()
 from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2


-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ngarf.proto\x12\x04garf\x1a\x1cgoogle/protobuf/struct.proto\"g\n\x0e\x45xecuteRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12\'\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x16.garf.ExecutionContext\"\xbc\x01\n\x10\x45xecutionContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06writer\x18\x03 \x01(\t\x12\x32\n\x11writer_parameters\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\"d\n\x0fQueryParameters\x12&\n\x05macro\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x08template\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\"\n\x0f\x45xecuteResponse\x12\x0f\n\x07results\x18\x01 \x03(\
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ngarf.proto\x12\x04garf\x1a\x1cgoogle/protobuf/struct.proto\"a\n\x0c\x46\x65tchRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12#\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x12.garf.FetchContext\"G\n\rFetchResponse\x12\x0f\n\x07\x63olumns\x18\x01 \x03(\t\x12%\n\x04rows\x18\x02 \x03(\x0b\x32\x17.google.protobuf.Struct\"t\n\x0c\x46\x65tchContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"g\n\x0e\x45xecuteRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12\'\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x16.garf.ExecutionContext\"\xbc\x01\n\x10\x45xecutionContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06writer\x18\x03 \x01(\t\x12\x32\n\x11writer_parameters\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\"d\n\x0fQueryParameters\x12&\n\x05macro\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x08template\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\"\n\x0f\x45xecuteResponse\x12\x0f\n\x07results\x18\x01 \x03(\t2{\n\x0bGarfService\x12\x38\n\x07\x45xecute\x12\x14.garf.ExecuteRequest\x1a\x15.garf.ExecuteResponse\"\x00\x12\x32\n\x05\x46\x65tch\x12\x12.garf.FetchRequest\x1a\x13.garf.FetchResponse\"\x00\x62\x06proto3')

 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'garf_pb2', _globals)
 if not _descriptor._USE_C_DESCRIPTORS:
   DESCRIPTOR._loaded_options = None
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
+  _globals['_FETCHREQUEST']._serialized_start=50
+  _globals['_FETCHREQUEST']._serialized_end=147
+  _globals['_FETCHRESPONSE']._serialized_start=149
+  _globals['_FETCHRESPONSE']._serialized_end=220
+  _globals['_FETCHCONTEXT']._serialized_start=222
+  _globals['_FETCHCONTEXT']._serialized_end=338
+  _globals['_EXECUTEREQUEST']._serialized_start=340
+  _globals['_EXECUTEREQUEST']._serialized_end=443
+  _globals['_EXECUTIONCONTEXT']._serialized_start=446
+  _globals['_EXECUTIONCONTEXT']._serialized_end=634
+  _globals['_QUERYPARAMETERS']._serialized_start=636
+  _globals['_QUERYPARAMETERS']._serialized_end=736
+  _globals['_EXECUTERESPONSE']._serialized_start=738
+  _globals['_EXECUTERESPONSE']._serialized_end=772
+  _globals['_GARFSERVICE']._serialized_start=774
+  _globals['_GARFSERVICE']._serialized_end=897
 # @@protoc_insertion_point(module_scope)
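The regenerated descriptor above adds FetchRequest, FetchResponse, and FetchContext messages next to the existing Execute types. A minimal sketch of building a FetchRequest from the field names decoded in the serialized blob; the source alias, query, and connection string below are illustrative values, not part of the package:

from google.protobuf import struct_pb2

from garf.executors import garf_pb2

# Hypothetical fetcher parameters packed into the Struct-typed field.
fetcher_parameters = struct_pb2.Struct()
fetcher_parameters.update({'connection_string': 'sqlite:///demo.db'})

request = garf_pb2.FetchRequest(
  source='sqldb',  # illustrative fetcher alias
  title='demo',
  query='SELECT 1 AS one',
  context=garf_pb2.FetchContext(fetcher_parameters=fetcher_parameters),
)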
garf/executors/garf_pb2_grpc.py
CHANGED

@@ -5,7 +5,7 @@ import warnings

 from . import garf_pb2 as garf__pb2

-GRPC_GENERATED_VERSION = '1.
+GRPC_GENERATED_VERSION = '1.76.0'
 GRPC_VERSION = grpc.__version__
 _version_not_supported = False

@@ -18,7 +18,7 @@ except ImportError:
 if _version_not_supported:
     raise RuntimeError(
         f'The grpc package installed is at version {GRPC_VERSION},'
-        +
+        + ' but the generated code in garf_pb2_grpc.py depends on'
         + f' grpcio>={GRPC_GENERATED_VERSION}.'
         + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
         + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'

@@ -39,6 +39,11 @@ class GarfServiceStub(object):
                 request_serializer=garf__pb2.ExecuteRequest.SerializeToString,
                 response_deserializer=garf__pb2.ExecuteResponse.FromString,
                 _registered_method=True)
+        self.Fetch = channel.unary_unary(
+                '/garf.GarfService/Fetch',
+                request_serializer=garf__pb2.FetchRequest.SerializeToString,
+                response_deserializer=garf__pb2.FetchResponse.FromString,
+                _registered_method=True)


 class GarfServiceServicer(object):

@@ -50,6 +55,12 @@ class GarfServiceServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')

+    def Fetch(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+

 def add_GarfServiceServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -58,6 +69,11 @@ def add_GarfServiceServicer_to_server(servicer, server):
             request_deserializer=garf__pb2.ExecuteRequest.FromString,
             response_serializer=garf__pb2.ExecuteResponse.SerializeToString,
             ),
+            'Fetch': grpc.unary_unary_rpc_method_handler(
+                    servicer.Fetch,
+                    request_deserializer=garf__pb2.FetchRequest.FromString,
+                    response_serializer=garf__pb2.FetchResponse.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'garf.GarfService', rpc_method_handlers)

@@ -95,3 +111,30 @@ class GarfService(object):
             timeout,
             metadata,
             _registered_method=True)
+
+    @staticmethod
+    def Fetch(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/garf.GarfService/Fetch',
+            garf__pb2.FetchRequest.SerializeToString,
+            garf__pb2.FetchResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
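With the service definition extended, a generated client can call the new unary Fetch RPC. A minimal sketch using the standard grpcio API; the server address is an assumption and a GarfService server must already be listening there:

import grpc

from garf.executors import garf_pb2, garf_pb2_grpc

# 'localhost:50051' is an assumed address for a running GarfService server.
with grpc.insecure_channel('localhost:50051') as channel:
  stub = garf_pb2_grpc.GarfServiceStub(channel)
  response = stub.Fetch(
    garf_pb2.FetchRequest(source='bq', title='demo', query='SELECT 1')
  )
  print(response.columns)  # repeated string of column names
  for row in response.rows:  # each row is a google.protobuf.Struct
    print(dict(row))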
garf/executors/query_processor.py
CHANGED

@@ -12,29 +12,39 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+"""qQuery can be used as a parameter in garf queries."""
+
 import contextlib

-from garf.core import query_editor
-from garf.executors import
+from garf.core import query_editor, query_parser
+from garf.executors import execution_context


-
-
-
-
+class GqueryError(query_parser.GarfQueryError):
+  """Errors on incorrect qQuery syntax."""
+
+
+def _handle_sub_context(context, sub_context):
+  for k, v in sub_context.items():
     if isinstance(v, str) and v.startswith('gquery'):
       no_writer_context = context.model_copy(update={'writer': None})
       try:
-        _, alias, query = v.split(':', maxsplit=3)
+        _, alias, *query = v.split(':', maxsplit=3)
       except ValueError:
-        raise
+        raise GqueryError(
           f'Incorrect gquery format, should be gquery:alias:query, got {v}'
         )
+      if not alias:
+        raise GqueryError(f'Missing alias in gquery: {v}')
+      if not query:
+        raise GqueryError(f'Missing query text in gquery: {v}')
       if alias == 'sqldb':
         from garf.executors import sql_executor

-        gquery_executor =
-
+        gquery_executor = (
+          sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+            context.fetcher_parameters.get('connection_string')
+          )
         )
       elif alias == 'bq':
         from garf.executors import bq_executor

@@ -43,19 +53,27 @@ def process_gquery(
           **context.fetcher_parameters
         )
       else:
-        raise
-
-
+        raise GqueryError(f'Unsupported alias {alias} for gquery: {v}')
+      with contextlib.suppress(
+        query_editor.GarfResourceError, query_parser.GarfVirtualColumnError
+      ):
+        query = ':'.join(query)
         query_spec = query_editor.QuerySpecification(
           text=query, args=context.query_parameters
         ).generate()
         if len(columns := [c for c in query_spec.column_names if c != '_']) > 1:
-          raise
-            f'Multiple columns in gquery: {columns}'
-          )
+          raise GqueryError(f'Multiple columns in gquery definition: {columns}')
       res = gquery_executor.execute(
         query=query, title='gquery', context=no_writer_context
       )
-
+      if len(columns := [c for c in res.column_names if c != '_']) > 1:
+        raise GqueryError(f'Multiple columns in gquery result: {columns}')
+      sub_context[k] = res.to_list(row_type='scalar')
+
+
+def process_gquery(
+  context: execution_context.ExecutionContext,
+) -> execution_context.ExecutionContext:
+  _handle_sub_context(context, context.fetcher_parameters)
+  _handle_sub_context(context, context.query_parameters.macro)
   return context
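The reworked module lets a macro or fetcher parameter embed a gquery:alias:query expression that is executed first and replaced by the scalars it returns. A minimal sketch, assuming ExecutionContext accepts the dict shapes shown; the macro name, query, and connection string are hypothetical:

from garf.executors import execution_context, query_processor

# Hypothetical context: the 'accounts' macro is itself a one-column query.
context = execution_context.ExecutionContext(
  query_parameters={
    'macro': {'accounts': 'gquery:sqldb:SELECT account_id FROM accounts'}
  },
  fetcher_parameters={'connection_string': 'sqlite:///demo.db'},
)

# process_gquery runs the embedded query via SqlAlchemyQueryExecutor and
# substitutes the macro with the list of returned scalar values.
context = query_processor.process_gquery(context)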
garf/executors/setup.py
ADDED

@@ -0,0 +1,58 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Bootstraps executor based on provided parameters."""
+
+from __future__ import annotations
+
+import importlib
+
+from garf.executors import executor, fetchers
+from garf.executors.api_executor import ApiQueryExecutor
+from garf.executors.telemetry import tracer
+
+
+@tracer.start_as_current_span('setup_executor')
+def setup_executor(
+  source: str,
+  fetcher_parameters: dict[str, str | int | bool],
+  enable_cache: bool = False,
+  cache_ttl_seconds: int = 3600,
+  simulate: bool = False,
+) -> type[executor.Executor]:
+  """Initializes executors based on a source and parameters."""
+  if source == 'bq':
+    bq_executor = importlib.import_module('garf.executors.bq_executor')
+    query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
+  elif source == 'sqldb':
+    sql_executor = importlib.import_module('garf.executors.sql_executor')
+    query_executor = (
+      sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+        fetcher_parameters.get('connection_string')
+      )
+    )
+  else:
+    concrete_api_fetcher = fetchers.get_report_fetcher(source)
+    if simulate:
+      concrete_simulator = fetchers.get_report_simulator(source)()
+    else:
+      concrete_simulator = None
+    query_executor = ApiQueryExecutor(
+      fetcher=concrete_api_fetcher(
+        **fetcher_parameters,
+        enable_cache=enable_cache,
+        cache_ttl_seconds=cache_ttl_seconds,
+      ),
+      report_simulator=concrete_simulator,
+    )
+  return query_executor
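setup_executor gives callers a single entry point that picks a BigQuery, SQLAlchemy, or API-based executor from the source string. A short usage sketch; the connection string is a placeholder:

from garf.executors import setup

# Routes to SqlAlchemyQueryExecutor because source is 'sqldb';
# any other source is resolved through fetchers.get_report_fetcher.
query_executor = setup.setup_executor(
  source='sqldb',
  fetcher_parameters={'connection_string': 'sqlite:///demo.db'},
  enable_cache=False,
)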
garf/executors/sql_executor.py
CHANGED

@@ -91,6 +91,8 @@ class SqlAlchemyQueryExecutor(
       Report with data if query returns some data otherwise empty Report.
     """
     span = trace.get_current_span()
+    span.set_attribute('query.title', title)
+    span.set_attribute('query.text', query)
     logger.info('Executing script: %s', title)
     query_text = self.replace_params_template(query, context.query_parameters)
     with self.engine.begin() as conn:
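The executor now records the query title and text on the active OpenTelemetry span. A minimal sketch of how such attributes surface when tracing is configured; the console-exporter setup is an assumption for illustration, not part of the package:

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Illustrative tracing setup: spans created around execute() will carry
# the 'query.title' and 'query.text' attributes set above.
trace.set_tracer_provider(TracerProvider())
trace.get_tracer_provider().add_span_processor(
  SimpleSpanProcessor(ConsoleSpanExporter())
)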
garf/executors/workflows/__init__.py
File without changes

garf/executors/workflows/gcp_workflow.yaml
ADDED

@@ -0,0 +1,49 @@
+run:
+  for:
+    value: pair
+    in: ${pairs}
+    steps:
+      - log_source:
+          call: sys.log
+          args:
+            data: ${pair.alias}
+      - execute_queries:
+          parallel:
+            for:
+              value: query
+              in: ${pair.queries}
+              steps:
+                - log_query:
+                    call: sys.log
+                    args:
+                      data: ${pair}
+                - execute_single_query:
+                    try:
+                      call: http.post
+                      args:
+                        url: ${sys.get_env("GARF_ENDPOINT") + "/api/execute"}
+                        auth:
+                          type: OIDC
+                        body:
+                          source: ${pair.fetcher}
+                          # query_path: ${query.path}
+                          title: ${query.query.title}
+                          query: ${query.query.text}
+                          context:
+                            fetcher_parameters: ${pair.fetcher_parameters}
+                            writer: ${pair.writer}
+                            writer_parameters: ${pair.writer_parameters}
+                            query_parameters:
+                              macro: ${pair.query_parameters.macro}
+                              template: ${pair.query_parameters.template}
+                      result: task_resp
+                    except:
+                      as: e
+                      assign:
+                        - task_resp:
+                            status: "failed"
+                            error: ${e.message}
+                - log_result:
+                    call: sys.log
+                    args:
+                      data: ${task_resp}
garf/executors/{workflow.py → workflows/workflow.py}
CHANGED

@@ -11,10 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""Workflow specifies steps of end-to-end fetching and processing."""
+
 from __future__ import annotations

+import copy
 import os
 import pathlib
+import re
+from collections import defaultdict
+from typing import Any

 import pydantic
 import smart_open

@@ -37,6 +43,13 @@ class QueryPath(pydantic.BaseModel):
   """Path file with query."""

   path: str
+  prefix: str | None = None
+
+  @property
+  def full_path(self) -> str:
+    if self.prefix:
+      return re.sub('/$', '', self.prefix) + '/' + self.path
+    return self.path


 class QueryDefinition(pydantic.BaseModel):

@@ -65,11 +78,13 @@ class ExecutionStep(ExecutionContext):
     alias: Optional alias to identify execution step.
     queries: Queries to run for a particular fetcher.
     context: Execution context for queries and fetcher.
+    parallel_threshold: Max allowed parallelism for the queries in the step.
   """

   fetcher: str | None = None
   alias: str | None = pydantic.Field(default=None, pattern=r'^[a-zA-Z0-9_]+$')
   queries: list[QueryPath | QueryDefinition | QueryFolder] | None = None
+  parallel_threshold: int | None = None

   @property
   def context(self) -> ExecutionContext:

@@ -86,17 +101,41 @@ class Workflow(pydantic.BaseModel):

   Attributes:
     steps: Contains one or several fetcher executions.
+    context: Query and fetcher parameters to overwrite in steps.
   """

   steps: list[ExecutionStep]
+  context: ExecutionContext | None = None
+
+  def model_post_init(self, __context__) -> None:
+    if context := self.context:
+      custom_parameters = defaultdict(dict)
+      if custom_macros := context.query_parameters.macro:
+        custom_parameters['query_parameters']['macro'] = custom_macros
+      if custom_templates := context.query_parameters.template:
+        custom_parameters['query_parameters']['template'] = custom_templates
+      if custom_fetcher_parameters := context.fetcher_parameters:
+        custom_parameters['fetcher_parameters'] = custom_fetcher_parameters
+
+      if custom_parameters:
+        steps = self.steps
+        for i, step in enumerate(steps):
+          res = _merge_dicts(
+            step.model_dump(exclude_none=True), dict(custom_parameters)
+          )
+          steps[i] = ExecutionStep(**res)

   @classmethod
-  def from_file(
+  def from_file(
+    cls,
+    path: str | pathlib.Path | os.PathLike[str],
+    context: ExecutionContext | None = None,
+  ) -> Workflow:
     """Builds workflow from local or remote yaml file."""
     with smart_open.open(path, 'r', encoding='utf-8') as f:
       data = yaml.safe_load(f)
     try:
-      return Workflow(
+      return Workflow(steps=data.get('steps'), context=context)
     except pydantic.ValidationError as e:
       raise GarfWorkflowError(f'Incorrect workflow:\n {e}') from e

@@ -107,3 +146,19 @@ class Workflow(pydantic.BaseModel):
       self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
     )
     return f'Workflow is saved to {str(path)}'
+
+
+def _merge_dicts(
+  dict1: dict[str, Any], dict2: dict[str, Any]
+) -> dict[str, Any]:
+  result = copy.deepcopy(dict1)
+  for key, value in dict2.items():
+    if (
+      key in result
+      and isinstance(result[key], dict)
+      and isinstance(value, dict)
+    ):
+      result[key] = _merge_dicts(result[key], value)
+    else:
+      result[key] = value
+  return result
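A workflow-level context now propagates shared macros, templates, and fetcher parameters into every step through model_post_init and _merge_dicts, with nested step-specific keys preserved unless overridden. A minimal sketch; the ExecutionContext import path, its constructor shape, and the workflow.yaml file are assumptions:

from garf.executors.execution_context import ExecutionContext  # assumed path
from garf.executors.workflows import workflow

# Every step in the loaded workflow gets this macro merged into its
# query_parameters; deeper step-specific keys survive the merge.
wf = workflow.Workflow.from_file(
  'workflow.yaml',  # hypothetical file
  context=ExecutionContext(
    query_parameters={'macro': {'start_date': '2025-01-01'}}
  ),
)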
garf/executors/workflows/workflow_runner.py
ADDED

@@ -0,0 +1,172 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Runs garf workflow."""
+
+from __future__ import annotations
+
+import logging
+import pathlib
+import re
+from typing import Final
+
+import yaml
+from garf.executors import exceptions, setup
+from garf.executors.telemetry import tracer
+from garf.executors.workflows import workflow
+from garf.io import reader
+
+logger = logging.getLogger(__name__)
+
+_REMOTE_FILES_PATTERN: Final[str] = (
+  '^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)'
+)
+_SCRIPT_PATH = pathlib.Path(__file__).parent
+
+
+class WorkflowRunner:
+  """Runs garf workflow.
+
+  Attributes:
+    workflow: Workflow to execute.
+    wf_parent: Optional location of a workflow file.
+    parallel_threshold: Max allowed parallelism for the queries in the workflow.
+  """
+
+  def __init__(
+    self,
+    execution_workflow: workflow.Workflow,
+    wf_parent: pathlib.Path | str,
+    parallel_threshold: int = 10,
+  ) -> None:
+    """Initializes WorkflowRunner."""
+    self.workflow = execution_workflow
+    self.wf_parent = wf_parent
+    self.parallel_threshold = parallel_threshold
+
+  @classmethod
+  def from_file(
+    cls,
+    workflow_file: str | pathlib.Path,
+  ) -> WorkflowRunner:
+    """Initializes WorkflowRunner from a local or remote file."""
+    if isinstance(workflow_file, str):
+      workflow_file = pathlib.Path(workflow_file)
+    execution_workflow = workflow.Workflow.from_file(workflow_file)
+    return cls(
+      execution_workflow=execution_workflow, wf_parent=workflow_file.parent
+    )
+
+  def run(
+    self,
+    enable_cache: bool = False,
+    cache_ttl_seconds: int = 3600,
+    selected_aliases: list[str] | None = None,
+    skipped_aliases: list[str] | None = None,
+  ) -> list[str]:
+    skipped_aliases = skipped_aliases or []
+    selected_aliases = selected_aliases or []
+    reader_client = reader.create_reader('file')
+    execution_results = []
+    logger.info('Starting Garf Workflow...')
+    for i, step in enumerate(self.workflow.steps, 1):
+      step_name = f'{i}-{step.fetcher}'
+      if step.alias:
+        step_name = f'{step_name}-{step.alias}'
+      if step.alias in skipped_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      if selected_aliases and step.alias not in selected_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      with tracer.start_as_current_span(step_name):
+        logger.info(
+          'Running step %d, fetcher: %s, alias: %s', i, step.fetcher, step.alias
+        )
+        query_executor = setup.setup_executor(
+          source=step.fetcher,
+          fetcher_parameters=step.fetcher_parameters,
+          enable_cache=enable_cache,
+          cache_ttl_seconds=cache_ttl_seconds,
+        )
+        batch = {}
+        if not (queries := step.queries):
+          logger.error('Please provide one or more queries to run')
+          raise exceptions.GarfExecutorError(
+            'Please provide one or more queries to run'
+          )
+        for query in queries:
+          if isinstance(query, workflow.QueryPath):
+            query_path = query.full_path
+            if re.match(_REMOTE_FILES_PATTERN, query_path):
+              batch[query.path] = reader_client.read(query_path)
+            else:
+              if not query.prefix:
+                query_path = self.wf_parent / pathlib.Path(query.path)
+              if not query_path.exists():
+                raise workflow.GarfWorkflowError(
+                  f'Query: {query_path} not found'
+                )
+              batch[query.path] = reader_client.read(query_path)
+          elif isinstance(query, workflow.QueryFolder):
+            query_path = self.wf_parent / pathlib.Path(query.folder)
+            if not query_path.exists():
+              raise workflow.GarfWorkflowError(
+                f'Folder: {query_path} not found'
+              )
+            for p in query_path.rglob('*'):
+              if p.suffix == '.sql':
+                batch[p.stem] = reader_client.read(p)
+          else:
+            batch[query.query.title] = query.query.text
+        query_executor.execute_batch(
+          batch,
+          step.context,
+          step.parallel_threshold or self.parallel_threshold,
+        )
+        execution_results.append(step_name)
+    return execution_results
+
+  def compile(self, path: str | pathlib.Path) -> str:
+    """Saves workflow with expanded anchors."""
+    return self.workflow.save(path)
+
+  def deploy(self, path: str | pathlib.Path) -> str:
+    """Prepares workflow for deployment to Google Cloud Workflows."""
+    wf = self.workflow.model_dump(exclude_none=True).get('steps')
+    with open(_SCRIPT_PATH / 'gcp_workflow.yaml', 'r', encoding='utf-8') as f:
+      cloud_workflow_run_template = yaml.safe_load(f)
+    init = {
+      'init': {
+        'assign': [{'pairs': wf}],
+      },
+    }
+    cloud_workflow = {
+      'main': {
+        'params': [],
+        'steps': [init, cloud_workflow_run_template],
+      },
+    }
+    with open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(cloud_workflow, f, sort_keys=False)
+    return f'Workflow is saved to {path}'
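WorkflowRunner ties the pieces together: it loads a workflow file, builds an executor per step via setup_executor, reads queries from paths, folders, or inline definitions, and runs them as a batch. A short usage sketch; the file names and alias are hypothetical:

from garf.executors.workflows import workflow_runner

runner = workflow_runner.WorkflowRunner.from_file('workflow.yaml')

# Run all steps except the one aliased 'slow_step', with caching enabled.
results = runner.run(enable_cache=True, skipped_aliases=['slow_step'])

# Wrap the steps into the bundled gcp_workflow.yaml template for
# deployment to Google Cloud Workflows.
runner.deploy('cloud_workflow.yaml')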
{garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: garf-executors
-Version: 1.0.2
+Version: 1.1.3
 Summary: Executes queries against API and writes data to local/remote storage.
 Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
 License: Apache 2.0

@@ -36,8 +36,15 @@ Provides-Extra: gcp
 Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
 Provides-Extra: server
 Requires-Dist: fastapi[standard]; extra == "server"
+Requires-Dist: pydantic-settings; extra == "server"
 Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
 Requires-Dist: typer; extra == "server"
+Requires-Dist: grpcio-reflection; extra == "server"
+Provides-Extra: tests
+Requires-Dist: pytest; extra == "tests"
+Requires-Dist: pytest-mock; extra == "tests"
+Requires-Dist: pytest-xdist; extra == "tests"
+Requires-Dist: pytest-grpc; extra == "tests"
 Provides-Extra: all
 Requires-Dist: garf-executors[bq,gcp,server,sql]; extra == "all"