garf-executors 1.0.2 → 1.1.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,21 +25,27 @@ _sym_db = _symbol_database.Default()
  from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2

- DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ngarf.proto\x12\x04garf\x1a\x1cgoogle/protobuf/struct.proto\"g\n\x0e\x45xecuteRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12\'\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x16.garf.ExecutionContext\"\xbc\x01\n\x10\x45xecutionContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06writer\x18\x03 \x01(\t\x12\x32\n\x11writer_parameters\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\"d\n\x0fQueryParameters\x12&\n\x05macro\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x08template\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\"\n\x0f\x45xecuteResponse\x12\x0f\n\x07results\x18\x01 \x03(\t2G\n\x0bGarfService\x12\x38\n\x07\x45xecute\x12\x14.garf.ExecuteRequest\x1a\x15.garf.ExecuteResponse\"\x00\x62\x06proto3')
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ngarf.proto\x12\x04garf\x1a\x1cgoogle/protobuf/struct.proto\"a\n\x0c\x46\x65tchRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12#\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x12.garf.FetchContext\"G\n\rFetchResponse\x12\x0f\n\x07\x63olumns\x18\x01 \x03(\t\x12%\n\x04rows\x18\x02 \x03(\x0b\x32\x17.google.protobuf.Struct\"t\n\x0c\x46\x65tchContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"g\n\x0e\x45xecuteRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12\'\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x16.garf.ExecutionContext\"\xbc\x01\n\x10\x45xecutionContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06writer\x18\x03 \x01(\t\x12\x32\n\x11writer_parameters\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\"d\n\x0fQueryParameters\x12&\n\x05macro\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x08template\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\"\n\x0f\x45xecuteResponse\x12\x0f\n\x07results\x18\x01 \x03(\t2{\n\x0bGarfService\x12\x38\n\x07\x45xecute\x12\x14.garf.ExecuteRequest\x1a\x15.garf.ExecuteResponse\"\x00\x12\x32\n\x05\x46\x65tch\x12\x12.garf.FetchRequest\x1a\x13.garf.FetchResponse\"\x00\x62\x06proto3')

  _globals = globals()
  _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
  _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'garf_pb2', _globals)
  if not _descriptor._USE_C_DESCRIPTORS:
    DESCRIPTOR._loaded_options = None
-   _globals['_EXECUTEREQUEST']._serialized_start=50
-   _globals['_EXECUTEREQUEST']._serialized_end=153
-   _globals['_EXECUTIONCONTEXT']._serialized_start=156
-   _globals['_EXECUTIONCONTEXT']._serialized_end=344
-   _globals['_QUERYPARAMETERS']._serialized_start=346
-   _globals['_QUERYPARAMETERS']._serialized_end=446
-   _globals['_EXECUTERESPONSE']._serialized_start=448
-   _globals['_EXECUTERESPONSE']._serialized_end=482
-   _globals['_GARFSERVICE']._serialized_start=484
-   _globals['_GARFSERVICE']._serialized_end=555
+   _globals['_FETCHREQUEST']._serialized_start=50
+   _globals['_FETCHREQUEST']._serialized_end=147
+   _globals['_FETCHRESPONSE']._serialized_start=149
+   _globals['_FETCHRESPONSE']._serialized_end=220
+   _globals['_FETCHCONTEXT']._serialized_start=222
+   _globals['_FETCHCONTEXT']._serialized_end=338
+   _globals['_EXECUTEREQUEST']._serialized_start=340
+   _globals['_EXECUTEREQUEST']._serialized_end=443
+   _globals['_EXECUTIONCONTEXT']._serialized_start=446
+   _globals['_EXECUTIONCONTEXT']._serialized_end=634
+   _globals['_QUERYPARAMETERS']._serialized_start=636
+   _globals['_QUERYPARAMETERS']._serialized_end=736
+   _globals['_EXECUTERESPONSE']._serialized_start=738
+   _globals['_EXECUTERESPONSE']._serialized_end=772
+   _globals['_GARFSERVICE']._serialized_start=774
+   _globals['_GARFSERVICE']._serialized_end=897
  # @@protoc_insertion_point(module_scope)
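
For orientation, the new serialized descriptor above decodes to three added messages (FetchRequest, FetchResponse, FetchContext) plus a Fetch RPC on GarfService. A minimal construction sketch, assuming the generated module is importable as garf_pb2 (field names are taken from the descriptor; all values are placeholders):

from google.protobuf import struct_pb2

import garf_pb2  # assumed import path; in-package code uses `from . import garf_pb2`

# Fetcher settings travel as a google.protobuf.Struct.
fetcher_params = struct_pb2.Struct()
fetcher_params.update({'project': 'my-project'})

request = garf_pb2.FetchRequest(
    source='bq',
    title='demo',
    query='SELECT 1',
    context=garf_pb2.FetchContext(
        query_parameters=garf_pb2.QueryParameters(),
        fetcher_parameters=fetcher_params,
    ),
)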
@@ -5,7 +5,7 @@ import warnings

  from . import garf_pb2 as garf__pb2

- GRPC_GENERATED_VERSION = '1.75.0'
+ GRPC_GENERATED_VERSION = '1.76.0'
  GRPC_VERSION = grpc.__version__
  _version_not_supported = False

@@ -18,7 +18,7 @@ except ImportError:
  if _version_not_supported:
      raise RuntimeError(
          f'The grpc package installed is at version {GRPC_VERSION},'
-         + f' but the generated code in garf_pb2_grpc.py depends on'
+         + ' but the generated code in garf_pb2_grpc.py depends on'
          + f' grpcio>={GRPC_GENERATED_VERSION}.'
          + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
          + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
@@ -39,6 +39,11 @@ class GarfServiceStub(object):
                  request_serializer=garf__pb2.ExecuteRequest.SerializeToString,
                  response_deserializer=garf__pb2.ExecuteResponse.FromString,
                  _registered_method=True)
+         self.Fetch = channel.unary_unary(
+                 '/garf.GarfService/Fetch',
+                 request_serializer=garf__pb2.FetchRequest.SerializeToString,
+                 response_deserializer=garf__pb2.FetchResponse.FromString,
+                 _registered_method=True)


  class GarfServiceServicer(object):
@@ -50,6 +55,12 @@ class GarfServiceServicer(object):
          context.set_details('Method not implemented!')
          raise NotImplementedError('Method not implemented!')

+     def Fetch(self, request, context):
+         """Missing associated documentation comment in .proto file."""
+         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+         context.set_details('Method not implemented!')
+         raise NotImplementedError('Method not implemented!')
+


  def add_GarfServiceServicer_to_server(servicer, server):
      rpc_method_handlers = {
@@ -58,6 +69,11 @@ def add_GarfServiceServicer_to_server(servicer, server):
                      request_deserializer=garf__pb2.ExecuteRequest.FromString,
                      response_serializer=garf__pb2.ExecuteResponse.SerializeToString,
              ),
+             'Fetch': grpc.unary_unary_rpc_method_handler(
+                     servicer.Fetch,
+                     request_deserializer=garf__pb2.FetchRequest.FromString,
+                     response_serializer=garf__pb2.FetchResponse.SerializeToString,
+             ),
      }
      generic_handler = grpc.method_handlers_generic_handler(
              'garf.GarfService', rpc_method_handlers)
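
A minimal server-side sketch of implementing the new handler, assuming the generated modules are importable as garf_pb2 and garf_pb2_grpc (the handler body is illustrative, not the package's implementation):

from concurrent import futures

import grpc

import garf_pb2
import garf_pb2_grpc


class GarfServicer(garf_pb2_grpc.GarfServiceServicer):
    def Fetch(self, request, context):
        # Echo the query back as a single-column, single-row report; a real
        # servicer would run request.query against the fetcher in request.source.
        response = garf_pb2.FetchResponse(columns=['query'])
        response.rows.add().update({'query': request.query})
        return response


server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
garf_pb2_grpc.add_GarfServiceServicer_to_server(GarfServicer(), server)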
@@ -95,3 +111,30 @@ class GarfService(object):
              timeout,
              metadata,
              _registered_method=True)
+
+     @staticmethod
+     def Fetch(request,
+             target,
+             options=(),
+             channel_credentials=None,
+             call_credentials=None,
+             insecure=False,
+             compression=None,
+             wait_for_ready=None,
+             timeout=None,
+             metadata=None):
+         return grpc.experimental.unary_unary(
+             request,
+             target,
+             '/garf.GarfService/Fetch',
+             garf__pb2.FetchRequest.SerializeToString,
+             garf__pb2.FetchResponse.FromString,
+             options,
+             channel_credentials,
+             insecure,
+             call_credentials,
+             compression,
+             wait_for_ready,
+             timeout,
+             metadata,
+             _registered_method=True)
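
And the matching client-side call through the stub, with an assumed local server address and placeholder query:

import grpc

import garf_pb2
import garf_pb2_grpc

with grpc.insecure_channel('localhost:50051') as channel:
    stub = garf_pb2_grpc.GarfServiceStub(channel)
    response = stub.Fetch(
        garf_pb2.FetchRequest(source='bq', title='demo', query='SELECT 1')
    )
    # FetchResponse carries column names plus one Struct per row.
    print(list(response.columns))
    for row in response.rows:
        print(dict(row.items()))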
@@ -12,29 +12,39 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ """gquery can be used as a parameter in garf queries."""
+
  import contextlib

- from garf.core import query_editor
- from garf.executors import exceptions, execution_context
+ from garf.core import query_editor, query_parser
+ from garf.executors import execution_context


- def process_gquery(
-   context: execution_context.ExecutionContext,
- ) -> execution_context.ExecutionContext:
-   for k, v in context.fetcher_parameters.items():
+ class GqueryError(query_parser.GarfQueryError):
+   """Errors on incorrect gquery syntax."""
+
+
+ def _handle_sub_context(context, sub_context):
+   for k, v in sub_context.items():
      if isinstance(v, str) and v.startswith('gquery'):
        no_writer_context = context.model_copy(update={'writer': None})
        try:
-         _, alias, query = v.split(':', maxsplit=3)
+         _, alias, *query = v.split(':', maxsplit=3)
        except ValueError:
-         raise exceptions.GarfExecutorError(
+         raise GqueryError(
            f'Incorrect gquery format, should be gquery:alias:query, got {v}'
          )
+       if not alias:
+         raise GqueryError(f'Missing alias in gquery: {v}')
+       if not query:
+         raise GqueryError(f'Missing query text in gquery: {v}')
        if alias == 'sqldb':
          from garf.executors import sql_executor

-         gquery_executor = sql_executor.SqlAlchemyQueryExecutor(
-           **context.fetcher_parameters
+         gquery_executor = (
+           sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+             context.fetcher_parameters.get('connection_string')
+           )
          )
        elif alias == 'bq':
          from garf.executors import bq_executor
@@ -43,19 +53,27 @@ def process_gquery(
            **context.fetcher_parameters
          )
        else:
-         raise exceptions.GarfExecutorError(
-           f'Unsupported alias for gquery: {alias}'
-         )
-       with contextlib.suppress(query_editor.GarfResourceError):
+         raise GqueryError(f'Unsupported alias {alias} for gquery: {v}')
+       with contextlib.suppress(
+         query_editor.GarfResourceError, query_parser.GarfVirtualColumnError
+       ):
+         query = ':'.join(query)
          query_spec = query_editor.QuerySpecification(
            text=query, args=context.query_parameters
          ).generate()
          if len(columns := [c for c in query_spec.column_names if c != '_']) > 1:
-           raise exceptions.GarfExecutorError(
-             f'Multiple columns in gquery: {columns}'
-           )
+           raise GqueryError(f'Multiple columns in gquery definition: {columns}')
        res = gquery_executor.execute(
          query=query, title='gquery', context=no_writer_context
        )
-       context.fetcher_parameters[k] = res.to_list(row_type='scalar')
+       if len(columns := [c for c in res.column_names if c != '_']) > 1:
+         raise GqueryError(f'Multiple columns in gquery result: {columns}')
+       sub_context[k] = res.to_list(row_type='scalar')
+
+
+ def process_gquery(
+   context: execution_context.ExecutionContext,
+ ) -> execution_context.ExecutionContext:
+   _handle_sub_context(context, context.fetcher_parameters)
+   _handle_sub_context(context, context.query_parameters.macro)
    return context
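
In effect, any string-valued parameter of the form gquery:alias:query is replaced in place by the scalar list that the aliased executor returns; after this change, both fetcher parameters and macros are expanded. A hedged sketch, assuming the module is importable as garf.executors.gquery and that ExecutionContext accepts these fields as keyword arguments:

from garf.executors import execution_context
from garf.executors.gquery import process_gquery  # module path is an assumption

context = execution_context.ExecutionContext(
    fetcher_parameters={
        'connection_string': 'sqlite:///demo.db',  # consumed by the sqldb alias
        'account_ids': 'gquery:sqldb:SELECT account_id FROM accounts',
    },
)
# After processing, 'account_ids' holds the query result as a list of scalars.
context = process_gquery(context)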
@@ -0,0 +1,58 @@
+ # Copyright 2026 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Bootstraps executor based on provided parameters."""
+
+ from __future__ import annotations
+
+ import importlib
+
+ from garf.executors import executor, fetchers
+ from garf.executors.api_executor import ApiQueryExecutor
+ from garf.executors.telemetry import tracer
+
+
+ @tracer.start_as_current_span('setup_executor')
+ def setup_executor(
+   source: str,
+   fetcher_parameters: dict[str, str | int | bool],
+   enable_cache: bool = False,
+   cache_ttl_seconds: int = 3600,
+   simulate: bool = False,
+ ) -> executor.Executor:
+   """Initializes executors based on a source and parameters."""
+   if source == 'bq':
+     bq_executor = importlib.import_module('garf.executors.bq_executor')
+     query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
+   elif source == 'sqldb':
+     sql_executor = importlib.import_module('garf.executors.sql_executor')
+     query_executor = (
+       sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+         fetcher_parameters.get('connection_string')
+       )
+     )
+   else:
+     concrete_api_fetcher = fetchers.get_report_fetcher(source)
+     if simulate:
+       concrete_simulator = fetchers.get_report_simulator(source)()
+     else:
+       concrete_simulator = None
+     query_executor = ApiQueryExecutor(
+       fetcher=concrete_api_fetcher(
+         **fetcher_parameters,
+         enable_cache=enable_cache,
+         cache_ttl_seconds=cache_ttl_seconds,
+       ),
+       report_simulator=concrete_simulator,
+     )
+   return query_executor
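
A hedged usage sketch of the new bootstrap helper (the source name and fetcher parameters are placeholders):

from garf.executors import setup

# For source='bq' the parameters go straight to BigQueryExecutor's constructor.
query_executor = setup.setup_executor(
    source='bq',
    fetcher_parameters={'project': 'my-project'},  # assumed parameter name
    enable_cache=True,
    cache_ttl_seconds=600,
)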
@@ -91,6 +91,8 @@ class SqlAlchemyQueryExecutor(
      Report with data if query returns some data otherwise empty Report.
    """
    span = trace.get_current_span()
+   span.set_attribute('query.title', title)
+   span.set_attribute('query.text', query)
    logger.info('Executing script: %s', title)
    query_text = self.replace_params_template(query, context.query_parameters)
    with self.engine.begin() as conn:
@@ -0,0 +1,49 @@
+ run:
+   for:
+     value: pair
+     in: ${pairs}
+     steps:
+       - log_source:
+           call: sys.log
+           args:
+             data: ${pair.alias}
+       - execute_queries:
+           parallel:
+             for:
+               value: query
+               in: ${pair.queries}
+               steps:
+                 - log_query:
+                     call: sys.log
+                     args:
+                       data: ${pair}
+                 - execute_single_query:
+                     try:
+                       call: http.post
+                       args:
+                         url: ${sys.get_env("GARF_ENDPOINT") + "/api/execute"}
+                         auth:
+                           type: OIDC
+                         body:
+                           source: ${pair.fetcher}
+                           # query_path: ${query.path}
+                           title: ${query.query.title}
+                           query: ${query.query.text}
+                           context:
+                             fetcher_parameters: ${pair.fetcher_parameters}
+                             writer: ${pair.writer}
+                             writer_parameters: ${pair.writer_parameters}
+                             query_parameters:
+                               macro: ${pair.query_parameters.macro}
+                               template: ${pair.query_parameters.template}
+                       result: task_resp
+                     except:
+                       as: e
+                       assign:
+                         - task_resp:
+                             status: "failed"
+                             error: ${e.message}
+                 - log_result:
+                     call: sys.log
+                     args:
+                       data: ${task_resp}
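
For each query, the workflow above POSTs to the garf server's /api/execute endpoint. A sketch of the equivalent request body (the endpoint URL and all values are placeholders; the workflow itself uses the Workflows http.post connector with OIDC auth):

import requests  # illustrative only; not a dependency of this package

resp = requests.post(
    'https://garf.example.com/api/execute',  # assumed GARF_ENDPOINT value
    json={
        'source': 'bq',
        'title': 'demo',
        'query': 'SELECT 1',
        'context': {
            'fetcher_parameters': {},
            'writer': 'console',  # assumed writer name
            'writer_parameters': {},
            'query_parameters': {'macro': {}, 'template': {}},
        },
    },
)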
@@ -11,10 +11,16 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+ """Workflow specifies steps of end-to-end fetching and processing."""
+
  from __future__ import annotations

+ import copy
  import os
  import pathlib
+ import re
+ from collections import defaultdict
+ from typing import Any

  import pydantic
  import smart_open
@@ -37,6 +43,13 @@ class QueryPath(pydantic.BaseModel):
    """Path file with query."""

    path: str
+   prefix: str | None = None
+
+   @property
+   def full_path(self) -> str:
+     if self.prefix:
+       return re.sub('/$', '', self.prefix) + '/' + self.path
+     return self.path


  class QueryDefinition(pydantic.BaseModel):
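
A quick illustration of the new full_path property (module path and file names are assumptions):

from garf.executors.workflows.workflow import QueryPath

print(QueryPath(path='queries/demo.sql').full_path)
# queries/demo.sql

print(QueryPath(path='demo.sql', prefix='gs://bucket/queries/').full_path)
# gs://bucket/queries/demo.sql  (trailing slash on the prefix is stripped)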
@@ -65,11 +78,13 @@ class ExecutionStep(ExecutionContext):
      alias: Optional alias to identify execution step.
      queries: Queries to run for a particular fetcher.
      context: Execution context for queries and fetcher.
+     parallel_threshold: Max allowed parallelism for the queries in the step.
    """

    fetcher: str | None = None
    alias: str | None = pydantic.Field(default=None, pattern=r'^[a-zA-Z0-9_]+$')
    queries: list[QueryPath | QueryDefinition | QueryFolder] | None = None
+   parallel_threshold: int | None = None

    @property
    def context(self) -> ExecutionContext:
@@ -86,17 +101,41 @@ class Workflow(pydantic.BaseModel):

    Attributes:
      steps: Contains one or several fetcher executions.
+     context: Query and fetcher parameters to overwrite in steps.
    """

    steps: list[ExecutionStep]
+   context: ExecutionContext | None = None
+
+   def model_post_init(self, __context__) -> None:
+     if context := self.context:
+       custom_parameters = defaultdict(dict)
+       if custom_macros := context.query_parameters.macro:
+         custom_parameters['query_parameters']['macro'] = custom_macros
+       if custom_templates := context.query_parameters.template:
+         custom_parameters['query_parameters']['template'] = custom_templates
+       if custom_fetcher_parameters := context.fetcher_parameters:
+         custom_parameters['fetcher_parameters'] = custom_fetcher_parameters
+
+       if custom_parameters:
+         steps = self.steps
+         for i, step in enumerate(steps):
+           res = _merge_dicts(
+             step.model_dump(exclude_none=True), dict(custom_parameters)
+           )
+           steps[i] = ExecutionStep(**res)

    @classmethod
-   def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Workflow:
+   def from_file(
+     cls,
+     path: str | pathlib.Path | os.PathLike[str],
+     context: ExecutionContext | None = None,
+   ) -> Workflow:
      """Builds workflow from local or remote yaml file."""
      with smart_open.open(path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
      try:
-       return Workflow(**data)
+       return Workflow(steps=data.get('steps'), context=context)
      except pydantic.ValidationError as e:
        raise GarfWorkflowError(f'Incorrect workflow:\n {e}') from e

@@ -107,3 +146,19 @@ class Workflow(pydantic.BaseModel):
        self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
      )
      return f'Workflow is saved to {str(path)}'
+
+
+ def _merge_dicts(
+   dict1: dict[str, Any], dict2: dict[str, Any]
+ ) -> dict[str, Any]:
+   result = copy.deepcopy(dict1)
+   for key, value in dict2.items():
+     if (
+       key in result
+       and isinstance(result[key], dict)
+       and isinstance(value, dict)
+     ):
+       result[key] = _merge_dicts(result[key], value)
+     else:
+       result[key] = value
+   return result
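
_merge_dicts merges nested dicts key-by-key and lets the override win elsewhere, which is how the workflow-level context is folded into each step. A short illustration (import path is an assumption):

from garf.executors.workflows.workflow import _merge_dicts

step = {'query_parameters': {'macro': {'start': '2024-01-01'}}, 'writer': 'csv'}
override = {'query_parameters': {'macro': {'end': '2024-12-31'}}}
print(_merge_dicts(step, override))
# {'query_parameters': {'macro': {'start': '2024-01-01', 'end': '2024-12-31'}},
#  'writer': 'csv'}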
@@ -0,0 +1,172 @@
+ # Copyright 2026 Google LLC
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # https://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Runs garf workflow."""
+
+ from __future__ import annotations
+
+ import logging
+ import pathlib
+ import re
+ from typing import Final
+
+ import yaml
+ from garf.executors import exceptions, setup
+ from garf.executors.telemetry import tracer
+ from garf.executors.workflows import workflow
+ from garf.io import reader
+
+ logger = logging.getLogger(__name__)
+
+ _REMOTE_FILES_PATTERN: Final[str] = (
+   '^(http|gs|s3|azure|hdfs|webhdfs|ssh|scp|sftp)'
+ )
+ _SCRIPT_PATH = pathlib.Path(__file__).parent
+
+
+ class WorkflowRunner:
+   """Runs garf workflow.
+
+   Attributes:
+     workflow: Workflow to execute.
+     wf_parent: Optional location of a workflow file.
+     parallel_threshold: Max allowed parallelism for the queries in the workflow.
+   """
+
+   def __init__(
+     self,
+     execution_workflow: workflow.Workflow,
+     wf_parent: pathlib.Path | str,
+     parallel_threshold: int = 10,
+   ) -> None:
+     """Initializes WorkflowRunner."""
+     self.workflow = execution_workflow
+     self.wf_parent = wf_parent
+     self.parallel_threshold = parallel_threshold
+
+   @classmethod
+   def from_file(
+     cls,
+     workflow_file: str | pathlib.Path,
+   ) -> WorkflowRunner:
+     """Initializes WorkflowRunner from a local or remote file."""
+     if isinstance(workflow_file, str):
+       workflow_file = pathlib.Path(workflow_file)
+     execution_workflow = workflow.Workflow.from_file(workflow_file)
+     return cls(
+       execution_workflow=execution_workflow, wf_parent=workflow_file.parent
+     )
+
+   def run(
+     self,
+     enable_cache: bool = False,
+     cache_ttl_seconds: int = 3600,
+     selected_aliases: list[str] | None = None,
+     skipped_aliases: list[str] | None = None,
+   ) -> list[str]:
+     skipped_aliases = skipped_aliases or []
+     selected_aliases = selected_aliases or []
+     reader_client = reader.create_reader('file')
+     execution_results = []
+     logger.info('Starting Garf Workflow...')
+     for i, step in enumerate(self.workflow.steps, 1):
+       step_name = f'{i}-{step.fetcher}'
+       if step.alias:
+         step_name = f'{step_name}-{step.alias}'
+       if step.alias in skipped_aliases:
+         logger.warning(
+           'Skipping step %d, fetcher: %s, alias: %s',
+           i,
+           step.fetcher,
+           step.alias,
+         )
+         continue
+       if selected_aliases and step.alias not in selected_aliases:
+         logger.warning(
+           'Skipping step %d, fetcher: %s, alias: %s',
+           i,
+           step.fetcher,
+           step.alias,
+         )
+         continue
+       with tracer.start_as_current_span(step_name):
+         logger.info(
+           'Running step %d, fetcher: %s, alias: %s', i, step.fetcher, step.alias
+         )
+         query_executor = setup.setup_executor(
+           source=step.fetcher,
+           fetcher_parameters=step.fetcher_parameters,
+           enable_cache=enable_cache,
+           cache_ttl_seconds=cache_ttl_seconds,
+         )
+         batch = {}
+         if not (queries := step.queries):
+           logger.error('Please provide one or more queries to run')
+           raise exceptions.GarfExecutorError(
+             'Please provide one or more queries to run'
+           )
+         for query in queries:
+           if isinstance(query, workflow.QueryPath):
+             query_path = query.full_path
+             if re.match(_REMOTE_FILES_PATTERN, query_path):
+               batch[query.path] = reader_client.read(query_path)
+             else:
+               if not query.prefix:
+                 query_path = self.wf_parent / pathlib.Path(query.path)
+               if not query_path.exists():
+                 raise workflow.GarfWorkflowError(
+                   f'Query: {query_path} not found'
+                 )
+               batch[query.path] = reader_client.read(query_path)
+           elif isinstance(query, workflow.QueryFolder):
+             query_path = self.wf_parent / pathlib.Path(query.folder)
+             if not query_path.exists():
+               raise workflow.GarfWorkflowError(
+                 f'Folder: {query_path} not found'
+               )
+             for p in query_path.rglob('*'):
+               if p.suffix == '.sql':
+                 batch[p.stem] = reader_client.read(p)
+           else:
+             batch[query.query.title] = query.query.text
+         query_executor.execute_batch(
+           batch,
+           step.context,
+           step.parallel_threshold or self.parallel_threshold,
+         )
+         execution_results.append(step_name)
+     return execution_results
+
+   def compile(self, path: str | pathlib.Path) -> str:
+     """Saves workflow with expanded anchors."""
+     return self.workflow.save(path)
+
+   def deploy(self, path: str | pathlib.Path) -> str:
+     """Prepares workflow for deployment to Google Cloud Workflows."""
+     wf = self.workflow.model_dump(exclude_none=True).get('steps')
+     with open(_SCRIPT_PATH / 'gcp_workflow.yaml', 'r', encoding='utf-8') as f:
+       cloud_workflow_run_template = yaml.safe_load(f)
+     init = {
+       'init': {
+         'assign': [{'pairs': wf}],
+       },
+     }
+     cloud_workflow = {
+       'main': {
+         'params': [],
+         'steps': [init, cloud_workflow_run_template],
+       },
+     }
+     with open(path, 'w', encoding='utf-8') as f:
+       yaml.dump(cloud_workflow, f, sort_keys=False)
+     return f'Workflow is saved to {path}'
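
A hedged end-to-end sketch of the runner (module path, workflow file, and alias are assumptions):

from garf.executors.workflows.workflow_runner import WorkflowRunner

runner = WorkflowRunner.from_file('workflows/garf_workflow.yaml')
# Run only the step aliased 'accounts', reusing cached API responses.
completed = runner.run(enable_cache=True, selected_aliases=['accounts'])
print(completed)  # e.g. ['1-bq-accounts']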
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: garf-executors
- Version: 1.0.2
+ Version: 1.1.3
  Summary: Executes queries against API and writes data to local/remote storage.
  Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
  License: Apache 2.0
@@ -36,8 +36,15 @@ Provides-Extra: gcp
  Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
  Provides-Extra: server
  Requires-Dist: fastapi[standard]; extra == "server"
+ Requires-Dist: pydantic-settings; extra == "server"
  Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
  Requires-Dist: typer; extra == "server"
+ Requires-Dist: grpcio-reflection; extra == "server"
+ Provides-Extra: tests
+ Requires-Dist: pytest; extra == "tests"
+ Requires-Dist: pytest-mock; extra == "tests"
+ Requires-Dist: pytest-xdist; extra == "tests"
+ Requires-Dist: pytest-grpc; extra == "tests"
  Provides-Extra: all
  Requires-Dist: garf-executors[bq,gcp,server,sql]; extra == "all"