garf-executors 0.0.11__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -93,6 +93,7 @@ class GarfParamsException(Exception):
 class LoggerEnum(str, enum.Enum):
   local = 'local'
   rich = 'rich'
+  gcloud = 'gcloud'
 
 
 def init_logging(
@@ -100,6 +101,7 @@ def init_logging(
   logger_type: str | LoggerEnum = 'local',
   name: str = __name__,
 ) -> logging.Logger:
+  loglevel = getattr(logging, loglevel)
   if logger_type == 'rich':
     logging.basicConfig(
       format='%(message)s',
@@ -109,6 +111,23 @@ def init_logging(
         rich_logging.RichHandler(rich_tracebacks=True),
       ],
     )
+  elif logger_type == 'gcloud':
+    try:
+      import google.cloud.logging as glogging
+    except ImportError as e:
+      raise ImportError(
+        'Please install garf-executors with Cloud logging support - '
+        '`pip install garf-executors[bq]`'
+      ) from e
+
+    client = glogging.Client()
+    handler = glogging.handlers.CloudLoggingHandler(client, name=name)
+    handler.close()
+    glogging.handlers.setup_logging(handler, log_level=loglevel)
+    logging.basicConfig(
+      level=loglevel,
+      handlers=[handler],
+    )
   else:
     logging.basicConfig(
       format='[%(asctime)s][%(name)s][%(levelname)s] %(message)s',
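The new `gcloud` logger type routes records to Google Cloud Logging. A minimal usage sketch, assuming the optional google-cloud-logging dependency is installed; the module hosting `init_logging` is not shown in this diff, so the import path below is a guess:

```python
import logging

# Hypothetical import path; the diff does not show which module
# exposes init_logging.
from garf_executors.entrypoints.utils import init_logging

logger = init_logging(
    loglevel='INFO',       # resolved to logging.INFO via getattr
    logger_type='gcloud',  # new LoggerEnum member in 0.2.3
    name='my-garf-job',    # used as the Cloud Logging log name
)
logger.info('Fetching reports...')
```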
@@ -35,17 +35,17 @@ class ExecutionContext(pydantic.BaseModel):
   Attributes:
     query_parameters: Parameters to dynamically change query text.
     fetcher_parameters: Parameters to specify fetching setup.
-    writer: Type of writer to use.
+    writer: Type of writer to use. Can be a single writer string or list of writers.
     writer_parameters: Optional parameters to setup writer.
   """
 
   query_parameters: query_editor.GarfQueryParameters | None = pydantic.Field(
     default_factory=dict
   )
-  fetcher_parameters: dict[str, str | list[str | int]] | None = pydantic.Field(
-    default_factory=dict
+  fetcher_parameters: dict[str, str | bool | int | list[str | int]] | None = (
+    pydantic.Field(default_factory=dict)
   )
-  writer: str | None = None
+  writer: str | list[str] | None = None
   writer_parameters: dict[str, str] | None = pydantic.Field(
     default_factory=dict
   )
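With the widened types, an `ExecutionContext` can now carry boolean and integer fetcher parameters and name several writers at once. A sketch under the assumption that 'console' and 'bq' are registered writer names and that 'dataset' is a valid option for the latter:

```python
from garf_executors import execution_context

context = execution_context.ExecutionContext(
    fetcher_parameters={'accounts': ['1', '2'], 'expand': True, 'limit': 10},
    writer=['console', 'bq'],  # a plain 'bq' string still works
    writer_parameters={'dataset': 'my_dataset'},  # illustrative writer option
)
```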
@@ -75,9 +75,42 @@ class ExecutionContext(pydantic.BaseModel):
 
   @property
   def writer_client(self) -> abs_writer.AbsWriter:
-    writer_client = writer.create_writer(self.writer, **self.writer_parameters)
-    if self.writer == 'bq':
+    """Returns single writer client."""
+    if isinstance(self.writer, list) and len(self.writer) > 0:
+      writer_type = self.writer[0]
+    else:
+      writer_type = self.writer
+
+    writer_params = self.writer_parameters or {}
+
+    if not writer_type:
+      raise ValueError('No writer specified')
+
+    writer_client = writer.create_writer(writer_type, **writer_params)
+    if writer_type == 'bq':
       _ = writer_client.create_or_get_dataset()
-    if self.writer == 'sheet':
+    if writer_type == 'sheet':
       writer_client.init_client()
     return writer_client
+
+  @property
+  def writer_clients(self) -> list[abs_writer.AbsWriter]:
+    """Returns list of writer clients."""
+    if not self.writer:
+      return []
+
+    # Convert single writer to list for uniform processing
+    writers_to_use = (
+      self.writer if isinstance(self.writer, list) else [self.writer]
+    )
+    writer_params = self.writer_parameters or {}
+
+    clients = []
+    for writer_type in writers_to_use:
+      writer_client = writer.create_writer(writer_type, **writer_params)
+      if writer_type == 'bq':
+        _ = writer_client.create_or_get_dataset()
+      if writer_type == 'sheet':
+        writer_client.init_client()
+      clients.append(writer_client)
+    return clients
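`writer_client` stays backward compatible (a list falls back to its first element), while the new `writer_clients` materializes one client per configured name. A hedged sketch of the fan-out; the writer names and the report constructor arguments are illustrative:

```python
from garf_core import report
from garf_executors import execution_context

context = execution_context.ExecutionContext(writer=['console', 'csv'])
sample = report.GarfReport(results=[[1]], column_names=['n'])  # illustrative

for client in context.writer_clients:  # one AbsWriter per configured name
    client.write(sample, 'my_query')
```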
@@ -14,14 +14,29 @@
 
 """Defines common functionality between executors."""
 
-from concurrent import futures
+import asyncio
+import inspect
+from typing import Optional
+
+from garf_core import report_fetcher
+from opentelemetry import trace
 
 from garf_executors import execution_context
+from garf_executors.telemetry import tracer
 
 
 class Executor:
   """Defines common functionality between executors."""
 
+  def __init__(
+    self,
+    preprocessors: Optional[dict[str, report_fetcher.Processor]] = None,
+    postprocessors: Optional[dict[str, report_fetcher.Processor]] = None,
+  ) -> None:
+    self.preprocessors = preprocessors or {}
+    self.postprocessors = postprocessors or {}
+
+  @tracer.start_as_current_span('api.execute_batch')
   def execute_batch(
     self,
     batch: dict[str, str],
@@ -30,6 +45,9 @@ class Executor:
   ) -> list[str]:
     """Executes batch of queries for a common context.
 
+    If an executor has any pre/post processors, executes them first while
+    modifying the context.
+
     Args:
       batch: Mapping between query_title and its text.
       context: Execution context.
@@ -38,17 +56,69 @@ class Executor:
     Returns:
       Results of execution.
     """
-    results = []
-    with futures.ThreadPoolExecutor(max_workers=parallel_threshold) as executor:
-      future_to_query = {
-        executor.submit(
-          self.execute,
-          query,
-          title,
-          context,
-        ): query
-        for title, query in batch.items()
-      }
-      for future in futures.as_completed(future_to_query):
-        results.append(future.result())
+    span = trace.get_current_span()
+    span.set_attribute('api.parallel_threshold', parallel_threshold)
+    _handle_processors(processors=self.preprocessors, context=context)
+    results = asyncio.run(
+      self._run(
+        batch=batch, context=context, parallel_threshold=parallel_threshold
+      )
+    )
+    _handle_processors(processors=self.postprocessors, context=context)
     return results
+
+  def add_preprocessor(
+    self, preprocessors: dict[str, report_fetcher.Processor]
+  ) -> None:
+    self.preprocessors.update(preprocessors)
+
+  async def aexecute(
+    self,
+    query: str,
+    title: str,
+    context: execution_context.ExecutionContext,
+  ) -> str:
+    """Performs query execution asynchronously.
+
+    Args:
+      query: Location of the query.
+      title: Name of the query.
+      context: Query execution context.
+
+    Returns:
+      Result of writing the report.
+    """
+    return await asyncio.to_thread(self.execute, query, title, context)
+
+  async def _run(
+    self,
+    batch: dict[str, str],
+    context: execution_context.ExecutionContext,
+    parallel_threshold: int,
+  ):
+    semaphore = asyncio.Semaphore(value=parallel_threshold)
+
+    async def run_with_semaphore(fn):
+      async with semaphore:
+        return await fn
+
+    tasks = [
+      self.aexecute(query=query, title=title, context=context)
+      for title, query in batch.items()
+    ]
+    return await asyncio.gather(*(run_with_semaphore(task) for task in tasks))
+
+
+def _handle_processors(
+  processors: dict[str, report_fetcher.Processor],
+  context: execution_context.ExecutionContext,
+) -> None:
+  for k, processor in processors.items():
+    processor_signature = list(inspect.signature(processor).parameters.keys())
+    if k in context.fetcher_parameters:
+      processor_parameters = {
+        k: v
+        for k, v in context.fetcher_parameters.items()
+        if k in processor_signature
+      }
+      context.fetcher_parameters[k] = processor(**processor_parameters)
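The batch runner now replaces `ThreadPoolExecutor` with asyncio: each blocking `execute` call is pushed to a worker thread via `asyncio.to_thread` and throttled by a semaphore. A self-contained sketch of that pattern with generic names:

```python
import asyncio
import time


def execute(query: str, title: str) -> str:
    time.sleep(0.1)  # stand-in for a blocking fetch-and-write call
    return f'{title}: done'


async def run_batch(batch: dict[str, str], parallel_threshold: int) -> list[str]:
    semaphore = asyncio.Semaphore(parallel_threshold)

    async def bounded(title: str, query: str) -> str:
        async with semaphore:  # at most parallel_threshold concurrent calls
            return await asyncio.to_thread(execute, query, title)

    return await asyncio.gather(
        *(bounded(title, query) for title, query in batch.items())
    )


print(asyncio.run(run_batch({'q1': 'SELECT 1', 'q2': 'SELECT 2'}, 2)))
```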
@@ -13,12 +13,18 @@
 # limitations under the License.
 
 import inspect
+import logging
 import sys
 from importlib.metadata import entry_points
 
-from garf_core import exceptions, report_fetcher
+from garf_core import report_fetcher
 
+from garf_executors.telemetry import tracer
 
+logger = logging.getLogger(name='garf_executors.fetchers')
+
+
+@tracer.start_as_current_span('find_fetchers')
 def find_fetchers() -> set[str]:
   """Identifies all available report fetchers."""
   if entrypoints := _get_entrypoints('garf'):
@@ -26,6 +32,7 @@ def find_fetchers() -> set[str]:
   return set()
 
 
+@tracer.start_as_current_span('get_report_fetcher')
 def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
   """Loads report fetcher for a given source.
 
@@ -44,15 +51,19 @@ def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
   for fetcher in _get_entrypoints('garf'):
     if fetcher.name == source:
       try:
-        fetcher_module = fetcher.load()
+        with tracer.start_as_current_span('load_fetcher_module') as span:
+          fetcher_module = fetcher.load()
+          span.set_attribute('loaded_module', fetcher_module.__name__)
         for name, obj in inspect.getmembers(fetcher_module):
           if inspect.isclass(obj) and issubclass(
             obj, report_fetcher.ApiReportFetcher
           ):
             return getattr(fetcher_module, name)
-      except ModuleNotFoundError:
-        continue
-  raise exceptions.ApiReportFetcherError(
+      except ModuleNotFoundError as e:
+        raise report_fetcher.ApiReportFetcherError(
+          f'Failed to load fetcher for source {source}, reason: {e}'
+        )
+  raise report_fetcher.ApiReportFetcherError(
     f'No fetcher available for the source "{source}"'
   )
 
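Fetcher discovery is driven by the 'garf' entry-point group, which the decorated functions above enumerate. A minimal sketch of inspecting that group directly with the Python 3.10+ `importlib.metadata` API:

```python
from importlib.metadata import entry_points

# Each installed garf plugin registers itself under the 'garf' group;
# get_report_fetcher() loads the matching module and returns the first
# ApiReportFetcher subclass it finds there.
for ep in entry_points(group='garf'):
    print(ep.name, '->', ep.value)
```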
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
+# source: garf.proto
+# Protobuf Python Version: 6.31.1
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+_runtime_version.ValidateProtobufRuntimeVersion(
+    _runtime_version.Domain.PUBLIC,
+    6,
+    31,
+    1,
+    '',
+    'garf.proto'
+)
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ngarf.proto\x12\x04garf\x1a\x1cgoogle/protobuf/struct.proto\"g\n\x0e\x45xecuteRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12\'\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x16.garf.ExecutionContext\"\xbc\x01\n\x10\x45xecutionContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06writer\x18\x03 \x01(\t\x12\x32\n\x11writer_parameters\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\"d\n\x0fQueryParameters\x12&\n\x05macro\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x08template\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\"\n\x0f\x45xecuteResponse\x12\x0f\n\x07results\x18\x01 \x03(\t2G\n\x0bGarfService\x12\x38\n\x07\x45xecute\x12\x14.garf.ExecuteRequest\x1a\x15.garf.ExecuteResponse\"\x00\x62\x06proto3')
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'garf_pb2', _globals)
+if not _descriptor._USE_C_DESCRIPTORS:
+  DESCRIPTOR._loaded_options = None
+  _globals['_EXECUTEREQUEST']._serialized_start=50
+  _globals['_EXECUTEREQUEST']._serialized_end=153
+  _globals['_EXECUTIONCONTEXT']._serialized_start=156
+  _globals['_EXECUTIONCONTEXT']._serialized_end=344
+  _globals['_QUERYPARAMETERS']._serialized_start=346
+  _globals['_QUERYPARAMETERS']._serialized_end=446
+  _globals['_EXECUTERESPONSE']._serialized_start=448
+  _globals['_EXECUTERESPONSE']._serialized_end=482
+  _globals['_GARFSERVICE']._serialized_start=484
+  _globals['_GARFSERVICE']._serialized_end=555
+# @@protoc_insertion_point(module_scope)
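Decoded, the serialized descriptor above defines four messages — ExecuteRequest (source, title, query, context), ExecutionContext (query_parameters, fetcher_parameters, writer, writer_parameters), QueryParameters (macro, template), ExecuteResponse (repeated results) — plus a GarfService with a unary Execute method. A sketch of building a request; the package path for the generated module is a guess, not shown in this diff:

```python
from garf_executors.grpc import garf_pb2  # hypothetical import path

request = garf_pb2.ExecuteRequest(
    source='sqldb',  # illustrative fetcher name
    title='my_query',
    query='SELECT 1 AS n',
)
print(request)
```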
@@ -0,0 +1,97 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+import warnings
+
+from . import garf_pb2 as garf__pb2
+
+GRPC_GENERATED_VERSION = '1.75.0'
+GRPC_VERSION = grpc.__version__
+_version_not_supported = False
+
+try:
+    from grpc._utilities import first_version_is_lower
+    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
+except ImportError:
+    _version_not_supported = True
+
+if _version_not_supported:
+    raise RuntimeError(
+        f'The grpc package installed is at version {GRPC_VERSION},'
+        + f' but the generated code in garf_pb2_grpc.py depends on'
+        + f' grpcio>={GRPC_GENERATED_VERSION}.'
+        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
+        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
+    )
+
+
+class GarfServiceStub(object):
+    """Missing associated documentation comment in .proto file."""
+
+    def __init__(self, channel):
+        """Constructor.
+
+        Args:
+            channel: A grpc.Channel.
+        """
+        self.Execute = channel.unary_unary(
+                '/garf.GarfService/Execute',
+                request_serializer=garf__pb2.ExecuteRequest.SerializeToString,
+                response_deserializer=garf__pb2.ExecuteResponse.FromString,
+                _registered_method=True)
+
+
+class GarfServiceServicer(object):
+    """Missing associated documentation comment in .proto file."""
+
+    def Execute(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+
+def add_GarfServiceServicer_to_server(servicer, server):
+    rpc_method_handlers = {
+            'Execute': grpc.unary_unary_rpc_method_handler(
+                    servicer.Execute,
+                    request_deserializer=garf__pb2.ExecuteRequest.FromString,
+                    response_serializer=garf__pb2.ExecuteResponse.SerializeToString,
+            ),
+    }
+    generic_handler = grpc.method_handlers_generic_handler(
+            'garf.GarfService', rpc_method_handlers)
+    server.add_generic_rpc_handlers((generic_handler,))
+    server.add_registered_method_handlers('garf.GarfService', rpc_method_handlers)
+
+
+# This class is part of an EXPERIMENTAL API.
+class GarfService(object):
+    """Missing associated documentation comment in .proto file."""
+
+    @staticmethod
+    def Execute(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/garf.GarfService/Execute',
+            garf__pb2.ExecuteRequest.SerializeToString,
+            garf__pb2.ExecuteResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
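A hedged sketch of calling the service through the generated stub, assuming a server is already listening on localhost:50051 (address illustrative) and the same hypothetical import path as above:

```python
import grpc

from garf_executors.grpc import garf_pb2, garf_pb2_grpc  # hypothetical path

with grpc.insecure_channel('localhost:50051') as channel:
    stub = garf_pb2_grpc.GarfServiceStub(channel)
    response = stub.Execute(
        garf_pb2.ExecuteRequest(source='sqldb', title='t', query='SELECT 1')
    )
    print(list(response.results))  # repeated string field
```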
@@ -25,11 +25,14 @@ except ImportError as e:
 
 import logging
 import re
+import uuid
 
 import pandas as pd
 from garf_core import query_editor, report
+from opentelemetry import trace
 
 from garf_executors import exceptions, execution_context, executor
+from garf_executors.telemetry import tracer
 
 logger = logging.getLogger(__name__)
 
@@ -54,6 +57,7 @@ class SqlAlchemyQueryExecutor(
       engine: Initialized Engine object to operate on a given database.
     """
     self.engine = engine
+    super().__init__()
 
   @classmethod
   def from_connection_string(
@@ -66,6 +70,7 @@ class SqlAlchemyQueryExecutor(
     engine = sqlalchemy.create_engine(connection_string)
     return cls(engine)
 
+  @tracer.start_as_current_span('sql.execute')
   def execute(
     self,
     query: str,
@@ -84,35 +89,53 @@ class SqlAlchemyQueryExecutor(
     Returns:
       Report with data if query returns some data otherwise empty Report.
     """
-    logging.info('Executing script: %s', title)
+    span = trace.get_current_span()
+    logger.info('Executing script: %s', title)
     query_text = self.replace_params_template(query, context.query_parameters)
     with self.engine.begin() as conn:
       if re.findall(r'(create|update) ', query_text.lower()):
-        conn.connection.executescript(query_text)
-        results = report.GarfReport()
+        try:
+          conn.connection.executescript(query_text)
+          results = report.GarfReport()
+        except Exception as e:
+          raise SqlAlchemyQueryExecutorError(
+            f'Failed to execute query {title}: Reason: {e}'
+          ) from e
       else:
-        temp_table_name = f'temp_{title}'.replace('.', '_')
+        temp_table_name = f'temp_{uuid.uuid4().hex}'
         query_text = f'CREATE TABLE {temp_table_name} AS {query_text}'
         conn.connection.executescript(query_text)
         try:
           results = report.GarfReport.from_pandas(
             pd.read_sql(f'SELECT * FROM {temp_table_name}', conn)
           )
+        except Exception as e:
+          raise SqlAlchemyQueryExecutorError(
+            f'Failed to execute query {title}: Reason: {e}'
+          ) from e
         finally:
           conn.connection.execute(f'DROP TABLE {temp_table_name}')
     if context.writer and results:
-      writer_client = context.writer_client
-      logger.debug(
-        'Start writing data for query %s via %s writer',
-        title,
-        type(writer_client),
-      )
-      writing_result = writer_client.write(results, title)
-      logger.debug(
-        'Finish writing data for query %s via %s writer',
-        title,
-        type(writer_client),
-      )
-      logger.info('%s executed successfully', title)
-      return writing_result
+      writer_clients = context.writer_clients
+      if not writer_clients:
+        logger.warning('No writers configured, skipping write operation')
+      else:
+        writing_results = []
+        for writer_client in writer_clients:
+          logger.debug(
+            'Start writing data for query %s via %s writer',
+            title,
+            type(writer_client),
+          )
+          writing_result = writer_client.write(results, title)
+          logger.debug(
+            'Finish writing data for query %s via %s writer',
+            title,
+            type(writer_client),
+          )
+          writing_results.append(writing_result)
+        logger.info('%s executed successfully', title)
+        # Return the last writer's result for backward compatibility
+        return writing_results[-1] if writing_results else None
+    span.set_attribute('execute.num_results', len(results))
     return results
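A usage sketch of the executor after these changes, against an in-memory SQLite database; the module path is a guess, and with no writer configured the call returns the GarfReport itself:

```python
from garf_executors import execution_context
from garf_executors.sql_executor import (  # hypothetical module path
    SqlAlchemyQueryExecutor,
)

executor = SqlAlchemyQueryExecutor.from_connection_string('sqlite:///:memory:')
result = executor.execute(
    query='SELECT 1 AS n',
    title='smoke_test',
    context=execution_context.ExecutionContext(),
)
print(result)
```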
@@ -0,0 +1,20 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=C0330, g-bad-import-order, g-multiple-import
+from opentelemetry import trace
+
+tracer = trace.get_tracer(
+  instrumenting_module_name='garf_executors',
+)
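The module-level tracer uses the OpenTelemetry API only, so spans are no-ops until an application installs an SDK tracer provider. A minimal sketch wiring a console exporter, assuming opentelemetry-sdk is installed:

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)
# From here on, garf_executors spans (e.g. 'api.execute_batch') are printed.
```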
@@ -0,0 +1,96 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import os
+import pathlib
+
+import pydantic
+import smart_open
+import yaml
+
+from garf_executors.execution_context import ExecutionContext
+
+
+class QueryPath(pydantic.BaseModel):
+  """Path to file with query."""
+
+  path: str
+
+
+class QueryDefinition(pydantic.BaseModel):
+  """Definition of a query."""
+
+  query: Query
+
+
+class Query(pydantic.BaseModel):
+  """Query elements.
+
+  Attributes:
+    text: Query text.
+    title: Name of the query.
+  """
+
+  text: str
+  title: str
+
+
+class ExecutionStep(ExecutionContext):
+  """Common context for executing one or more queries.
+
+  Attributes:
+    fetcher: Name of a fetcher to get data from API.
+    alias: Optional alias to identify execution step.
+    queries: Queries to run for a particular fetcher.
+    context: Execution context for queries and fetcher.
+  """
+
+  fetcher: str | None = None
+  alias: str | None = None
+  queries: list[QueryPath | QueryDefinition] | None = None
+
+  @property
+  def context(self) -> ExecutionContext:
+    return ExecutionContext(
+      writer=self.writer,
+      writer_parameters=self.writer_parameters,
+      query_parameters=self.query_parameters,
+      fetcher_parameters=self.fetcher_parameters,
+    )
+
+
+class Workflow(pydantic.BaseModel):
+  """Orchestrates execution of queries for multiple fetchers.
+
+  Attributes:
+    steps: Contains one or several fetcher executions.
+  """
+
+  steps: list[ExecutionStep]
+
+  @classmethod
+  def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Workflow:
+    """Builds workflow from local or remote yaml file."""
+    with smart_open.open(path, 'r', encoding='utf-8') as f:
+      data = yaml.safe_load(f)
+    return Workflow(steps=data.get('steps'))
+
+  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
+    """Saves workflow to local or remote yaml file."""
+    with smart_open.open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(
+        self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
+      )
+    return f'Workflow is saved to {str(path)}'
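A sketch of the yaml shape `Workflow.from_file` expects, inferred from the `ExecutionStep` fields; the module path and file name are illustrative:

```python
# workflow.yaml (illustrative):
#
# steps:
#   - fetcher: sqldb
#     alias: extract
#     queries:
#       - path: queries/revenue.sql
#     writer: console
#
from garf_executors.workflow import Workflow  # hypothetical module path

workflow = Workflow.from_file('workflow.yaml')
for step in workflow.steps:
    print(step.fetcher, step.alias, step.context.writer)
```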