garf-executors 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf/executors/__init__.py +1 -36
- garf/executors/api_executor.py +89 -4
- garf/executors/bq_executor.py +2 -0
- garf/executors/entrypoints/cli.py +45 -58
- garf/executors/entrypoints/grpc_server.py +21 -7
- garf/executors/entrypoints/server.py +64 -7
- garf/executors/entrypoints/tracer.py +29 -4
- garf/executors/fetchers.py +52 -2
- garf/executors/garf_pb2.py +17 -11
- garf/executors/garf_pb2_grpc.py +45 -2
- garf/executors/query_processor.py +36 -18
- garf/executors/setup.py +58 -0
- garf/executors/sql_executor.py +2 -0
- garf/executors/workflows/__init__.py +0 -0
- garf/executors/workflows/gcp_workflow.yaml +49 -0
- garf/executors/{workflow.py → workflows/workflow.py} +57 -2
- garf/executors/workflows/workflow_runner.py +172 -0
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/METADATA +8 -1
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/RECORD +22 -18
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/WHEEL +1 -1
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/entry_points.txt +0 -0
- {garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/top_level.txt +0 -0
garf/executors/garf_pb2.py
CHANGED

@@ -25,21 +25,27 @@ _sym_db = _symbol_database.Default()
 from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2


-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ngarf.proto\x12\x04garf\x1a\x1cgoogle/protobuf/struct.proto\"g\n\x0e\x45xecuteRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12\'\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x16.garf.ExecutionContext\"\xbc\x01\n\x10\x45xecutionContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06writer\x18\x03 \x01(\t\x12\x32\n\x11writer_parameters\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\"d\n\x0fQueryParameters\x12&\n\x05macro\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x08template\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\"\n\x0f\x45xecuteResponse\x12\x0f\n\x07results\x18\x01 \x03(\
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ngarf.proto\x12\x04garf\x1a\x1cgoogle/protobuf/struct.proto\"a\n\x0c\x46\x65tchRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12#\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x12.garf.FetchContext\"G\n\rFetchResponse\x12\x0f\n\x07\x63olumns\x18\x01 \x03(\t\x12%\n\x04rows\x18\x02 \x03(\x0b\x32\x17.google.protobuf.Struct\"t\n\x0c\x46\x65tchContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"g\n\x0e\x45xecuteRequest\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\r\n\x05title\x18\x02 \x01(\t\x12\r\n\x05query\x18\x03 \x01(\t\x12\'\n\x07\x63ontext\x18\x04 \x01(\x0b\x32\x16.garf.ExecutionContext\"\xbc\x01\n\x10\x45xecutionContext\x12/\n\x10query_parameters\x18\x01 \x01(\x0b\x32\x15.garf.QueryParameters\x12\x33\n\x12\x66\x65tcher_parameters\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x0e\n\x06writer\x18\x03 \x01(\t\x12\x32\n\x11writer_parameters\x18\x04 \x01(\x0b\x32\x17.google.protobuf.Struct\"d\n\x0fQueryParameters\x12&\n\x05macro\x18\x01 \x01(\x0b\x32\x17.google.protobuf.Struct\x12)\n\x08template\x18\x02 \x01(\x0b\x32\x17.google.protobuf.Struct\"\"\n\x0f\x45xecuteResponse\x12\x0f\n\x07results\x18\x01 \x03(\t2{\n\x0bGarfService\x12\x38\n\x07\x45xecute\x12\x14.garf.ExecuteRequest\x1a\x15.garf.ExecuteResponse\"\x00\x12\x32\n\x05\x46\x65tch\x12\x12.garf.FetchRequest\x1a\x13.garf.FetchResponse\"\x00\x62\x06proto3')

 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'garf_pb2', _globals)
 if not _descriptor._USE_C_DESCRIPTORS:
   DESCRIPTOR._loaded_options = None
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
-  _globals['
+  _globals['_FETCHREQUEST']._serialized_start=50
+  _globals['_FETCHREQUEST']._serialized_end=147
+  _globals['_FETCHRESPONSE']._serialized_start=149
+  _globals['_FETCHRESPONSE']._serialized_end=220
+  _globals['_FETCHCONTEXT']._serialized_start=222
+  _globals['_FETCHCONTEXT']._serialized_end=338
+  _globals['_EXECUTEREQUEST']._serialized_start=340
+  _globals['_EXECUTEREQUEST']._serialized_end=443
+  _globals['_EXECUTIONCONTEXT']._serialized_start=446
+  _globals['_EXECUTIONCONTEXT']._serialized_end=634
+  _globals['_QUERYPARAMETERS']._serialized_start=636
+  _globals['_QUERYPARAMETERS']._serialized_end=736
+  _globals['_EXECUTERESPONSE']._serialized_start=738
+  _globals['_EXECUTERESPONSE']._serialized_end=772
+  _globals['_GARFSERVICE']._serialized_start=774
+  _globals['_GARFSERVICE']._serialized_end=897
 # @@protoc_insertion_point(module_scope)
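The regenerated descriptor above adds FetchRequest, FetchResponse, and FetchContext messages next to the existing Execute types. A minimal sketch of building a FetchRequest from the field names decoded in the serialized blob; the source alias, query, and connection string below are illustrative values, not part of the package:

from google.protobuf import struct_pb2

from garf.executors import garf_pb2

# Hypothetical fetcher parameters packed into the Struct-typed field.
fetcher_parameters = struct_pb2.Struct()
fetcher_parameters.update({'connection_string': 'sqlite:///demo.db'})

request = garf_pb2.FetchRequest(
  source='sqldb',  # illustrative fetcher alias
  title='demo',
  query='SELECT 1 AS one',
  context=garf_pb2.FetchContext(fetcher_parameters=fetcher_parameters),
)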
garf/executors/garf_pb2_grpc.py
CHANGED

@@ -5,7 +5,7 @@ import warnings

 from . import garf_pb2 as garf__pb2

-GRPC_GENERATED_VERSION = '1.
+GRPC_GENERATED_VERSION = '1.76.0'
 GRPC_VERSION = grpc.__version__
 _version_not_supported = False

@@ -18,7 +18,7 @@ except ImportError:
 if _version_not_supported:
     raise RuntimeError(
         f'The grpc package installed is at version {GRPC_VERSION},'
-        +
+        + ' but the generated code in garf_pb2_grpc.py depends on'
         + f' grpcio>={GRPC_GENERATED_VERSION}.'
         + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
         + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'

@@ -39,6 +39,11 @@ class GarfServiceStub(object):
                 request_serializer=garf__pb2.ExecuteRequest.SerializeToString,
                 response_deserializer=garf__pb2.ExecuteResponse.FromString,
                 _registered_method=True)
+        self.Fetch = channel.unary_unary(
+                '/garf.GarfService/Fetch',
+                request_serializer=garf__pb2.FetchRequest.SerializeToString,
+                response_deserializer=garf__pb2.FetchResponse.FromString,
+                _registered_method=True)


 class GarfServiceServicer(object):

@@ -50,6 +55,12 @@ class GarfServiceServicer(object):
         context.set_details('Method not implemented!')
         raise NotImplementedError('Method not implemented!')

+    def Fetch(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+

 def add_GarfServiceServicer_to_server(servicer, server):
     rpc_method_handlers = {
@@ -58,6 +69,11 @@ def add_GarfServiceServicer_to_server(servicer, server):
             request_deserializer=garf__pb2.ExecuteRequest.FromString,
             response_serializer=garf__pb2.ExecuteResponse.SerializeToString,
             ),
+            'Fetch': grpc.unary_unary_rpc_method_handler(
+                    servicer.Fetch,
+                    request_deserializer=garf__pb2.FetchRequest.FromString,
+                    response_serializer=garf__pb2.FetchResponse.SerializeToString,
+            ),
     }
     generic_handler = grpc.method_handlers_generic_handler(
             'garf.GarfService', rpc_method_handlers)

@@ -95,3 +111,30 @@ class GarfService(object):
             timeout,
             metadata,
             _registered_method=True)
+
+    @staticmethod
+    def Fetch(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/garf.GarfService/Fetch',
+            garf__pb2.FetchRequest.SerializeToString,
+            garf__pb2.FetchResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
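With the service definition extended, a generated client can call the new unary Fetch RPC. A minimal sketch using the standard grpcio API; the server address is an assumption and a GarfService server must already be listening there:

import grpc

from garf.executors import garf_pb2, garf_pb2_grpc

# 'localhost:50051' is an assumed address for a running GarfService server.
with grpc.insecure_channel('localhost:50051') as channel:
  stub = garf_pb2_grpc.GarfServiceStub(channel)
  response = stub.Fetch(
    garf_pb2.FetchRequest(source='bq', title='demo', query='SELECT 1')
  )
  print(response.columns)  # repeated string of column names
  for row in response.rows:  # each row is a google.protobuf.Struct
    print(dict(row))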
garf/executors/query_processor.py
CHANGED

@@ -12,29 +12,39 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+"""qQuery can be used as a parameter in garf queries."""
+
 import contextlib

-from garf.core import query_editor
-from garf.executors import
+from garf.core import query_editor, query_parser
+from garf.executors import execution_context


-
-
-
-
+class GqueryError(query_parser.GarfQueryError):
+  """Errors on incorrect qQuery syntax."""
+
+
+def _handle_sub_context(context, sub_context):
+  for k, v in sub_context.items():
     if isinstance(v, str) and v.startswith('gquery'):
       no_writer_context = context.model_copy(update={'writer': None})
       try:
-        _, alias, query = v.split(':', maxsplit=3)
+        _, alias, *query = v.split(':', maxsplit=3)
       except ValueError:
-        raise
+        raise GqueryError(
           f'Incorrect gquery format, should be gquery:alias:query, got {v}'
         )
+      if not alias:
+        raise GqueryError(f'Missing alias in gquery: {v}')
+      if not query:
+        raise GqueryError(f'Missing query text in gquery: {v}')
       if alias == 'sqldb':
         from garf.executors import sql_executor

-        gquery_executor =
-
+        gquery_executor = (
+          sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+            context.fetcher_parameters.get('connection_string')
+          )
         )
       elif alias == 'bq':
         from garf.executors import bq_executor

@@ -43,19 +53,27 @@ def process_gquery(
           **context.fetcher_parameters
         )
       else:
-        raise
-
-
+        raise GqueryError(f'Unsupported alias {alias} for gquery: {v}')
+      with contextlib.suppress(
+        query_editor.GarfResourceError, query_parser.GarfVirtualColumnError
+      ):
+        query = ':'.join(query)
         query_spec = query_editor.QuerySpecification(
           text=query, args=context.query_parameters
         ).generate()
         if len(columns := [c for c in query_spec.column_names if c != '_']) > 1:
-          raise
-            f'Multiple columns in gquery: {columns}'
-          )
+          raise GqueryError(f'Multiple columns in gquery definition: {columns}')
       res = gquery_executor.execute(
         query=query, title='gquery', context=no_writer_context
       )
-
+      if len(columns := [c for c in res.column_names if c != '_']) > 1:
+        raise GqueryError(f'Multiple columns in gquery result: {columns}')
+      sub_context[k] = res.to_list(row_type='scalar')
+
+
+def process_gquery(
+  context: execution_context.ExecutionContext,
+) -> execution_context.ExecutionContext:
+  _handle_sub_context(context, context.fetcher_parameters)
+  _handle_sub_context(context, context.query_parameters.macro)
   return context
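The reworked module lets a macro or fetcher parameter embed a gquery:alias:query expression that is executed first and replaced by the scalars it returns. A minimal sketch, assuming ExecutionContext accepts the dict shapes shown; the macro name, query, and connection string are hypothetical:

from garf.executors import execution_context, query_processor

# Hypothetical context: the 'accounts' macro is itself a one-column query.
context = execution_context.ExecutionContext(
  query_parameters={
    'macro': {'accounts': 'gquery:sqldb:SELECT account_id FROM accounts'}
  },
  fetcher_parameters={'connection_string': 'sqlite:///demo.db'},
)

# process_gquery runs the embedded query via SqlAlchemyQueryExecutor and
# substitutes the macro with the list of returned scalar values.
context = query_processor.process_gquery(context)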
garf/executors/setup.py
ADDED

@@ -0,0 +1,58 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Bootstraps executor based on provided parameters."""
+
+from __future__ import annotations
+
+import importlib
+
+from garf.executors import executor, fetchers
+from garf.executors.api_executor import ApiQueryExecutor
+from garf.executors.telemetry import tracer
+
+
+@tracer.start_as_current_span('setup_executor')
+def setup_executor(
+  source: str,
+  fetcher_parameters: dict[str, str | int | bool],
+  enable_cache: bool = False,
+  cache_ttl_seconds: int = 3600,
+  simulate: bool = False,
+) -> type[executor.Executor]:
+  """Initializes executors based on a source and parameters."""
+  if source == 'bq':
+    bq_executor = importlib.import_module('garf.executors.bq_executor')
+    query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
+  elif source == 'sqldb':
+    sql_executor = importlib.import_module('garf.executors.sql_executor')
+    query_executor = (
+      sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+        fetcher_parameters.get('connection_string')
+      )
+    )
+  else:
+    concrete_api_fetcher = fetchers.get_report_fetcher(source)
+    if simulate:
+      concrete_simulator = fetchers.get_report_simulator(source)()
+    else:
+      concrete_simulator = None
+    query_executor = ApiQueryExecutor(
+      fetcher=concrete_api_fetcher(
+        **fetcher_parameters,
+        enable_cache=enable_cache,
+        cache_ttl_seconds=cache_ttl_seconds,
+      ),
+      report_simulator=concrete_simulator,
+    )
+  return query_executor
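setup_executor gives callers a single entry point that picks a BigQuery, SQLAlchemy, or API-based executor from the source string. A short usage sketch; the connection string is a placeholder:

from garf.executors import setup

# Routes to SqlAlchemyQueryExecutor because source is 'sqldb';
# any other source is resolved through fetchers.get_report_fetcher.
query_executor = setup.setup_executor(
  source='sqldb',
  fetcher_parameters={'connection_string': 'sqlite:///demo.db'},
  enable_cache=False,
)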
garf/executors/sql_executor.py
CHANGED

@@ -91,6 +91,8 @@ class SqlAlchemyQueryExecutor(
       Report with data if query returns some data otherwise empty Report.
     """
     span = trace.get_current_span()
+    span.set_attribute('query.title', title)
+    span.set_attribute('query.text', query)
     logger.info('Executing script: %s', title)
     query_text = self.replace_params_template(query, context.query_parameters)
     with self.engine.begin() as conn:
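The executor now records the query title and text on the active OpenTelemetry span. A minimal sketch of how such attributes surface when tracing is configured; the console-exporter setup is an assumption for illustration, not part of the package:

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Illustrative tracing setup: spans created around execute() will carry
# the 'query.title' and 'query.text' attributes set above.
trace.set_tracer_provider(TracerProvider())
trace.get_tracer_provider().add_span_processor(
  SimpleSpanProcessor(ConsoleSpanExporter())
)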
garf/executors/workflows/__init__.py
File without changes

garf/executors/workflows/gcp_workflow.yaml
ADDED

@@ -0,0 +1,49 @@
+run:
+  for:
+    value: pair
+    in: ${pairs}
+    steps:
+      - log_source:
+          call: sys.log
+          args:
+            data: ${pair.alias}
+      - execute_queries:
+          parallel:
+            for:
+              value: query
+              in: ${pair.queries}
+              steps:
+                - log_query:
+                    call: sys.log
+                    args:
+                      data: ${pair}
+                - execute_single_query:
+                    try:
+                      call: http.post
+                      args:
+                        url: ${sys.get_env("GARF_ENDPOINT") + "/api/execute"}
+                        auth:
+                          type: OIDC
+                        body:
+                          source: ${pair.fetcher}
+                          # query_path: ${query.path}
+                          title: ${query.query.title}
+                          query: ${query.query.text}
+                          context:
+                            fetcher_parameters: ${pair.fetcher_parameters}
+                            writer: ${pair.writer}
+                            writer_parameters: ${pair.writer_parameters}
+                            query_parameters:
+                              macro: ${pair.query_parameters.macro}
+                              template: ${pair.query_parameters.template}
+                      result: task_resp
+                    except:
+                      as: e
+                      assign:
+                        - task_resp:
+                            status: "failed"
+                            error: ${e.message}
+                - log_result:
+                    call: sys.log
+                    args:
+                      data: ${task_resp}
garf/executors/{workflow.py → workflows/workflow.py}
CHANGED

@@ -11,10 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""Workflow specifies steps of end-to-end fetching and processing."""
+
 from __future__ import annotations

+import copy
 import os
 import pathlib
+import re
+from collections import defaultdict
+from typing import Any

 import pydantic
 import smart_open

@@ -37,6 +43,13 @@ class QueryPath(pydantic.BaseModel):
   """Path file with query."""

   path: str
+  prefix: str | None = None
+
+  @property
+  def full_path(self) -> str:
+    if self.prefix:
+      return re.sub('/$', '', self.prefix) + '/' + self.path
+    return self.path


 class QueryDefinition(pydantic.BaseModel):

@@ -65,11 +78,13 @@ class ExecutionStep(ExecutionContext):
     alias: Optional alias to identify execution step.
     queries: Queries to run for a particular fetcher.
     context: Execution context for queries and fetcher.
+    parallel_threshold: Max allowed parallelism for the queries in the step.
   """

   fetcher: str | None = None
   alias: str | None = pydantic.Field(default=None, pattern=r'^[a-zA-Z0-9_]+$')
   queries: list[QueryPath | QueryDefinition | QueryFolder] | None = None
+  parallel_threshold: int | None = None

   @property
   def context(self) -> ExecutionContext:

@@ -86,17 +101,41 @@ class Workflow(pydantic.BaseModel):

   Attributes:
     steps: Contains one or several fetcher executions.
+    context: Query and fetcher parameters to overwrite in steps.
   """

   steps: list[ExecutionStep]
+  context: ExecutionContext | None = None
+
+  def model_post_init(self, __context__) -> None:
+    if context := self.context:
+      custom_parameters = defaultdict(dict)
+      if custom_macros := context.query_parameters.macro:
+        custom_parameters['query_parameters']['macro'] = custom_macros
+      if custom_templates := context.query_parameters.template:
+        custom_parameters['query_parameters']['template'] = custom_templates
+      if custom_fetcher_parameters := context.fetcher_parameters:
+        custom_parameters['fetcher_parameters'] = custom_fetcher_parameters
+
+      if custom_parameters:
+        steps = self.steps
+        for i, step in enumerate(steps):
+          res = _merge_dicts(
+            step.model_dump(exclude_none=True), dict(custom_parameters)
+          )
+          steps[i] = ExecutionStep(**res)

   @classmethod
-  def from_file(
+  def from_file(
+    cls,
+    path: str | pathlib.Path | os.PathLike[str],
+    context: ExecutionContext | None = None,
+  ) -> Workflow:
     """Builds workflow from local or remote yaml file."""
     with smart_open.open(path, 'r', encoding='utf-8') as f:
       data = yaml.safe_load(f)
     try:
-      return Workflow(
+      return Workflow(steps=data.get('steps'), context=context)
     except pydantic.ValidationError as e:
       raise GarfWorkflowError(f'Incorrect workflow:\n {e}') from e

@@ -107,3 +146,19 @@ class Workflow(pydantic.BaseModel):
       self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
     )
     return f'Workflow is saved to {str(path)}'
+
+
+def _merge_dicts(
+  dict1: dict[str, Any], dict2: dict[str, Any]
+) -> dict[str, Any]:
+  result = copy.deepcopy(dict1)
+  for key, value in dict2.items():
+    if (
+      key in result
+      and isinstance(result[key], dict)
+      and isinstance(value, dict)
+    ):
+      result[key] = _merge_dicts(result[key], value)
+    else:
+      result[key] = value
+  return result
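A workflow-level context now propagates shared macros, templates, and fetcher parameters into every step through model_post_init and _merge_dicts, with nested step-specific keys preserved unless overridden. A minimal sketch; the ExecutionContext import path, its constructor shape, and the workflow.yaml file are assumptions:

from garf.executors.execution_context import ExecutionContext  # assumed path
from garf.executors.workflows import workflow

# Every step in the loaded workflow gets this macro merged into its
# query_parameters; deeper step-specific keys survive the merge.
wf = workflow.Workflow.from_file(
  'workflow.yaml',  # hypothetical file
  context=ExecutionContext(
    query_parameters={'macro': {'start_date': '2025-01-01'}}
  ),
)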
garf/executors/workflows/workflow_runner.py
ADDED

@@ -0,0 +1,172 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Runs garf workflow."""
+
+from __future__ import annotations
+
+import logging
+import pathlib
+import re
+from typing import Final
+
+import yaml
+from garf.executors import exceptions, setup
+from garf.executors.telemetry import tracer
+from garf.executors.workflows import workflow
+from garf.io import reader
+
+logger = logging.getLogger(__name__)
+
+_REMOTE_FILES_PATTERN: Final[str] = (
+  '^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)'
+)
+_SCRIPT_PATH = pathlib.Path(__file__).parent
+
+
+class WorkflowRunner:
+  """Runs garf workflow.
+
+  Attributes:
+    workflow: Workflow to execute.
+    wf_parent: Optional location of a workflow file.
+    parallel_threshold: Max allowed parallelism for the queries in the workflow.
+  """
+
+  def __init__(
+    self,
+    execution_workflow: workflow.Workflow,
+    wf_parent: pathlib.Path | str,
+    parallel_threshold: int = 10,
+  ) -> None:
+    """Initializes WorkflowRunner."""
+    self.workflow = execution_workflow
+    self.wf_parent = wf_parent
+    self.parallel_threshold = parallel_threshold
+
+  @classmethod
+  def from_file(
+    cls,
+    workflow_file: str | pathlib.Path,
+  ) -> WorkflowRunner:
+    """Initializes WorkflowRunner from a local or remote file."""
+    if isinstance(workflow_file, str):
+      workflow_file = pathlib.Path(workflow_file)
+    execution_workflow = workflow.Workflow.from_file(workflow_file)
+    return cls(
+      execution_workflow=execution_workflow, wf_parent=workflow_file.parent
+    )
+
+  def run(
+    self,
+    enable_cache: bool = False,
+    cache_ttl_seconds: int = 3600,
+    selected_aliases: list[str] | None = None,
+    skipped_aliases: list[str] | None = None,
+  ) -> list[str]:
+    skipped_aliases = skipped_aliases or []
+    selected_aliases = selected_aliases or []
+    reader_client = reader.create_reader('file')
+    execution_results = []
+    logger.info('Starting Garf Workflow...')
+    for i, step in enumerate(self.workflow.steps, 1):
+      step_name = f'{i}-{step.fetcher}'
+      if step.alias:
+        step_name = f'{step_name}-{step.alias}'
+      if step.alias in skipped_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      if selected_aliases and step.alias not in selected_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      with tracer.start_as_current_span(step_name):
+        logger.info(
+          'Running step %d, fetcher: %s, alias: %s', i, step.fetcher, step.alias
+        )
+        query_executor = setup.setup_executor(
+          source=step.fetcher,
+          fetcher_parameters=step.fetcher_parameters,
+          enable_cache=enable_cache,
+          cache_ttl_seconds=cache_ttl_seconds,
+        )
+        batch = {}
+        if not (queries := step.queries):
+          logger.error('Please provide one or more queries to run')
+          raise exceptions.GarfExecutorError(
+            'Please provide one or more queries to run'
+          )
+        for query in queries:
+          if isinstance(query, workflow.QueryPath):
+            query_path = query.full_path
+            if re.match(_REMOTE_FILES_PATTERN, query_path):
+              batch[query.path] = reader_client.read(query_path)
+            else:
+              if not query.prefix:
+                query_path = self.wf_parent / pathlib.Path(query.path)
+              if not query_path.exists():
+                raise workflow.GarfWorkflowError(
+                  f'Query: {query_path} not found'
+                )
+              batch[query.path] = reader_client.read(query_path)
+          elif isinstance(query, workflow.QueryFolder):
+            query_path = self.wf_parent / pathlib.Path(query.folder)
+            if not query_path.exists():
+              raise workflow.GarfWorkflowError(
+                f'Folder: {query_path} not found'
+              )
+            for p in query_path.rglob('*'):
+              if p.suffix == '.sql':
+                batch[p.stem] = reader_client.read(p)
+          else:
+            batch[query.query.title] = query.query.text
+        query_executor.execute_batch(
+          batch,
+          step.context,
+          step.parallel_threshold or self.parallel_threshold,
+        )
+        execution_results.append(step_name)
+    return execution_results
+
+  def compile(self, path: str | pathlib.Path) -> str:
+    """Saves workflow with expanded anchors."""
+    return self.workflow.save(path)
+
+  def deploy(self, path: str | pathlib.Path) -> str:
+    """Prepares workflow for deployment to Google Cloud Workflows."""
+    wf = self.workflow.model_dump(exclude_none=True).get('steps')
+    with open(_SCRIPT_PATH / 'gcp_workflow.yaml', 'r', encoding='utf-8') as f:
+      cloud_workflow_run_template = yaml.safe_load(f)
+    init = {
+      'init': {
+        'assign': [{'pairs': wf}],
+      },
+    }
+    cloud_workflow = {
+      'main': {
+        'params': [],
+        'steps': [init, cloud_workflow_run_template],
+      },
+    }
+    with open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(cloud_workflow, f, sort_keys=False)
+    return f'Workflow is saved to {path}'
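WorkflowRunner ties the pieces together: it loads a workflow file, builds an executor per step via setup_executor, reads queries from paths, folders, or inline definitions, and runs them as a batch. A short usage sketch; the file names and alias are hypothetical:

from garf.executors.workflows import workflow_runner

runner = workflow_runner.WorkflowRunner.from_file('workflow.yaml')

# Run all steps except the one aliased 'slow_step', with caching enabled.
results = runner.run(enable_cache=True, skipped_aliases=['slow_step'])

# Wrap the steps into the bundled gcp_workflow.yaml template for
# deployment to Google Cloud Workflows.
runner.deploy('cloud_workflow.yaml')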
{garf_executors-1.0.2.dist-info → garf_executors-1.1.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: garf-executors
-Version: 1.0.2
+Version: 1.1.3
 Summary: Executes queries against API and writes data to local/remote storage.
 Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
 License: Apache 2.0

@@ -36,8 +36,15 @@ Provides-Extra: gcp
 Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
 Provides-Extra: server
 Requires-Dist: fastapi[standard]; extra == "server"
+Requires-Dist: pydantic-settings; extra == "server"
 Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
 Requires-Dist: typer; extra == "server"
+Requires-Dist: grpcio-reflection; extra == "server"
+Provides-Extra: tests
+Requires-Dist: pytest; extra == "tests"
+Requires-Dist: pytest-mock; extra == "tests"
+Requires-Dist: pytest-xdist; extra == "tests"
+Requires-Dist: pytest-grpc; extra == "tests"
 Provides-Extra: all
 Requires-Dist: garf-executors[bq,gcp,server,sql]; extra == "all"