garf-executors 1.0.7__tar.gz → 1.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {garf_executors-1.0.7 → garf_executors-1.1.3}/PKG-INFO +2 -1
- garf_executors-1.1.3/garf/executors/__init__.py +25 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/api_executor.py +86 -4
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/cli.py +45 -69
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/grpc_server.py +9 -11
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/server.py +64 -7
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/tracer.py +29 -4
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/fetchers.py +48 -1
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/query_processor.py +4 -2
- garf_executors-1.0.7/garf/executors/__init__.py → garf_executors-1.1.3/garf/executors/setup.py +10 -12
- garf_executors-1.1.3/garf/executors/workflows/__init__.py +0 -0
- garf_executors-1.1.3/garf/executors/workflows/gcp_workflow.yaml +49 -0
- {garf_executors-1.0.7/garf/executors → garf_executors-1.1.3/garf/executors/workflows}/workflow.py +57 -2
- garf_executors-1.1.3/garf/executors/workflows/workflow_runner.py +172 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/PKG-INFO +2 -1
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/SOURCES.txt +5 -1
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/requires.txt +1 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/pyproject.toml +5 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/README.md +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/bq_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/config.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/__init__.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/utils.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/exceptions.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/execution_context.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/garf_pb2.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/garf_pb2_grpc.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/sql_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/telemetry.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/__init__.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/api_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/bq_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/config.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/__init__.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/cli.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/grcp_server.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/server.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/tracer.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/entrypoints/utils.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/exceptions.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/execution_context.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/fetchers.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/sql_executor.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/telemetry.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors/workflow.py +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/dependency_links.txt +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/entry_points.txt +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/top_level.txt +0 -0
- {garf_executors-1.0.7 → garf_executors-1.1.3}/setup.cfg +0 -0
{garf_executors-1.0.7 → garf_executors-1.1.3}/PKG-INFO CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: garf-executors
-Version: 1.0.7
+Version: 1.1.3
 Summary: Executes queries against API and writes data to local/remote storage.
 Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
 License: Apache 2.0
@@ -36,6 +36,7 @@ Provides-Extra: gcp
 Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
 Provides-Extra: server
 Requires-Dist: fastapi[standard]; extra == "server"
+Requires-Dist: pydantic-settings; extra == "server"
 Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
 Requires-Dist: typer; extra == "server"
 Requires-Dist: grpcio-reflection; extra == "server"
garf_executors-1.1.3/garf/executors/__init__.py ADDED

@@ -0,0 +1,25 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Executors to fetch data from various APIs."""
+
+from __future__ import annotations
+
+from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor
+
+__all__ = [
+  'ApiQueryExecutor',
+  'ApiExecutionContext',
+]
+
+__version__ = '1.1.3'
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/api_executor.py CHANGED

@@ -23,7 +23,7 @@ from __future__ import annotations
 import logging
 import pathlib
 
-from garf.core import report_fetcher
+from garf.core import report_fetcher, simulator
 from garf.executors import (
   exceptions,
   execution_context,
@@ -32,9 +32,16 @@ from garf.executors import (
   query_processor,
 )
 from garf.executors.telemetry import tracer
-from opentelemetry import trace
+from opentelemetry import metrics, trace
 
 logger = logging.getLogger(__name__)
+meter = metrics.get_meter('garf.executors')
+
+api_counter = meter.create_counter(
+  'garf_api_execute_total',
+  unit='1',
+  description='Counts number of API executions',
+)
 
 
 class ApiExecutionContext(execution_context.ExecutionContext):
@@ -50,13 +57,19 @@ class ApiQueryExecutor(executor.Executor):
     api_client: a client used for connecting to API.
   """
 
-  def __init__(self, fetcher: report_fetcher.ApiReportFetcher) -> None:
+  def __init__(
+    self,
+    fetcher: report_fetcher.ApiReportFetcher,
+    report_simulator: simulator.ApiReportSimulator | None = None,
+  ) -> None:
     """Initializes ApiQueryExecutor.
 
     Args:
-      fetcher: Instantiated report fetcher.
+      fetcher: Instantiated report fetcher.
+      report_simulator: Instantiated simulator.
     """
     self.fetcher = fetcher
+    self.simulator = report_simulator
     super().__init__(
       preprocessors=self.fetcher.preprocessors,
       postprocessors=self.fetcher.postprocessors,
@@ -101,6 +114,8 @@ class ApiQueryExecutor(executor.Executor):
     Raises:
       GarfExecutorError: When failed to execute query.
     """
+    if self.simulator:
+      return self.simulate(query=query, title=title, context=context)
     context = query_processor.process_gquery(context)
     span = trace.get_current_span()
     span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
@@ -112,6 +127,9 @@ class ApiQueryExecutor(executor.Executor):
     span.set_attribute('query.text', query)
     logger.debug('starting query %s', query)
     title = pathlib.Path(title).name.split('.')[0]
+    api_counter.add(
+      1, {'api.client.class': self.fetcher.api_client.__class__.__name__}
+    )
     results = self.fetcher.fetch(
       query_specification=query,
       args=context.query_parameters,
@@ -144,3 +162,67 @@ class ApiQueryExecutor(executor.Executor):
       raise exceptions.GarfExecutorError(
         '%s generated an exception: %s', title, str(e)
       ) from e
+
+  @tracer.start_as_current_span('api.simulate')
+  def simulate(
+    self,
+    query: str,
+    title: str,
+    context: ApiExecutionContext,
+  ) -> str:
+    """Reads query, simulates results and stores them in a specified location.
+
+    Args:
+      query: Location of the query.
+      title: Name of the query.
+      context: Query execution context.
+
+    Returns:
+      Result of writing the report.
+
+    Raises:
+      GarfExecutorError: When failed to execute query.
+    """
+    context = query_processor.process_gquery(context)
+    span = trace.get_current_span()
+    span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
+    span.set_attribute(
+      'api.client.class', self.fetcher.api_client.__class__.__name__
+    )
+    try:
+      span.set_attribute('query.title', title)
+      span.set_attribute('query.text', query)
+      logger.debug('starting query %s', query)
+      title = pathlib.Path(title).name.split('.')[0]
+      results = self.simulator.simulate(
+        query_specification=query,
+        args=context.query_parameters,
+        title=title,
+        **context.fetcher_parameters,
+      )
+      writer_clients = context.writer_clients
+      if not writer_clients:
+        logger.warning('No writers configured, skipping write operation')
+        return None
+      writing_results = []
+      for writer_client in writer_clients:
+        logger.debug(
+          'Start writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        result = writer_client.write(results, title)
+        logger.debug(
+          'Finish writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        writing_results.append(result)
+      logger.info('%s executed successfully', title)
+      # Return the last writer's result for backward compatibility
+      return writing_results[-1] if writing_results else None
+    except Exception as e:
+      logger.error('%s generated an exception: %s', title, str(e))
+      raise exceptions.GarfExecutorError(
+        '%s generated an exception: %s', title, str(e)
+      ) from e
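For orientation, a minimal sketch of the new simulation path, assuming a concrete fetcher and simulator supplied by a garf plugin; only the `ApiQueryExecutor` wiring itself comes from the diff above, and the `'console'` writer alias is an assumption:

```python
from garf.core import report_fetcher, simulator
from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor


def run_simulated(
  fetcher: report_fetcher.ApiReportFetcher,
  report_simulator: simulator.ApiReportSimulator,
  query: str,
  title: str,
) -> str | None:
  """Runs a query through the new simulate() path instead of the live API."""
  executor = ApiQueryExecutor(
    fetcher=fetcher, report_simulator=report_simulator
  )
  # With a simulator attached, execute() short-circuits into simulate(),
  # which writes simulated results through the writers in the context.
  return executor.execute(
    query=query,
    title=title,
    context=ApiExecutionContext(writer=['console']),  # assumed writer alias
  )
```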
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/cli.py CHANGED

@@ -22,18 +22,22 @@ from __future__ import annotations
 import argparse
 import logging
 import pathlib
-import re
 import sys
 
 import garf.executors
-from garf.executors import config, exceptions, workflow
+from garf.executors import config, exceptions, setup
 from garf.executors.entrypoints import utils
-from garf.executors.entrypoints.tracer import initialize_tracer
+from garf.executors.entrypoints.tracer import (
+  initialize_meter,
+  initialize_tracer,
+)
 from garf.executors.telemetry import tracer
+from garf.executors.workflows import workflow, workflow_runner
 from garf.io import reader
 from opentelemetry import trace
 
 initialize_tracer()
+meter_provider = initialize_meter()
 
 
 @tracer.start_as_current_span('garf.entrypoints.cli')
@@ -54,6 +58,7 @@ def main():
   parser.add_argument(
     '--no-parallel-queries', dest='parallel_queries', action='store_false'
   )
+  parser.add_argument('--simulate', dest='simulate', action='store_true')
   parser.add_argument('--dry-run', dest='dry_run', action='store_true')
   parser.add_argument('-v', '--version', dest='version', action='store_true')
   parser.add_argument(
@@ -68,7 +73,12 @@ def main():
     default=3600,
     type=int,
   )
+  parser.add_argument('--workflow-skip', dest='workflow_skip', default=None)
+  parser.add_argument(
+    '--workflow-include', dest='workflow_include', default=None
+  )
   parser.set_defaults(parallel_queries=True)
+  parser.set_defaults(simulate=False)
   parser.set_defaults(enable_cache=False)
   parser.set_defaults(dry_run=False)
   args, kwargs = parser.parse_known_args()
@@ -83,53 +93,37 @@ def main():
     loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
   )
   reader_client = reader.create_reader(args.input)
+  param_types = ['source', 'macro', 'template']
+  outputs = args.output.split(',')
+  extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(kwargs)
+  source_parameters = extra_parameters.get('source', {})
+  writer_parameters = {}
+  for output in outputs:
+    writer_parameters.update(extra_parameters.get(output))
+
+  context = garf.executors.api_executor.ApiExecutionContext(
+    query_parameters={
+      'macro': extra_parameters.get('macro'),
+      'template': extra_parameters.get('template'),
+    },
+    writer=outputs,
+    writer_parameters=writer_parameters,
+    fetcher_parameters=source_parameters,
+  )
   if workflow_file := args.workflow:
     wf_parent = pathlib.Path.cwd() / pathlib.Path(workflow_file).parent
-    execution_workflow = workflow.Workflow.from_file(workflow_file)
-
-
-
-
-
-
-
-
-
-
-
-    batch = {}
-    if not (queries := step.queries):
-      logger.error('Please provide one or more queries to run')
-      raise exceptions.GarfExecutorError(
-        'Please provide one or more queries to run'
-      )
-    for query in queries:
-      if isinstance(query, garf.executors.workflow.QueryPath):
-        if re.match(
-          '^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)', query.path
-        ):
-          batch[query.path] = reader_client.read(query.path)
-        else:
-          query_path = wf_parent / pathlib.Path(query.path)
-          if not query_path.exists():
-            raise workflow.GarfWorkflowError(
-              f'Query: {query_path} not found'
-            )
-          batch[query.path] = reader_client.read(query_path)
-      elif isinstance(query, garf.executors.workflow.QueryFolder):
-        query_path = wf_parent / pathlib.Path(query.folder)
-        if not query_path.exists():
-          raise workflow.GarfWorkflowError(
-            f'Folder: {query_path} not found'
-          )
-        for p in query_path.rglob('*'):
-          if p.suffix == '.sql':
-            batch[p.stem] = reader_client.read(p)
-      else:
-        batch[query.query.title] = query.query.text
-    query_executor.execute_batch(
-      batch, step.context, args.parallel_threshold
-    )
+    execution_workflow = workflow.Workflow.from_file(workflow_file, context)
+    workflow_skip = args.workflow_skip if args.workflow_skip else None
+    workflow_include = args.workflow_include if args.workflow_include else None
+    workflow_runner.WorkflowRunner(
+      execution_workflow=execution_workflow, wf_parent=wf_parent
+    ).run(
+      enable_cache=args.enable_cache,
+      cache_ttl_seconds=args.cache_ttl_seconds,
+      selected_aliases=workflow_include,
+      skipped_aliases=workflow_skip,
+    )
+    meter_provider.shutdown()
     sys.exit()
 
   if not args.query:
@@ -143,31 +137,12 @@ def main():
     raise exceptions.GarfExecutorError(
       f'No execution context found for source {args.source} in {config_file}'
    )
-
-  param_types = ['source', 'macro', 'template']
-  outputs = args.output.split(',')
-  extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(
-    kwargs
-  )
-  source_parameters = extra_parameters.get('source', {})
-  writer_parameters = {}
-  for output in outputs:
-    writer_parameters.update(extra_parameters.get(output))
-
-  context = garf.executors.api_executor.ApiExecutionContext(
-    query_parameters={
-      'macro': extra_parameters.get('macro'),
-      'template': extra_parameters.get('template'),
-    },
-    writer=outputs,
-    writer_parameters=writer_parameters,
-    fetcher_parameters=source_parameters,
-  )
-  query_executor = garf.executors.setup_executor(
+  query_executor = setup.setup_executor(
     source=args.source,
     fetcher_parameters=context.fetcher_parameters,
     enable_cache=args.enable_cache,
     cache_ttl_seconds=args.cache_ttl_seconds,
+    simulate=args.simulate,
   )
   batch = {query: reader_client.read(query) for query in args.query}
   if args.parallel_queries and len(args.query) > 1:
@@ -182,6 +157,7 @@ def main():
       query=reader_client.read(query), title=query, context=context
     )
   logging.shutdown()
+  meter_provider.shutdown()
 
 
 if __name__ == '__main__':
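Taken together, the new flags can be exercised like this; a sketch that drives the installed `garf` entrypoint programmatically (the query path and `fake` source alias are hypothetical, the flag names come from the parser definitions above):

```python
# Equivalent to running, from a shell:
#   garf queries/report.sql --source fake --simulate
import sys

from garf.executors.entrypoints import cli

sys.argv = ['garf', 'queries/report.sql', '--source', 'fake', '--simulate']
cli.main()  # routes execution through setup_executor(..., simulate=True)
```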
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/grpc_server.py CHANGED

@@ -18,9 +18,8 @@ import argparse
 import logging
 from concurrent import futures
 
-import garf.executors
 import grpc
-from garf.executors import garf_pb2, garf_pb2_grpc
+from garf.executors import execution_context, garf_pb2, garf_pb2_grpc, setup
 from garf.executors.entrypoints.tracer import initialize_tracer
 from google.protobuf.json_format import MessageToDict
 from grpc_reflection.v1alpha import reflection
@@ -28,30 +27,29 @@ from grpc_reflection.v1alpha import reflection
 
 class GarfService(garf_pb2_grpc.GarfService):
   def Execute(self, request, context):
-    query_executor = garf.executors.setup_executor(
+    query_executor = setup.setup_executor(
       request.source, request.context.fetcher_parameters
     )
-    execution_context = garf.executors.execution_context.ExecutionContext(
-      **MessageToDict(request.context, preserving_proto_field_name=True)
-    )
     result = query_executor.execute(
       query=request.query,
       title=request.title,
-      context=execution_context,
+      context=execution_context.ExecutionContext(
+        **MessageToDict(request.context, preserving_proto_field_name=True)
+      ),
     )
     return garf_pb2.ExecuteResponse(results=[result])
 
   def Fetch(self, request, context):
-    query_executor = garf.executors.setup_executor(
+    query_executor = setup.setup_executor(
       request.source, request.context.fetcher_parameters
     )
-    execution_context = garf.executors.execution_context.ExecutionContext(
+    query_args = execution_context.ExecutionContext(
       **MessageToDict(request.context, preserving_proto_field_name=True)
-    )
+    ).query_parameters
     result = query_executor.fetcher.fetch(
       query_specification=request.query,
       title=request.title,
-      args=execution_context.query_parameters,
+      args=query_args,
     )
     return garf_pb2.FetchResponse(
       columns=result.column_names, rows=result.to_list(row_type='dict')
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/server.py CHANGED

@@ -21,18 +21,55 @@ import garf.executors
 import pydantic
 import typer
 import uvicorn
-from garf.executors import exceptions
-from garf.executors.entrypoints.tracer import initialize_tracer
+from garf.executors import exceptions, setup
+from garf.executors.entrypoints import utils
+from garf.executors.entrypoints.tracer import (
+  initialize_meter,
+  initialize_tracer,
+)
+from garf.executors.workflows import workflow_runner
 from garf.io import reader
 from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing_extensions import Annotated
 
 initialize_tracer()
+initialize_meter()
 app = fastapi.FastAPI()
 FastAPIInstrumentor.instrument_app(app)
 typer_app = typer.Typer()
 
 
+class GarfSettings(BaseSettings):
+  """Specifies environmental variables for garf.
+
+  Ensure that mandatory variables are exposed via
+  export ENV_VARIABLE_NAME=VALUE.
+
+  Attributes:
+    loglevel: Level of logging.
+    log_name: Name of log.
+    logger_type: Type of logger.
+  """
+
+  model_config = SettingsConfigDict(env_prefix='garf_')
+
+  loglevel: str = 'INFO'
+  log_name: str = 'garf'
+  logger_type: str = 'local'
+
+
+class GarfDependencies:
+  def __init__(self) -> None:
+    """Initializes GarfDependencies."""
+    settings = GarfSettings()
+    self.logger = utils.init_logging(
+      loglevel=settings.loglevel,
+      logger_type=settings.logger_type,
+      name=settings.log_name,
+    )
+
+
 class ApiExecutorRequest(pydantic.BaseModel):
   """Request for executing a query.
 
@@ -81,14 +118,19 @@ async def version() -> str:
 
 
 @app.get('/api/fetchers')
-async def get_fetchers() -> list[str]:
+async def get_fetchers(
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> list[str]:
   """Shows all available API sources."""
   return list(garf.executors.fetchers.find_fetchers())
 
 
 @app.post('/api/execute')
-def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
-  query_executor = garf.executors.setup_executor(
+def execute(
+  request: ApiExecutorRequest,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> ApiExecutorResponse:
+  query_executor = setup.setup_executor(
     request.source, request.context.fetcher_parameters
   )
   result = query_executor.execute(request.query, request.title, request.context)
@@ -96,8 +138,11 @@ def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
 
 
 @app.post('/api/execute:batch')
-def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
-  query_executor = garf.executors.setup_executor(
+def execute_batch(
+  request: ApiExecutorRequest,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+) -> ApiExecutorResponse:
+  query_executor = setup.setup_executor(
    request.source, request.context.fetcher_parameters
   )
   reader_client = reader.FileReader()
@@ -106,6 +151,18 @@ def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
   return ApiExecutorResponse(results=results)
 
 
+@app.post('/api/execute:workflow')
+def execute_workflow(
+  workflow_file: str,
+  dependencies: Annotated[GarfDependencies, fastapi.Depends(GarfDependencies)],
+  enable_cache: bool = False,
+  cache_ttl_seconds: int = 3600,
+) -> list[str]:
+  return workflow_runner.WorkflowRunner.from_file(workflow_file).run(
+    enable_cache=enable_cache, cache_ttl_seconds=cache_ttl_seconds
+  )
+
+
 @typer_app.command()
 def main(
   port: Annotated[int, typer.Option(help='Port to start the server')] = 8000,
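`GarfSettings` relies on stock pydantic-settings behaviour: with `env_prefix='garf_'`, each field is resolved from a `GARF_`-prefixed environment variable (matching is case-insensitive) before falling back to its declared default. A sketch, assuming the import path shown in the diff:

```python
import os

from garf.executors.entrypoints.server import GarfSettings

os.environ['GARF_LOGLEVEL'] = 'DEBUG'

settings = GarfSettings()
assert settings.loglevel == 'DEBUG'     # read from GARF_LOGLEVEL
assert settings.log_name == 'garf'      # default, GARF_LOG_NAME not set
assert settings.logger_type == 'local'  # default
```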
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/entrypoints/tracer.py CHANGED

@@ -14,15 +14,20 @@
 
 import os
 
-from opentelemetry import trace
+from opentelemetry import metrics, trace
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
+  OTLPMetricExporter,
+)
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
   OTLPSpanExporter,
 )
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import (
+  PeriodicExportingMetricReader,
+)
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import (
-  BatchSpanProcessor,
-)
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
 
 DEFAULT_SERVICE_NAME = 'garf'
 
@@ -55,3 +60,23 @@ def initialize_tracer():
   tracer_provider.add_span_processor(otlp_processor)
 
   trace.set_tracer_provider(tracer_provider)
+
+
+def initialize_meter():
+  resource = Resource.create(
+    {'service.name': os.getenv('OTLP_SERVICE_NAME', DEFAULT_SERVICE_NAME)}
+  )
+  meter_provider = MeterProvider(resource=resource)
+
+  if otel_endpoint := os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT'):
+    otlp_metric_exporter = OTLPMetricExporter(
+      endpoint=f'{otel_endpoint}/v1/metrics'
+    )
+    metric_reader = PeriodicExportingMetricReader(otlp_metric_exporter)
+    meter_provider = MeterProvider(
+      resource=resource, metric_readers=[metric_reader]
+    )
+  else:
+    meter_provider = MeterProvider(resource=resource)
+  metrics.set_meter_provider(meter_provider)
+  return meter_provider
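The exporter selection mirrors `initialize_tracer()`: when `OTEL_EXPORTER_OTLP_ENDPOINT` is set, metrics flow through a `PeriodicExportingMetricReader`; otherwise a reader-less `MeterProvider` is installed and counters are effectively no-ops. A usage sketch (the endpoint value is illustrative):

```python
import os

from garf.executors.entrypoints.tracer import initialize_meter
from opentelemetry import metrics

os.environ['OTEL_EXPORTER_OTLP_ENDPOINT'] = 'http://localhost:4317'  # example
meter_provider = initialize_meter()

# Counters created anywhere in the process now report through this provider,
# e.g. the garf_api_execute_total counter added in api_executor.py.
counter = metrics.get_meter('demo').create_counter('demo_total', unit='1')
counter.add(1, {'source': 'demo'})

meter_provider.shutdown()  # flushes pending metrics, as cli.py does on exit
```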
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/fetchers.py CHANGED

@@ -17,7 +17,7 @@ import logging
 import sys
 from importlib.metadata import entry_points
 
-from garf.core import report_fetcher
+from garf.core import report_fetcher, simulator
 from garf.executors.telemetry import tracer
 
 logger = logging.getLogger(name='garf.executors.fetchers')
@@ -31,6 +31,14 @@ def find_fetchers() -> set[str]:
   return set()
 
 
+@tracer.start_as_current_span('find_simulators')
+def find_simulators() -> set[str]:
+  """Identifiers all available report simulators."""
+  if entrypoints := _get_entrypoints('garf_simulator'):
+    return {simulator.name for simulator in entrypoints}
+  return set()
+
+
 @tracer.start_as_current_span('get_report_fetcher')
 def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
   """Loads report fetcher for a given source.
@@ -70,6 +78,45 @@ def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
   )
 
 
+@tracer.start_as_current_span('get_report_simulator')
+def get_report_simulator(source: str) -> type[simulator.ApiReportSimulator]:
+  """Loads report simulator for a given source.
+
+  Args:
+    source: Alias for a source associated with a simulator.
+
+  Returns:
+    Class for a found report simulator.
+
+  Raises:
+    GarfApiReportSimulatorError: When simulator cannot be loaded.
+    MissingApiReportSimulatorError: When simulator not found.
+  """
+  if source not in find_simulators():
+    raise simulator.MissingApiReportSimulatorError(source)
+  for sim in _get_entrypoints('garf_simulator'):
+    if sim.name == source:
+      try:
+        with tracer.start_as_current_span('load_simulator_module') as span:
+          simulator_module = sim.load()
+          span.set_attribute('loaded_module', simulator_module.__name__)
+        for name, obj in inspect.getmembers(simulator_module):
+          if inspect.isclass(obj) and issubclass(
+            obj, simulator.ApiReportSimulator
+          ):
+            if not hasattr(obj, 'alias'):
+              return getattr(simulator_module, name)
+            if obj.alias == sim.name:
+              return getattr(simulator_module, name)
+      except ModuleNotFoundError as e:
+        raise simulator.GarfApiReportSimulatorError(
+          f'Failed to load simulator for source {source}, reason: {e}'
+        )
+  raise simulator.GarfApiReportSimulatorError(
+    f'No simulator available for the source "{source}"'
+  )
+
+
 def _get_entrypoints(group='garf'):
   if sys.version_info.major == 3 and sys.version_info.minor == 9:
     try:
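Simulator discovery follows the same entry-point mechanism as fetchers, just under the `garf_simulator` group, so what `find_simulators()` returns depends on which plugins are installed. A sketch (the `fake` alias is hypothetical):

```python
from garf.executors import fetchers

available = fetchers.find_simulators()  # e.g. {'fake'} with a plugin installed
if 'fake' in available:
  # Resolves the ApiReportSimulator subclass registered for this alias.
  simulator_cls = fetchers.get_report_simulator('fake')
  report_simulator = simulator_cls()
```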
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf/executors/query_processor.py CHANGED

@@ -41,8 +41,10 @@ def _handle_sub_context(context, sub_context):
   if alias == 'sqldb':
     from garf.executors import sql_executor
 
-    gquery_executor = sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
-      context.fetcher_parameters.get('connection_string')
+    gquery_executor = (
+      sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+        context.fetcher_parameters.get('connection_string')
+      )
     )
   elif alias == 'bq':
     from garf.executors import bq_executor
garf_executors-1.0.7/garf/executors/__init__.py → garf_executors-1.1.3/garf/executors/setup.py
RENAMED

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,14 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+"""Bootstraps executor based on provided parameters."""
 
 from __future__ import annotations
 
 import importlib
 
 from garf.executors import executor, fetchers
-from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor
+from garf.executors.api_executor import ApiQueryExecutor
 from garf.executors.telemetry import tracer
 
 
@@ -28,6 +28,7 @@ def setup_executor(
   fetcher_parameters: dict[str, str | int | bool],
   enable_cache: bool = False,
   cache_ttl_seconds: int = 3600,
+  simulate: bool = False,
 ) -> type[executor.Executor]:
   """Initializes executors based on a source and parameters."""
   if source == 'bq':
@@ -42,19 +43,16 @@ def setup_executor(
     )
   else:
     concrete_api_fetcher = fetchers.get_report_fetcher(source)
+    if simulate:
+      concrete_simulator = fetchers.get_report_simulator(source)()
+    else:
+      concrete_simulator = None
     query_executor = ApiQueryExecutor(
       fetcher=concrete_api_fetcher(
        **fetcher_parameters,
        enable_cache=enable_cache,
        cache_ttl_seconds=cache_ttl_seconds,
-      )
+      ),
+      report_simulator=concrete_simulator,
     )
   return query_executor
-
-
-__all__ = [
-  'ApiQueryExecutor',
-  'ApiExecutionContext',
-]
-
-__version__ = '1.0.7'
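The new `simulate` switch is the entry point into all of the above; a sketch of calling it directly (the `fake` source alias and empty fetcher parameters are hypothetical):

```python
from garf.executors import setup

query_executor = setup.setup_executor(
  source='fake',          # hypothetical fetcher alias
  fetcher_parameters={},
  enable_cache=False,
  cache_ttl_seconds=3600,
  simulate=True,          # also resolves get_report_simulator('fake')()
)
# query_executor is an ApiQueryExecutor whose execute() now simulates
# instead of hitting the live API.
```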
garf_executors-1.1.3/garf/executors/workflows/__init__.py ADDED

File without changes (new empty file)
garf_executors-1.1.3/garf/executors/workflows/gcp_workflow.yaml ADDED

@@ -0,0 +1,49 @@
+run:
+  for:
+    value: pair
+    in: ${pairs}
+    steps:
+      - log_source:
+          call: sys.log
+          args:
+            data: ${pair.alias}
+      - execute_queries:
+          parallel:
+            for:
+              value: query
+              in: ${pair.queries}
+              steps:
+                - log_query:
+                    call: sys.log
+                    args:
+                      data: ${pair}
+                - execute_single_query:
+                    try:
+                      call: http.post
+                      args:
+                        url: ${sys.get_env("GARF_ENDPOINT") + "/api/execute"}
+                        auth:
+                          type: OIDC
+                        body:
+                          source: ${pair.fetcher}
+                          # query_path: ${query.path}
+                          title: ${query.query.title}
+                          query: ${query.query.text}
+                          context:
+                            fetcher_parameters: ${pair.fetcher_parameters}
+                            writer: ${pair.writer}
+                            writer_parameters: ${pair.writer_parameters}
+                            query_parameters:
+                              macro: ${pair.query_parameters.macro}
+                              template: ${pair.query_parameters.template}
+                      result: task_resp
+                    except:
+                      as: e
+                      assign:
+                        - task_resp:
+                            status: "failed"
+                            error: ${e.message}
+                - log_result:
+                    call: sys.log
+                    args:
+                      data: ${task_resp}
{garf_executors-1.0.7/garf/executors → garf_executors-1.1.3/garf/executors/workflows}/workflow.py
RENAMED

@@ -11,10 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""Workflow specifies steps of end-to-end fetching and processing."""
+
 from __future__ import annotations
 
+import copy
 import os
 import pathlib
+import re
+from collections import defaultdict
+from typing import Any
 
 import pydantic
 import smart_open
@@ -37,6 +43,13 @@ class QueryPath(pydantic.BaseModel):
   """Path file with query."""
 
   path: str
+  prefix: str | None = None
+
+  @property
+  def full_path(self) -> str:
+    if self.prefix:
+      return re.sub('/$', '', self.prefix) + '/' + self.path
+    return self.path
 
 
 class QueryDefinition(pydantic.BaseModel):
@@ -65,11 +78,13 @@ class ExecutionStep(ExecutionContext):
     alias: Optional alias to identify execution step.
     queries: Queries to run for a particular fetcher.
     context: Execution context for queries and fetcher.
+    parallel_threshold: Max allowed parallelism for the queries in the step.
   """
 
   fetcher: str | None = None
   alias: str | None = pydantic.Field(default=None, pattern=r'^[a-zA-Z0-9_]+$')
   queries: list[QueryPath | QueryDefinition | QueryFolder] | None = None
+  parallel_threshold: int | None = None
 
   @property
   def context(self) -> ExecutionContext:
@@ -86,17 +101,41 @@ class Workflow(pydantic.BaseModel):
 
   Attributes:
     steps: Contains one or several fetcher executions.
+    context: Query and fetcher parameters to overwrite in steps.
   """
 
   steps: list[ExecutionStep]
+  context: ExecutionContext | None = None
+
+  def model_post_init(self, __context__) -> None:
+    if context := self.context:
+      custom_parameters = defaultdict(dict)
+      if custom_macros := context.query_parameters.macro:
+        custom_parameters['query_parameters']['macro'] = custom_macros
+      if custom_templates := context.query_parameters.template:
+        custom_parameters['query_parameters']['template'] = custom_templates
+      if custom_fetcher_parameters := context.fetcher_parameters:
+        custom_parameters['fetcher_parameters'] = custom_fetcher_parameters
+
+      if custom_parameters:
+        steps = self.steps
+        for i, step in enumerate(steps):
+          res = _merge_dicts(
+            step.model_dump(exclude_none=True), dict(custom_parameters)
+          )
+          steps[i] = ExecutionStep(**res)
 
   @classmethod
-  def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Workflow:
+  def from_file(
+    cls,
+    path: str | pathlib.Path | os.PathLike[str],
+    context: ExecutionContext | None = None,
+  ) -> Workflow:
     """Builds workflow from local or remote yaml file."""
     with smart_open.open(path, 'r', encoding='utf-8') as f:
       data = yaml.safe_load(f)
     try:
-      return Workflow(steps=data.get('steps'))
+      return Workflow(steps=data.get('steps'), context=context)
     except pydantic.ValidationError as e:
       raise GarfWorkflowError(f'Incorrect workflow:\n {e}') from e
@@ -107,3 +146,19 @@ class Workflow(pydantic.BaseModel):
       self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
     )
     return f'Workflow is saved to {str(path)}'
+
+
+def _merge_dicts(
+  dict1: dict[str, Any], dict2: dict[str, Any]
+) -> dict[str, Any]:
+  result = copy.deepcopy(dict1)
+  for key, value in dict2.items():
+    if (
+      key in result
+      and isinstance(result[key], dict)
+      and isinstance(value, dict)
+    ):
+      result[key] = _merge_dicts(result[key], value)
+    else:
+      result[key] = value
+  return result
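A sketch of the new top-level `context` overwrite, assuming `ExecutionContext` accepts a `query_parameters` mapping with a `macro` key (as `ApiExecutionContext` is built in cli.py above); the file name and macro values are hypothetical:

```python
from garf.executors import execution_context
from garf.executors.workflows import workflow

context = execution_context.ExecutionContext(
  query_parameters={'macro': {'start_date': '2025-01-01'}},
)
wf = workflow.Workflow.from_file('workflow.yaml', context)
# model_post_init deep-merges these parameters into every step with
# _merge_dicts, so each step's macros are overwritten by the global ones.
```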
garf_executors-1.1.3/garf/executors/workflows/workflow_runner.py ADDED

@@ -0,0 +1,172 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Runs garf workflow."""
+
+from __future__ import annotations
+
+import logging
+import pathlib
+import re
+from typing import Final
+
+import yaml
+from garf.executors import exceptions, setup
+from garf.executors.telemetry import tracer
+from garf.executors.workflows import workflow
+from garf.io import reader
+
+logger = logging.getLogger(__name__)
+
+_REMOTE_FILES_PATTERN: Final[str] = (
+  '^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)'
+)
+_SCRIPT_PATH = pathlib.Path(__file__).parent
+
+
+class WorkflowRunner:
+  """Runs garf workflow.
+
+  Attributes:
+    workflow: Workflow to execute.
+    wf_parent: Optional location of a workflow file.
+    parallel_threshold: Max allowed parallelism for the queries in the workflow.
+  """
+
+  def __init__(
+    self,
+    execution_workflow: workflow.Workflow,
+    wf_parent: pathlib.Path | str,
+    parallel_threshold: int = 10,
+  ) -> None:
+    """Initializes WorkflowRunner."""
+    self.workflow = execution_workflow
+    self.wf_parent = wf_parent
+    self.parallel_threshold = parallel_threshold
+
+  @classmethod
+  def from_file(
+    cls,
+    workflow_file: str | pathlib.Path,
+  ) -> WorkflowRunner:
+    """Initialized Workflow runner from a local or remote file."""
+    if isinstance(workflow_file, str):
+      workflow_file = pathlib.Path(workflow_file)
+    execution_workflow = workflow.Workflow.from_file(workflow_file)
+    return cls(
+      execution_workflow=execution_workflow, wf_parent=workflow_file.parent
+    )
+
+  def run(
+    self,
+    enable_cache: bool = False,
+    cache_ttl_seconds: int = 3600,
+    selected_aliases: list[str] | None = None,
+    skipped_aliases: list[str] | None = None,
+  ) -> list[str]:
+    skipped_aliases = skipped_aliases or []
+    selected_aliases = selected_aliases or []
+    reader_client = reader.create_reader('file')
+    execution_results = []
+    logger.info('Starting Garf Workflow...')
+    for i, step in enumerate(self.workflow.steps, 1):
+      step_name = f'{i}-{step.fetcher}'
+      if step.alias:
+        step_name = f'{step_name}-{step.alias}'
+      if step.alias in skipped_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      if selected_aliases and step.alias not in selected_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      with tracer.start_as_current_span(step_name):
+        logger.info(
+          'Running step %d, fetcher: %s, alias: %s', i, step.fetcher, step.alias
+        )
+        query_executor = setup.setup_executor(
+          source=step.fetcher,
+          fetcher_parameters=step.fetcher_parameters,
+          enable_cache=enable_cache,
+          cache_ttl_seconds=cache_ttl_seconds,
+        )
+        batch = {}
+        if not (queries := step.queries):
+          logger.error('Please provide one or more queries to run')
+          raise exceptions.GarfExecutorError(
+            'Please provide one or more queries to run'
+          )
+        for query in queries:
+          if isinstance(query, workflow.QueryPath):
+            query_path = query.full_path
+            if re.match(_REMOTE_FILES_PATTERN, query_path):
+              batch[query.path] = reader_client.read(query_path)
+            else:
+              if not query.prefix:
+                query_path = self.wf_parent / pathlib.Path(query.path)
+              if not query_path.exists():
+                raise workflow.GarfWorkflowError(
+                  f'Query: {query_path} not found'
+                )
+              batch[query.path] = reader_client.read(query_path)
+          elif isinstance(query, workflow.QueryFolder):
+            query_path = self.wf_parent / pathlib.Path(query.folder)
+            if not query_path.exists():
+              raise workflow.GarfWorkflowError(
+                f'Folder: {query_path} not found'
+              )
+            for p in query_path.rglob('*'):
+              if p.suffix == '.sql':
+                batch[p.stem] = reader_client.read(p)
+          else:
+            batch[query.query.title] = query.query.text
+        query_executor.execute_batch(
+          batch,
+          step.context,
+          step.parallel_threshold or self.parallel_threshold,
+        )
+        execution_results.append(step_name)
+    return execution_results
+
+  def compile(self, path: str | pathlib.Path) -> str:
+    """Saves workflow with expanded anchors."""
+    return self.workflow.save(path)
+
+  def deploy(self, path: str | pathlib.Path) -> str:
+    """Prepares workflow for deployment to Google Cloud Workflows."""
+    wf = self.workflow.model_dump(exclude_none=True).get('steps')
+    with open(_SCRIPT_PATH / 'gcp_workflow.yaml', 'r', encoding='utf-8') as f:
+      cloud_workflow_run_template = yaml.safe_load(f)
+    init = {
+      'init': {
+        'assign': [{'pairs': wf}],
+      },
+    }
+    cloud_workflow = {
+      'main': {
+        'params': [],
+        'steps': [init, cloud_workflow_run_template],
+      },
+    }
+    with open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(cloud_workflow, f, sort_keys=False)
+    return f'Workflow is saved to {path}'
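Usage mirrors what cli.py now does with `--workflow`, `--workflow-include` and `--workflow-skip`; a sketch with a hypothetical workflow file and alias:

```python
from garf.executors.workflows import workflow_runner

runner = workflow_runner.WorkflowRunner.from_file('workflow.yaml')
completed = runner.run(
  enable_cache=True,
  cache_ttl_seconds=3600,
  selected_aliases=['step_a'],  # only steps with these aliases run
)
print(completed)  # e.g. ['1-fake-step_a']

# deploy() converts the same workflow into a Google Cloud Workflows
# definition built around the bundled gcp_workflow.yaml template.
runner.deploy('cloud_workflow.yaml')
```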
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/PKG-INFO CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: garf-executors
-Version: 1.0.7
+Version: 1.1.3
 Summary: Executes queries against API and writes data to local/remote storage.
 Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
 License: Apache 2.0
@@ -36,6 +36,7 @@ Provides-Extra: gcp
 Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
 Provides-Extra: server
 Requires-Dist: fastapi[standard]; extra == "server"
+Requires-Dist: pydantic-settings; extra == "server"
 Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
 Requires-Dist: typer; extra == "server"
 Requires-Dist: grpcio-reflection; extra == "server"
{garf_executors-1.0.7 → garf_executors-1.1.3}/garf_executors.egg-info/SOURCES.txt CHANGED

@@ -11,15 +11,19 @@ garf/executors/fetchers.py
 garf/executors/garf_pb2.py
 garf/executors/garf_pb2_grpc.py
 garf/executors/query_processor.py
+garf/executors/setup.py
 garf/executors/sql_executor.py
 garf/executors/telemetry.py
-garf/executors/workflow.py
 garf/executors/entrypoints/__init__.py
 garf/executors/entrypoints/cli.py
 garf/executors/entrypoints/grpc_server.py
 garf/executors/entrypoints/server.py
 garf/executors/entrypoints/tracer.py
 garf/executors/entrypoints/utils.py
+garf/executors/workflows/__init__.py
+garf/executors/workflows/gcp_workflow.yaml
+garf/executors/workflows/workflow.py
+garf/executors/workflows/workflow_runner.py
 garf_executors/__init__.py
 garf_executors/api_executor.py
 garf_executors/bq_executor.py
{garf_executors-1.0.7 → garf_executors-1.1.3}/pyproject.toml CHANGED

@@ -55,6 +55,7 @@ gcp= [
 ]
 server=[
     "fastapi[standard]",
+    "pydantic-settings",
     "opentelemetry-instrumentation-fastapi",
     "typer",
     "grpcio-reflection",
@@ -68,6 +69,10 @@ tests = [
 all = [
     "garf-executors[bq,sql,server,gcp]"
 ]
+
+[tool.setuptools.package-data]
+"*"= ["gcp_workflow.yaml"]
+
 [project.scripts]
 garf="garf.executors.entrypoints.cli:main"
 
All remaining files listed above with +0 -0 are unchanged between versions.