garf-executors 0.2.3__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf/executors/__init__.py +25 -0
- garf/executors/api_executor.py +228 -0
- garf/executors/bq_executor.py +179 -0
- garf/executors/config.py +52 -0
- garf/executors/entrypoints/__init__.py +0 -0
- garf/executors/entrypoints/cli.py +164 -0
- {garf_executors → garf/executors}/entrypoints/grpc_server.py +22 -9
- garf/executors/entrypoints/server.py +174 -0
- garf/executors/entrypoints/tracer.py +82 -0
- garf/executors/entrypoints/utils.py +140 -0
- garf/executors/exceptions.py +17 -0
- garf/executors/execution_context.py +117 -0
- garf/executors/executor.py +124 -0
- garf/executors/fetchers.py +128 -0
- garf/executors/garf_pb2.py +51 -0
- {garf_executors → garf/executors}/garf_pb2_grpc.py +45 -2
- garf/executors/query_processor.py +79 -0
- garf/executors/setup.py +58 -0
- garf/executors/sql_executor.py +144 -0
- garf/executors/telemetry.py +20 -0
- garf/executors/workflows/__init__.py +0 -0
- garf/executors/workflows/gcp_workflow.yaml +49 -0
- garf/executors/workflows/workflow.py +164 -0
- garf/executors/workflows/workflow_runner.py +172 -0
- garf_executors/__init__.py +9 -44
- garf_executors/api_executor.py +9 -121
- garf_executors/bq_executor.py +9 -161
- garf_executors/config.py +9 -37
- garf_executors/entrypoints/__init__.py +25 -0
- garf_executors/entrypoints/cli.py +9 -148
- garf_executors/entrypoints/grcp_server.py +25 -0
- garf_executors/entrypoints/server.py +9 -102
- garf_executors/entrypoints/tracer.py +8 -40
- garf_executors/entrypoints/utils.py +9 -124
- garf_executors/exceptions.py +11 -3
- garf_executors/execution_context.py +9 -100
- garf_executors/executor.py +9 -108
- garf_executors/fetchers.py +9 -63
- garf_executors/sql_executor.py +9 -125
- garf_executors/telemetry.py +10 -5
- garf_executors/workflow.py +8 -79
- {garf_executors-0.2.3.dist-info → garf_executors-1.1.3.dist-info}/METADATA +18 -5
- garf_executors-1.1.3.dist-info/RECORD +46 -0
- {garf_executors-0.2.3.dist-info → garf_executors-1.1.3.dist-info}/WHEEL +1 -1
- garf_executors-1.1.3.dist-info/entry_points.txt +2 -0
- {garf_executors-0.2.3.dist-info → garf_executors-1.1.3.dist-info}/top_level.txt +1 -0
- garf_executors/garf_pb2.py +0 -45
- garf_executors-0.2.3.dist-info/RECORD +0 -24
- garf_executors-0.2.3.dist-info/entry_points.txt +0 -2
garf/executors/workflows/gcp_workflow.yaml
ADDED
@@ -0,0 +1,49 @@
+run:
+  for:
+    value: pair
+    in: ${pairs}
+    steps:
+      - log_source:
+          call: sys.log
+          args:
+            data: ${pair.alias}
+      - execute_queries:
+          parallel:
+            for:
+              value: query
+              in: ${pair.queries}
+              steps:
+                - log_query:
+                    call: sys.log
+                    args:
+                      data: ${pair}
+                - execute_single_query:
+                    try:
+                      call: http.post
+                      args:
+                        url: ${sys.get_env("GARF_ENDPOINT") + "/api/execute"}
+                        auth:
+                          type: OIDC
+                        body:
+                          source: ${pair.fetcher}
+                          # query_path: ${query.path}
+                          title: ${query.query.title}
+                          query: ${query.query.text}
+                          context:
+                            fetcher_parameters: ${pair.fetcher_parameters}
+                            writer: ${pair.writer}
+                            writer_parameters: ${pair.writer_parameters}
+                            query_parameters:
+                              macro: ${pair.query_parameters.macro}
+                              template: ${pair.query_parameters.template}
+                      result: task_resp
+                    except:
+                      as: e
+                      assign:
+                        - task_resp:
+                            status: "failed"
+                            error: ${e.message}
+                - log_result:
+                    call: sys.log
+                    args:
+                      data: ${task_resp}
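For orientation: each parallel iteration of the template above issues one authenticated POST to the garf server. A minimal sketch of the JSON body that call carries — only the field names come from the `body:` mapping in the YAML; every value is a hypothetical placeholder:

import json

# Hypothetical payload for one POST to ${GARF_ENDPOINT}/api/execute.
# Field names mirror the template's `body:` mapping above; the values
# are illustrative only, not taken from the package.
payload = {
  'source': 'youtube',  # pair.fetcher
  'title': 'channel_stats',  # query.query.title
  'query': 'SELECT id, title FROM channel',  # query.query.text
  'context': {
    'fetcher_parameters': {},  # pair.fetcher_parameters
    'writer': 'console',  # pair.writer
    'writer_parameters': {},  # pair.writer_parameters
    'query_parameters': {
      'macro': {},  # pair.query_parameters.macro
      'template': {},  # pair.query_parameters.template
    },
  },
}
print(json.dumps(payload, indent=2))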
garf/executors/workflows/workflow.py
ADDED
@@ -0,0 +1,164 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Workflow specifies steps of end-to-end fetching and processing."""
+
+from __future__ import annotations
+
+import copy
+import os
+import pathlib
+import re
+from collections import defaultdict
+from typing import Any
+
+import pydantic
+import smart_open
+import yaml
+from garf.executors import exceptions
+from garf.executors.execution_context import ExecutionContext
+
+
+class GarfWorkflowError(exceptions.GarfExecutorError):
+  """Workflow specific exception."""
+
+
+class QueryFolder(pydantic.BaseModel):
+  """Path to folder with queries."""
+
+  folder: str
+
+
+class QueryPath(pydantic.BaseModel):
+  """Path file with query."""
+
+  path: str
+  prefix: str | None = None
+
+  @property
+  def full_path(self) -> str:
+    if self.prefix:
+      return re.sub('/$', '', self.prefix) + '/' + self.path
+    return self.path
+
+
+class QueryDefinition(pydantic.BaseModel):
+  """Definition of a query."""
+
+  query: Query
+
+
+class Query(pydantic.BaseModel):
+  """Query elements.
+
+  Attributes:
+    text: Query text.
+    title: Name of the query.
+  """
+
+  text: str
+  title: str
+
+
+class ExecutionStep(ExecutionContext):
+  """Common context for executing one or more queries.
+
+  Attributes:
+    fetcher: Name of a fetcher to get data from API.
+    alias: Optional alias to identify execution step.
+    queries: Queries to run for a particular fetcher.
+    context: Execution context for queries and fetcher.
+    parallel_threshold: Max allowed parallelism for the queries in the step.
+  """
+
+  fetcher: str | None = None
+  alias: str | None = pydantic.Field(default=None, pattern=r'^[a-zA-Z0-9_]+$')
+  queries: list[QueryPath | QueryDefinition | QueryFolder] | None = None
+  parallel_threshold: int | None = None
+
+  @property
+  def context(self) -> ExecutionContext:
+    return ExecutionContext(
+      writer=self.writer,
+      writer_parameters=self.writer_parameters,
+      query_parameters=self.query_parameters,
+      fetcher_parameters=self.fetcher_parameters,
+    )
+
+
+class Workflow(pydantic.BaseModel):
+  """Orchestrates execution of queries for multiple fetchers.

+
+  Attributes:
+    steps: Contains one or several fetcher executions.
+    context: Query and fetcher parameters to overwrite in steps.
+  """
+
+  steps: list[ExecutionStep]
+  context: ExecutionContext | None = None
+
+  def model_post_init(self, __context__) -> None:
+    if context := self.context:
+      custom_parameters = defaultdict(dict)
+      if custom_macros := context.query_parameters.macro:
+        custom_parameters['query_parameters']['macro'] = custom_macros
+      if custom_templates := context.query_parameters.template:
+        custom_parameters['query_parameters']['template'] = custom_templates
+      if custom_fetcher_parameters := context.fetcher_parameters:
+        custom_parameters['fetcher_parameters'] = custom_fetcher_parameters
+
+      if custom_parameters:
+        steps = self.steps
+        for i, step in enumerate(steps):
+          res = _merge_dicts(
+            step.model_dump(exclude_none=True), dict(custom_parameters)
+          )
+          steps[i] = ExecutionStep(**res)
+
+  @classmethod
+  def from_file(
+    cls,
+    path: str | pathlib.Path | os.PathLike[str],
+    context: ExecutionContext | None = None,
+  ) -> Workflow:
+    """Builds workflow from local or remote yaml file."""
+    with smart_open.open(path, 'r', encoding='utf-8') as f:
+      data = yaml.safe_load(f)
+    try:
+      return Workflow(steps=data.get('steps'), context=context)
+    except pydantic.ValidationError as e:
+      raise GarfWorkflowError(f'Incorrect workflow:\n {e}') from e
+
+  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
+    """Saves workflow to local or remote yaml file."""
+    with smart_open.open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(
+        self.model_dump(exclude_none=True).get('steps'), f, encoding='utf-8'
+      )
+    return f'Workflow is saved to {str(path)}'
+
+
+def _merge_dicts(
+  dict1: dict[str, Any], dict2: dict[str, Any]
+) -> dict[str, Any]:
+  result = copy.deepcopy(dict1)
+  for key, value in dict2.items():
+    if (
+      key in result
+      and isinstance(result[key], dict)
+      and isinstance(value, dict)
+    ):
+      result[key] = _merge_dicts(result[key], value)
+    else:
+      result[key] = value
+  return result
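A minimal sketch of how these models compose. The class and field names come from the module above; the fetcher name, alias, paths, and query text are hypothetical, and the sketch assumes the inherited ExecutionContext fields all have defaults (the module dumps them with exclude_none, which suggests they do):

from garf.executors.workflows.workflow import (
  ExecutionStep,
  Query,
  QueryDefinition,
  QueryFolder,
  QueryPath,
  Workflow,
)

# All names, paths, and query text below are hypothetical placeholders.
wf = Workflow(
  steps=[
    ExecutionStep(
      fetcher='youtube',
      alias='channel_data',
      queries=[
        # prefix matching the remote pattern means it is read as-is.
        QueryPath(path='channel.sql', prefix='gs://my-bucket/queries'),
        # every *.sql file under the folder is picked up by the runner.
        QueryFolder(folder='queries/videos'),
        # a query can also be defined inline.
        QueryDefinition(
          query=Query(title='inline_query', text='SELECT id FROM channel')
        ),
      ],
    ),
  ],
)
print(wf.save('workflow.yaml'))  # round-trips via Workflow.from_file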
garf/executors/workflows/workflow_runner.py
ADDED
@@ -0,0 +1,172 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Runs garf workflow."""
+
+from __future__ import annotations
+
+import logging
+import pathlib
+import re
+from typing import Final
+
+import yaml
+from garf.executors import exceptions, setup
+from garf.executors.telemetry import tracer
+from garf.executors.workflows import workflow
+from garf.io import reader
+
+logger = logging.getLogger(__name__)
+
+_REMOTE_FILES_PATTERN: Final[str] = (
+  '^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)'
+)
+_SCRIPT_PATH = pathlib.Path(__file__).parent
+
+
+class WorkflowRunner:
+  """Runs garf workflow.
+
+  Attributes:
+    workflow: Workflow to execute.
+    wf_parent: Optional location of a workflow file.
+    parallel_threshold: Max allowed parallelism for the queries in the workflow.
+  """
+
+  def __init__(
+    self,
+    execution_workflow: workflow.Workflow,
+    wf_parent: pathlib.Path | str,
+    parallel_threshold: int = 10,
+  ) -> None:
+    """Initializes WorkflowRunner."""
+    self.workflow = execution_workflow
+    self.wf_parent = wf_parent
+    self.parallel_threshold = parallel_threshold
+
+  @classmethod
+  def from_file(
+    cls,
+    workflow_file: str | pathlib.Path,
+  ) -> WorkflowRunner:
+    """Initialized Workflow runner from a local or remote file."""
+    if isinstance(workflow_file, str):
+      workflow_file = pathlib.Path(workflow_file)
+    execution_workflow = workflow.Workflow.from_file(workflow_file)
+    return cls(
+      execution_workflow=execution_workflow, wf_parent=workflow_file.parent
+    )
+
+  def run(
+    self,
+    enable_cache: bool = False,
+    cache_ttl_seconds: int = 3600,
+    selected_aliases: list[str] | None = None,
+    skipped_aliases: list[str] | None = None,
+  ) -> list[str]:
+    skipped_aliases = skipped_aliases or []
+    selected_aliases = selected_aliases or []
+    reader_client = reader.create_reader('file')
+    execution_results = []
+    logger.info('Starting Garf Workflow...')
+    for i, step in enumerate(self.workflow.steps, 1):
+      step_name = f'{i}-{step.fetcher}'
+      if step.alias:
+        step_name = f'{step_name}-{step.alias}'
+      if step.alias in skipped_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      if selected_aliases and step.alias not in selected_aliases:
+        logger.warning(
+          'Skipping step %d, fetcher: %s, alias: %s',
+          i,
+          step.fetcher,
+          step.alias,
+        )
+        continue
+      with tracer.start_as_current_span(step_name):
+        logger.info(
+          'Running step %d, fetcher: %s, alias: %s', i, step.fetcher, step.alias
+        )
+        query_executor = setup.setup_executor(
+          source=step.fetcher,
+          fetcher_parameters=step.fetcher_parameters,
+          enable_cache=enable_cache,
+          cache_ttl_seconds=cache_ttl_seconds,
+        )
+        batch = {}
+        if not (queries := step.queries):
+          logger.error('Please provide one or more queries to run')
+          raise exceptions.GarfExecutorError(
+            'Please provide one or more queries to run'
+          )
+        for query in queries:
+          if isinstance(query, workflow.QueryPath):
+            query_path = query.full_path
+            if re.match(_REMOTE_FILES_PATTERN, query_path):
+              batch[query.path] = reader_client.read(query_path)
+            else:
+              if not query.prefix:
+                query_path = self.wf_parent / pathlib.Path(query.path)
+                if not query_path.exists():
+                  raise workflow.GarfWorkflowError(
+                    f'Query: {query_path} not found'
+                  )
+              batch[query.path] = reader_client.read(query_path)
+          elif isinstance(query, workflow.QueryFolder):
+            query_path = self.wf_parent / pathlib.Path(query.folder)
+            if not query_path.exists():
+              raise workflow.GarfWorkflowError(
+                f'Folder: {query_path} not found'
+              )
+            for p in query_path.rglob('*'):
+              if p.suffix == '.sql':
+                batch[p.stem] = reader_client.read(p)
+          else:
+            batch[query.query.title] = query.query.text
+        query_executor.execute_batch(
+          batch,
+          step.context,
+          step.parallel_threshold or self.parallel_threshold,
+        )
+        execution_results.append(step_name)
+    return execution_results
+
+  def compile(self, path: str | pathlib.Path) -> str:
+    """Saves workflow with expanded anchors."""
+    return self.workflow.save(path)
+
+  def deploy(self, path: str | pathlib.Path) -> str:
+    """Prepares workflow for deployment to Google Cloud Workflows."""
+    wf = self.workflow.model_dump(exclude_none=True).get('steps')
+    with open(_SCRIPT_PATH / 'gcp_workflow.yaml', 'r', encoding='utf-8') as f:
+      cloud_workflow_run_template = yaml.safe_load(f)
+    init = {
+      'init': {
+        'assign': [{'pairs': wf}],
+      },
+    }
+    cloud_workflow = {
+      'main': {
+        'params': [],
+        'steps': [init, cloud_workflow_run_template],
+      },
+    }
+    with open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(cloud_workflow, f, sort_keys=False)
+    return f'Workflow is saved to {path}'
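A usage sketch tying the runner's entry points together. The method signatures come from the module above; the workflow file name and alias are hypothetical:

from garf.executors.workflows.workflow_runner import WorkflowRunner

runner = WorkflowRunner.from_file('workflow.yaml')  # hypothetical file
# Execute only the selected step, with fetcher-level caching enabled.
completed = runner.run(
  enable_cache=True,
  cache_ttl_seconds=600,
  selected_aliases=['channel_data'],  # hypothetical alias
)
print(completed)  # step names, e.g. ['1-youtube-channel_data']
# Render the same workflow as a Google Cloud Workflows definition,
# wrapping gcp_workflow.yaml around the dumped steps.
print(runner.deploy('cloud_workflow.yaml'))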
garf_executors/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,50 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Executors to fetch data from various APIs."""
 
-from __future__ import annotations
 
-import
+import warnings
 
-from
-from garf_executors.api_executor import ApiExecutionContext, ApiQueryExecutor
-from garf_executors.telemetry import tracer
+from garf.executors import *
 
-
-
-
-
-
-
-  cache_ttl_seconds: int = 3600,
-) -> type[executor.Executor]:
-  """Initializes executors based on a source and parameters."""
-  if source == 'bq':
-    bq_executor = importlib.import_module('garf_executors.bq_executor')
-    query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
-  elif source == 'sqldb':
-    sql_executor = importlib.import_module('garf_executors.sql_executor')
-    query_executor = (
-      sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
-        fetcher_parameters.get('connection_string')
-      )
-    )
-  else:
-    concrete_api_fetcher = fetchers.get_report_fetcher(source)
-    query_executor = ApiQueryExecutor(
-      fetcher=concrete_api_fetcher(
-        **fetcher_parameters,
-        enable_cache=enable_cache,
-        cache_ttl_seconds=cache_ttl_seconds,
-      )
-    )
-  return query_executor
-
-
-__all__ = [
-  'ApiQueryExecutor',
-  'ApiExecutionContext',
-]
-
-__version__ = '0.2.3'
+warnings.warn(
+  "The 'garf_executors' namespace is deprecated. "
+  "Please use 'garf.executors' instead.",
+  DeprecationWarning,
+  stacklevel=2,
+)
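The practical effect of this shim: the legacy import path still resolves (via the star re-export) but now emits a deprecation warning. A sketch of what a caller sees, assuming this wheel is installed and run in a fresh interpreter (the warning fires only on the first import of the module):

import warnings

with warnings.catch_warnings(record=True) as caught:
  warnings.simplefilter('always')
  import garf_executors  # noqa: F401  re-exports garf.executors and warns

assert any(issubclass(w.category, DeprecationWarning) for w in caught)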
garf_executors/api_executor.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,127 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Module for executing Garf queries and writing them to local/remote.
 
-ApiQueryExecutor performs fetching data from API in a form of
-GarfReport and saving it to local/remote storage.
-"""
-# pylint: disable=C0330, g-bad-import-order, g-multiple-import
 
-
+import warnings
 
-import
+from garf.executors.api_executor import *
 
-
-
-
-
-
-
-logger = logging.getLogger(__name__)
-
-
-class ApiExecutionContext(execution_context.ExecutionContext):
-  """Common context for executing one or more queries."""
-
-  writer: str | list[str] = 'console'
-
-
-class ApiQueryExecutor(executor.Executor):
-  """Gets data from API and writes them to local/remote storage.
-
-  Attributes:
-    api_client: a client used for connecting to API.
-  """
-
-  def __init__(self, fetcher: report_fetcher.ApiReportFetcher) -> None:
-    """Initializes ApiQueryExecutor.
-
-    Args:
-      fetcher: Instantiated report fetcher.
-    """
-    self.fetcher = fetcher
-    super().__init__(
-      preprocessors=self.fetcher.preprocessors,
-      postprocessors=self.fetcher.postprocessors,
-    )
-
-  @classmethod
-  def from_fetcher_alias(
-    cls,
-    source: str,
-    fetcher_parameters: dict[str, str] | None = None,
-    enable_cache: bool = False,
-    cache_ttl_seconds: int = 3600,
-  ) -> ApiQueryExecutor:
-    if not fetcher_parameters:
-      fetcher_parameters = {}
-    concrete_api_fetcher = fetchers.get_report_fetcher(source)
-    return ApiQueryExecutor(
-      fetcher=concrete_api_fetcher(
-        **fetcher_parameters,
-        enable_cache=enable_cache,
-        cache_ttl_seconds=cache_ttl_seconds,
-      )
-    )
-
-  @tracer.start_as_current_span('api.execute')
-  def execute(
-    self,
-    query: str,
-    title: str,
-    context: ApiExecutionContext,
-  ) -> str:
-    """Reads query, extract results and stores them in a specified location.
-
-    Args:
-      query: Location of the query.
-      title: Name of the query.
-      context: Query execution context.
-
-    Returns:
-      Result of writing the report.
-
-    Raises:
-      GarfExecutorError: When failed to execute query.
-    """
-    span = trace.get_current_span()
-    span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
-    span.set_attribute(
-      'api.client.class', self.fetcher.api_client.__class__.__name__
-    )
-    try:
-      span.set_attribute('query.title', title)
-      span.set_attribute('query.text', query)
-      logger.debug('starting query %s', query)
-      results = self.fetcher.fetch(
-        query_specification=query,
-        args=context.query_parameters,
-        **context.fetcher_parameters,
-      )
-      writer_clients = context.writer_clients
-      if not writer_clients:
-        logger.warning('No writers configured, skipping write operation')
-        return None
-      writing_results = []
-      for writer_client in writer_clients:
-        logger.debug(
-          'Start writing data for query %s via %s writer',
-          title,
-          type(writer_client),
-        )
-        result = writer_client.write(results, title)
-        logger.debug(
-          'Finish writing data for query %s via %s writer',
-          title,
-          type(writer_client),
-        )
-        writing_results.append(result)
-      logger.info('%s executed successfully', title)
-      # Return the last writer's result for backward compatibility
-      return writing_results[-1] if writing_results else None
-    except Exception as e:
-      logger.error('%s generated an exception: %s', title, str(e))
-      raise exceptions.GarfExecutorError(
-        '%s generated an exception: %s', title, str(e)
-      ) from e
+warnings.warn(
+  "The 'garf_executors' namespace is deprecated. "
+  "Please use 'garf.executors' instead.",
+  DeprecationWarning,
+  stacklevel=2,
+)
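Because each legacy module now does a star re-export of its garf.executors counterpart, existing call sites should keep working unchanged apart from the warning — assuming the new module exposes the same public names (e.g. ApiQueryExecutor), which this diff does not show directly:

# Old-style import: still expected to resolve under 1.1.3, but deprecated.
from garf_executors.api_executor import ApiQueryExecutor  # warns

# New-style equivalent going forward.
from garf.executors.api_executor import ApiQueryExecutor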