PyPI - garf-executors - Versions diffs - 0.2.2__tar.gz - Mend

garf-executors 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of garf-executors might be problematic. Click here for more details.

Files changed (29) hide show

garf_executors-0.2.2/PKG-INFO +72 -0
garf_executors-0.2.2/README.md +32 -0
garf_executors-0.2.2/garf_executors/__init__.py +60 -0
garf_executors-0.2.2/garf_executors/api_executor.py +137 -0
garf_executors-0.2.2/garf_executors/bq_executor.py +177 -0
garf_executors-0.2.2/garf_executors/config.py +53 -0
garf_executors-0.2.2/garf_executors/entrypoints/__init__.py +0 -0
garf_executors-0.2.2/garf_executors/entrypoints/cli.py +164 -0
garf_executors-0.2.2/garf_executors/entrypoints/grpc_server.py +68 -0
garf_executors-0.2.2/garf_executors/entrypoints/server.py +118 -0
garf_executors-0.2.2/garf_executors/entrypoints/tracer.py +42 -0
garf_executors-0.2.2/garf_executors/entrypoints/utils.py +140 -0
garf_executors-0.2.2/garf_executors/exceptions.py +17 -0
garf_executors-0.2.2/garf_executors/execution_context.py +116 -0
garf_executors-0.2.2/garf_executors/executor.py +124 -0
garf_executors-0.2.2/garf_executors/fetchers.py +79 -0
garf_executors-0.2.2/garf_executors/garf_pb2.py +45 -0
garf_executors-0.2.2/garf_executors/garf_pb2_grpc.py +97 -0
garf_executors-0.2.2/garf_executors/sql_executor.py +141 -0
garf_executors-0.2.2/garf_executors/telemetry.py +20 -0
garf_executors-0.2.2/garf_executors/workflow.py +96 -0
garf_executors-0.2.2/garf_executors.egg-info/PKG-INFO +72 -0
garf_executors-0.2.2/garf_executors.egg-info/SOURCES.txt +27 -0
garf_executors-0.2.2/garf_executors.egg-info/dependency_links.txt +1 -0
garf_executors-0.2.2/garf_executors.egg-info/entry_points.txt +2 -0
garf_executors-0.2.2/garf_executors.egg-info/requires.txt +24 -0
garf_executors-0.2.2/garf_executors.egg-info/top_level.txt +1 -0
garf_executors-0.2.2/pyproject.toml +61 -0
garf_executors-0.2.2/setup.cfg +4 -0

garf_executors-0.2.2/PKG-INFO ADDED Viewed

@@ -0,0 +1,72 @@
+Metadata-Version: 2.4
+Name: garf-executors
+Version: 0.2.2
+Summary: Executes queries against API and writes data to local/remote storage.
+Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
+License: Apache 2.0
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Operating System :: OS Independent
+Classifier: License :: OSI Approved :: Apache Software License
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: garf-core
+Requires-Dist: garf-io
+Requires-Dist: pyyaml
+Requires-Dist: pydantic
+Requires-Dist: opentelemetry-api
+Requires-Dist: opentelemetry-sdk
+Requires-Dist: opentelemetry-exporter-otlp
+Provides-Extra: bq
+Requires-Dist: garf-io[bq]; extra == "bq"
+Requires-Dist: pandas; extra == "bq"
+Requires-Dist: google-cloud-logging; extra == "bq"
+Provides-Extra: sql
+Requires-Dist: garf-io[sqlalchemy]; extra == "sql"
+Requires-Dist: pandas; extra == "sql"
+Provides-Extra: server
+Requires-Dist: fastapi[standard]; extra == "server"
+Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
+Requires-Dist: typer; extra == "server"
+Provides-Extra: all
+Requires-Dist: garf-executors[bq,server,sql]; extra == "all"
+# `garf-executors` - One stop-shop for interacting with Reporting APIs.
+`garf-executors` is responsible for orchestrating the process of fetching data from an API and storing it in a storage.
+Currently the following executors are supported:
+* `ApiExecutor` - fetches data from a reporting API and saves it to a requested destination.
+* `BigQueryExecutor` - executes SQL code in BigQuery.
+* `SqlExecutor` - executes SQL code in a SqlAlchemy supported DB.
+## Installation
+`pip install garf-executors`
+## Usage
+After `garf-executors` is installed you can use `garf` utility to perform fetching.
+```
+garf <QUERIES> --source <API_SOURCE> \
+  --output <OUTPUT_TYPE> \
+  --source.params1=<VALUE>
+```
+where
+* `<QUERIES>` - local or remote path(s) to files with queries.
+* `<API_SOURCE>` - type of API to use. Based on that the appropriate report fetcher will be initialized.
+* `<OUTPUT_TYPE>` - output supported by [`garf-io` library](../garf_io/README.md).
+If your report fetcher requires additional parameters you can pass them via key-value pairs under the `--source.` argument, e.g. `--source.regionCode='US'` - to get data only from *US*.
+> Concrete `--source` parameters depend on a particular report fetcher and should be looked up in the documentation for this fetcher.

garf_executors-0.2.2/README.md ADDED Viewed

@@ -0,0 +1,32 @@
+# `garf-executors` - One stop-shop for interacting with Reporting APIs.
+`garf-executors` is responsible for orchestrating the process of fetching data from an API and storing it in a storage.
+Currently the following executors are supported:
+* `ApiExecutor` - fetches data from a reporting API and saves it to a requested destination.
+* `BigQueryExecutor` - executes SQL code in BigQuery.
+* `SqlExecutor` - executes SQL code in a SqlAlchemy supported DB.
+## Installation
+`pip install garf-executors`
+## Usage
+After `garf-executors` is installed you can use `garf` utility to perform fetching.
+```
+garf <QUERIES> --source <API_SOURCE> \
+  --output <OUTPUT_TYPE> \
+  --source.params1=<VALUE>
+```
+where
+* `<QUERIES>` - local or remote path(s) to files with queries.
+* `<API_SOURCE>` - type of API to use. Based on that the appropriate report fetcher will be initialized.
+* `<OUTPUT_TYPE>` - output supported by [`garf-io` library](../garf_io/README.md).
+If your report fetcher requires additional parameters you can pass them via key-value pairs under the `--source.` argument, e.g. `--source.regionCode='US'` - to get data only from *US*.
+> Concrete `--source` parameters depend on a particular report fetcher and should be looked up in the documentation for this fetcher.

garf_executors-0.2.2/garf_executors/__init__.py ADDED Viewed

@@ -0,0 +1,60 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Executors to fetch data from various APIs."""
+from __future__ import annotations
+import importlib
+from garf_executors import executor, fetchers
+from garf_executors.api_executor import ApiExecutionContext, ApiQueryExecutor
+from garf_executors.telemetry import tracer
+@tracer.start_as_current_span('setup_executor')
+def setup_executor(
+  source: str,
+  fetcher_parameters: dict[str, str | int | bool],
+  enable_cache: bool = False,
+  cache_ttl_seconds: int = 3600,
+) -> type[executor.Executor]:
+  """Initializes executors based on a source and parameters."""
+  if source == 'bq':
+    bq_executor = importlib.import_module('garf_executors.bq_executor')
+    query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
+  elif source == 'sqldb':
+    sql_executor = importlib.import_module('garf_executors.sql_executor')
+    query_executor = (
+      sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+        fetcher_parameters.get('connection_string')
+      )
+    )
+  else:
+    concrete_api_fetcher = fetchers.get_report_fetcher(source)
+    query_executor = ApiQueryExecutor(
+      fetcher=concrete_api_fetcher(
+        **fetcher_parameters,
+        enable_cache=enable_cache,
+        cache_ttl_seconds=cache_ttl_seconds,
+      )
+    )
+  return query_executor
+# Public names re-exported from `garf_executors.api_executor`.
+__all__ = [
+  'ApiQueryExecutor',
+  'ApiExecutionContext',
+]
+# Version of the package.
+__version__ = '0.2.2'

garf_executors-0.2.2/garf_executors/api_executor.py ADDED Viewed

@@ -0,0 +1,137 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for executing Garf queries and writing them to local/remote.
+ApiQueryExecutor performs fetching data from API in a form of
+GarfReport and saving it to local/remote storage.
+"""
+# pylint: disable=C0330, g-bad-import-order, g-multiple-import
+from __future__ import annotations
+import logging
+from garf_core import report_fetcher
+from opentelemetry import trace
+from garf_executors import exceptions, execution_context, executor, fetchers
+from garf_executors.telemetry import tracer
+logger = logging.getLogger(__name__)
+class ApiExecutionContext(execution_context.ExecutionContext):
+  """Common context for executing one or more queries."""
+  # Writer alias or list of aliases; a single 'console' writer by default.
+  writer: str | list[str] = 'console'
+class ApiQueryExecutor(executor.Executor):
+  """Gets data from API and writes them to local/remote storage.
+  Attributes:
+      api_client: a client used for connecting to API.
+  """
+  def __init__(self, fetcher: report_fetcher.ApiReportFetcher) -> None:
+    """Initializes ApiQueryExecutor.
+    Args:
+        fetcher: Instantiated report fetcher.
+    """
+    self.fetcher = fetcher
+    super().__init__(
+      preprocessors=self.fetcher.preprocessors,
+      postprocessors=self.fetcher.postprocessors,
+    )
+  @classmethod
+  def from_fetcher_alias(
+    cls,
+    source: str,
+    fetcher_parameters: dict[str, str] | None = None,
+    enable_cache: bool = False,
+    cache_ttl_seconds: int = 3600,
+  ) -> ApiQueryExecutor:
+    if not fetcher_parameters:
+      fetcher_parameters = {}
+    concrete_api_fetcher = fetchers.get_report_fetcher(source)
+    return ApiQueryExecutor(
+      fetcher=concrete_api_fetcher(
+        **fetcher_parameters,
+        enable_cache=enable_cache,
+        cache_ttl_seconds=cache_ttl_seconds,
+      )
+    )
+  @tracer.start_as_current_span('api.execute')
+  def execute(
+    self,
+    query: str,
+    title: str,
+    context: ApiExecutionContext,
+  ) -> str:
+    """Reads query, extract results and stores them in a specified location.
+    Args:
+      query: Location of the query.
+      title: Name of the query.
+      context: Query execution context.
+    Returns:
+      Result of writing the report.
+    Raises:
+      GarfExecutorError: When failed to execute query.
+    """
+    span = trace.get_current_span()
+    span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
+    span.set_attribute(
+      'api.client.class', self.fetcher.api_client.__class__.__name__
+    )
+    try:
+      span.set_attribute('query.title', title)
+      span.set_attribute('query.text', query)
+      logger.debug('starting query %s', query)
+      results = self.fetcher.fetch(
+        query_specification=query,
+        args=context.query_parameters,
+        **context.fetcher_parameters,
+      )
+      writer_clients = context.writer_clients
+      if not writer_clients:
+        logger.warning('No writers configured, skipping write operation')
+        return None
+      writing_results = []
+      for writer_client in writer_clients:
+        logger.debug(
+          'Start writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        result = writer_client.write(results, title)
+        logger.debug(
+          'Finish writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        writing_results.append(result)
+      logger.info('%s executed successfully', title)
+      # Return the last writer's result for backward compatibility
+      return writing_results[-1] if writing_results else None
+    except Exception as e:
+      logger.error('%s generated an exception: %s', title, str(e))
+      raise exceptions.GarfExecutorError(
+        '%s generated an exception: %s', title, str(e)
+      ) from e

garf_executors-0.2.2/garf_executors/bq_executor.py ADDED Viewed

@@ -0,0 +1,177 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Executes queries in BigQuery."""
+from __future__ import annotations
+import contextlib
+import os
+try:
+  from google.cloud import bigquery  # type: ignore
+except ImportError as e:
+  raise ImportError(
+    'Please install garf-executors with BigQuery support '
+    '- `pip install garf-executors[bq]`'
+  ) from e
+import logging
+from garf_core import query_editor, report
+from google.cloud import exceptions as google_cloud_exceptions
+from opentelemetry import trace
+from garf_executors import exceptions, execution_context, executor
+from garf_executors.telemetry import tracer
+logger = logging.getLogger(__name__)
+class BigQueryExecutorError(exceptions.GarfExecutorError):
+  """Error when BigQueryExecutor cannot be initialized or fails to run query."""
+class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
+  """Handles query execution in BigQuery.
+  Attributes:
+      project_id: Google Cloud project id.
+      location: BigQuery dataset location.
+      client: BigQuery client.
+  """
+  def __init__(
+    self,
+    project_id: str | None = os.getenv('GOOGLE_CLOUD_PROJECT'),
+    location: str | None = None,
+  ) -> None:
+    """Initializes BigQueryExecutor.
+    Args:
+        project_id: Google Cloud project id.
+        location: BigQuery dataset location.
+    """
+    if not project_id:
+      raise BigQueryExecutorError(
+        'project_id is required. Either provide it as project_id parameter '
+        'or GOOGLE_CLOUD_PROJECT env variable.'
+      )
+    self.project_id = project_id
+    self.location = location
+    super().__init__()
+  @property
+  def client(self) -> bigquery.Client:
+    """Instantiates bigquery client."""
+    return bigquery.Client(self.project_id)
+  @tracer.start_as_current_span('bq.execute')
+  def execute(
+    self,
+    query: str,
+    title: str,
+    context: execution_context.ExecutionContext = (
+      execution_context.ExecutionContext()
+    ),
+  ) -> report.GarfReport:
+    """Executes query in BigQuery.
+    Args:
+      query: Location of the query.
+      title: Name of the query.
+      context: Query execution context.
+    Returns:
+      Report with data if query returns some data otherwise empty Report.
+    """
+    span = trace.get_current_span()
+    logger.info('Executing script: %s', title)
+    query_text = self.replace_params_template(query, context.query_parameters)
+    self.create_datasets(context.query_parameters.macro)
+    job = self.client.query(query_text)
+    try:
+      result = job.result()
+    except google_cloud_exceptions.GoogleCloudError as e:
+      raise BigQueryExecutorError(
+        f'Failed to execute query {title}: Reason: {e}'
+      ) from e
+      logger.debug('%s launched successfully', title)
+    if result.total_rows:
+      results = report.GarfReport.from_pandas(result.to_dataframe())
+    else:
+      results = report.GarfReport()
+    if context.writer and results:
+      writer_clients = context.writer_clients
+      if not writer_clients:
+        logger.warning('No writers configured, skipping write operation')
+      else:
+        writing_results = []
+        for writer_client in writer_clients:
+          logger.debug(
+            'Start writing data for query %s via %s writer',
+            title,
+            type(writer_client),
+          )
+          writing_result = writer_client.write(results, title)
+          logger.debug(
+            'Finish writing data for query %s via %s writer',
+            title,
+            type(writer_client),
+          )
+          writing_results.append(writing_result)
+        # Return the last writer's result for backward compatibility
+        logger.info('%s executed successfully', title)
+        return writing_results[-1] if writing_results else None
+    logger.info('%s executed successfully', title)
+    span.set_attribute('execute.num_results', len(results))
+    return results
+  @tracer.start_as_current_span('bq.create_datasets')
+  def create_datasets(self, macros: dict | None) -> None:
+    """Creates datasets in BQ based on values in a dict.
+    If dict contains keys with 'dataset' in them, then values for such keys
+    are treated as dataset names.
+    Args:
+      macros: Mapping containing data for query execution.
+    """
+    if macros and (datasets := extract_datasets(macros)):
+      for dataset in datasets:
+        dataset_id = f'{self.project_id}.{dataset}'
+        try:
+          self.client.get_dataset(dataset_id)
+        except google_cloud_exceptions.NotFound:
+          bq_dataset = bigquery.Dataset(dataset_id)
+          bq_dataset.location = self.location
+          with contextlib.suppress(google_cloud_exceptions.Conflict):
+            self.client.create_dataset(bq_dataset, timeout=30)
+            logger.info('Created new dataset %s', dataset_id)
+def extract_datasets(macros: dict | None) -> list[str]:
+  """Finds dataset-related keys based on values in a dict.
+  If dict contains keys with 'dataset' in them, then values for such keys
+  are treated as dataset names.
+  Args:
+      macros: Mapping containing data for query execution.
+  Returns:
+      Possible names of datasets.
+  """
+  if not macros:
+    return []
+  return [value for macro, value in macros.items() if 'dataset' in macro]

garf_executors-0.2.2/garf_executors/config.py ADDED Viewed

@@ -0,0 +1,53 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=C0330, g-bad-import-order, g-multiple-import
+"""Stores mapping between API aliases and their execution context."""
+from __future__ import annotations
+import os
+import pathlib
+import pydantic
+import smart_open
+import yaml
+from garf_executors.execution_context import ExecutionContext
+class Config(pydantic.BaseModel):
+  """Stores necessary parameters for one or multiple API sources.
+  Attributes:
+    source: Mapping between API source alias and execution parameters.
+  """
+  sources: dict[str, ExecutionContext]
+  @classmethod
+  def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Config:
+    """Builds config from local or remote yaml file."""
+    with smart_open.open(path, 'r', encoding='utf-8') as f:
+      data = yaml.safe_load(f)
+    return Config(sources=data)
+  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
+    """Saves config to local or remote yaml file."""
+    with smart_open.open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(
+        self.model_dump(exclude_none=True).get('sources'), f, encoding='utf-8'
+      )
+    return f'Config is saved to {str(path)}'

garf_executors-0.2.2/garf_executors/entrypoints/__init__.py ADDED Viewed

File without changes