garf-executors 0.0.6__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf_executors/__init__.py +40 -10
- garf_executors/api_executor.py +28 -45
- garf_executors/bq_executor.py +56 -19
- garf_executors/config.py +51 -0
- garf_executors/entrypoints/cli.py +66 -49
- garf_executors/entrypoints/server.py +60 -17
- garf_executors/entrypoints/tracer.py +42 -0
- garf_executors/entrypoints/utils.py +32 -359
- garf_executors/execution_context.py +83 -0
- garf_executors/executor.py +87 -0
- garf_executors/fetchers.py +54 -16
- garf_executors/sql_executor.py +59 -22
- garf_executors/telemetry.py +20 -0
- {garf_executors-0.0.6.dist-info → garf_executors-0.1.4.dist-info}/METADATA +8 -3
- garf_executors-0.1.4.dist-info/RECORD +20 -0
- garf_executors-0.0.6.dist-info/RECORD +0 -15
- {garf_executors-0.0.6.dist-info → garf_executors-0.1.4.dist-info}/WHEEL +0 -0
- {garf_executors-0.0.6.dist-info → garf_executors-0.1.4.dist-info}/entry_points.txt +0 -0
- {garf_executors-0.0.6.dist-info → garf_executors-0.1.4.dist-info}/top_level.txt +0 -0
garf_executors/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,20 +11,50 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
-
-Instead of importing `garf_executors.api_executor.ApiQueryExecutor`
-import like this `garf_executors.ApiQueryExecutor`
-"""
+"""Executors to fetch data from various APIs."""
 
 from __future__ import annotations
 
-
-
+import importlib
+
+from garf_executors import executor, fetchers
+from garf_executors.api_executor import ApiExecutionContext, ApiQueryExecutor
+from garf_executors.telemetry import tracer
+
+
+@tracer.start_as_current_span('setup_executor')
+def setup_executor(
+    source: str,
+    fetcher_parameters: dict[str, str | int | bool],
+    enable_cache: bool = False,
+    cache_ttl_seconds: int = 3600,
+) -> type[executor.Executor]:
+  """Initializes executors based on a source and parameters."""
+  if source == 'bq':
+    bq_executor = importlib.import_module('garf_executors.bq_executor')
+    query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
+  elif source == 'sqldb':
+    sql_executor = importlib.import_module('garf_executors.sql_executor')
+    query_executor = (
+        sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
+            fetcher_parameters.get('connection_string')
+        )
+    )
+  else:
+    concrete_api_fetcher = fetchers.get_report_fetcher(source)
+    query_executor = ApiQueryExecutor(
+        concrete_api_fetcher(
+            **fetcher_parameters,
+            enable_cache=enable_cache,
+            cache_ttl_seconds=cache_ttl_seconds,
+        )
+    )
+  return query_executor
+
 
 __all__ = [
-  'FETCHERS',
   'ApiQueryExecutor',
+  'ApiExecutionContext',
 ]
 
-__version__ = '0.
+__version__ = '0.1.4'
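The new module-level `setup_executor` replaces the old `FETCHERS` registry lookup. A minimal usage sketch, assuming a hypothetical `fake-api` alias has been registered with `garf_executors.fetchers`:

    import garf_executors

    # Builds an executor for a registered fetcher alias; the caching knobs
    # are forwarded to the concrete fetcher.
    query_executor = garf_executors.setup_executor(
        source='fake-api',  # hypothetical alias
        fetcher_parameters={},
        enable_cache=True,
        cache_ttl_seconds=600,
    )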
garf_executors/api_executor.py
CHANGED
@@ -20,50 +20,25 @@ GarfReport and saving it to local/remote storage.
 
 from __future__ import annotations
 
+import asyncio
 import logging
 
-import
+from garf_core import report_fetcher
+from opentelemetry import trace
 
-from
-from garf_executors import
-from garf_io import writer
-from garf_io.writers import abs_writer
+from garf_executors import exceptions, execution_context, executor, fetchers
+from garf_executors.telemetry import tracer
 
 logger = logging.getLogger(__name__)
 
 
-class ApiExecutionContext(
-  """Common context for executing one or more queries.
+class ApiExecutionContext(execution_context.ExecutionContext):
+  """Common context for executing one or more queries."""
 
-  Attributes:
-    query_parameters: Parameters to dynamically change query text.
-    fetcher_parameters: Parameters to specify fetching setup.
-    writer: Type of writer to use.
-    writer_parameters: Optional parameters to setup writer.
-  """
-
-  query_parameters: query_editor.GarfQueryParameters | None = None
-  fetcher_parameters: dict[str, str] | None = None
   writer: str = 'console'
-  writer_parameters: dict[str, str] | None = None
 
-  def model_post_init(self, __context__) -> None:
-    if self.fetcher_parameters is None:
-      self.fetcher_parameters = {}
-    if self.writer_parameters is None:
-      self.writer_parameters = {}
 
-
-  def writer_client(self) -> abs_writer.AbsWriter:
-    writer_client = writer.create_writer(self.writer, **self.writer_parameters)
-    if self.writer == 'bq':
-      _ = writer_client.create_or_get_dataset()
-    if self.writer == 'sheet':
-      writer_client.init_client()
-    return writer_client
-
-
-class ApiQueryExecutor:
+class ApiQueryExecutor(executor.Executor):
   """Gets data from API and writes them to local/remote storage.
 
   Attributes:
@@ -78,23 +53,22 @@ class ApiQueryExecutor:
     """
     self.fetcher = fetcher
 
-
-
-
-
-
-
-
-
-    """
-    self.execute(query, context, **kwargs)
+  @classmethod
+  def from_fetcher_alias(
+    cls, source: str, fetcher_parameters: dict[str, str] | None = None
+  ) -> ApiQueryExecutor:
+    if not fetcher_parameters:
+      fetcher_parameters = {}
+    concrete_api_fetcher = fetchers.get_report_fetcher(source)
+    return ApiQueryExecutor(concrete_api_fetcher(**fetcher_parameters))
 
+  @tracer.start_as_current_span('api.execute')
   def execute(
     self,
     query: str,
     title: str,
     context: ApiExecutionContext,
-  ) ->
+  ) -> str:
     """Reads query, extract results and stores them in a specified location.
 
     Args:
@@ -102,10 +76,18 @@
       title: Name of the query.
      context: Query execution context.
 
+    Returns:
+      Result of writing the report.
+
     Raises:
       GarfExecutorError: When failed to execute query.
     """
+    span = trace.get_current_span()
+    span.set_attribute('fetcher', self.fetcher.__class__.__name__)
+    span.set_attribute('api_client', self.fetcher.api_client.__class__.__name__)
     try:
+      span.set_attribute('query_title', title)
+      span.set_attribute('query_text', query)
       logger.debug('starting query %s', query)
       results = self.fetcher.fetch(
         query_specification=query,
@@ -118,13 +100,14 @@
         title,
         type(writer_client),
       )
-      writer_client.write(results, title)
+      result = writer_client.write(results, title)
       logger.debug(
         'Finish writing data for query %s via %s writer',
         title,
         type(writer_client),
       )
       logger.info('%s executed successfully', title)
+      return result
     except Exception as e:
       logger.error('%s generated an exception: %s', title, str(e))
       raise exceptions.GarfExecutorError(
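The new `from_fetcher_alias` classmethod builds an executor straight from a source alias, and `execute` now returns the writer's result instead of `None`. A sketch, assuming a hypothetical `fake-api` alias and default context fields inherited from `ExecutionContext` (whose diff is not shown in this section):

    from garf_executors import api_executor

    query_executor = api_executor.ApiQueryExecutor.from_fetcher_alias('fake-api')
    context = api_executor.ApiExecutionContext(writer='console')

    # Returns whatever the configured writer reports back.
    result = query_executor.execute(
        query='SELECT field FROM resource',
        title='sample_query',
        context=context,
    )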
garf_executors/bq_executor.py
CHANGED
@@ -11,10 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+"""Executes queries in BigQuery."""
 
 from __future__ import annotations
 
+import os
+
 try:
   from google.cloud import bigquery  # type: ignore
 except ImportError as e:
@@ -25,19 +27,20 @@ except ImportError as e:
 
 import logging
 
-import
+from garf_core import query_editor, report
 from google.cloud import exceptions as google_cloud_exceptions
 
-from
+from garf_executors import exceptions, execution_context, executor
+from garf_executors.telemetry import tracer
 
 logger = logging.getLogger(__name__)
 
 
-class BigQueryExecutorError(
-  """Error when
+class BigQueryExecutorError(exceptions.GarfExecutorError):
+  """Error when BigQueryExecutor fails to run query."""
 
 
-class BigQueryExecutor(query_editor.TemplateProcessorMixin):
+class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
   """Handles query execution in BigQuery.
 
   Attributes:
@@ -46,13 +49,22 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
     client: BigQuery client.
   """
 
-  def __init__(
+  def __init__(
+    self,
+    project_id: str | None = os.getenv('GOOGLE_CLOUD_PROJECT'),
+    location: str | None = None,
+  ) -> None:
     """Initializes BigQueryExecutor.
 
     Args:
       project_id: Google Cloud project id.
       location: BigQuery dataset location.
     """
+    if not project_id:
+      raise BigQueryExecutorError(
+        'project_id is required. Either provide it as project_id parameter '
+        'or GOOGLE_CLOUD_PROJECT env variable.'
+      )
     self.project_id = project_id
     self.location = location
 
@@ -61,30 +73,55 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
     """Instantiates bigquery client."""
     return bigquery.Client(self.project_id)
 
+  @tracer.start_as_current_span('bq.execute')
   def execute(
-    self,
-
+    self,
+    query: str,
+    title: str,
+    context: execution_context.ExecutionContext = (
+      execution_context.ExecutionContext()
+    ),
+  ) -> report.GarfReport:
     """Executes query in BigQuery.
 
     Args:
-
-
-
+      query: Location of the query.
+      title: Name of the query.
+      context: Query execution context.
 
     Returns:
-
+      Report with data if query returns some data otherwise empty Report.
     """
-    query_text = self.replace_params_template(
+    query_text = self.replace_params_template(query, context.query_parameters)
+    self.create_datasets(context.query_parameters.macro)
     job = self.client.query(query_text)
     try:
       result = job.result()
-      logger.debug('%s launched successfully',
+      logger.debug('%s launched successfully', title)
       if result.total_rows:
-
-
+        results = report.GarfReport.from_pandas(result.to_dataframe())
+      else:
+        results = report.GarfReport()
+      if context.writer and results:
+        writer_client = context.writer_client
+        logger.debug(
+          'Start writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        writing_result = writer_client.write(results, title)
+        logger.debug(
+          'Finish writing data for query %s via %s writer',
+          title,
+          type(writer_client),
+        )
+        logger.info('%s executed successfully', title)
+        return writing_result
+      return results
     except google_cloud_exceptions.GoogleCloudError as e:
       raise BigQueryExecutorError(e) from e
 
+  @tracer.start_as_current_span('bq.create_datasets')
   def create_datasets(self, macros: dict | None) -> None:
     """Creates datasets in BQ based on values in a dict.
 
@@ -92,7 +129,7 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
     are treated as dataset names.
 
     Args:
-
+      macros: Mapping containing data for query execution.
     """
     if macros and (datasets := extract_datasets(macros)):
       for dataset in datasets:
@@ -103,7 +140,7 @@ class BigQueryExecutor(query_editor.TemplateProcessorMixin):
         bq_dataset = bigquery.Dataset(dataset_id)
         bq_dataset.location = self.location
         self.client.create_dataset(bq_dataset, timeout=30)
-        logger.
+        logger.info('Created new dataset %s', dataset_id)
 
 
 def extract_datasets(macros: dict | None) -> list[str]:
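Note that the `GOOGLE_CLOUD_PROJECT` fallback is a default argument value, so it is read once at import time; exporting the variable afterwards in the same process has no effect. A sketch with an explicit placeholder project (real BigQuery credentials are required to run it):

    from garf_executors import bq_executor

    query_executor = bq_executor.BigQueryExecutor(project_id='my-project')

    # With no writer configured on the default context, execute() returns
    # a GarfReport built from the job's results.
    report = query_executor.execute('SELECT 1 AS one', 'smoke_test')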
garf_executors/config.py
ADDED
@@ -0,0 +1,51 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=C0330, g-bad-import-order, g-multiple-import
+
+"""Stores mapping between API aliases and their execution context."""
+
+from __future__ import annotations
+
+import os
+import pathlib
+
+import pydantic
+import smart_open
+import yaml
+
+from garf_executors.execution_context import ExecutionContext
+
+
+class Config(pydantic.BaseModel):
+  """Stores necessary parameters for one or multiple API sources.
+
+  Attributes:
+    source: Mapping between API source alias and execution parameters.
+  """
+
+  sources: dict[str, ExecutionContext]
+
+  @classmethod
+  def from_file(cls, path: str | pathlib.Path | os.PathLike[str]) -> Config:
+    """Builds config from local or remote yaml file."""
+    with smart_open.open(path, 'r', encoding='utf-8') as f:
+      data = yaml.safe_load(f)
+    return Config(sources=data)
+
+  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
+    """Saves config to local or remote yaml file."""
+    with smart_open.open(path, 'w', encoding='utf-8') as f:
+      yaml.dump(self.model_dump().get('sources'), f, encoding='utf-8')
+    return f'Config is saved to {str(path)}'
garf_executors/entrypoints/cli.py
CHANGED
@@ -20,24 +20,31 @@ storage.
 from __future__ import annotations
 
 import argparse
+import logging
 import sys
-
+
+from garf_io import reader
 
 import garf_executors
-from garf_executors import exceptions
+from garf_executors import config, exceptions
 from garf_executors.entrypoints import utils
-from
+from garf_executors.entrypoints.tracer import initialize_tracer
+from garf_executors.telemetry import tracer
 
+initialize_tracer()
 
+
+@tracer.start_as_current_span('garf.entrypoints.cli')
 def main():
   parser = argparse.ArgumentParser()
   parser.add_argument('query', nargs='*')
-  parser.add_argument('-c', '--config', dest='
+  parser.add_argument('-c', '--config', dest='config', default=None)
   parser.add_argument('--source', dest='source', default=None)
   parser.add_argument('--output', dest='output', default='console')
   parser.add_argument('--input', dest='input', default='file')
   parser.add_argument('--log', '--loglevel', dest='loglevel', default='info')
   parser.add_argument('--logger', dest='logger', default='local')
+  parser.add_argument('--log-name', dest='log_name', default='garf')
   parser.add_argument(
     '--parallel-queries', dest='parallel_queries', action='store_true'
   )
@@ -47,68 +54,78 @@ def main():
   parser.add_argument('--dry-run', dest='dry_run', action='store_true')
   parser.add_argument('-v', '--version', dest='version', action='store_true')
   parser.add_argument(
-    '--parallel-threshold', dest='parallel_threshold', default=
+    '--parallel-threshold', dest='parallel_threshold', default=10, type=int
+  )
+  parser.add_argument(
+    '--enable-cache', dest='enable_cache', action='store_true'
+  )
+  parser.add_argument(
+    '--cache-ttl-seconds',
+    dest='cache_ttl_seconds',
+    default=3600,
+    type=int,
   )
   parser.set_defaults(parallel_queries=True)
+  parser.set_defaults(enable_cache=False)
   parser.set_defaults(dry_run=False)
   args, kwargs = parser.parse_known_args()
 
   if args.version:
     print(garf_executors.__version__)
     sys.exit()
-  if not (source := args.source):
-    raise exceptions.GarfExecutorError(
-      f'Select one of available sources: {list(garf_executors.FETCHERS.keys())}'
-    )
-  if not (concrete_api_fetcher := garf_executors.FETCHERS.get(source)):
-    raise exceptions.GarfExecutorError(f'Source {source} is not available.')
-
   logger = utils.init_logging(
-    loglevel=args.loglevel.upper(), logger_type=args.logger
+    loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
   )
   if not args.query:
     logger.error('Please provide one or more queries to run')
     raise exceptions.GarfExecutorError(
       'Please provide one or more queries to run'
     )
-  config = utils.ConfigBuilder('garf').build(vars(args), kwargs)
-  logger.debug('config: %s', config)
-
-  if config.params:
-    config = utils.initialize_runtime_parameters(config)
-    logger.debug('initialized config: %s', config)
-
-  extra_parameters = utils.ParamsParser(['source']).parse(kwargs)
-  source_parameters = extra_parameters.get('source', {})
   reader_client = reader.create_reader(args.input)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      executor.submit(
-        query_executor.execute,
-        reader_client.read(query),
-        query,
-        context,
-      ): query
-      for query in args.query
-    }
-    for future in futures.as_completed(future_to_query):
-      future.result()
+  if config_file := args.config:
+    execution_config = config.Config.from_file(config_file)
+    if not (context := execution_config.sources.get(args.source)):
+      raise exceptions.GarfExecutorError(
+        f'No execution context found for source {args.source} in {config_file}'
+      )
+    query_executor = garf_executors.setup_executor(
+      source=args.source,
+      fetcher_parameters=context.fetcher_parameters,
+      enable_cache=args.enable_cache,
+      cache_ttl_seconds=args.cache_ttl_seconds,
+    )
+    batch = {query: reader_client.read(query) for query in args.query}
+    query_executor.execute_batch(batch, context, args.parallel_threshold)
   else:
-
-
-
+    extra_parameters = utils.ParamsParser(
+      ['source', args.output, 'macro', 'template']
+    ).parse(kwargs)
+    source_parameters = extra_parameters.get('source', {})
+
+    context = garf_executors.api_executor.ApiExecutionContext(
+      query_parameters={
+        'macro': extra_parameters.get('macro'),
+        'template': extra_parameters.get('template'),
+      },
+      writer=args.output,
+      writer_parameters=extra_parameters.get(args.output),
+      fetcher_parameters=source_parameters,
+    )
+    query_executor = garf_executors.setup_executor(
+      source=args.source,
+      fetcher_parameters=context.fetcher_parameters,
+      enable_cache=args.enable_cache,
+      cache_ttl_seconds=args.cache_ttl_seconds,
+    )
+    if args.parallel_queries:
+      logger.info('Running queries in parallel')
+      batch = {query: reader_client.read(query) for query in args.query}
+      query_executor.execute_batch(batch, context, args.parallel_threshold)
+    else:
+      logger.info('Running queries sequentially')
+      for query in args.query:
+        query_executor.execute(reader_client.read(query), query, context)
+  logging.shutdown()
 
 
 if __name__ == '__main__':
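The new config-file branch of `main` is roughly equivalent to the following sketch; `execute_batch` comes from the new `executor.Executor` base class (its diff is not shown in this section), and the alias, file names, and threshold are placeholders:

    import garf_executors
    from garf_executors import config
    from garf_io import reader

    execution_config = config.Config.from_file('garf.yaml')
    context = execution_config.sources['fake-api']
    query_executor = garf_executors.setup_executor(
        source='fake-api',
        fetcher_parameters=context.fetcher_parameters,
    )
    reader_client = reader.create_reader('file')
    batch = {query: reader_client.read(query) for query in ['queries/q1.sql']}
    query_executor.execute_batch(batch, context, 10)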
garf_executors/entrypoints/server.py
CHANGED
@@ -14,12 +14,21 @@
 
 """FastAPI endpoint for executing queries."""
 
+from typing import Optional, Union
+
 import fastapi
 import pydantic
 import uvicorn
+from garf_io import reader
+from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
 
 import garf_executors
 from garf_executors import exceptions
+from garf_executors.entrypoints.tracer import initialize_tracer
+
+initialize_tracer()
+app = fastapi.FastAPI()
+FastAPIInstrumentor.instrument_app(app)
 
 
 class ApiExecutorRequest(pydantic.BaseModel):
@@ -27,39 +36,73 @@ class ApiExecutorRequest(pydantic.BaseModel):
 
   Attributes:
     source: Type of API to interact with.
-    query: Query to execute.
     title: Name of the query used as an output for writing.
+    query: Query to execute.
+    query_path: Local or remote path to query.
     context: Execution context.
   """
 
   source: str
-
-
+  title: Optional[str] = None
+  query: Optional[str] = None
+  query_path: Optional[Union[str, list[str]]] = None
   context: garf_executors.api_executor.ApiExecutionContext
 
+  @pydantic.model_validator(mode='after')
+  def check_query_specified(self):
+    if not self.query_path and not self.query:
+      raise exceptions.GarfExecutorError(
+        'Missing one of required parameters: query, query_path'
+      )
+    return self
+
+  def model_post_init(self, __context__) -> None:
+    if self.query_path and isinstance(self.query_path, str):
+      self.query = reader.FileReader().read(self.query_path)
+      if not self.title:
+        self.title = str(self.query_path)
+
+
+class ApiExecutorResponse(pydantic.BaseModel):
+  """Response after executing a query.
+
+  Attributes:
+    results: Results of query execution.
+  """
+
+  results: list[str]
+
+
+@app.get('/api/version')
+async def version() -> str:
+  return garf_executors.__version__
 
-router = fastapi.APIRouter(prefix='/api')
 
+@app.get('/api/fetchers')
+async def get_fetchers() -> list[str]:
+  """Shows all available API sources."""
+  return list(garf_executors.fetchers.find_fetchers())
 
-@router.post('/execute')
-async def execute(request: ApiExecutorRequest) -> dict[str, str]:
-  if not (concrete_api_fetcher := garf_executors.FETCHERS.get(request.source)):
-    raise exceptions.GarfExecutorError(
-      f'Source {request.source} is not available.'
-    )
 
-
-
+@app.post('/api/execute')
+async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
+  query_executor = garf_executors.setup_executor(
+    request.source, request.context.fetcher_parameters
   )
+  result = query_executor.execute(request.query, request.title, request.context)
+  return ApiExecutorResponse(results=[result])
 
-  query_executor.execute(request.query, request.title, request.context)
 
-
-
+@app.post('/api/execute:batch')
+def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
+  query_executor = garf_executors.setup_executor(
+    request.source, request.context.fetcher_parameters
  )
+  reader_client = reader.FileReader()
+  batch = {query: reader_client.read(query) for query in request.query_path}
+  results = query_executor.execute_batch(batch, request.context)
+  return ApiExecutorResponse(results=results)
 
 
 if __name__ == '__main__':
-  app = fastapi.FastAPI()
-  app.include_router(router)
   uvicorn.run(app)