garf-executors 0.2.3__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf/executors/__init__.py +60 -0
- garf/executors/api_executor.py +143 -0
- garf/executors/bq_executor.py +177 -0
- garf/executors/config.py +52 -0
- garf/executors/entrypoints/__init__.py +0 -0
- garf/executors/entrypoints/cli.py +177 -0
- {garf_executors → garf/executors}/entrypoints/grpc_server.py +5 -6
- garf/executors/entrypoints/server.py +117 -0
- garf/executors/entrypoints/tracer.py +57 -0
- garf/executors/entrypoints/utils.py +140 -0
- garf/executors/exceptions.py +17 -0
- garf/executors/execution_context.py +117 -0
- garf/executors/executor.py +124 -0
- garf/executors/fetchers.py +78 -0
- garf/executors/query_processor.py +61 -0
- garf/executors/sql_executor.py +142 -0
- garf/executors/telemetry.py +20 -0
- garf/executors/workflow.py +109 -0
- garf_executors/__init__.py +9 -44
- garf_executors/api_executor.py +9 -121
- garf_executors/bq_executor.py +9 -161
- garf_executors/config.py +9 -37
- garf_executors/entrypoints/__init__.py +25 -0
- garf_executors/entrypoints/cli.py +9 -148
- garf_executors/entrypoints/grcp_server.py +25 -0
- garf_executors/entrypoints/server.py +9 -102
- garf_executors/entrypoints/tracer.py +8 -40
- garf_executors/entrypoints/utils.py +9 -124
- garf_executors/exceptions.py +11 -3
- garf_executors/execution_context.py +9 -100
- garf_executors/executor.py +9 -108
- garf_executors/fetchers.py +9 -63
- garf_executors/sql_executor.py +9 -125
- garf_executors/telemetry.py +10 -5
- garf_executors/workflow.py +8 -79
- {garf_executors-0.2.3.dist-info → garf_executors-1.0.2.dist-info}/METADATA +11 -5
- garf_executors-1.0.2.dist-info/RECORD +42 -0
- garf_executors-1.0.2.dist-info/entry_points.txt +2 -0
- {garf_executors-0.2.3.dist-info → garf_executors-1.0.2.dist-info}/top_level.txt +1 -0
- garf_executors-0.2.3.dist-info/RECORD +0 -24
- garf_executors-0.2.3.dist-info/entry_points.txt +0 -2
- {garf_executors → garf/executors}/garf_pb2.py +0 -0
- {garf_executors → garf/executors}/garf_pb2_grpc.py +0 -0
- {garf_executors-0.2.3.dist-info → garf_executors-1.0.2.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""FastAPI endpoint for executing queries."""
|
|
16
|
+
|
|
17
|
+
from typing import Optional, Union
|
|
18
|
+
|
|
19
|
+
import fastapi
|
|
20
|
+
import garf.executors
|
|
21
|
+
import pydantic
|
|
22
|
+
import typer
|
|
23
|
+
import uvicorn
|
|
24
|
+
from garf.executors import exceptions
|
|
25
|
+
from garf.executors.entrypoints.tracer import initialize_tracer
|
|
26
|
+
from garf.io import reader
|
|
27
|
+
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
|
28
|
+
from typing_extensions import Annotated
|
|
29
|
+
|
|
30
|
+
# Configure tracing before creating the app so FastAPI instrumentation
# attaches to the globally registered tracer provider.
initialize_tracer()
app = fastapi.FastAPI()
FastAPIInstrumentor.instrument_app(app)
# Separate Typer app exposes the `main` command that launches uvicorn.
typer_app = typer.Typer()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ApiExecutorRequest(pydantic.BaseModel):
  """Request for executing a query.

  Attributes:
    source: Type of API to interact with.
    title: Name of the query used as an output for writing.
    query: Query to execute.
    query_path: Local or remote path to query.
    context: Execution context.
  """

  source: str
  title: Optional[str] = None
  query: Optional[str] = None
  query_path: Optional[Union[str, list[str]]] = None
  context: garf.executors.api_executor.ApiExecutionContext

  @pydantic.model_validator(mode='after')
  def check_query_specified(self):
    # At least one way to obtain the query text must be provided.
    if not self.query_path and not self.query:
      raise exceptions.GarfExecutorError(
        'Missing one of required parameters: query, query_path'
      )
    return self

  def model_post_init(self, __context__) -> None:
    # Only a single (str) query_path is read eagerly into `query`; a list
    # of paths is left untouched and read later by the batch endpoint.
    if self.query_path and isinstance(self.query_path, str):
      self.query = reader.FileReader().read(self.query_path)
      if not self.title:
        # Fall back to the query location as the output name.
        self.title = str(self.query_path)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ApiExecutorResponse(pydantic.BaseModel):
  """Response after executing a query.

  Attributes:
    results: Results of query execution, one entry per executed query.
  """

  results: list[str]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@app.get('/api/version')
async def version() -> str:
  """Returns the installed garf-executors package version."""
  return garf.executors.__version__
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@app.get('/api/fetchers')
async def get_fetchers() -> list[str]:
  """Shows all available API sources discovered via entry points."""
  return list(garf.executors.fetchers.find_fetchers())
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@app.post('/api/execute')
async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
  """Executes a single query against the requested API source."""
  executor = garf.executors.setup_executor(
    request.source, request.context.fetcher_parameters
  )
  execution_result = executor.execute(
    request.query, request.title, request.context
  )
  return ApiExecutorResponse(results=[execution_result])
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@app.post('/api/execute:batch')
def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
  """Executes a batch of queries sharing a common execution context.

  Args:
    request: Request with `query_path` pointing to one or more queries.

  Returns:
    Response with results of each query execution.

  Raises:
    GarfExecutorError: When `query_path` is missing from the request.
  """
  # Batch execution reads queries from files; a request built only from
  # inline `query` text cannot be batched.
  if not request.query_path:
    raise exceptions.GarfExecutorError(
      'Missing one of required parameters: query, query_path'
    )
  # A single path arrives as a plain string; normalize to a list so we
  # iterate over paths rather than over the string's characters.
  query_paths = (
    [request.query_path]
    if isinstance(request.query_path, str)
    else request.query_path
  )
  query_executor = garf.executors.setup_executor(
    request.source, request.context.fetcher_parameters
  )
  reader_client = reader.FileReader()
  batch = {query: reader_client.read(query) for query in query_paths}
  results = query_executor.execute_batch(batch, request.context)
  return ApiExecutorResponse(results=results)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@typer_app.command()
def main(
  port: Annotated[int, typer.Option(help='Port to start the server')] = 8000,
):
  """Starts the FastAPI server via uvicorn on the given port."""
  uvicorn.run(app, port=port)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Allows running the module directly as a CLI entry point.
if __name__ == '__main__':
  typer_app()
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Copyright 2026 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
from opentelemetry import trace
|
|
18
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
|
|
19
|
+
OTLPSpanExporter,
|
|
20
|
+
)
|
|
21
|
+
from opentelemetry.sdk.resources import Resource
|
|
22
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
23
|
+
from opentelemetry.sdk.trace.export import (
|
|
24
|
+
BatchSpanProcessor,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Service name reported in traces when OTLP_SERVICE_NAME is not set.
DEFAULT_SERVICE_NAME = 'garf'


def initialize_tracer():
  """Configures the global OpenTelemetry tracer provider.

  Exporter selection is driven entirely by environment variables:
    * OTLP_SERVICE_NAME: service name attached to the trace resource.
    * OTEL_EXPORTER_OTLP_ENDPOINT: enables span export; without it the
      provider is registered with no span processors.
    * OTEL_EXPORTER_GCP_PROJECT_ID: when set alongside the endpoint,
      spans go to Google Cloud Trace (requires the `gcp` extra).

  Raises:
    ImportError: When GCP export is requested but the optional
      cloud-trace exporter dependency is not installed.
  """
  resource = Resource.create(
    {'service.name': os.getenv('OTLP_SERVICE_NAME', DEFAULT_SERVICE_NAME)}
  )

  tracer_provider = TracerProvider(resource=resource)

  if otel_endpoint := os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT'):
    if gcp_project_id := os.getenv('OTEL_EXPORTER_GCP_PROJECT_ID'):
      # Optional dependency: only required for Cloud Trace export.
      try:
        from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
      except ImportError as e:
        raise ImportError(
          'Please install garf-executors with GCP support '
          '- `pip install garf-executors[gcp]`'
        ) from e

      # NOTE(review): when a GCP project is configured the OTLP endpoint
      # value is only used as an on/off switch and is otherwise ignored -
      # confirm this is intentional.
      cloud_span_processor = BatchSpanProcessor(
        CloudTraceSpanExporter(project_id=gcp_project_id)
      )
      tracer_provider.add_span_processor(cloud_span_processor)
    else:
      otlp_processor = BatchSpanProcessor(
        OTLPSpanExporter(endpoint=otel_endpoint, insecure=True)
      )
      tracer_provider.add_span_processor(otlp_processor)

  trace.set_tracer_provider(tracer_provider)
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Copyright 2022 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Module for various helpers for executing Garf as CLI tool."""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import enum
|
|
19
|
+
import logging
|
|
20
|
+
import sys
|
|
21
|
+
from collections.abc import Sequence
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from rich import logging as rich_logging
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ParamsParser:
  """Parses grouped `--identifier.key=value` CLI arguments.

  Each identifier (e.g. `macro`, `template`) gets its own dict of parsed
  key-value pairs; keys have dashes normalized to underscores and valueless
  flags become `True`.
  """

  def __init__(self, identifiers: Sequence[str]) -> None:
    """Stores the set of recognized argument group identifiers."""
    self.identifiers = identifiers

  def parse(self, params: Sequence) -> dict[str, dict | None]:
    """Returns a mapping of each identifier to its parsed parameters."""
    parsed = {}
    for identifier in self.identifiers:
      parsed[identifier] = self._parse_params(identifier, params)
    return parsed

  def _parse_params(self, identifier: str, params: Sequence[Any]) -> dict:
    """Collects all key-value pairs belonging to a single identifier."""
    collected: dict = {}
    if not params:
      return collected
    for raw_pair in (param.split('=', maxsplit=1) for param in params):
      if pair := self._identify_param_pair(identifier, raw_pair):
        collected.update(pair)
    return collected

  def _identify_param_pair(
    self, identifier: str, param: Sequence[str]
  ) -> dict[str, Any] | None:
    """Converts one split argument into a key-value pair for `identifier`.

    Returns None for arguments that belong to a different identifier;
    raises GarfParamsException for malformed or unsupported arguments.
    """
    raw_key = param[0]
    if not identifier or identifier not in raw_key:
      return None
    prefix, *suffix_parts = raw_key.split('.')
    if not suffix_parts:
      return None
    if len(suffix_parts) > 1:
      raise GarfParamsException(
        f'{raw_key} is invalid format,'
        f'`--{identifier}.key=value` or `--{identifier}.key` '
        'are the correct formats'
      )
    prefix = prefix.replace('--', '')
    if prefix not in self.identifiers:
      supported_arguments = ', '.join(self.identifiers)
      raise GarfParamsException(
        f'CLI argument {prefix} is not supported'
        f', supported arguments {supported_arguments}'
      )
    if prefix != identifier:
      return None
    # Normalize dashes so keys are valid Python identifiers downstream.
    normalized_key = suffix_parts[0].replace('-', '_')
    if not normalized_key:
      raise GarfParamsException(
        f'{identifier} {normalized_key} is invalid,'
        f'`--{identifier}.key=value` or `--{identifier}.key` '
        'are the correct formats'
      )
    if len(param) == 2:
      return {normalized_key: param[1]}
    if len(param) == 1:
      # Valueless argument acts as a boolean flag.
      return {normalized_key: True}
    raise GarfParamsException(
      f'{identifier} {normalized_key} is invalid,'
      f'`--{identifier}.key=value` or `--{identifier}.key` '
      'are the correct formats'
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class GarfParamsException(Exception):
  """Raised when CLI key-value parameters are malformed or unsupported."""
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class LoggerEnum(str, enum.Enum):
  """Supported logger backends for init_logging."""

  # Plain stdout logging with timestamped format.
  local = 'local'
  # Rich console handler with pretty tracebacks.
  rich = 'rich'
  # Google Cloud Logging handler (requires optional dependency).
  gcloud = 'gcloud'
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def init_logging(
  loglevel: str = 'INFO',
  logger_type: str | LoggerEnum = 'local',
  name: str = __name__,
) -> logging.Logger:
  """Configures root logging and returns a named logger.

  Args:
    loglevel: Level name such as 'INFO' or 'DEBUG' (resolved via getattr).
    logger_type: One of 'local' (plain stdout), 'rich', or 'gcloud'.
    name: Name of the logger to return.

  Returns:
    Configured logger with the given name.

  Raises:
    ImportError: If 'gcloud' is requested without google-cloud-logging
      installed.
  """
  # Resolve the textual level name to its numeric logging constant.
  loglevel = getattr(logging, loglevel)
  if logger_type == 'rich':
    logging.basicConfig(
      format='%(message)s',
      level=loglevel,
      datefmt='%Y-%m-%d %H:%M:%S',
      handlers=[
        rich_logging.RichHandler(rich_tracebacks=True),
      ],
    )
  elif logger_type == 'gcloud':
    # Optional dependency, imported lazily so other modes work without it.
    try:
      import google.cloud.logging as glogging
    except ImportError as e:
      raise ImportError(
        'Please install garf-executors with Cloud logging support - '
        '`pip install garf-executors[bq]`'
      ) from e

    client = glogging.Client()
    handler = glogging.handlers.CloudLoggingHandler(client, name=name)
    # NOTE(review): the handler is closed immediately after creation yet
    # still registered below - confirm this is intentional.
    handler.close()
    glogging.handlers.setup_logging(handler, log_level=loglevel)
    logging.basicConfig(
      level=loglevel,
      handlers=[handler],
    )
  else:
    logging.basicConfig(
      format='[%(asctime)s][%(name)s][%(levelname)s] %(message)s',
      stream=sys.stdout,
      level=loglevel,
      datefmt='%Y-%m-%d %H:%M:%S',
    )
  # Silence overly chatty third-party loggers.
  logging.getLogger('smart_open.smart_open_lib').setLevel(logging.WARNING)
  logging.getLogger('urllib3.connectionpool').setLevel(logging.WARNING)
  return logging.getLogger(name)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GarfExecutorError(Exception):
  """Base class for garf executor exceptions."""
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# pylint: disable=C0330, g-bad-import-order, g-multiple-import
|
|
16
|
+
|
|
17
|
+
"""Captures parameters for fetching data from APIs."""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import pathlib
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
import pydantic
|
|
26
|
+
import smart_open
|
|
27
|
+
import yaml
|
|
28
|
+
from garf.core import query_editor
|
|
29
|
+
from garf.io import writer
|
|
30
|
+
from garf.io.writers import abs_writer
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ExecutionContext(pydantic.BaseModel):
  """Common context for executing one or more queries.

  Attributes:
    query_parameters: Parameters to dynamically change query text.
    fetcher_parameters: Parameters to specify fetching setup.
    writer: Type of writer to use. Can be a single writer string or list
      of writers.
    writer_parameters: Optional parameters to setup writer.
  """

  query_parameters: query_editor.GarfQueryParameters | None = pydantic.Field(
    default_factory=dict
  )
  fetcher_parameters: dict[str, Any] | None = pydantic.Field(
    default_factory=dict
  )
  writer: str | list[str] | None = None
  writer_parameters: dict[str, str] | None = pydantic.Field(
    default_factory=dict
  )

  def model_post_init(self, __context__) -> None:
    """Normalizes fields explicitly set to None into usable defaults."""
    if self.fetcher_parameters is None:
      self.fetcher_parameters = {}
    if self.writer_parameters is None:
      self.writer_parameters = {}
    if not self.query_parameters:
      self.query_parameters = query_editor.GarfQueryParameters()

  @classmethod
  def from_file(
    cls, path: str | pathlib.Path | os.PathLike[str]
  ) -> ExecutionContext:
    """Builds context from local or remote yaml file."""
    with smart_open.open(path, 'r', encoding='utf-8') as f:
      data = yaml.safe_load(f)
    # Use `cls` so subclasses get instances of their own type.
    return cls(**data)

  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
    """Saves context to local or remote yaml file."""
    with smart_open.open(path, 'w', encoding='utf-8') as f:
      yaml.dump(self.model_dump(), f, encoding='utf-8')
    return f'ExecutionContext is saved to {str(path)}'

  def _build_writer_client(self, writer_type: str) -> abs_writer.AbsWriter:
    """Creates and initializes one writer client of the given type."""
    writer_params = self.writer_parameters or {}
    writer_client = writer.create_writer(writer_type, **writer_params)
    # Some writers require one-off initialization before first write.
    if writer_type == 'bq':
      _ = writer_client.create_or_get_dataset()
    if writer_type == 'sheet':
      writer_client.init_client()
    return writer_client

  @property
  def writer_client(self) -> abs_writer.AbsWriter:
    """Returns single writer client.

    Raises:
      ValueError: When no writer is configured.
    """
    if isinstance(self.writer, list) and len(self.writer) > 0:
      writer_type = self.writer[0]
    else:
      writer_type = self.writer

    if not writer_type:
      raise ValueError('No writer specified')

    return self._build_writer_client(writer_type)

  @property
  def writer_clients(self) -> list[abs_writer.AbsWriter]:
    """Returns list of writer clients (empty when no writer configured)."""
    if not self.writer:
      return []

    # Convert single writer to list for uniform processing.
    writers_to_use = (
      self.writer if isinstance(self.writer, list) else [self.writer]
    )
    return [
      self._build_writer_client(writer_type) for writer_type in writers_to_use
    ]
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Defines common functionality between executors."""
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import inspect
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from garf.core import report_fetcher
|
|
22
|
+
from garf.executors import execution_context, query_processor
|
|
23
|
+
from garf.executors.telemetry import tracer
|
|
24
|
+
from opentelemetry import trace
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Executor:
  """Defines common functionality between executors."""

  def __init__(
    self,
    preprocessors: Optional[dict[str, report_fetcher.Processor]] = None,
    postprocessors: Optional[dict[str, report_fetcher.Processor]] = None,
  ) -> None:
    """Initializes Executor with optional pre/post processors.

    Args:
      preprocessors: Mapping of fetcher-parameter name to processor run
        before batch execution.
      postprocessors: Same mapping, run after batch execution.
    """
    self.preprocessors = preprocessors or {}
    self.postprocessors = postprocessors or {}

  @tracer.start_as_current_span('api.execute_batch')
  def execute_batch(
    self,
    batch: dict[str, str],
    context: execution_context.ExecutionContext,
    parallel_threshold: int = 10,
  ) -> list[str]:
    """Executes batch of queries for a common context.

    If an executor has any pre/post processors, executes them first while
    modifying the context.

    Args:
      batch: Mapping between query_title and its text.
      context: Execution context.
      parallel_threshold: Number of queries to execute in parallel.

    Returns:
      Results of execution.
    """
    span = trace.get_current_span()
    span.set_attribute('api.parallel_threshold', parallel_threshold)
    _handle_processors(processors=self.preprocessors, context=context)
    # asyncio.run starts a fresh event loop, so this method must not be
    # called from within an already running loop.
    results = asyncio.run(
      self._run(
        batch=batch, context=context, parallel_threshold=parallel_threshold
      )
    )
    _handle_processors(processors=self.postprocessors, context=context)
    return results

  def add_preprocessor(
    self, preprocessors: dict[str, report_fetcher.Processor]
  ) -> None:
    """Merges additional preprocessors into the existing mapping."""
    self.preprocessors.update(preprocessors)

  async def aexecute(
    self,
    query: str,
    title: str,
    context: execution_context.ExecutionContext,
  ) -> str:
    """Performs query execution asynchronously.

    Delegates to `self.execute` in a worker thread; `execute` is not
    defined on this base class and is presumably provided by concrete
    executor subclasses (not visible here - confirm).

    Args:
      query: Location of the query.
      title: Name of the query.
      context: Query execution context.

    Returns:
      Result of writing the report.
    """
    return await asyncio.to_thread(self.execute, query, title, context)

  async def _run(
    self,
    batch: dict[str, str],
    context: execution_context.ExecutionContext,
    parallel_threshold: int,
  ):
    """Runs all batch queries concurrently, capped by a semaphore."""
    semaphore = asyncio.Semaphore(value=parallel_threshold)

    async def run_with_semaphore(fn):
      # Each coroutine waits for a slot before executing, limiting
      # concurrency to `parallel_threshold`.
      async with semaphore:
        return await fn

    tasks = [
      self.aexecute(query=query, title=title, context=context)
      for title, query in batch.items()
    ]
    # gather preserves the order of `tasks` in the returned results.
    return await asyncio.gather(*(run_with_semaphore(task) for task in tasks))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _handle_processors(
  processors: dict[str, report_fetcher.Processor],
  context: execution_context.ExecutionContext,
) -> None:
  """Applies registered processors to matching fetcher parameters.

  Every processor whose name appears in `context.fetcher_parameters` is
  called with the subset of fetcher parameters its signature accepts, and
  its result replaces the original parameter value in place.
  """
  context = query_processor.process_gquery(context)
  for name, processor in processors.items():
    accepted_params = list(inspect.signature(processor).parameters.keys())
    if name not in context.fetcher_parameters:
      continue
    call_kwargs = {
      param: value
      for param, value in context.fetcher_parameters.items()
      if param in accepted_params
    }
    context.fetcher_parameters[name] = processor(**call_kwargs)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import inspect
|
|
16
|
+
import logging
|
|
17
|
+
import sys
|
|
18
|
+
from importlib.metadata import entry_points
|
|
19
|
+
|
|
20
|
+
from garf.core import report_fetcher
|
|
21
|
+
from garf.executors.telemetry import tracer
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(name='garf.executors.fetchers')


@tracer.start_as_current_span('find_fetchers')
def find_fetchers() -> set[str]:
  """Identifies all available report fetchers registered as entry points."""
  fetchers = _get_entrypoints('garf')
  if not fetchers:
    return set()
  return {fetcher.name for fetcher in fetchers}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@tracer.start_as_current_span('get_report_fetcher')
def get_report_fetcher(source: str) -> type[report_fetcher.ApiReportFetcher]:
  """Loads report fetcher for a given source.

  Args:
    source: Alias for a source associated with a fetcher.

  Returns:
    Class for a found report fetcher.

  Raises:
    ApiReportFetcherError: When fetcher cannot be loaded.
    MissingApiReportFetcherError: When fetcher not found.
  """
  if source not in find_fetchers():
    raise report_fetcher.MissingApiReportFetcherError(source)
  for fetcher in _get_entrypoints('garf'):
    if fetcher.name == source:
      try:
        with tracer.start_as_current_span('load_fetcher_module') as span:
          fetcher_module = fetcher.load()
          span.set_attribute('loaded_module', fetcher_module.__name__)
        # Returns the first ApiReportFetcher subclass found in the module;
        # inspect.getmembers yields members in alphabetical order.
        for name, obj in inspect.getmembers(fetcher_module):
          if inspect.isclass(obj) and issubclass(
            obj, report_fetcher.ApiReportFetcher
          ):
            return getattr(fetcher_module, name)
      except ModuleNotFoundError as e:
        # Typically a missing optional dependency of the fetcher package.
        raise report_fetcher.ApiReportFetcherError(
          f'Failed to load fetcher for source {source}, reason: {e}'
        )
  raise report_fetcher.ApiReportFetcherError(
    f'No fetcher available for the source "{source}"'
  )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _get_entrypoints(group='garf'):
|
|
71
|
+
if sys.version_info.major == 3 and sys.version_info.minor == 9:
|
|
72
|
+
try:
|
|
73
|
+
fetchers = entry_points()[group]
|
|
74
|
+
except KeyError:
|
|
75
|
+
fetchers = []
|
|
76
|
+
else:
|
|
77
|
+
fetchers = entry_points(group=group)
|
|
78
|
+
return fetchers
|