garf-executors 0.0.6__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+
17
+ from opentelemetry import trace
18
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
19
+ OTLPSpanExporter,
20
+ )
21
+ from opentelemetry.sdk.resources import Resource
22
+ from opentelemetry.sdk.trace import TracerProvider
23
+ from opentelemetry.sdk.trace.export import (
24
+ BatchSpanProcessor,
25
+ )
26
+
27
+ DEFAULT_SERVICE_NAME = 'garf'
28
+
29
+
30
def initialize_tracer():
  """Creates a tracer provider and registers it as the global one.

  The service name is read from the `OTLP_SERVICE_NAME` environment variable
  and falls back to `DEFAULT_SERVICE_NAME`. A batching OTLP gRPC exporter is
  attached only when `OTEL_EXPORTER_OTLP_ENDPOINT` is set to a non-empty
  value; the exporter is created with `insecure=True`.
  """
  service_name = os.getenv('OTLP_SERVICE_NAME', DEFAULT_SERVICE_NAME)
  provider = TracerProvider(
    resource=Resource.create({'service.name': service_name})
  )

  endpoint = os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT')
  if endpoint:
    exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)
    provider.add_span_processor(BatchSpanProcessor(exporter))

  # NOTE(review): the provider is registered regardless of whether an
  # exporter was attached - confirm this matches the intended nesting.
  trace.set_tracer_provider(provider)
@@ -15,261 +15,14 @@
15
15
 
16
16
  from __future__ import annotations
17
17
 
18
- import dataclasses
19
- import datetime
18
+ import enum
20
19
  import logging
21
- import os
22
20
  import sys
23
- from collections.abc import MutableSequence, Sequence
24
- from typing import Any, TypedDict
21
+ from collections.abc import Sequence
22
+ from typing import Any
25
23
 
26
- import smart_open
27
- import yaml
28
- from dateutil import relativedelta
29
24
  from rich import logging as rich_logging
30
25
 
31
- from garf_core import query_editor
32
-
33
-
34
class GarfQueryParameters(TypedDict):
  """Typed dictionary of query specific parameters passed via CLI.

  Attributes:
    macros: Mapping for elements that will be replaced in the queries.
    template: Mapping for elements that will be rendered via Jinja templates.
  """

  macros: dict[str, str]
  template: dict[str, str]
44
-
45
-
46
@dataclasses.dataclass
class BaseConfig:
  """Common parent for concrete configs.

  Supports merging two configs with `+` and building a config from a plain
  dictionary; empty values are stripped in both cases.
  """

  def __add__(self, other: BaseConfig) -> BaseConfig:
    """Merges two configs into a new config of the left operand's type.

    Non-empty values of `other` overwrite values already present on `self`.

    Args:
      other: Config to merge on top of this one.

    Returns:
      New config holding the non-empty values of both configs.
    """
    merged = dict(_remove_empty_values(self.__dict__))
    merged.update(_remove_empty_values(other.__dict__))
    return self.__class__(**merged)

  @classmethod
  def from_dict(
    cls, config_parameters: dict[str, str | GarfQueryParameters]
  ) -> BaseConfig:
    """Builds a config from the non-empty entries of the given mapping."""
    non_empty = _remove_empty_values(config_parameters)
    return cls(**non_empty)
72
-
73
-
74
@dataclasses.dataclass
class GarfConfig(BaseConfig):
  """Holds values needed to run garf from command line.

  Attributes:
    account:
      Account(s) to get data from.
    output:
      Specifies where to store fetched data (console, csv, BQ.)
    params:
      Any parameters passed to Garf query for substitution.
    writer_params:
      Any parameters that can be passed to writer for data saving.
    customer_ids_query:
      Query text to limit accounts fetched from Ads API.
    customer_ids_query_file:
      Path to query to limit accounts fetched from Ads API.
  """

  account: str | list[str] | None = None
  output: str = 'console'
  params: GarfQueryParameters = dataclasses.field(default_factory=dict)
  writer_params: dict[str, str | int] = dataclasses.field(default_factory=dict)
  customer_ids_query: str | None = None
  customer_ids_query_file: str | None = None

  def __post_init__(self) -> None:
    """Normalizes account ids and writer parameter names after init."""

    def _clean(raw):
      # Account ids are stored as strings without dashes and outer spaces.
      return str(raw).replace('-', '').strip()

    if isinstance(self.account, MutableSequence):
      self.account = [_clean(item) for item in self.account]
    elif self.account:
      self.account = _clean(self.account)
    else:
      # Any falsy scalar is collapsed to None.
      self.account = None

    # Writer parameter names use underscores instead of dashes.
    renamed = {}
    for name, value in self.writer_params.items():
      renamed[name.replace('-', '_')] = value
    self.writer_params = renamed
115
-
116
-
117
class GarfConfigException(Exception):
  """Signals an invalid GarfConfig."""
119
-
120
-
121
@dataclasses.dataclass
class GarfBqConfig(BaseConfig):
  """Holds values needed to run garf-bq from command line.

  Attributes:
    project:
      Google Cloud project name.
    dataset_location:
      Location of BigQuery dataset.
    params:
      Any parameters passed to BigQuery query for substitution.
  """

  project: str | None = None
  dataset_location: str | None = None
  # Each instance gets its own empty mapping by default.
  params: GarfQueryParameters = dataclasses.field(default_factory=dict)
137
-
138
-
139
@dataclasses.dataclass
class GarfSqlConfig(BaseConfig):
  """Holds values needed to run garf-sql from command line.

  Attributes:
    connection_string:
      Connection string to SqlAlchemy database engine.
    params:
      Any parameters passed to SQL query for substitution.
  """

  connection_string: str | None = None
  # Each instance gets its own empty mapping by default.
  params: GarfQueryParameters = dataclasses.field(default_factory=dict)
152
-
153
-
154
class ConfigBuilder:
  """Builds config of provided type.

  A config can come from a file, from CLI arguments, or from both.

  Attributes:
    config: Concrete config class that needs to be built.
  """

  _config_mapping: dict[str, BaseConfig] = {
    'garf': GarfConfig,
    'garf-bq': GarfBqConfig,
    'garf-sql': GarfSqlConfig,
  }

  def __init__(self, config_type: str) -> None:
    """Selects the concrete config class for the given type.

    Args:
      config_type: Type of config that should be built.

    Raises:
      GarfConfigException: When incorrect config_type is supplied.
    """
    if config_type not in self._config_mapping:
      raise GarfConfigException(f'Invalid config_type: {config_type}')
    self._config_type = config_type
    self.config = self._config_mapping.get(config_type)

  def build(
    self, parameters: dict[str, str], cli_named_args: Sequence[str]
  ) -> BaseConfig | None:
    """Builds config from a file, from CLI arguments, or from both.

    When both a config file and CLI arguments are present, CLI values are
    merged on top of the file values.

    Args:
      parameters: Parsed CLI arguments.
      cli_named_args: Unparsed CLI args in a form `--key.subkey=value`.

    Returns:
      Concrete config with injected values.
    """
    config_path = parameters.get('garf_config')
    if not config_path:
      return self._build_config(parameters, cli_named_args)
    from_file = self._load_config(config_path)
    from_cli = self._build_config(
      parameters, cli_named_args, init_defaults=False
    )
    if from_file and from_cli:
      return from_file + from_cli
    return from_file

  def _build_config(
    self,
    parameters: dict[str, str],
    cli_named_args: Sequence[str],
    init_defaults: bool = True,
  ) -> BaseConfig | None:
    """Builds config from named and unnamed CLI parameters.

    Args:
      parameters: Parsed CLI arguments.
      cli_named_args: Unparsed CLI args in a form `--key.subkey=value`.
      init_defaults: When True the config is created via `from_dict`,
        otherwise the config class is called directly.

    Returns:
      Concrete config with injected values, or None when nothing was passed.
    """
    output = parameters.get('output')
    known_fields = self.config.__annotations__
    config_parameters = {
      name: value for name, value in parameters.items() if name in known_fields
    }
    parser = ParamsParser(['macro', 'template', output])
    cli_params = _remove_empty_values(parser.parse(cli_named_args))
    # Arguments addressed to the selected writer are moved out of the query
    # parameters into `writer_params`.
    writer_params = cli_params.get(output) if output else None
    if writer_params:
      del cli_params[output]
      config_parameters['writer_params'] = writer_params
    if cli_params:
      config_parameters['params'] = cli_params
    if not config_parameters:
      return None
    if init_defaults:
      return self.config.from_dict(config_parameters)
    return self.config(**config_parameters)

  def _load_config(self, garf_config_path: str) -> BaseConfig:
    """Loads config from provided path.

    Args:
      garf_config_path: Path to local or remote storage.

    Returns:
      Concrete config with values taken from config file.

    Raises:
      GarfConfigException:
        If config file is missing the section for the selected config type.
    """
    with smart_open.open(garf_config_path, encoding='utf-8') as f:
      config = yaml.safe_load(f)
    section = config.get(self._config_type)
    if not section:
      raise GarfConfigException(
        f'Invalid config, must have `{self._config_type}` section!'
      )
    known_fields = self.config.__annotations__
    config_parameters = {
      name: value for name, value in section.items() if name in known_fields
    }
    params = section.get('params', {})
    if params:
      config_parameters['params'] = params
    # Writer parameters live under a key named after the selected output.
    writer_params = section.get(section.get('output', ''))
    if writer_params:
      config_parameters['writer_params'] = writer_params
    return self.config(**config_parameters)
272
-
273
26
 
274
27
  class ParamsParser:
275
28
  def __init__(self, identifiers: Sequence[str]) -> None:
@@ -298,6 +51,8 @@ class ParamsParser:
298
51
  if not identifier or identifier not in key:
299
52
  return None
300
53
  provided_identifier, *keys = key.split('.')
54
+ if not keys:
55
+ return None
301
56
  if len(keys) > 1:
302
57
  raise GarfParamsException(
303
58
  f'{key} is invalid format,'
@@ -306,9 +61,10 @@ class ParamsParser:
306
61
  )
307
62
  provided_identifier = provided_identifier.replace('--', '')
308
63
  if provided_identifier not in self.identifiers:
64
+ supported_arguments = ', '.join(self.identifiers)
309
65
  raise GarfParamsException(
310
66
  f'CLI argument {provided_identifier} is not supported'
311
- f', supported arguments {", ".join(self.identifiers)}'
67
+ f', supported arguments {supported_arguments}'
312
68
  )
313
69
  if provided_identifier != identifier:
314
70
  return None
@@ -334,118 +90,18 @@ class GarfParamsException(Exception):
334
90
  """Defines exception for incorrect parameters."""
335
91
 
336
92
 
337
def convert_date(date_string: str) -> str:
  """Converts dynamic date parameters to actual dates.

  Supported dynamic values are `:YYYY` (first day of the current year),
  `:YYYYMM` (first day of the current month) and `:YYYYMMDD` (today), each
  optionally followed by `-N` to go N years / months / days back.

  Args:
    date_string: Value to convert. Anything that is not a string, or a string
      without a `:YYYY` token, is returned unchanged.

  Returns:
    Date string in YYYY-MM-DD format, or the input itself when no conversion
    applies.

  Raises:
    ValueError:
      If dynamic lookback value (:YYYYMMDD-N) is incorrect or the dynamic
      date token is not one of the supported ones.
  """
  # Pass through lists, numbers and plain strings untouched.
  if not isinstance(date_string, str) or ':YYYY' not in date_string:
    return date_string
  base_date, *lookback = date_string.split('-')
  if len(lookback) == 1:
    try:
      periods_ago = int(lookback[0])
    except ValueError as e:
      raise ValueError(
        'Must provide numeric value for a number lookback period, '
        'i.e. :YYYYMMDD-1'
      ) from e
  else:
    periods_ago = 0
  today = datetime.date.today()
  if base_date == ':YYYY':
    new_date = datetime.date(today.year - periods_ago, 1, 1)
  elif base_date == ':YYYYMM':
    # Count months from year 0 so that going back crosses year boundaries.
    month_count = today.year * 12 + (today.month - 1) - periods_ago
    year, month_index = divmod(month_count, 12)
    new_date = datetime.date(year, month_index + 1, 1)
  elif base_date == ':YYYYMMDD':
    new_date = today - datetime.timedelta(days=periods_ago)
  else:
    # Previously an unknown token fell through with no date computed.
    raise ValueError(
      f'Unsupported dynamic date {date_string!r}, '
      'expected :YYYY, :YYYYMM or :YYYYMMDD with optional -N lookback'
    )
  return new_date.strftime('%Y-%m-%d')
372
-
373
-
374
class ConfigSaver:
  """Saves a config as a section of a YAML file.

  Attributes:
    path: Location of the YAML file to create or update.
  """

  def __init__(self, path: str) -> None:
    self.path = path

  def save(self, garf_config: BaseConfig) -> None:
    """Writes the config to `path`, keeping other sections of the file.

    Args:
      garf_config: Config to store.
    """
    config = {}
    if os.path.exists(self.path):
      with smart_open.open(self.path, 'r', encoding='utf-8') as f:
        # An empty YAML document is parsed as None; treat it as no sections.
        config = yaml.safe_load(f) or {}
    config = self.prepare_config(config, garf_config)
    with smart_open.open(self.path, 'w', encoding='utf-8') as f:
      yaml.dump(
        config, f, default_flow_style=False, sort_keys=False, encoding='utf-8'
      )

  def prepare_config(self, config: dict, garf_config: BaseConfig) -> dict:
    """Puts the config under its section name in the provided mapping.

    Args:
      config: Mapping that is updated in place and returned.
      garf_config: Config to convert; it is stored under `garf`, `garf-bq`
        or `garf-sql` depending on its class.

    Returns:
      The same `config` mapping with the section added or replaced.
    """
    garf = dataclasses.asdict(garf_config)
    if isinstance(garf_config, GarfConfig):
      # Writer parameters are stored under a key named after the output.
      garf[garf_config.output] = garf_config.writer_params
      account = garf_config.account
      # A missing account stays None and is dropped with other empty values.
      if account is not None and not isinstance(account, MutableSequence):
        garf['account'] = account.split(',')
      del garf['writer_params']
      garf = _remove_empty_values(garf)
      config.update({'garf': garf})
    if isinstance(garf_config, GarfBqConfig):
      garf = _remove_empty_values(garf)
      config.update({'garf-bq': garf})
    if isinstance(garf_config, GarfSqlConfig):
      garf = _remove_empty_values(garf)
      config.update({'garf-sql': garf})
    return config
406
-
407
-
408
def initialize_runtime_parameters(config: BaseConfig) -> BaseConfig:
  """Formats parameters and adds common parameters to the config.

  Every value in every section of `config.params` is passed through
  `convert_date`. Afterwards each section receives the common parameters it
  does not already define.

  Args:
    config: Instantiated config; its `params` are modified in place.

  Returns:
    The same config with formatted parameters.
  """
  shared = query_editor.CommonParametersMixin().common_params
  for section in config.params.values():
    for name, raw_value in section.items():
      section[name] = convert_date(raw_value)
    # Explicitly provided values win over common ones.
    for name, shared_value in shared.items():
      section.setdefault(name, shared_value)
  return config
430
-
431
-
432
- def _remove_empty_values(dict_object: dict[str, Any]) -> dict[str, Any]:
433
- """Remove all empty elements: strings, dictionaries from a dictionary."""
434
- if isinstance(dict_object, dict):
435
- return {
436
- key: value
437
- for key, value in (
438
- (key, _remove_empty_values(value)) for key, value in dict_object.items()
439
- )
440
- if value
441
- }
442
- if isinstance(dict_object, (int, str, MutableSequence)):
443
- return dict_object
93
class LoggerEnum(str, enum.Enum):
  """Names of the available logger types.

  Members are also `str` instances, so they compare equal to their plain
  string values.
  """

  local = 'local'
  rich = 'rich'
  gcloud = 'gcloud'
444
97
 
445
98
 
446
99
  def init_logging(
447
- loglevel: str = 'INFO', logger_type: str = 'local', name: str = __name__
100
+ loglevel: str = 'INFO',
101
+ logger_type: str | LoggerEnum = 'local',
102
+ name: str = __name__,
448
103
  ) -> logging.Logger:
104
+ loglevel = getattr(logging, loglevel)
449
105
  if logger_type == 'rich':
450
106
  logging.basicConfig(
451
107
  format='%(message)s',
@@ -455,6 +111,23 @@ def init_logging(
455
111
  rich_logging.RichHandler(rich_tracebacks=True),
456
112
  ],
457
113
  )
114
+ elif logger_type == 'gcloud':
115
+ try:
116
+ import google.cloud.logging as glogging
117
+ except ImportError as e:
118
+ raise ImportError(
119
+ 'Please install garf-executors with Cloud logging support - '
120
+ '`pip install garf-executors[bq]`'
121
+ ) from e
122
+
123
+ client = glogging.Client()
124
+ handler = glogging.handlers.CloudLoggingHandler(client, name=name)
125
+ handler.close()
126
+ glogging.handlers.setup_logging(handler, log_level=loglevel)
127
+ logging.basicConfig(
128
+ level=loglevel,
129
+ handlers=[handler],
130
+ )
458
131
  else:
459
132
  logging.basicConfig(
460
133
  format='[%(asctime)s][%(name)s][%(levelname)s] %(message)s',
@@ -0,0 +1,83 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # pylint: disable=C0330, g-bad-import-order, g-multiple-import
16
+
17
+ """Captures parameters for fetching data from APIs."""
18
+
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ import pathlib
23
+
24
+ import pydantic
25
+ import smart_open
26
+ import yaml
27
+ from garf_core import query_editor
28
+ from garf_io import writer
29
+ from garf_io.writers import abs_writer
30
+
31
+
32
class ExecutionContext(pydantic.BaseModel):
  """Common context for executing one or more queries.

  Attributes:
    query_parameters: Parameters to dynamically change query text.
    fetcher_parameters: Parameters to specify fetching setup.
    writer: Type of writer to use.
    writer_parameters: Optional parameters to setup writer.
  """

  query_parameters: query_editor.GarfQueryParameters | None = pydantic.Field(
    default_factory=dict
  )
  fetcher_parameters: dict[str, str | bool | int | list[str | int]] | None = (
    pydantic.Field(default_factory=dict)
  )
  writer: str | None = None
  writer_parameters: dict[str, str] | None = pydantic.Field(
    default_factory=dict
  )

  def model_post_init(self, __context__) -> None:
    """Replaces missing parameter groups with empty defaults."""
    if self.fetcher_parameters is None:
      self.fetcher_parameters = {}
    if self.writer_parameters is None:
      self.writer_parameters = {}
    # Both None and an empty mapping are replaced with a fresh
    # GarfQueryParameters instance.
    if not self.query_parameters:
      self.query_parameters = query_editor.GarfQueryParameters()

  @classmethod
  def from_file(
    cls, path: str | pathlib.Path | os.PathLike[str]
  ) -> ExecutionContext:
    """Builds context from local or remote yaml file.

    Args:
      path: Location of the yaml file.

    Returns:
      Context of the class the method is called on; an empty yaml file
      produces a context with default values.
    """
    with smart_open.open(path, 'r', encoding='utf-8') as f:
      data = yaml.safe_load(f)
    # An empty document is parsed as None, which cannot be unpacked.
    return cls(**(data or {}))

  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
    """Saves context to local or remote yaml file.

    Args:
      path: Location to write the yaml file to.

    Returns:
      Message confirming where the context was saved.
    """
    with smart_open.open(path, 'w', encoding='utf-8') as f:
      yaml.dump(self.model_dump(), f, encoding='utf-8')
    return f'ExecutionContext is saved to {str(path)}'

  @property
  def writer_client(self) -> abs_writer.AbsWriter:
    """Creates a writer for `writer` configured with `writer_parameters`.

    A new writer is created on every access. For the `bq` writer the dataset
    is created or fetched, and for the `sheet` writer the client is
    initialized before the writer is returned.
    """
    writer_client = writer.create_writer(self.writer, **self.writer_parameters)
    if self.writer == 'bq':
      _ = writer_client.create_or_get_dataset()
    if self.writer == 'sheet':
      writer_client.init_client()
    return writer_client
@@ -0,0 +1,87 @@
1
+ # Copyright 2025 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Defines common functionality between executors."""
16
+
17
+ import asyncio
18
+
19
+ from opentelemetry import trace
20
+
21
+ from garf_executors import execution_context
22
+ from garf_executors.telemetry import tracer
23
+
24
+
25
class Executor:
  """Defines common functionality between executors.

  `aexecute` delegates to `self.execute`, which is not defined on this
  class; concrete executors are expected to provide it.
  """

  @tracer.start_as_current_span('api.execute_batch')
  def execute_batch(
    self,
    batch: dict[str, str],
    context: execution_context.ExecutionContext,
    parallel_threshold: int = 10,
  ) -> list[str]:
    """Executes batch of queries for a common context.

    Args:
      batch: Mapping between query_title and its text.
      context: Execution context.
      parallel_threshold: Number of queries to execute in parallel.

    Returns:
      Results of execution.

    Raises:
      ValueError: If parallel_threshold is less than 1.
    """
    span = trace.get_current_span()
    span.set_attribute('api.parallel_threshold', parallel_threshold)
    return asyncio.run(
      self._run(
        batch=batch, context=context, parallel_threshold=parallel_threshold
      )
    )

  async def aexecute(
    self,
    query: str,
    title: str,
    context: execution_context.ExecutionContext,
  ) -> str:
    """Performs query execution asynchronously.

    The synchronous `execute` method is run in a separate thread.

    Args:
      query: Location of the query.
      title: Name of the query.
      context: Query execution context.

    Returns:
      Result of writing the report.
    """
    return await asyncio.to_thread(self.execute, query, title, context)

  async def _run(
    self,
    batch: dict[str, str],
    context: execution_context.ExecutionContext,
    parallel_threshold: int,
  ) -> list[str]:
    """Runs all queries of the batch with bounded concurrency.

    Args:
      batch: Mapping between query_title and its text.
      context: Execution context.
      parallel_threshold: Maximum number of queries awaited at once.

    Returns:
      Results of execution in the order of `batch`.

    Raises:
      ValueError: If parallel_threshold is less than 1.
    """
    # A semaphore with zero permits can never be acquired, so every query
    # would wait forever; fail fast instead.
    if parallel_threshold < 1:
      raise ValueError(
        f'parallel_threshold must be at least 1, got {parallel_threshold}'
      )
    semaphore = asyncio.Semaphore(value=parallel_threshold)

    async def run_with_semaphore(coroutine):
      async with semaphore:
        return await coroutine

    coroutines = [
      self.aexecute(query=query, title=title, context=context)
      for title, query in batch.items()
    ]
    return await asyncio.gather(
      *(run_with_semaphore(coroutine) for coroutine in coroutines)
    )