garf-executors 0.0.6__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- garf_executors/__init__.py +40 -10
- garf_executors/api_executor.py +28 -45
- garf_executors/bq_executor.py +56 -19
- garf_executors/config.py +51 -0
- garf_executors/entrypoints/cli.py +66 -49
- garf_executors/entrypoints/server.py +60 -17
- garf_executors/entrypoints/tracer.py +42 -0
- garf_executors/entrypoints/utils.py +32 -359
- garf_executors/execution_context.py +83 -0
- garf_executors/executor.py +87 -0
- garf_executors/fetchers.py +54 -16
- garf_executors/sql_executor.py +59 -22
- garf_executors/telemetry.py +20 -0
- {garf_executors-0.0.6.dist-info → garf_executors-0.1.4.dist-info}/METADATA +8 -3
- garf_executors-0.1.4.dist-info/RECORD +20 -0
- garf_executors-0.0.6.dist-info/RECORD +0 -15
- {garf_executors-0.0.6.dist-info → garf_executors-0.1.4.dist-info}/WHEEL +0 -0
- {garf_executors-0.0.6.dist-info → garf_executors-0.1.4.dist-info}/entry_points.txt +0 -0
- {garf_executors-0.0.6.dist-info → garf_executors-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
from opentelemetry import trace
|
|
18
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
|
|
19
|
+
OTLPSpanExporter,
|
|
20
|
+
)
|
|
21
|
+
from opentelemetry.sdk.resources import Resource
|
|
22
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
23
|
+
from opentelemetry.sdk.trace.export import (
|
|
24
|
+
BatchSpanProcessor,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
DEFAULT_SERVICE_NAME = 'garf'
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def initialize_tracer():
  """Registers a global OpenTelemetry tracer provider.

  The `service.name` resource attribute is taken from the `OTLP_SERVICE_NAME`
  environment variable, falling back to `DEFAULT_SERVICE_NAME`. When
  `OTEL_EXPORTER_OTLP_ENDPOINT` is set, a batching OTLP/gRPC span exporter
  pointing at that endpoint is attached; the exporter is created with
  `insecure=True`.
  """
  service_name = os.getenv('OTLP_SERVICE_NAME', DEFAULT_SERVICE_NAME)
  provider = TracerProvider(
    resource=Resource.create({'service.name': service_name})
  )

  endpoint = os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT')
  if endpoint:
    exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)
    provider.add_span_processor(BatchSpanProcessor(exporter))
  # NOTE(review): the provider is registered whether or not an endpoint is
  # configured - confirm this matches the intended nesting.
  trace.set_tracer_provider(provider)
|
|
@@ -15,261 +15,14 @@
|
|
|
15
15
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
|
-
import
|
|
19
|
-
import datetime
|
|
18
|
+
import enum
|
|
20
19
|
import logging
|
|
21
|
-
import os
|
|
22
20
|
import sys
|
|
23
|
-
from collections.abc import
|
|
24
|
-
from typing import Any
|
|
21
|
+
from collections.abc import Sequence
|
|
22
|
+
from typing import Any
|
|
25
23
|
|
|
26
|
-
import smart_open
|
|
27
|
-
import yaml
|
|
28
|
-
from dateutil import relativedelta
|
|
29
24
|
from rich import logging as rich_logging
|
|
30
25
|
|
|
31
|
-
from garf_core import query_editor
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class GarfQueryParameters(TypedDict):
  """Typed dictionary of query-specific parameters supplied via CLI.

  Attributes:
    macros: Names mapped to the values that replace them in query text.
    template: Names mapped to the values rendered through Jinja templates.
  """

  macros: dict[str, str]
  template: dict[str, str]
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
@dataclasses.dataclass
class BaseConfig:
  """Parent of all configs; provides merging and dict-based construction."""

  def __add__(self, other: BaseConfig) -> BaseConfig:
    """Merges two configs into a new config of this instance's class.

    Empty values are dropped from both sides first; for keys present in both
    configs the value from `other` wins.

    Args:
      other: Config whose non-empty values override the current ones.

    Returns:
      New config holding the combined values.
    """
    merged = _remove_empty_values(self.__dict__)
    merged.update(_remove_empty_values(other.__dict__))
    return self.__class__(**merged)

  @classmethod
  def from_dict(
    cls, config_parameters: dict[str, str | GarfQueryParameters]
  ) -> BaseConfig:
    """Creates a config from a mapping after stripping its empty values."""
    non_empty = _remove_empty_values(config_parameters)
    return cls(**non_empty)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
@dataclasses.dataclass
class GarfConfig(BaseConfig):
  """Holds the values needed to run garf from the command line.

  Attributes:
    account:
      Account(s) to get data from.
    output:
      Specifies where to store fetched data (console, csv, BQ.)
    params:
      Any parameters passed to Garf query for substitution.
    writer_params:
      Any parameters that can be passed to writer for data saving.
    customer_ids_query:
      Query text to limit accounts fetched from Ads API.
    customer_ids_query_file:
      Path to query to limit accounts fetched from Ads API.
  """

  account: str | list[str] | None = None
  output: str = 'console'
  params: GarfQueryParameters = dataclasses.field(default_factory=dict)
  writer_params: dict[str, str | int] = dataclasses.field(default_factory=dict)
  customer_ids_query: str | None = None
  customer_ids_query_file: str | None = None

  def __post_init__(self) -> None:
    """Normalizes account ids and writer parameter names after init."""
    accounts = self.account
    if isinstance(accounts, MutableSequence):
      # Every element is stringified with dashes and outer whitespace removed.
      self.account = [str(item).replace('-', '').strip() for item in accounts]
    elif accounts:
      self.account = str(accounts).replace('-', '').strip()
    else:
      # Any falsy account collapses to None.
      self.account = None

    # Dashes in writer parameter names become underscores.
    normalized = {}
    for name, value in self.writer_params.items():
      normalized[name.replace('-', '_')] = value
    self.writer_params = normalized
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
class GarfConfigException(Exception):
  """Signals that a garf config is invalid."""
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
@dataclasses.dataclass
class GarfBqConfig(BaseConfig):
  """Holds the values needed to run garf-bq from the command line.

  Attributes:
    project:
      Google Cloud project name.
    dataset_location:
      Location of BigQuery dataset.
    params:
      Any parameters passed to BigQuery query for substitution.
  """

  project: str | None = None
  dataset_location: str | None = None
  params: GarfQueryParameters = dataclasses.field(default_factory=dict)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
@dataclasses.dataclass
class GarfSqlConfig(BaseConfig):
  """Holds the values needed to run garf-sql from the command line.

  Attributes:
    connection_string:
      Connection string to SqlAlchemy database engine.
    params:
      Any parameters passed to SQL query for substitution.
  """

  connection_string: str | None = None
  params: GarfQueryParameters = dataclasses.field(default_factory=dict)
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
class ConfigBuilder:
  """Builds config of provided type.

  Config can be created from a file, built from CLI arguments, or both.

  Attributes:
    config: Concrete config class that needs to be built.
  """

  # Maps a supported config type name to the config class built for it.
  _config_mapping: dict[str, type[BaseConfig]] = {
    'garf': GarfConfig,
    'garf-bq': GarfBqConfig,
    'garf-sql': GarfSqlConfig,
  }

  def __init__(self, config_type: str) -> None:
    """Sets concrete config type.

    Args:
      config_type: Type of config that should be built.

    Raises:
      GarfConfigException: When incorrect config_type is supplied.
    """
    if config_type not in self._config_mapping:
      raise GarfConfigException(f'Invalid config_type: {config_type}')
    self._config_type = config_type
    self.config = self._config_mapping.get(config_type)

  def build(
    self, parameters: dict[str, str], cli_named_args: Sequence[str]
  ) -> BaseConfig | None:
    """Builds config from file, from CLI arguments, or from both.

    When there are both a config file and CLI arguments, the latter have
    higher priority.

    Args:
      parameters: Parsed CLI arguments.
      cli_named_args: Unparsed CLI args in a form `--key.subkey=value`.

    Returns:
      Concrete config with injected values.
    """
    if not (garf_config_path := parameters.get('garf_config')):
      return self._build_config(parameters, cli_named_args)
    config_file = self._load_config(garf_config_path)
    config_cli = self._build_config(
      parameters, cli_named_args, init_defaults=False
    )
    if config_file and config_cli:
      # `+` lets non-empty CLI values override the values read from file.
      config_file = config_file + config_cli
    return config_file

  def _build_config(
    self,
    parameters: dict[str, str],
    cli_named_args: Sequence[str],
    init_defaults: bool = True,
  ) -> BaseConfig | None:
    """Builds config from named and unnamed CLI parameters.

    Args:
      parameters: Parsed CLI arguments.
      cli_named_args: Unparsed CLI args in a form `--key.subkey=value`.
      init_defaults: When True the config is created via `from_dict`, which
        drops empty values first; when False the collected parameters are
        passed to the config class as is.

    Returns:
      Concrete config with injected values, or None when no parameter
      relevant to the config was found.
    """
    output = parameters.get('output')
    # Keep only parameters that are declared fields of the config class.
    config_parameters = {
      k: v for k, v in parameters.items() if k in self.config.__annotations__
    }
    cli_params = ParamsParser(['macro', 'template', output]).parse(
      cli_named_args
    )
    cli_params = _remove_empty_values(cli_params)
    # Arguments prefixed with the output name are writer parameters.
    if output and (writer_params := cli_params.get(output)):
      _ = cli_params.pop(output)
      config_parameters.update({'writer_params': writer_params})
    if cli_params:
      config_parameters.update({'params': cli_params})
    if not config_parameters:
      return None
    if init_defaults:
      return self.config.from_dict(config_parameters)
    return self.config(**config_parameters)

  def _load_config(self, garf_config_path: str) -> BaseConfig:
    """Loads config from provided path.

    Args:
      garf_config_path: Path to local or remote storage.

    Returns:
      Concrete config with values taken from config file.

    Raises:
      GarfConfigException:
        If the config file has no section named after the config type,
        including when the file is empty or is not a YAML mapping.
    """
    with smart_open.open(garf_config_path, encoding='utf-8') as f:
      loaded = yaml.safe_load(f)
    # An empty document loads as None and a scalar/list has no sections;
    # treat both as "no sections" so the explicit error below is raised.
    config = loaded if isinstance(loaded, dict) else {}
    garf_section = config.get(self._config_type)
    if not garf_section:
      raise GarfConfigException(
        f'Invalid config, must have `{self._config_type}` section!'
      )
    config_parameters = {
      k: v for k, v in garf_section.items() if k in self.config.__annotations__
    }
    if params := garf_section.get('params', {}):
      config_parameters.update({'params': params})
    # Writer parameters live under a key named after the chosen output.
    if writer_params := garf_section.get(garf_section.get('output', '')):
      config_parameters.update({'writer_params': writer_params})
    return self.config(**config_parameters)
|
|
272
|
-
|
|
273
26
|
|
|
274
27
|
class ParamsParser:
|
|
275
28
|
def __init__(self, identifiers: Sequence[str]) -> None:
|
|
@@ -298,6 +51,8 @@ class ParamsParser:
|
|
|
298
51
|
if not identifier or identifier not in key:
|
|
299
52
|
return None
|
|
300
53
|
provided_identifier, *keys = key.split('.')
|
|
54
|
+
if not keys:
|
|
55
|
+
return None
|
|
301
56
|
if len(keys) > 1:
|
|
302
57
|
raise GarfParamsException(
|
|
303
58
|
f'{key} is invalid format,'
|
|
@@ -306,9 +61,10 @@ class ParamsParser:
|
|
|
306
61
|
)
|
|
307
62
|
provided_identifier = provided_identifier.replace('--', '')
|
|
308
63
|
if provided_identifier not in self.identifiers:
|
|
64
|
+
supported_arguments = ', '.join(self.identifiers)
|
|
309
65
|
raise GarfParamsException(
|
|
310
66
|
f'CLI argument {provided_identifier} is not supported'
|
|
311
|
-
f', supported arguments {
|
|
67
|
+
f', supported arguments {supported_arguments}'
|
|
312
68
|
)
|
|
313
69
|
if provided_identifier != identifier:
|
|
314
70
|
return None
|
|
@@ -334,118 +90,18 @@ class GarfParamsException(Exception):
|
|
|
334
90
|
"""Defines exception for incorrect parameters."""
|
|
335
91
|
|
|
336
92
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
Date string in YYYY-MM-DD format.
|
|
342
|
-
|
|
343
|
-
Raises:
|
|
344
|
-
ValueError:
|
|
345
|
-
If dynamic lookback value (:YYYYMMDD-N) is incorrect.
|
|
346
|
-
"""
|
|
347
|
-
if isinstance(date_string, list) or date_string.find(':YYYY') == -1:
|
|
348
|
-
return date_string
|
|
349
|
-
current_date = datetime.date.today()
|
|
350
|
-
date_object = date_string.split('-')
|
|
351
|
-
base_date = date_object[0]
|
|
352
|
-
if len(date_object) == 2:
|
|
353
|
-
try:
|
|
354
|
-
days_ago = int(date_object[1])
|
|
355
|
-
except ValueError as e:
|
|
356
|
-
raise ValueError(
|
|
357
|
-
'Must provide numeric value for a number lookback period, '
|
|
358
|
-
'i.e. :YYYYMMDD-1'
|
|
359
|
-
) from e
|
|
360
|
-
else:
|
|
361
|
-
days_ago = 0
|
|
362
|
-
if base_date == ':YYYY':
|
|
363
|
-
new_date = datetime.datetime(current_date.year, 1, 1)
|
|
364
|
-
delta = relativedelta.relativedelta(years=days_ago)
|
|
365
|
-
elif base_date == ':YYYYMM':
|
|
366
|
-
new_date = datetime.datetime(current_date.year, current_date.month, 1)
|
|
367
|
-
delta = relativedelta.relativedelta(months=days_ago)
|
|
368
|
-
elif base_date == ':YYYYMMDD':
|
|
369
|
-
new_date = current_date
|
|
370
|
-
delta = relativedelta.relativedelta(days=days_ago)
|
|
371
|
-
return (new_date - delta).strftime('%Y-%m-%d')
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
class ConfigSaver:
  """Writes a config into a YAML file under a section for its type.

  Attributes:
    path: Location of the YAML file to create or update.
  """

  def __init__(self, path: str) -> None:
    """Remembers where the config should be saved."""
    self.path = path

  def save(self, garf_config: BaseConfig) -> None:
    """Merges the config into the file at `path`, creating it if needed.

    Args:
      garf_config: Config to persist.
    """
    if os.path.exists(self.path):
      with smart_open.open(self.path, 'r', encoding='utf-8') as f:
        # An existing but empty file loads as None; start from an empty dict.
        config = yaml.safe_load(f) or {}
    else:
      config = {}
    config = self.prepare_config(config, garf_config)
    with smart_open.open(self.path, 'w', encoding='utf-8') as f:
      yaml.dump(
        config, f, default_flow_style=False, sort_keys=False, encoding='utf-8'
      )

  def prepare_config(self, config: dict, garf_config: BaseConfig) -> dict:
    """Places the config's non-empty values into its section of `config`.

    Args:
      config: Mapping to update; it is modified in place.
      garf_config: Config whose values should be stored.

    Returns:
      The updated `config` mapping.
    """
    garf = dataclasses.asdict(garf_config)
    if isinstance(garf_config, GarfConfig):
      # Writer parameters are stored under a key named after the output.
      garf[garf_config.output] = garf_config.writer_params
      account = garf_config.account
      # A missing account has nothing to split; it is dropped below together
      # with the other empty values.
      if account is not None and not isinstance(account, MutableSequence):
        garf['account'] = account.split(',')
      del garf['writer_params']
      garf = _remove_empty_values(garf)
      config.update({'garf': garf})
    if isinstance(garf_config, GarfBqConfig):
      garf = _remove_empty_values(garf)
      config.update({'garf-bq': garf})
    if isinstance(garf_config, GarfSqlConfig):
      garf = _remove_empty_values(garf)
      config.update({'garf-sql': garf})
    return config
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
def initialize_runtime_parameters(config: BaseConfig) -> BaseConfig:
  """Converts dates in config parameters and injects common parameters.

  Every value in every section of `config.params` is passed through
  `convert_date`. Afterwards each section receives the common parameters
  it does not define itself. The config is modified in place.

  Args:
    config: Instantiated config.

  Returns:
    The same config with formatted parameters.
  """
  shared_params = query_editor.CommonParametersMixin().common_params
  for section in config.params.values():
    for name in list(section):
      section[name] = convert_date(section[name])
    # Values already set in the section take precedence over common ones.
    for name, value in shared_params.items():
      section.setdefault(name, value)
  return config
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
def _remove_empty_values(dict_object: dict[str, Any]) -> dict[str, Any]:
|
|
433
|
-
"""Remove all empty elements: strings, dictionaries from a dictionary."""
|
|
434
|
-
if isinstance(dict_object, dict):
|
|
435
|
-
return {
|
|
436
|
-
key: value
|
|
437
|
-
for key, value in (
|
|
438
|
-
(key, _remove_empty_values(value)) for key, value in dict_object.items()
|
|
439
|
-
)
|
|
440
|
-
if value
|
|
441
|
-
}
|
|
442
|
-
if isinstance(dict_object, (int, str, MutableSequence)):
|
|
443
|
-
return dict_object
|
|
93
|
+
class LoggerEnum(str, enum.Enum):
  """Logger types; members are strings and compare equal to their values."""

  local = 'local'
  rich = 'rich'
  gcloud = 'gcloud'
|
|
444
97
|
|
|
445
98
|
|
|
446
99
|
def init_logging(
|
|
447
|
-
loglevel: str = 'INFO',
|
|
100
|
+
loglevel: str = 'INFO',
|
|
101
|
+
logger_type: str | LoggerEnum = 'local',
|
|
102
|
+
name: str = __name__,
|
|
448
103
|
) -> logging.Logger:
|
|
104
|
+
loglevel = getattr(logging, loglevel)
|
|
449
105
|
if logger_type == 'rich':
|
|
450
106
|
logging.basicConfig(
|
|
451
107
|
format='%(message)s',
|
|
@@ -455,6 +111,23 @@ def init_logging(
|
|
|
455
111
|
rich_logging.RichHandler(rich_tracebacks=True),
|
|
456
112
|
],
|
|
457
113
|
)
|
|
114
|
+
elif logger_type == 'gcloud':
|
|
115
|
+
try:
|
|
116
|
+
import google.cloud.logging as glogging
|
|
117
|
+
except ImportError as e:
|
|
118
|
+
raise ImportError(
|
|
119
|
+
'Please install garf-executors with Cloud logging support - '
|
|
120
|
+
'`pip install garf-executors[bq]`'
|
|
121
|
+
) from e
|
|
122
|
+
|
|
123
|
+
client = glogging.Client()
|
|
124
|
+
handler = glogging.handlers.CloudLoggingHandler(client, name=name)
|
|
125
|
+
handler.close()
|
|
126
|
+
glogging.handlers.setup_logging(handler, log_level=loglevel)
|
|
127
|
+
logging.basicConfig(
|
|
128
|
+
level=loglevel,
|
|
129
|
+
handlers=[handler],
|
|
130
|
+
)
|
|
458
131
|
else:
|
|
459
132
|
logging.basicConfig(
|
|
460
133
|
format='[%(asctime)s][%(name)s][%(levelname)s] %(message)s',
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# pylint: disable=C0330, g-bad-import-order, g-multiple-import
|
|
16
|
+
|
|
17
|
+
"""Captures parameters for fetching data from APIs."""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import pathlib
|
|
23
|
+
|
|
24
|
+
import pydantic
|
|
25
|
+
import smart_open
|
|
26
|
+
import yaml
|
|
27
|
+
from garf_core import query_editor
|
|
28
|
+
from garf_io import writer
|
|
29
|
+
from garf_io.writers import abs_writer
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ExecutionContext(pydantic.BaseModel):
  """Common context for executing one or more queries.

  Attributes:
    query_parameters: Parameters to dynamically change query text.
    fetcher_parameters: Parameters to specify fetching setup.
    writer: Type of writer to use.
    writer_parameters: Optional parameters to setup writer.
  """

  query_parameters: query_editor.GarfQueryParameters | None = pydantic.Field(
    default_factory=dict
  )
  fetcher_parameters: dict[str, str | bool | int | list[str | int]] | None = (
    pydantic.Field(default_factory=dict)
  )
  writer: str | None = None
  writer_parameters: dict[str, str] | None = pydantic.Field(
    default_factory=dict
  )

  def model_post_init(self, __context__) -> None:
    """Replaces missing parameter groups with empty defaults."""
    if self.fetcher_parameters is None:
      self.fetcher_parameters = {}
    if self.writer_parameters is None:
      self.writer_parameters = {}
    # Both None and an empty mapping are swapped for a fresh parameters object.
    if not self.query_parameters:
      self.query_parameters = query_editor.GarfQueryParameters()

  @classmethod
  def from_file(
    cls, path: str | pathlib.Path | os.PathLike[str]
  ) -> ExecutionContext:
    """Builds context from local or remote yaml file.

    Args:
      path: Location of the yaml file.

    Returns:
      Context of the class the method is called on; an empty yaml document
      produces a context with default values.
    """
    with smart_open.open(path, 'r', encoding='utf-8') as f:
      data = yaml.safe_load(f)
    # `safe_load` gives None for an empty document, which cannot be unpacked.
    # Using `cls` keeps subclasses from being downgraded to the base class.
    return cls(**(data or {}))

  def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
    """Saves context to local or remote yaml file.

    Args:
      path: Location to write the yaml file to.

    Returns:
      Message confirming where the context was saved.
    """
    with smart_open.open(path, 'w', encoding='utf-8') as f:
      yaml.dump(self.model_dump(), f, encoding='utf-8')
    return f'ExecutionContext is saved to {str(path)}'

  @property
  def writer_client(self) -> abs_writer.AbsWriter:
    """Creates a writer for the configured type and prepares it for use.

    A new writer is created on every access. The `bq` and `sheet` writers
    get an extra setup call before being returned.
    """
    client = writer.create_writer(self.writer, **self.writer_parameters)
    if self.writer == 'bq':
      _ = client.create_or_get_dataset()
    if self.writer == 'sheet':
      client.init_client()
    return client
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Defines common functionality between executors."""
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
|
|
19
|
+
from opentelemetry import trace
|
|
20
|
+
|
|
21
|
+
from garf_executors import execution_context
|
|
22
|
+
from garf_executors.telemetry import tracer
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Executor:
  """Defines common functionality between executors.

  The class itself does not define `execute`; `aexecute` calls
  `self.execute(query, title, context)`, so concrete executors must
  provide it.
  """

  @tracer.start_as_current_span('api.execute_batch')
  def execute_batch(
    self,
    batch: dict[str, str],
    context: execution_context.ExecutionContext,
    parallel_threshold: int = 10,
  ) -> list[str]:
    """Executes batch of queries for a common context.

    The batch is driven by `asyncio.run`, so this method starts its own
    event loop.

    Args:
      batch: Mapping between query_title and its text.
      context: Execution context.
      parallel_threshold: Number of queries to execute in parallel.

    Returns:
      Results of execution.

    Raises:
      ValueError: If parallel_threshold is less than 1.
    """
    span = trace.get_current_span()
    span.set_attribute('api.parallel_threshold', parallel_threshold)
    return asyncio.run(
      self._run(
        batch=batch, context=context, parallel_threshold=parallel_threshold
      )
    )

  async def aexecute(
    self,
    query: str,
    title: str,
    context: execution_context.ExecutionContext,
  ) -> str:
    """Performs query execution asynchronously.

    The synchronous `execute` method is run in a separate thread.

    Args:
      query: Location of the query.
      title: Name of the query.
      context: Query execution context.

    Returns:
      Result of writing the report.
    """
    return await asyncio.to_thread(self.execute, query, title, context)

  async def _run(
    self,
    batch: dict[str, str],
    context: execution_context.ExecutionContext,
    parallel_threshold: int,
  ) -> list[str]:
    """Runs every query of the batch with bounded concurrency.

    Args:
      batch: Mapping between query_title and its text.
      context: Execution context.
      parallel_threshold: Maximum number of queries running at once.

    Returns:
      Results in the order of the batch items.

    Raises:
      ValueError: If parallel_threshold is less than 1.
    """
    # A semaphore created with 0 never admits a task, so the batch would
    # hang forever; fail fast before any coroutine is created.
    if parallel_threshold < 1:
      raise ValueError(
        'parallel_threshold must be a positive integer, '
        f'got {parallel_threshold}'
      )
    semaphore = asyncio.Semaphore(value=parallel_threshold)

    async def run_with_semaphore(fn):
      async with semaphore:
        return await fn

    tasks = [
      self.aexecute(query=query, title=title, context=context)
      for title, query in batch.items()
    ]
    return await asyncio.gather(*(run_with_semaphore(task) for task in tasks))
|