garf-executors 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of garf-executors might be problematic. Click here for more details.

@@ -0,0 +1,26 @@
1
+ # Copyright 2024 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Defines simplified import of executors.
15
+
16
+ Instead of importing `garf_executors.api_executor.ApiQueryExecutor`
17
+ import like this `garf_executors.ApiQueryExecutor`
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from garf_executors.api_executor import ApiQueryExecutor
23
+
24
+ __all__ = [
25
+ 'ApiQueryExecutor',
26
+ ]
@@ -0,0 +1,98 @@
1
+ # Copyright 2024 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Module for executing Garf queries and writing them to local/remote.
15
+
16
+ ApiQueryExecutor performs fetching data from API in a form of
17
+ GarfReport and saving it to local/remote storage.
18
+ """
19
+ # pylint: disable=C0330, g-bad-import-order, g-multiple-import
20
+
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+
25
+ from garf_core import report_fetcher
26
+ from garf_io.writers import abs_writer, console_writer
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class ApiQueryExecutor:
  """Gets data from API and writes them to local/remote storage.

  Attributes:
    fetcher: Report fetcher used to get data from API.
  """

  def __init__(self, fetcher: report_fetcher.ApiReportFetcher) -> None:
    """Initializes ApiQueryExecutor.

    Args:
      fetcher: Instantiated report fetcher.
    """
    self.fetcher = fetcher

  async def aexecute(
    self,
    query_text: str,
    query_name: str,
    writer_client: abs_writer.AbsWriter | None = None,
    args: dict[str, str] | None = None,
    **kwargs: str,
  ) -> None:
    """Reads query, extracts results and stores them in a specified location.

    NOTE(review): delegates to the synchronous `execute`, so the event loop
    is blocked for the duration of fetching and writing.

    Args:
      query_text: Text for the query.
      query_name: Identifier of a query.
      writer_client: Client responsible for writing data to local/remote
        location. Defaults to writing to console.
      args: Arguments that need to be passed to the query.
      **kwargs: Extra arguments forwarded to the fetcher.
    """
    self.execute(query_text, query_name, writer_client, args, **kwargs)

  def execute(
    self,
    query_text: str,
    query_name: str,
    writer_client: abs_writer.AbsWriter | None = None,
    args: dict[str, str] | None = None,
    **kwargs: str,
  ) -> None:
    """Reads query, extracts results and stores them in a specified location.

    Args:
      query_text: Text for the query.
      query_name: Identifier of a query.
      writer_client: Client responsible for writing data to local/remote
        location. Defaults to writing to console.
      args: Arguments that need to be passed to the query.
      **kwargs: Extra arguments forwarded to the fetcher.
    """
    if writer_client is None:
      # Instantiated per call: the previous default was a single
      # ConsoleWriter created once at import time and silently shared by
      # every invocation of both `execute` and `aexecute`.
      writer_client = console_writer.ConsoleWriter()
    results = self.fetcher.fetch(
      query_specification=query_text, args=args, **kwargs
    )
    logger.debug(
      'Start writing data for query %s via %s writer',
      query_name,
      type(writer_client),
    )
    writer_client.write(results, query_name)
    logger.debug(
      'Finish writing data for query %s via %s writer',
      query_name,
      type(writer_client),
    )
@@ -0,0 +1,123 @@
1
+ # Copyright 2024 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Module for executing queries in BigQuery."""
15
+
16
+ from __future__ import annotations
17
+
18
+ try:
19
+ from google.cloud import bigquery # type: ignore
20
+ except ImportError as e:
21
+ raise ImportError(
22
+ 'Please install garf-executors with BigQuery support '
23
+ '- `pip install garf-executors[bq]`'
24
+ ) from e
25
+
26
+ import logging
27
+
28
+ import pandas as pd
29
+ from google.cloud import exceptions as google_cloud_exceptions
30
+
31
+ from garf_core import query_editor
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ class BigQueryExecutorError(Exception):
37
+ """Error when executor fails to run query."""
38
+
39
+
40
class BigQueryExecutor(query_editor.TemplateProcessorMixin):
  """Handles query execution in BigQuery.

  Attributes:
    project_id: Google Cloud project id.
    location: BigQuery dataset location.
    client: BigQuery client.
  """

  def __init__(self, project_id: str, location: str | None = None) -> None:
    """Initializes BigQueryExecutor.

    Args:
      project_id: Google Cloud project id.
      location: BigQuery dataset location.
    """
    self.project_id = project_id
    self.location = location
    # Created lazily on first access of the `client` property.
    self._client = None

  @property
  def client(self) -> bigquery.Client:
    """Returns a cached BigQuery client, creating it on first access.

    Caching fixes the previous behaviour where every property access
    instantiated a brand-new Client — including the repeated accesses
    inside a single `create_datasets` call.
    """
    if self._client is None:
      self._client = bigquery.Client(self.project_id)
    return self._client

  def execute(
    self, script_name: str, query_text: str, params: dict | None = None
  ) -> pd.DataFrame:
    """Executes query in BigQuery.

    Args:
      script_name: Script identifier.
      query_text: Query to be executed.
      params: Optional parameters to be replaced in query text.

    Returns:
      DataFrame if query returns some data otherwise empty DataFrame.

    Raises:
      BigQueryExecutorError: When query execution in BigQuery fails.
    """
    query_text = self.replace_params_template(query_text, params)
    job = self.client.query(query_text)
    try:
      result = job.result()
      logger.debug('%s launched successfully', script_name)
      if result.total_rows:
        return result.to_dataframe()
      # Scripts returning no rows (e.g. DDL) yield an empty frame instead
      # of None so callers always get a DataFrame.
      return pd.DataFrame()
    except google_cloud_exceptions.GoogleCloudError as e:
      raise BigQueryExecutorError(e) from e

  def create_datasets(self, macros: dict | None) -> None:
    """Creates datasets in BQ based on values in a dict.

    If dict contains keys with 'dataset' in them, then values for such keys
    are treated as dataset names.

    Args:
      macros: Mapping containing data for query execution.
    """
    if macros and (datasets := extract_datasets(macros)):
      for dataset in datasets:
        dataset_id = f'{self.project_id}.{dataset}'
        try:
          # EAFP: only create the dataset when it is missing.
          self.client.get_dataset(dataset_id)
        except google_cloud_exceptions.NotFound:
          bq_dataset = bigquery.Dataset(dataset_id)
          bq_dataset.location = self.location
          self.client.create_dataset(bq_dataset, timeout=30)
          logger.debug('Created new dataset %s', dataset_id)
107
+
108
+
109
def extract_datasets(macros: dict | None) -> list[str]:
  """Finds dataset-related keys based on values in a dict.

  If dict contains keys with 'dataset' in them, then values for such keys
  are treated as dataset names.

  Args:
    macros: Mapping containing data for query execution.

  Returns:
    Possible names of datasets.
  """
  if not macros:
    return []
  datasets = []
  for key, value in macros.items():
    if 'dataset' in key:
      datasets.append(value)
  return datasets
File without changes
File without changes
@@ -0,0 +1,213 @@
1
+ # Copyright 2022 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ """Module for defing `garf` CLI utility.
14
+
15
+ `garf` allows to execute GAQL queries and store results in local/remote
16
+ storage.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import functools
23
+ import sys
24
+ from collections.abc import MutableSequence
25
+ from concurrent import futures
26
+ from pathlib import Path
27
+
28
+ import smart_open
29
+ import yaml
30
+ from garf import api_clients, exceptions, query_executor
31
+ from garf.cli import utils
32
+ from garf.io import reader, writer
33
+
34
+
35
def main():
  """Entrypoint for the `garf` CLI utility.

  Parses CLI flags, builds execution config, expands the seed account into
  child accounts and runs each query (in parallel by default) against the
  Google Ads API, writing results via the configured writer.
  """
  parser = argparse.ArgumentParser()
  # Positional: one or more query files/identifiers to execute.
  parser.add_argument('query', nargs='*')
  parser.add_argument('-c', '--config', dest='garf_config', default=None)
  parser.add_argument('--account', dest='account', default=None)
  parser.add_argument('--output', dest='output', default=None)
  parser.add_argument('--input', dest='input', default='file')
  # Path to Google Ads API credentials (yaml), defaults to the home dir.
  parser.add_argument(
    '--ads-config', dest='config', default=str(Path.home() / 'google-ads.yaml')
  )
  parser.add_argument('--api-version', dest='api_version', default=None)
  parser.add_argument('--log', '--loglevel', dest='loglevel', default='info')
  parser.add_argument('--logger', dest='logger', default='local')
  parser.add_argument(
    '--customer-ids-query', dest='customer_ids_query', default=None
  )
  parser.add_argument(
    '--customer-ids-query-file', dest='customer_ids_query_file', default=None
  )
  parser.add_argument('--save-config', dest='save_config', action='store_true')
  parser.add_argument(
    '--no-save-config', dest='save_config', action='store_false'
  )
  parser.add_argument(
    '--config-destination', dest='save_config_dest', default='config.yaml'
  )
  parser.add_argument(
    '--parallel-queries', dest='parallel_queries', action='store_true'
  )
  parser.add_argument(
    '--no-parallel-queries', dest='parallel_queries', action='store_false'
  )
  parser.add_argument(
    '--optimize-performance', dest='optimize_performance', default='NONE'
  )
  parser.add_argument('--dry-run', dest='dry_run', action='store_true')
  parser.add_argument(
    '--disable-account-expansion',
    dest='disable_account_expansion',
    action='store_true',
  )
  parser.add_argument('-v', '--version', dest='version', action='store_true')
  parser.add_argument(
    '--parallel-threshold', dest='parallel_threshold', default=None, type=int
  )
  parser.set_defaults(save_config=False)
  parser.set_defaults(parallel_queries=True)
  parser.set_defaults(dry_run=False)
  parser.set_defaults(disable_account_expansion=False)
  # parse_known_args: unknown flags (args[1]) are forwarded to the config
  # builder as runtime query parameters.
  args = parser.parse_known_args()
  main_args = args[0]

  if main_args.version:
    # NOTE(review): pkg_resources is deprecated in modern setuptools;
    # importlib.metadata is the usual replacement — confirm before changing.
    import pkg_resources

    version = pkg_resources.require('google-ads-api-report-fetcher')[0].version
    print(f'garf version {version}')
    sys.exit()

  logger = utils.init_logging(
    loglevel=main_args.loglevel.upper(), logger_type=main_args.logger
  )
  if not main_args.query:
    logger.error('Please provide one or more queries to run')
    raise exceptions.GarfMissingQueryException(
      'Please provide one or more queries to run'
    )

  # smart_open transparently handles local and remote (e.g. gs://) paths.
  with smart_open.open(main_args.config, 'r', encoding='utf-8') as f:
    google_ads_config_dict = yaml.safe_load(f)

  config = utils.ConfigBuilder('garf').build(vars(main_args), args[1])
  if not config.account:
    # Fall back to the MCC declared in google-ads.yaml when no --account
    # flag was given.
    if mcc := google_ads_config_dict.get('login_customer_id'):
      config.account = str(mcc)
    else:
      raise exceptions.GarfMissingAccountException(
        'No account found, please specify via --account CLI flag'
        'or add as login_customer_id in google-ads.yaml'
      )
  logger.debug('config: %s', config)

  if main_args.save_config and not main_args.garf_config:
    utils.ConfigSaver(main_args.save_config_dest).save(config)
    # dry-run only materializes the config and exits without fetching.
    if main_args.dry_run:
      sys.exit()

  if config.params:
    config = utils.initialize_runtime_parameters(config)
  logger.debug('initialized config: %s', config)

  # proto_plus wrapping is skipped for the PROTOBUF-based performance
  # strategies, which work on raw protobuf messages.
  ads_client = api_clients.GoogleAdsApiClient(
    config_dict=google_ads_config_dict,
    version=config.api_version,
    use_proto_plus=main_args.optimize_performance
    not in ('PROTOBUF', 'BATCH_PROTOBUF'),
  )
  ads_query_executor = query_executor.AdsQueryExecutor(ads_client)
  reader_factory = reader.ReaderFactory()
  reader_client = reader_factory.create_reader(main_args.input)

  # Customer-ids filter query: inline flag wins over query file.
  if config.customer_ids_query:
    customer_ids_query = config.customer_ids_query
  elif config.customer_ids_query_file:
    file_reader = reader_factory.create_reader('file')
    customer_ids_query = file_reader.read(config.customer_ids_query_file)
  else:
    customer_ids_query = None

  if main_args.disable_account_expansion:
    logger.info(
      'Skipping account expansion because of ' 'disable_account_expansion flag'
    )
    # Normalize to a list so downstream code can iterate uniformly.
    customer_ids = (
      config.account
      if isinstance(config.account, MutableSequence)
      else [config.account]
    )
  else:
    # Expand the seed (MCC) account into all matching child accounts.
    customer_ids = ads_query_executor.expand_mcc(
      config.account, customer_ids_query
    )
    if not customer_ids:
      logger.warning(
        'Not a single under MCC %s is found that satisfies '
        'the following customer_id query: "%s"',
        config.account,
        customer_ids_query,
      )
      sys.exit()
  writer_client = writer.WriterFactory().create_writer(
    config.output, **config.writer_params
  )
  # Writer-specific one-time setup before any query runs.
  if config.output == 'bq':
    _ = writer_client.create_or_get_dataset()
  if config.output == 'sheet':
    writer_client.init_client()

  logger.info(
    'Total number of customer_ids is %d, accounts=[%s]',
    len(customer_ids),
    ','.join(map(str, customer_ids)),
  )

  if main_args.parallel_queries:
    logger.info('Running queries in parallel')
    with futures.ThreadPoolExecutor(main_args.parallel_threshold) as executor:
      # Map each future back to its query so failures can be attributed.
      future_to_query = {
        executor.submit(
          ads_query_executor.execute,
          reader_client.read(query),
          query,
          customer_ids,
          writer_client,
          config.params,
          main_args.optimize_performance,
        ): query
        for query in main_args.query
      }
      for future in futures.as_completed(future_to_query):
        query = future_to_query[future]
        # future.result re-raises any exception inside garf_runner.
        utils.garf_runner(query, future.result, logger)
  else:
    logger.info('Running queries sequentially')
    for query in main_args.query:
      callback = functools.partial(
        ads_query_executor.execute,
        reader_client.read(query),
        query,
        customer_ids,
        writer_client,
        config.params,
        main_args.optimize_performance,
      )
      utils.garf_runner(query, callback, logger)


if __name__ == '__main__':
  main()
@@ -0,0 +1,112 @@
1
+ # Copyright 2022 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Module for defing `garf-bq` CLI utility.
15
+
16
+ `garf-bq` allows to execute BigQuery queries based on Garf config.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import functools
23
+ import sys
24
+ from concurrent import futures
25
+
26
+ from garf_writers import reader # type: ignore
27
+
28
+ from garf_executors import bq_executor
29
+ from garf_executors.entrypoints import utils
30
+
31
+
32
def main():
  """Entrypoint for the `garf-bq` CLI utility.

  Parses CLI flags, builds execution config, ensures the required BigQuery
  datasets exist and runs queries in BigQuery (in parallel by default).
  """
  parser = argparse.ArgumentParser()
  # Positional: one or more query files to execute.
  parser.add_argument('query', nargs='+')
  parser.add_argument('-c', '--config', dest='garf_config', default=None)
  parser.add_argument('--project', dest='project')
  parser.add_argument(
    '--dataset-location', dest='dataset_location', default=None
  )
  parser.add_argument('--save-config', dest='save_config', action='store_true')
  parser.add_argument(
    '--no-save-config', dest='save_config', action='store_false'
  )
  parser.add_argument(
    '--config-destination', dest='save_config_dest', default='config.yaml'
  )
  parser.add_argument('--log', '--loglevel', dest='loglevel', default='info')
  parser.add_argument('--logger', dest='logger', default='local')
  parser.add_argument('--dry-run', dest='dry_run', action='store_true')
  parser.add_argument(
    '--parallel-queries', dest='parallel_queries', action='store_true'
  )
  parser.add_argument(
    '--no-parallel-queries', dest='parallel_queries', action='store_false'
  )
  parser.add_argument(
    '--parallel-threshold', dest='parallel_threshold', default=None, type=int
  )
  parser.set_defaults(save_config=False)
  parser.set_defaults(dry_run=False)
  parser.set_defaults(parallel_queries=True)
  # parse_known_args: unknown flags (args[1]) become runtime parameters.
  args = parser.parse_known_args()
  main_args = args[0]

  logger = utils.init_logging(
    loglevel=main_args.loglevel.upper(), logger_type=main_args.logger
  )
  config = utils.ConfigBuilder('garf-bq').build(vars(main_args), args[1])
  logger.debug('config: %s', config)
  if main_args.save_config and not main_args.garf_config:
    utils.ConfigSaver(main_args.save_config_dest).save(config)
    # dry-run only materializes the config and exits without executing.
    if main_args.dry_run:
      sys.exit()

  config = utils.initialize_runtime_parameters(config)
  logger.debug('initialized config: %s', config)

  bigquery_executor = bq_executor.BigQueryExecutor(
    project_id=config.project, location=config.dataset_location
  )
  # Datasets referenced via macros must exist before queries run.
  bigquery_executor.create_datasets(config.params.get('macro'))

  reader_client = reader.FileReader()

  if main_args.parallel_queries:
    logger.info('Running queries in parallel')
    with futures.ThreadPoolExecutor(
      max_workers=main_args.parallel_threshold
    ) as executor:
      # Map each future back to its query so failures can be attributed.
      future_to_query = {
        executor.submit(
          bigquery_executor.execute,
          query,
          reader_client.read(query),
          config.params,
        ): query
        for query in sorted(main_args.query)
      }
      for future in futures.as_completed(future_to_query):
        query = future_to_query[future]
        utils.postprocessor_runner(query, future.result, logger)
  else:
    logger.info('Running queries sequentially')
    for query in sorted(main_args.query):
      # Bug fix: previously called `executor.execute`, but `executor` is
      # bound only inside the parallel branch's ThreadPoolExecutor context,
      # so sequential execution raised NameError. The BigQuery executor is
      # the correct target, mirroring the parallel branch.
      callback = functools.partial(
        bigquery_executor.execute,
        query,
        reader_client.read(query),
        config.params,
      )
      utils.postprocessor_runner(query, callback, logger)


if __name__ == '__main__':
  main()
+ main()