garf-executors 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of garf-executors might be problematic.
- garf_executors-0.0.1/PKG-INFO +30 -0
- garf_executors-0.0.1/README.md +1 -0
- garf_executors-0.0.1/garf_executors/__init__.py +26 -0
- garf_executors-0.0.1/garf_executors/api_executor.py +98 -0
- garf_executors-0.0.1/garf_executors/bq_executor.py +123 -0
- garf_executors-0.0.1/garf_executors/entrypoints/__init__.py +0 -0
- garf_executors-0.0.1/garf_executors/entrypoints/cli/__init__.py +0 -0
- garf_executors-0.0.1/garf_executors/entrypoints/cli/api.py +213 -0
- garf_executors-0.0.1/garf_executors/entrypoints/cli/bq.py +112 -0
- garf_executors-0.0.1/garf_executors/entrypoints/cli/gaarf.py +213 -0
- garf_executors-0.0.1/garf_executors/entrypoints/cli/sql.py +109 -0
- garf_executors-0.0.1/garf_executors/entrypoints/utils.py +470 -0
- garf_executors-0.0.1/garf_executors/sql_executor.py +79 -0
- garf_executors-0.0.1/garf_executors.egg-info/PKG-INFO +30 -0
- garf_executors-0.0.1/garf_executors.egg-info/SOURCES.txt +19 -0
- garf_executors-0.0.1/garf_executors.egg-info/dependency_links.txt +1 -0
- garf_executors-0.0.1/garf_executors.egg-info/entry_points.txt +3 -0
- garf_executors-0.0.1/garf_executors.egg-info/requires.txt +13 -0
- garf_executors-0.0.1/garf_executors.egg-info/top_level.txt +1 -0
- garf_executors-0.0.1/pyproject.toml +46 -0
- garf_executors-0.0.1/setup.cfg +4 -0
garf_executors-0.0.1/PKG-INFO
@@ -0,0 +1,30 @@
+Metadata-Version: 2.1
+Name: garf-executors
+Version: 0.0.1
+Summary: Executes queries against API and writes data to local/remote storage.
+Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>
+License: Apache 2.0
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Operating System :: OS Independent
+Classifier: License :: OSI Approved :: Apache Software License
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: garf-core
+Requires-Dist: garf-io
+Provides-Extra: bq
+Requires-Dist: garf-io[bq]; extra == "bq"
+Requires-Dist: pandas; extra == "bq"
+Provides-Extra: sql
+Requires-Dist: garf-io[sqlalchemy]; extra == "sql"
+Requires-Dist: pandas; extra == "sql"
+Provides-Extra: all
+Requires-Dist: garf-executors[bq,sql]; extra == "all"
+
+# Gaarf Executors
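The Requires-Dist/Provides-Extra entries above define three optional installs: bq, sql, and all, which pulls in both. Once the package is installed, the same metadata can be inspected at runtime; a small sketch:

import importlib.metadata

# Prints the Requires-Dist entries shown above, including the
# 'extra == "bq"' and 'extra == "sql"' markers.
for requirement in importlib.metadata.requires('garf-executors') or []:
  print(requirement)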
garf_executors-0.0.1/README.md
@@ -0,0 +1 @@
+# Gaarf Executors
garf_executors-0.0.1/garf_executors/__init__.py
@@ -0,0 +1,26 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Defines simplified import of executors.
+
+Instead of importing `garf_executors.api_executor.ApiQueryExecutor`,
+import `garf_executors.ApiQueryExecutor`.
+"""
+
+from __future__ import annotations
+
+from garf_executors.api_executor import ApiQueryExecutor
+
+__all__ = [
+  'ApiQueryExecutor',
+]
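For illustration, the simplified import in action; both forms resolve to the same class, so the assertion below holds:

# Both imports resolve to the same class; the package-level alias is shorter.
from garf_executors import ApiQueryExecutor
from garf_executors.api_executor import ApiQueryExecutor as _LongPath

assert ApiQueryExecutor is _LongPath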
garf_executors-0.0.1/garf_executors/api_executor.py
@@ -0,0 +1,98 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for executing Garf queries and writing them to local/remote storage.
+
+ApiQueryExecutor fetches data from an API in the form of a
+GarfReport and saves it to local/remote storage.
+"""
+# pylint: disable=C0330, g-bad-import-order, g-multiple-import
+
+from __future__ import annotations
+
+import logging
+
+from garf_core import report_fetcher
+from garf_io.writers import abs_writer, console_writer
+
+logger = logging.getLogger(__name__)
+
+
+class ApiQueryExecutor:
+  """Gets data from an API and writes it to local/remote storage.
+
+  Attributes:
+    fetcher: Report fetcher used for connecting to the API.
+  """
+
+  def __init__(self, fetcher: report_fetcher.ApiReportFetcher) -> None:
+    """Initializes ApiQueryExecutor.
+
+    Args:
+      fetcher: Instantiated report fetcher.
+    """
+    self.fetcher = fetcher
+
+  async def aexecute(
+    self,
+    query_text: str,
+    query_name: str,
+    writer_client: abs_writer.AbsWriter = console_writer.ConsoleWriter(),
+    args: dict[str, str] | None = None,
+    **kwargs: str,
+  ) -> None:
+    """Reads a query, extracts results and stores them in a specified location.
+
+    Args:
+      query_text: Text of the query.
+      query_name: Identifier of a query.
+      writer_client: Client responsible for writing data to local/remote
+        location.
+      args: Arguments that need to be passed to the query.
+    """
+    self.execute(query_text, query_name, writer_client, args, **kwargs)
+
+  def execute(
+    self,
+    query_text: str,
+    query_name: str,
+    writer_client: abs_writer.AbsWriter = console_writer.ConsoleWriter(),
+    args: dict[str, str] | None = None,
+    **kwargs: str,
+  ) -> None:
+    """Reads a query, extracts results and stores them in a specified location.
+
+    Args:
+      query_text: Text of the query.
+      query_name: Identifier of a query.
+      writer_client: Client responsible for writing data to local/remote
+        location.
+      args: Arguments that need to be passed to the query.
+    """
+    results = self.fetcher.fetch(
+      query_specification=query_text, args=args, **kwargs
+    )
+    logger.debug(
+      'Start writing data for query %s via %s writer',
+      query_name,
+      type(writer_client),
+    )
+    writer_client.write(results, query_name)
+    logger.debug(
+      'Finish writing data for query %s via %s writer',
+      query_name,
+      type(writer_client),
+    )
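A minimal usage sketch of ApiQueryExecutor under stated assumptions: StubFetcher and StubWriter are hypothetical stand-ins implementing only the duck-typed interface the executor above actually calls (`.fetch(...)` and `.write(...)`); a real setup would pass a `report_fetcher.ApiReportFetcher` and a garf_io writer instead.

from garf_executors import ApiQueryExecutor


class StubFetcher:
  """Stand-in for report_fetcher.ApiReportFetcher (hypothetical)."""

  def fetch(self, query_specification, args=None, **kwargs):
    # A real fetcher returns a GarfReport built from the API response;
    # rows-as-dicts keep the sketch self-contained.
    return [{'campaign_id': 1}, {'campaign_id': 2}]


class StubWriter:
  """Stand-in for a garf_io writer (hypothetical); just prints rows."""

  def write(self, results, destination):
    print(destination, results)


executor = ApiQueryExecutor(StubFetcher())
executor.execute(
  query_text='SELECT campaign.id AS campaign_id FROM campaign',
  query_name='campaigns',
  writer_client=StubWriter(),
)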
garf_executors-0.0.1/garf_executors/bq_executor.py
@@ -0,0 +1,123 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for executing queries in BigQuery."""
+
+from __future__ import annotations
+
+try:
+  from google.cloud import bigquery  # type: ignore
+except ImportError as e:
+  raise ImportError(
+    'Please install garf-executors with BigQuery support '
+    '- `pip install garf-executors[bq]`'
+  ) from e
+
+import logging
+
+import pandas as pd
+from google.cloud import exceptions as google_cloud_exceptions
+
+from garf_core import query_editor
+
+logger = logging.getLogger(__name__)
+
+
+class BigQueryExecutorError(Exception):
+  """Error when executor fails to run query."""
+
+
+class BigQueryExecutor(query_editor.TemplateProcessorMixin):
+  """Handles query execution in BigQuery.
+
+  Attributes:
+    project_id: Google Cloud project id.
+    location: BigQuery dataset location.
+    client: BigQuery client.
+  """
+
+  def __init__(self, project_id: str, location: str | None = None) -> None:
+    """Initializes BigQueryExecutor.
+
+    Args:
+      project_id: Google Cloud project id.
+      location: BigQuery dataset location.
+    """
+    self.project_id = project_id
+    self.location = location
+
+  @property
+  def client(self) -> bigquery.Client:
+    """Instantiates BigQuery client."""
+    return bigquery.Client(self.project_id)
+
+  def execute(
+    self, script_name: str, query_text: str, params: dict | None = None
+  ) -> pd.DataFrame:
+    """Executes query in BigQuery.
+
+    Args:
+      script_name: Script identifier.
+      query_text: Query to be executed.
+      params: Optional parameters to be replaced in query text.
+
+    Returns:
+      DataFrame if query returns some data, otherwise an empty DataFrame.
+    """
+    query_text = self.replace_params_template(query_text, params)
+    job = self.client.query(query_text)
+    try:
+      result = job.result()
+      logger.debug('%s launched successfully', script_name)
+      if result.total_rows:
+        return result.to_dataframe()
+      return pd.DataFrame()
+    except google_cloud_exceptions.GoogleCloudError as e:
+      raise BigQueryExecutorError(e) from e
+
+  def create_datasets(self, macros: dict | None) -> None:
+    """Creates datasets in BigQuery based on values in a dict.
+
+    If the dict contains keys with 'dataset' in them, the values for such
+    keys are treated as dataset names.
+
+    Args:
+      macros: Mapping containing data for query execution.
+    """
+    if macros and (datasets := extract_datasets(macros)):
+      for dataset in datasets:
+        dataset_id = f'{self.project_id}.{dataset}'
+        try:
+          self.client.get_dataset(dataset_id)
+        except google_cloud_exceptions.NotFound:
+          bq_dataset = bigquery.Dataset(dataset_id)
+          bq_dataset.location = self.location
+          self.client.create_dataset(bq_dataset, timeout=30)
+          logger.debug('Created new dataset %s', dataset_id)
+
+
+def extract_datasets(macros: dict | None) -> list[str]:
+  """Extracts possible dataset names from keys of a dict.
+
+  If the dict contains keys with 'dataset' in them, the values for such
+  keys are treated as dataset names.
+
+  Args:
+    macros: Mapping containing data for query execution.
+
+  Returns:
+    Possible names of datasets.
+  """
+  if not macros:
+    return []
+  return [value for macro, value in macros.items() if 'dataset' in macro]
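The dataset auto-creation convention above is easy to check in isolation: extract_datasets treats any macro key containing 'dataset' as naming a dataset. A small sketch, assuming the package is installed with the bq extra (the macro names and values are made up):

from garf_executors import bq_executor

macros = {
  'dataset': 'garf_output',
  'target_dataset': 'garf_staging',
  'start_date': '2024-01-01',
}
# Keys containing 'dataset' yield dataset names; other macros are ignored.
print(bq_executor.extract_datasets(macros))
# ['garf_output', 'garf_staging']
print(bq_executor.extract_datasets(None))
# []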
garf_executors-0.0.1/garf_executors/entrypoints/__init__.py
File without changes

garf_executors-0.0.1/garf_executors/entrypoints/cli/__init__.py
File without changes
garf_executors-0.0.1/garf_executors/entrypoints/cli/api.py
@@ -0,0 +1,213 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for defining `garf` CLI utility.
+
+`garf` allows executing GAQL queries and storing results in local/remote
+storage.
+"""
+
+from __future__ import annotations
+
+import argparse
+import functools
+import sys
+from collections.abc import MutableSequence
+from concurrent import futures
+from pathlib import Path
+
+import smart_open
+import yaml
+from garf import api_clients, exceptions, query_executor
+from garf.cli import utils
+from garf.io import reader, writer
+
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('query', nargs='*')
+  parser.add_argument('-c', '--config', dest='garf_config', default=None)
+  parser.add_argument('--account', dest='account', default=None)
+  parser.add_argument('--output', dest='output', default=None)
+  parser.add_argument('--input', dest='input', default='file')
+  parser.add_argument(
+    '--ads-config', dest='config', default=str(Path.home() / 'google-ads.yaml')
+  )
+  parser.add_argument('--api-version', dest='api_version', default=None)
+  parser.add_argument('--log', '--loglevel', dest='loglevel', default='info')
+  parser.add_argument('--logger', dest='logger', default='local')
+  parser.add_argument(
+    '--customer-ids-query', dest='customer_ids_query', default=None
+  )
+  parser.add_argument(
+    '--customer-ids-query-file', dest='customer_ids_query_file', default=None
+  )
+  parser.add_argument('--save-config', dest='save_config', action='store_true')
+  parser.add_argument(
+    '--no-save-config', dest='save_config', action='store_false'
+  )
+  parser.add_argument(
+    '--config-destination', dest='save_config_dest', default='config.yaml'
+  )
+  parser.add_argument(
+    '--parallel-queries', dest='parallel_queries', action='store_true'
+  )
+  parser.add_argument(
+    '--no-parallel-queries', dest='parallel_queries', action='store_false'
+  )
+  parser.add_argument(
+    '--optimize-performance', dest='optimize_performance', default='NONE'
+  )
+  parser.add_argument('--dry-run', dest='dry_run', action='store_true')
+  parser.add_argument(
+    '--disable-account-expansion',
+    dest='disable_account_expansion',
+    action='store_true',
+  )
+  parser.add_argument('-v', '--version', dest='version', action='store_true')
+  parser.add_argument(
+    '--parallel-threshold', dest='parallel_threshold', default=None, type=int
+  )
+  parser.set_defaults(save_config=False)
+  parser.set_defaults(parallel_queries=True)
+  parser.set_defaults(dry_run=False)
+  parser.set_defaults(disable_account_expansion=False)
+  args = parser.parse_known_args()
+  main_args = args[0]
+
+  if main_args.version:
+    import pkg_resources
+
+    version = pkg_resources.require('google-ads-api-report-fetcher')[0].version
+    print(f'garf version {version}')
+    sys.exit()
+
+  logger = utils.init_logging(
+    loglevel=main_args.loglevel.upper(), logger_type=main_args.logger
+  )
+  if not main_args.query:
+    logger.error('Please provide one or more queries to run')
+    raise exceptions.GarfMissingQueryException(
+      'Please provide one or more queries to run'
+    )
+
+  with smart_open.open(main_args.config, 'r', encoding='utf-8') as f:
+    google_ads_config_dict = yaml.safe_load(f)
+
+  config = utils.ConfigBuilder('garf').build(vars(main_args), args[1])
+  if not config.account:
+    if mcc := google_ads_config_dict.get('login_customer_id'):
+      config.account = str(mcc)
+    else:
+      raise exceptions.GarfMissingAccountException(
+        'No account found, please specify via --account CLI flag '
+        'or add as login_customer_id in google-ads.yaml'
+      )
+  logger.debug('config: %s', config)
+
+  if main_args.save_config and not main_args.garf_config:
+    utils.ConfigSaver(main_args.save_config_dest).save(config)
+  if main_args.dry_run:
+    sys.exit()
+
+  if config.params:
+    config = utils.initialize_runtime_parameters(config)
+    logger.debug('initialized config: %s', config)
+
+  ads_client = api_clients.GoogleAdsApiClient(
+    config_dict=google_ads_config_dict,
+    version=config.api_version,
+    use_proto_plus=main_args.optimize_performance
+    not in ('PROTOBUF', 'BATCH_PROTOBUF'),
+  )
+  ads_query_executor = query_executor.AdsQueryExecutor(ads_client)
+  reader_factory = reader.ReaderFactory()
+  reader_client = reader_factory.create_reader(main_args.input)
+
+  if config.customer_ids_query:
+    customer_ids_query = config.customer_ids_query
+  elif config.customer_ids_query_file:
+    file_reader = reader_factory.create_reader('file')
+    customer_ids_query = file_reader.read(config.customer_ids_query_file)
+  else:
+    customer_ids_query = None
+
+  if main_args.disable_account_expansion:
+    logger.info(
+      'Skipping account expansion because of disable_account_expansion flag'
+    )
+    customer_ids = (
+      config.account
+      if isinstance(config.account, MutableSequence)
+      else [config.account]
+    )
+  else:
+    customer_ids = ads_query_executor.expand_mcc(
+      config.account, customer_ids_query
+    )
+  if not customer_ids:
+    logger.warning(
+      'Not a single account under MCC %s is found that satisfies '
+      'the following customer_id query: "%s"',
+      config.account,
+      customer_ids_query,
+    )
+    sys.exit()
+  writer_client = writer.WriterFactory().create_writer(
+    config.output, **config.writer_params
+  )
+  if config.output == 'bq':
+    _ = writer_client.create_or_get_dataset()
+  if config.output == 'sheet':
+    writer_client.init_client()
+
+  logger.info(
+    'Total number of customer_ids is %d, accounts=[%s]',
+    len(customer_ids),
+    ','.join(map(str, customer_ids)),
+  )
+
+  if main_args.parallel_queries:
+    logger.info('Running queries in parallel')
+    with futures.ThreadPoolExecutor(main_args.parallel_threshold) as executor:
+      future_to_query = {
+        executor.submit(
+          ads_query_executor.execute,
+          reader_client.read(query),
+          query,
+          customer_ids,
+          writer_client,
+          config.params,
+          main_args.optimize_performance,
+        ): query
+        for query in main_args.query
+      }
+      for future in futures.as_completed(future_to_query):
+        query = future_to_query[future]
+        utils.garf_runner(query, future.result, logger)
+  else:
+    logger.info('Running queries sequentially')
+    for query in main_args.query:
+      callback = functools.partial(
+        ads_query_executor.execute,
+        reader_client.read(query),
+        query,
+        customer_ids,
+        writer_client,
+        config.params,
+        main_args.optimize_performance,
+      )
+      utils.garf_runner(query, callback, logger)
+
+
+if __name__ == '__main__':
+  main()
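The parallel branch above follows a standard fan-out pattern: submit one job per query, keep a future-to-query map, and consume results as they complete. A self-contained sketch of just that pattern, with a hypothetical run_query standing in for ads_query_executor.execute:

from concurrent import futures


def run_query(query: str) -> str:
  # Stand-in for ads_query_executor.execute; returns a label for the demo.
  return f'done: {query}'


queries = ['q1.sql', 'q2.sql', 'q3.sql']
with futures.ThreadPoolExecutor(max_workers=2) as executor:
  future_to_query = {executor.submit(run_query, q): q for q in queries}
  for future in futures.as_completed(future_to_query):
    query = future_to_query[future]
    # utils.garf_runner wraps this call with logging and error handling;
    # here we just surface the result.
    print(query, '->', future.result())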
garf_executors-0.0.1/garf_executors/entrypoints/cli/bq.py
@@ -0,0 +1,112 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for defining `garf-bq` CLI utility.
+
+`garf-bq` allows executing BigQuery queries based on a Garf config.
+"""
+
+from __future__ import annotations
+
+import argparse
+import functools
+import sys
+from concurrent import futures
+
+from garf_writers import reader  # type: ignore
+
+from garf_executors import bq_executor
+from garf_executors.entrypoints import utils
+
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('query', nargs='+')
+  parser.add_argument('-c', '--config', dest='garf_config', default=None)
+  parser.add_argument('--project', dest='project')
+  parser.add_argument(
+    '--dataset-location', dest='dataset_location', default=None
+  )
+  parser.add_argument('--save-config', dest='save_config', action='store_true')
+  parser.add_argument(
+    '--no-save-config', dest='save_config', action='store_false'
+  )
+  parser.add_argument(
+    '--config-destination', dest='save_config_dest', default='config.yaml'
+  )
+  parser.add_argument('--log', '--loglevel', dest='loglevel', default='info')
+  parser.add_argument('--logger', dest='logger', default='local')
+  parser.add_argument('--dry-run', dest='dry_run', action='store_true')
+  parser.add_argument(
+    '--parallel-queries', dest='parallel_queries', action='store_true'
+  )
+  parser.add_argument(
+    '--no-parallel-queries', dest='parallel_queries', action='store_false'
+  )
+  parser.add_argument(
+    '--parallel-threshold', dest='parallel_threshold', default=None, type=int
+  )
+  parser.set_defaults(save_config=False)
+  parser.set_defaults(dry_run=False)
+  parser.set_defaults(parallel_queries=True)
+  args = parser.parse_known_args()
+  main_args = args[0]
+
+  logger = utils.init_logging(
+    loglevel=main_args.loglevel.upper(), logger_type=main_args.logger
+  )
+  config = utils.ConfigBuilder('garf-bq').build(vars(main_args), args[1])
+  logger.debug('config: %s', config)
+  if main_args.save_config and not main_args.garf_config:
+    utils.ConfigSaver(main_args.save_config_dest).save(config)
+  if main_args.dry_run:
+    sys.exit()
+
+  config = utils.initialize_runtime_parameters(config)
+  logger.debug('initialized config: %s', config)
+
+  bigquery_executor = bq_executor.BigQueryExecutor(
+    project_id=config.project, location=config.dataset_location
+  )
+  bigquery_executor.create_datasets(config.params.get('macro'))
+
+  reader_client = reader.FileReader()
+
+  if main_args.parallel_queries:
+    logger.info('Running queries in parallel')
+    with futures.ThreadPoolExecutor(
+      max_workers=main_args.parallel_threshold
+    ) as executor:
+      future_to_query = {
+        executor.submit(
+          bigquery_executor.execute,
+          query,
+          reader_client.read(query),
+          config.params,
+        ): query
+        for query in sorted(main_args.query)
+      }
+      for future in futures.as_completed(future_to_query):
+        query = future_to_query[future]
+        utils.postprocessor_runner(query, future.result, logger)
+  else:
+    logger.info('Running queries sequentially')
+    for query in sorted(main_args.query):
+      callback = functools.partial(
+        bigquery_executor.execute,
+        query,
+        reader_client.read(query),
+        config.params,
+      )
+      utils.postprocessor_runner(query, callback, logger)
+
+
+if __name__ == '__main__':
+  main()
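Note how runtime parameters reach dataset creation in the entrypoint above: create_datasets receives config.params.get('macro'), so any macro whose key contains 'dataset' names a dataset that is created when missing. A hedged sketch of that call (the project id, location, and macro values are made up; assumes the bq extra is installed and GCP credentials are available):

from garf_executors import bq_executor

executor = bq_executor.BigQueryExecutor(
  project_id='my-project', location='EU'  # hypothetical project and location
)
# Mirrors bigquery_executor.create_datasets(config.params.get('macro')):
# 'dataset' matches the key filter, 'start_date' does not.
executor.create_datasets({'dataset': 'garf', 'start_date': '2024-01-01'})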