megaton 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megaton/__init__.py +62 -0
- megaton/auth/__init__.py +6 -0
- megaton/auth/google_auth.py +231 -0
- megaton/auth/provider.py +117 -0
- megaton/bq.py +479 -0
- megaton/constants.py +17 -0
- megaton/dates.py +210 -0
- megaton/errors.py +109 -0
- megaton/files.py +51 -0
- megaton/ga3.py +796 -0
- megaton/ga4.py +1063 -0
- megaton/gdrive.py +21 -0
- megaton/google_api.py +124 -0
- megaton/gsheet.py +369 -0
- megaton/install/__init__.py +0 -0
- megaton/install/install_bigquery.py +9 -0
- megaton/install/install_ga4.py +10 -0
- megaton/os.py +19 -0
- megaton/recipes/__init__.py +5 -0
- megaton/recipes/config_loader.py +84 -0
- megaton/searchconsole.py +50 -0
- megaton/services/__init__.py +1 -0
- megaton/services/bq_service.py +20 -0
- megaton/services/gsc_service.py +382 -0
- megaton/services/sheets_service.py +448 -0
- megaton/start.py +3456 -0
- megaton/state.py +18 -0
- megaton/transform/__init__.py +28 -0
- megaton/transform/classify.py +43 -0
- megaton/transform/ga4.py +329 -0
- megaton/transform/table.py +114 -0
- megaton/transform/text.py +152 -0
- megaton/ui/__init__.py +1 -0
- megaton/ui/widgets.py +149 -0
- megaton/utils.py +206 -0
- megaton-1.0.0.dist-info/METADATA +101 -0
- megaton-1.0.0.dist-info/RECORD +40 -0
- megaton-1.0.0.dist-info/WHEEL +5 -0
- megaton-1.0.0.dist-info/licenses/LICENSE +21 -0
- megaton-1.0.0.dist-info/top_level.txt +1 -0
megaton/__init__.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from IPython.display import clear_output
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
__all__ = ['start', 'mount_google_drive']
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _is_colab() -> bool:
    """Report whether the ``google.colab`` module has already been imported."""
    loaded_modules = sys.modules
    return 'google.colab' in loaded_modules
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _auto_install_enabled() -> bool:
    """Decide whether missing GA4 packages should be installed automatically.

    ``MEGATON_AUTO_INSTALL=1`` forces the answer to True and
    ``MEGATON_AUTO_INSTALL=0`` forces it to False. Any other value, or an
    unset variable, defers to ``_is_colab()``.
    """
    explicit_choice = {"1": True, "0": False}.get(os.environ.get("MEGATON_AUTO_INSTALL"))
    if explicit_choice is None:
        return _is_colab()
    return explicit_choice
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _print_install_help():
    """Print the manual installation instructions for the required GA4 packages."""
    help_lines = (
        "Megaton requires GA4 packages. Install with:",
        " pip install -U -q google-analytics-admin google-analytics-data",
        " pip install -U -q google-cloud-bigquery-datatransfer",
        "Or set MEGATON_AUTO_INSTALL=1 in Colab.",
    )
    print("\n".join(help_lines))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
try:
    # Probe for the GA4 client libraries; the imported names are not used here.
    from google.analytics.data import BetaAnalyticsDataClient
    from google.analytics.admin import AnalyticsAdminServiceClient
except ModuleNotFoundError:
    if not _auto_install_enabled():
        # Auto-install is off: show the manual steps and let the import error propagate.
        _print_install_help()
        raise
    clear_output()
    print("Installing packages for GA4...")
    from .install import install_ga4, install_bigquery

    install_ga4.install()
    install_bigquery.install()
    clear_output()
|
|
47
|
+
|
|
48
|
+
# Evaluated once at import time; the rest of the module reads this flag.
IS_COLAB = _is_colab()

if IS_COLAB:
    # In Colab, enable the data-table DataFrame formatter and render floats
    # with three decimal places.
    from google.colab import data_table

    data_table.enable_dataframe_formatter()
    data_table._DEFAULT_FORMATTERS[float] = lambda value: format(value, ".3f")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def mount_google_drive():
    """Mount Google Drive when running in Google Colab.

    Returns:
        The result of ``gdrive.link_nbs()`` when ``IS_COLAB`` is true;
        otherwise ``None``, after printing a notice.
    """
    if IS_COLAB:
        # Imported lazily so the gdrive module is only loaded inside Colab.
        from . import gdrive

        return gdrive.link_nbs()
    print("Google Drive mounting is only available in Google Colab.")
    return None
|
megaton/auth/google_auth.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Functions for handling Authentications
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
|
|
10
|
+
import google.oauth2.credentials
|
|
11
|
+
from google.oauth2 import service_account
|
|
12
|
+
from google.oauth2.credentials import Credentials
|
|
13
|
+
from google_auth_oauthlib.flow import InstalledAppFlow
|
|
14
|
+
|
|
15
|
+
_REQUIRED_CONFIG_KEYS = frozenset(("auth_uri", "token_uri", "client_id"))
|
|
16
|
+
LOGGER = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _is_service_account(json_text: str):
    """Return True if ``json_text`` is a JSON service-account key.

    Args:
        json_text: Text expected to hold a JSON document.

    Returns:
        True only when the text parses to a JSON object whose ``type`` is
        ``'service_account'``. Text that is not valid JSON, or valid JSON
        that is not an object (a list, string, number, ...), yields False.
    """
    try:
        key_obj = json.loads(json_text)
    except json.JSONDecodeError:
        return False
    # Valid JSON is not necessarily an object; only a dict has ``.get``.
    if not isinstance(key_obj, dict):
        return False
    return key_obj.get('type', '') == 'service_account'
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _is_service_account_json(json_path: str):
    """Return True if the JSON file at ``json_path`` is a service-account key."""
    with open(json_path, 'r') as key_file:
        contents = key_file.read()
    return _is_service_account(contents)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_credential_type(client_config: dict):
    """Get the client type from a Google-format client secrets configuration.

    Args:
        client_config (Mapping[str, Any]): The client configuration in the
            Google client secrets format.

    Returns:
        ``'service_account'`` when the top-level ``type`` says so. Otherwise
        ``'web'`` or ``'installed'`` when that section exists and contains all
        of the required keys. ``None`` when neither section is present, the
        section is not a mapping, or required keys are missing.
    """
    if client_config.get('type', '') == "service_account":
        return "service_account"

    if "web" in client_config:
        client_type = "web"
    elif "installed" in client_config:
        client_type = "installed"
    else:
        return None

    config = client_config[client_type]
    try:
        present_keys = config.keys()
    except AttributeError:
        # The section is not a mapping (e.g. a string or null): not a usable config.
        return None
    if _REQUIRED_CONFIG_KEYS.issubset(present_keys):
        return client_type
    return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_credential_type_from_file(json_path: str):
    """Get the client type from a Google client secrets file.

    Args:
        json_path (str): The path to the client secrets .json file.

    Returns:
        The client type (``'service_account'``, ``'web'`` or ``'installed'``),
        or ``None`` when the file cannot be read, is not valid JSON, does not
        hold a JSON object, or is not a recognised credential.
    """
    try:
        with open(json_path, "r") as fp:
            client_config = json.load(fp)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the UnicodeDecodeError
        # raised for files that are not decodable text, so one bad file is skipped
        # instead of aborting the caller.
        LOGGER.debug("Skipping non JSON credential file: %s", json_path)
        return None

    if not isinstance(client_config, dict):
        LOGGER.debug('Credential file %s does not contain a JSON object; skipping.', json_path)
        return None

    return get_credential_type(client_config)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def get_credential_type_from_info(info: dict) -> str:
    """Classify an in-memory credential mapping.

    Returns ``"service_account"`` when ``info["type"]`` says so, else
    ``"installed"`` or ``"web"`` when that key is present (``"installed"`` is
    checked first), else ``"unknown"``. Anything that is not a dict is
    ``"unknown"``.
    """
    if not isinstance(info, dict):
        return "unknown"
    if info.get("type") == "service_account":
        return "service_account"
    for client_type in ("installed", "web"):
        if client_type in info:
            return client_type
    return "unknown"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_json_files_from_dir(json_dir: str):
    """Collect credential ``.json`` files under ``json_dir``, recursively.

    Returns:
        A ``defaultdict`` with up to two groups, ``'Service Account'`` and
        ``'OAuth'``, each mapping a file name to its full path. Files whose
        type is not recognised are left out. Entries are keyed by bare file
        name, so a later file with the same name replaces an earlier one.
    """
    group_for_type = {
        'service_account': 'Service Account',
        'installed': 'OAuth',
        'web': 'OAuth',
    }
    found = defaultdict(dict)
    for root, _dirs, files in os.walk(json_dir):
        for file_name in files:
            if not file_name.endswith('.json'):
                continue
            full_path = os.path.join(root, file_name)
            group = group_for_type.get(get_credential_type_from_file(full_path))
            if group is not None:
                found[group][file_name] = full_path
    return found
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_cache_path(json_path: str):
    """Return the cache file path that corresponds to ``json_path``.

    The cache lives in ``~/.config`` (created if missing) and is named
    ``cache_<stem>.json``, where ``<stem>`` is the source file name without
    its extension.
    """
    stem, _extension = os.path.splitext(os.path.basename(json_path))
    config_dir = os.path.join(os.path.expanduser("~"), ".config")
    os.makedirs(config_dir, exist_ok=True)
    return os.path.join(config_dir, f"cache_{stem}.json")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def save_credentials(file_path: str, credentials: Credentials):
    """Save credentials to the cache file derived from ``file_path``.

    Args:
        file_path: Path of the source client secrets file; the cache location
            comes from ``get_cache_path(file_path)``.
        credentials: Credentials to serialise with ``to_json()``.

    Returns:
        The same ``credentials`` object, so the call can be chained.
    """
    cache_path = get_cache_path(file_path)
    LOGGER.debug("saving credentials to %s", cache_path)
    # The cache holds OAuth tokens, so a newly created file is made readable
    # and writable by the owner only instead of using the process default mode.
    fd = os.open(cache_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as cache_file:
        cache_file.write(credentials.to_json())
    return credentials
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def load_credentials(file_path: str, scopes: list):
    """Load credentials from the cache file derived from ``file_path``.

    Returns:
        The cached credentials, or ``None`` when no cache file exists.
    """
    cache_path = get_cache_path(file_path)
    if not os.path.isfile(cache_path):
        return None
    LOGGER.debug(f"loading credentials from {cache_path}")
    return Credentials.from_authorized_user_file(cache_path, scopes=scopes)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def load_service_account_credentials_from_info(info: dict, scopes: list):
    """Build service-account credentials from an in-memory key mapping.

    Args:
        info: Parsed service-account key; its ``type`` must be
            ``"service_account"``.
        scopes: Scopes to request when authenticating to Google APIs.

    Returns:
        The credentials, refreshed if they were not already valid, or ``None``
        when the refresh fails (the failure is logged).

    Raises:
        ValueError: If ``info`` is not a dict describing a service account.
    """
    if not isinstance(info, dict) or info.get("type") != "service_account":
        raise ValueError("service_account info required")

    # Import the submodules used below explicitly: the module's top-level
    # imports do not name them, so they were only reachable as a side effect
    # of other imports.
    from google.auth import exceptions as auth_exceptions
    from google.auth.transport.requests import Request

    credentials = service_account.Credentials.from_service_account_info(info, scopes=scopes)
    if credentials.valid:
        return credentials

    try:
        credentials.refresh(Request())
    except auth_exceptions.RefreshError as exc:
        email = info.get("client_email") or getattr(credentials, "service_account_email", None)
        if email:
            message = f"指定の {email} のサービスアカウントは存在しない、または無効です。"
        else:
            message = "指定したサービスアカウントは存在しない、または無効です。"
        LOGGER.error(message)
        LOGGER.debug("Service account refresh error detail: %s", exc)
        return None
    return credentials
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def delete_credentials(cache_file: str = "creden-cache.json"):
    """Delete the credentials cache file at ``cache_file``.

    Does nothing when the path is not an existing regular file.
    """
    if not os.path.isfile(cache_file):
        return
    LOGGER.debug(f"deleting cache file {cache_file}")
    os.remove(cache_file)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def get_oauth_redirect(client_secret_file: str, scopes: list):
    """Start an OAuth2 flow from a client secrets file.

    Returns:
        A ``(flow, auth_url)`` tuple: the flow object and the authorization
        URL, requested with ``prompt="consent"``.
    """
    flow_options = {
        "scopes": scopes,
        "redirect_uri": "urn:ietf:wg:oauth:2.0:oob",
    }
    flow = InstalledAppFlow.from_client_secrets_file(client_secret_file, **flow_options)
    auth_url, _state = flow.authorization_url(prompt="consent")
    return flow, auth_url
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def get_oauth_redirect_from_info(info: dict, scopes: list):
    """Start an OAuth2 flow from an in-memory client configuration.

    Counterpart of ``get_oauth_redirect`` for callers that hold the client
    config as a dict rather than a file.

    Returns:
        A ``(flow, auth_url)`` tuple.
    """
    flow = InstalledAppFlow.from_client_config(info, scopes=scopes)
    flow.redirect_uri = "urn:ietf:wg:oauth:2.0:oob"
    url_options = {
        "prompt": 'consent',
        "access_type": 'offline',
        "include_granted_scopes": 'true',
    }
    auth_url, _state = flow.authorization_url(**url_options)
    return flow, auth_url
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def get_token(flow, code: str):
    """Exchange an authorization ``code`` on ``flow`` and return its credentials."""
    flow.fetch_token(code=code)
    credentials = flow.credentials
    return credentials
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def load_service_account_credentials_from_file(path: str, scopes: list):
    """Gets service account credentials from JSON file at ``path``.

    :param path: Path to credentials JSON file.
    :param scopes: A list of scopes to use when authenticating to Google APIs.
    :return: google.oauth2.service_account.Credentials, refreshed if they were
        not already valid, or ``None`` when the refresh fails (the failure is
        logged).
    """
    # Import the submodules used below explicitly: the module's top-level
    # imports do not name them, so they were only reachable as a side effect
    # of other imports.
    from google.auth import exceptions as auth_exceptions
    from google.auth.transport.requests import Request

    credentials = service_account.Credentials.from_service_account_file(path, scopes=scopes)
    if credentials.valid:
        return credentials

    try:
        credentials.refresh(Request())
    except auth_exceptions.RefreshError as exc:
        email = getattr(credentials, "service_account_email", None)
        if email:
            message = f"指定の {email} のサービスアカウントは存在しない、または無効です。"
        else:
            message = "指定したサービスアカウントは存在しない、または無効です。"
        LOGGER.error(message)
        LOGGER.debug("Service account refresh error detail: %s", exc)
        return None
    return credentials
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# Names exported by ``from <module> import *``; order is kept as published.
__all__ = [
    # credential-type detection
    "_is_service_account",
    "_is_service_account_json",
    "get_credential_type",
    "get_credential_type_from_file",
    "get_credential_type_from_info",
    "get_json_files_from_dir",
    # cache handling
    "get_cache_path",
    "save_credentials",
    "load_credentials",
    "load_service_account_credentials_from_info",
    "delete_credentials",
    # OAuth flow helpers
    "get_oauth_redirect",
    "get_oauth_redirect_from_info",
    "get_token",
    "load_service_account_credentials_from_file",
]
|
megaton/auth/provider.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Credential source resolution helpers."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import base64
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from typing import Any, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class CredentialSource:
    """Immutable description of where a credential came from and what it is."""

    # The credential argument exactly as the caller supplied it.
    raw: Any
    # How the value was obtained: "explicit", "env", "colab_default" or "none".
    origin: str
    # Shape of the value: "inline", "directory", "file", "unknown" or "none".
    kind: str
    # The string form of the credential (JSON text or a path), when there is one.
    value: Optional[str] = None
    # Parsed JSON object, when the credential could be read as one.
    info: Optional[dict] = None
    # Classification of ``info``, when it was computed.
    credential_type: Optional[str] = None
    # Exception captured while loading a credential file, if any.
    error: Optional[Exception] = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_credential_type_from_info(info: Optional[dict]) -> str:
    """Delegate to ``google_auth.get_credential_type_from_info``.

    The sibling module is imported inside the function rather than at module
    load time.
    """
    from . import google_auth as _google_auth

    credential_type = _google_auth.get_credential_type_from_info(info)
    return credential_type
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_json_input(value: Any) -> Optional[dict]:
    """Return a dict if ``value`` looks like JSON (or base64-encoded JSON); else None.

    A dict is returned unchanged. A string is stripped and, if it is wrapped
    in braces, parsed as JSON. Otherwise it is base64-decoded and the decoded
    text is parsed when it is wrapped in braces. Any failure yields ``None``.
    """
    if isinstance(value, dict):
        return value
    if not isinstance(value, str):
        return None

    def _brace_wrapped(text: str) -> bool:
        return text.startswith("{") and text.endswith("}")

    text = value.strip()
    try:
        if _brace_wrapped(text):
            # Brace-wrapped text is only ever tried as plain JSON, never as base64.
            return json.loads(text)
        decoded = base64.b64decode(text).decode("utf-8", errors="ignore").strip()
        if _brace_wrapped(decoded):
            return json.loads(decoded)
    except Exception:
        # Best effort: input that cannot be parsed is simply "not JSON".
        pass
    return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def load_json_file(path: str) -> Tuple[Optional[dict], Optional[Exception]]:
    """Read a JSON object from ``path`` without raising.

    Returns:
        ``(data, None)`` on success, or ``(None, error)`` where ``error`` is
        the exception raised while opening or parsing the file, or a
        ``ValueError`` when the file holds JSON that is not an object.
    """
    try:
        with open(path) as fp:
            parsed = json.load(fp)
    except Exception as exc:
        return None, exc
    if isinstance(parsed, dict):
        return parsed, None
    return None, ValueError("JSON object required")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def extract_email_from_file(path: str) -> Optional[str]:
    """Return the ``client_email`` stored in the JSON file at ``path``.

    Returns ``None`` when the file cannot be loaded as a JSON object or has
    no ``client_email`` key.
    """
    info = load_json_file(path)[0]
    return info.get("client_email") if isinstance(info, dict) else None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def resolve_credential_source(
    credential: Any,
    *,
    env_var: str = "MEGATON_CREDS_JSON",
    in_colab: bool = False,
    colab_default: str = "/nbs",
) -> CredentialSource:
    """Work out what kind of credential the caller supplied.

    Args:
        credential: A dict, a JSON or base64-JSON string, a file or directory
            path, or ``None`` to fall back to the environment.
        env_var: Environment variable consulted when ``credential`` is None.
        in_colab: When true and nothing else is supplied, use ``colab_default``.
        colab_default: Fallback value used in Colab.

    Returns:
        A ``CredentialSource`` whose ``kind`` is ``"inline"``, ``"directory"``,
        ``"file"``, ``"unknown"`` or ``"none"``. ``raw`` always holds the
        original ``credential`` argument, even when a fallback was used.
    """
    raw = credential

    # Step 1: pick the value to inspect and remember where it came from.
    if credential is not None:
        origin, value = "explicit", credential
    else:
        env_val = os.environ.get(env_var)
        if env_val:
            origin, value = "env", env_val
        elif in_colab:
            origin, value = "colab_default", colab_default
        else:
            return CredentialSource(raw=None, origin="none", kind="none")

    # Step 2: a dict is already parsed credential info.
    if isinstance(value, dict):
        return CredentialSource(
            raw=raw,
            origin=origin,
            kind="inline",
            info=value,
            credential_type=get_credential_type_from_info(value),
        )

    if not isinstance(value, str):
        return CredentialSource(raw=raw, origin=origin, kind="unknown")

    # Step 3: a string is tried as inline JSON first, then as a path.
    inline_info = parse_json_input(value)
    if inline_info is not None:
        return CredentialSource(
            raw=raw,
            origin=origin,
            kind="inline",
            value=value,
            info=inline_info,
            credential_type=get_credential_type_from_info(inline_info),
        )

    if os.path.isdir(value):
        return CredentialSource(raw=raw, origin=origin, kind="directory", value=value)

    if os.path.isfile(value):
        file_info, load_error = load_json_file(value)
        # The type is only computed when the file yielded a non-empty object.
        file_type = get_credential_type_from_info(file_info) if file_info else None
        return CredentialSource(
            raw=raw,
            origin=origin,
            kind="file",
            value=value,
            info=file_info,
            credential_type=file_type,
            error=load_error,
        )

    return CredentialSource(raw=raw, origin=origin, kind="unknown", value=value)
|