sdgym 0.14.2.dev0__tar.gz → 0.14.3.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdgym-0.14.2.dev0/sdgym.egg-info → sdgym-0.14.3.dev0}/PKG-INFO +1 -1
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/pyproject.toml +1 -1
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/__init__.py +1 -1
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark/benchmark.py +10 -11
- sdgym-0.14.3.dev0/sdgym/_benchmark_launcher/_storage_manager.py +175 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/_validation.py +25 -3
- sdgym-0.14.3.dev0/sdgym/_benchmark_launcher/benchmark_base.yaml +16 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/benchmark_config.py +2 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/benchmark_launcher.py +134 -24
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/script.py +2 -2
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/utils.py +75 -5
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/result_explorer/result_handler.py +9 -8
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/run_benchmark/run_benchmark.py +3 -11
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/run_benchmark/upload_benchmark_results.py +25 -8
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/s3.py +8 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizer_descriptions.yaml +31 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0/sdgym.egg-info}/PKG-INFO +1 -1
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym.egg-info/SOURCES.txt +0 -1
- sdgym-0.14.2.dev0/sdgym/_benchmark/config_utils.py +0 -123
- sdgym-0.14.2.dev0/sdgym/_benchmark_launcher/_storage_manager.py +0 -64
- sdgym-0.14.2.dev0/sdgym/_benchmark_launcher/benchmark_base.yaml +0 -9
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/LICENSE +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/README.md +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark/__init__.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark/credentials_utils.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/__init__.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/_instance_manager.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/benchmark_multi_table.yaml +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_benchmark_launcher/benchmark_single_table.yaml +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/_dataset_utils.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/benchmark.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/cli/__init__.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/cli/__main__.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/cli/collect.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/cli/summary.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/cli/utils.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/dataset_explorer.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/datasets.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/errors.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/metrics.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/progress.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/result_explorer/__init__.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/result_explorer/result_explorer.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/result_writer.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/run_benchmark/__init__.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/run_benchmark/utils.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/__init__.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/base.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/column.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/generate.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/identity.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/realtabformer.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/sdv.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/uniform.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/synthesizers/utils.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym/utils.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym.egg-info/dependency_links.txt +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym.egg-info/entry_points.txt +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym.egg-info/requires.txt +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/sdgym.egg-info/top_level.txt +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/setup.cfg +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/tests/test_scripts.py +0 -0
- {sdgym-0.14.2.dev0 → sdgym-0.14.3.dev0}/tests/test_tasks.py +0 -0
|
@@ -163,7 +163,7 @@ namespaces = false
|
|
|
163
163
|
version = {attr = 'sdgym.__version__'}
|
|
164
164
|
|
|
165
165
|
[tool.bumpversion]
|
|
166
|
-
current_version = "0.14.2.dev0"
|
|
166
|
+
current_version = "0.14.3.dev0"
|
|
167
167
|
parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
|
|
168
168
|
serialize = [
|
|
169
169
|
'{major}.{minor}.{patch}.{release}{candidate}',
|
|
@@ -1,14 +1,11 @@
|
|
|
1
1
|
import textwrap
|
|
2
|
+
import uuid
|
|
3
|
+
from datetime import datetime, timezone
|
|
2
4
|
from urllib.parse import urlparse
|
|
3
5
|
|
|
4
6
|
from google.cloud import compute_v1
|
|
5
7
|
from google.oauth2 import service_account
|
|
6
8
|
|
|
7
|
-
from sdgym._benchmark.config_utils import (
|
|
8
|
-
_make_instance_name,
|
|
9
|
-
resolve_compute_config,
|
|
10
|
-
validate_compute_config,
|
|
11
|
-
)
|
|
12
9
|
from sdgym._benchmark.credentials_utils import sdv_install_cmd
|
|
13
10
|
from sdgym.benchmark import (
|
|
14
11
|
DEFAULT_MULTI_TABLE_DATASETS,
|
|
@@ -26,6 +23,12 @@ from sdgym.benchmark import (
|
|
|
26
23
|
)
|
|
27
24
|
|
|
28
25
|
|
|
26
|
+
def _make_instance_name(prefix):
|
|
27
|
+
day = datetime.now(timezone.utc).strftime('%Y%m%d-%H%M')
|
|
28
|
+
suffix = uuid.uuid4().hex[:6]
|
|
29
|
+
return f'{prefix}-{day}-{suffix}'
|
|
30
|
+
|
|
31
|
+
|
|
29
32
|
def _get_logs_s3_uri(output_destination, instance_name):
|
|
30
33
|
"""Store logs next to output destination prefix.
|
|
31
34
|
|
|
@@ -144,12 +147,11 @@ def _get_user_data_script(
|
|
|
144
147
|
or int(config.get('gpu_count', 0)) > 0
|
|
145
148
|
or bool(config.get('gpu_type'))
|
|
146
149
|
)
|
|
147
|
-
upload_logs = bool(config.get('upload_logs', True))
|
|
148
150
|
|
|
149
151
|
aws_key = credentials['aws']['aws_access_key_id']
|
|
150
152
|
aws_secret = credentials['aws']['aws_secret_access_key']
|
|
151
153
|
|
|
152
|
-
log_uri = _get_logs_s3_uri(output_destination, instance_name)
|
|
154
|
+
log_uri = _get_logs_s3_uri(output_destination, instance_name)
|
|
153
155
|
|
|
154
156
|
sdv_install = sdv_install_cmd(credentials).rstrip()
|
|
155
157
|
sdv_install = textwrap.indent(sdv_install, ' ') if sdv_install else ''
|
|
@@ -363,9 +365,6 @@ def _benchmark_compute_gcp(
|
|
|
363
365
|
modality,
|
|
364
366
|
):
|
|
365
367
|
"""Run the SDGym benchmark on datasets for the given modality."""
|
|
366
|
-
compute_config = resolve_compute_config('gcp', compute_config)
|
|
367
|
-
validate_compute_config(compute_config)
|
|
368
|
-
|
|
369
368
|
s3_client = _validate_output_destination(
|
|
370
369
|
output_destination,
|
|
371
370
|
aws_keys={
|
|
@@ -454,7 +453,7 @@ def _benchmark_single_table_compute_gcp(
|
|
|
454
453
|
compute_diagnostic_score (bool, optional):
|
|
455
454
|
Whether to compute the diagnostic score. Defaults to True.
|
|
456
455
|
compute_privacy_score (bool, optional):
|
|
457
|
-
Whether to compute the privacy score. Defaults to
|
|
456
|
+
Whether to compute the privacy score. Defaults to False.
|
|
458
457
|
sdmetrics (list of str, optional):
|
|
459
458
|
The sdmetrics to use for evaluation. If None, default metrics will be used.
|
|
460
459
|
timeout (int, optional):
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from botocore.exceptions import BotoCoreError, ClientError
|
|
6
|
+
|
|
7
|
+
from sdgym._benchmark_launcher.utils import resolve_credentials
|
|
8
|
+
from sdgym.result_writer import S3ResultsWriter
|
|
9
|
+
from sdgym.s3 import _list_s3_bucket_contents, get_s3_client, is_s3_path, parse_s3_path
|
|
10
|
+
|
|
11
|
+
LOGGER = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _validate_s3_output_destinations(instance_jobs):
|
|
15
|
+
"""Validate that all output destinations are S3 paths."""
|
|
16
|
+
for instance_job in instance_jobs:
|
|
17
|
+
output_destination = instance_job['output_destination']
|
|
18
|
+
if not is_s3_path(output_destination):
|
|
19
|
+
raise ValueError(
|
|
20
|
+
f'Only S3 storage is currently supported. Found: {output_destination!r}.'
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BaseStorageManager:
|
|
25
|
+
"""Base class for storage-specific managers."""
|
|
26
|
+
|
|
27
|
+
def handles_destination(self, output_destination):
|
|
28
|
+
"""Return whether this manager supports the given destination."""
|
|
29
|
+
raise NotImplementedError
|
|
30
|
+
|
|
31
|
+
def list_files(self, output_destination):
|
|
32
|
+
"""Return the files currently stored under the given destination."""
|
|
33
|
+
raise NotImplementedError
|
|
34
|
+
|
|
35
|
+
def get_existing_filenames(self, output_destination):
|
|
36
|
+
"""Return the existing filenames for the given destination."""
|
|
37
|
+
raise NotImplementedError
|
|
38
|
+
|
|
39
|
+
def file_exists(self, filepath):
|
|
40
|
+
"""Return whether the provided key exists in the destination."""
|
|
41
|
+
raise NotImplementedError
|
|
42
|
+
|
|
43
|
+
def read_csv(self, filepath):
|
|
44
|
+
"""Read a CSV artifact from storage."""
|
|
45
|
+
raise NotImplementedError
|
|
46
|
+
|
|
47
|
+
def write_csv(self, result, filepath):
|
|
48
|
+
"""Write a CSV artifact to storage."""
|
|
49
|
+
raise NotImplementedError
|
|
50
|
+
|
|
51
|
+
def _load_job_result(self, filepath):
|
|
52
|
+
"""Load a per-job result CSV if it exists, otherwise return None."""
|
|
53
|
+
raise NotImplementedError
|
|
54
|
+
|
|
55
|
+
def update_metainfo(self, filepath, content):
|
|
56
|
+
"""Update metainfo for an artifact."""
|
|
57
|
+
raise NotImplementedError
|
|
58
|
+
|
|
59
|
+
def delete(self, filepath):
|
|
60
|
+
"""Delete an artifact from storage."""
|
|
61
|
+
raise NotImplementedError
|
|
62
|
+
|
|
63
|
+
def save_pickle(self, object, filepath):
|
|
64
|
+
"""Save a picklable object to storage."""
|
|
65
|
+
raise NotImplementedError
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class S3StorageManager(BaseStorageManager):
|
|
69
|
+
"""Manage benchmark artifacts stored in S3."""
|
|
70
|
+
|
|
71
|
+
def __init__(self, credentials_filepath, instance_jobs):
|
|
72
|
+
_validate_s3_output_destinations(instance_jobs)
|
|
73
|
+
self.credentials_filepath = credentials_filepath
|
|
74
|
+
self._existing_files = {}
|
|
75
|
+
self._writer = None
|
|
76
|
+
|
|
77
|
+
def __getstate__(self):
|
|
78
|
+
"""Return the picklable state."""
|
|
79
|
+
state = self.__dict__.copy()
|
|
80
|
+
state['_writer'] = None
|
|
81
|
+
return state
|
|
82
|
+
|
|
83
|
+
def __setstate__(self, state):
|
|
84
|
+
"""Restore the state after unpickling."""
|
|
85
|
+
self.__dict__.update(state)
|
|
86
|
+
|
|
87
|
+
def _get_writer(self):
|
|
88
|
+
"""Build the results writer."""
|
|
89
|
+
if self._writer is None:
|
|
90
|
+
self._writer = S3ResultsWriter(self._get_client())
|
|
91
|
+
|
|
92
|
+
return self._writer
|
|
93
|
+
|
|
94
|
+
def handles_destination(self, output_destination):
|
|
95
|
+
"""Return whether the destination is an S3 path."""
|
|
96
|
+
return is_s3_path(output_destination)
|
|
97
|
+
|
|
98
|
+
def _get_client(self):
|
|
99
|
+
"""Build and return the S3 client."""
|
|
100
|
+
credentials = resolve_credentials(self.credentials_filepath)
|
|
101
|
+
aws_credentials = credentials.get('aws', {})
|
|
102
|
+
return get_s3_client(
|
|
103
|
+
aws_access_key_id=aws_credentials.get('aws_access_key_id'),
|
|
104
|
+
aws_secret_access_key=aws_credentials.get('aws_secret_access_key'),
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
def _get_s3_resources(self, filepath):
|
|
108
|
+
"""Return the S3 client and bucket name for a destination."""
|
|
109
|
+
if not is_s3_path(filepath):
|
|
110
|
+
raise ValueError(f'S3StorageManager only supports S3 paths. Found: {filepath!r}.')
|
|
111
|
+
|
|
112
|
+
s3_client = self._get_client()
|
|
113
|
+
bucket_name, key = parse_s3_path(filepath)
|
|
114
|
+
return s3_client, bucket_name, key
|
|
115
|
+
|
|
116
|
+
def list_files(self, output_destination):
|
|
117
|
+
"""List files under the provided S3 output destination."""
|
|
118
|
+
if not self.handles_destination(output_destination):
|
|
119
|
+
raise ValueError(
|
|
120
|
+
f'S3StorageManager only supports S3 paths. Found: {output_destination!r}.'
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
s3_client = self._get_client()
|
|
124
|
+
bucket_name, key_prefix = parse_s3_path(output_destination)
|
|
125
|
+
return _list_s3_bucket_contents(s3_client, bucket_name, key_prefix)
|
|
126
|
+
|
|
127
|
+
def get_existing_filenames(self, output_destination):
|
|
128
|
+
"""Return the existing filenames for the given destination."""
|
|
129
|
+
return {obj['Key'] for obj in self.list_files(output_destination)}
|
|
130
|
+
|
|
131
|
+
def file_exists(self, filepath):
|
|
132
|
+
"""Check if a file exists in S3."""
|
|
133
|
+
s3_client, bucket_name, key = self._get_s3_resources(filepath)
|
|
134
|
+
try:
|
|
135
|
+
s3_client.head_object(Bucket=bucket_name, Key=key)
|
|
136
|
+
return True
|
|
137
|
+
except ClientError as error:
|
|
138
|
+
if error.response['Error']['Code'] == '404':
|
|
139
|
+
return False
|
|
140
|
+
|
|
141
|
+
raise
|
|
142
|
+
|
|
143
|
+
def read_csv(self, filepath):
|
|
144
|
+
"""Read a CSV artifact from S3."""
|
|
145
|
+
s3_client, bucket_name, key = self._get_s3_resources(filepath)
|
|
146
|
+
response = s3_client.get_object(Bucket=bucket_name, Key=key)
|
|
147
|
+
return pd.read_csv(io.BytesIO(response['Body'].read()))
|
|
148
|
+
|
|
149
|
+
def write_csv(self, result, filepath):
|
|
150
|
+
self._get_writer().write_dataframe(result, filepath, index=False)
|
|
151
|
+
|
|
152
|
+
def _load_job_result(self, filepath):
|
|
153
|
+
if not self.file_exists(filepath):
|
|
154
|
+
return None
|
|
155
|
+
|
|
156
|
+
return self.read_csv(filepath)
|
|
157
|
+
|
|
158
|
+
def update_metainfo(self, filepath, content):
|
|
159
|
+
"""Update metainfo for an artifact."""
|
|
160
|
+
self._get_writer().write_yaml(data=content, file_path=filepath, append=True)
|
|
161
|
+
|
|
162
|
+
def delete(self, filepath):
|
|
163
|
+
"""Delete an artifact from storage."""
|
|
164
|
+
s3_client, bucket_name, key = self._get_s3_resources(filepath)
|
|
165
|
+
try:
|
|
166
|
+
s3_client.delete_object(Bucket=bucket_name, Key=key)
|
|
167
|
+
LOGGER.info(f'Deleted S3 object {filepath} successfully.')
|
|
168
|
+
|
|
169
|
+
except (ClientError, BotoCoreError):
|
|
170
|
+
LOGGER.exception(f'Failed to delete S3 object {filepath}.')
|
|
171
|
+
raise
|
|
172
|
+
|
|
173
|
+
def save_pickle(self, object, filepath):
|
|
174
|
+
"""Save a picklable object to S3."""
|
|
175
|
+
self._get_writer().write_pickle(object, filepath)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from sdgym._benchmark_launcher.utils import (
|
|
2
2
|
_AWS_CREDENTIAL_KEYS,
|
|
3
3
|
_GCP_SERVICE_ACCOUNT_REQUIRED_KEYS,
|
|
4
|
+
_REQUIRED_CANONICAL_KEYS,
|
|
4
5
|
_is_unique_string_list,
|
|
5
6
|
resolve_credentials,
|
|
6
7
|
)
|
|
@@ -9,7 +10,7 @@ _INJECTED_PARAMS = {
|
|
|
9
10
|
'credentials',
|
|
10
11
|
'synthesizers',
|
|
11
12
|
'sdv_datasets',
|
|
12
|
-
'
|
|
13
|
+
'compute',
|
|
13
14
|
'output_destination',
|
|
14
15
|
}
|
|
15
16
|
|
|
@@ -61,12 +62,33 @@ def _validate_structure(config):
|
|
|
61
62
|
compute = getattr(config, 'compute', None)
|
|
62
63
|
if isinstance(compute, dict):
|
|
63
64
|
service = compute.get('service')
|
|
64
|
-
if service
|
|
65
|
-
errors.append(
|
|
65
|
+
if service is None:
|
|
66
|
+
errors.append('compute.service: is required but missing.')
|
|
66
67
|
|
|
67
68
|
return sorted(errors)
|
|
68
69
|
|
|
69
70
|
|
|
71
|
+
def _validate_compute_canonical(compute):
|
|
72
|
+
errors = []
|
|
73
|
+
for key in _REQUIRED_CANONICAL_KEYS:
|
|
74
|
+
if not compute.get(key):
|
|
75
|
+
errors.append(f'compute.{key} is required but missing.')
|
|
76
|
+
|
|
77
|
+
gpu_count = int(compute.get('gpu_count') or 0)
|
|
78
|
+
if gpu_count > 0 and not compute.get('gpu_type'):
|
|
79
|
+
errors.append('compute.gpu_type is required when compute.gpu_count > 0.')
|
|
80
|
+
|
|
81
|
+
return sorted(errors)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _validate_compute(compute):
|
|
85
|
+
"""Validate the 'compute' section of the config.
|
|
86
|
+
|
|
87
|
+
This includes validating the canonical compute keys and any service-specific requirements.
|
|
88
|
+
"""
|
|
89
|
+
return _validate_compute_canonical(compute)
|
|
90
|
+
|
|
91
|
+
|
|
70
92
|
def _validate_method_params(method_params, method_to_run):
|
|
71
93
|
errors = []
|
|
72
94
|
timeout = method_params.get('timeout')
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
method_params:
|
|
2
|
+
timeout: 345600
|
|
3
|
+
compute_quality_score: true
|
|
4
|
+
compute_diagnostic_score: true
|
|
5
|
+
|
|
6
|
+
compute:
|
|
7
|
+
service: gcp
|
|
8
|
+
instance_type: n1-highmem-16
|
|
9
|
+
boot_image: projects/deeplearning-platform-release/global/images/family/common-cu129-ubuntu-2204-nvidia-580
|
|
10
|
+
root_disk_gb: 300
|
|
11
|
+
gpu_type: nvidia-tesla-t4
|
|
12
|
+
gpu_count: 1
|
|
13
|
+
swap_gb: 64
|
|
14
|
+
name_prefix: sdgym-run
|
|
15
|
+
|
|
16
|
+
credentials_filepath: null
|
|
@@ -7,6 +7,7 @@ import yaml
|
|
|
7
7
|
|
|
8
8
|
from sdgym._benchmark_launcher._validation import (
|
|
9
9
|
_format_sectioned_errors,
|
|
10
|
+
_validate_compute,
|
|
10
11
|
_validate_credentials,
|
|
11
12
|
_validate_instance_jobs,
|
|
12
13
|
_validate_method_params,
|
|
@@ -75,6 +76,7 @@ class BenchmarkConfig:
|
|
|
75
76
|
section_errors = {
|
|
76
77
|
'method_params': _validate_method_params(self.method_params, method_to_run),
|
|
77
78
|
'credentials_filepath': _validate_credentials(self.credentials_filepath),
|
|
79
|
+
'compute': _validate_compute(self.compute),
|
|
78
80
|
'instance_jobs': _validate_instance_jobs(self.instance_jobs),
|
|
79
81
|
}
|
|
80
82
|
if any(section_errors.values()):
|
|
@@ -11,11 +11,14 @@ from sdgym._benchmark_launcher._storage_manager import S3StorageManager
|
|
|
11
11
|
from sdgym._benchmark_launcher.utils import (
|
|
12
12
|
_METHODS,
|
|
13
13
|
_add_dataset_suffix,
|
|
14
|
+
_build_instance_artifact_filepaths,
|
|
15
|
+
_build_job_artifact_filepaths,
|
|
14
16
|
_build_job_artifact_keys,
|
|
15
17
|
_build_job_output_destination,
|
|
16
18
|
_get_top_folder_prefix,
|
|
17
19
|
_resolve_datasets,
|
|
18
20
|
generate_ids,
|
|
21
|
+
resolve_compute,
|
|
19
22
|
resolve_credentials,
|
|
20
23
|
)
|
|
21
24
|
|
|
@@ -59,9 +62,10 @@ class BenchmarkLauncher:
|
|
|
59
62
|
])
|
|
60
63
|
self._launch_to_instance_names = {}
|
|
61
64
|
self._instance_name_to_status = {}
|
|
62
|
-
self.
|
|
65
|
+
self._instance_name_to_artifacts = {}
|
|
63
66
|
self._instance_manager = self._build_instance_manager()
|
|
64
67
|
self._storage_manager = self._build_storage_manager()
|
|
68
|
+
self._timestamp = None
|
|
65
69
|
|
|
66
70
|
def _build_storage_manager(self):
|
|
67
71
|
"""Build the storage manager."""
|
|
@@ -84,39 +88,71 @@ class BenchmarkLauncher:
|
|
|
84
88
|
|
|
85
89
|
raise NotImplementedError(f'Compute service {self.compute_service!r} is not supported.')
|
|
86
90
|
|
|
87
|
-
def
|
|
88
|
-
"""Return the
|
|
89
|
-
return
|
|
91
|
+
def _add_filename_suffix(self, filename, suffix):
|
|
92
|
+
"""Return the filename with the instance suffix."""
|
|
93
|
+
return filename if suffix == 0 else f'{filename}({suffix})'
|
|
90
94
|
|
|
91
|
-
def
|
|
92
|
-
"""Build the
|
|
93
|
-
artifact_key_prefix = _get_top_folder_prefix(
|
|
95
|
+
def _build_instance_artifacts(self, datasets, synthesizers, output_destination, instance_idx):
|
|
96
|
+
"""Build the artifact information for one instance."""
|
|
97
|
+
artifact_key_prefix, modality_prefix = _get_top_folder_prefix(
|
|
98
|
+
output_destination, self.modality
|
|
99
|
+
)
|
|
94
100
|
jobs = []
|
|
101
|
+
|
|
95
102
|
for dataset in datasets:
|
|
96
103
|
artifact_dataset = _add_dataset_suffix(dataset)
|
|
97
104
|
for synthesizer in synthesizers:
|
|
98
|
-
artifact_synthesizer = self.
|
|
105
|
+
artifact_synthesizer = self._add_filename_suffix(synthesizer, instance_idx)
|
|
106
|
+
job_output_destination = _build_job_output_destination(
|
|
107
|
+
output_destination=output_destination,
|
|
108
|
+
artifact_key_prefix=artifact_key_prefix,
|
|
109
|
+
artifact_dataset=artifact_dataset,
|
|
110
|
+
artifact_synthesizer=artifact_synthesizer,
|
|
111
|
+
)
|
|
112
|
+
benchmark_fp, synthetic_data_fp, synthesizer_fp = _build_job_artifact_filepaths(
|
|
113
|
+
artifact_key_prefix=artifact_key_prefix,
|
|
114
|
+
artifact_dataset=artifact_dataset,
|
|
115
|
+
artifact_synthesizer=artifact_synthesizer,
|
|
116
|
+
modality=self.modality,
|
|
117
|
+
output_destination=output_destination,
|
|
118
|
+
)
|
|
119
|
+
|
|
99
120
|
jobs.append({
|
|
100
121
|
'dataset': dataset,
|
|
101
122
|
'synthesizer': synthesizer,
|
|
102
123
|
'artifact_dataset': artifact_dataset,
|
|
103
124
|
'artifact_synthesizer': artifact_synthesizer,
|
|
104
125
|
'artifact_key_prefix': artifact_key_prefix,
|
|
105
|
-
'
|
|
106
|
-
'
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
artifact_dataset=artifact_dataset,
|
|
110
|
-
artifact_synthesizer=artifact_synthesizer,
|
|
111
|
-
),
|
|
126
|
+
'job_output_destination': job_output_destination,
|
|
127
|
+
'benchmark_result_filepath': benchmark_fp,
|
|
128
|
+
'synthetic_data_filepath': synthetic_data_fp,
|
|
129
|
+
'synthesizer_filepath': synthesizer_fp,
|
|
112
130
|
})
|
|
113
131
|
|
|
114
|
-
|
|
132
|
+
metainfo_name = self._add_filename_suffix('metainfo', instance_idx)
|
|
133
|
+
results_name = self._add_filename_suffix('results', instance_idx)
|
|
134
|
+
metainfo_fp, result_fp, job_arg_fp = _build_instance_artifact_filepaths(
|
|
135
|
+
output_destination=output_destination,
|
|
136
|
+
artifact_key_prefix=artifact_key_prefix,
|
|
137
|
+
modality_prefix=modality_prefix,
|
|
138
|
+
metainfo_name=metainfo_name,
|
|
139
|
+
results_name=results_name,
|
|
140
|
+
)
|
|
141
|
+
results = {
|
|
142
|
+
'jobs': jobs,
|
|
143
|
+
'output_destination': output_destination,
|
|
144
|
+
'result_filepath': result_fp,
|
|
145
|
+
'metainfo_filepath': metainfo_fp,
|
|
146
|
+
'job_arg_filepath': job_arg_fp,
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
return results
|
|
115
150
|
|
|
116
151
|
def _launch(self):
|
|
117
152
|
launch_id = generate_ids(['LAUNCH_ID'])
|
|
118
153
|
self._launch_to_instance_names[launch_id] = []
|
|
119
154
|
credentials = resolve_credentials(self.benchmark_config.credentials_filepath)
|
|
155
|
+
compute = resolve_compute(self.benchmark_config.compute)
|
|
120
156
|
|
|
121
157
|
for instance_idx, instance_job in enumerate(self.benchmark_config.instance_jobs):
|
|
122
158
|
datasets = _resolve_datasets(instance_job['datasets'])
|
|
@@ -128,19 +164,20 @@ class BenchmarkLauncher:
|
|
|
128
164
|
synthesizers=synthesizers,
|
|
129
165
|
sdv_datasets=datasets,
|
|
130
166
|
credentials=credentials,
|
|
131
|
-
compute_config=
|
|
167
|
+
compute_config=compute,
|
|
132
168
|
**self.benchmark_config.method_params,
|
|
133
169
|
)
|
|
134
|
-
|
|
135
170
|
self._launch_to_instance_names[launch_id].append(instance_name)
|
|
136
171
|
self._instance_name_to_status[instance_name] = 'running'
|
|
137
|
-
self.
|
|
172
|
+
self._instance_name_to_artifacts[instance_name] = self._build_instance_artifacts(
|
|
138
173
|
datasets=datasets,
|
|
139
174
|
synthesizers=synthesizers,
|
|
140
175
|
output_destination=output_destination,
|
|
141
176
|
instance_idx=instance_idx,
|
|
142
177
|
)
|
|
143
178
|
|
|
179
|
+
self._timestamp = pd.Timestamp.now().strftime('%d_%m_%Y %H:%M:%S')
|
|
180
|
+
|
|
144
181
|
def launch(self):
|
|
145
182
|
"""Run the BenchmarkConfig: validate it and then execute the specified benchmark method."""
|
|
146
183
|
if not self.benchmark_config._is_validated:
|
|
@@ -274,10 +311,10 @@ class BenchmarkLauncher:
|
|
|
274
311
|
instances = self._validate_instance_names(instance_names)
|
|
275
312
|
output_destinations = []
|
|
276
313
|
for instance_name in instances:
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
314
|
+
instance_artifacts = self._instance_name_to_artifacts.get(instance_name, {})
|
|
315
|
+
output_destination = instance_artifacts.get('output_destination')
|
|
316
|
+
if output_destination not in output_destinations:
|
|
317
|
+
output_destinations.append(output_destination)
|
|
281
318
|
|
|
282
319
|
return output_destinations
|
|
283
320
|
|
|
@@ -387,7 +424,7 @@ class BenchmarkLauncher:
|
|
|
387
424
|
}
|
|
388
425
|
rows = []
|
|
389
426
|
for instance_name in instances:
|
|
390
|
-
jobs = self.
|
|
427
|
+
jobs = self._instance_name_to_artifacts.get(instance_name, {}).get('jobs', [])
|
|
391
428
|
instance_rows = self._get_instance_job_rows(
|
|
392
429
|
instance_name=instance_name,
|
|
393
430
|
jobs=jobs,
|
|
@@ -400,6 +437,79 @@ class BenchmarkLauncher:
|
|
|
400
437
|
|
|
401
438
|
return pd.DataFrame(rows)
|
|
402
439
|
|
|
440
|
+
def _build_missing_result_row(self, job):
|
|
441
|
+
"""Build a result row for a job missing its benchmark_result.csv."""
|
|
442
|
+
return pd.DataFrame([
|
|
443
|
+
{
|
|
444
|
+
'Dataset': job['dataset'],
|
|
445
|
+
'Synthesizer': job['synthesizer'],
|
|
446
|
+
'Dataset_Size_MB': None,
|
|
447
|
+
'Train_Time': None,
|
|
448
|
+
'Peak_Memory_MB': None,
|
|
449
|
+
'Synthesizer_Size_MB': None,
|
|
450
|
+
'Sample_Time': None,
|
|
451
|
+
'Evaluate_Time': None,
|
|
452
|
+
'Error': 'Instance Deadline Error',
|
|
453
|
+
}
|
|
454
|
+
])
|
|
455
|
+
|
|
456
|
+
def _build_or_load_instance_results(self, instance_name):
|
|
457
|
+
"""Get instance result table.
|
|
458
|
+
|
|
459
|
+
If the instance's result file exists in storage, load and return it.
|
|
460
|
+
Otherwise, build the result table by loading each job's result file if it exists,
|
|
461
|
+
or adding a row with an error if it doesn't.
|
|
462
|
+
"""
|
|
463
|
+
jobs = self._instance_name_to_artifacts[instance_name]['jobs']
|
|
464
|
+
results_filepath = self._instance_name_to_artifacts[instance_name]['result_filepath']
|
|
465
|
+
if self._storage_manager.file_exists(results_filepath):
|
|
466
|
+
return self._storage_manager.read_csv(results_filepath)
|
|
467
|
+
|
|
468
|
+
frames = []
|
|
469
|
+
for job in jobs:
|
|
470
|
+
job_result = self._storage_manager._load_job_result(job['benchmark_result_filepath'])
|
|
471
|
+
if job_result is None:
|
|
472
|
+
frames.append(self._build_missing_result_row(job))
|
|
473
|
+
else:
|
|
474
|
+
frames.append(job_result)
|
|
475
|
+
|
|
476
|
+
return pd.concat(frames, ignore_index=True)
|
|
477
|
+
|
|
478
|
+
def _update_instance_metainfo(self, instance_name):
|
|
479
|
+
"""Update the instance metainfo file with the completion date."""
|
|
480
|
+
metainfo_filepath = self._instance_name_to_artifacts[instance_name]['metainfo_filepath']
|
|
481
|
+
content = {'completed_date': pd.Timestamp.now().strftime('%d_%m_%Y %H:%M:%S')}
|
|
482
|
+
self._storage_manager.update_metainfo(metainfo_filepath, content)
|
|
483
|
+
|
|
484
|
+
def finalize(self):
|
|
485
|
+
"""Finalize the benchmark using the results available so far.
|
|
486
|
+
|
|
487
|
+
This method is used for an early stop scenario. For each launched instance,
|
|
488
|
+
it builds or loads the instance-level results file from the available job
|
|
489
|
+
artifacts, updates the metainfo file, and removes temporary job argument
|
|
490
|
+
artifacts. Missing job results are preserved as incomplete or failed entries
|
|
491
|
+
in the final output.
|
|
492
|
+
|
|
493
|
+
Once the available artifacts have been saved, all remaining running
|
|
494
|
+
instances are terminated.
|
|
495
|
+
"""
|
|
496
|
+
self._validate_compute_service()
|
|
497
|
+
self._update_instance_statuses()
|
|
498
|
+
for instance_name in self._get_all_instance_names():
|
|
499
|
+
instance_artifacts = self._instance_name_to_artifacts[instance_name]
|
|
500
|
+
result_filepath = instance_artifacts['result_filepath']
|
|
501
|
+
job_arg_filepath = instance_artifacts['job_arg_filepath']
|
|
502
|
+
result_df = self._build_or_load_instance_results(instance_name)
|
|
503
|
+
self._storage_manager.write_csv(result=result_df, filepath=result_filepath)
|
|
504
|
+
self._storage_manager.delete(job_arg_filepath)
|
|
505
|
+
self._update_instance_metainfo(instance_name)
|
|
506
|
+
|
|
507
|
+
self.terminate(verbose=True)
|
|
508
|
+
|
|
509
|
+
def save_to_cloud(self, filepath):
|
|
510
|
+
"""Save the benchmark launcher in the cloud using the storage manager."""
|
|
511
|
+
self._storage_manager.save_pickle(self, filepath)
|
|
512
|
+
|
|
403
513
|
def save(self, filepath):
|
|
404
514
|
"""Save the benchmark configuration to a file."""
|
|
405
515
|
with open(filepath, 'wb') as output:
|
|
@@ -156,7 +156,7 @@ def _split_instance_jobs(instance_job):
|
|
|
156
156
|
raise ValueError('Cannot split the instance job any further.')
|
|
157
157
|
|
|
158
158
|
|
|
159
|
-
def
|
|
159
|
+
def _build_instance_artifacts(datasets, synthesizers, num_instances, output_destination):
|
|
160
160
|
"""Build exactly ``num_instances`` instance jobs."""
|
|
161
161
|
max_jobs = len(synthesizers) * len(datasets)
|
|
162
162
|
if num_instances > max_jobs:
|
|
@@ -223,7 +223,7 @@ def build_dict_from_args(args):
|
|
|
223
223
|
num_instances = num_instances if num_instances is not None else DEFAULT_NUM_INSTANCES
|
|
224
224
|
return {
|
|
225
225
|
'method_params': method_params,
|
|
226
|
-
'instance_jobs':
|
|
226
|
+
'instance_jobs': _build_instance_artifacts(
|
|
227
227
|
datasets=datasets,
|
|
228
228
|
synthesizers=synthesizers,
|
|
229
229
|
num_instances=num_instances,
|