sdgym 0.12.2.dev0__tar.gz → 0.13.1.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sdgym-0.12.2.dev0/sdgym.egg-info → sdgym-0.13.1.dev0}/PKG-INFO +31 -19
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/pyproject.toml +32 -20
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/__init__.py +1 -1
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_benchmark/credentials_utils.py +1 -1
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/benchmark.py +12 -12
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/__main__.py +4 -4
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/summary.py +6 -6
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/result_writer.py +2 -0
- sdgym-0.13.1.dev0/sdgym/run_benchmark/run_benchmark.py +206 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/run_benchmark/upload_benchmark_results.py +20 -14
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/run_benchmark/utils.py +41 -16
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/utils.py +9 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0/sdgym.egg-info}/PKG-INFO +31 -19
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/requires.txt +29 -13
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/tests/test_tasks.py +1 -1
- sdgym-0.12.2.dev0/sdgym/run_benchmark/run_benchmark.py +0 -152
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/LICENSE +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/README.md +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_benchmark/__init__.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_benchmark/benchmark.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_benchmark/config_utils.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/_dataset_utils.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/__init__.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/collect.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/cli/utils.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/dataset_explorer.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/datasets.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/errors.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/metrics.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/progress.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/result_explorer/__init__.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/result_explorer/result_explorer.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/result_explorer/result_handler.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/run_benchmark/__init__.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/s3.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizer_descriptions.yaml +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/__init__.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/base.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/column.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/generate.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/identity.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/realtabformer.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/sdv.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/uniform.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym/synthesizers/utils.py +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/SOURCES.txt +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/dependency_links.txt +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/entry_points.txt +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/sdgym.egg-info/top_level.txt +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/setup.cfg +0 -0
- {sdgym-0.12.2.dev0 → sdgym-0.13.1.dev0}/tests/test_scripts.py +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sdgym
|
|
3
|
-
Version: 0.12.2.dev0
|
|
3
|
+
Version: 0.13.1.dev0
|
|
4
4
|
Summary: Benchmark tabular synthetic data generators using a variety of datasets
|
|
5
5
|
Author-email: "DataCebo, Inc." <info@sdv.dev>
|
|
6
|
-
License:
|
|
6
|
+
License-Expression: BUSL-1.1
|
|
7
7
|
Project-URL: Source Code, https://github.com/sdv-dev/SDGym/
|
|
8
8
|
Project-URL: Issue Tracker, https://github.com/sdv-dev/SDGym/issues
|
|
9
9
|
Project-URL: Changes, https://github.com/sdv-dev/SDGym/blob/main/HISTORY.md
|
|
@@ -12,7 +12,6 @@ Project-URL: Chat, https://bit.ly/sdv-slack-invite
|
|
|
12
12
|
Keywords: machine learning,synthetic data generation,benchmark,generative models
|
|
13
13
|
Classifier: Development Status :: 2 - Pre-Alpha
|
|
14
14
|
Classifier: Intended Audience :: Developers
|
|
15
|
-
Classifier: License :: Free for non-commercial use
|
|
16
15
|
Classifier: Natural Language :: English
|
|
17
16
|
Classifier: Programming Language :: Python :: 3
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -20,44 +19,57 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
22
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
23
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
-
Requires-Python: <3.
|
|
24
|
+
Requires-Python: <3.15,>=3.9
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
License-File: LICENSE
|
|
27
27
|
Requires-Dist: appdirs>=1.3
|
|
28
28
|
Requires-Dist: boto3<2,>=1.28
|
|
29
29
|
Requires-Dist: botocore<2,>=1.31
|
|
30
|
-
Requires-Dist: cloudpickle>=2.1.0
|
|
30
|
+
Requires-Dist: cloudpickle>=2.1.0; python_version < "3.14"
|
|
31
|
+
Requires-Dist: cloudpickle>=3.1.1; python_version >= "3.14"
|
|
31
32
|
Requires-Dist: compress-pickle>=1.2.0
|
|
32
|
-
Requires-Dist: google-cloud-compute>=1.
|
|
33
|
-
Requires-Dist: google-auth>=2.
|
|
33
|
+
Requires-Dist: google-cloud-compute>=1.30.0
|
|
34
|
+
Requires-Dist: google-auth>=2.14.1
|
|
34
35
|
Requires-Dist: humanfriendly>=10.0
|
|
35
36
|
Requires-Dist: numpy>=1.22.2; python_version < "3.10"
|
|
36
37
|
Requires-Dist: numpy>=1.24.0; python_version >= "3.10" and python_version < "3.12"
|
|
37
38
|
Requires-Dist: numpy>=1.26.0; python_version >= "3.12" and python_version < "3.13"
|
|
38
|
-
Requires-Dist: numpy>=2.1.0; python_version >= "3.13"
|
|
39
|
+
Requires-Dist: numpy>=2.1.0; python_version >= "3.13" and python_version < "3.14"
|
|
40
|
+
Requires-Dist: numpy>=2.3.2; python_version >= "3.14"
|
|
39
41
|
Requires-Dist: openpyxl>=3.1.2
|
|
40
|
-
Requires-Dist: pandas<3
|
|
41
|
-
Requires-Dist: pandas<3
|
|
42
|
-
Requires-Dist: pandas<3
|
|
43
|
-
Requires-Dist: pandas<3
|
|
44
|
-
Requires-Dist:
|
|
42
|
+
Requires-Dist: pandas<3,>=1.4.0; python_version < "3.11"
|
|
43
|
+
Requires-Dist: pandas<3,>=1.5.0; python_version >= "3.11" and python_version < "3.12"
|
|
44
|
+
Requires-Dist: pandas<3,>=2.1.1; python_version >= "3.12" and python_version < "3.13"
|
|
45
|
+
Requires-Dist: pandas<3,>=2.2.3; python_version >= "3.13" and python_version < "3.14"
|
|
46
|
+
Requires-Dist: pandas<3,>=2.3.3; python_version >= "3.14"
|
|
47
|
+
Requires-Dist: psutil>=5.8
|
|
45
48
|
Requires-Dist: scikit-learn>=1.0.2; python_version < "3.10"
|
|
46
49
|
Requires-Dist: scikit-learn>=1.1.0; python_version >= "3.10" and python_version < "3.11"
|
|
47
50
|
Requires-Dist: scikit-learn>=1.1.3; python_version >= "3.11" and python_version < "3.12"
|
|
48
51
|
Requires-Dist: scikit-learn>=1.3.1; python_version >= "3.12" and python_version < "3.13"
|
|
49
|
-
Requires-Dist: scikit-learn>=1.5.2; python_version >= "3.13"
|
|
52
|
+
Requires-Dist: scikit-learn>=1.5.2; python_version >= "3.13" and python_version < "3.14"
|
|
53
|
+
Requires-Dist: scikit-learn>=1.8.0; python_version >= "3.14"
|
|
50
54
|
Requires-Dist: scipy>=1.7.3; python_version < "3.10"
|
|
51
55
|
Requires-Dist: scipy>=1.9.2; python_version >= "3.10" and python_version < "3.12"
|
|
52
56
|
Requires-Dist: scipy>=1.12.0; python_version >= "3.12" and python_version < "3.13"
|
|
53
|
-
Requires-Dist: scipy>=1.14.1; python_version >= "3.13"
|
|
57
|
+
Requires-Dist: scipy>=1.14.1; python_version >= "3.13" and python_version < "3.14"
|
|
58
|
+
Requires-Dist: scipy>=1.16.1; python_version >= "3.14"
|
|
54
59
|
Requires-Dist: tabulate<0.9,>=0.8.3
|
|
55
|
-
Requires-Dist: torch>=
|
|
60
|
+
Requires-Dist: torch>=1.13.0; python_version < "3.11"
|
|
61
|
+
Requires-Dist: torch>=2.0.0; python_version >= "3.11" and python_version < "3.12"
|
|
62
|
+
Requires-Dist: torch>=2.3.0; python_version >= "3.12" and python_version < "3.13"
|
|
63
|
+
Requires-Dist: torch>=2.6.0; python_version >= "3.13" and python_version < "3.14"
|
|
64
|
+
Requires-Dist: torch>=2.9.0; python_version >= "3.14"
|
|
56
65
|
Requires-Dist: tqdm>=4.66.3
|
|
57
66
|
Requires-Dist: XlsxWriter>=1.2.8
|
|
58
|
-
Requires-Dist: rdt>=1.
|
|
59
|
-
Requires-Dist:
|
|
60
|
-
Requires-Dist:
|
|
67
|
+
Requires-Dist: rdt>=1.18.2; python_version < "3.14"
|
|
68
|
+
Requires-Dist: rdt>=1.20.0; python_version >= "3.14"
|
|
69
|
+
Requires-Dist: sdmetrics>=0.21.0; python_version < "3.14"
|
|
70
|
+
Requires-Dist: sdmetrics>=0.26.0; python_version >= "3.14"
|
|
71
|
+
Requires-Dist: sdv>=1.21.0; python_version < "3.14"
|
|
72
|
+
Requires-Dist: sdv>=1.33.0; python_version >= "3.14"
|
|
61
73
|
Provides-Extra: dask
|
|
62
74
|
Requires-Dist: dask; extra == "dask"
|
|
63
75
|
Requires-Dist: distributed; extra == "dask"
|
|
@@ -5,7 +5,6 @@ authors = [{ name = 'DataCebo, Inc.', email = 'info@sdv.dev' }]
|
|
|
5
5
|
classifiers = [
|
|
6
6
|
'Development Status :: 2 - Pre-Alpha',
|
|
7
7
|
'Intended Audience :: Developers',
|
|
8
|
-
'License :: Free for non-commercial use',
|
|
9
8
|
'Natural Language :: English',
|
|
10
9
|
'Programming Language :: Python :: 3',
|
|
11
10
|
'Programming Language :: Python :: 3.9',
|
|
@@ -13,48 +12,62 @@ classifiers = [
|
|
|
13
12
|
'Programming Language :: Python :: 3.11',
|
|
14
13
|
'Programming Language :: Python :: 3.12',
|
|
15
14
|
'Programming Language :: Python :: 3.13',
|
|
15
|
+
'Programming Language :: Python :: 3.14',
|
|
16
16
|
'Topic :: Scientific/Engineering :: Artificial Intelligence',
|
|
17
17
|
]
|
|
18
18
|
keywords = ['machine learning', 'synthetic data generation', 'benchmark', 'generative models']
|
|
19
19
|
dynamic = ['version']
|
|
20
|
-
license =
|
|
21
|
-
|
|
20
|
+
license = 'BUSL-1.1'
|
|
21
|
+
license-files = ['LICENSE']
|
|
22
|
+
requires-python = '>=3.9,<3.15'
|
|
22
23
|
readme = 'README.md'
|
|
23
24
|
dependencies = [
|
|
24
25
|
'appdirs>=1.3',
|
|
25
26
|
'boto3>=1.28,<2',
|
|
26
27
|
'botocore>=1.31,<2',
|
|
27
|
-
|
|
28
|
+
"cloudpickle>=2.1.0;python_version<'3.14'",
|
|
29
|
+
"cloudpickle>=3.1.1;python_version>='3.14'",
|
|
28
30
|
'compress-pickle>=1.2.0',
|
|
29
|
-
'google-cloud-compute>=1.
|
|
30
|
-
'google-auth>=2.
|
|
31
|
+
'google-cloud-compute>=1.30.0',
|
|
32
|
+
'google-auth>=2.14.1',
|
|
31
33
|
'humanfriendly>=10.0',
|
|
32
34
|
"numpy>=1.22.2;python_version<'3.10'",
|
|
33
35
|
"numpy>=1.24.0;python_version>='3.10' and python_version<'3.12'",
|
|
34
36
|
"numpy>=1.26.0;python_version>='3.12' and python_version<'3.13'",
|
|
35
|
-
"numpy>=2.1.0;python_version>='3.13'",
|
|
37
|
+
"numpy>=2.1.0;python_version>='3.13' and python_version<'3.14'",
|
|
38
|
+
"numpy>=2.3.2;python_version>='3.14'",
|
|
36
39
|
'openpyxl>=3.1.2',
|
|
37
|
-
"pandas>=1.4.0,<3
|
|
38
|
-
"pandas>=1.5.0,<3
|
|
39
|
-
"pandas>=2.1.1,<3
|
|
40
|
-
"pandas>=2.2.3,<3
|
|
41
|
-
|
|
40
|
+
"pandas>=1.4.0,<3;python_version<'3.11'",
|
|
41
|
+
"pandas>=1.5.0,<3;python_version>='3.11' and python_version<'3.12'",
|
|
42
|
+
"pandas>=2.1.1,<3;python_version>='3.12' and python_version<'3.13'",
|
|
43
|
+
"pandas>=2.2.3,<3;python_version>='3.13' and python_version<'3.14'",
|
|
44
|
+
"pandas>=2.3.3,<3;python_version>='3.14'",
|
|
45
|
+
'psutil>=5.8',
|
|
42
46
|
"scikit-learn>=1.0.2;python_version<'3.10'",
|
|
43
47
|
"scikit-learn>=1.1.0;python_version>='3.10' and python_version<'3.11'",
|
|
44
48
|
"scikit-learn>=1.1.3;python_version>='3.11' and python_version<'3.12'",
|
|
45
49
|
"scikit-learn>=1.3.1;python_version>='3.12' and python_version<'3.13'",
|
|
46
|
-
"scikit-learn>=1.5.2;python_version>='3.13'",
|
|
50
|
+
"scikit-learn>=1.5.2;python_version>='3.13' and python_version<'3.14'",
|
|
51
|
+
"scikit-learn>=1.8.0;python_version>='3.14'",
|
|
47
52
|
"scipy>=1.7.3;python_version<'3.10'",
|
|
48
53
|
"scipy>=1.9.2;python_version>='3.10' and python_version<'3.12'",
|
|
49
54
|
"scipy>=1.12.0;python_version>='3.12' and python_version<'3.13'",
|
|
50
|
-
"scipy>=1.14.1;python_version>='3.13'",
|
|
55
|
+
"scipy>=1.14.1;python_version>='3.13' and python_version<'3.14'",
|
|
56
|
+
"scipy>=1.16.1;python_version>='3.14'",
|
|
51
57
|
'tabulate>=0.8.3,<0.9',
|
|
52
|
-
"torch>=
|
|
58
|
+
"torch>=1.13.0;python_version<'3.11'",
|
|
59
|
+
"torch>=2.0.0;python_version>='3.11' and python_version<'3.12'",
|
|
60
|
+
"torch>=2.3.0;python_version>='3.12' and python_version<'3.13'",
|
|
61
|
+
"torch>=2.6.0;python_version>='3.13' and python_version<'3.14'",
|
|
62
|
+
"torch>=2.9.0;python_version>='3.14'",
|
|
53
63
|
'tqdm>=4.66.3',
|
|
54
64
|
'XlsxWriter>=1.2.8',
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
65
|
+
"rdt>=1.18.2;python_version<'3.14'",
|
|
66
|
+
"rdt>=1.20.0;python_version>='3.14'",
|
|
67
|
+
"sdmetrics>=0.21.0;python_version<'3.14'",
|
|
68
|
+
"sdmetrics>=0.26.0;python_version>='3.14'",
|
|
69
|
+
"sdv>=1.21.0;python_version<'3.14'",
|
|
70
|
+
"sdv>=1.33.0;python_version>='3.14'",
|
|
58
71
|
]
|
|
59
72
|
|
|
60
73
|
[project.urls]
|
|
@@ -113,7 +126,6 @@ all = [
|
|
|
113
126
|
|
|
114
127
|
[tool.setuptools]
|
|
115
128
|
include-package-data = true
|
|
116
|
-
license-files = ['LICENSE']
|
|
117
129
|
|
|
118
130
|
[tool.setuptools.packages.find]
|
|
119
131
|
include = ['sdgym', 'sdgym.*']
|
|
@@ -149,7 +161,7 @@ namespaces = false
|
|
|
149
161
|
version = {attr = 'sdgym.__version__'}
|
|
150
162
|
|
|
151
163
|
[tool.bumpversion]
|
|
152
|
-
current_version = "0.12.2.dev0"
|
|
164
|
+
current_version = "0.13.1.dev0"
|
|
153
165
|
parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
|
|
154
166
|
serialize = [
|
|
155
167
|
'{major}.{minor}.{patch}.{release}{candidate}',
|
|
@@ -74,7 +74,7 @@ def sdv_install_cmd(credentials):
|
|
|
74
74
|
pip install sdv-installer
|
|
75
75
|
|
|
76
76
|
python -c "from sdv_installer.installation.installer import install_packages; \\
|
|
77
|
-
install_packages(username='{username}', license_key='{license_key}'
|
|
77
|
+
install_packages(username='{username}', license_key='{license_key}')"
|
|
78
78
|
""")
|
|
79
79
|
|
|
80
80
|
|
|
@@ -514,7 +514,7 @@ def _compute_scores(
|
|
|
514
514
|
for metric_name, metric in metrics.items():
|
|
515
515
|
scores.append({
|
|
516
516
|
'metric': metric_name,
|
|
517
|
-
'
|
|
517
|
+
'Error': 'Metric Timeout',
|
|
518
518
|
})
|
|
519
519
|
# re-inject list to multiprocessing output
|
|
520
520
|
output['scores'] = scores
|
|
@@ -537,7 +537,7 @@ def _compute_scores(
|
|
|
537
537
|
scores[-1].update({
|
|
538
538
|
'score': score,
|
|
539
539
|
'normalized_score': normalized_score,
|
|
540
|
-
'
|
|
540
|
+
'Error': error,
|
|
541
541
|
'metric_time': calculate_score_time(start),
|
|
542
542
|
})
|
|
543
543
|
# re-inject list to multiprocessing output
|
|
@@ -603,7 +603,7 @@ def _score(
|
|
|
603
603
|
output = {}
|
|
604
604
|
|
|
605
605
|
output['timeout'] = True # To be deleted if there is no error
|
|
606
|
-
output['
|
|
606
|
+
output['Error'] = 'Load Timeout' # To be deleted if there is no error
|
|
607
607
|
try:
|
|
608
608
|
LOGGER.info(
|
|
609
609
|
'Running %s on %s dataset %s; %s',
|
|
@@ -615,7 +615,7 @@ def _score(
|
|
|
615
615
|
|
|
616
616
|
output['dataset_size'] = get_size_of(data) / N_BYTES_IN_MB
|
|
617
617
|
# To be deleted if there is no error
|
|
618
|
-
output['
|
|
618
|
+
output['Error'] = 'Synthesizer Timeout'
|
|
619
619
|
|
|
620
620
|
try:
|
|
621
621
|
synthetic_data, train_time, sample_time, synthesizer_size, peak_memory = _synthesize(
|
|
@@ -642,7 +642,7 @@ def _score(
|
|
|
642
642
|
)
|
|
643
643
|
|
|
644
644
|
# No error so far. _compute_scores tracks its own errors by metric
|
|
645
|
-
del output['
|
|
645
|
+
del output['Error']
|
|
646
646
|
_compute_scores(
|
|
647
647
|
metrics,
|
|
648
648
|
data,
|
|
@@ -671,14 +671,14 @@ def _score(
|
|
|
671
671
|
output['peak_memory'] = err.peak_memory
|
|
672
672
|
|
|
673
673
|
output['exception'] = err.exception
|
|
674
|
-
output['
|
|
674
|
+
output['Error'] = err.error
|
|
675
675
|
output['timeout'] = False
|
|
676
676
|
|
|
677
677
|
except Exception:
|
|
678
678
|
LOGGER.exception('Error running %s on dataset %s;', synthesizer['name'], dataset_name)
|
|
679
679
|
exception, error = format_exception()
|
|
680
680
|
output['exception'] = exception
|
|
681
|
-
output['
|
|
681
|
+
output['Error'] = error
|
|
682
682
|
output['timeout'] = False # There was no timeout
|
|
683
683
|
|
|
684
684
|
finally:
|
|
@@ -744,7 +744,7 @@ def _score_with_timeout(
|
|
|
744
744
|
thread.join(timeout)
|
|
745
745
|
if thread.is_alive():
|
|
746
746
|
LOGGER.error('Timeout running %s on dataset %s;', synthesizer['name'], dataset_name)
|
|
747
|
-
return {'timeout': True, '
|
|
747
|
+
return {'timeout': True, 'Error': 'Synthesizer Timeout'}
|
|
748
748
|
|
|
749
749
|
return output
|
|
750
750
|
|
|
@@ -815,8 +815,8 @@ def _format_output(
|
|
|
815
815
|
for score in output.get('scores', []):
|
|
816
816
|
scores.insert(len(scores.columns), score['metric'], score['normalized_score'])
|
|
817
817
|
|
|
818
|
-
if '
|
|
819
|
-
scores['
|
|
818
|
+
if 'Error' in output:
|
|
819
|
+
scores['Error'] = output['Error']
|
|
820
820
|
|
|
821
821
|
return scores
|
|
822
822
|
|
|
@@ -1085,8 +1085,8 @@ def _add_adjusted_scores(scores, timeout):
|
|
|
1085
1085
|
|
|
1086
1086
|
fit_times = scores.loc[dataset_mask, 'Train_Time'].fillna(0)
|
|
1087
1087
|
sample_times = scores.loc[dataset_mask, 'Sample_Time'].fillna(0)
|
|
1088
|
-
if '
|
|
1089
|
-
errors = scores.loc[dataset_mask, '
|
|
1088
|
+
if 'Error' in scores.columns:
|
|
1089
|
+
errors = scores.loc[dataset_mask, 'Error']
|
|
1090
1090
|
else:
|
|
1091
1091
|
errors = pd.Series([None] * dataset_mask.sum(), index=scores.index[dataset_mask])
|
|
1092
1092
|
|
|
@@ -39,13 +39,13 @@ def _print_table(data, sort=None, reverse=False, format=None):
|
|
|
39
39
|
for field, formatter in format.items():
|
|
40
40
|
data[field] = data[field].apply(formatter)
|
|
41
41
|
|
|
42
|
-
if '
|
|
43
|
-
error = data['
|
|
42
|
+
if 'Error' in data:
|
|
43
|
+
error = data['Error']
|
|
44
44
|
if pd.isna(error).all():
|
|
45
|
-
del data['
|
|
45
|
+
del data['Error']
|
|
46
46
|
else:
|
|
47
47
|
long_error = error.str.len() > 30
|
|
48
|
-
data.loc[long_error, '
|
|
48
|
+
data.loc[long_error, 'Error'] = error[long_error].str[:30] + '...'
|
|
49
49
|
|
|
50
50
|
print(tabulate.tabulate(data, tablefmt='github', headers=data.columns, showindex=False)) # noqa: T201
|
|
51
51
|
|
|
@@ -35,11 +35,11 @@ def preprocess(data):
|
|
|
35
35
|
bydataset = grouped.mean()
|
|
36
36
|
data = bydataset.reset_index()
|
|
37
37
|
|
|
38
|
-
if '
|
|
38
|
+
if 'Error' in data.columns:
|
|
39
39
|
errors = data.error.fillna('')
|
|
40
40
|
for message, column in KNOWN_ERRORS:
|
|
41
41
|
data[column] = errors.str.contains(message)
|
|
42
|
-
data.loc[data[column], '
|
|
42
|
+
data.loc[data[column], 'Error'] = np.nan
|
|
43
43
|
|
|
44
44
|
return data
|
|
45
45
|
|
|
@@ -122,7 +122,7 @@ def summarize(data, baselines=(), datasets=None):
|
|
|
122
122
|
baseline_scores = baseline_data.set_index('Dataset').Quality_Score
|
|
123
123
|
results[f'beat_{baseline.lower()}'] = _beat_baseline(data, baseline_scores)
|
|
124
124
|
|
|
125
|
-
if '
|
|
125
|
+
if 'Error' in data.columns:
|
|
126
126
|
grouped = data.groupby('Synthesizer')
|
|
127
127
|
for _, error_column in KNOWN_ERRORS:
|
|
128
128
|
results[error_column] = grouped[error_column].sum()
|
|
@@ -135,7 +135,7 @@ def summarize(data, baselines=(), datasets=None):
|
|
|
135
135
|
|
|
136
136
|
|
|
137
137
|
def _error_counts(data):
|
|
138
|
-
if '
|
|
138
|
+
if 'Error' in data.columns:
|
|
139
139
|
return data.error.value_counts()
|
|
140
140
|
return 0
|
|
141
141
|
|
|
@@ -158,8 +158,8 @@ def errors_summary(data):
|
|
|
158
158
|
Returns:
|
|
159
159
|
pandas.DataFrame
|
|
160
160
|
"""
|
|
161
|
-
if '
|
|
162
|
-
all_errors = pd.DataFrame(_error_counts(data)).rename(columns={'
|
|
161
|
+
if 'Error' in data.columns:
|
|
162
|
+
all_errors = pd.DataFrame(_error_counts(data)).rename(columns={'Error': 'all'})
|
|
163
163
|
synthesizer_errors = data.groupby('Synthesizer').apply(_error_counts).pivot_table(level=0)
|
|
164
164
|
for synthesizer, errors in synthesizer_errors.items():
|
|
165
165
|
all_errors[synthesizer] = errors.fillna(0).astype(int)
|
|
@@ -12,6 +12,7 @@ import yaml
|
|
|
12
12
|
from openpyxl import load_workbook
|
|
13
13
|
|
|
14
14
|
from sdgym.s3 import parse_s3_path
|
|
15
|
+
from sdgym.utils import _set_column_width
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class ResultsWriter(ABC):
|
|
@@ -79,6 +80,7 @@ class LocalResultsWriter:
|
|
|
79
80
|
with writer:
|
|
80
81
|
for sheet_name, df in data.items():
|
|
81
82
|
df.to_excel(writer, sheet_name=sheet_name, index=index)
|
|
83
|
+
_set_column_width(writer, df, sheet_name)
|
|
82
84
|
|
|
83
85
|
wb = load_workbook(file_path)
|
|
84
86
|
for sheet_name in reversed(data.keys()):
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Script to run a benchmark and upload results to S3."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
|
|
7
|
+
from botocore.exceptions import ClientError
|
|
8
|
+
|
|
9
|
+
from sdgym._benchmark.benchmark import (
|
|
10
|
+
_benchmark_multi_table_compute_gcp,
|
|
11
|
+
_benchmark_single_table_compute_gcp,
|
|
12
|
+
)
|
|
13
|
+
from sdgym.run_benchmark.utils import (
|
|
14
|
+
KEY_DATE_FILE,
|
|
15
|
+
OUTPUT_DESTINATION_AWS,
|
|
16
|
+
_exclude_datasets,
|
|
17
|
+
_parse_args,
|
|
18
|
+
get_result_folder_name,
|
|
19
|
+
post_benchmark_launch_message,
|
|
20
|
+
)
|
|
21
|
+
from sdgym.s3 import get_s3_client, parse_s3_path
|
|
22
|
+
|
|
23
|
+
# Dataset names used to build the single-table job split in
# ``_get_benchmark_setup``; they are passed on as ``sdv_datasets`` by ``main``.
SINGLE_TABLE_DATASETS = [
    'adult',
    'alarm',
    'census',
    'child',
    'covtype',
    'expedia_hotel_logs',
    'insurance',
    'intrusion',
    'news',
]
|
|
34
|
+
# Dataset names used to build the multi-table job split in
# ``_get_benchmark_setup``; they are passed on as ``sdv_datasets`` by ``main``.
MULTI_TABLE_DATASETS = [
    'WebKP',
    'DCG',
    'UW_std',
    'Same_gen',
    'CORA',
    'got_families',
    'SalesDB',
    'UTube',
    'Student_loan',
    'Hepatitis_std',
    'Elti',
    'Bupa',
    'Toxicology',
    'imdb_ijs',
    'ftp',
    'imdb_small',
    'imdb_MovieLens',
    'Pima',
    'university',
    'legalActs',
    'Dunur',
    'Mesh',
    'world',
    'airbnb-simplified',
    'trains',
    'FNHK',
    'fake_hotels',
    'SAT',
    'genes',
    'Biodegradability',
    'Pyrimidine',
    'mutagenesis',
    'restbase',
    'Triazine',
    'Carcinogenesis',
    'fake_hotels_extended',
    'Mooney_Family',
    'PTE',
    'Facebook',
    'multi_table_ID_demo_dataset',
    'SAP',
    'Chess',
    'Countries',
    'NCAA',
    'Atherosclerosis',
    'nations',
    'TubePricing',
    'financial',
    'Accidents',
    'MuskSmall',
    'NBA',
    'AustralianFootball',
    'PremierLeague',
    'OMOP_CDM_dayz',
]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _get_benchmark_setup(modality):
    """Get the benchmark setup for a given modality.

    The setup includes the method to run the benchmark and the job split,
    which is a list of tuples where each tuple contains a list of synthesizers and
    a list of datasets to run those synthesizers on.

    Args:
        modality (str):
            Either ``'single_table'`` or ``'multi_table'``.

    Returns:
        dict:
            A dictionary with the keys ``'method'`` (the callable that launches one
            job) and ``'job_split'`` (the list of ``(synthesizers, datasets)`` tuples).

    Raises:
        ValueError:
            If ``modality`` is not one of the supported values.
    """
    if modality == 'single_table':
        real_tab_former_to_exclude = ['covtype', 'intrusion', 'expedia_hotel_logs', 'census']
        gan_to_exclude = ['covtype', 'intrusion']
        job_split = [
            (['ColumnSynthesizer', 'GaussianCopulaSynthesizer'], SINGLE_TABLE_DATASETS),
            (['TVAESynthesizer'], SINGLE_TABLE_DATASETS),
            (['SegmentSynthesizer'], SINGLE_TABLE_DATASETS),
            (['XGCSynthesizer'], SINGLE_TABLE_DATASETS),
            (['BootstrapSynthesizer'], SINGLE_TABLE_DATASETS),
            (['CTGANSynthesizer'], _exclude_datasets(SINGLE_TABLE_DATASETS, gan_to_exclude)),
            (['CopulaGANSynthesizer'], _exclude_datasets(SINGLE_TABLE_DATASETS, gan_to_exclude)),
            (
                ['RealTabFormerSynthesizer'],
                _exclude_datasets(SINGLE_TABLE_DATASETS, real_tab_former_to_exclude),
            ),
        ]
        # Datasets excluded from the grouped jobs above are still benchmarked:
        # each one gets a dedicated single-dataset job per synthesizer.
        for dataset in real_tab_former_to_exclude:
            job_split.append((['RealTabFormerSynthesizer'], [dataset]))

        for dataset in gan_to_exclude:
            job_split.append((['CTGANSynthesizer'], [dataset]))
            job_split.append((['CopulaGANSynthesizer'], [dataset]))

        return {
            'method': _benchmark_single_table_compute_gcp,
            'job_split': job_split,
        }

    if modality == 'multi_table':
        hma_to_exclude = [
            'Accidents',
            'AustralianFootball',
            'Countries',
            'MuskSmall',
            'NBA',
            'OMOP_CDM_dayz',
            'PremierLeague',
            'SalesDB',
            'airbnb-simplified',
            'imdb_ijs',
            'legalActs',
            'SAP',
            'imdb_MovieLens',
        ]
        job_split = [
            (['HSASynthesizer', 'IndependentSynthesizer'], MULTI_TABLE_DATASETS),
            (['HMASynthesizer'], _exclude_datasets(MULTI_TABLE_DATASETS, hma_to_exclude)),
        ]
        # Same scheme as single table: every excluded dataset runs as its own job.
        for dataset in hma_to_exclude:
            job_split.append((['HMASynthesizer'], [dataset]))

        return {
            'method': _benchmark_multi_table_compute_gcp,
            'job_split': job_split,
        }

    # Fail loudly instead of implicitly returning None, which would only surface
    # later as an unrelated "NoneType is not subscriptable" error in the caller.
    raise ValueError(
        f"Unsupported modality '{modality}'. Expected 'single_table' or 'multi_table'."
    )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def append_benchmark_run(
    aws_access_key_id, aws_secret_access_key, date_str, modality='single_table'
):
    """Append a new benchmark run to the benchmark dates file in S3.

    The dates file is read from ``{prefix}{modality}/{KEY_DATE_FILE}`` inside the
    bucket parsed from ``OUTPUT_DESTINATION_AWS``. If the file does not exist yet,
    an empty list of runs is used. The new run is appended, the runs are sorted
    by date and the file is written back to the same key.

    Args:
        aws_access_key_id (str):
            AWS access key id used to create the S3 client.
        aws_secret_access_key (str):
            AWS secret access key used to create the S3 client.
        date_str (str):
            Date of the run; also used to derive the result folder name.
        modality (str):
            Benchmark modality used as sub-folder of the key.
            Defaults to ``'single_table'``.

    Raises:
        RuntimeError:
            If reading the dates file fails for any reason other than the key
            not existing.
    """
    s3_client = get_s3_client(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
    # The same key is used for the read and the write, so build it only once.
    key = f'{prefix}{modality}/{KEY_DATE_FILE}'
    try:
        response = s3_client.get_object(Bucket=bucket, Key=key)
        body = response['Body'].read().decode('utf-8')
        data = json.loads(body)
    except ClientError as e:
        if e.response['Error']['Code'] != 'NoSuchKey':
            # Chain explicitly so the original S3 error stays attached as the cause.
            raise RuntimeError(f'Failed to read {KEY_DATE_FILE} from S3: {e}') from e

        # First run for this modality: start from an empty history.
        data = {'runs': []}

    data['runs'].append({'date': date_str, 'folder_name': get_result_folder_name(date_str)})
    data['runs'] = sorted(data['runs'], key=lambda x: x['date'])
    s3_client.put_object(
        Bucket=bucket,
        Key=key,
        Body=json.dumps(data).encode('utf-8'),
    )
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def main():
    """Launch every benchmark job for the requested modality and record the run.

    Reads the modality from the parsed command line arguments, launches one job
    per ``(synthesizers, datasets)`` entry of the job split, appends the run to
    the benchmark dates file and posts the launch message.
    """
    cli_args = _parse_args()
    access_key = os.getenv('AWS_ACCESS_KEY_ID')
    secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
    run_date = datetime.now(timezone.utc).strftime('%Y-%m-%d')
    modality = cli_args.modality
    setup = _get_benchmark_setup(modality)
    for job_synthesizers, job_datasets in setup['job_split']:
        launch_job = setup['method']
        launch_job(
            output_destination=OUTPUT_DESTINATION_AWS,
            credential_filepath=os.getenv('CREDENTIALS_FILEPATH'),
            synthesizers=job_synthesizers,
            sdv_datasets=job_datasets,
            timeout=4 * 24 * 60 * 60,  # 4 days, i.e. 345600
        )

    append_benchmark_run(access_key, secret_key, run_date, modality=modality)
    post_benchmark_launch_message(run_date, compute_service='GCP', modality=modality)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# Run the benchmark launcher only when this module is executed as a script.
if __name__ == '__main__':
    main()
|
|
@@ -29,6 +29,7 @@ from sdgym.run_benchmark.utils import (
|
|
|
29
29
|
get_df_to_plot,
|
|
30
30
|
)
|
|
31
31
|
from sdgym.s3 import S3_REGION, parse_s3_path
|
|
32
|
+
from sdgym.utils import _set_column_width
|
|
32
33
|
|
|
33
34
|
LOGGER = logging.getLogger(__name__)
|
|
34
35
|
SYNTHESIZER_TO_GLOBAL_POSITION = {
|
|
@@ -231,7 +232,6 @@ def get_model_details(summary, results, df_to_plot, modality):
|
|
|
231
232
|
with open(SYNTHESIZER_DESCRIPTION_PATH, 'r', encoding='utf-8') as f:
|
|
232
233
|
synthesizer_info = yaml.safe_load(f) or {}
|
|
233
234
|
|
|
234
|
-
err_column = 'error' if 'error' in results.columns else 'Error'
|
|
235
235
|
paretos_synthesizers = (
|
|
236
236
|
df_to_plot.loc[df_to_plot['Pareto'].eq(True), 'Synthesizer'].astype(str).add('Synthesizer')
|
|
237
237
|
)
|
|
@@ -258,18 +258,23 @@ def get_model_details(summary, results, df_to_plot, modality):
|
|
|
258
258
|
model_details['Number of datasets - Wins'] = (
|
|
259
259
|
model_details['Synthesizer'].map(wins).fillna(0).astype(int)
|
|
260
260
|
)
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
261
|
+
if 'Error' in results.columns:
|
|
262
|
+
timeout_counts = (
|
|
263
|
+
results
|
|
264
|
+
.loc[results['Error'].eq('Synthesizer Timeout')]
|
|
265
|
+
.groupby('Synthesizer')['Dataset']
|
|
266
|
+
.nunique()
|
|
267
|
+
)
|
|
268
|
+
error_counts = (
|
|
269
|
+
results
|
|
270
|
+
.loc[results['Error'].notna() & ~results['Error'].eq('Synthesizer Timeout')]
|
|
271
|
+
.groupby('Synthesizer')['Dataset']
|
|
272
|
+
.nunique()
|
|
273
|
+
)
|
|
274
|
+
else:
|
|
275
|
+
timeout_counts = pd.Series(0, index=model_details['Synthesizer'])
|
|
276
|
+
error_counts = pd.Series(0, index=model_details['Synthesizer'])
|
|
277
|
+
|
|
273
278
|
model_details['Number of datasets - Timeout'] = (
|
|
274
279
|
model_details['Synthesizer'].map(timeout_counts).fillna(0).astype(int)
|
|
275
280
|
)
|
|
@@ -313,7 +318,8 @@ def update_table_aws(s3_client, bucket, filename, table, reference_column):
|
|
|
313
318
|
updated_table = pd.concat([existing_table, table], ignore_index=True)
|
|
314
319
|
output = io.BytesIO()
|
|
315
320
|
with pd.ExcelWriter(output, engine='openpyxl') as writer:
|
|
316
|
-
updated_table.to_excel(writer, index=False)
|
|
321
|
+
updated_table.to_excel(writer, index=False, sheet_name='Sheet1')
|
|
322
|
+
_set_column_width(writer, updated_table, 'Sheet1')
|
|
317
323
|
|
|
318
324
|
output.seek(0)
|
|
319
325
|
s3_client.upload_fileobj(output, bucket, filename)
|
|
@@ -6,13 +6,15 @@ from datetime import datetime
|
|
|
6
6
|
from urllib.parse import parse_qs, quote_plus, urlparse
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from scipy.interpolate import interp1d
|
|
9
11
|
from slack_sdk import WebClient
|
|
10
12
|
|
|
11
13
|
from sdgym.s3 import parse_s3_path
|
|
12
14
|
|
|
13
15
|
OUTPUT_DESTINATION_AWS = 's3://sdgym-benchmark/Benchmarks/'
|
|
14
16
|
DEBUG_SLACK_CHANNEL = 'sdv-alerts-debug'
|
|
15
|
-
SLACK_CHANNEL = '
|
|
17
|
+
SLACK_CHANNEL = 'sdgym'
|
|
16
18
|
KEY_DATE_FILE = '_BENCHMARK_DATES.json'
|
|
17
19
|
PLOTLY_MARKERS = [
|
|
18
20
|
'circle',
|
|
@@ -45,18 +47,7 @@ PLOTLY_MARKERS = [
|
|
|
45
47
|
'diamond-cross',
|
|
46
48
|
'diamond-x',
|
|
47
49
|
]
|
|
48
|
-
|
|
49
|
-
# The synthesizers inside the same list will be run by the same ec2 instance
|
|
50
|
-
SYNTHESIZERS_SPLIT_SINGLE_TABLE = [
|
|
51
|
-
['UniformSynthesizer', 'ColumnSynthesizer', 'GaussianCopulaSynthesizer', 'TVAESynthesizer'],
|
|
52
|
-
['CopulaGANSynthesizer'],
|
|
53
|
-
['CTGANSynthesizer'],
|
|
54
|
-
['RealTabFormerSynthesizer'],
|
|
55
|
-
]
|
|
56
|
-
SYNTHESIZERS_SPLIT_MULTI_TABLE = [
|
|
57
|
-
['HMASynthesizer'],
|
|
58
|
-
['HSASynthesizer', 'IndependentSynthesizer', 'MultiTableUniformSynthesizer'],
|
|
59
|
-
]
|
|
50
|
+
PLOT_PADDING = 0.25
|
|
60
51
|
|
|
61
52
|
|
|
62
53
|
def _get_filename_to_gdrive_link():
|
|
@@ -104,7 +95,7 @@ def post_slack_message(channel, text):
|
|
|
104
95
|
|
|
105
96
|
|
|
106
97
|
def post_benchmark_launch_message(date_str, compute_service='AWS', modality='single_table'):
|
|
107
|
-
"""Post a message to the
|
|
98
|
+
"""Post a message to the sdgym Slack channel when the benchmark is launched."""
|
|
108
99
|
channel = SLACK_CHANNEL
|
|
109
100
|
folder_name = get_result_folder_name(date_str)
|
|
110
101
|
bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
|
|
@@ -116,7 +107,7 @@ def post_benchmark_launch_message(date_str, compute_service='AWS', modality='sin
|
|
|
116
107
|
|
|
117
108
|
|
|
118
109
|
def post_benchmark_uploaded_message(folder_name, commit_url=None, modality='single_table'):
|
|
119
|
-
"""Post benchmark uploaded message to
|
|
110
|
+
"""Post benchmark uploaded message to the sdgym Slack channel."""
|
|
120
111
|
file_to_gdrive_link = _get_filename_to_gdrive_link()
|
|
121
112
|
channel = SLACK_CHANNEL
|
|
122
113
|
bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
|
|
@@ -136,6 +127,34 @@ def post_benchmark_uploaded_message(folder_name, commit_url=None, modality='sing
|
|
|
136
127
|
post_slack_message(channel, body)
|
|
137
128
|
|
|
138
129
|
|
|
130
|
+
def _add_pareto_curve_extremity_points(df_to_plot):
|
|
131
|
+
"""Add extremity points to the Pareto curve for better visualization."""
|
|
132
|
+
pareto = df_to_plot.loc[df_to_plot['Pareto']].sort_values('Aggregated_Time')
|
|
133
|
+
if len(pareto) < 2:
|
|
134
|
+
return df_to_plot.reset_index(drop=True) # Not enough points to define a curve
|
|
135
|
+
|
|
136
|
+
interp = interp1d(
|
|
137
|
+
pareto['Log10 Aggregated_Time'],
|
|
138
|
+
pareto['Quality_Score'],
|
|
139
|
+
kind='linear',
|
|
140
|
+
fill_value='extrapolate',
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
min_log = np.log10(df_to_plot['Aggregated_Time'].min()) - PLOT_PADDING
|
|
144
|
+
max_log = np.log10(df_to_plot['Aggregated_Time'].max()) + PLOT_PADDING
|
|
145
|
+
extremities = pd.DataFrame({
|
|
146
|
+
'Synthesizer': np.nan,
|
|
147
|
+
'Aggregated_Time': 10 ** np.array([min_log, max_log]),
|
|
148
|
+
'Quality_Score': interp([min_log, max_log]),
|
|
149
|
+
'Log10 Aggregated_Time': [min_log, max_log],
|
|
150
|
+
'Pareto': True,
|
|
151
|
+
'Color': '#01E0C9',
|
|
152
|
+
'Marker': np.nan,
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
return pd.concat([df_to_plot, extremities], ignore_index=True).reset_index(drop=True)
|
|
156
|
+
|
|
157
|
+
|
|
139
158
|
def get_df_to_plot(benchmark_result):
|
|
140
159
|
"""Get the data to plot from the benchmark result.
|
|
141
160
|
|
|
@@ -177,8 +196,9 @@ def get_df_to_plot(benchmark_result):
|
|
|
177
196
|
}
|
|
178
197
|
df_to_plot['Marker'] = df_to_plot['Synthesizer'].map(marker_map)
|
|
179
198
|
df_to_plot = df_to_plot.rename(columns={'Adjusted_Quality_Score': 'Quality_Score'})
|
|
199
|
+
df_to_plot = df_to_plot.drop(columns=['Cumulative Quality Score'])
|
|
180
200
|
|
|
181
|
-
return df_to_plot
|
|
201
|
+
return _add_pareto_curve_extremity_points(df_to_plot)
|
|
182
202
|
|
|
183
203
|
|
|
184
204
|
def _parse_args():
|
|
@@ -203,3 +223,8 @@ def _extract_google_file_id(google_drive_link):
|
|
|
203
223
|
return parsed.path.split(marker, 1)[1].split('/', 1)[0]
|
|
204
224
|
|
|
205
225
|
raise ValueError(f'Invalid Google Drive link format: {google_drive_link}')
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _exclude_datasets(datasets, dataset_to_exclude):
|
|
229
|
+
"""Exclude datasets that are in the dataset_to_exclude list."""
|
|
230
|
+
return [dataset for dataset in datasets if dataset not in dataset_to_exclude]
|
|
@@ -11,6 +11,7 @@ import humanfriendly
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
import pandas as pd
|
|
13
13
|
import psutil
|
|
14
|
+
from openpyxl.utils import get_column_letter
|
|
14
15
|
|
|
15
16
|
from sdgym.errors import SDGymError
|
|
16
17
|
from sdgym.synthesizers.base import BaselineSynthesizer
|
|
@@ -195,3 +196,11 @@ def convert_metadata_to_sdmetrics(metadata_dict):
|
|
|
195
196
|
"""Convert a sdv metadata dictionary into sdmetrics expected metadata."""
|
|
196
197
|
table_name = next(iter(metadata_dict['tables']))
|
|
197
198
|
return metadata_dict['tables'][table_name]
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _set_column_width(writer, df, sheet_name):
|
|
202
|
+
worksheet = writer.sheets[sheet_name]
|
|
203
|
+
for col_idx, column in enumerate(df.columns, 1):
|
|
204
|
+
max_length = max(df[column].astype(str).map(len).max(), len(column))
|
|
205
|
+
column_letter = get_column_letter(col_idx)
|
|
206
|
+
worksheet.column_dimensions[column_letter].width = max_length + 2
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sdgym
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.13.1.dev0
|
|
4
4
|
Summary: Benchmark tabular synthetic data generators using a variety of datasets
|
|
5
5
|
Author-email: "DataCebo, Inc." <info@sdv.dev>
|
|
6
|
-
License:
|
|
6
|
+
License-Expression: BUSL-1.1
|
|
7
7
|
Project-URL: Source Code, https://github.com/sdv-dev/SDGym/
|
|
8
8
|
Project-URL: Issue Tracker, https://github.com/sdv-dev/SDGym/issues
|
|
9
9
|
Project-URL: Changes, https://github.com/sdv-dev/SDGym/blob/main/HISTORY.md
|
|
@@ -12,7 +12,6 @@ Project-URL: Chat, https://bit.ly/sdv-slack-invite
|
|
|
12
12
|
Keywords: machine learning,synthetic data generation,benchmark,generative models
|
|
13
13
|
Classifier: Development Status :: 2 - Pre-Alpha
|
|
14
14
|
Classifier: Intended Audience :: Developers
|
|
15
|
-
Classifier: License :: Free for non-commercial use
|
|
16
15
|
Classifier: Natural Language :: English
|
|
17
16
|
Classifier: Programming Language :: Python :: 3
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -20,44 +19,57 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
22
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
23
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
-
Requires-Python: <3.
|
|
24
|
+
Requires-Python: <3.15,>=3.9
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
License-File: LICENSE
|
|
27
27
|
Requires-Dist: appdirs>=1.3
|
|
28
28
|
Requires-Dist: boto3<2,>=1.28
|
|
29
29
|
Requires-Dist: botocore<2,>=1.31
|
|
30
|
-
Requires-Dist: cloudpickle>=2.1.0
|
|
30
|
+
Requires-Dist: cloudpickle>=2.1.0; python_version < "3.14"
|
|
31
|
+
Requires-Dist: cloudpickle>=3.1.1; python_version >= "3.14"
|
|
31
32
|
Requires-Dist: compress-pickle>=1.2.0
|
|
32
|
-
Requires-Dist: google-cloud-compute>=1.
|
|
33
|
-
Requires-Dist: google-auth>=2.
|
|
33
|
+
Requires-Dist: google-cloud-compute>=1.30.0
|
|
34
|
+
Requires-Dist: google-auth>=2.14.1
|
|
34
35
|
Requires-Dist: humanfriendly>=10.0
|
|
35
36
|
Requires-Dist: numpy>=1.22.2; python_version < "3.10"
|
|
36
37
|
Requires-Dist: numpy>=1.24.0; python_version >= "3.10" and python_version < "3.12"
|
|
37
38
|
Requires-Dist: numpy>=1.26.0; python_version >= "3.12" and python_version < "3.13"
|
|
38
|
-
Requires-Dist: numpy>=2.1.0; python_version >= "3.13"
|
|
39
|
+
Requires-Dist: numpy>=2.1.0; python_version >= "3.13" and python_version < "3.14"
|
|
40
|
+
Requires-Dist: numpy>=2.3.2; python_version >= "3.14"
|
|
39
41
|
Requires-Dist: openpyxl>=3.1.2
|
|
40
|
-
Requires-Dist: pandas<3
|
|
41
|
-
Requires-Dist: pandas<3
|
|
42
|
-
Requires-Dist: pandas<3
|
|
43
|
-
Requires-Dist: pandas<3
|
|
44
|
-
Requires-Dist:
|
|
42
|
+
Requires-Dist: pandas<3,>=1.4.0; python_version < "3.11"
|
|
43
|
+
Requires-Dist: pandas<3,>=1.5.0; python_version >= "3.11" and python_version < "3.12"
|
|
44
|
+
Requires-Dist: pandas<3,>=2.1.1; python_version >= "3.12" and python_version < "3.13"
|
|
45
|
+
Requires-Dist: pandas<3,>=2.2.3; python_version >= "3.13" and python_version < "3.14"
|
|
46
|
+
Requires-Dist: pandas<3,>=2.3.3; python_version >= "3.14"
|
|
47
|
+
Requires-Dist: psutil>=5.8
|
|
45
48
|
Requires-Dist: scikit-learn>=1.0.2; python_version < "3.10"
|
|
46
49
|
Requires-Dist: scikit-learn>=1.1.0; python_version >= "3.10" and python_version < "3.11"
|
|
47
50
|
Requires-Dist: scikit-learn>=1.1.3; python_version >= "3.11" and python_version < "3.12"
|
|
48
51
|
Requires-Dist: scikit-learn>=1.3.1; python_version >= "3.12" and python_version < "3.13"
|
|
49
|
-
Requires-Dist: scikit-learn>=1.5.2; python_version >= "3.13"
|
|
52
|
+
Requires-Dist: scikit-learn>=1.5.2; python_version >= "3.13" and python_version < "3.14"
|
|
53
|
+
Requires-Dist: scikit-learn>=1.8.0; python_version >= "3.14"
|
|
50
54
|
Requires-Dist: scipy>=1.7.3; python_version < "3.10"
|
|
51
55
|
Requires-Dist: scipy>=1.9.2; python_version >= "3.10" and python_version < "3.12"
|
|
52
56
|
Requires-Dist: scipy>=1.12.0; python_version >= "3.12" and python_version < "3.13"
|
|
53
|
-
Requires-Dist: scipy>=1.14.1; python_version >= "3.13"
|
|
57
|
+
Requires-Dist: scipy>=1.14.1; python_version >= "3.13" and python_version < "3.14"
|
|
58
|
+
Requires-Dist: scipy>=1.16.1; python_version >= "3.14"
|
|
54
59
|
Requires-Dist: tabulate<0.9,>=0.8.3
|
|
55
|
-
Requires-Dist: torch>=
|
|
60
|
+
Requires-Dist: torch>=1.13.0; python_version < "3.11"
|
|
61
|
+
Requires-Dist: torch>=2.0.0; python_version >= "3.11" and python_version < "3.12"
|
|
62
|
+
Requires-Dist: torch>=2.3.0; python_version >= "3.12" and python_version < "3.13"
|
|
63
|
+
Requires-Dist: torch>=2.6.0; python_version >= "3.13" and python_version < "3.14"
|
|
64
|
+
Requires-Dist: torch>=2.9.0; python_version >= "3.14"
|
|
56
65
|
Requires-Dist: tqdm>=4.66.3
|
|
57
66
|
Requires-Dist: XlsxWriter>=1.2.8
|
|
58
|
-
Requires-Dist: rdt>=1.
|
|
59
|
-
Requires-Dist:
|
|
60
|
-
Requires-Dist:
|
|
67
|
+
Requires-Dist: rdt>=1.18.2; python_version < "3.14"
|
|
68
|
+
Requires-Dist: rdt>=1.20.0; python_version >= "3.14"
|
|
69
|
+
Requires-Dist: sdmetrics>=0.21.0; python_version < "3.14"
|
|
70
|
+
Requires-Dist: sdmetrics>=0.26.0; python_version >= "3.14"
|
|
71
|
+
Requires-Dist: sdv>=1.21.0; python_version < "3.14"
|
|
72
|
+
Requires-Dist: sdv>=1.33.0; python_version >= "3.14"
|
|
61
73
|
Provides-Extra: dask
|
|
62
74
|
Requires-Dist: dask; extra == "dask"
|
|
63
75
|
Requires-Dist: distributed; extra == "dask"
|
|
@@ -1,20 +1,15 @@
|
|
|
1
1
|
appdirs>=1.3
|
|
2
2
|
boto3<2,>=1.28
|
|
3
3
|
botocore<2,>=1.31
|
|
4
|
-
cloudpickle>=2.1.0
|
|
5
4
|
compress-pickle>=1.2.0
|
|
6
|
-
google-cloud-compute>=1.
|
|
7
|
-
google-auth>=2.
|
|
5
|
+
google-cloud-compute>=1.30.0
|
|
6
|
+
google-auth>=2.14.1
|
|
8
7
|
humanfriendly>=10.0
|
|
9
8
|
openpyxl>=3.1.2
|
|
10
|
-
psutil>=5.
|
|
9
|
+
psutil>=5.8
|
|
11
10
|
tabulate<0.9,>=0.8.3
|
|
12
|
-
torch>=2.6.0
|
|
13
11
|
tqdm>=4.66.3
|
|
14
12
|
XlsxWriter>=1.2.8
|
|
15
|
-
rdt>=1.17.0
|
|
16
|
-
sdmetrics>=0.20.1
|
|
17
|
-
sdv>=1.21.0
|
|
18
13
|
|
|
19
14
|
[:python_version < "3.10"]
|
|
20
15
|
numpy>=1.22.2
|
|
@@ -22,7 +17,14 @@ scikit-learn>=1.0.2
|
|
|
22
17
|
scipy>=1.7.3
|
|
23
18
|
|
|
24
19
|
[:python_version < "3.11"]
|
|
25
|
-
pandas<3
|
|
20
|
+
pandas<3,>=1.4.0
|
|
21
|
+
torch>=1.13.0
|
|
22
|
+
|
|
23
|
+
[:python_version < "3.14"]
|
|
24
|
+
cloudpickle>=2.1.0
|
|
25
|
+
rdt>=1.18.2
|
|
26
|
+
sdmetrics>=0.21.0
|
|
27
|
+
sdv>=1.21.0
|
|
26
28
|
|
|
27
29
|
[:python_version >= "3.10" and python_version < "3.11"]
|
|
28
30
|
scikit-learn>=1.1.0
|
|
@@ -32,20 +34,34 @@ numpy>=1.24.0
|
|
|
32
34
|
scipy>=1.9.2
|
|
33
35
|
|
|
34
36
|
[:python_version >= "3.11" and python_version < "3.12"]
|
|
35
|
-
pandas<3
|
|
37
|
+
pandas<3,>=1.5.0
|
|
36
38
|
scikit-learn>=1.1.3
|
|
39
|
+
torch>=2.0.0
|
|
37
40
|
|
|
38
41
|
[:python_version >= "3.12" and python_version < "3.13"]
|
|
39
42
|
numpy>=1.26.0
|
|
40
|
-
pandas<3
|
|
43
|
+
pandas<3,>=2.1.1
|
|
41
44
|
scikit-learn>=1.3.1
|
|
42
45
|
scipy>=1.12.0
|
|
46
|
+
torch>=2.3.0
|
|
43
47
|
|
|
44
|
-
[:python_version >= "3.13"]
|
|
48
|
+
[:python_version >= "3.13" and python_version < "3.14"]
|
|
45
49
|
numpy>=2.1.0
|
|
46
|
-
pandas<3
|
|
50
|
+
pandas<3,>=2.2.3
|
|
47
51
|
scikit-learn>=1.5.2
|
|
48
52
|
scipy>=1.14.1
|
|
53
|
+
torch>=2.6.0
|
|
54
|
+
|
|
55
|
+
[:python_version >= "3.14"]
|
|
56
|
+
cloudpickle>=3.1.1
|
|
57
|
+
numpy>=2.3.2
|
|
58
|
+
pandas<3,>=2.3.3
|
|
59
|
+
scikit-learn>=1.8.0
|
|
60
|
+
scipy>=1.16.1
|
|
61
|
+
torch>=2.9.0
|
|
62
|
+
rdt>=1.20.0
|
|
63
|
+
sdmetrics>=0.26.0
|
|
64
|
+
sdv>=1.33.0
|
|
49
65
|
|
|
50
66
|
[all]
|
|
51
67
|
sdgym[dask,dev,test]
|
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
"""Script to run a benchmark and upload results to S3."""
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
import os
|
|
5
|
-
from datetime import datetime, timezone
|
|
6
|
-
|
|
7
|
-
from botocore.exceptions import ClientError
|
|
8
|
-
|
|
9
|
-
from sdgym._benchmark.benchmark import (
|
|
10
|
-
_benchmark_multi_table_compute_gcp,
|
|
11
|
-
_benchmark_single_table_compute_gcp,
|
|
12
|
-
)
|
|
13
|
-
from sdgym.run_benchmark.utils import (
|
|
14
|
-
KEY_DATE_FILE,
|
|
15
|
-
OUTPUT_DESTINATION_AWS,
|
|
16
|
-
SYNTHESIZERS_SPLIT_MULTI_TABLE,
|
|
17
|
-
SYNTHESIZERS_SPLIT_SINGLE_TABLE,
|
|
18
|
-
_parse_args,
|
|
19
|
-
get_result_folder_name,
|
|
20
|
-
post_benchmark_launch_message,
|
|
21
|
-
)
|
|
22
|
-
from sdgym.s3 import get_s3_client, parse_s3_path
|
|
23
|
-
|
|
24
|
-
MODALITY_TO_SETUP = {
|
|
25
|
-
'single_table': {
|
|
26
|
-
'method': _benchmark_single_table_compute_gcp,
|
|
27
|
-
'synthesizers_split': SYNTHESIZERS_SPLIT_SINGLE_TABLE,
|
|
28
|
-
'datasets': [
|
|
29
|
-
'adult',
|
|
30
|
-
'alarm',
|
|
31
|
-
'census',
|
|
32
|
-
'child',
|
|
33
|
-
'covtype',
|
|
34
|
-
'expedia_hotel_logs',
|
|
35
|
-
'insurance',
|
|
36
|
-
'intrusion',
|
|
37
|
-
'news',
|
|
38
|
-
],
|
|
39
|
-
},
|
|
40
|
-
'multi_table': {
|
|
41
|
-
'method': _benchmark_multi_table_compute_gcp,
|
|
42
|
-
'synthesizers_split': SYNTHESIZERS_SPLIT_MULTI_TABLE,
|
|
43
|
-
'datasets': [
|
|
44
|
-
'WebKP',
|
|
45
|
-
'DCG',
|
|
46
|
-
'UW_std',
|
|
47
|
-
'Same_gen',
|
|
48
|
-
'CORA',
|
|
49
|
-
'got_families',
|
|
50
|
-
'SalesDB',
|
|
51
|
-
'UTube',
|
|
52
|
-
'Student_loan',
|
|
53
|
-
'Hepatitis_std',
|
|
54
|
-
'Elti',
|
|
55
|
-
'Bupa',
|
|
56
|
-
'Toxicology',
|
|
57
|
-
'imdb_ijs',
|
|
58
|
-
'ftp',
|
|
59
|
-
'imdb_small',
|
|
60
|
-
'imdb_MovieLens',
|
|
61
|
-
'Pima',
|
|
62
|
-
'university',
|
|
63
|
-
'legalActs',
|
|
64
|
-
'Dunur',
|
|
65
|
-
'Mesh',
|
|
66
|
-
'world',
|
|
67
|
-
'airbnb-simplified',
|
|
68
|
-
'trains',
|
|
69
|
-
'FNHK',
|
|
70
|
-
'fake_hotels',
|
|
71
|
-
'SAT',
|
|
72
|
-
'genes',
|
|
73
|
-
'Biodegradability',
|
|
74
|
-
'Pyrimidine',
|
|
75
|
-
'mutagenesis',
|
|
76
|
-
'restbase',
|
|
77
|
-
'Triazine',
|
|
78
|
-
'Carcinogenesis',
|
|
79
|
-
'fake_hotels_extended',
|
|
80
|
-
'Mooney_Family',
|
|
81
|
-
'PTE',
|
|
82
|
-
'Facebook',
|
|
83
|
-
'multi_table_ID_demo_dataset',
|
|
84
|
-
'SAP',
|
|
85
|
-
'Chess',
|
|
86
|
-
'Countries',
|
|
87
|
-
'NCAA',
|
|
88
|
-
'Atherosclerosis',
|
|
89
|
-
'nations',
|
|
90
|
-
'TubePricing',
|
|
91
|
-
'financial',
|
|
92
|
-
'Accidents',
|
|
93
|
-
'MuskSmall',
|
|
94
|
-
'NBA',
|
|
95
|
-
'AustralianFootball',
|
|
96
|
-
'PremierLeague',
|
|
97
|
-
'OMOP_CDM_dayz',
|
|
98
|
-
],
|
|
99
|
-
},
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def append_benchmark_run(
|
|
104
|
-
aws_access_key_id, aws_secret_access_key, date_str, modality='single_table'
|
|
105
|
-
):
|
|
106
|
-
"""Append a new benchmark run to the benchmark dates file in S3."""
|
|
107
|
-
s3_client = get_s3_client(
|
|
108
|
-
aws_access_key_id=aws_access_key_id,
|
|
109
|
-
aws_secret_access_key=aws_secret_access_key,
|
|
110
|
-
)
|
|
111
|
-
bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
|
|
112
|
-
try:
|
|
113
|
-
object = s3_client.get_object(Bucket=bucket, Key=f'{prefix}{modality}/{KEY_DATE_FILE}')
|
|
114
|
-
body = object['Body'].read().decode('utf-8')
|
|
115
|
-
data = json.loads(body)
|
|
116
|
-
except ClientError as e:
|
|
117
|
-
if e.response['Error']['Code'] == 'NoSuchKey':
|
|
118
|
-
data = {'runs': []}
|
|
119
|
-
else:
|
|
120
|
-
raise RuntimeError(f'Failed to read {KEY_DATE_FILE} from S3: {e}')
|
|
121
|
-
|
|
122
|
-
data['runs'].append({'date': date_str, 'folder_name': get_result_folder_name(date_str)})
|
|
123
|
-
data['runs'] = sorted(data['runs'], key=lambda x: x['date'])
|
|
124
|
-
s3_client.put_object(
|
|
125
|
-
Bucket=bucket,
|
|
126
|
-
Key=f'{prefix}{modality}/{KEY_DATE_FILE}',
|
|
127
|
-
Body=json.dumps(data).encode('utf-8'),
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def main():
|
|
132
|
-
"""Main function to run the benchmark and upload results."""
|
|
133
|
-
args = _parse_args()
|
|
134
|
-
aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
|
|
135
|
-
aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
|
|
136
|
-
date_str = datetime.now(timezone.utc).strftime('%Y-%m-%d')
|
|
137
|
-
modality = args.modality
|
|
138
|
-
for synthesizer_group in MODALITY_TO_SETUP[modality]['synthesizers_split']:
|
|
139
|
-
MODALITY_TO_SETUP[modality]['method'](
|
|
140
|
-
output_destination=OUTPUT_DESTINATION_AWS,
|
|
141
|
-
credential_filepath=os.getenv('CREDENTIALS_FILEPATH'),
|
|
142
|
-
synthesizers=synthesizer_group,
|
|
143
|
-
sdv_datasets=MODALITY_TO_SETUP[modality]['datasets'],
|
|
144
|
-
timeout=345600, # 4 days
|
|
145
|
-
)
|
|
146
|
-
|
|
147
|
-
append_benchmark_run(aws_access_key_id, aws_secret_access_key, date_str, modality=modality)
|
|
148
|
-
post_benchmark_launch_message(date_str, compute_service='GCP', modality=modality)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
if __name__ == '__main__':
|
|
152
|
-
main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|